# Function Lib

In [None]:
from tqdm.notebook import tqdm  # For progress bars
from utils import dataset_analyze_rasp_ff, open_file_nf1, seconds_to_duration
import os
import plotly.express as px
import plotly.graph_objects as go
import time
from pathlib import Path
import pandas as pd
import numpy as np
import os

if 'result_df' not in globals():
    result_df = pd.DataFrame()

result_filenames = set(result_df['File name']) if not result_df.empty else set()

unique_filenames = set()

# Define paths
base_dir = './data/Experiment_Data'
reels_dir = os.path.join(base_dir, 'SIR_Experiment','Reels')
voice_dir = os.path.join(base_dir,'SIR_Experiment' ,'Voice call')
pubg_dir = os.path.join(base_dir, 'SIR_Experiment','pubg')

# Automatically collect all CSV files from both folders
reels_files = [os.path.join(reels_dir, f) for f in os.listdir(reels_dir) if f.endswith('.csv')]
voice_files = [os.path.join(voice_dir, f) for f in os.listdir(voice_dir) if f.endswith('.csv')]
pubg_files = [os.path.join(pubg_dir, f) for f in os.listdir(pubg_dir) if f.endswith('.csv')]

# Combine lists
file_list = reels_files + voice_files + pubg_files

# Manually defined blacklist of bad measurement files
blacklist_filenames = {
    "2_5_6pro_LTE_YTshorts_stat_64sps.csv",
    "1_5_6pro_LTE_tiktok_stat_64sps.csv",
    # Add more bad files here
}

# Process files
files_passed = 0
skipped = 0
duplicates_count = 0
problematic_files = []

print(len(file_list), "files total")  # Print total files
result_df = pd.DataFrame()
for file_path in file_list:
    file_name = os.path.basename(file_path)

    # Skip if already processed
    if file_name in result_filenames:
        print(f"{file_name} skipped because already processed")
        skipped += 1
        continue
    if file_name in blacklist_filenames:
        print(f"{file_name} skipped because it is blacklisted")
        skipped += 1
        continue
    try:
        files_passed += 1
        print(f"{file_name} passed. Count: {files_passed}")
        result_df=dataset_analyze_rasp_ff(file_path, result_df)
    except Exception as e:
        print(f"❌ Error with {file_name}: {e}")
        problematic_files.append(file_name)

print(f"\n✅ Done. {files_passed} files processed, {skipped} skipped (already in result_df).")
if problematic_files:
    print("⚠️ Problematic files:", problematic_files)


200 files total
3_5_6Pro_4G_tiktok_stat_64sps.csv passed. Count: 1
3_5_6pro_LTE_insta_Dyna_T1_Prefecture_INSA_64sps.csv passed. Count: 2
3_5_6pro_3G_tiktok_stat_64sps.csv passed. Count: 3
2_5_6pro_3G_YTshorts_stat_64sps.csv passed. Count: 4
1_5_6pro_LTE_tiktok_Dyna_T1_Doua_Auditorium_64sps.csv passed. Count: 5
1_5_6pro_LTE_insta_stat_64sps.csv passed. Count: 6
3_5_4G_tiktok_stat.csv passed. Count: 7
❌ Error with 3_5_4G_tiktok_stat.csv: 'V_BAT,I_BAT,P_BAT,V_BB,I_BB,P_BB,V_PA,I_PA,P_PA'
1_5_6pro_LTE_tiktok_stat_64sps.csv passed. Count: 8
1_5_6pro_3G_insta_stat_64sps.csv passed. Count: 9
1_5_6pro_LTE_YTshorts_stat_64sps.csv passed. Count: 10
2_5_6pro_LTE_tiktok_stat_64sps.csv passed. Count: 11
1_5_6pro_LTE_tiktok_Dyna_T1_Belcombe_Auditorium_64sps.csv passed. Count: 12
1_5_6Pro_5G_insta_stat_64sps.csv passed. Count: 13
3_5_6Pro_4G_insta_stat_64sps.csv passed. Count: 14
3_5_4G_YTshorts_stat.csv passed. Count: 15
❌ Error with 3_5_4G_YTshorts_stat.csv: 'V_BAT,I_BAT,P_BAT,V_BB,I_BB,P_BB,V_PA,I

In [2]:
import pandas as pd

# Load your result_df if not done already
# result_df = pd.read_csv("result_df.csv")

# Add scenario_id column
# Create scenario_id from the relevant columns
result_df['scenario_id'] = (
    result_df['Device'].astype(str).str.strip() + "_" +
    result_df['RAN Technology'].astype(str).str.strip() + "_" +
    result_df['Platform'].astype(str).str.strip() + "_" +
    result_df['Condition'].astype(str).str.strip()
)
# Compute average energy values per scenario
# Clean energy columns just in case
energy_cols = ['E_RF Jm', 'E_BAT Jm', 'E_BB Jm', 'E_PA Jm']
result_df[energy_cols] = result_df[energy_cols].apply(pd.to_numeric, errors='coerce')

# Group by scenario_id
scenario_summary_df = result_df.groupby('scenario_id')[energy_cols].mean().reset_index()

# Optional: rename for clarity
scenario_summary_df.columns = ['scenario_id', 'E_RF_Jm', 'E_BAT_Jm', 'E_BB_Jm', 'E_PA_Jm']

# Save to CSV for frontend usage
scenario_summary_df.to_csv("./website/server/scenario_summary_df.csv", index=False)


# Show preview in notebook
scenario_summary_df.head(10)


Unnamed: 0,scenario_id,E_RF_Jm,E_BAT_Jm,E_BB_Jm,E_PA_Jm
0,20250520_145728_None_None,44.5,131.45,42.89,1.61
1,20250520_152227_None_None,10.57,81.92,9.68,0.89
2,20250520_152728_None_None,49.67,125.82,46.96,2.71
3,6Pro_4G_Pubg_64sps,76.24,205.33,65.65,10.59
4,6Pro_4G_YTshorts_stat,53.0,194.47,50.19,2.81
5,6Pro_4G_insta_stat,39.46,187.32,37.41,2.06
6,6Pro_4G_tiktok_stat,48.04,163.43,45.51,2.53
7,6Pro_5G_Pubg_64sps,75.41,209.515,64.825,10.58
8,6Pro_5G_YTshorts_stat,72.12,198.41,56.76,15.36
9,6Pro_5G_insta_stat,80.75,227.19,61.12,19.63
