# Function Lib

In [None]:
from tqdm.notebook import tqdm  # For progress bars
from utils import dataset_analyze_rasp_ff, open_file_nf1, seconds_to_duration
import os
import plotly.express as px
import plotly.graph_objects as go
import time
from pathlib import Path
import pandas as pd
import numpy as np
import os

# ---------------------------------------------------------------------------
# Batch-process the web-browsing measurement CSVs into `result_df`.
#
# The cell is safe to re-run: files whose name already appears in
# result_df['File name'] are skipped and their existing rows are kept.
# ---------------------------------------------------------------------------

# Reuse the results of a previous run of this cell, if there are any.
if 'result_df' not in globals():
    result_df = pd.DataFrame()

# Names of the files that already have rows in result_df.
result_filenames = set(result_df['File name']) if not result_df.empty else set()

# NOTE(review): never populated in this cell; kept in case other cells read it.
unique_filenames = set()

# Define paths
base_dir = './data/Experiment_Data/SIR_Experiment'
web_browsing_dir = os.path.join(base_dir, 'Web browsing')

# Collect every CSV file in the web-browsing folder. os.listdir() returns
# entries in arbitrary order, so sort them for a reproducible processing order.
web_browsing_files = [
    os.path.join(web_browsing_dir, f)
    for f in sorted(os.listdir(web_browsing_dir))
    if f.endswith('.csv')
]

# Only one source folder at the moment; extend this list to add more.
file_list = web_browsing_files

# Manually defined blacklist of bad measurement files
blacklist_filenames = {
    "1_5_4G_web.csv",
    "1_5_4G_zoom_stat.csv",
    "1_5_5G_web_stat.csv",
    "1_5_5G_teams_stat.csv",
    "1_5_5G_zoom_stat.csv",
    # Add more bad files here
}

# Process files
files_passed = 0        # files analysed successfully in this run
skipped = 0             # files skipped (already processed or blacklisted)
duplicates_count = 0    # NOTE(review): never updated in this cell
problematic_files = []  # files whose analysis raised an exception

print(len(file_list), "files total")  # Print total files

# result_df is deliberately NOT reset here: the skip check below relies on the
# rows of previously processed files still being present in it.
for file_path in file_list:
    file_name = os.path.basename(file_path)

    # Skip if already processed
    if file_name in result_filenames:
        print(f"{file_name} skipped because already processed")
        skipped += 1
        continue
    if file_name in blacklist_filenames:
        print(f"{file_name} skipped because it is blacklisted")
        skipped += 1
        continue

    try:
        result_df = dataset_analyze_rasp_ff(file_path, result_df)
    except Exception as e:
        # Per-file boundary: report and record the failure, then carry on with
        # the remaining files instead of aborting the whole batch.
        print(f"❌ Error with {file_name}: {e}")
        problematic_files.append(file_name)
    else:
        # Count the file only once its analysis has actually succeeded.
        files_passed += 1
        print(f"{file_name} passed. Count: {files_passed}")

print(f"\n✅ Done. {files_passed} files processed, {skipped} skipped (already processed or blacklisted).")
if problematic_files:
    print("⚠️ Problematic files:", problematic_files)


In [None]:
import pandas as pd

# Load your result_df if not done already
# result_df = pd.read_csv("result_df.csv")

# Build the scenario identifier: the trimmed string form of the Device,
# RAN Technology, Platform and Condition columns joined with underscores.
scenario_key = None
for part_col in ['Device', 'RAN Technology', 'Platform', 'Condition']:
    part = result_df[part_col].astype(str).str.strip()
    scenario_key = part if scenario_key is None else scenario_key + "_" + part
result_df['scenario_id'] = scenario_key

# Coerce the energy columns to numeric; values that cannot be parsed become NaN.
energy_cols = ['E_RF Jm', 'E_BAT Jm', 'E_BB Jm', 'E_PA Jm']
numeric_energy = {
    energy_col: pd.to_numeric(result_df[energy_col], errors='coerce')
    for energy_col in energy_cols
}
for energy_col, numeric_values in numeric_energy.items():
    result_df[energy_col] = numeric_values

# Mean of every energy column per scenario, one row per scenario_id.
per_scenario = result_df.groupby('scenario_id')[energy_cols]
scenario_summary_df = per_scenario.mean().reset_index()

# Replace the spaces in the energy column names with underscores.
scenario_summary_df = scenario_summary_df.rename(
    columns={
        'E_RF Jm': 'E_RF_Jm',
        'E_BAT Jm': 'E_BAT_Jm',
        'E_BB Jm': 'E_BB_Jm',
        'E_PA Jm': 'E_PA_Jm',
    }
)

# Write the summary (without the index) to the CSV under ./website/server.
scenario_summary_df.to_csv("./website/server/others_scenario_summary_df.csv", index=False)


# Show the first ten rows as the cell output.
scenario_summary_df.head(10)
