In [2]:
import os
import pandas as pd

ID_PREFIX = "ID"  # Set the prefix for folder names
directories = [d for d in os.listdir() if os.path.isdir(d) and d.startswith(ID_PREFIX)]

rename_mapping = {
    "Lows": "Low",
    "Mediums": "Medium",
    "Highs": "High",
    "Very Highs": "Very High"
}

new_category_order = ["Low", "Medium", "High", "Very High"]

for directory in directories:
    files = ["all_glucose_prediction_results.csv", "all_glucose_prediction_results2.csv"]
    
    for file in files:
        file_path = os.path.join(directory, file)
        if not os.path.exists(file_path):
            continue

        data = pd.read_csv(file_path)
        required_columns = {'Category', 'Predicted Glucose Change', 'Predicted Variance',
                            'Actual Glucose Change', 'Actual Variance', 'Prediction Status'}
        
        if not required_columns.issubset(data.columns):
            print(f"Skipping {file} in {directory} due to missing columns")
            continue

        data['Category'] = data['Category'].replace(rename_mapping)

        # Compute mean statistics
        category_stats = data.groupby('Category')[[
            'Predicted Glucose Change', 'Predicted Variance',
            'Actual Glucose Change', 'Actual Variance'
        ]].mean().reset_index()

        # Compute total and correct windows
        total_windows = data['Category'].value_counts().reindex(new_category_order, fill_value=0).reset_index()
        total_windows.columns = ['Category', 'Total Windows']
        correct_predictions = data[data['Prediction Status'] == True]['Category'].value_counts().reindex(new_category_order, fill_value=0).reset_index()
        correct_predictions.columns = ['Category', 'Correct Predictions']

        accuracy_df = total_windows.merge(correct_predictions, on='Category')
        accuracy_df['Accuracy (%)'] = (accuracy_df['Correct Predictions'] / accuracy_df['Total Windows'] * 100).fillna(0)

        # Merge statistics and accuracy data
        final_stats = category_stats.merge(accuracy_df, on='Category', how='left')

        # Calculate MAE for incorrect predictions
        incorrect_preds = data[data['Prediction Status'] == False].copy()
        if not incorrect_preds.empty:
            incorrect_preds['Absolute Error Glucose'] = (incorrect_preds['Actual Glucose Change'] - incorrect_preds['Predicted Glucose Change']).abs()
            incorrect_preds['Absolute Error Variance'] = (incorrect_preds['Actual Variance'] - incorrect_preds['Predicted Variance']).abs()

            mae_stats = incorrect_preds.groupby('Category')[[
                'Absolute Error Glucose', 'Absolute Error Variance'
            ]].mean().reset_index().rename(columns={
                'Absolute Error Glucose': 'Mean Absolute Error Glucose',
                'Absolute Error Variance': 'Mean Absolute Error Variance'
            })
            final_stats = final_stats.merge(mae_stats, on='Category', how='left')
        else:
            final_stats['Mean Absolute Error Glucose'] = '-'
            final_stats['Mean Absolute Error Variance'] = '-'

        # Rename columns with units and formatting
        final_stats.rename(columns={
            'Predicted Glucose Change': 'Predicted Glucose Change (mmol/L)',
            'Predicted Variance': 'Predicted Variance (mmol/L)',
            'Actual Glucose Change': 'Actual Glucose Change (mmol/L)',
            'Actual Variance': 'Actual Variance (mmol/L)'
        }, inplace=True)

        # Reorder columns clearly
        final_stats = final_stats[['Category',
                                   'Predicted Glucose Change (mmol/L)',
                                   'Predicted Variance (mmol/L)',
                                   'Actual Glucose Change (mmol/L)',
                                   'Actual Variance (mmol/L)',
                                   'Total Windows',
                                   'Correct Predictions',
                                   'Accuracy (%)',
                                   'Mean Absolute Error Glucose',
                                   'Mean Absolute Error Variance']]

        file_suffix = "-2" if "results2" in file else ""
        output_file = f"{directory}{file_suffix}_summary.csv"
        final_stats.to_csv(output_file, index=False, float_format='%.3f')

        print(f"Processed and saved: {output_file}")

Processed and saved: ID2310_summary.csv
Processed and saved: ID2320_summary.csv
Processed and saved: ID2403_summary.csv
Processed and saved: ID2404_summary.csv
Processed and saved: ID2405_summary.csv
Processed and saved: ID2302_summary.csv
Processed and saved: ID2304_summary.csv
Processed and saved: ID2304-2_summary.csv
Processed and saved: ID2313_summary.csv
Processed and saved: ID2314_summary.csv
Processed and saved: ID2401_summary.csv
Processed and saved: ID2308_summary.csv
Processed and saved: ID2301_summary.csv
Processed and saved: ID2306_summary.csv
Processed and saved: ID2309_summary.csv


In [3]:
import pandas as pd
import glob
import os

# Define the path to the folder containing participant files
file_path = '*.csv'  # Update with your actual folder path
output_folder = ''   # Update with your desired output folder path

# Read all CSV files from the folder in alphabetical order
all_files = sorted(glob.glob(file_path))

# Initialize dictionaries to store data for each category
categories = ["Low", "Medium", "High", "Very High"]
category_data = {category: [] for category in categories}

# Loop through each file and organize data by category
for file in all_files:
    try:
        df = pd.read_csv(file)
        participant_id = os.path.basename(file).split('.')[0].replace('_summary', '')
        
        print(f"Processing {participant_id}...")

        for category in categories:
            cat_df = df[df['Category'] == category]
            if not cat_df.empty:
                total_windows = cat_df["Total Windows"].values[0]
                correct_windows = cat_df["Correct Predictions"].values[0]
                correct_ratio = f"{correct_windows}/{total_windows}"

                category_data[category].append({
                    "Participant ID": participant_id,
                    "Predicted Glucose Change (mmol/L)": cat_df["Predicted Glucose Change (mmol/L)"].values[0],
                    "Predicted Variance (mmol/L)": cat_df["Predicted Variance (mmol/L)"].values[0],
                    "Actual Glucose Change (mmol/L)": cat_df["Actual Glucose Change (mmol/L)"].values[0],
                    "Actual Variance (mmol/L)": cat_df["Actual Variance (mmol/L)"].values[0],
                    "Mean Absolute Error Glucose (mmol/L)": cat_df["Mean Absolute Error Glucose"].values[0],
                    "Mean Absolute Error Variance (mmol/L)": cat_df["Mean Absolute Error Variance"].values[0],
                    "Correct/Total Windows": correct_ratio,
                })
    except Exception as e:
        print(f"Failed to process {file}: {e}")

# Save each category's data to a separate Excel file
for category, data_list in category_data.items():
    if data_list:
        category_df = pd.DataFrame(data_list)
        output_path = os.path.join(output_folder, f"{category}_results.xlsx")
        category_df.to_excel(output_path, index=False)
        print(f"Data for {category} saved to {output_path}")

Processing ID2301...
Processing ID2302...
Processing ID2304-2...
Processing ID2304...
Processing ID2306...
Processing ID2308...
Processing ID2309...
Processing ID2310...
Processing ID2313...
Processing ID2314...
Processing ID2320...
Processing ID2401...
Processing ID2403...
Processing ID2404...
Processing ID2405...
Data for Low saved to Low_results.xlsx
Data for Medium saved to Medium_results.xlsx
Data for High saved to High_results.xlsx
Data for Very High saved to Very High_results.xlsx
