In [1]:
import os
import optuna
import numpy as np
import pandas as pd
import warnings
from two_class_nested_cv import NestedCVOptimizer

# Install a blanket "ignore" filter: from here on no Python warning of any category
# is shown in this kernel session.
# NOTE(review): presumably meant to quiet noisy library output during the CV runs;
# it also hides deprecation and convergence warnings — confirm that is intended.
warnings.filterwarnings('ignore')

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def nested_cv(
    X,
    y,
    groups,
    *,
    positive_class="void",
    n_outer_folds=5,
    n_inner_folds=3,
    n_trials=50,
    random_state=42,
):
    """
    Run nested cross-validation via ``NestedCVOptimizer`` and return its results.

    Builds the optimizer, runs it, prints a short summary of the best model and
    then returns the optimizer's results table.

    Parameters
    ----------
    X, y, groups
        Forwarded unchanged to ``NestedCVOptimizer`` as ``X``, ``y`` and ``groups``.
    positive_class : default "void"
        Forwarded as ``positive_class``.
    n_outer_folds, n_inner_folds : int, defaults 5 and 3
        Forwarded as the outer / inner fold counts.
    n_trials : int, default 50
        Forwarded as ``n_trials``.
    random_state : int, default 42
        Forwarded as ``random_state``.

    Returns
    -------
    Whatever ``optimizer.get_results_dataframe()`` returns.
    NOTE(review): presumably a pandas DataFrame of per-fold metrics and summary
    statistics — confirm against ``two_class_nested_cv``.

    Notes
    -----
    The mapping returned by ``run_nested_cv()`` must provide the keys
    ``'best_model'`` (a string), ``'best_f1_positive'`` and ``'best_accuracy'``
    (numbers), because they are formatted into the printed summary.
    """
    optimizer = NestedCVOptimizer(
        X=X,
        y=y,
        groups=groups,
        positive_class=positive_class,
        n_outer_folds=n_outer_folds,
        n_inner_folds=n_inner_folds,
        n_trials=n_trials,
        random_state=random_state,
    )

    print("\nRunning nested cross-validation...")
    summaries = optimizer.run_nested_cv()

    # Headline numbers for the best model. The emoji was previously stored as
    # mis-decoded bytes, which printed as garbage characters.
    print("\n📊 RESULTS SUMMARY:")
    print(f"Best performing model: {summaries['best_model'].upper()}")
    print(f"Best F1 (positive): {summaries['best_f1_positive']:.4f}")
    print(f"Best accuracy: {summaries['best_accuracy']:.4f}")

    return optimizer.get_results_dataframe()

In [None]:
# Feature CSVs to evaluate; the loop further down runs one nested-CV pass per entry.
# That loop splits each name on '_' and builds the experiment name from the 4th and
# the last token (e.g. 'two_class_raw_1s_no.csv' -> '1s_no'), so every entry must
# contain at least four '_'-separated tokens.
# NOTE(review): every entry is currently commented out, so the list is empty and the
# loop does nothing; uncomment the datasets that should be processed.
files = [
    # 'two_class_raw_1s_no.csv',
    # 'two_class_raw_1s_0.5.csv',
    # 'two_class_raw_1s_0.8.csv',
    # 'two_class_rawfs_2s_no.csv',
    # 'two_class_raw_2s_no.csv',
    # 'two_class_raw_2s_0.5.csv',
    # 'two_class_raw_2s_0.8.csv',
    # 'two_class_raw_3s_no.csv',
    # 'two_class_raw_3s_0.5.csv',
    # 'two_class_raw_3s_0.8.csv',
    # 'two_class_raw_4s_no.csv',
    # 'two_class_raw_4s_0.5.csv',
    # 'two_class_raw_4s_0.8.csv',
    # 'two_class_raw_5s_no.csv',
    # 'two_class_raw_5s_0.5.csv',
    # 'two_class_raw_5s_0.8.csv'
]

# Directory that holds the CSVs named in `files`; each file name is joined onto this
# path before being read.
# NOTE(review): absolute, machine-specific path — the notebook will not run elsewhere
# without editing it.
base_path = '/home/edumaba/Public/MPhil_Thesis/Code/wear_uropatch/feature_datasets/subject_based_norm/feature_set_2'

1. Loop through all files.
2. Perform nested cross-validation on each file.
3. Store the results in the format shown below.

![Output format](/home/edumaba/Public/MPhil_Thesis/Code/wear_uropatch/output_format.png)


In [None]:
# Run nested cross-validation once per feature file and keep every result.
#
# Previously `all_results` was never filled and `results_df` was overwritten on each
# iteration, so only the last experiment could be saved afterwards. Each result is
# now stored in `all_results` and written to its own CSV inside the loop.

# Directory that receives one metrics CSV per processed input file.
output_dir = '/home/edumaba/Public/MPhil_Thesis/Code/wear_uropatch/subject_based_norm_nested_cv/two_class/nested_cv_results/feature_set_2'

# Maps each input file's stem (name without '.csv') to the object returned by nested_cv.
all_results = {}
for file in files:
    data_path = os.path.join(base_path, file)
    # Non-inplace drop: the frame is rebuilt from disk each time, so re-running the
    # cell never hits a half-mutated DataFrame.
    features = pd.read_csv(data_path).drop(
        columns=['center_time', 'start_time', 'end_time']
    )

    # Short experiment label from the 4th and last '_'-separated tokens,
    # e.g. 'two_class_raw_1s_no.csv' -> '1s_no'.
    details = file.split('_')
    exp_name = f"{details[3]}_{details[-1].replace('.csv', '')}"
    print(f"Nested cross-validation for {exp_name}")

    # Everything except the target and the grouping column is a model input.
    X = features.drop(columns=['label', 'experiment_id'])
    y = features['label']
    groups = features['experiment_id']

    results_df = nested_cv(X, y, groups)

    # Key and save by the full file stem rather than `exp_name`: `exp_name` drops the
    # third token, so e.g. 'two_class_raw_2s_no.csv' and 'two_class_rawfs_2s_no.csv'
    # would both map to '2s_no' and overwrite each other.
    file_stem = os.path.splitext(file)[0]
    all_results[file_stem] = results_df

    # to_csv does not create missing directories.
    os.makedirs(output_dir, exist_ok=True)
    results_df.to_csv(
        os.path.join(output_dir, f'all_metrics_nested_cv_{file_stem}.csv')
    )


In [5]:
# Write the most recent nested-CV results to disk. `results_df` and `exp_name` are the
# values left behind by the last iteration of the loop over `files`, so this cell only
# works after that loop has processed at least one file.
output_dir = '/home/edumaba/Public/MPhil_Thesis/Code/wear_uropatch/subject_based_norm_nested_cv/two_class/nested_cv_results/feature_set_2'

# to_csv raises if the target directory is missing, so create it first.
os.makedirs(output_dir, exist_ok=True)

results_df.to_csv(
    os.path.join(output_dir, f'all_metrics_nested_cv_{exp_name}_zwei_rawfs.csv')
)
