In [None]:
import os
import pandas as pd
from pre_proc_functions import *

# Root folder that is scanned recursively for input files, and the HDF5 file
# the combined metrics table is written to.
DATA_DIR = "data_pub"
OUTPUT_FILE = "metrics_summary.h5"

# Collect every .mat file below DATA_DIR. os.walk yields entries in an
# arbitrary, filesystem-dependent order, so the paths are sorted to keep the
# processing order (and the row order of anything built from it) reproducible
# across runs and machines.
mat_files = sorted(
    os.path.join(root, file)
    for root, _dirs, files in os.walk(DATA_DIR)
    for file in files
    if file.endswith(".mat")
)

print(f"Found {len(mat_files)} .mat files.")

# Per-file metric results, combined into one table after the loop.
all_metrics = []
# Paths whose processing raised, kept so failures can be inspected or retried
# after the run instead of only scrolling past in the log output.
failed_files = []

for file_path in mat_files:
    try:
        print(f"Processing: {file_path}")

        # Load and preprocess the dataset
        data = process_states(file_path)

        # Calculate metrics for all status periods
        metrics = analyze_metrics_by_status_period(data)

        # Add checkup identifier (folder + file).
        # splitext removes only the trailing extension; str.replace(".mat", "")
        # would also delete any ".mat" occurring in the middle of the name.
        metrics["checkup"] = os.path.splitext(os.path.basename(file_path))[0]
        metrics["subfolder"] = os.path.basename(os.path.dirname(file_path))

        all_metrics.append(metrics)

    except Exception as e:
        # Best-effort batch: one unreadable file must not abort the whole run,
        # but the failure is recorded rather than just printed.
        print(f"Error processing {file_path}: {e}")
        failed_files.append(file_path)

if failed_files:
    print(f"{len(failed_files)} of {len(mat_files)} files failed to process.")

if all_metrics:
    # Stack the per-file results into a single table with a fresh 0..n-1 index.
    df_metrics = pd.concat(all_metrics, ignore_index=True)

    # Save to HDF5 (mode="w" overwrites any existing OUTPUT_FILE)
    df_metrics.to_hdf(OUTPUT_FILE, key="metrics", mode="w", format="table")
    print(f"Metrics saved to {OUTPUT_FILE}")

    # Example: How to read it back later
    # df = pd.read_hdf(OUTPUT_FILE, key="metrics")

    # Preview only the table built in this run. Checking
    # `'df_metrics' in locals()` instead would also show a stale frame left in
    # the kernel by an earlier execution when this run produced no metrics.
    display(df_metrics.head())
else:
    print("No metrics calculated. Please check the dataset.")

Found 4366 .mat files.
Processing: data_pub/CU_Dynamic/CU007/BW-VTC-452_7844_CU_cyc_007_BW-VTC-AgeVal.mat
Processing: data_pub/CU_Dynamic/CU007/BW-VTC-447_7840_CU_cyc_007_BW-VTC-AgeVal.mat
Processing: data_pub/CU_Dynamic/CU007/BW-VTC-448_7841_CU_cyc_007_BW-VTC-AgeVal.mat
Processing: data_pub/CU_Dynamic/CU007/BW-VTC-455_7846_CU_cyc_007_BW-VTC-AgeVal.mat
Processing: data_pub/CU_Dynamic/CU007/BW-VTC-454_7845_CU_cyc_007_BW-VTC-AgeVal.mat
Processing: data_pub/CU_Dynamic/CU007/BW-VTC-449_7842_CU_cyc_007_BW-VTC-AgeVal.mat
Processing: data_pub/CU_Dynamic/CU007/BW-VTC-450_7843_CU_cyc_007_BW-VTC-AgeVal.mat
Processing: data_pub/CU_Dynamic/CU007/BW-VTC-458_5317_CU_cyc_007_BW-VTC-AgeVal.mat
Processing: data_pub/CU_Dynamic/CU007/BW-VTC-457_5316_CU_cyc_007_BW-VTC-AgeVal.mat
Processing: data_pub/CU_Dynamic/CU007/BW-VTC-456_5315_CU_cyc_007_BW-VTC-AgeVal.mat
Processing: data_pub/CU_Dynamic/CU002/BW-VTC-452_7317_CU_cyc_002_BW-VTC-AgeVal.mat
Processing: data_pub/CU_Dynamic/CU002/BW-VTC-320_2985_CU_cyc_002

KeyboardInterrupt: 