In [1]:
import pandas as pd

# Read the combined S1-S3 dataset from the working directory
df = pd.read_csv("S1_S3_CombinedData_cleaned.csv")

# --- Disabled: one-off label normalisation, kept for reference --------------
# Presumably this has already been applied to the file on disk (its name ends
# in "_cleaned") -- TODO confirm before re-enabling.
# label_map = {
#     "Not drowsy": "alert",
#     "Not Drowsy": "alert",
#     "Slightly drowsy": "slightly",
#     "Slightly Drowsy": "slightly",
#     "Moderately Drowsy": "moderately",
#     "Moderately drowsy": "moderately",
#     "Very drowsy": "very",
#     "Very Drowsy": "very"
# }
# df["Label"] = df["Label"].replace(label_map)

# Sanity check: how many rows carry each label
label_counts = df["Label"].value_counts()
print(label_counts)

# --- Disabled: write the relabelled frame back over the input file ----------
# df.to_csv("S1_S3_CombinedData_cleaned.csv", index=False)

Label
moderately    1432
slightly       926
alert          448
very           189
1back           81
2back           78
Name: count, dtype: int64


In [1]:
# Combine all CSVs in "Windowed and Cleaned Data" into S1_combined_cleaned_windowed.csv
import os
import glob
import pandas as pd

# Source directory and destination file for the merged table
folder = "Windowed and Cleaned Data"
out_path = "S1_combined_cleaned_windowed.csv"

# Columns every input file must provide; the output keeps exactly these, in this order
cols = [
    "window_start", "window_end",
    "ECG_HR", "GSR_mean", "GSR_std",
    "laneDev_std", "speed_mean", "speed_std", "swAngle_SWRR",
    "ID", "Label",
]

# Gather the input CSVs in sorted order so the run is deterministic
paths = glob.glob(os.path.join(folder, "*.csv"))
paths.sort()
if len(paths) == 0:
    raise FileNotFoundError(f"No CSV files found in: {folder}")

# Load each file; anything that fails to load or lacks a required column is
# reported and skipped rather than aborting the whole merge
frames = []
for csv_path in paths:
    file_name = os.path.basename(csv_path)
    try:
        frame = pd.read_csv(csv_path)
        missing = set(cols) - set(frame.columns)
        if not missing:
            # drop any extra columns and enforce the expected order
            frames.append(frame[cols])
        else:
            print(f"Skipping {file_name} (missing columns: {missing})")
    except Exception as err:
        print(f"Skipping {file_name} (read error: {err})")

if len(frames) == 0:
    raise RuntimeError("No valid CSVs to combine after header checks.")

combined = pd.concat(frames, axis=0, ignore_index=True)

# Parse both window bounds as datetimes before ordering the rows
for time_col in ("window_start", "window_end"):
    combined[time_col] = pd.to_datetime(combined[time_col])

# Order rows by ID, then by window start time
combined = combined.sort_values(["ID", "window_start"], kind="mergesort")
combined = combined.reset_index(drop=True)

# Write the merged table and report what went into it
combined.to_csv(out_path, index=False)
print(f"Combined {len(frames)} files → {out_path} (rows: {len(combined)})")


Combined 24 files → S1_combined_cleaned_windowed.csv (rows: 8249)


In [1]:
import os
import glob
import pandas as pd
from sklearn.preprocessing import StandardScaler
import numpy as np
import random

# Input directory and the three CSV files this cell produces
folder = "S1_Data/Windowed and Cleaned Data"
combined_out_path = "Classification_Combined_Data/S1_combined_cleaned_windowed.csv"
train_out_path = "Classification_Combined_Data/S1_train_cleaned_windowed.csv"
test_out_path = "Classification_Combined_Data/S1_test_cleaned_windowed.csv"

# Output column layout: window bounds first, then the features, then ID / Label
feature_cols = [
    "ECG_HR",
    "GSR_mean",
    "GSR_std",
    "laneDev_std",
    "speed_mean",
    "speed_std",
    "swAngle_SWRR",
]
meta_cols = ["window_start", "window_end", "ID", "Label"]
all_cols = [*meta_cols[:2], *feature_cols, *meta_cols[2:]]

# Collect the input CSVs in sorted order so the seeded shuffle below is reproducible
paths = glob.glob(os.path.join(folder, "*.csv"))
paths.sort()
n_found = len(paths)
# NOTE(review): only "too few" is rejected; with more than 24 files the extras
# pass this check and end up in the test split below.
if n_found < 24:
    raise ValueError(f"Expected 24 files, found {n_found}")

# Seeded shuffle, then split at file level: first 18 files train, the rest test.
# NOTE(review): this is a per-participant split only if each file holds exactly
# one participant -- confirm against the data layout.
random.seed(42)
random.shuffle(paths)
train_paths, test_paths = paths[:18], paths[18:]

def process_file(path):
    """Load one windowed CSV and standardise its feature columns.

    The frame is reduced to ``all_cols`` (in that order) and every column in
    ``feature_cols`` is z-scored with a ``StandardScaler`` fitted on this file
    alone, so the statistics of one file never influence another.
    (Presumably one file corresponds to one participant -- verify.)

    Parameters
    ----------
    path : str
        Location of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the columns of ``all_cols``; the feature columns
        contain their standardised values.

    Raises
    ------
    ValueError
        If any column listed in ``all_cols`` is absent from the file.
    """
    df = pd.read_csv(path)

    # Report exactly which required columns are absent, not just that some are
    missing = set(all_cols) - set(df.columns)
    if missing:
        raise ValueError(f"Missing columns in file {path}: {sorted(missing)}")

    # Take an explicit copy: assigning into a column selection of the loaded
    # frame can otherwise trigger pandas' SettingWithCopyWarning.
    df = df[all_cols].copy()

    # Fit and apply the scaler on this file's rows only
    scaler = StandardScaler()
    df[feature_cols] = scaler.fit_transform(df[feature_cols])

    return df

def _load_split(split_paths, split_name):
    """Run ``process_file`` on each path and return the frames that loaded.

    A file that raises is reported and skipped (best-effort), so one bad file
    does not abort the run. ``split_name`` ("train" / "test") is used only in
    messages.

    Raises
    ------
    RuntimeError
        If no file of the split could be processed.
    """
    loaded = []
    for path in split_paths:
        try:
            loaded.append(process_file(path))
        except Exception as e:
            print(f"Skipping {split_name} file {os.path.basename(path)} due to error: {e}")
    if not loaded:
        # Fail with a clear message instead of pd.concat's "No objects to concatenate"
        raise RuntimeError(f"No valid {split_name} files to combine.")
    return loaded


def _sort_by_id_and_time(frame):
    """Return a copy with datetime window bounds, ordered by ID then window_start."""
    return (
        frame.assign(
            window_start=pd.to_datetime(frame["window_start"]),
            window_end=pd.to_datetime(frame["window_end"]),
        )
        .sort_values(by=["ID", "window_start"])
        .reset_index(drop=True)
    )


# Process each group
train_frames = _load_split(train_paths, "train")
test_frames = _load_split(test_paths, "test")

# Combine (the full set is built from the same per-file frames as the two splits)
train_combined = pd.concat(train_frames, axis=0, ignore_index=True)
test_combined = pd.concat(test_frames, axis=0, ignore_index=True)
full_combined = pd.concat([train_combined, test_combined], axis=0, ignore_index=True)

# Sort chronologically within each ID
train_combined = _sort_by_id_and_time(train_combined)
test_combined = _sort_by_id_and_time(test_combined)
full_combined = _sort_by_id_and_time(full_combined)

# Make sure the destination directories exist so the writes cannot fail on a
# fresh checkout after all the processing has already been done
for out_path in (train_out_path, test_out_path, combined_out_path):
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

# Save results
train_combined.to_csv(train_out_path, index=False)
test_combined.to_csv(test_out_path, index=False)
full_combined.to_csv(combined_out_path, index=False)

# Report the number of files actually combined (skipped files are excluded)
n_combined = len(train_frames) + len(test_frames)
print(f"✅ Combined {n_combined} files")
print(f"→ Training set: {train_out_path} ({len(train_combined)} rows)")
print(f"→ Testing set: {test_out_path} ({len(test_combined)} rows)")
print(f"→ Full dataset: {combined_out_path} ({len(full_combined)} rows)")

✅ Combined 24 files
→ Training set: Classification_Combined_Data/S1_train_cleaned_windowed.csv (6556 rows)
→ Testing set: Classification_Combined_Data/S1_test_cleaned_windowed.csv (1693 rows)
→ Full dataset: Classification_Combined_Data/S1_combined_cleaned_windowed.csv (8249 rows)
