In [None]:
import pandas as pd
from pathlib import Path
import os

# 1. Folder that holds the merged input CSVs.
# This is an absolute, machine-specific path (it is NOT resolved relative to
# the script or the current working directory) - edit it for your own setup.
data_dir = Path("C:/Users/patil/Downloads/STData/merged_data")

# 2. Columns to keep from each file (used to restrict what gets loaded).
# Every sensor-style list starts with the same two columns; PSY has its own
# layout and does not include 'UnixTime'.
_shared_prefix = ['UnixTime', 'participant_id']

psy_columns = [
    'Key',
    'participant_id',
    'routineStart',
    'routineEnd',
    'verdict',
    'ResponseTime',
]

# EEG names follow the pattern "<prefix>_<suffix>"; the nested comprehension
# yields them grouped by prefix (all Alpha_* first, then Beta_*, then Gamma_*).
eeg_columns = [
    *_shared_prefix,
    *(
        f"{prefix}_{suffix}"
        for prefix in ("Alpha", "Beta", "Gamma")
        for suffix in ("TP9", "AF7", "AF8", "TP10")
    ),
]

gsr_columns = [*_shared_prefix, 'GSR Conductance CAL']

eye_columns = [
    *_shared_prefix,
    'ET_PupilLeft',
    'ET_PupilRight',
    'ET_ValidityLeft',
    'ET_ValidityRight',
]

tiva_columns = [
    *_shared_prefix,
    'Joy',
    'Anger',
    'Surprise',
    'Fear',
    'Sadness',
    'Disgust',
    'Contempt',
    'Neutral',
    'Engagement',
    'Valence',
    'Attention',
    'Blink',
    'BlinkRate',
    'Eye Closure',
    'Eye Widen',
    'Brow Furrow',
    'Brow Raise',
    'Cheek Raise',
    'Lip Corner Depressor',
    'Smile',
]

ivt_columns = [
    *_shared_prefix,
    'Fixation Index',
    'Fixation Duration',
    'Fixation Dispersion',
    'Saccade Index',
    'Saccade Amplitude',
    'Gaze Velocity',
]

# 3. Load each merged CSV, keeping only the columns selected above.
# The files read here are <NAME>_merged.csv inside data_dir, i.e.
# PSY_merged.csv, EEG_merged.csv, GSR_merged.csv, EYE_merged.csv,
# TIVA_merged.csv and IVT_merged.csv.
def _load_modality(message, file_name, wanted_columns):
    """Print a progress message, then read one CSV from ``data_dir``.

    Args:
        message: Progress text printed before the file is read.
        file_name: Name of the CSV file inside ``data_dir``.
        wanted_columns: Column names passed to ``usecols``; only these
            columns are loaded.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    print(message)
    return pd.read_csv(data_dir / file_name, usecols=wanted_columns)


psy_df = _load_modality("Loading PSY data...", 'PSY_merged.csv', psy_columns)
eeg_df = _load_modality("Loading EEG data...", 'EEG_merged.csv', eeg_columns)
gsr_df = _load_modality("Loading GSR data...", 'GSR_merged.csv', gsr_columns)
eye_df = _load_modality("Loading EYE data...", 'EYE_merged.csv', eye_columns)
tiva_df = _load_modality("Loading TIVA data...", 'TIVA_merged.csv', tiva_columns)
ivt_df = _load_modality("Loading IVT data...", 'IVT_merged.csv', ivt_columns)

# 4. Turn the 'UnixTime' column of every sensor dataframe into datetimes.
# PSY is not in the list: its column selection has no 'UnixTime'.
# NOTE(review): unit='s' treats the values as seconds since the Unix epoch -
# confirm the files do not store milliseconds.
print("Converting UnixTime to datetime...")
for df in (eeg_df, gsr_df, eye_df, tiva_df, ivt_df):
    raw_unix_time = df['UnixTime']
    # Item assignment replaces the column on the dataframe object itself, so
    # eeg_df, gsr_df, ... all see the converted values after the loop.
    df['UnixTime'] = pd.to_datetime(raw_unix_time, unit='s')

# 5. Report the shape of every dataframe, then preview the first rows of
# the PSY dataframe only (the other dataframes are not previewed).
print("\nData Loading Complete! Summary:")
shape_lines = [
    f"PSY DataFrame Shape: {psy_df.shape}",
    f"EEG DataFrame Shape: {eeg_df.shape}",
    f"GSR DataFrame Shape: {gsr_df.shape}",
    f"EYE DataFrame Shape: {eye_df.shape}",
    f"TIVA DataFrame Shape: {tiva_df.shape}",
    f"IVT DataFrame Shape: {ivt_df.shape}",
]
for shape_line in shape_lines:
    print(shape_line)

print("\nSample of PSY data (showing trial windows):")
psy_preview = psy_df.head()
print(psy_preview)

# 6. Write every extracted dataframe to its own CSV file.
# output_dir is a relative path, so it is resolved against the current
# working directory at run time.
output_dir = Path("processed_data/extracted_data")
# parents=True creates missing intermediate folders; exist_ok=True makes a
# re-run succeed when the folder is already there.
output_dir.mkdir(parents=True, exist_ok=True)

print(f"\nSaving extracted data to: {output_dir}")
extracted_outputs = {
    'PSY_extracted.csv': psy_df,
    'EEG_extracted.csv': eeg_df,
    'GSR_extracted.csv': gsr_df,
    'EYE_extracted.csv': eye_df,
    'TIVA_extracted.csv': tiva_df,
    'IVT_extracted.csv': ivt_df,
}
for file_name, frame in extracted_outputs.items():
    # index=False keeps the dataframe's row index out of the written file.
    frame.to_csv(output_dir / file_name, index=False)

print("Done! Extracted data has been saved.")