<a href="https://colab.research.google.com/github/AsifMiah-CS/AI_Models/blob/main/Publication-Research.git/Multimodal_Deep_Learning_for_Early_Detection_of_Depression_and_Anxiety.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import os
current_dir = os.getcwd()
print(current_dir)

/content


In [3]:
import os
import gc
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import torch
from torch import nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
from transformers import AutoTokenizer, AutoModel
from torch.optim.lr_scheduler import StepLR
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

Using device: cpu


In [5]:
daic_woz_dir = '/content/drive/MyDrive/Datasets/Multimodal Anxiety and Depression Detection/DAIC-WOZ'
train_dir = os.path.join(daic_woz_dir, 'train')
test_dir = os.path.join(daic_woz_dir, 'test')
validation_or_development_dir = os.path.join(daic_woz_dir,'validation_or_development')

# Load the train_split and test_split CSV file
train_split_path = os.path.join(daic_woz_dir, 'train_split_Depression_AVEC2017.csv')
train_split_df = pd.read_csv(train_split_path)
test_split_path = os.path.join(daic_woz_dir, 'test_split_Depression_AVEC2017.csv')
test_split_df = pd.read_csv(test_split_path)
validation_or_development_path = os.path.join(daic_woz_dir, 'dev_split_Depression_AVEC2017.csv')
validation_or_development_split_df = pd.read_csv(validation_or_development_path)


# Convert Participant_ID to string and remove '.0' if present
train_split_df['Participant_ID'] = train_split_df['Participant_ID'].astype(str).str.replace(r'\.0$', '', regex=True)
train_split_df = train_split_df.iloc[:107, :]
test_split_df['Participant_ID'] = test_split_df['Participant_ID'].astype(str).str.replace(r'\.0$', '', regex=True)
test_split_df = test_split_df.iloc[:10, :]
validation_or_development_split_df['Participant_ID'] = validation_or_development_split_df['Participant_ID'].astype(str).str.replace(r'\.0$', '', regex=True)
validation_or_development_split_df = validation_or_development_split_df.iloc[:19, :]




# # Initialize data structures
# train_multimodal_data = {
#     'participants': [],
#     'text': [],
#     'audio': [],
#     'visual': [],
#     'phq8_scores': []
# }

# test_multimodal_data = {
#     'participants': [],
#     'text': [],
#     'audio': [],
#     'visual': [],
#     'phq8_scores': []
# }

# validation_or_development_multimodal_data = {
#     'participants': [],
#     'text': [],
#     'audio': [],
#     'visual': [],
#     'phq8_scores': []
# }



def load_text_modality(transcript_path):
    """Load and process text transcript"""
    try:
        print("Loading transcript...")
        df = pd.read_csv(transcript_path)
        print("Transcript loaded successfully")
        return df
    except FileNotFoundError:
        print(f"⚠️ Transcript not found at {transcript_path}")
        return pd.DataFrame() # Return empty DataFrame on File Not Found
    except Exception as e:
        print(f"❌ Error loading transcript: {str(e)}")
        return pd.DataFrame() # Return empty DataFrame on other errors


def load_audio_modality(covarep_path):
    """Load COVAREP audio features"""
    try:
        df = pd.read_csv(covarep_path)
        return df.values
    except FileNotFoundError:
        print(f"⚠️ COVAREP file not found at {covarep_path}")
        return np.array([]) # Return empty array on File Not Found
    except Exception as e:
        print(f"❌ Error loading COVAREP file: {str(e)}")
        return np.array([]) # Return empty array on other errors


def combine_visual_features(au_path, gaze_path, pose_path):
    """Combine the three visual modality files with proper cleaning"""
    try:
        def safe_read(file_path):
            try:
                df = pd.read_csv(file_path, delim_whitespace=True, header=0)
                df.columns = df.columns.str.strip().str.rstrip(',')
                df = df.applymap(lambda x: str(x).strip().rstrip(','))
                df = df.apply(pd.to_numeric, errors='coerce')
                return df
            except FileNotFoundError:
                print(f"⚠️ Visual file not found at {file_path}")
                return pd.DataFrame() # Return empty DataFrame on File Not Found
            except Exception as e:
                print(f"❌ Error reading visual file {file_path}: {str(e)}")
                return pd.DataFrame() # Return empty DataFrame on other errors


        au = safe_read(au_path)
        gaze = safe_read(gaze_path)
        pose = safe_read(pose_path)

        # Check if any dataframe is empty
        if au.empty or gaze.empty or pose.empty:
             print("Skipping visual feature combination due to missing or empty files.")
             return np.array([])


        # Filter rows with success >= 1.0 if available
        if 'success' in au.columns:
            au = au[au['success'] >= 1.0]
        if 'success' in gaze.columns:
            gaze = gaze[gaze['success'] >= 1.0]
        if 'success' in pose.columns:
            pose = pose[pose['success'] >= 1.0]

        # Trim to smallest length to match shapes
        min_len = min(len(au), len(gaze), len(pose))
        au, gaze, pose = au.iloc[:min_len], gaze.iloc[:min_len], pose.iloc[:min_len]

        # Return numerical array
        return np.hstack([au.values, gaze.values, pose.values])

    except Exception as e:
        print(f"❌ Error combining visual features: {str(e)}")
        return np.array([]) # Return empty array on error



# Data Loader Main function
def data_loader(data_dir, data_split_df, multimodal_data):
    """
    Load multimodal data for a given set of participants and append to multimodal_data dict.
    If multimodal_data is empty, it initializes required keys.
    """

    # Ensure multimodal_data has the necessary keys
    if 'participants' not in multimodal_data: multimodal_data['participants'] = []
    if 'text' not in multimodal_data: multimodal_data['text'] = []
    if 'audio' not in multimodal_data: multimodal_data['audio'] = []
    if 'visual' not in multimodal_data: multimodal_data['visual'] = []
    if 'phq8_scores' not in multimodal_data: multimodal_data['phq8_scores'] = []
    if 'phq8_binary' not in multimodal_data: multimodal_data['phq8_binary'] = []


    print("\nStarting data loading process...")
    print(f"Found {len(data_split_df)} participants in CSV")

    for index, row in data_split_df.iterrows():
        participant_id = str(row['Participant_ID']).strip()
        folder_name = f"{participant_id}_P"
        participant_dir = os.path.join(data_dir, folder_name)

        print(f"\nProcessing participant {participant_id}")
        if not os.path.exists(participant_dir):
            print(f"⚠️ Directory not found: {participant_dir}")
            multimodal_data['participants'].append(participant_id) # Still add participant ID even if directory is missing
            multimodal_data['text'].append(pd.DataFrame()) # Append empty DataFrame
            multimodal_data['audio'].append(np.array([])) # Append empty array
            multimodal_data['visual'].append(np.array([])) # Append empty array
            multimodal_data['phq8_scores'].append(row.get('PHQ8_Score', None) )   # Append label or None
            multimodal_data['phq8_binary'].append(row.get('PHQ8_Binary', None))

            continue

        try:
            # Load text
            transcript_path = os.path.join(participant_dir, f"{participant_id}_TRANSCRIPT.csv")
            text_data = load_text_modality(transcript_path)
            multimodal_data['text'].append(text_data)


            # Load audio
            covarep_path = os.path.join(participant_dir, f"{participant_id}_COVAREP.csv")
            audio_data = load_audio_modality(covarep_path)
            multimodal_data['audio'].append(audio_data)


            # Load visual
            au_path = os.path.join(participant_dir, f"{participant_id}_CLNF_AUs.txt")
            gaze_path = os.path.join(participant_dir, f"{participant_id}_CLNF_gaze.txt")
            pose_path = os.path.join(participant_dir, f"{participant_id}_CLNF_pose.txt")
            visual_data = combine_visual_features(au_path, gaze_path, pose_path)
            multimodal_data['visual'].append(visual_data)


            # Append participant ID and score
            multimodal_data['participants'].append(participant_id)
            multimodal_data['phq8_scores'].append(row.get('PHQ8_Score', None))
            multimodal_data['phq8_binary'].append(row.get('PHQ8_Binary', None))

            if text_data.empty or audio_data.size == 0 or visual_data.size == 0:
                 print(f"⚠️ Warning: Partial data loaded for participant {participant_id}")
            else:
                print(f"✔ Successfully loaded data for participant {participant_id}")


        except Exception as e:
            print(f"❌ Error processing participant {participant_id}: {str(e)}")
            # Append empty data structures in case of an unexpected error
            multimodal_data['text'].append(pd.DataFrame())
            multimodal_data['audio'].append(np.array([]))
            multimodal_data['visual'].append(np.array([]))
            multimodal_data['participants'].append(participant_id) # Still add participant ID
            multimodal_data['phq8_scores'].append(row.get('PHQ8_Score', None)) # Append label or None
            multimodal_data['phq8_binary'].append(row.get('PHQ8_Binary', None))

    print(f"\nAttempted to load data for {len(data_split_df)} participants. Loaded structures for {len(multimodal_data['participants'])}.")

# Processing and features extraction


In [6]:

# --- Lightweight TCN Feature Extractor ---
class VisualTCNFeatureExtractor(nn.Module):
    def __init__(self, input_dim, output_dim=256):
        super(VisualTCNFeatureExtractor, self).__init__()
        self.tcn = nn.Sequential(
            nn.Conv1d(input_dim, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv1d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv1d(128, output_dim, kernel_size=3, padding=1),
            nn.AdaptiveAvgPool1d(1),  # Global average pooling over time
        )

    def forward(self, x):
        x = x.permute(0, 2, 1)  # (batch, seq_len, feat_dim) → (batch, feat_dim, seq_len)
        x = self.tcn(x)         # (batch, output_dim, 1)
        return x.squeeze(-1)    # (batch, output_dim)


In [7]:
# Lightweight TCN for audio (can reuse same as visual)
class AudioTCNFeatureExtractor(nn.Module):
    def __init__(self, input_dim, output_dim=256):
        super(AudioTCNFeatureExtractor, self).__init__()
        self.tcn = nn.Sequential(
            nn.Conv1d(input_dim, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv1d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv1d(128, output_dim, kernel_size=3, padding=1),
            nn.AdaptiveAvgPool1d(1),
        )

    def forward(self, x):
        x = x.permute(0, 2, 1)  # (batch, seq_len, feat_dim) → (batch, feat_dim, seq_len)
        x = self.tcn(x)         # (batch, output_dim, 1)
        return x.squeeze(-1)    # (batch, output_dim)





In [8]:
class EmbeddingReducer(nn.Module):
    def __init__(self, input_dim=768, output_dim=256):
        super().__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        return self.linear(x)




bert_tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
bert_model = AutoModel.from_pretrained('bert-base-uncased').to(device)
bert_model.eval()  # Set to evaluation mode

@torch.no_grad()
def get_bert_embedding(text_list, max_utt=100):
    """
    text_list: List of utterances (strings) from one participant
    Returns a tensor: (max_utt, 256)
    """
    tokenized = bert_tokenizer(
    text_list,
    return_tensors='pt',
    padding=True,
    truncation=True,
    max_length=64
    ).to(device)


    outputs = bert_model(**tokenized)
    embeddings = outputs.last_hidden_state[:, 0, :]  # [CLS] token embeddings → shape: (N, 768)

    reducer = EmbeddingReducer().to(device)
    embeddings = reducer(embeddings)  # embeddings: [N, 768] → new_embeddings: [N, 256]

    # Pad or truncate to max_utt
    if embeddings.size(0) > max_utt:
        embeddings = embeddings[:max_utt, :]
    elif embeddings.size(0) < max_utt:
        pad_len = max_utt - embeddings.size(0)
        pad_tensor = torch.zeros((pad_len, 256), device = device)
        embeddings = torch.cat([embeddings, pad_tensor], dim=0)

    return embeddings  # (max_utt, 258)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

In [9]:
# --- Process Visual Features ---
def process_visual_features(visual_data_list):

    #max_len = max(f.shape[0] for f in visual_data_list)
    max_len = 22000

    feat_dim = visual_data_list[0].shape[1]

    # Stack all data first
    stacked = []
    for features in visual_data_list:
        # Truncate or pad
        if features.shape[0] > max_len:
            features = features[:max_len]
        elif features.shape[0] < max_len:
            pad_len = max_len - features.shape[0]
            features = np.vstack([features, np.zeros((pad_len, feat_dim))])

        stacked.append(features)

    # Convert to tensor early to offload from CPU
    stacked_tensor = torch.tensor(np.array(stacked), dtype=torch.float32).to(device)

    # Fit scaler on full dataset before transform
    scaler = StandardScaler()
    B, T, D = stacked_tensor.shape
    scaled_flat = scaler.fit_transform(stacked_tensor.view(-1, D).cpu().numpy())
    scaled_tensor = torch.tensor(scaled_flat, dtype=torch.float32, device=device).view(B, T, D)

    # Feature extractor
    feature_extractor = VisualTCNFeatureExtractor(input_dim=D, output_dim=256).to(device)
    for param in feature_extractor.parameters():
        param.requires_grad = False

    with torch.no_grad():
        visual_features = feature_extractor(scaled_tensor)  # Shape: (batch, 256)

    del stacked, stacked_tensor, scaled_tensor
    gc.collect()

    return visual_features



def process_audio_features(audio_data_list):

    # max_len = max(f.shape[0] for f in audio_data_list)
    max_len = 65000

    feat_dim = audio_data_list[0].shape[1]

    stacked = []
    for features in audio_data_list:
        # Replace inf/nan values
        features = np.nan_to_num(features, nan=0.0, posinf=0.0, neginf=0.0)

        # Truncate or pad
        if features.shape[0] > max_len:
            features = features[:max_len]
        elif features.shape[0] < max_len:
            pad = np.zeros((max_len - features.shape[0], feat_dim))
            features = np.vstack([features, pad])

        stacked.append(features)

    # Convert to tensor early
    stacked_tensor = torch.tensor(np.array(stacked), dtype=torch.float32).to(device)

    # Batch scaling
    scaler = StandardScaler()
    B, T, D = stacked_tensor.shape
    scaled_flat = scaler.fit_transform(stacked_tensor.view(-1, D).cpu().numpy())
    scaled_tensor = torch.tensor(scaled_flat, dtype=torch.float32, device=device).view(B, T, D)

    # Feature extractor
    feature_extractor = AudioTCNFeatureExtractor(input_dim=D, output_dim=256).to(device)
    for param in feature_extractor.parameters():
        param.requires_grad = False

    with torch.no_grad():
        audio_features = feature_extractor(scaled_tensor)

    del stacked, stacked_tensor, scaled_tensor
    gc.collect()

    return audio_features



def process_text_features(text_data_list):
    """
    text_data_list: List of Pandas DataFrames (each transcript.csv)
    Returns: Tensor (num_samples, max_seq_len, 768)
    """

    max_utt = max(
    sum(
        1 for row in df.itertuples(index=False)
        if (len(row[0].split("\t")) == 4 and row[0].split("\t")[2].lower() == 'participant' and row[0].split("\t")[3].strip())
    )
    for df in text_data_list
    )



    all_participant_features = []

    for df in text_data_list:
        utterances = []

        for row in df.itertuples(index=False):
            try:
                split = row[0].split("\t")  # whole line in one string
                if len(split) == 4:
                    speaker, text = split[2], split[3]
                    if speaker.lower() == 'participant' and text.strip():
                        utterances.append(text.strip())
            except Exception as e:
                print(f"Error parsing row: {row} -> {e}")

        if not utterances:
            # If no valid utterances, fill with zeros
            features = torch.zeros((256,), device=device)
        else:
            embeddings = get_bert_embedding(utterances, max_utt= max_utt)  # (T, 768)
            features = embeddings.mean(dim=0)  # (768,) → mean pooling

        all_participant_features.append(features)

    # Stack into a 3D tensor
    text_features_tensor = torch.stack(all_participant_features).to(device)  # (batch_size, max_seq_len, 768)
    return text_features_tensor

In [10]:
def create_feature_pipeline(multimodal_data_batch):

    # 1. Extract features from each modality
    text_features = process_text_features(multimodal_data_batch['text'])       # shape: (B, 256)
    audio_features = process_audio_features(multimodal_data_batch['audio'])    # shape: (B, 256)
    visual_features = process_visual_features(multimodal_data_batch['visual']) # shape: (B, 256)

    return  visual_features, audio_features, text_features







def batched_data_processor(
    data_dir,
    split_df,
    batch_size=16,
    replace_none_with_mean=True
):
    """
    Processes multimodal data in batches and returns concatenated tensors for each modality
    and the labels (if available).
    """
    all_visual_features = []
    all_audio_features = []
    all_text_features = []
    all_labels = []
    all_binary_labels = []
    valid_scores = []  # To track all valid scores for mean calculation

    num_samples = len(split_df)
    for start_idx in range(0, num_samples, batch_size):
        end_idx = min(start_idx + batch_size, num_samples)
        batch_df = split_df.iloc[start_idx:end_idx].reset_index(drop=True)

        # Prepare an empty dict to fill with multimodal data for the batch
        multimodal_data_batch = {}

        # Load data into the dict using your existing loader (for this batch only)
        data_loader(data_dir, batch_df, multimodal_data_batch)

        # Extract and process features
        visual_feats, audio_feats, text_feats = create_feature_pipeline(multimodal_data_batch)

        all_visual_features.append(visual_feats)
        all_audio_features.append(audio_feats)
        all_text_features.append(text_feats)

        # Handle PHQ-8 scores
        if 'phq8_scores' in multimodal_data_batch:
            batch_scores = multimodal_data_batch['phq8_scores']

            # Convert to numpy array for NaN handling
            scores = np.array(batch_scores, dtype=np.float32)

            # Track valid scores for mean calculation
            valid_mask = ~np.isnan(scores)
            current_valid = scores[valid_mask].tolist()
            valid_scores.extend(current_valid)

            # Process labels for this batch
            if replace_none_with_mean and len(valid_scores) > 0:
                # Calculate current mean of all valid scores we've seen so far
                current_mean = np.mean(valid_scores)
                # Replace NaN values with current mean
                scores[np.isnan(scores)] = current_mean

            scores_binary = (scores >= 10).astype(np.float32)

            # Scale scores to range [0, 1]
            scores = scores / 24.0

        # Convert to tensor (scaled values)
        labels = torch.tensor(scores, dtype=torch.float32, device=device)
        all_labels.append(labels)
        all_binary_labels.append(torch.tensor(scores_binary, dtype=torch.float32, device=device))

        # Clear memory
        del multimodal_data_batch
        gc.collect()

    # Concatenate all batches into a single tensor
    visual_tensor = torch.cat(all_visual_features, dim=0)
    audio_tensor = torch.cat(all_audio_features, dim=0)
    text_tensor = torch.cat(all_text_features, dim=0)
    labels_tensor = torch.cat(all_labels, dim=0) if all_labels else None
    binary_labels_tensor = torch.cat(all_binary_labels, dim=0) if all_binary_labels else None

    return visual_tensor, audio_tensor, text_tensor, labels_tensor, binary_labels_tensor







print("Processing train data in batches...")
train_processed_visual_features, train_processed_audio_features, train_processed_text_features, train_labels, train_binary_labels = batched_data_processor(
    train_dir, train_split_df, batch_size=8
)

print("Processing validation data in batches...")
validation_processed_visual_features, validation_processed_audio_features, validation_processed_text_features, validation_labels, validation_binary_labels = batched_data_processor(
    validation_or_development_dir, validation_or_development_split_df, batch_size=8
)

print("Processing test data in batches...")
test_processed_visual_features, test_processed_audio_features, test_processed_text_features, _ , _ = batched_data_processor(
    test_dir, test_split_df, batch_size=8
)

Processing train data in batches...

Starting data loading process...
Found 8 participants in CSV

Processing participant 303
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 303

Processing participant 304
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 304

Processing participant 305
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 305

Processing participant 310
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 310

Processing participant 312
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 312

Processing participant 313
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 313

Processing participant 315
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 315

Processing participant 316
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 316

Attempted to load data for 8 participants. Loaded structures for 8.

Starting data loading process...
Found 8 participants in CSV

Processing participant 317
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 317

Processing participant 318
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 318

Processing participant 319
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 319

Processing participant 320
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 320

Processing participant 321
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 321

Processing participant 322
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 322

Processing participant 324
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 324

Processing participant 325
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 325

Attempted to load data for 8 participants. Loaded structures for 8.

Starting data loading process...
Found 8 participants in CSV

Processing participant 326
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 326

Processing participant 327
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 327

Processing participant 328
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 328

Processing participant 330
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 330

Processing participant 333
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 333

Processing participant 336
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 336

Processing participant 338
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 338

Processing participant 339
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 339

Attempted to load data for 8 participants. Loaded structures for 8.

Starting data loading process...
Found 8 participants in CSV

Processing participant 340
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 340

Processing participant 341
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 341

Processing participant 343
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 343

Processing participant 344
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 344

Processing participant 345
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)


✔ Successfully loaded data for participant 345

Processing participant 347
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 347

Processing participant 348
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 348

Processing participant 350
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 350

Attempted to load data for 8 participants. Loaded structures for 8.

Starting data loading process...
Found 8 participants in CSV

Processing participant 351
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 351

Processing participant 352
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 352

Processing participant 353
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 353

Processing participant 355
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 355

Processing participant 356
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 356

Processing participant 357
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 357

Processing participant 358
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 358

Processing participant 360
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 360

Attempted to load data for 8 participants. Loaded structures for 8.

Starting data loading process...
Found 8 participants in CSV

Processing participant 362
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 362

Processing participant 363
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 363

Processing participant 364
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 364

Processing participant 366
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 366

Processing participant 368
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 368

Processing participant 369
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 369

Processing participant 370
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 370

Processing participant 371
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 371

Attempted to load data for 8 participants. Loaded structures for 8.

Starting data loading process...
Found 8 participants in CSV

Processing participant 372
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 372

Processing participant 374
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 374

Processing participant 375
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 375

Processing participant 376
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 376

Processing participant 379
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 379

Processing participant 380
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 380

Processing participant 383
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 383

Processing participant 385
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 385

Attempted to load data for 8 participants. Loaded structures for 8.

Starting data loading process...
Found 8 participants in CSV

Processing participant 386
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 386

Processing participant 391
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 391

Processing participant 392
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 392

Processing participant 393
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 393

Processing participant 397
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 397

Processing participant 400
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 400

Processing participant 401
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 401

Processing participant 402
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 402

Attempted to load data for 8 participants. Loaded structures for 8.

Starting data loading process...
Found 8 participants in CSV

Processing participant 409
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 409

Processing participant 412
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 412

Processing participant 414
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 414

Processing participant 415
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 415

Processing participant 416
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 416

Processing participant 419
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 419

Processing participant 423
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 423

Processing participant 425
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 425

Attempted to load data for 8 participants. Loaded structures for 8.

Starting data loading process...
Found 8 participants in CSV

Processing participant 426
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 426

Processing participant 427
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 427

Processing participant 428
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 428

Processing participant 429
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 429

Processing participant 430
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 430

Processing participant 433
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 433

Processing participant 434
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 434

Processing participant 437
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 437

Attempted to load data for 8 participants. Loaded structures for 8.

Starting data loading process...
Found 8 participants in CSV

Processing participant 441
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 441

Processing participant 443
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 443

Processing participant 444
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 444

Processing participant 445
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 445

Processing participant 446
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 446

Processing participant 447
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 447

Processing participant 448
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 448

Processing participant 449
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 449

Attempted to load data for 8 participants. Loaded structures for 8.

Starting data loading process...
Found 8 participants in CSV

Processing participant 454
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 454

Processing participant 455
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 455

Processing participant 456
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 456

Processing participant 457
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 457

Processing participant 459
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 459

Processing participant 463
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 463

Processing participant 464
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)


✔ Successfully loaded data for participant 464

Processing participant 468
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 468

Attempted to load data for 8 participants. Loaded structures for 8.

Starting data loading process...
Found 8 participants in CSV

Processing participant 471
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 471

Processing participant 473
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)


✔ Successfully loaded data for participant 473

Processing participant 474
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 474

Processing participant 475
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 475

Processing participant 478
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 478

Processing participant 479
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 479

Processing participant 485
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 485

Processing participant 486
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 486

Attempted to load data for 8 participants. Loaded structures for 8.

Starting data loading process...
Found 3 participants in CSV

Processing participant 487
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 487

Processing participant 488
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 488

Processing participant 491
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 491

Attempted to load data for 3 participants. Loaded structures for 3.
Processing validation data in batches...

Starting data loading process...
Found 8 participants in CSV

Processing participant 302
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)


✔ Successfully loaded data for participant 302

Processing participant 307
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 307

Processing participant 331
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 331

Processing participant 335
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 335

Processing participant 346
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 346

Processing participant 367
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 367

Processing participant 377
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 377

Processing participant 381
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 381

Attempted to load data for 8 participants. Loaded structures for 8.

Starting data loading process...
Found 8 participants in CSV

Processing participant 382
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 382

Processing participant 388
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 388

Processing participant 389
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 389

Processing participant 390
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 390

Processing participant 395
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 395

Processing participant 403
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 403

Processing participant 404
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 404

Processing participant 406
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 406

Attempted to load data for 8 participants. Loaded structures for 8.

Starting data loading process...
Found 3 participants in CSV

Processing participant 413
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 413

Processing participant 417
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 417

Processing participant 418
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 418

Attempted to load data for 3 participants. Loaded structures for 3.
Processing test data in batches...

Starting data loading process...
Found 8 participants in CSV

Processing participant 300
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 300

Processing participant 301
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 301

Processing participant 306
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 306

Processing participant 308
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 308

Processing participant 309
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 309

Processing participant 311
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 311

Processing participant 314
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)


✔ Successfully loaded data for participant 314

Processing participant 323
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 323

Attempted to load data for 8 participants. Loaded structures for 8.

Starting data loading process...
Found 2 participants in CSV

Processing participant 329
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))


✔ Successfully loaded data for participant 329

Processing participant 332
Loading transcript...
Transcript loaded successfully


  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)
  df = df.applymap(lambda x: str(x).strip().rstrip(','))
  df = pd.read_csv(file_path, delim_whitespace=True, header=0)


✔ Successfully loaded data for participant 332

Attempted to load data for 2 participants. Loaded structures for 2.


In [11]:
print(train_processed_visual_features.shape)
# print(test_processed_visual_features.shape)
print(validation_processed_visual_features.shape)


torch.Size([107, 256])
torch.Size([19, 256])


In [12]:
# class MultimodalDataset(Dataset):
#     def __init__(self, data_dir, split_df, batch_size=32):
#         """
#         Initialize the dataset with directory, split dataframe and batch size
#         """
#         self.data_dir = data_dir
#         self.split_df = split_df
#         self.batch_size = batch_size
#         self.participant_ids = split_df['Participant_ID'].tolist()
#         self.labels = split_df['PHQ8_Score'].tolist()

#     def __len__(self):
#         return len(self.participant_ids)

#     def __getitem__(self, idx):
#         """
#         Load and process a single sample (not used in batch processing mode)
#         """
#         participant_id = self.participant_ids[idx]
#         label = self.labels[idx]

#         # Load single sample data
#         sample_data = {}
#         data_loader(self.data_dir, self.split_df[self.split_df['Participant_ID'] == participant_id], sample_data)

#         # Process features
#         visual_feat, audio_feat, text_feat = create_feature_pipeline(sample_data)

#         return {
#             'visual': visual_feat,
#             'audio': audio_feat,
#             'text': text_feat,
#             'label': torch.tensor(label, dtype=torch.float32)
#         }

# def create_feature_pipeline(multimodal_dict):
#     """Process all modalities and return ready-to-model features"""
#     # Process each modality
#     processed_visual_features = process_visual_features(multimodal_dict['visual'])
#     del multimodal_dict['visual']
#     gc.collect()
#     processed_audio_features = process_audio_features(multimodal_dict['audio'])
#     del multimodal_dict['audio']
#     gc.collect()
#     processed_text_features = process_text_features(multimodal_dict['text'])
#     del multimodal_dict['text']
#     gc.collect()

#     print(processed_visual_features.shape)
#     print(processed_audio_features.shape)
#     print(processed_text_features.shape)

#     return processed_visual_features,  processed_audio_features,  processed_text_features


# def batch_data_loader(data_dir, split_df, batch_size=32):
#     """
#     Generator function that loads and processes data in batches
#     """
#     participant_ids = split_df['Participant_ID'].tolist()

#     # Check if 'PHQ8_Score' column exists
#     if 'PHQ8_Score' in split_df.columns:
#         labels = split_df['PHQ8_Score'].tolist()
#     else:
#         # If not, create a list of None or a placeholder
#         labels = [None] * len(participant_ids)


#     for i in range(0, len(participant_ids), batch_size):
#         batch_ids = participant_ids[i:i+batch_size]
#         batch_labels = labels[i:i+batch_size]

#         # Initialize batch data dictionary with all required keys
#         batch_data = {
#             'participants': [],
#             'text': [],
#             'audio': [],
#             'visual': [],
#             'phq8_scores': batch_labels
#         }

#         # Load batch data
#         batch_split_df = split_df[split_df['Participant_ID'].isin(batch_ids)]
#         data_loader(data_dir, batch_split_df, batch_data)

#         # Process features for the batch
#         visual_feats, audio_feats, text_feats = create_feature_pipeline(batch_data)

#         # Convert labels to tensor if not None
#         if any(batch_labels):
#           batch_labels_tensor = torch.tensor(batch_labels, dtype=torch.float32)
#         else:
#           batch_labels_tensor = None # Handle case with no labels


#         # Clear memory
#         del batch_data
#         gc.collect()

#         yield visual_feats, audio_feats, text_feats, batch_labels_tensor

# def process_entire_dataset(data_dir, split_df, batch_size=32):
#     """
#     Process entire dataset in batches and return concatenated features
#     """
#     visual_features_list = []
#     audio_features_list = []
#     text_features_list = []
#     labels_list = []

#     for visual_feats, audio_feats, text_feats, labels in batch_data_loader(data_dir, split_df, batch_size):
#         visual_features_list.append(visual_feats)
#         audio_features_list.append(audio_feats)
#         text_features_list.append(text_feats)
#         if labels is not None:
#             labels_list.append(labels)


#         # Clear memory
#         gc.collect()

#     # Concatenate all batches
#     visual_features = torch.cat(visual_features_list, dim=0)
#     audio_features = torch.cat(audio_features_list, dim=0)
#     text_features = torch.cat(text_features_list, dim=0)
#     all_labels = torch.cat(labels_list, dim=0) if labels_list else None

#     return visual_features, audio_features, text_features, all_labels

# # Usage example:
# batch_size = 8  # Adjust based on your memory capacity

# # Process training data
# print("Processing training data...")
# train_processed_visual_features, train_processed_audio_features, train_processed_text_features, train_labels = process_entire_dataset(
#     train_dir, train_split_df, batch_size)

# # Process validation data
# print("Processing validation data...")
# validation_or_development_processed_visual_features, validation_or_development_processed_audio_features, validation_or_development_processed_text_features, validation_or_development_labels = process_entire_dataset(
#     validation_or_development_dir, validation_or_development_split_df, batch_size)

# # Process test data
# print("Processing test data...")
# test_processed_visual_features, test_processed_audio_features, test_processed_text_features, _ = process_entire_dataset(
#     test_dir, test_split_df, batch_size)

#Model is being train by using corr matrics -> cnn

In [13]:
def compute_correlation_matrix(visual, audio, text):
    """
    visual, audio, text: torch tensors of shape (batch, feature_size)
    Returns: (batch, 3, 3) correlation matrix for each sample
    """
    batch_size = visual.size(0)

    # Stack: (batch, 3, feature_size)
    features = torch.stack([visual, audio, text], dim=1)

    # Center the features
    features_centered = features - features.mean(dim=2, keepdim=True)

    # Compute covariance: (batch, 3, 3)
    cov = torch.matmul(features_centered, features_centered.transpose(1, 2)) / (features.size(2) - 1)

    # Compute std dev
    std = features_centered.std(dim=2, keepdim=True)
    std_matrix = torch.matmul(std, std.transpose(1, 2)) + 1e-8  # Avoid division by 0

    # Pearson correlation
    corr = cov / std_matrix
    return corr  # (batch, 3, 3)


In [14]:
import torch.nn as nn
import torch.nn.functional as F

class CorrelationCNN(nn.Module):
    def __init__(self):
        super(CorrelationCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=2)  # Input: (batch, 1, 3, 3) → Output: (batch, 16, 2, 2)
        self.fc1 = nn.Linear(16 * 2 * 2, 32)
        self.out = nn.Linear(32, 1)  # Output: PHQ-8 score (regression)

    def forward(self, x):
        x = F.relu(self.conv1(x))  # (batch, 16, 2, 2)
        x = x.view(x.size(0), -1)  # Flatten
        x = F.relu(self.fc1(x))
        return self.out(x)  # (batch, 1)


In [15]:
from torch.utils.data import TensorDataset, DataLoader

# Compute correlation matrices
train_corr = compute_correlation_matrix(train_processed_visual_features,
                                        train_processed_audio_features,
                                        train_processed_text_features)  # (batch, 3, 3)

test_corr = compute_correlation_matrix(test_processed_visual_features,
                                       test_processed_audio_features,
                                       test_processed_text_features)

# CNN expects 4D input: (batch, channels=1, 3, 3)
train_corr = train_corr.unsqueeze(1)
test_corr = test_corr.unsqueeze(1)


train_corr = train_corr.to(device)
test_corr = test_corr.to(device)

# Datasets
train_dataset = TensorDataset(train_corr, train_labels.unsqueeze(1))
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

# Initialize model
model = CorrelationCNN().to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 15
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for x_batch, y_batch in train_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        pred = model(x_batch)
        loss = criterion(pred, y_batch)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss:.4f}")


Epoch 1/15, Loss: 0.6456
Epoch 2/15, Loss: 0.3987
Epoch 3/15, Loss: 0.3699
Epoch 4/15, Loss: 0.3780
Epoch 5/15, Loss: 0.3709
Epoch 6/15, Loss: 0.3573
Epoch 7/15, Loss: 0.3600
Epoch 8/15, Loss: 0.3744
Epoch 9/15, Loss: 0.3538
Epoch 10/15, Loss: 0.3640
Epoch 11/15, Loss: 0.3572
Epoch 12/15, Loss: 0.3644
Epoch 13/15, Loss: 0.3564
Epoch 14/15, Loss: 0.3594
Epoch 15/15, Loss: 0.3679


In [16]:

model.eval()
with torch.no_grad():
    test_preds = model(test_corr).squeeze()  # (batch,)
print(test_preds)


tensor([0.2791, 0.2740, 0.2769, 0.2664, 0.2720, 0.2707, 0.2778, 0.2803, 0.2770,
        0.2745])


#Multimodal Transformer Fusion Module

In [69]:
class MultimodalDataset(torch.utils.data.Dataset):
    def __init__(self, visual, audio, text, labels=None, binary_labels=None):  # labels can be None for test set
        self.visual = visual
        self.audio = audio
        self.text = text
        self.labels = labels
        self.binary_labels = binary_labels


    def __len__(self):
        return self.visual.size(0)

    def __getitem__(self, idx):
        sample = {
            'visual': self.visual[idx],
            'audio': self.audio[idx],
            'text': self.text[idx],
        }
        if self.labels is not None:
            sample['label'] = self.labels[idx]
            sample['binary_labels'] = self.binary_labels[idx]


        return sample







train_dataset_fusion = MultimodalDataset(
    train_processed_visual_features,
    train_processed_audio_features,
    train_processed_text_features,
    train_labels,
    train_binary_labels

)


validation_dataset_fusion = MultimodalDataset(
    validation_processed_visual_features,
    validation_processed_audio_features,
    validation_processed_text_features,
    validation_labels,
    validation_binary_labels

)


test_dataset_fusion = MultimodalDataset(
    test_processed_visual_features,
    test_processed_audio_features,
    test_processed_text_features,  # correct input!
    labels=None,
    binary_labels=None

    # No labels for test
)


train_loader_fusion = torch.utils.data.DataLoader(train_dataset_fusion, batch_size=8, shuffle=True)
validation_loader_fusion = torch.utils.data.DataLoader(validation_dataset_fusion, batch_size=8, shuffle=False)
test_loader_fusion = torch.utils.data.DataLoader(test_dataset_fusion, batch_size=8, shuffle=False)





In [70]:
class MultimodalTransformerFusion(nn.Module):
    def __init__(self, input_dim=256, n_heads=4, hidden_dim=256, num_layers=1, dropout=0.4):
        super().__init__()

        self.visual_proj = nn.Linear(input_dim, input_dim)
        self.audio_proj = nn.Linear(input_dim, input_dim)
        self.text_proj = nn.Linear(input_dim, input_dim)
        self.dropout = nn.Dropout(dropout)  # Add this in __init__

        # ✅ Fix here
        encoder_layer = nn.TransformerEncoderLayer(d_model=input_dim, nhead=n_heads, dim_feedforward=hidden_dim, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        # ✅ Output shape fix
        self.output_layer = nn.Linear(input_dim, 1)



    def forward(self, visual_feat, audio_feat, text_feat):
        visual = self.dropout(self.visual_proj(visual_feat))
        audio = self.dropout(self.audio_proj(audio_feat))
        text = self.dropout(self.text_proj(text_feat))

        fused = torch.stack([visual, audio, text], dim=1)  # (batch, 3, 256)
        fused = fused.permute(1, 0, 2)  # (seq_len=3, batch, 256)

        encoded = self.transformer_encoder(fused)  # (seq_len=3, batch, 256)
        encoded = encoded.permute(1, 0, 2).mean(dim=1)  # (batch, 256)
        encoded = self.dropout(encoded)  # Apply dropout after fusion

        return self.output_layer(encoded).squeeze(1)  # (batch,)



In [71]:
model_fusion = MultimodalTransformerFusion().to(device)
criterion_fusion = nn.MSELoss()
optimizer_fusion = torch.optim.Adam(model_fusion.parameters(), lr=1e-3)
# scheduler = StepLR(optimizer_fusion, step_size=10, gamma=0.5)  # Reduce LR every 10 epochs by half
scheduler = ReduceLROnPlateau(optimizer_fusion, mode='min', factor=0.5, patience=5, verbose=True)




In [72]:
def train_model_fusion(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs=50, patience=10):
    best_val_loss = float('inf')
    trigger_times = 0

    for epoch in range(num_epochs):
        model.train()
        total_train_loss = 0.0

        for batch in train_loader:
            visual = batch['visual'].to(device)
            audio = batch['audio'].to(device)
            text = batch['text'].to(device)
            labels = batch['label'].to(device)

            preds = model(visual, audio, text)
            loss = criterion(preds, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_train_loss += loss.item()

        avg_train_loss = total_train_loss / len(train_loader)

        # 🔍 Validation
        model.eval()
        total_val_loss = 0.0

        with torch.no_grad():
            for batch in val_loader:
                visual = batch['visual'].to(device)
                audio = batch['audio'].to(device)
                text = batch['text'].to(device)
                labels = batch['label'].to(device)

                preds = model(visual, audio, text)
                loss = criterion(preds, labels)
                total_val_loss += loss.item()

        avg_val_loss = total_val_loss / len(val_loader)

        # Early stopping logic
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            trigger_times = 0
            torch.save(model.state_dict(), 'best_model_fusion.pth')
        else:
            trigger_times += 1
            if trigger_times >= patience:
                print(f"⏹️ Early stopping triggered at epoch {epoch+1}. Best Val Loss: {best_val_loss:.4f}")
                break

        # Scheduler step (for ReduceLROnPlateau)
        scheduler.step(avg_val_loss)

        # 📢 Logging
        current_lr = optimizer.param_groups[0]['lr']
        print(f"[Epoch {epoch+1}] Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}, LR: {current_lr:.6f}")


In [73]:
for param_group in optimizer.param_groups:
    print("Current LR:", param_group['lr'])


Current LR: 0.001


In [74]:
train_model_fusion(model_fusion, train_loader_fusion, validation_loader_fusion, criterion_fusion, optimizer_fusion,scheduler, num_epochs = 200,  patience=15)


[Epoch 1] Train Loss: 0.3369 | Val Loss: 0.2700, LR: 0.001000
[Epoch 2] Train Loss: 0.2518 | Val Loss: 0.1762, LR: 0.001000
[Epoch 3] Train Loss: 0.1796 | Val Loss: 0.1220, LR: 0.001000
[Epoch 4] Train Loss: 0.1244 | Val Loss: 0.0920, LR: 0.001000
[Epoch 5] Train Loss: 0.1117 | Val Loss: 0.1664, LR: 0.001000
[Epoch 6] Train Loss: 0.0715 | Val Loss: 0.1583, LR: 0.001000
[Epoch 7] Train Loss: 0.0679 | Val Loss: 0.1041, LR: 0.001000
[Epoch 8] Train Loss: 0.0864 | Val Loss: 0.0971, LR: 0.001000
[Epoch 9] Train Loss: 0.0469 | Val Loss: 0.1367, LR: 0.001000
[Epoch 10] Train Loss: 0.0647 | Val Loss: 0.0963, LR: 0.000500
[Epoch 11] Train Loss: 0.0601 | Val Loss: 0.1284, LR: 0.000500
[Epoch 12] Train Loss: 0.0472 | Val Loss: 0.1073, LR: 0.000500
[Epoch 13] Train Loss: 0.0374 | Val Loss: 0.1140, LR: 0.000500
[Epoch 14] Train Loss: 0.0413 | Val Loss: 0.1061, LR: 0.000500
[Epoch 15] Train Loss: 0.0418 | Val Loss: 0.1094, LR: 0.000500
[Epoch 16] Train Loss: 0.0356 | Val Loss: 0.1073, LR: 0.000250
[

In [75]:
for param_group in optimizer.param_groups:
    print("Current LR:", param_group['lr'])


Current LR: 0.001


In [76]:
def predict_model_fusion(model, dataloader):
    model.eval()
    all_preds = []

    with torch.no_grad():
        for batch in dataloader:
            visual = batch['visual'].to(device)
            audio = batch['audio'].to(device)
            text = batch['text'].to(device)

            preds = model(visual, audio, text)  # (batch_size,)
            all_preds.extend(preds.cpu().numpy())  # Convert to NumPy list

    return np.array(all_preds)

In [77]:
# Evaluate the fusion model on the test set
print("Evaluating fusion model on the test set...")
test_preds_fusion = predict_model_fusion(model_fusion, test_loader_fusion)*24

print("\nTest Predictions (first 10):")
print(test_preds_fusion[:10]) # Print first 10 predictions

Evaluating fusion model on the test set...

Test Predictions (first 10):
[5.7031193 6.8835883 9.064289  7.966008  7.515954  9.137443  8.938904
 6.8875027 8.649598  8.872978 ]


In [78]:
print(validation_labels)
print(train_labels)
print(_)

tensor([0.1667, 0.1667, 0.3333, 0.5000, 0.9583, 0.7917, 0.6667, 0.6667, 0.0000,
        0.7083, 0.5833, 0.3750, 0.2917, 0.0000, 0.0000, 0.0833, 0.4167, 0.2917,
        0.4167])
tensor([0.0000, 0.2500, 0.2917, 0.1667, 0.0833, 0.2917, 0.0833, 0.2500, 0.3333,
        0.1250, 0.5417, 0.4583, 0.8333, 0.2083, 0.2083, 0.4167, 0.0833, 0.1667,
        0.1667, 0.5000, 0.2083, 0.2917, 0.6250, 0.4583, 0.0417, 0.2917, 0.3750,
        0.4583, 0.6250, 0.6667, 0.8333, 0.4583, 0.5833, 0.4167, 0.4583, 0.4167,
        0.4167, 0.2917, 0.2917, 0.1667, 0.8333, 0.0000, 0.0000, 0.0000, 0.2917,
        0.0000, 0.0000, 0.3750, 0.5417, 0.0833, 0.2083, 0.5000, 0.0833, 0.4167,
        0.2917, 0.3333, 0.4583, 0.3750, 0.0417, 0.0833, 0.2083, 0.2917, 0.3750,
        0.4583, 0.4167, 0.5000, 0.6667, 0.1250, 0.1250, 0.1250, 0.0000, 0.2500,
        0.8333, 0.2083, 0.0000, 0.0417, 0.1250, 0.4167, 0.0833, 0.0000, 0.7500,
        0.0417, 0.2917, 0.0417, 0.0000, 0.0417, 0.7500, 0.0833, 0.0417, 0.0417,
        0.2500, 0.1250,

#Multimodal Model for Binary Classifier

In [79]:
class MultimodalBinaryClassifier(nn.Module):
    def __init__(self, input_dim_per_modality=256, hidden_dim=128):
        super().__init__()
        total_input_dim = input_dim_per_modality * 3

        self.classifier = nn.Sequential(
            nn.Linear(total_input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.5),  # Increased dropout to fight overfitting
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid()
        )

    def forward(self, visual_feat, audio_feat, text_feat):
        x = torch.cat([visual_feat, audio_feat, text_feat], dim=1)
        return self.classifier(x).squeeze(1)


In [80]:
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score
import copy

def train_binary_classifier_with_early_stopping(model, train_loader, val_loader,
                                                num_epochs=100, patience=10,
                                                lr=1e-4, weight_decay=1e-4,
                                                scheduler_patience=3):
    model = model.to(device)
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    # Scheduler (Reduce LR if val_loss plateaus)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=scheduler_patience, verbose=True
    )

    best_val_loss = float('inf')
    best_model_wts = copy.deepcopy(model.state_dict())
    epochs_no_improve = 0

    for epoch in range(num_epochs):
        model.train()
        train_losses, train_preds, train_targets = [], [], []

        for batch in train_loader:
            vis = batch['visual'].to(device)
            aud = batch['audio'].to(device)
            txt = batch['text'].to(device)
            labels = batch['binary_labels'].float().to(device)

            optimizer.zero_grad()
            outputs = model(vis, aud, txt)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_losses.append(loss.item())
            train_preds.extend((outputs > 0.5).cpu().numpy())
            train_targets.extend(labels.cpu().numpy())

        train_acc = accuracy_score(train_targets, train_preds)

        # -------- Validation --------
        model.eval()
        val_losses, val_preds, val_targets = [], [], []

        with torch.no_grad():
            for batch in val_loader:
                vis = batch['visual'].to(device)
                aud = batch['audio'].to(device)
                txt = batch['text'].to(device)
                labels = batch['binary_labels'].float().to(device)

                outputs = model(vis, aud, txt)
                loss = criterion(outputs, labels)

                val_losses.append(loss.item())
                val_preds.extend((outputs > 0.5).cpu().numpy())
                val_targets.extend(labels.cpu().numpy())

        val_loss = sum(val_losses) / len(val_losses)
        val_acc = accuracy_score(val_targets, val_preds)

        # 🔁 Step scheduler
        scheduler.step(val_loss)

        print(f"[Epoch {epoch+1}] Train Loss: {sum(train_losses)/len(train_losses):.4f}, "
              f"Train Acc: {train_acc:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, "
              f"LR: {optimizer.param_groups[0]['lr']:.6f}")

        # 🛑 Early stopping logic
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model_wts = copy.deepcopy(model.state_dict())
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        if epochs_no_improve >= patience:
            print("Early stopping triggered.")
            break

    # Load best model weights
    model.load_state_dict(best_model_wts)
    return model


In [81]:
model = MultimodalBinaryClassifier()
model = train_binary_classifier_with_early_stopping(
    model,
    train_loader_fusion,
    validation_loader_fusion,
    num_epochs=100,
    patience=10,
    lr=1e-4,
    weight_decay=1e-5
)


[Epoch 1] Train Loss: 0.6757, Train Acc: 0.7196, Val Loss: 0.6973, Val Acc: 0.5263, LR: 0.000100
[Epoch 2] Train Loss: 0.6669, Train Acc: 0.7196, Val Loss: 0.6978, Val Acc: 0.5263, LR: 0.000100
[Epoch 3] Train Loss: 0.6616, Train Acc: 0.7103, Val Loss: 0.6981, Val Acc: 0.5263, LR: 0.000100
[Epoch 4] Train Loss: 0.6583, Train Acc: 0.7196, Val Loss: 0.6984, Val Acc: 0.5263, LR: 0.000100
[Epoch 5] Train Loss: 0.6530, Train Acc: 0.7290, Val Loss: 0.6987, Val Acc: 0.5263, LR: 0.000050
[Epoch 6] Train Loss: 0.6461, Train Acc: 0.7196, Val Loss: 0.6988, Val Acc: 0.5263, LR: 0.000050
[Epoch 7] Train Loss: 0.6420, Train Acc: 0.7290, Val Loss: 0.6989, Val Acc: 0.5263, LR: 0.000050
[Epoch 8] Train Loss: 0.6418, Train Acc: 0.7290, Val Loss: 0.6992, Val Acc: 0.5263, LR: 0.000050
[Epoch 9] Train Loss: 0.6356, Train Acc: 0.7196, Val Loss: 0.6994, Val Acc: 0.5263, LR: 0.000025
[Epoch 10] Train Loss: 0.6338, Train Acc: 0.7383, Val Loss: 0.6995, Val Acc: 0.5263, LR: 0.000025
[Epoch 11] Train Loss: 0.6328