In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
# Function to add the feature descriptions as column names to a DataFrame
def add_feature_descriptions(speaker_df, descriptions_df):
    """Rename the columns of ``speaker_df`` using the feature names table.

    Both frames are modified in place: ``descriptions_df`` gets the column
    headers ``feature_name`` / ``feature_description`` (so it must have
    exactly two columns), and ``speaker_df`` gets the values of the
    ``feature_name`` column as its column labels (so the number of feature
    names must equal the number of speaker columns).

    Args:
        speaker_df: DataFrame whose columns are to be renamed.
        descriptions_df: Two-column DataFrame; the first column holds the
            names to apply, in order.

    Returns:
        The same ``speaker_df`` object, with its columns renamed.
    """
    name_col, text_col = 'feature_name', 'feature_description'

    # Label the description table so the name column can be addressed by key.
    descriptions_df.columns = [name_col, text_col]

    # Apply the feature names, in table order, as the speaker column labels.
    feature_names = descriptions_df[name_col].values
    speaker_df.columns = feature_names
    return speaker_df


# Function to add the labels to the speaker DataFrame
def add_labels(speaker_df, labels_df, participant_id):
    """Attach a participant's Depression and Gender labels to ``speaker_df``.

    Rows of ``labels_df`` whose ``Participant_ID`` equals ``participant_id``
    are selected; the ``Depression`` and ``Gender`` values of the first such
    row are written to ``speaker_df`` as constant columns of the same names.
    When no row matches, ``speaker_df`` is left untouched.

    Args:
        speaker_df: DataFrame to extend (modified in place).
        labels_df: DataFrame with ``Participant_ID``, ``Depression`` and
            ``Gender`` columns.
        participant_id: ID to look up in ``labels_df['Participant_ID']``.

    Returns:
        The same ``speaker_df`` object.
    """
    id_mask = labels_df['Participant_ID'] == participant_id
    matches = labels_df.loc[id_mask]

    # Unknown participant: nothing to add.
    if matches.empty:
        return speaker_df

    # Broadcast the first matching row's label values over every speaker row.
    for label in ('Depression', 'Gender'):
        speaker_df[label] = matches[label].values[0]
    return speaker_df

# Paths to CSV files
features_description_path = '/Users/Ayan/Desktop/CS5622_Final_Project/feature_description.csv'
labels_path = '/Users/Ayan/Desktop/CS5622_Final_Project/labels.csv'
features_train_dir = '/Users/Ayan/Desktop/CS5622_Final_Project/features_train'

# Prefix of the augmented files written by the loop below. They are saved into
# the same directory that is being scanned, so the prefix is also used to
# recognise and skip them.
processed_prefix = 'processed_'

# Read the feature descriptions (file is read without a header row) and the labels
features_description_df = pd.read_csv(features_description_path, encoding='latin-1', header=None)
labels_df = pd.read_csv(labels_path, encoding='latin-1')

# Iterate over each speaker CSV file in the features_train directory
for file in os.listdir(features_train_dir):
    # Skip non-CSV entries and the outputs of an earlier run. Without this,
    # re-running the cell would pick up 'processed_*.csv' files, parse the
    # participant ID from the wrong part of the name (typically raising
    # ValueError in int()), and process data a second time.
    if not file.endswith('.csv') or file.startswith(processed_prefix):
        continue

    # The participant ID is the text between the first '_' and the next '.'
    participant_id = int(file.split('_')[1].split('.')[0])

    # Read the speaker CSV file
    # NOTE(review): read_csv uses the first row as the header by default, and
    # that header is then replaced by the feature names. If the speaker files
    # have no header row, the first data row is lost and header=None is
    # needed here - confirm against the data files.
    speaker_df = pd.read_csv(os.path.join(features_train_dir, file), encoding='latin-1')

    # Add feature descriptions as column headers
    speaker_df = add_feature_descriptions(speaker_df, features_description_df)

    # Add labels based on participant ID
    speaker_df = add_labels(speaker_df, labels_df, participant_id)

    # Save the augmented speaker DataFrame next to its source file
    speaker_df.to_csv(os.path.join(features_train_dir, f'{processed_prefix}{file}'), index=False)
