Feature Processing Pipeline

In [5]:
import os
import subprocess


# Run openSMILE (IS09 emotion config) on every WAV file in the input directory
# and collect the per-file feature rows into one master CSV.
wav_files_directory = 'hw3_speech_files'
output_path = 'output_features_final_part2.csv'

if not os.path.isdir(wav_files_directory):
    raise ValueError("Directory not found.")

# List the directory once so validation and processing see the same files.
wav_file_names = sorted(
    name for name in os.listdir(wav_files_directory) if name.endswith('.wav')
)
if not wav_file_names:
    raise ValueError("No WAV files found in the specified directory.")

# Rows are appended to the master CSV: if it already exists (e.g. from an
# earlier run of this cell) its header is kept and new rows are added after
# the existing ones. Delete the file first for a clean rebuild.
master_exists = os.path.exists(output_path)

for file_name in wav_file_names:
    file_path = os.path.join(wav_files_directory, file_name)
    # splitext swaps only the trailing extension; str.replace would also
    # rewrite any earlier '.wav' occurring in the path.
    temp_output_path = os.path.splitext(file_path)[0] + '.csv'

    command = [
        './opensmile/build/progsrc/smilextract/SMILExtract',
        '-C', 'opensmile/config/is09-13/IS09_emotion.conf',
        '-I', file_path,
        '-csvoutput', temp_output_path
    ]

    result = subprocess.run(command, capture_output=True, text=True)
    try:
        if result.returncode != 0:
            print(f"Error processing {file_name}: {result.stderr}")
            continue

        if not os.path.exists(temp_output_path) or os.path.getsize(temp_output_path) == 0:
            # Report the skip instead of dropping the file silently.
            print(f"No features produced for {file_name}; skipping.")
            continue

        with open(temp_output_path, 'r') as features_file:
            features_data = features_file.readlines()

        with open(output_path, 'a') as master_file:
            # The header (first line) is written only once per master file.
            if not master_exists:
                master_file.write(features_data[0])
                master_exists = True
            # Replace the first 'unknown' placeholder on each data row with
            # the source file name so rows can be traced back to their WAV.
            master_file.writelines(
                line.replace("'unknown'", f"'{file_name}'", 1)
                for line in features_data[1:]
            )
    finally:
        # Always clean up the per-file CSV, including on the error and
        # empty-output paths, so no stray files are left next to the audio.
        if os.path.exists(temp_output_path):
            os.remove(temp_output_path)

print("All files processed and features combined into a single CSV.")


All files processed and features combined into a single CSV.


In [84]:
import pandas as pd

# Load the combined feature table; the file is read with ';' as separator.
file_path = 'output_features_final_part2.csv'
data = pd.read_csv(file_path, sep=';')

def extract_speaker_emotion(filename):
    """Pull the speaker ID and emotion label out of a quoted file name.

    Surrounding single quotes are stripped and the remainder is split on
    underscores; the first field is used as the speaker ID and the third
    field as the emotion.

    Returns:
        A two-element ``pd.Series``: ``[speaker_id, emotion]``.

    Raises:
        IndexError: if the name has fewer than three underscore-separated
            fields.
    """
    fields = filename.strip("'").split('_')
    return pd.Series([fields[0], fields[2]])

# Derive the speaker/emotion columns from each row's file name, then discard
# the name column itself.
speaker_emotion = data['name'].apply(extract_speaker_emotion)
data[['speaker_id', 'emotion']] = speaker_emotion
data = data.drop('name', axis=1)

corrected_csv_path = 'features_part2_.csv'
data.to_csv(corrected_csv_path, index=False)

# Last expression of the cell: echo where the table was written.
corrected_csv_path

'features_part2_.csv'

In [91]:
import pandas as pd

# Per-speaker z-score normalization: every numeric feature is centred and
# scaled using the mean and sample standard deviation of that speaker's rows.
file_path = 'features_part2_.csv'
data = pd.read_csv(file_path, delimiter=',')

# Drop the frameTime column - has only 0 values
data = data.drop(columns=['frameTime'])

# 'number' selects every numeric dtype (the 'int' alias is platform
# dependent). The label columns are excluded explicitly so they are never
# rescaled, even if they happen to be parsed as numbers.
numeric_cols = [
    col for col in data.select_dtypes(include='number').columns
    if col not in ('speaker_id', 'emotion')
]

if numeric_cols:
    per_speaker = data.groupby('speaker_id')[numeric_cols]
    speaker_mean = per_speaker.transform('mean')
    speaker_std = per_speaker.transform('std')

    # A zero std (constant feature) or NaN std (speaker with a single row)
    # cannot be divided by; those cells are set to 0 instead of becoming NaN.
    usable_std = speaker_std.notna() & (speaker_std != 0)
    normalized = (data[numeric_cols] - speaker_mean) / speaker_std

    # Whole-column assignment lets integer columns become float cleanly.
    data[numeric_cols] = normalized.where(usable_std, 0.0)

normalized_output_path = 'normalized_features_p2.csv'
data.to_csv(normalized_output_path, index=False)
print(f"Normalized data saved to {normalized_output_path}")

Normalized data saved to normalized_features_p2.csv


In [125]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.metrics import classification_report, accuracy_score, f1_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

def load_data(filepath):
    """Read the CSV at ``filepath`` and return it without incomplete rows.

    Any row that contains at least one missing value is dropped; the
    remaining rows keep their original index labels.
    """
    return pd.read_csv(filepath).dropna()

def preprocess_data(data):
    """Split a feature table into model inputs, labels and group labels.

    Returns:
        A tuple ``(X, y, groups)`` of NumPy arrays: ``X`` holds every column
        except ``emotion`` and ``speaker_id``, ``y`` is the ``emotion``
        column and ``groups`` is the ``speaker_id`` column.
    """
    label_columns = ['emotion', 'speaker_id']
    features = data.drop(columns=label_columns)
    return features.values, data['emotion'].values, data['speaker_id'].values

def perform_cross_validation(X, y, groups):
    """Evaluate a tuned random forest with leave-one-group-out splits.

    For every held-out group, a grid search (``cv=3``) over random-forest
    hyperparameters is fitted on the remaining samples and the search's best
    estimator predicts the held-out samples. ``groups`` is used only for the
    outer split; it is not passed to the inner grid search.

    Returns:
        Three parallel lists with one entry per fold:
        ``(y_true_all, y_pred_all, best_models)``.
    """
    # The hyperparameter grid is identical for every fold.
    search_space = {
        'n_estimators': [100, 200],
        'max_depth': [10, 20],
        'min_samples_split': [2, 5],
        'min_samples_leaf': [1, 2],
    }

    y_true_all, y_pred_all, best_models = [], [], []

    for fit_idx, held_out_idx in LeaveOneGroupOut().split(X, y, groups):
        search = GridSearchCV(
            RandomForestClassifier(random_state=42),
            search_space,
            cv=3,
            n_jobs=-1,
        )
        search.fit(X[fit_idx], y[fit_idx])

        fold_model = search.best_estimator_
        best_models.append(fold_model)

        y_true_all.append(y[held_out_idx])
        y_pred_all.append(fold_model.predict(X[held_out_idx]))

    return y_true_all, y_pred_all, best_models


In [127]:
def main():
    """Load the normalized features, cross-validate, and print the metrics.

    Prints a summary of the speakers and array shapes, then for each
    leave-one-group-out fold the selected model and its classification
    report, and finally the accuracy and F1 scores averaged over folds.
    """
    data = load_data('normalized_features_p2.csv')
    X, y, groups = preprocess_data(data)

    print(f"Unique speakers before preprocessing: {data['speaker_id'].nunique()}")
    print(data['speaker_id'].value_counts())
    print(f"Data shapes: X={X.shape}, y={y.shape}, groups={len(groups)}")
    print(f"Unique groups after preprocessing: {np.unique(groups)}")

    y_true_all, y_pred_all, best_models = perform_cross_validation(X, y, groups)

    accuracies = []
    f1_macros = []
    f1_weighteds = []

    for i, (y_true, y_pred, model) in enumerate(zip(y_true_all, y_pred_all, best_models)):
        print(f"Fold {i+1} best model: {model}")
        print(classification_report(y_true, y_pred, zero_division=0))
        accuracies.append(accuracy_score(y_true, y_pred))
        # zero_division=0 matches the classification report above: classes
        # that are never predicted score 0 without emitting a warning.
        f1_macros.append(f1_score(y_true, y_pred, average='macro', zero_division=0))
        f1_weighteds.append(f1_score(y_true, y_pred, average='weighted', zero_division=0))

    # Aggregated scores: unweighted mean of the per-fold values.
    print(f"Aggregated Accuracy: {np.mean(accuracies):.4f}")
    print(f"Aggregated F1-Score (Macro): {np.mean(f1_macros):.4f}")
    print(f"Aggregated F1-Score (Weighted): {np.mean(f1_weighteds):.4f}")

if __name__ == "__main__":
    main()


Unique speakers before preprocessing: 7
gg    420
mk    397
cl    368
mm    302
mf    299
jg    273
cc    265
Name: speaker_id, dtype: int64
Data shapes: X=(2324, 384), y=(2324,), groups=2324
Unique groups after preprocessing: ['cc' 'cl' 'gg' 'jg' 'mf' 'mk' 'mm']
Fold 1 best model: RandomForestClassifier(max_depth=20, n_estimators=200, random_state=42)
              precision    recall  f1-score   support

     anxiety       0.02      0.10      0.03        10
     boredom       0.05      0.07      0.05        15
  cold-anger       0.13      0.13      0.13        15
    contempt       0.17      0.23      0.19        22
     despair       0.00      0.00      0.00         9
     disgust       0.43      0.10      0.16        31
     elation       0.17      0.25      0.21        16
       happy       0.23      0.22      0.22        23
   hot-anger       0.42      0.71      0.53        14
    interest       0.13      0.12      0.12        17
     neutral       0.00      0.00      0.00       