In [1]:
# Imports 
import librosa
import librosa.display
import librosa.effects
import librosa.util

import numpy as np
import sys, os
import pandas as pd

import sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
from tqdm import tqdm

In [2]:
#Features we want right now: min f0, max f0, and mean f0 and maybe rms (not sure exactly what that is but was used in the paper)
path = './CREMA-D/AudioWAV/'
files = os.listdir(path)

summary = pd.read_csv('./CREMA-D/processedResults/summaryTable.csv')

num_files = len(os.listdir(path)) #not sure how you want to count files
count = 0
X = np.zeros((num_files, 6))
Y = np.zeros(num_files).astype(str)
for sample in tqdm(files): #depends on how you access
    file = os.path.join(path,sample)
    current_wav, current_sr = librosa.load(file) #fix for set up 
    f0_series = librosa.yin(current_wav, librosa.note_to_hz('C2'), librosa.note_to_hz('C7'))
    rms_series = librosa.feature.rms(y=current_wav)
    f0_max = np.amax(f0_series)
    f0_min = np.amin(f0_series)
    f0_mean = np.mean(f0_series)
    rms_max = np.amax(rms_series)
    rms_min = np.amin(rms_series)
    rms_mean = np.mean(rms_series)
    x = np.array([f0_min, f0_max, f0_mean, rms_min, rms_max, rms_mean])
    X[count,:] = x
    
    # Get the label for VoiceVote
    info = summary.loc[summary['FileName'] == sample.split('.')[0]]
    try:
        Y[count] = info['VoiceVote'].values[0]
    except Exception as ex:
        print(f'info: {info}')
        print(f'index count: {count}')
        print(f'unable to find file: {sample}')
    count += 1
print(f'shape of train data: {X.shape}')
print(f'shape of labels: {Y.shape}')

 80%|████████  | 5955/7442 [06:12<01:29, 16.68it/s]

info: Empty DataFrame
Columns: [Unnamed: 0, FileName, VoiceVote, VoiceLevel, FaceVote, FaceLevel, MultiModalVote, MultiModalLevel]
Index: []
index count: 5952
unable to find file: 1040_ITH_SAD_X.wav


100%|██████████| 7442/7442 [07:44<00:00, 16.03it/s]

shape of train data: (7442, 6)
shape of labels: (7442,)





In [3]:
#For Logistic Regression, can use sklearn.linear_model.LogisticRegression
# !pip install -U scikit-learn
# import sklearn
# from sklearn.linear_model import LogisticRegression
# Remove the file that wasn't in results
print(f'shape of train data: {X.shape}')
print(f'shape of labels: {Y.shape}')
print(Y[5955:5960])

shape of train data: (7442, 6)
shape of labels: (7442,)
['N:S' 'N' 'N' 'N' 'F:S']


In [4]:
# Get data and labels for training
train_split = int(0.8 * num_files)
print('train_split: ', train_split)
print(f'train size: {X[:train_split].shape}, val size: {X[train_split:].shape}')

model = LogisticRegression().fit(X[:train_split], Y[:train_split])

# Predict on validation/test (80-20 split)
predictions = model.predict(X[train_split:])

# Output score (mean accuracy)
score = model.score(X[train_split:],Y[train_split:])
print(f'Test accuracy score: {score}')

f1 = f1_score(Y[train_split:], predictions, average='macro')
print(f'macro f1 score: {f1}')
f1 = f1_score(Y[train_split:], predictions, average='micro')
print(f'micro f1 score: {f1}')

train_split:  5953
train size: (5953, 6), val size: (1489, 6)
Test accuracy score: 0.5406312961719275
macro f1 score: 0.03243248626451181
micro f1 score: 0.5406312961719275


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [5]:
# Some basic stats for the dataset
avg = np.mean(X, axis=0)
stats = ['f0_min', 'f0_max', 'f0_mean', 'rms_min', 'rms_max', 'rms_mean']
for j,stat in enumerate(stats):
    print(f'{stat} average: {avg[j]}')

f0_min average: 68.42755242039749
f0_max average: 1108.2239746103453
f0_mean average: 197.60158873789413
rms_min average: 0.003896712870773289
rms_max average: 0.10690546616422825
rms_mean average: 0.027573393515514667
