In [None]:
# Imports 
import librosa
import librosa.display
import librosa.effects
import librosa.util

import numpy as np
import sys, os
import pandas as pd

import sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
from tqdm import tqdm
from sklearn.svm import SVC

In [None]:
# A similar approach would be used to load the agent/caller sample recordings, e.g.:
# wav_agent, sr_agent = librosa.load('./sample/agent/0002f70f7386445b.wav')
# wav_caller, sr_caller = librosa.load('./sample/caller/0002f70f7386445b.wav')

In [None]:
# Build the feature matrix X and the label vector Y for the CREMA-D clips.
# Each clip contributes six summary statistics: min / max / mean of the
# fundamental frequency (f0, estimated with YIN) and min / max / mean of the
# frame-wise RMS (root-mean-square) energy from librosa.feature.rms.
# Possible further features: pitch range.
path = './CREMA-D/AudioWAV/'
# Sorted so the row order (and any positional train/val split built on it) is
# the same on every run; os.listdir gives no ordering guarantee. Entries that
# are not .wav files are ignored instead of being handed to librosa.load.
files = sorted(name for name in os.listdir(path) if name.lower().endswith('.wav'))

summary = pd.read_csv('./CREMA-D/processedResults/summaryTable.csv')
# Label lookup keyed by file name without extension. Keeping the first row per
# FileName matches taking `.values[0]` from a frame filtered on FileName.
vote_by_file = (
    summary.drop_duplicates(subset='FileName')
    .set_index('FileName')['VoiceVote']
    .to_dict()
)

# Pitch search range for YIN, computed once rather than on every iteration.
f0_floor = librosa.note_to_hz('C2')
f0_ceiling = librosa.note_to_hz('C7')

feature_rows = []
labels = []
skipped = []
for sample in tqdm(files):
    # Resolve the label first: a clip without one is dropped entirely so that
    # no placeholder label ends up as a class during training.
    label = vote_by_file.get(sample.split('.')[0])
    if label is None or pd.isna(label):
        skipped.append(sample)
        continue

    current_wav, current_sr = librosa.load(os.path.join(path, sample))
    # fmin/fmax are passed by keyword (newer librosa releases reject them
    # positionally) and sr is the rate the clip was actually loaded at.
    f0_series = librosa.yin(current_wav, fmin=f0_floor, fmax=f0_ceiling, sr=current_sr)
    rms_series = librosa.feature.rms(y=current_wav)

    # Column order: f0_min, f0_max, f0_mean, rms_min, rms_max, rms_mean.
    feature_rows.append([
        np.amin(f0_series), np.amax(f0_series), np.mean(f0_series),
        np.amin(rms_series), np.amax(rms_series), np.mean(rms_series),
    ])
    labels.append(label)

for sample in skipped:
    print(f'unable to find file: {sample}')

# reshape keeps X two-dimensional with six columns even when no clip was usable.
X = np.array(feature_rows, dtype=float).reshape(-1, 6)
Y = np.array(labels, dtype=str)
# num_files / count are kept in step with the rows actually stored, so any size
# computed from them matches X and Y.
num_files = X.shape[0]
count = num_files
print(f'shape of train data: {X.shape}')
print(f'shape of labels: {Y.shape}')

In [None]:
#For Logistic Regression, can use sklearn.linear_model.LogisticRegression
# !pip install -U scikit-learn
# import sklearn
# from sklearn.linear_model import LogisticRegression

In [None]:
# Train two SVM classifiers (default kernel and linear kernel) on the first
# 80% of the rows and evaluate them on the remaining 20%. The split is purely
# positional: rows are not shuffled here.
# The split point is derived from X itself so it always matches the number of
# rows actually present.
train_split = int(0.8 * X.shape[0])
print('train_split: ', train_split)
X_train, X_val = X[:train_split], X[train_split:]
Y_train, Y_val = Y[:train_split], Y[train_split:]
print(f'train size: {X_train.shape}, val size: {X_val.shape}')


def fit_and_report(clf, X_train, Y_train, X_val, Y_val, tag=''):
    """Fit ``clf`` on the training rows and print its held-out metrics.

    Parameters
    ----------
    clf : estimator exposing ``fit``, ``predict`` and ``score``.
    X_train, Y_train : features and labels used for fitting.
    X_val, Y_val : features and labels used for evaluation.
    tag : str
        Text inserted before the colon of every printed line (for example
        ``', model2'``) so that several models can be told apart.

    Returns
    -------
    tuple
        ``(predictions, score, micro_f1)``: the predicted labels for
        ``X_val``, the value of ``clf.score`` on the held-out rows, and the
        micro-averaged F1 score.
    """
    clf.fit(X_train, Y_train)
    predictions = clf.predict(X_val)
    score = clf.score(X_val, Y_val)
    print(f'Test accuracy score{tag}: {score}')

    macro_f1 = f1_score(Y_val, predictions, average='macro')
    print(f'macro f1 score{tag}: {macro_f1}')
    micro_f1 = f1_score(Y_val, predictions, average='micro')
    print(f'micro f1 score{tag}: {micro_f1}')
    return predictions, score, micro_f1


# NOTE(review): the f0 and RMS columns are fed to the SVMs unscaled; SVMs are
# sensitive to feature scale, so standardising the features first is worth
# trying -- confirm the effect on these scores before adopting it.
model = SVC()
predictions, score, f1 = fit_and_report(model, X_train, Y_train, X_val, Y_val)

model2 = SVC(kernel="linear")
# predictions / score / f1 end up holding model2's results (f1 is the micro
# average), the same final values the unrolled version of this cell left behind.
predictions, score, f1 = fit_and_report(
    model2, X_train, Y_train, X_val, Y_val, tag=', model2'
)


In [None]:
# Per-feature averages over the whole dataset. The names below follow the
# column order used when X was filled.
stats = ['f0_min', 'f0_max', 'f0_mean', 'rms_min', 'rms_max', 'rms_mean']
avg = X.mean(axis=0)
for stat, column_average in zip(stats, avg):
    print(f'{stat} average: {column_average}')