In [2]:
import wave
import sys
import matplotlib.pyplot as plt
from scipy.io.wavfile import read
import librosa.display
import numpy as np
from scipy import stats
from spafe.frequencies.fundamental_frequencies import FundamentalFrequenciesExtractor
from spafe.frequencies.dominant_frequencies import get_dominant_frequencies 
from scipy.fft import fft,fftfreq,fftshift
import librosa
import os
import pandas as pd
from tqdm import tqdm
import re

In [10]:
def spectral_properties(y: np.ndarray, fs: int) -> dict:
    """ This function takes signal and its rate,
    Outputs the feature values which we are going to explain here"""
    spec = np.abs(np.fft.rfft(y)) # Uses one dimensional Fast Furie Transformation and finds the absolute value of it 
    freq = np.fft.rfftfreq(len(y), d=1 / fs) # Discrete fast furie transformation
    amp = spec / spec.sum()
    mean = (freq * amp).sum()
    sd = np.sqrt(np.sum(amp * ((freq - mean) ** 2)))
    amp_cumsum = np.cumsum(amp)
    median = freq[len(amp_cumsum[amp_cumsum <= 0.5]) + 1]
    mode_val = freq[amp.argmax()]
    
    Q25 = freq[len(amp_cumsum[amp_cumsum <= 0.25]) + 1]
    Q75 = freq[len(amp_cumsum[amp_cumsum <= 0.75]) + 1]
    IQR = Q75 - Q25
    z = amp - amp.mean()
    w = amp.std()
    skew = ((z ** 3).sum() / (len(spec) - 1)) / w ** 3
    kurt = ((z ** 4).sum() / (len(spec) - 1)) / w ** 4
    sp_entr = -np.sum(amp*  np.log(amp))/np.log(len(amp))
    
    spectral_flatness = np.mean(librosa.feature.spectral_flatness(y,))
    
    fft_mode = np.mean(stats.mode(freq).mode)
    normalized_frequencies = np.linspace(0, 1, len(spec))
    spectral_centroid = np.sum(amp * normalized_frequencies)
    
    peakf = max(freq)
    
    fund_freqs_extractor = FundamentalFrequenciesExtractor(debug = False)
    pitches, harmonic_rates, argmins, times = fund_freqs_extractor.main(sig = y,fs = fs)
    meanf = np.mean(pitches)
    minf = min(pitches)
    maxf = max(pitches)
    
    
    dom_freq = get_dominant_frequencies(sig = np.array(y).reshape(-1,1),fs = int(fs),
                                        lower_cutoff = 50,
                                        upper_cutoff = 30000,
                                        nfft = 512,
                                        win_len = 0.02,
                                        win_hop = 0.015,
                                        win_type = 'hamming',
                                        debug = False)
    diff = np.abs(dom_freq[:-1] - dom_freq[1:])
    min_max = dom_freq.max() - dom_freq.min()
    if min_max == 0:
        mod_id = 0
    else:
        mod_id = diff.mean()/min_max
    
    dom_mean = dom_freq.mean()
    dom_min = dom_freq.min()
    dom_max = dom_freq.max()
    dom_max_min = dom_max - dom_min
    
    
    result_d = {
        'mean': mean,
        'sd': sd,
        'median': median,
        'mode_val': mode_val,
        'Q25': Q25/fs,
        'Q75': Q75,
        'IQR': IQR,
        'skew': skew,
        'kurt': kurt,
        'sp.ent':sp_entr,
        'sfm' : spectral_flatness,
        'fft_mode' : fft_mode,
        'centroid' : spectral_centroid,
        'peakf' : peakf,
        'meanf' : meanf,
        'minf' : minf,
        'maxf' : maxf,
        'modid' : mod_id,
        'dom_mean' : dom_mean,
        'dom_max' : dom_max,
        'dom_min' : dom_min,
        'dom_max_min' : dom_max_min
    }

    return result_d

In [12]:
df_info = pd.read_csv('../CREMA-D/SentenceFilenames.csv')
df_demo = pd.read_csv('../CREMA-D/VideoDemographics.csv')

In [13]:
def find_sex(id : np.int64):
    return df_demo[df_demo["ActorID"]==id]["Sex"]

In [17]:
list_of_voices = []

data_folder = '../CREMA-D/AudioWAV/'

for each in tqdm(os.listdir(data_folder)):

    list_of_voices.append(librosa.load(data_folder + each))



  return f(*args, **kwargs)
  0%|                                                  | 0/7442 [00:00<?, ?it/s]


NoBackendError: 

In [None]:
import warnings
warnings.filterwarnings('ignore')

data_folder = '../CREMA-D/AudioWAV/'
list_of_data = []
names = []
labels = []
for each in tqdm(os.listdir(data_folder)):

    sample, sample_rate = librosa.load(data_folder + each)
    smp = spectral_properties(sample,sample_rate)
    names = list(smp.keys()) + ["label"]
    lb = find_sex(int(re.findall(r'\d+', each)[0])).values[0]
    labels.append(lb)
    list_of_data.append(list(smp.values()) + [lb])



In [68]:
df = pd.DataFrame(data = list_of_data,columns=names)

In [None]:
df.to_csv("ExtractedFeatures.csv")

## Trying Different Models

In [None]:
# def classify(model,x_train,y_train,x_test,y_test):
#     from sklearn.metrics import classification_report
#     target_names = ['female', 'male']
#     model.fit(x_train,y_train)
#     y_pred=model.predict(x_test)
#     print(classification_report(y_test, y_pred, target_names=target_names, digits=4))

In [None]:
df = pd.read_csv("ExtractedFeatures.csv")
df.fillna(1e9,inplace = True)

In [None]:
X = df.drop(["label"],axis = 1)
y = df["label"]

In [None]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

Random Forest

In [62]:
model = RandomForestClassifier()
model.fit(X_train, y_train)
model.score(X_test,y_test)
# classify(model,X_train,y_train,X_test,y_test)

0.7356260075228371

In [61]:
tuned_parameters = {'bootstrap': [True, False],
              'max_depth': [20,  40,  60, None],
              'max_features': ['auto', 'sqrt'],
              'min_samples_split': [2, 5, 10],
              'n_estimators': [ 200, 400, 600]}
cv = GridSearchCV(RandomForestClassifier(), tuned_parameters, cv=5, scoring='accuracy', return_train_score=True)
cv.fit(X_train, y_train)

bootstrap = cv.best_params_['bootstrap']
max_depth = cv.best_params_['max_depth']
max_features = cv.best_params_['max_features']
min_samples_split = cv.best_params_['min_samples_split']
n_estimators = cv.best_params_['n_estimators']

model_rf = RandomForestClassifier(bootstrap = bootstrap, max_depth  = max_depth, max_features = max_features, min_samples_split = min_samples_split, n_estimators = n_estimators )
model_rf.fit(X_train, y_train)
model_rf.score(X_test,y_test)

0.7367006985491671

XGB

In [24]:
model = XGBClassifier()
model.fit(X_train, y_train)
model.score(X_test,y_test)
# classify(model,X_train,y_train,X_test,y_test)





0.754433100483611

In [51]:
tuned_parameters = [{'learning_rate':[0.1, 0.01],
                     'max_depth':[ 6, 7, 8 ],
                     'min_child_weight':[1, 2, 3, 4]}]
cv1 = GridSearchCV(XGBClassifier(), tuned_parameters, cv=5, scoring='accuracy', return_train_score=True)
cv1.fit(X_train, y_train)

learning_rate = clf.best_params_['learning_rate']
max_depth = clf.best_params_['max_depth']
min_child_weight = clf.best_params_['min_child_weight']
model_xgb = XGBClassifier(learning_rate = learning_rate, max_depth  = max_depth, min_child_weight = min_child_weight)
model_xgb.fit(X_train, y_train)
model_xgb.score(X_test,y_test)





































































































































































































































































































































































































































































































0.7517463729177861

KNN

In [31]:
model = KNeighborsClassifier()
model.fit(X_train, y_train)
model.score(X_test,y_test)

0.6195593766792047

In [49]:
tuned_parameters = [{'n_neighbors':[4,5,6],
                     'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute']}]
cv2 = GridSearchCV(KNeighborsClassifier(), tuned_parameters, cv=5, scoring='accuracy', return_train_score=True)
cv2.fit(X_train, y_train)

n_neighbors = cv2.best_params_['n_neighbors']
algorithm = cv2.best_params_['algorithm']

model_knn = KNeighborsClassifier(n_neighbors = n_neighbors, algorithm = algorithm)
model_knn.fit(X_train, y_train)
model_knn.score(X_test,y_test)

0.6195593766792047