In [107]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold,train_test_split,cross_val_score

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier 

import subprocess
import os 

In [108]:
feats_test = False
feats_supplementary = False 

In [109]:
# Test Data 

folder = os.path.join( os.getcwd(), 'test' ) 
test_out = os.path.join( os.getcwd(), 'test.csv')
rscript_file= os.path.join( os.getcwd(), 'features.R')

if feats_test:
    subprocess.call(["Rscript", rscript_file, folder,test_out],stdin=subprocess.PIPE, stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
    

In [101]:
# Supplementary Data

male_folder = os.path.join( os.getcwd(), 'male' ) 
male_out = os.path.join( os.getcwd(), 'male.csv')
female_folder = os.path.join( os.getcwd(), 'male' ) 
female_out = os.path.join( os.getcwd(), 'female.csv')
rscript_file= os.path.join( os.getcwd(), 'features.R')

if feats_supplementary:
    subprocess.call(["Rscript", rscript_file, male_folder,male_out],
                    stdin=subprocess.PIPE, 
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT)
    subprocess.call(["Rscript", rscript_file, female_folder,female_out],
                    stdin=subprocess.PIPE, 
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT)
    male = pd.read_csv(male_out,index_col='sound.files')
    male['label'] = 'male'
    female = pd.read_csv(female_out,index_col='sound.files')
    female['label'] = 'female'
    

In [110]:
df_train = pd.read_csv("train.csv")
df_test = pd.read_csv(test_out,index_col='sound.files')

print(df_train.shape)

if feats_supplementary:
    df_train = pd.concat([df_train,male,female],ignore_index=True)

print(df_train.shape,df_test.shape)
df_test.head(2)

(3168, 21)
(3168, 21) (125, 20)


Unnamed: 0_level_0,meanfreq,sd,median,Q25,Q75,IQR,skew,kurt,sp.ent,sfm,mode,centroid,meanfun,minfun,maxfun,meandom,mindom,maxdom,dfrange,modindx
sound.files,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
CEO_F_XRX.wav,0.18806,0.060616,0.192284,0.16828,0.22344,0.05516,3.195981,21.594193,0.931335,0.527679,0.001,0.18806,0.142097,0.047337,0.27907,0.674508,0.0,6.304688,6.304688,0.071972
CEO_M_ACM_20121204.wav,0.158859,0.06413,0.136024,0.116171,0.22224,0.106069,2.603506,12.043091,0.940926,0.547693,0.119521,0.158859,0.11301,0.043235,0.279114,0.323591,0.0,5.03877,5.03877,0.052861


In [111]:
df_train.loc[df_train['label']=="male",'label'] = 0
df_train.loc[df_train['label']=="female",'label'] = 1

scaler = StandardScaler()
scaler.fit(df_train.iloc[:,0:20])
X_train = scaler.transform(df_train.iloc[:,0:20])
X_test = scaler.transform(df_test.iloc[:,0:20])
y_train = list(df_train['label'].values)

In [112]:
# Cross Validation 

classifiers_cv = {}
classifiers_cv['lr'] = LogisticRegression()
classifiers_cv['dtree'] = DecisionTreeClassifier()
classifiers_cv['rf'] = RandomForestClassifier()
classifiers_cv['xgb'] = XGBClassifier()

kfold = KFold(n_splits=10)

for c in classifiers_cv:
    cv_results = cross_val_score(classifiers_cv[c], X_train, y_train, cv=kfold)
    print(c,np.mean(cv_results))
    

lr 0.9586441320928005
dtree 0.9475981312143114
rf 0.9592700555045323
xgb 0.9614822505290899


In [114]:
# Test Data 
classifiers = {}

classifiers['lr'] = LogisticRegression()
classifiers['dtree'] = DecisionTreeClassifier()
classifiers['rf'] = RandomForestClassifier()
classifiers['xgb'] = XGBClassifier()

for c in classifiers:
    classifiers[c].fit(X_train, y_train)
    test[c+'_mprob'] = [ i[0] for i in classifiers[c].predict_proba(X_test) ] 


In [115]:
test.head(20)

Unnamed: 0_level_0,meanfreq,sd,median,Q25,Q75,IQR,skew,kurt,sp.ent,sfm,...,maxfun,meandom,mindom,maxdom,dfrange,modindx,lr_mprob,dtree_mprob,rf_mprob,xgb_mprob
sound.files,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
CEO_F_XRX.wav,0.18806,0.060616,0.192284,0.16828,0.22344,0.05516,3.195981,21.594193,0.931335,0.527679,...,0.27907,0.674508,0.0,6.304688,6.304688,0.071972,0.128026,0.0,0.11,0.007806
CEO_M_ACM_20121204.wav,0.158859,0.06413,0.136024,0.116171,0.22224,0.106069,2.603506,12.043091,0.940926,0.547693,...,0.279114,0.323591,0.0,5.03877,5.03877,0.052861,0.997188,1.0,0.97,0.999978
CEO_M_ACOR_20101101.wav,0.190159,0.074095,0.214538,0.138425,0.250145,0.11172,3.123715,31.386355,0.954016,0.638049,...,0.279114,0.507385,0.0,4.543506,4.543506,0.070798,0.972793,1.0,0.79,0.999061
CEO_M_CEO.wav,0.220645,0.058651,0.238043,0.206487,0.259946,0.05346,2.150894,7.640835,0.913592,0.402908,...,0.279114,0.773843,0.0,3.445312,3.445312,0.121313,0.952276,0.0,0.53,0.985797
CEO_M_DECK_2015.wav,0.196579,0.062538,0.212788,0.148977,0.248844,0.099868,1.778891,7.013549,0.939455,0.463386,...,0.279114,0.432656,0.0,2.562451,2.562451,0.086693,0.990657,1.0,0.93,0.99991
CEO_M_DHI.wav,0.201928,0.054029,0.209587,0.161679,0.249345,0.087666,1.191536,4.081774,0.935225,0.372938,...,0.277358,0.538863,0.0,3.725244,3.725244,0.102359,0.968801,1.0,0.92,0.99913
CEO_M_DPZ.wav,0.165515,0.098971,0.208087,0.067162,0.258046,0.190884,2.935425,14.460266,0.955672,0.686813,...,0.279114,0.748279,0.0,3.552979,3.552979,0.108451,0.999501,1.0,0.69,0.99896
CEO_M_DRE.wav,0.197471,0.080091,0.235942,0.144276,0.255246,0.11097,2.809935,12.015616,0.93682,0.593144,...,0.279114,0.98616,0.0,3.552979,3.552979,0.206006,0.902683,1.0,0.71,0.99851
CEO_M_DVA.wav,0.211143,0.06353,0.231441,0.199436,0.253295,0.05386,2.282766,8.372576,0.919327,0.457822,...,0.279114,0.817628,0.0,4.263574,4.263574,0.111622,0.885345,0.0,0.55,0.966681
CEO_M_DYN.wav,0.195599,0.070461,0.212188,0.178332,0.245844,0.067512,2.506984,16.568872,0.933022,0.476447,...,0.279114,0.413576,0.0,3.251514,3.251514,0.075222,0.878351,0.0,0.55,0.488498
