In [None]:
import os

import h5py
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
from natsort import natsorted
from sklearn.model_selection import KFold, cross_val_score, GridSearchCV, StratifiedKFold
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MaxAbsScaler
from sklearn.svm import SVC

In [None]:
# Load the time-frequency (TF) representation of each acoustic feature.
# Expected shape of `features` is (10, 315, 50): 10 features (see features_names),
# 315 different stimuli, 50 values from the time-frequency decomposition.
#
# Both input files are read from the same folder, relative to the working
# directory. The path is built with os.path.join so it works on any OS; the
# previous HDF5 path started with a separator (drive-root relative) while the
# CSV path did not, so the two files were looked up in different places.
ACOUSTICS_DIR = os.path.join("data", "acoustics")

# The context manager guarantees the HDF5 file is closed even if a read fails.
with h5py.File(os.path.join(ACOUSTICS_DIR, "TF_representation_acoustic_features.h5"), "r") as acoustic_features:
    tf_dataset = acoustic_features['TF_representation']
    # Copy everything into memory before the file is closed.
    features = np.asarray(tf_dataset)
    features_names = tf_dataset.attrs['features_names']
    features_freqs = np.asarray([float(i) for i in tf_dataset.attrs['freqs']])

# Load the per-stimulus information table (condition ids used to build the labels).
features_info = pd.read_csv(os.path.join(ACOUSTICS_DIR, "info_features.csv"))

In [None]:
# Build, for every classification problem, the row positions of the stimuli
# involved (idx*) and the class label of each of those rows (label*).

labelsclasif = features_info['all_id'].values


def _rows_for_ids(id_list):
    """Return the row positions whose 'all_id' is in id_list, grouped in id_list order."""
    return np.concatenate([np.flatnonzero(labelsclasif == cond_id) for cond_id in id_list])


# Syllabic time scale conditions 3Hz/6Hz/9Hz.
# Rows are grouped id by id, then labelled 1/2/3 in blocks of 105
# (assumes 35 stimuli per id, i.e. 3 ids per syllabic rate -- TODO confirm).
idxsyll = _rows_for_ids([31, 32, 33, 61, 62, 63, 91, 92, 93])
labelsyll = np.repeat([1, 2, 3], 105)

# syll*phon: rows ordered by the 'syll*phon' column, labelled 1..8 with
# block sizes 35/35/35/35/70/35/35/35 (assumes these group sizes -- TODO confirm).
idxsyllxphon = features_info.sort_values('syll*phon').index.values
labelsyllxphon = np.repeat([1, 2, 3, 4, 5, 6, 7, 8],
                           [35, 35, 35, 35, 70, 35, 35, 35])

# Syllabic low: ids 31 / 61 / 91, labelled with their own id.
idxsyllow = _rows_for_ids([31, 61, 91])
labelsyllow = labelsclasif[idxsyllow]

# Syllabic mid: ids 32 / 62 / 92.
idxsyllmid = _rows_for_ids([32, 62, 92])
labelsyllmid = labelsclasif[idxsyllmid]

# Syllabic high: ids 33 / 63 / 93.
idxsyllhig = _rows_for_ids([33, 63, 93])
labelsyllhig = labelsclasif[idxsyllhig]

# 3Hz condition: every id below 60, in file order.
idx3 = np.flatnonzero(labelsclasif < 60)
label3 = labelsclasif[idx3]

# 6Hz condition: ids strictly between 40 and 90.
idx6 = np.flatnonzero((labelsclasif > 40) & (labelsclasif < 90))
label6 = labelsclasif[idx6]

# 9Hz condition: every id above 80.
idx9 = np.flatnonzero(labelsclasif > 80)
label9 = labelsclasif[idx9]

In [None]:
# Run every classification with nested cross-validation: the inner loop picks
# the SVM hyper-parameters by grid search, the outer loop estimates accuracy.
# /!\ Quite time- and resource-consuming!

# One entry per classification problem; LABELS[k] and INDEXS[k] go together.
# Order: syll, syll*phon, syll low, syll mid, syll high, 3Hz, 6Hz, 9Hz.
LABELS=[labelsyll,labelsyllxphon,labelsyllow,labelsyllmid,labelsyllhig,label3,label6,label9]
INDEXS=[idxsyll,idxsyllxphon,idxsyllow,idxsyllmid,idxsyllhig,idx3,idx6,idx9]

# Fail fast on inconsistent inputs instead of partway through a long run.
assert len(LABELS) == len(INDEXS), "LABELS and INDEXS must have the same number of entries"
for cond_labels, cond_idx in zip(LABELS, INDEXS):
    assert len(cond_labels) == len(cond_idx), "each label vector must match its index vector in length"

# Number of random trials (each trial reshuffles the CV folds)
NUM_TRIALS = 10

# Set up possible values of parameters to optimize over with gridsearch
p_grid = {"C": [0.5, 1, 10, 100,1000], "gamma": [0.01, 0.1,1,10,100,1000]}

# We will use a Support Vector Classifier
svm = SVC()

# MaxAbs scaling helps classification, but it must be learned from training
# samples only. Fitting it on all samples before cross-validation lets the
# held-out folds influence the preprocessing, so the scaler is placed in a
# pipeline and refitted on every training split.
scaled_svm = make_pipeline(MaxAbsScaler(), svm)
# Pipeline parameters are addressed as "<step>__<param>"; make_pipeline names
# the SVC step "svc".
scaled_p_grid = {"svc__" + name: values for name, values in p_grid.items()}

# Container for accuracy results: (feature, classification problem, trial)
acc = np.zeros((features.shape[0],len(LABELS),NUM_TRIALS))

for i in range(features.shape[0]):
    # Loop across the classification problems listed in LABELS / INDEXS
    for v in range(len(LABELS)):
        # Stimuli of feature i that take part in this classification
        trials = features[i][INDEXS[v]]
        # Loop for each trial
        for j in range(NUM_TRIALS):
            # Choose cross-validation techniques for the inner and outer loops;
            # the trial number seeds the shuffling so runs are reproducible.
            inner_cv = StratifiedKFold(n_splits=4, shuffle=True, random_state=j)
            outer_cv = StratifiedKFold(n_splits=4, shuffle=True, random_state=j)
            # Nested CV with parameter optimization
            clf = GridSearchCV(estimator=scaled_svm, param_grid=scaled_p_grid, cv=inner_cv)
            nested_score = cross_val_score(clf, X=trials, y=LABELS[v], cv=outer_cv)
            # Store the mean accuracy over the outer folds for this trial
            acc[i, v, j] = nested_score.mean()

In [None]:
# Build a tidy dataframe: one column per acoustic feature, one row per
# (classification problem, trial) pair, plus a 'cond' column giving the
# position of the classification problem in LABELS.
# Sizes are read from `acc` itself so this cell stays valid if the number of
# features, classification problems or trials changes.
n_features, n_conds, n_trials = acc.shape
# Rows are ordered problem by problem, with the trials of one problem contiguous.
cond = np.repeat(np.arange(n_conds), n_trials)
dat = acc.reshape(n_features, n_conds * n_trials)
cv_df = pd.DataFrame(dat.T,columns=features_names)
cv_df['cond'] = cond
#cv_df.to_csv("features_accuracy_scores.csv",index=None)