# Configuration

NOTES: The warnings after the import are referred to the fact that Tensorflow 2.x versions are built to directly look for a GPU in the system. The warning can be forgot if you are not going to use the GPU. 

In [81]:
!source myenv/bin/activate

In [82]:
# samples in 5 seconds of audio, 16 KHz sample rate 
LENGTH_CHOSEN =  80000

In [83]:
import os
import librosa
import numpy as np
from tqdm.notebook import tqdm
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
import seaborn as sns
sns.set_style('whitegrid')
import IPython.display as ipd
import librosa.display
import numpy as np
import pickle
import scipy
import ipywidgets
import math

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import make_pipeline
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score, confusion_matrix
from scipy.cluster.hierarchy import dendrogram
from sklearn.cluster import AgglomerativeClustering
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import KFold, StratifiedKFold


from tqdm import tqdm

import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, Conv2D, AveragePooling1D, MaxPooling2D, Flatten
from tensorflow.keras.optimizers import SGD, Adam 
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras import regularizers

# from livelossplot import PlotLossesKeras
tf.config.list_physical_devices('GPU')

[]

# Utils

In [84]:
def load_files(df):
    X = []
    for i in tqdm(df['path']): 
        X.append(librosa.load(i, res_type='kaiser_fast', sr=16000))
    return X

def extract_samples(X): 
    samples = []
    for ind,i in enumerate(X):
        samples.append(i[0])
    return samples 

def extract_labels(df): 
    labels = df['emotion_label'].copy()
    return labels 

def compute_lengths(samples): 
    lengths = [len(x) for x in samples]
    return lengths 

def check_outliers(lengths):
    # outliers
    lengths = np.array(lengths)
    print((lengths > 300000).sum())
    new_lengths = lengths[lengths < 300000]
    return new_lengths 

def compute_mean_length(lengths): 
    return lengths.mean()

def cut_and_pad(samples, labels, length_chosen = LENGTH_CHOSEN): 
    X_new = []
    y_new = []
    count = 0 
    for ind,i in enumerate(samples):
        if i.shape[0] < 300000:
            if i.shape[0] > length_chosen:
                new = i[:length_chosen]
                X_new.append(new)
            elif i.shape[0] < length_chosen:
                new = np.pad(i,math.ceil((length_chosen-i.shape[0])/2), mode='median')
                X_new.append(new)
            else:
                X_new.append(i)
            y_new.append(labels[count])
        count+=1
    
    return X_new, y_new
    
def compute_mfccs(samples, n_mfcc): 
    mfccs = []
    for i in tqdm(samples):
        mfcc = librosa.feature.mfcc(y=i, sr=16000, n_mfcc=n_mfcc)
        mfcc = mfcc.T
        mfcc = np.array(mfcc)
        mfccs.append(mfcc[:, 1:]) # get rid of the first component 
    mfccs = np.array(mfccs)
    return mfccs


def feature_extractor(df_train, df_val, df_test, n_mfcc): 
    load_train = load_files(df_train)
    samples_train = extract_samples(load_train)
    labels_train = extract_labels(df_train)
    samples_train, labels_train = cut_and_pad(samples_train, labels_train)
    samples_train = np.array(samples_train)
    labels_train = np.array(labels_train)
    mfccs_train = compute_mfccs(samples_train, n_mfcc = n_mfcc)
    
    
  
    load_val = load_files(df_val)
    samples_val = extract_samples(load_val)
    labels_val = extract_labels(df_val)
    samples_val, labels_val = cut_and_pad(samples_val, labels_val)
    samples_val = np.array(samples_val)
    labels_val = np.array(labels_val)
    mfccs_val = compute_mfccs(samples_val, n_mfcc = n_mfcc)
    
    
    load_test = load_files(df_test)
    samples_test = extract_samples(load_test)
    labels_test = extract_labels(df_test)
    samples_test, labels_test = cut_and_pad(samples_test, labels_test)
    samples_test = np.array(samples_test)
    labels_test = np.array(labels_test)
    mfccs_test = compute_mfccs(samples_test, n_mfcc = n_mfcc)
    

    return mfccs_train, labels_train,  mfccs_val, labels_val, mfccs_test, labels_test
    

def feature_extractor_tess(df_train,  df_test, n_mfcc): 
    # we do not have the validation set here 
    load_train = load_files(df_train)
    samples_train = extract_samples(load_train)
    labels_train = extract_labels(df_train)
    samples_train, labels_train = cut_and_pad(samples_train, labels_train)
    samples_train = np.array(samples_train)
    labels_train = np.array(labels_train)
    mfccs_train = compute_mfccs(samples_train, n_mfcc = n_mfcc)
    
    
    
    load_test = load_files(df_test)
    samples_test = extract_samples(load_test)
    labels_test = extract_labels(df_test)
    samples_test, labels_test = cut_and_pad(samples_test, labels_test)
    samples_test = np.array(samples_test)
    labels_test = np.array(labels_test)
    mfccs_test = compute_mfccs(samples_test, n_mfcc = n_mfcc)
    
    return mfccs_train, labels_train, mfccs_test, labels_test
    
def encode_labels(labels_train, labels_val, labels_test): 
    
    emotion_enc = {'fear':1, 'disgust':1, 'neutral':0, 'calm':0,  'happy':0, 'sadness':1, 'surprise':0, 'angry':1}
    y_train = pd.Series(labels_train).replace(emotion_enc)
  
    y_test = pd.Series(labels_test).map(emotion_enc)
    y_val = pd.Series(labels_val).map(emotion_enc)
    return y_train, y_val, y_test 


def encode_labels_tess(labels_train, labels_test): 
    
    emotion_enc = {'fear':1, 'disgust':1, 'neutral':0, 'calm':0,  'happy':0, 'sadness':1, 'surprise':0, 'angry':1}
    y_train = pd.Series(labels_train).replace(emotion_enc)
  
    y_test = pd.Series(labels_test).map(emotion_enc)
    return y_train, y_test
    
def standard_scaling(X_train, X_val, X_test): 
  
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
    X_test = scaler.transform(X_test.reshape(-1, X_test.shape[-1])).reshape(X_test.shape)
    X_val = scaler.transform(X_val.reshape(-1, X_val.shape[-1])).reshape(X_val.shape)
    return X_train, X_val, X_test, scaler 
    
def standard_scaling_tess(X_train, X_test): 
  
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
    X_test = scaler.transform(X_test.reshape(-1, X_test.shape[-1])).reshape(X_test.shape)
    return X_train, X_test, scaler   
    
# Data Augmentation 
'''
def noise(data):
    noise_amp = 0.035*np.random.uniform()*np.amax(data)
    data = data + noise_amp*np.random.normal(size=data.shape[0])
    return data

# Data Augmentation 
def pitch(data, sampling_rate, pitch_factor=0.7):
    return librosa.effects.pitch_shift(data, sampling_rate, pitch_factor)


def compute_mfccs_augmentation(samples, labels): 
    mfccs = []
    counter = 0 
    for i in tqdm(samples):

       # Weiner Filtering on original noise 
        samples_weiner = scipy.signal.wiener(i)
        is_fin = np.isfinite(samples_weiner).all()


        # Data Augmentation - Noise 
        noise_audio = noise(samples_weiner)

        # Data Augmentation - Pitch 
        pitch_audio = pitch(samples_weiner, sampling_rate=16000)


        # Data Augmentation -  pitch + noise 
        pn = pitch(noise_audio, sampling_rate = 16000)


        if is_fin: 
          # MFCC

          mfcc = librosa.feature.mfcc(y=i, sr=16000, n_mfcc=13)
          mfcc = mfcc.T
          mfccs.append(mfcc[:, 1:])

          mfcc_augmented = librosa.feature.mfcc(y=samples_weiner, sr=16000, n_mfcc=13)
          mfcc_augmented = mfcc_augmented.T
          mfccs.append(mfcc_augmented[:, 1:])

          mfcc_augmented_pitch = librosa.feature.mfcc(y=noise_audio, sr=16000, n_mfcc=13)
          mfcc_augmented_pitch = mfcc_augmented_pitch.T
          mfccs.append(mfcc_augmented_pitch[:, 1:])

          mfcc_augmented_p = librosa.feature.mfcc(y=pitch_audio, sr=16000, n_mfcc=13)
          mfcc_augmented_p = mfcc_augmented_p.T
          mfccs.append(mfcc_augmented_p[:, 1:]) 

          mfcc_augmented_pn = librosa.feature.mfcc(y=pn, sr=16000, n_mfcc=13)
          mfcc_augmented_pn = mfcc_augmented_pn.T
          mfccs.append(mfcc_augmented_pn[:, 1:]) 
    
    mfccs = np.array(mfccs)
    
    # Copy labels 
    y_prov = []
    y = labels 
    for i in range(len(y)): 
      y_prov.append(y[i])
      y_prov.append(y[i])
      y_prov.append(y[i])
      y_prov.append(y[i])
      y_prov.append(y[i])
    y = np.asarray(y_prov)

    return mfccs, y 


'''


'\ndef noise(data):\n    noise_amp = 0.035*np.random.uniform()*np.amax(data)\n    data = data + noise_amp*np.random.normal(size=data.shape[0])\n    return data\n\n# Data Augmentation \ndef pitch(data, sampling_rate, pitch_factor=0.7):\n    return librosa.effects.pitch_shift(data, sampling_rate, pitch_factor)\n\n\ndef compute_mfccs_augmentation(samples, labels): \n    mfccs = []\n    counter = 0 \n    for i in tqdm(samples):\n\n       # Weiner Filtering on original noise \n        samples_weiner = scipy.signal.wiener(i)\n        is_fin = np.isfinite(samples_weiner).all()\n\n\n        # Data Augmentation - Noise \n        noise_audio = noise(samples_weiner)\n\n        # Data Augmentation - Pitch \n        pitch_audio = pitch(samples_weiner, sampling_rate=16000)\n\n\n        # Data Augmentation -  pitch + noise \n        pn = pitch(noise_audio, sampling_rate = 16000)\n\n\n        if is_fin: \n          # MFCC\n\n          mfcc = librosa.feature.mfcc(y=i, sr=16000, n_mfcc=13)\n          mf

# Compute dataframes for datasets and split in Train, Val, Test 

In [85]:
main_path = '/media/helemanc/OS/Users/i2CAT/Desktop/Datasets SER/'
TESS = os.path.join(main_path, "tess/TESS Toronto emotional speech set data/") 
RAV = os.path.join(main_path, "ravdess-emotional-speech-audio/audio_speech_actors_01-24")
SAVEE = os.path.join(main_path, "savee/ALL/")
CREMA = os.path.join(main_path, "creamd/AudioWAV/")

## RADVESS

In [86]:
lst = []
emotion = []
voc_channel = []
full_path = []
modality = []
intensity = []
actors = []
phrase =[]

for root, dirs, files in tqdm(os.walk(RAV)):
    for file in files:
        try:
            #Load librosa array, obtain mfcss, store the file and the mfcss information in a new array
            # X, sample_rate = librosa.load(os.path.join(root,file), res_type='kaiser_fast')
            # mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0) 
            # The instruction below converts the labels (from 1 to 8) to a series from 0 to 7
            # This is because our predictor needs to start from 0 otherwise it will try to predict also 0.
           
            modal = int(file[1:2])
            vchan = int(file[4:5])
            lab = int(file[7:8])
            ints = int(file[10:11])
            phr = int(file[13:14])
            act = int(file[18:20])
            # arr = mfccs, lab
            # lst.append(arr)
            
            modality.append(modal)
            voc_channel.append(vchan)
            emotion.append(lab) #only labels
            intensity.append(ints)
            phrase.append(phr)
            actors.append(act)
            
            full_path.append((root, file)) # only files
          # If the file is not valid, skip it
        except ValueError:
            continue

25it [00:00, 824.03it/s]


In [87]:
# 01 = neutral, 02 = calm, 03 = happy, 04 = sad, 05 = angry, 06 = fearful, 07 = disgust, 08 = surprised
# merge neutral and calm
emotions_list = ['neutral', 'neutral', 'happy', 'sadness', 'angry', 'fear', 'disgust', 'surprise']
emotion_dict = {em[0]+1:em[1] for em in enumerate(emotions_list)}

df = pd.DataFrame([emotion, voc_channel, modality, intensity, actors, actors,phrase, full_path]).T
df.columns = ['emotion', 'voc_channel', 'modality', 'intensity', 'actors', 'gender', 'phrase', 'path']
df['emotion'] = df['emotion'].map(emotion_dict)
df['voc_channel'] = df['voc_channel'].map({1: 'speech', 2:'song'})
df['modality'] = df['modality'].map({1: 'full AV', 2:'video only', 3:'audio only'})
df['intensity'] = df['intensity'].map({1: 'normal', 2:'strong'})
df['actors'] = df['actors']
df['gender'] = df['actors'].apply(lambda x: 'female' if x%2 == 0 else 'male')
df['phrase'] = df['phrase'].map({1: 'Kids are talking by the door', 2:'Dogs are sitting by the door'})
df['path'] = df['path'].apply(lambda x: x[0] + '/' + x[1])

In [88]:
# remove files with noise to apply the same noise to all files for data augmentation 
df = df[~df.path.str.contains('noise')]

In [89]:
df.head()

Unnamed: 0,emotion,voc_channel,modality,intensity,actors,gender,phrase,path
0,disgust,speech,audio only,normal,1,male,Dogs are sitting by the door,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2,disgust,speech,audio only,strong,1,male,Kids are talking by the door,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
4,disgust,speech,audio only,strong,1,male,Kids are talking by the door,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
6,disgust,speech,audio only,strong,1,male,Dogs are sitting by the door,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
8,disgust,speech,audio only,strong,1,male,Dogs are sitting by the door,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...


In [90]:
# only speech
RAV_df = df
RAV_df = RAV_df.loc[RAV_df.voc_channel == 'speech']

In [91]:
RAV_df.insert(0, "emotion_label", RAV_df.emotion, True)

In [92]:
RAV_df = RAV_df.drop(['emotion', 'voc_channel', 'modality', 'intensity', 'phrase'], 1)

In [93]:
RAV_df

Unnamed: 0,emotion_label,actors,gender,path
0,disgust,1,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2,disgust,1,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
4,disgust,1,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
6,disgust,1,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
8,disgust,1,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
...,...,...,...,...
2871,neutral,24,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2873,neutral,24,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2875,neutral,24,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2877,neutral,24,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...


In [94]:
RAV_train = []
RAV_val = []
RAV_test = []

In [95]:
for index, row in RAV_df.iterrows():
    if row['actors'] in range(1,21): 
        RAV_train.append(row) 
    elif row['actors'] in range(21,23): 
        RAV_val.append(row)
    elif row['actors'] in range(23,25): 
        RAV_test.append(row)
len(RAV_train), len(RAV_val), len(RAV_test)

(1200, 120, 120)

In [96]:
RAV_train = pd.DataFrame(RAV_train)
RAV_val = pd.DataFrame(RAV_val)
RAV_test = pd.DataFrame(RAV_test)

In [97]:
RAV_train = RAV_train.drop(['actors'], 1)
RAV_val = RAV_val.drop(['actors'], 1)
RAV_test = RAV_test.drop(['actors'], 1)

In [98]:
RAV_train.reset_index(drop=True, inplace = True) 
RAV_val.reset_index(drop=True, inplace = True) 
RAV_test.reset_index(drop=True, inplace = True ) 

## SAVEE

In [99]:
# Get the data location for SAVEE
dir_list = os.listdir(SAVEE)

# parse the filename to get the emotions
emotion=[]
path = []
actors = []
gender = []
for i in dir_list:
    actors.append(i[:2])
    if i[-8:-6]=='_a':
        emotion.append('angry')
        gender.append('male')
    elif i[-8:-6]=='_d':
        emotion.append('disgust')
        gender.append('male')
    elif i[-8:-6]=='_f':
        emotion.append('fear')
        gender.append('male')
    elif i[-8:-6]=='_h':
        emotion.append('happy')
        gender.append('male')
    elif i[-8:-6]=='_n':
        emotion.append('neutral')
        gender.append('male')
    elif i[-8:-6]=='sa':
        emotion.append('sadness')
        gender.append('male')
    elif i[-8:-6]=='su':
        emotion.append('surprise')
        gender.append('male') 
    else:
        emotion.append('Unknown') 
    path.append(SAVEE + i)
    
# Now check out the label count distribution 
SAVEE_df = pd.DataFrame(emotion, columns = ['emotion_label'])
                      
SAVEE_df = pd.concat([SAVEE_df,
                      pd.DataFrame(actors, columns = ['actors']),
                      pd.DataFrame(gender, columns = ['gender']), 
                      pd.DataFrame(path, columns = ['path'])], axis = 1)
SAVEE_df.emotion_label.value_counts()

neutral     120
sadness      60
surprise     60
happy        60
disgust      60
fear         60
angry        60
Name: emotion_label, dtype: int64

In [100]:
SAVEE_df.head()

Unnamed: 0,emotion_label,actors,gender,path
0,neutral,DC,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
1,sadness,KL,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2,sadness,KL,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
3,sadness,KL,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
4,sadness,KL,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...


In [101]:
SAVEE_train = []
SAVEE_val = []
SAVEE_test = []

In [102]:
#DC, JE, JK, KL
for index, row in SAVEE_df.iterrows(): 
    if row['actors'] == 'DC' or row ['actors'] == 'JE':
        SAVEE_train.append(row)
    elif row['actors'] == 'JK': 
        SAVEE_val.append(row)
    else: 
        SAVEE_test.append(row)
len(SAVEE_train), len(SAVEE_val), len(SAVEE_test)

(240, 120, 120)

In [103]:
SAVEE_train = pd.DataFrame(SAVEE_train)
SAVEE_val = pd.DataFrame(SAVEE_val)
SAVEE_test = pd.DataFrame(SAVEE_test)

In [104]:
SAVEE_train = SAVEE_train.drop(['actors'], 1)
SAVEE_val = SAVEE_val.drop(['actors'], 1)
SAVEE_test = SAVEE_test.drop(['actors'], 1)

In [105]:
SAVEE_train = SAVEE_train.reset_index(drop=True) 
SAVEE_val = SAVEE_val.reset_index(drop=True) 
SAVEE_test = SAVEE_test.reset_index(drop=True) 

## TESS

In [106]:
dir_list = os.listdir(TESS)
dir_list.sort()
dir_list

path = []
emotion = []
gender = []
actors = []

for i in dir_list:
    fname = os.listdir(TESS + i)
    for f in fname:
        if i == 'OAF_angry':
            emotion.append('angry')
            gender.append('female')
            actors.append('OAF')
        elif i == 'YAF_angry': 
            emotion.append('angry')
            gender.append('female')
            actors.append('YAF')
            
            
        elif i == 'OAF_disgust' :
            emotion.append('disgust')
            gender.append('female')
            actors.append('OAF')
        elif i == 'YAF_disgust': 
            emotion.append('disgust')
            gender.append('female')
            actors.append('YAF')
            
            
        elif i == 'OAF_Fear':
            emotion.append('fear')
            gender.append('female')
            actors.append('OAF')
        elif i == 'YAF_fear': 
            emotion.append('fear')
            gender.append('female')
            actors.append('YAF') 
            
            
        elif i == 'OAF_happy' :
            emotion.append('happy')
            gender.append('female')
            actors.append('OAF')
        elif i == 'YAF_happy': 
            emotion.append('angry')
            gender.append('female')
            actors.append('YAF')            
            
        elif i == 'OAF_neutral':
            emotion.append('neutral')
            gender.append('female')
            actors.append('OAF')   
        elif i == 'YAF_neutral': 
            emotion.append('neutral')
            gender.append('female')
            actors.append('YAF')      
            
                
        elif i == 'OAF_Pleasant_surprise':
            emotion.append('surprise')
            gender.append('female')
            actors.append('OAF')
        
        elif i == 'YAF_pleasant_surprised': 
            emotion.append('surprise')
            gender.append('female')
            actors.append('YAF')            
            
        elif i == 'OAF_Sad':
            emotion.append('sadness')
            gender.append('female')
            actors.append('OAF')
        elif i == 'YAF_sad': 
            emotion.append('sadness')
            gender.append('female')
            actors.append('YAF')            
        else:
            emotion.append('Unknown')
        path.append(TESS + i + "/" + f)

TESS_df = pd.DataFrame(emotion, columns = ['emotion_label'])
TESS_df = pd.concat([TESS_df, pd.DataFrame(gender, columns = ['gender']), 
                     pd.DataFrame(actors, columns= ['actors']),
                     pd.DataFrame(path, columns = ['path'])],axis=1)
TESS_df.emotion_label.value_counts()

angry       1200
fear         800
surprise     800
sadness      800
disgust      800
neutral      800
happy        400
Name: emotion_label, dtype: int64

In [107]:
TESS_df= TESS_df[~TESS_df.path.str.contains('noise')]

In [108]:
TESS_train = []
TESS_test = []

In [109]:
for index, row in TESS_df.iterrows(): 
    if row['actors'] == 'YAF': 
        TESS_train.append(row)
    else: 
        TESS_test.append(row)
len(TESS_train), len(TESS_test)

(1400, 1400)

In [110]:
TESS_train = pd.DataFrame(TESS_train)
TESS_test = pd.DataFrame(TESS_test)

In [111]:
TESS_train = TESS_train.reset_index(drop=True) 
TESS_test  = TESS_test.reset_index(drop=True) 

## CREMA-D

In [112]:
males = [1,
5,
11,
14,
15,
16,
17,
19,
22,
23,
26,
27,
31,
32,
33,
34,
35,
36,
38,
39,
41,
42,
44,
45,
48,
50,
51,
57,
59, 
62, 
64,
65, 
66,
67,
68,
69,
70,
71,
77, 
80, 
81, 
83, 
85, 
86, 
87,
88, 
90]

In [113]:
females = [ 2,
3,
4,
6,
7,
8,
9,
10,
12,
13,
18,
20,
21,
24,
25,
28,
29,
30,
37,
40,
43,
46,
47,
49,
52,
53,
54,
55,
56, 
58, 
60,
61,
63,
72, 
73, 
74, 
75, 
76, 
78, 
79, 
82, 
84, 
89, 
91]

In [114]:
crema_directory_list = os.listdir(CREMA)

file_emotion = []
file_path = []
actors = []
gender = []




for file in crema_directory_list:

    # storing file emotions
    part=file.split('_')
    
    # use only high intensity files
    if "HI" in part[3] :
        actor = part[0][2:]
        actors.append(actor)
        if int(actor) in males:
            gender.append('male')
        else: 
            gender.append('female')
    
        # storing file paths
        file_path.append(CREMA + file)
        if part[2] == 'SAD':
            file_emotion.append('sadness')
        elif part[2] == 'ANG':
            file_emotion.append('angry')
        elif part[2] == 'DIS':
            file_emotion.append('disgust')
        elif part[2] == 'FEA':
            file_emotion.append('fear')
        elif part[2] == 'HAP':
            file_emotion.append('happy')
        elif part[2] == 'NEU':
            file_emotion.append('neutral')
        else:
            file_emotion.append('Unknown')

# dataframe for emotion of files
emotion_df = pd.DataFrame(file_emotion, columns=['emotion_label'])

# dataframe for path of files.
path_df = pd.DataFrame(file_path, columns=['path'])
actors_df = pd.DataFrame(actors, columns=['actors'])
gender_df = pd.DataFrame(gender, columns=['gender'])                      
Crema_df = pd.concat([emotion_df, actors_df, gender_df, path_df], axis=1)
Crema_df.head()

Unnamed: 0,emotion_label,actors,gender,path
0,happy,91,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
1,sadness,91,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2,angry,91,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
3,disgust,91,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
4,fear,91,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...


In [115]:
Crema_df.shape

(455, 4)

In [116]:
actor_files = {}

for index, row in Crema_df.iterrows():
    actor = row['actors']
    if actor not in actor_files.keys(): 
        actor_files[actor] = 1
    else: 
        actor_files[actor]+=1

In [117]:
actor_files

{'91': 5,
 '90': 5,
 '89': 5,
 '88': 5,
 '87': 5,
 '86': 5,
 '85': 5,
 '84': 5,
 '83': 5,
 '82': 5,
 '81': 5,
 '80': 5,
 '79': 5,
 '78': 5,
 '77': 5,
 '76': 5,
 '75': 5,
 '74': 5,
 '73': 5,
 '72': 5,
 '71': 5,
 '70': 5,
 '69': 5,
 '68': 5,
 '67': 5,
 '66': 5,
 '65': 5,
 '64': 5,
 '63': 5,
 '62': 5,
 '61': 5,
 '60': 5,
 '59': 5,
 '58': 5,
 '57': 5,
 '56': 5,
 '55': 5,
 '54': 5,
 '53': 5,
 '52': 5,
 '51': 5,
 '50': 5,
 '49': 5,
 '48': 5,
 '47': 5,
 '46': 5,
 '45': 5,
 '44': 5,
 '43': 5,
 '42': 5,
 '41': 5,
 '40': 5,
 '39': 5,
 '38': 5,
 '37': 5,
 '36': 5,
 '35': 5,
 '34': 5,
 '33': 5,
 '32': 5,
 '31': 5,
 '30': 5,
 '29': 5,
 '28': 5,
 '27': 5,
 '26': 5,
 '25': 5,
 '24': 5,
 '23': 5,
 '22': 5,
 '21': 5,
 '20': 5,
 '19': 5,
 '18': 5,
 '17': 5,
 '16': 5,
 '15': 5,
 '14': 5,
 '13': 5,
 '12': 5,
 '11': 5,
 '10': 5,
 '09': 5,
 '08': 5,
 '07': 5,
 '06': 5,
 '05': 5,
 '04': 5,
 '03': 5,
 '02': 5,
 '01': 5}

In [118]:
count_males = 0 
count_females = 0 
male_list = []
for index, row in Crema_df.iterrows(): 
    gender = row['gender']
    actor = row['actors']
    if gender == 'male':
        count_males +=1
        if actor not in male_list: 
            male_list.append(actor)
    else: 
        count_females +=1

In [119]:
count_males, count_females

(235, 220)

Since there are more males than females we will remove randomly 3 male actors (since there are exactly 5 audio files per actor)

In [120]:
import random 
random.seed(42)
males_to_remove = random.sample(male_list, 3)
males_to_remove

['17', '80', '88']

In [121]:
new_df = []
for index, row in Crema_df.iterrows(): 
    if row['actors'] not in males_to_remove: 
        new_df.append(row)

In [122]:
CREMA_df = pd.DataFrame(new_df)

In [123]:
for index, row in CREMA_df.iterrows(): 
    if row['actors'] == '17': 
        print("Elements not removed")

In [124]:
count_males = 0 
count_females = 0 
male_list = []
female_list = []
for index, row in CREMA_df.iterrows(): 
    gender = row['gender']
    actor = row['actors']
    if gender == 'male':
        count_males +=1
        if actor not in male_list: 
            male_list.append(actor)
    else: 
        count_females +=1
        if actor not in female_list: 
            female_list.append(actor)

In [125]:
count_males, count_females

(220, 220)

In [126]:
len(female_list)

44

In [127]:
len(male_list)

44

In [128]:
CREMA_train = []
CREMA_val = []
CREMA_test = []

In [129]:
females_train = random.sample(female_list, 32)
males_train = random.sample(male_list, 32)

# remove the elements assigned to train 
for element in females_train:
    if element in female_list:
        female_list.remove(element)
        
for element in males_train:
    if element in male_list:
        male_list.remove(element)

         
females_val = random.sample(female_list, 6) 
males_val = random.sample(male_list, 6) 

# remove the elements assigned to val
for element in females_val:
    if element in female_list:
        female_list.remove(element)
        
for element in males_val:
    if element in male_list:
        male_list.remove(element)
        
females_test = random.sample(female_list, 6) 
males_test = random.sample(male_list, 6)        

In [130]:
females_train, males_train, females_val, males_val, females_test, males_test

(['54',
  '56',
  '58',
  '74',
  '76',
  '13',
  '78',
  '29',
  '84',
  '89',
  '09',
  '60',
  '04',
  '55',
  '52',
  '91',
  '02',
  '07',
  '46',
  '49',
  '37',
  '10',
  '20',
  '75',
  '21',
  '53',
  '06',
  '28',
  '18',
  '63',
  '30',
  '03'],
 ['57',
  '69',
  '65',
  '45',
  '77',
  '81',
  '41',
  '15',
  '44',
  '23',
  '59',
  '86',
  '34',
  '01',
  '85',
  '66',
  '31',
  '33',
  '05',
  '48',
  '50',
  '67',
  '51',
  '22',
  '36',
  '87',
  '71',
  '39',
  '42',
  '11',
  '32',
  '14'],
 ['43', '61', '40', '47', '73', '24'],
 ['62', '68', '64', '83', '70', '26'],
 ['08', '79', '12', '25', '72', '82'],
 ['16', '19', '38', '35', '27', '90'])

In [131]:
train = females_train + males_train 
val = females_val + males_val 
test = females_test + males_test

In [132]:
for index, row in CREMA_df.iterrows(): 
    gender = row['gender']
    actor = row['actors']
    if actor in train: 
        CREMA_train.append(row)
    elif actor in val: 
        CREMA_val.append(row)
    else:
        CREMA_test.append(row)

In [133]:
CREMA_train = pd.DataFrame(CREMA_train) 
CREMA_val = pd.DataFrame(CREMA_val) 
CREMA_test = pd.DataFrame(CREMA_test)

In [134]:
CREMA_train.shape, CREMA_val.shape, CREMA_test.shape

((320, 4), (60, 4), (60, 4))

In [135]:
CREMA_train = CREMA_train.reset_index(drop=True) 
CREMA_val = CREMA_val.reset_index(drop = True) 

# Model

In [136]:
def create_model( init_mode='glorot_uniform', lr = 0.001, input_dim=(157, 25)):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=input_dim, kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.5))

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

# Experiment 3.1 : RAVDESS

In [137]:
df_train = RAV_train
df_val = RAV_val
df_test = RAV_test

In [138]:
df_train.reset_index(drop = True, inplace = True) 
df_val.reset_index(drop = True, inplace = True)
df_test.reset_index(drop = True, inplace = True)

In [139]:
df_train.head()

Unnamed: 0,emotion_label,gender,path
0,disgust,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
1,disgust,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2,disgust,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
3,disgust,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
4,disgust,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...


## Feature Extraction

In [140]:
X_train, y_train, X_val, y_val, X_test, y_test = feature_extractor(df_train, df_val, df_test, 26)

100%|██████████████████████████████████████| 1200/1200 [00:02<00:00, 574.87it/s]
100%|███████████████████████████████████████| 1200/1200 [00:24<00:00, 49.07it/s]
100%|███████████████████████████████████████| 120/120 [00:00<00:00, 1246.98it/s]
100%|█████████████████████████████████████████| 120/120 [00:01<00:00, 60.15it/s]
100%|███████████████████████████████████████| 120/120 [00:00<00:00, 1356.12it/s]
100%|█████████████████████████████████████████| 120/120 [00:01<00:00, 68.38it/s]


In [141]:
y_train, y_val, y_test = encode_labels(y_train, y_val, y_test)

In [142]:
np.size(y_val)

120

In [143]:
X_train, X_val, X_test, fitted_scaler = standard_scaling(X_train, X_val, X_test)

In [144]:
X_train.shape

(1200, 157, 25)

## Shuffle training data

In [145]:
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)

## Save Scaler

In [146]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_3/scaler_3_1.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [65]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [66]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-15 16:23:19.898354: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-15 16:23:19.898682: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-15 16:23:19.945768: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-15 16:23:19.945803: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-15 16:23:19.994297: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or dire

Epoch 1/50
100/100 - 4s - loss: 0.9377 - accuracy: 0.5600
Epoch 1/50
100/100 - 5s - loss: 2.9155 - accuracy: 0.5188
Epoch 1/50
100/100 - 5s - loss: 0.9253 - accuracy: 0.5688
Epoch 1/50
100/100 - 5s - loss: 1.2704 - accuracy: 0.5163
Epoch 1/50
100/100 - 5s - loss: 1.1782 - accuracy: 0.5250
Epoch 1/50
100/100 - 6s - loss: 1.1333 - accuracy: 0.5375
Epoch 1/50
100/100 - 6s - loss: 2.5745 - accuracy: 0.5138
Epoch 1/50
100/100 - 6s - loss: 1.0059 - accuracy: 0.5512
Epoch 2/50
100/100 - 4s - loss: 0.6701 - accuracy: 0.6338
Epoch 2/50
100/100 - 4s - loss: 2.2129 - accuracy: 0.5562
Epoch 2/50
100/100 - 4s - loss: 0.9243 - accuracy: 0.5587
Epoch 2/50
100/100 - 4s - loss: 1.0198 - accuracy: 0.5650
Epoch 2/50
100/100 - 5s - loss: 0.6430 - accuracy: 0.6488
Epoch 2/50
100/100 - 4s - loss: 0.9344 - accuracy: 0.5700
Epoch 2/50
100/100 - 4s - loss: 0.6501 - accuracy: 0.6488
Epoch 2/50
100/100 - 4s - loss: 1.8843 - accuracy: 0.5387
Epoch 3/50
100/100 - 4s - loss: 0.6365 - accuracy: 0.6475
Epoch 3/50
100

2021-09-15 16:37:33.830931: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-09-15 16:37:33.831550: I tensorflow/compiler/jit/xla_gpu_device.cc:99] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-09-15 16:37:34.075029: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2021-09-15 16:37:34.093569: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2299965000 Hz


Epoch 1/50
150/150 - 1s - loss: 0.8781 - accuracy: 0.5883
Epoch 2/50
150/150 - 1s - loss: 0.6650 - accuracy: 0.6408
Epoch 3/50
150/150 - 1s - loss: 0.6169 - accuracy: 0.6742
Epoch 4/50
150/150 - 1s - loss: 0.5652 - accuracy: 0.7008
Epoch 5/50
150/150 - 1s - loss: 0.5774 - accuracy: 0.7042
Epoch 6/50
150/150 - 1s - loss: 0.5220 - accuracy: 0.7442
Epoch 7/50
150/150 - 1s - loss: 0.5113 - accuracy: 0.7558
Epoch 8/50
150/150 - 1s - loss: 0.4743 - accuracy: 0.7667
Epoch 9/50
150/150 - 1s - loss: 0.4621 - accuracy: 0.7833
Epoch 10/50
150/150 - 1s - loss: 0.4087 - accuracy: 0.8008
Epoch 11/50
150/150 - 1s - loss: 0.4163 - accuracy: 0.8042
Epoch 12/50
150/150 - 1s - loss: 0.3776 - accuracy: 0.8317
Epoch 13/50
150/150 - 1s - loss: 0.3773 - accuracy: 0.8408
Epoch 14/50
150/150 - 1s - loss: 0.3353 - accuracy: 0.8367
Epoch 15/50
150/150 - 1s - loss: 0.3333 - accuracy: 0.8550
Epoch 16/50
150/150 - 1s - loss: 0.3304 - accuracy: 0.8550
Epoch 17/50
150/150 - 1s - loss: 0.3136 - accuracy: 0.8658
Epoch 

In [69]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.8658333420753479 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.8217, std=0.01434 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.8658, std=0.01359 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.7908, std=0.01312 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.84, std=0.0108 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.8483, std=0.01586 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.815, std=0.01429 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.7375, std=0.01275 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.8583, std=0.002357 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.8575, std=0.009354 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.8525, std=0.004082 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_siz

## Train with best parameters

In [70]:
# Best Accuracy 0.8658333420753479 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
def create_model( init_mode='glorot_normal', lr = 0.001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,25), kernel_initializer=init_mode)) # 157
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [71]:
seed = 7
np.random.seed(seed)

In [72]:
model = create_model()

In [73]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [74]:
import datetime, os

In [75]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [76]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-15 16:41:25.791476: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-15 16:41:25.791506: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-15 16:41:25.843205: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [77]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [78]:
history = model.fit(X_train, y_train, batch_size=8, epochs=500, validation_data=(X_val, y_val),
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

Epoch 1/500
 32/150 [=====>........................] - ETA: 0s - loss: 1.6999 - accuracy: 0.5199

2021-09-15 16:41:32.206645: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-15 16:41:32.206668: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-15 16:41:32.216560: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-15 16:41:32.219486: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-15 16:41:32.223339: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210915-164125/train/plugins/profile/2021_09_15_16_41_32
2021-09-15 16:41:32.224146: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210915-164125/train/plugins/profile/2021_09_15_16_41_32/helemanc-Latitude-5410.trace.json.gz
2021-09-15 16:41:32.229913: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210915-164125/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500

Epoch 00016: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500

Epoch 00022: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500

Epoch 00030: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500

Epoch 00034: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500

Epoch 00038: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500

Epoch 00042: ReduceLROnPlateau reducing learning rate to 1.56250007421

In [79]:
%tensorboard --logdir logs

In [80]:
model.evaluate(X_test, y_test, batch_size=8)



[0.4025122821331024, 0.8916666507720947]

In [81]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.86      0.91      0.89        56
           1       0.92      0.88      0.90        64

    accuracy                           0.89       120
   macro avg       0.89      0.89      0.89       120
weighted avg       0.89      0.89      0.89       120



## Save best model 

In [82]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_3/model_3_1")

2021-09-15 16:43:48.496280: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_3/model_3_1/assets


# Experiment 3.2 : RAVDESS noise

## Read dataframes

In [147]:
preprocess_path = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/ravdess"
df_train = pd.read_csv(os.path.join(preprocess_path,"df_train.csv"))
df_val = pd.read_csv(os.path.join(preprocess_path,"df_val.csv"))
df_test = pd.read_csv(os.path.join(preprocess_path,"df_test.csv"))  

## Feature Extraction

In [149]:
X_train, y_train, X_val, y_val, X_test, y_test = feature_extractor(df_train, df_val, df_test, 26) # 13

100%|█████████████████████████████████████| 2400/2400 [00:02<00:00, 1023.47it/s]
100%|███████████████████████████████████████| 2400/2400 [01:06<00:00, 36.35it/s]
100%|████████████████████████████████████████| 120/120 [00:00<00:00, 988.06it/s]
100%|█████████████████████████████████████████| 120/120 [00:02<00:00, 50.95it/s]
100%|████████████████████████████████████████| 120/120 [00:00<00:00, 817.24it/s]
100%|█████████████████████████████████████████| 120/120 [00:02<00:00, 53.99it/s]


In [150]:
y_train, y_val, y_test = encode_labels(y_train, y_val, y_test)

In [151]:
np.size(y_val)

120

In [152]:
X_train, X_val, X_test, fitted_scaler = standard_scaling(X_train, X_val, X_test)

In [153]:
X_train.shape

(2400, 157, 25)

## Shuffle training data

In [154]:
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)

## Save Scaler

In [155]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_3/scaler_3_2.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [90]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [91]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-15 16:47:28.456664: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-15 16:47:28.456900: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-15 16:47:28.463650: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-15 16:47:28.463791: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-15 16:47:28.472611: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or dire

Epoch 1/50
200/200 - 8s - loss: 0.9037 - accuracy: 0.5487
Epoch 1/50
200/200 - 8s - loss: 1.2370 - accuracy: 0.5325
Epoch 1/50
200/200 - 8s - loss: 3.0378 - accuracy: 0.5231
Epoch 1/50
200/200 - 9s - loss: 0.8634 - accuracy: 0.5369
Epoch 1/50
200/200 - 9s - loss: 1.2810 - accuracy: 0.5156
Epoch 1/50
200/200 - 9s - loss: 3.1534 - accuracy: 0.5163
Epoch 1/50
200/200 - 9s - loss: 1.3522 - accuracy: 0.5256
Epoch 1/50
200/200 - 9s - loss: 0.9378 - accuracy: 0.5419
Epoch 2/50
200/200 - 8s - loss: 0.6729 - accuracy: 0.6100
Epoch 2/50
200/200 - 8s - loss: 0.8694 - accuracy: 0.5738
Epoch 2/50
200/200 - 8s - loss: 1.9227 - accuracy: 0.5425
Epoch 2/50
200/200 - 8s - loss: 0.6991 - accuracy: 0.5625
Epoch 2/50
200/200 - 8s - loss: 2.0534 - accuracy: 0.5375
Epoch 2/50
200/200 - 8s - loss: 0.9294 - accuracy: 0.5444
Epoch 2/50
200/200 - 8s - loss: 0.9691 - accuracy: 0.5425
Epoch 2/50
200/200 - 8s - loss: 0.6907 - accuracy: 0.5944
Epoch 3/50
200/200 - 8s - loss: 0.8020 - accuracy: 0.5756
Epoch 3/50
200

In [92]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.8795833587646484 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.8037, std=0.01327 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.8662, std=0.01203 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.7117, std=0.01595 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.7767, std=0.03376 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.8796, std=0.004714 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.7979, std=0.003584 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.665, std=0.04048 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.8046, std=0.01276 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.8575, std=0.009071 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.8704, std=0.01042 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size':

## Train with best parameters

In [93]:
#Best Accuracy 0.8795833587646484 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}

def create_model( init_mode='uniform', lr = 0.001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,25), kernel_initializer=init_mode)) # 157, 12
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [94]:
seed = 7
np.random.seed(seed)

In [95]:
model = create_model()

In [96]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [97]:
import datetime, os

In [98]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [99]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-16 11:00:53.269821: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-16 11:00:53.269862: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-16 11:00:53.269912: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [100]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [101]:
history = model.fit(X_train, y_train, batch_size=8, epochs=500, validation_data=(X_val, y_val),
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

Epoch 1/500
 22/300 [=>............................] - ETA: 2s - loss: 0.9664 - accuracy: 0.4805

2021-09-16 11:01:00.690686: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-16 11:01:00.690720: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-16 11:01:00.747564: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-16 11:01:00.748316: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-16 11:01:00.749534: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210916-110052/train/plugins/profile/2021_09_16_11_01_00
2021-09-16 11:01:00.750258: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210916-110052/train/plugins/profile/2021_09_16_11_01_00/helemanc-Latitude-5410.trace.json.gz
2021-09-16 11:01:00.751316: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210916-110052/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500

Epoch 00012: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500

Epoch 00016: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500

Epoch 00026: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500

Epoch 00030: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500

Epoch 00035: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500

Epoch 00039: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.
Epoch 40/500
Epoch 41/500
Ep

In [102]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 35968), started 18:20:21 ago. (Use '!kill 35968' to kill it.)

In [106]:
model.evaluate(X_test, y_test, batch_size=8)



[0.2821905314922333, 0.8999999761581421]

In [107]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.92      0.86      0.89        56
           1       0.88      0.94      0.91        64

    accuracy                           0.90       120
   macro avg       0.90      0.90      0.90       120
weighted avg       0.90      0.90      0.90       120



## Save best model 

In [105]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_3/model_3_2")

INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_3/model_3_2/assets


# Experiment 3.3: TESS

In [156]:
df_train = TESS_train
df_test = TESS_test

In [157]:
df_train.reset_index(drop = True, inplace = True) 
df_test.reset_index(drop = True, inplace = True)

## Feature Extraction

In [158]:
X_train, y_train, X_test, y_test = feature_extractor_tess(df_train,  df_test, 26)

100%|███████████████████████████████████████| 1400/1400 [00:37<00:00, 37.27it/s]
100%|███████████████████████████████████████| 1400/1400 [00:29<00:00, 47.31it/s]
100%|█████████████████████████████████████| 1400/1400 [00:01<00:00, 1047.26it/s]
100%|███████████████████████████████████████| 1400/1400 [00:24<00:00, 56.07it/s]


In [160]:
y_train, y_test = encode_labels_tess(y_train, y_test)

In [161]:
np.size(y_test)

1400

In [162]:
X_train, X_test, fitted_scaler = standard_scaling_tess(X_train,  X_test)

In [163]:
X_train.shape

(1400, 157, 25)

## Shuffle training data

In [164]:
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)

## Save Scaler

In [165]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_3/scaler_3_3.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [116]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [117]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-16 11:08:32.024424: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-16 11:08:32.025042: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-16 11:08:32.167622: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-16 11:08:32.167780: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-16 11:08:32.171299: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or dire

Epoch 1/50
117/117 - 4s - loss: 0.2172 - accuracy: 0.9175
Epoch 1/50
117/117 - 5s - loss: 0.9356 - accuracy: 0.6463
Epoch 1/50
117/117 - 5s - loss: 0.8742 - accuracy: 0.6799
Epoch 1/50
117/117 - 6s - loss: 0.9357 - accuracy: 0.6785
Epoch 1/50
117/117 - 6s - loss: 2.1989 - accuracy: 0.6484
Epoch 1/50
117/117 - 6s - loss: 0.2900 - accuracy: 0.9046
Epoch 1/50
117/117 - 6s - loss: 0.2628 - accuracy: 0.9336
Epoch 1/50
117/117 - 7s - loss: 2.6385 - accuracy: 0.6206
Epoch 2/50
117/117 - 5s - loss: 0.0320 - accuracy: 0.9882
Epoch 2/50
117/117 - 5s - loss: 0.2978 - accuracy: 0.8767
Epoch 2/50
117/117 - 5s - loss: 0.2913 - accuracy: 0.8929
Epoch 2/50
117/117 - 5s - loss: 0.3116 - accuracy: 0.8703
Epoch 2/50
117/117 - 5s - loss: 0.8863 - accuracy: 0.8264
Epoch 2/50
117/117 - 5s - loss: 0.0057 - accuracy: 0.9979
Epoch 2/50
117/117 - 5s - loss: 0.0280 - accuracy: 0.9914
Epoch 2/50
117/117 - 5s - loss: 0.9221 - accuracy: 0.7996
Epoch 3/50
117/117 - 4s - loss: 0.0057 - accuracy: 0.9979
Epoch 3/50
117

In [118]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 1.0 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=1.0, std=0.0 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.9864, std=0.01922 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=1.0, std=0.0 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=1.0, std=0.0 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.9993, std=0.001009 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=1.0, std=0.0 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.9986, std=0.002023 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.9993, std=0.001009 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=1.0, std=0.0 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=1.0, std=0.0 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size': 4}


## Train with best parameters

In [119]:
#Best Accuracy 0.7160703738530477 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
def create_model( init_mode='lecun_uniform', lr = 0.0001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,25), kernel_initializer=init_mode)) # 157
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [120]:
seed = 7
np.random.seed(seed)

In [121]:
model = create_model()

In [122]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [123]:
import datetime, os

In [124]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [125]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-16 11:39:33.236141: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-16 11:39:33.236171: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-16 11:39:33.236211: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [126]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [127]:
history = model.fit(X_train, y_train, batch_size=8, epochs=500, 
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

Epoch 1/500
 21/175 [==>...........................] - ETA: 1s - loss: 1.4662 - accuracy: 0.6108

2021-09-16 11:39:35.456987: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-16 11:39:35.457012: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-16 11:39:35.522690: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-16 11:39:35.523533: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-16 11:39:35.524856: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210916-113933/train/plugins/profile/2021_09_16_11_39_35
2021-09-16 11:39:35.525603: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210916-113933/train/plugins/profile/2021_09_16_11_39_35/helemanc-Latitude-5410.trace.json.gz
2021-09-16 11:39:35.526762: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210916-113933/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500

Epoch 00016: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-05.
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500

Epoch 00023: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500

Epoch 00027: ReduceLROnPlateau reducing learning rate to 1.249999968422344e-05.
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500

Epoch 00031: ReduceLROnPlateau reducing learning rate to 6.24999984211172e-06.
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500

Epoch 00035: ReduceLROnPlateau reducing learning rate to 3.12499992105586e-06.
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500

Epoch 00039: ReduceLROnPlateau reducing learning rate to 1.56249996052793e-06.
Epoch 40/500
Epoch 41/500
Epoch

In [128]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 35968), started 19:00:34 ago. (Use '!kill 35968' to kill it.)

In [129]:
model.evaluate(X_test, y_test, batch_size=8)



[1.408311128616333, 0.6785714030265808]

In [130]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.77      0.35      0.49       600
           1       0.66      0.92      0.77       800

    accuracy                           0.68      1400
   macro avg       0.71      0.64      0.63      1400
weighted avg       0.71      0.68      0.65      1400



## Save best model 

In [131]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_3/model_3_3")

INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_3/model_3_3/assets


# Experiment 3.4: TESS noise

## Read dataframes

In [166]:
preprocess_path = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/tess"
df_train = pd.read_csv(os.path.join(preprocess_path,"df_train.csv"))
df_test = pd.read_csv(os.path.join(preprocess_path,"df_test.csv"))  

## Feature Extraction

In [167]:
X_train, y_train, X_test, y_test = feature_extractor_tess(df_train, df_test, 26) # 13

100%|███████████████████████████████████████| 2800/2800 [00:43<00:00, 65.02it/s]
100%|███████████████████████████████████████| 2800/2800 [00:53<00:00, 52.08it/s]
100%|█████████████████████████████████████| 1400/1400 [00:00<00:00, 1869.89it/s]
100%|███████████████████████████████████████| 1400/1400 [00:35<00:00, 39.04it/s]


In [168]:
y_train,  y_test = encode_labels_tess(y_train,  y_test)

In [169]:
np.size(y_train)

2800

In [170]:
X_train, X_test, fitted_scaler = standard_scaling_tess(X_train, X_test)

In [171]:
X_train.shape

(2800, 157, 25)

## Shuffle training data

In [172]:
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)

## Save Scaler

In [173]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_3/scaler_3_4.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [None]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [None]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

In [None]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

## Train with best parameters

In [144]:
#Best Accuracy 1.0 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size': 4}
def create_model( init_mode='uniform', lr = 0.0001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,25), kernel_initializer=init_mode)) # 157, 12
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [145]:
seed = 7
np.random.seed(seed)

In [146]:
model = create_model()

In [147]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [148]:
import datetime, os

In [149]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [150]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-16 12:42:20.180132: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-16 12:42:20.180188: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-16 12:42:20.180272: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [151]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [152]:
history = model.fit(X_train, y_train, batch_size=4, epochs=500,
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

Epoch 1/500
 31/700 [>.............................] - ETA: 3s - loss: 0.8532 - accuracy: 0.4258

2021-09-16 12:42:25.276810: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-16 12:42:25.276835: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-16 12:42:25.330671: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-16 12:42:25.331458: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-16 12:42:25.332642: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210916-124219/train/plugins/profile/2021_09_16_12_42_25
2021-09-16 12:42:25.333326: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210916-124219/train/plugins/profile/2021_09_16_12_42_25/helemanc-Latitude-5410.trace.json.gz
2021-09-16 12:42:25.334363: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210916-124219/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500

Epoch 00018: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-05.
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500

Epoch 00025: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500

Epoch 00029: ReduceLROnPlateau reducing learning rate to 1.249999968422344e-05.
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500

Epoch 00033: ReduceLROnPlateau reducing learning rate to 6.24999984211172e-06.
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500

Epoch 00037: ReduceLROnPlateau reducing learning rate to 3.12499992105586e-06.
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500

Epoch 00041: ReduceLROnPlateau reducing learning rate to 1.56249996052793e-06.
Epoch

In [153]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 35968), started 20:07:04 ago. (Use '!kill 35968' to kill it.)

In [154]:
model.evaluate(X_test, y_test, batch_size=8)



[7.522771835327148, 0.6664285659790039]

In [155]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.74      0.34      0.47       600
           1       0.65      0.91      0.76       800

    accuracy                           0.67      1400
   macro avg       0.69      0.63      0.61      1400
weighted avg       0.69      0.67      0.63      1400



## Save best model 

In [156]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_3/model_3_4")

INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_3/model_3_4/assets


# Experiment 3.5: SAVEE

In [174]:
df_train = SAVEE_train
df_val = SAVEE_val
df_test = SAVEE_test

In [175]:
df_train.reset_index(drop = True, inplace = True) 
df_val.reset_index(drop = True, inplace = True)
df_test.reset_index(drop = True, inplace = True)

## Feature Extraction

In [176]:
X_train, y_train, X_val, y_val, X_test, y_test = feature_extractor(df_train, df_val, df_test, 26)

100%|█████████████████████████████████████████| 240/240 [00:20<00:00, 11.58it/s]
100%|█████████████████████████████████████████| 240/240 [00:05<00:00, 44.78it/s]
100%|█████████████████████████████████████████| 120/120 [00:09<00:00, 12.79it/s]
100%|█████████████████████████████████████████| 120/120 [00:02<00:00, 46.18it/s]
100%|█████████████████████████████████████████| 120/120 [00:07<00:00, 15.68it/s]
100%|█████████████████████████████████████████| 120/120 [00:03<00:00, 35.08it/s]


In [177]:
y_train, y_val, y_test = encode_labels(y_train, y_val, y_test)

In [178]:
np.size(y_val)

120

In [179]:
X_train, X_val, X_test, fitted_scaler = standard_scaling(X_train, X_val, X_test)

In [180]:
X_train.shape

(240, 157, 25)

## Shuffle training data

In [181]:
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)

## Save Scaler

In [182]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_3/scaler_3_5.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [163]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [164]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-16 12:58:43.566034: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-16 12:58:43.566323: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-16 12:58:43.571897: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-16 12:58:43.572053: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-16 12:58:43.580597: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or dire

Epoch 1/50
234/234 - 10s - loss: 0.7982 - accuracy: 0.6940
Epoch 1/50
234/234 - 10s - loss: 0.2450 - accuracy: 0.9063
Epoch 1/50
234/234 - 10s - loss: 0.7681 - accuracy: 0.6920
Epoch 1/50
234/234 - 10s - loss: 0.8410 - accuracy: 0.6722
Epoch 1/50
234/234 - 10s - loss: 0.2695 - accuracy: 0.9009
Epoch 1/50
234/234 - 11s - loss: 2.2743 - accuracy: 0.6565
Epoch 1/50
234/234 - 11s - loss: 0.3195 - accuracy: 0.8913
Epoch 1/50
234/234 - 11s - loss: 2.0819 - accuracy: 0.6288
Epoch 2/50
234/234 - 10s - loss: 0.2917 - accuracy: 0.8644
Epoch 2/50
234/234 - 9s - loss: 0.2988 - accuracy: 0.8661
Epoch 2/50
234/234 - 9s - loss: 0.2910 - accuracy: 0.8693
Epoch 2/50
234/234 - 10s - loss: 0.0719 - accuracy: 0.9754
Epoch 2/50
234/234 - 10s - loss: 0.0711 - accuracy: 0.9759
Epoch 2/50
234/234 - 10s - loss: 0.8714 - accuracy: 0.7964
Epoch 2/50
234/234 - 10s - loss: 0.6904 - accuracy: 0.8050
Epoch 2/50
234/234 - 10s - loss: 0.0669 - accuracy: 0.9759
Epoch 3/50
234/234 - 9s - loss: 0.1906 - accuracy: 0.9111


In [165]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.9996427297592163 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.9989, std=0.0008742 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.9986, std=0.0005055 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.9986, std=0.0005055 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.9989, std=0.001514 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.9993, std=0.000505 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.9982, std=0.001337 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.9971, std=0.001337 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.9986, std=0.0005055 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.9996, std=0.0005053 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.9986, std=0.001335 using {'lr': 0.0001, 'init_mode': 'uni

## Train with best parameters

In [166]:
#Best Accuracy 0.9996427297592163 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
def create_model( init_mode='glorot_normal', lr = 0.0001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,25), kernel_initializer=init_mode)) # 157
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [167]:
seed = 7
np.random.seed(seed)

In [168]:
model = create_model()

In [66]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [67]:
import datetime, os

In [68]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [69]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-16 16:02:50.760855: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-16 16:02:50.760888: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-16 16:02:50.813839: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [173]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [175]:
history = model.fit(X_train, y_train, batch_size = 4, epochs=500, validation_data=(X_val, y_val),
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

Epoch 1/500
 40/700 [>.............................] - ETA: 3s - loss: 0.2427 - accuracy: 0.8875

2021-09-16 13:35:39.456735: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-16 13:35:39.456761: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-16 13:35:39.489023: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-16 13:35:39.489797: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-16 13:35:39.490968: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210916-133517/train/plugins/profile/2021_09_16_13_35_39
2021-09-16 13:35:39.491748: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210916-133517/train/plugins/profile/2021_09_16_13_35_39/helemanc-Latitude-5410.trace.json.gz
2021-09-16 13:35:39.492808: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210916-133517/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500

Epoch 00008: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-05.
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500

Epoch 00012: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500

Epoch 00020: ReduceLROnPlateau reducing learning rate to 1.249999968422344e-05.
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500

Epoch 00024: ReduceLROnPlateau reducing learning rate to 6.24999984211172e-06.
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500

Epoch 00028: ReduceLROnPlateau reducing learning rate to 3.12499992105586e-06.
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500

Epoch 00032: ReduceLROnPlateau reducing learning rate to 1.56249996052793e-06.
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500

Epoch 00036: ReduceLROnPlateau reducing learning rate to 1e-06.
Epoch

In [70]:
%tensorboard --logdir logs

In [177]:
model.evaluate(X_test, y_test, batch_size=8)



[0.5776047706604004, 0.7321428656578064]

In [178]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.69      0.68      0.69       600
           1       0.76      0.77      0.77       800

    accuracy                           0.73      1400
   macro avg       0.73      0.73      0.73      1400
weighted avg       0.73      0.73      0.73      1400



## Save best model 

In [179]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_3/model_3_5")

INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_3/model_3_5/assets


# Experiment 3.6: SAVEE noise

## Read dataframes

In [183]:
preprocess_path = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/savee"
df_train = pd.read_csv(os.path.join(preprocess_path,"df_train.csv"))
df_val = pd.read_csv(os.path.join(preprocess_path,"df_val.csv"))
df_test = pd.read_csv(os.path.join(preprocess_path,"df_test.csv"))  

## Feature Extraction

In [184]:
X_train, y_train, X_val, y_val, X_test, y_test = feature_extractor(df_train, df_val, df_test, 26) # 13

100%|█████████████████████████████████████████| 480/480 [00:39<00:00, 12.23it/s]
100%|█████████████████████████████████████████| 480/480 [00:11<00:00, 41.69it/s]
100%|█████████████████████████████████████████| 120/120 [00:04<00:00, 24.22it/s]
100%|█████████████████████████████████████████| 120/120 [00:01<00:00, 63.72it/s]
100%|█████████████████████████████████████████| 120/120 [00:05<00:00, 22.34it/s]
100%|█████████████████████████████████████████| 120/120 [00:03<00:00, 34.66it/s]


In [185]:
y_train, y_val, y_test = encode_labels(y_train, y_val, y_test)

In [186]:
np.size(y_val)

120

In [187]:
X_train, X_val, X_test, fitted_scaler = standard_scaling(X_train, X_val, X_test)

In [188]:
X_train.shape

(480, 157, 25)

## Shuffle training data

In [189]:
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)

## Save Scaler

In [190]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_3/scaler_3_6.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [78]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [79]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-16 16:06:36.653809: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-16 16:06:36.654398: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-16 16:06:36.700197: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-16 16:06:36.700191: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-16 16:06:36.700218: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your mac

Epoch 1/50
40/40 - 2s - loss: 3.0348 - accuracy: 0.4812
Epoch 1/50
40/40 - 2s - loss: 1.2567 - accuracy: 0.5250
Epoch 1/50
40/40 - 2s - loss: 1.3180 - accuracy: 0.5000
Epoch 1/50
40/40 - 2s - loss: 2.8023 - accuracy: 0.4938
Epoch 1/50
40/40 - 2s - loss: 1.2953 - accuracy: 0.5375
Epoch 1/50
40/40 - 2s - loss: 1.2966 - accuracy: 0.4812
Epoch 1/50
40/40 - 2s - loss: 1.2852 - accuracy: 0.5406
Epoch 1/50
40/40 - 3s - loss: 1.1563 - accuracy: 0.5156
Epoch 2/50
40/40 - 1s - loss: 2.3328 - accuracy: 0.5500
Epoch 2/50
40/40 - 2s - loss: 0.6611 - accuracy: 0.6062
Epoch 2/50
40/40 - 2s - loss: 2.9515 - accuracy: 0.5219
Epoch 2/50
40/40 - 2s - loss: 1.1430 - accuracy: 0.5375
Epoch 2/50
40/40 - 2s - loss: 1.2521 - accuracy: 0.5281
Epoch 2/50
40/40 - 2s - loss: 0.7944 - accuracy: 0.5719
Epoch 2/50
40/40 - 2s - loss: 0.8149 - accuracy: 0.5781
Epoch 2/50
40/40 - 2s - loss: 1.0889 - accuracy: 0.5437
Epoch 3/50
40/40 - 2s - loss: 0.6434 - accuracy: 0.6594
Epoch 3/50
40/40 - 2s - loss: 2.3011 - accuracy:

2021-09-16 16:12:20.855123: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-09-16 16:12:20.855498: I tensorflow/compiler/jit/xla_gpu_device.cc:99] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-09-16 16:12:20.953682: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2021-09-16 16:12:20.971769: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2299965000 Hz


Epoch 1/50
60/60 - 1s - loss: 1.0094 - accuracy: 0.4896
Epoch 2/50
60/60 - 0s - loss: 0.6938 - accuracy: 0.5688
Epoch 3/50
60/60 - 0s - loss: 0.6428 - accuracy: 0.6521
Epoch 4/50
60/60 - 0s - loss: 0.6061 - accuracy: 0.6896
Epoch 5/50
60/60 - 0s - loss: 0.6025 - accuracy: 0.6958
Epoch 6/50
60/60 - 0s - loss: 0.5202 - accuracy: 0.7688
Epoch 7/50
60/60 - 0s - loss: 0.4754 - accuracy: 0.7771
Epoch 8/50
60/60 - 0s - loss: 0.4523 - accuracy: 0.7792
Epoch 9/50
60/60 - 0s - loss: 0.4821 - accuracy: 0.7688
Epoch 10/50
60/60 - 0s - loss: 0.4195 - accuracy: 0.8167
Epoch 11/50
60/60 - 0s - loss: 0.4305 - accuracy: 0.8167
Epoch 12/50
60/60 - 0s - loss: 0.3940 - accuracy: 0.8250
Epoch 13/50
60/60 - 0s - loss: 0.3982 - accuracy: 0.8188
Epoch 14/50
60/60 - 0s - loss: 0.3043 - accuracy: 0.8729
Epoch 15/50
60/60 - 0s - loss: 0.3038 - accuracy: 0.8562
Epoch 16/50
60/60 - 0s - loss: 0.2484 - accuracy: 0.8917
Epoch 17/50
60/60 - 0s - loss: 0.2487 - accuracy: 0.8938
Epoch 18/50
60/60 - 0s - loss: 0.2582 - 

In [80]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.806249996026357 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.7583, std=0.04742 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.7833, std=0.0299 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.6896, std=0.04602 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.775, std=0.03187 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.8062, std=0.04677 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.7417, std=0.0362 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.6854, std=0.0434 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.7563, std=0.04677 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.7833, std=0.03762 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.7896, std=0.03241 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size': 4}


## Train with best parameters

In [81]:
# Best Accuracy 0.806249996026357 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
def create_model( init_mode='uniform', lr = 0.001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,25), kernel_initializer=init_mode)) # 157, 12
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [82]:
seed = 7
np.random.seed(seed)

In [83]:
model = create_model()

In [84]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [85]:
import datetime, os

In [86]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [87]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-16 16:14:08.928810: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-16 16:14:08.928886: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-16 16:14:08.928984: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [88]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [89]:
history = model.fit(X_train, y_train, batch_size=8, epochs=500, validation_data=(X_val, y_val),
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

Epoch 1/500

2021-09-16 16:14:13.432419: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-16 16:14:13.432443: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-16 16:14:13.442943: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-16 16:14:13.445865: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-16 16:14:13.449900: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210916-161408/train/plugins/profile/2021_09_16_16_14_13
2021-09-16 16:14:13.450634: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210916-161408/train/plugins/profile/2021_09_16_16_14_13/helemanc-Latitude-5410.trace.json.gz
2021-09-16 16:14:13.456591: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210916-161408/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500

Epoch 00005: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500

Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500

Epoch 00013: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500

Epoch 00017: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500

Epoch 00021: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500

Epoch 00025: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500

Epoch 00029: ReduceLROnPlateau reducing learning rate to 7.812500371073838e-06.
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500

Epoch 00033: ReduceLROnP

In [90]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 81574), started 0:11:22 ago. (Use '!kill 81574' to kill it.)

In [91]:
model.evaluate(X_test, y_test, batch_size=4)



[0.7146196365356445, 0.49166667461395264]

In [92]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        60
           1       0.50      0.98      0.66        60

    accuracy                           0.49       120
   macro avg       0.25      0.49      0.33       120
weighted avg       0.25      0.49      0.33       120



## Save best model 

In [93]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_3/model_3_6")

2021-09-16 16:14:33.746247: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_3/model_3_6/assets


# Experiment 3.7: CREMA 

In [191]:
df_train = CREMA_train
df_val = CREMA_val
df_test = CREMA_test

In [192]:
df_train.reset_index(drop = True, inplace = True) 
df_val.reset_index(drop = True, inplace = True)
df_test.reset_index(drop = True, inplace = True)

## Feature Extraction

In [193]:
X_train, y_train, X_val, y_val, X_test, y_test = feature_extractor(df_train, df_val, df_test, 26)

100%|███████████████████████████████████████| 320/320 [00:00<00:00, 1460.72it/s]
100%|█████████████████████████████████████████| 320/320 [00:10<00:00, 29.99it/s]
100%|█████████████████████████████████████████| 60/60 [00:00<00:00, 1053.24it/s]
100%|███████████████████████████████████████████| 60/60 [00:01<00:00, 32.31it/s]
100%|██████████████████████████████████████████| 60/60 [00:00<00:00, 756.07it/s]
100%|███████████████████████████████████████████| 60/60 [00:01<00:00, 35.27it/s]


In [194]:
y_train, y_val, y_test = encode_labels(y_train, y_val, y_test)

In [195]:
np.size(y_val)

60

In [196]:
X_train, X_val, X_test, fitted_scaler = standard_scaling(X_train, X_val, X_test)

In [197]:
X_train.shape

(320, 157, 25)

## Shuffle training data

In [198]:
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)

## Save Scaler

In [199]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_3/scaler_3_7.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [102]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [103]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-16 16:25:58.906243: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-16 16:25:58.906483: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-16 16:25:58.980798: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-16 16:25:58.980938: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-16 16:25:58.986602: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or dire

Epoch 1/50
27/27 - 1s - loss: 1.5284 - accuracy: 0.5701
Epoch 2/50
27/27 - 1s - loss: 1.3388 - accuracy: 0.6028
Epoch 1/50
27/27 - 2s - loss: 2.5507 - accuracy: 0.5234
Epoch 1/50
27/27 - 2s - loss: 1.6922 - accuracy: 0.5352
Epoch 1/50
27/27 - 2s - loss: 1.2125 - accuracy: 0.4883
Epoch 2/50
27/27 - 1s - loss: 0.7187 - accuracy: 0.6402
Epoch 3/50
27/27 - 1s - loss: 1.1385 - accuracy: 0.5093
Epoch 1/50
27/27 - 3s - loss: 3.8406 - accuracy: 0.5399
Epoch 2/50
27/27 - 1s - loss: 1.1453 - accuracy: 0.6150
Epoch 1/50
27/27 - 3s - loss: 4.1955 - accuracy: 0.5681
Epoch 2/50
27/27 - 1s - loss: 1.1493 - accuracy: 0.6056
Epoch 1/50
27/27 - 4s - loss: 2.1682 - accuracy: 0.4883
Epoch 1/50
27/27 - 4s - loss: 1.6443 - accuracy: 0.5446
Epoch 4/50
27/27 - 1s - loss: 0.9101 - accuracy: 0.6449
Epoch 2/50
27/27 - 1s - loss: 3.2948 - accuracy: 0.5352
Epoch 3/50
27/27 - 1s - loss: 0.6110 - accuracy: 0.6121
Epoch 3/50
27/27 - 1s - loss: 0.8211 - accuracy: 0.6244
Epoch 2/50
27/27 - 1s - loss: 3.0567 - accuracy:

In [104]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.8125844995180765 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.7154, std=0.06252 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.8094, std=0.004054 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.7156, std=0.01149 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.7812, std=0.0293 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.8126, std=0.02228 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.7, std=0.06106 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.7002, std=0.03857 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.7625, std=0.03855 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.7875, std=0.009809 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.775, std=0.00682 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size': 4}


## Train with best parameters

In [105]:
#Best Accuracy 0.8125844995180765 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
def create_model( init_mode='uniform', lr = 0.001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,25), kernel_initializer=init_mode)) # 157
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [106]:
seed = 7
np.random.seed(seed)

In [107]:
model = create_model()

In [108]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [109]:
import datetime, os

In [110]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [111]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-16 16:34:22.519948: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-16 16:34:22.520011: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-16 16:34:22.520140: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [112]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [113]:
history = model.fit(X_train, y_train, batch_size=8, epochs=500, validation_data=(X_val, y_val),
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

Epoch 1/500

2021-09-16 16:34:34.130077: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-16 16:34:34.130110: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-16 16:34:34.195427: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-16 16:34:34.196179: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-16 16:34:34.197436: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210916-163420/train/plugins/profile/2021_09_16_16_34_34
2021-09-16 16:34:34.198175: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210916-163420/train/plugins/profile/2021_09_16_16_34_34/helemanc-Latitude-5410.trace.json.gz
2021-09-16 16:34:34.199271: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210916-163420/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500

Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500

Epoch 00013: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500

Epoch 00017: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500

Epoch 00021: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500

Epoch 00025: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500

Epoch 00029: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500

Epoch 00033: ReduceLROnPlateau reducing learning rate to 7.812500371073838e-06.
Epoch 34/500
Epoch 35/500

In [114]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 81574), started 0:31:41 ago. (Use '!kill 81574' to kill it.)

In [115]:
model.evaluate(X_test, y_test, batch_size=8)



[0.3980877995491028, 0.8500000238418579]

In [116]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.71      0.42      0.53        12
           1       0.87      0.96      0.91        48

    accuracy                           0.85        60
   macro avg       0.79      0.69      0.72        60
weighted avg       0.84      0.85      0.83        60



## Save best model 

In [117]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_3/model_3_7")

INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_3/model_3_7/assets


# Experiment 3.8: CREMA - noise

In [200]:
preprocess_path = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/crema"
df_train = pd.read_csv(os.path.join(preprocess_path,"df_train.csv"))
df_val = pd.read_csv(os.path.join(preprocess_path,"df_val.csv"))
df_test = pd.read_csv(os.path.join(preprocess_path,"df_test.csv"))  

## Feature Extraction

In [201]:
X_train, y_train, X_val, y_val, X_test, y_test = feature_extractor(df_train, df_val, df_test, 26) # 13

100%|███████████████████████████████████████| 640/640 [00:00<00:00, 1895.29it/s]
100%|█████████████████████████████████████████| 640/640 [00:21<00:00, 29.45it/s]
100%|█████████████████████████████████████████| 60/60 [00:00<00:00, 1049.30it/s]
100%|███████████████████████████████████████████| 60/60 [00:01<00:00, 31.10it/s]
100%|█████████████████████████████████████████| 60/60 [00:00<00:00, 1733.72it/s]
100%|███████████████████████████████████████████| 60/60 [00:01<00:00, 53.96it/s]


In [202]:
y_train, y_val, y_test = encode_labels(y_train, y_val, y_test)

In [203]:
np.size(y_val)

60

In [204]:
X_train, X_val, X_test, fitted_scaler = standard_scaling(X_train, X_val, X_test)

In [205]:
X_train.shape

(640, 157, 25)

## Shuffle training data

In [206]:
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)

## Save Scaler

In [207]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_3/scaler_3_8.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [125]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [126]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-16 16:41:47.732948: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-16 16:41:47.733190: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-16 16:41:47.742530: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-16 16:41:47.742667: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-16 16:41:47.749106: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or dire

Epoch 1/50
54/54 - 2s - loss: 1.5744 - accuracy: 0.5386
Epoch 1/50
54/54 - 2s - loss: 1.3551 - accuracy: 0.5869
Epoch 1/50
54/54 - 2s - loss: 2.1017 - accuracy: 0.4848
Epoch 1/50
54/54 - 3s - loss: 1.7474 - accuracy: 0.5141
Epoch 1/50
54/54 - 3s - loss: 1.5497 - accuracy: 0.5457
Epoch 1/50
54/54 - 4s - loss: 3.9871 - accuracy: 0.5000
Epoch 1/50
54/54 - 4s - loss: 1.3824 - accuracy: 0.5199
Epoch 2/50
54/54 - 2s - loss: 1.1322 - accuracy: 0.5691
Epoch 2/50
54/54 - 2s - loss: 1.1169 - accuracy: 0.6408
Epoch 1/50
54/54 - 4s - loss: 3.5957 - accuracy: 0.5269
Epoch 2/50
54/54 - 2s - loss: 1.4470 - accuracy: 0.5246
Epoch 2/50
54/54 - 2s - loss: 1.2351 - accuracy: 0.5329
Epoch 2/50
54/54 - 2s - loss: 0.6808 - accuracy: 0.6089
Epoch 2/50
54/54 - 2s - loss: 3.3334 - accuracy: 0.5282
Epoch 3/50
54/54 - 2s - loss: 1.0867 - accuracy: 0.5878
Epoch 2/50
54/54 - 2s - loss: 0.7249 - accuracy: 0.6393
Epoch 3/50
54/54 - 2s - loss: 0.7989 - accuracy: 0.6385
Epoch 2/50
54/54 - 2s - loss: 2.9895 - accuracy:

In [127]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.8484123945236206 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.7233, std=0.05564 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.8484, std=0.01909 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.6984, std=0.02995 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.839, std=0.02619 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.8406, std=0.01534 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.7406, std=0.01612 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.6327, std=0.06933 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.8265, std=0.03161 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.7999, std=0.03382 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.8437, std=0.03585 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_siz

## Train with best parameters

In [129]:
#Best Accuracy 0.8687347372372946 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
def create_model( init_mode='glorot_normal', lr = 0.001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,25), kernel_initializer=init_mode)) # 157, 12
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [130]:
seed = 7
np.random.seed(seed)

In [131]:
model = create_model()

In [132]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [133]:
import datetime, os

In [134]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [135]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-16 17:06:12.299654: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-16 17:06:12.299697: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-16 17:06:12.299763: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [136]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [137]:
history = model.fit(X_train, y_train, batch_size=8, epochs=500, validation_data=(X_val, y_val),
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

Epoch 1/500

2021-09-16 17:06:13.657972: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-16 17:06:13.657995: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-16 17:06:13.717218: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-16 17:06:13.718007: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-16 17:06:13.719249: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210916-170612/train/plugins/profile/2021_09_16_17_06_13
2021-09-16 17:06:13.719983: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210916-170612/train/plugins/profile/2021_09_16_17_06_13/helemanc-Latitude-5410.trace.json.gz
2021-09-16 17:06:13.721076: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210916-170612/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500

Epoch 00007: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500

Epoch 00015: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500

Epoch 00019: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500

Epoch 00023: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500

Epoch 00027: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500

Epoch 00031: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500

Epoch 00035: ReduceLROnPlateau reducing learning rate to 7.812500371073838e-06.

In [138]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 81574), started 1:03:31 ago. (Use '!kill 81574' to kill it.)

In [139]:
model.evaluate(X_test, y_test, batch_size=8)



[0.4113219976425171, 0.8166666626930237]

In [140]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.57      0.33      0.42        12
           1       0.85      0.94      0.89        48

    accuracy                           0.82        60
   macro avg       0.71      0.64      0.66        60
weighted avg       0.79      0.82      0.80        60



## Save best model 

In [141]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_3/model_3_8")

INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_3/model_3_8/assets


# Experiment 3.9: RAVDESS - TESS - SAVEE

In [208]:
df_train = pd.concat([RAV_train, SAVEE_train, TESS_train])
df_val = pd.concat([RAV_val, SAVEE_val])
df_test = pd.concat([RAV_test, SAVEE_test, TESS_test])
#df_test = pd.concat([RAV_train, SAVEE_test])

In [209]:
df_train.reset_index(drop = True, inplace = True) 
df_val.reset_index(drop = True, inplace = True)
df_test.reset_index(drop = True, inplace = True)

## Feature Extraction

In [210]:
X_train, y_train, X_val, y_val, X_test, y_test = feature_extractor(df_train, df_val, df_test, 26)

100%|██████████████████████████████████████| 2840/2840 [00:25<00:00, 110.95it/s]
100%|███████████████████████████████████████| 2840/2840 [00:39<00:00, 71.69it/s]
100%|█████████████████████████████████████████| 240/240 [00:04<00:00, 56.98it/s]
100%|█████████████████████████████████████████| 240/240 [00:03<00:00, 71.99it/s]
100%|██████████████████████████████████████| 1640/1640 [00:04<00:00, 405.97it/s]
100%|███████████████████████████████████████| 1640/1640 [00:22<00:00, 73.36it/s]


In [211]:
y_train, y_val, y_test = encode_labels(y_train, y_val, y_test)

In [212]:
np.size(y_test)

1640

In [213]:
X_train, X_val, X_test, fitted_scaler = standard_scaling(X_train, X_val, X_test)

In [214]:
X_train.shape

(2840, 157, 25)

## Shuffle training data

In [215]:
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)

## Save Scaler

In [216]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_3/scaler_3_9.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [150]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [151]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-16 17:15:08.544044: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-16 17:15:08.544667: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-16 17:15:08.557234: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-16 17:15:08.557397: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-16 17:15:08.589942: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or dire

Epoch 1/50
237/237 - 10s - loss: 2.4611 - accuracy: 0.5885
Epoch 1/50
237/237 - 10s - loss: 1.1321 - accuracy: 0.5734
Epoch 1/50
237/237 - 10s - loss: 1.1368 - accuracy: 0.5917
Epoch 1/50
237/237 - 11s - loss: 0.6803 - accuracy: 0.6931
Epoch 1/50
237/237 - 11s - loss: 1.0912 - accuracy: 0.5869
Epoch 1/50
237/237 - 11s - loss: 0.6509 - accuracy: 0.6978
Epoch 1/50
237/237 - 12s - loss: 2.3927 - accuracy: 0.5811
Epoch 1/50
237/237 - 12s - loss: 0.7170 - accuracy: 0.6985
Epoch 2/50
237/237 - 10s - loss: 1.5235 - accuracy: 0.6498
Epoch 2/50
237/237 - 10s - loss: 0.7053 - accuracy: 0.6774
Epoch 2/50
237/237 - 10s - loss: 0.6837 - accuracy: 0.6957
Epoch 2/50
237/237 - 10s - loss: 0.6411 - accuracy: 0.7158
Epoch 2/50
237/237 - 10s - loss: 0.4232 - accuracy: 0.7707
Epoch 2/50
237/237 - 11s - loss: 0.4251 - accuracy: 0.7649
Epoch 2/50
237/237 - 11s - loss: 1.3625 - accuracy: 0.6519
Epoch 2/50
237/237 - 10s - loss: 0.4483 - accuracy: 0.7429
Epoch 3/50
237/237 - 10s - loss: 0.9947 - accuracy: 0.70

2021-09-16 17:23:29.899688: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 29720100 exceeds 10% of free system memory.
2021-09-16 17:23:29.944540: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 29720100 exceeds 10% of free system memory.


119/119 - 1s - loss: 0.2322 - accuracy: 0.9027
Epoch 1/50
237/237 - 9s - loss: 2.7912 - accuracy: 0.5987
Epoch 1/50
237/237 - 11s - loss: 0.5711 - accuracy: 0.7073
Epoch 1/50
237/237 - 11s - loss: 0.6079 - accuracy: 0.7049
Epoch 1/50
474/474 - 12s - loss: 1.4757 - accuracy: 0.6635
Epoch 1/50
237/237 - 13s - loss: 0.5963 - accuracy: 0.7110
Epoch 1/50
474/474 - 13s - loss: 1.5501 - accuracy: 0.6653
Epoch 1/50
474/474 - 14s - loss: 1.8109 - accuracy: 0.6683
Epoch 1/50
237/237 - 15s - loss: 0.9244 - accuracy: 0.5753
Epoch 2/50
237/237 - 11s - loss: 1.7819 - accuracy: 0.6278
Epoch 2/50
237/237 - 10s - loss: 0.4334 - accuracy: 0.7765
Epoch 2/50
237/237 - 10s - loss: 0.4226 - accuracy: 0.7661
Epoch 2/50
237/237 - 11s - loss: 0.4109 - accuracy: 0.7655
Epoch 2/50
474/474 - 13s - loss: 0.5016 - accuracy: 0.7475
Epoch 2/50
474/474 - 13s - loss: 0.5420 - accuracy: 0.7339
Epoch 2/50
474/474 - 12s - loss: 0.5434 - accuracy: 0.7232
Epoch 2/50
237/237 - 11s - loss: 0.7073 - accuracy: 0.6550
Epoch 3/50

2021-09-16 17:32:21.372050: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 29735800 exceeds 10% of free system memory.


Epoch 50/50
237/237 - 9s - loss: 0.2256 - accuracy: 0.8901
Epoch 44/50
474/474 - 9s - loss: 0.1943 - accuracy: 0.9071


2021-09-16 17:32:22.460623: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 29720100 exceeds 10% of free system memory.


119/119 - 2s - loss: 0.2811 - accuracy: 0.8722
Epoch 45/50
474/474 - 8s - loss: 0.2257 - accuracy: 0.8706
Epoch 45/50
474/474 - 9s - loss: 0.2090 - accuracy: 0.8896
Epoch 1/50
237/237 - 12s - loss: 0.8985 - accuracy: 0.5853
Epoch 45/50
474/474 - 9s - loss: 0.2101 - accuracy: 0.9050
Epoch 1/50
237/237 - 13s - loss: 0.8783 - accuracy: 0.5734
Epoch 1/50
237/237 - 14s - loss: 3.1970 - accuracy: 0.5436
Epoch 1/50
237/237 - 15s - loss: 2.8762 - accuracy: 0.5441
Epoch 1/50
237/237 - 14s - loss: 3.1643 - accuracy: 0.5723
Epoch 46/50
474/474 - 12s - loss: 0.2464 - accuracy: 0.8796
Epoch 2/50
237/237 - 11s - loss: 0.6826 - accuracy: 0.6561
Epoch 46/50
474/474 - 12s - loss: 0.2113 - accuracy: 0.9012
Epoch 46/50
474/474 - 12s - loss: 0.1887 - accuracy: 0.9176
Epoch 2/50
237/237 - 10s - loss: 0.6848 - accuracy: 0.6552
Epoch 2/50
237/237 - 11s - loss: 2.2919 - accuracy: 0.6038
Epoch 2/50
237/237 - 10s - loss: 1.8443 - accuracy: 0.6318
Epoch 2/50
237/237 - 11s - loss: 2.3534 - accuracy: 0.6030
Epoch 

2021-09-16 17:41:01.672303: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 29720100 exceeds 10% of free system memory.


119/119 - 2s - loss: 0.3518 - accuracy: 0.8118
Epoch 44/50
237/237 - 7s - loss: 0.2041 - accuracy: 0.9113
Epoch 44/50
237/237 - 8s - loss: 0.1976 - accuracy: 0.9044
Epoch 44/50
237/237 - 8s - loss: 0.2020 - accuracy: 0.8997
Epoch 1/50
474/474 - 15s - loss: 0.7714 - accuracy: 0.6212
Epoch 45/50
237/237 - 9s - loss: 0.2140 - accuracy: 0.9070
Epoch 45/50
237/237 - 11s - loss: 0.2107 - accuracy: 0.8996
Epoch 1/50
474/474 - 17s - loss: 0.8222 - accuracy: 0.6283
Epoch 1/50
474/474 - 17s - loss: 0.7903 - accuracy: 0.6318
Epoch 1/50
474/474 - 17s - loss: 0.6389 - accuracy: 0.6566
Epoch 45/50
237/237 - 11s - loss: 0.1989 - accuracy: 0.9076
Epoch 1/50
474/474 - 17s - loss: 0.6348 - accuracy: 0.6598
Epoch 46/50
237/237 - 10s - loss: 0.1923 - accuracy: 0.9086
Epoch 2/50
474/474 - 12s - loss: 0.5415 - accuracy: 0.7190
Epoch 46/50
237/237 - 11s - loss: 0.1916 - accuracy: 0.9017
Epoch 2/50
474/474 - 12s - loss: 0.5964 - accuracy: 0.7107
Epoch 46/50
237/237 - 10s - loss: 0.1961 - accuracy: 0.9065
Epoc

In [152]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.9158464074134827 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.9028, std=0.01194 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.9067, std=0.004777 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.8528, std=0.01085 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.8842, std=0.01255 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.9158, std=0.003453 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.8796, std=0.01271 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.8123, std=0.01682 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.887, std=0.01436 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.9092, std=0.0008634 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.9078, std=0.00942 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size'

## Train with best parameters

In [113]:
# Best Accuracy 0.9158464074134827 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
def create_model( init_mode='uniform', lr = 0.001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,25), kernel_initializer=init_mode)) # 157
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [114]:
seed = 7
np.random.seed(seed)

In [115]:
model = create_model()

2021-09-20 16:10:54.075244: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-09-20 16:10:54.075667: I tensorflow/compiler/jit/xla_gpu_device.cc:99] Not creating XLA devices, tf_xla_enable_xla_devices not set


In [116]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [117]:
import datetime, os

In [118]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [119]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-20 16:10:54.206977: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-20 16:10:54.206998: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-20 16:10:54.259743: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [120]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [121]:
history = model.fit(X_train, y_train, batch_size=8, epochs=500, validation_data=(X_val, y_val),
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

2021-09-20 16:11:00.973913: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2021-09-20 16:11:00.994020: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2299965000 Hz


Epoch 1/500
 31/355 [=>............................] - ETA: 2s - loss: 1.0691 - accuracy: 0.5920

2021-09-20 16:11:01.451882: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-20 16:11:01.451909: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-20 16:11:01.460454: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-20 16:11:01.463174: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-20 16:11:01.466962: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210920-161054/train/plugins/profile/2021_09_20_16_11_01
2021-09-20 16:11:01.467692: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210920-161054/train/plugins/profile/2021_09_20_16_11_01/helemanc-Latitude-5410.trace.json.gz
2021-09-20 16:11:01.473905: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210920-161054/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500

Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500

Epoch 00016: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500

Epoch 00026: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500

Epoch 00030: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500

Epoch 00034: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500

Epoch 00038: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.
Epoch 39/500
Epoch 40/500
Epoch 41/500
Ep

In [122]:
%tensorboard --logdir logs

In [123]:
model.evaluate(X_test, y_test, batch_size=8)



[0.6533849835395813, 0.6536585092544556]

In [124]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.58      0.73      0.65       716
           1       0.74      0.60      0.66       924

    accuracy                           0.65      1640
   macro avg       0.66      0.66      0.65      1640
weighted avg       0.67      0.65      0.65      1640



## Save best model 

In [125]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_3/model_3_9")

2021-09-20 16:13:40.918263: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_3/model_3_9/assets


# Experiment 3.10: RAVDESS - TESS - SAVEE noise

## Read dataframes

In [217]:
preprocess_path_rav = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/ravdess"
preprocess_path_savee = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/savee"
preprocess_path_tess = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/tess"

df_train_rav = pd.read_csv(os.path.join(preprocess_path_rav,"df_train.csv"))
df_val_rav = pd.read_csv(os.path.join(preprocess_path_rav,"df_val.csv"))
df_test_rav = pd.read_csv(os.path.join(preprocess_path_rav,"df_test.csv"))  

df_train_tess = pd.read_csv(os.path.join(preprocess_path_tess,"df_train.csv"))
df_test_tess= pd.read_csv(os.path.join(preprocess_path_tess,"df_test.csv"))  

df_train_savee = pd.read_csv(os.path.join(preprocess_path_savee,"df_train.csv"))
df_val_savee = pd.read_csv(os.path.join(preprocess_path_savee,"df_val.csv"))
df_test_savee = pd.read_csv(os.path.join(preprocess_path_savee,"df_test.csv"))  

In [218]:
df_train = pd.concat([df_train_rav, df_train_savee, df_train_tess])
df_val = pd.concat([df_val_rav, df_val_savee])
df_test = pd.concat([df_test_rav, df_test_savee, df_test_tess])
#df_test = pd.concat([RAV_train, SAVEE_test])

In [219]:
df_train.reset_index(drop = True, inplace = True) 
df_val.reset_index(drop = True, inplace = True)
df_test.reset_index(drop = True, inplace = True)

## Feature Extraction

In [220]:
X_train, y_train, X_val, y_val, X_test, y_test = feature_extractor(df_train, df_val, df_test, 26) # 13

100%|██████████████████████████████████████| 5680/5680 [00:47<00:00, 120.65it/s]
100%|███████████████████████████████████████| 5680/5680 [01:20<00:00, 70.24it/s]
100%|█████████████████████████████████████████| 240/240 [00:04<00:00, 57.03it/s]
100%|█████████████████████████████████████████| 240/240 [00:03<00:00, 74.21it/s]
100%|██████████████████████████████████████| 1640/1640 [00:04<00:00, 367.03it/s]
100%|███████████████████████████████████████| 1640/1640 [00:22<00:00, 71.89it/s]


In [221]:
y_train, y_val, y_test = encode_labels(y_train, y_val, y_test)

In [222]:
np.size(y_val)

240

In [223]:
X_train, X_val, X_test, fitted_scaler = standard_scaling(X_train, X_val, X_test)

In [224]:
X_train.shape

(5680, 157, 25)

## Shuffle training data

In [225]:
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)

## Save Scaler

In [226]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_3/scaler_3_10.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [135]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [138]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-20 20:10:45.239337: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-20 20:10:45.239329: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-20 20:10:45.239329: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-20 20:10:45.239333: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-20 20:10:45.239331: W tensorflow/stream_executor/platform/default/dso_lo

Epoch 1/50
474/474 - 20s - loss: 0.6437 - accuracy: 0.6686
Epoch 1/50
474/474 - 21s - loss: 2.2808 - accuracy: 0.5672
Epoch 1/50
474/474 - 21s - loss: 0.6387 - accuracy: 0.6810
Epoch 1/50
474/474 - 21s - loss: 0.6272 - accuracy: 0.6791
Epoch 1/50
474/474 - 21s - loss: 0.9576 - accuracy: 0.6044
Epoch 1/50
474/474 - 21s - loss: 2.2316 - accuracy: 0.5671
Epoch 1/50
474/474 - 21s - loss: 1.0118 - accuracy: 0.6007
Epoch 1/50
474/474 - 21s - loss: 0.9381 - accuracy: 0.6038
Epoch 2/50
474/474 - 19s - loss: 0.4427 - accuracy: 0.7447
Epoch 2/50
474/474 - 19s - loss: 1.0635 - accuracy: 0.6403
Epoch 2/50
474/474 - 20s - loss: 0.4449 - accuracy: 0.7436
Epoch 2/50
474/474 - 19s - loss: 0.6158 - accuracy: 0.6704
Epoch 2/50
474/474 - 20s - loss: 0.4473 - accuracy: 0.7367
Epoch 2/50
474/474 - 19s - loss: 0.6331 - accuracy: 0.6718
Epoch 2/50
474/474 - 20s - loss: 1.0457 - accuracy: 0.6398
Epoch 2/50
474/474 - 20s - loss: 0.6252 - accuracy: 0.6744
Epoch 3/50
474/474 - 19s - loss: 0.4146 - accuracy: 0.74

In [139]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.920952041943868 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size': 4}
 mean=0.8938, std=0.003341 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.9035, std=0.01252 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.8371, std=0.004303 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.7893, std=0.004165 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.9132, std=0.009845 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.8905, std=0.008537 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.8116, std=0.007564 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.8317, std=0.02541 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.9171, std=0.007187 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.921, std=0.005542 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_s

## Train with best parameters

In [140]:
#Best Accuracy 0.66180948416392 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size': 4}
def create_model( init_mode='uniform', lr = 0.0001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,25), kernel_initializer=init_mode)) # 157, 12
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [141]:
seed = 7
np.random.seed(seed)

In [142]:
model = create_model()

In [143]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [144]:
import datetime, os

In [145]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [146]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-21 09:59:07.496604: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-21 09:59:07.496644: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-21 09:59:07.497029: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [147]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [148]:
history = model.fit(X_train, y_train, batch_size=4, epochs=500, validation_data=(X_val, y_val),
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

Epoch 1/500
  27/1420 [..............................] - ETA: 11s - loss: 0.8893 - accuracy: 0.4085

2021-09-21 09:59:10.488496: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-21 09:59:10.488520: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-21 09:59:10.569033: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-21 09:59:10.571858: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-21 09:59:10.578126: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210921-095907/train/plugins/profile/2021_09_21_09_59_10
2021-09-21 09:59:10.578877: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210921-095907/train/plugins/profile/2021_09_21_09_59_10/helemanc-Latitude-5410.trace.json.gz
2021-09-21 09:59:10.585267: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210921-095907/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500

Epoch 00020: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-05.
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500

Epoch 00024: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500

Epoch 00028: ReduceLROnPlateau reducing learning rate to 1.249999968422344e-05.
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500

Epoch 00032: ReduceLROnPlateau reducing learning rate to 6.24999984211172e-06.
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500

Epoch 00036: ReduceLROnPlateau reducing learning rate to 3.12499992105586e-06.
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500

Epoch 00040: ReduceLROnPlateau reducing learning rate to 1.56249996052793e-06.
Epoch 41/500
Epoch

In [149]:
%tensorboard --logdir logs

In [150]:
model.evaluate(X_test, y_test, batch_size=8)



[0.5573828816413879, 0.7140243649482727]

In [151]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.66      0.73      0.69       716
           1       0.77      0.70      0.74       924

    accuracy                           0.71      1640
   macro avg       0.71      0.72      0.71      1640
weighted avg       0.72      0.71      0.72      1640



## Save best model 

In [152]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_3/model_3_10")

INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_3/model_3_10/assets


# Experiment 3.11: RAVDESS - TESS - SAVEE - CREMA

In [227]:
df_train = pd.concat([RAV_train, SAVEE_train, TESS_train, CREMA_train])
df_val = pd.concat([RAV_val, SAVEE_val, CREMA_val])
df_test = pd.concat([RAV_test, SAVEE_test, TESS_test, CREMA_test])
#df_test = pd.concat([RAV_train, SAVEE_test])

In [228]:
df_train.reset_index(drop = True, inplace = True) 
df_val.reset_index(drop = True, inplace = True)
df_test.reset_index(drop = True, inplace = True)

## Feature Extraction

In [229]:
X_train, y_train, X_val, y_val, X_test, y_test = feature_extractor(df_train, df_val, df_test, 26)

100%|██████████████████████████████████████| 3160/3160 [00:23<00:00, 136.79it/s]
100%|███████████████████████████████████████| 3160/3160 [00:41<00:00, 76.96it/s]
100%|█████████████████████████████████████████| 300/300 [00:03<00:00, 75.29it/s]
100%|█████████████████████████████████████████| 300/300 [00:03<00:00, 77.91it/s]
100%|██████████████████████████████████████| 1700/1700 [00:03<00:00, 463.01it/s]
100%|███████████████████████████████████████| 1700/1700 [00:20<00:00, 81.98it/s]


In [230]:
y_train, y_val, y_test = encode_labels(y_train, y_val, y_test)

In [231]:
np.size(y_test)

1700

In [232]:
X_train, X_val, X_test, fitted_scaler = standard_scaling(X_train, X_val, X_test)

In [233]:
X_train.shape

(3160, 157, 25)

## Shuffle training data

In [234]:
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)

## Save Scaler

In [235]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_3/scaler_3_11.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [162]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [163]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-21 10:26:08.505843: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-21 10:26:08.506083: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-21 10:26:08.539080: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-21 10:26:08.539227: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-21 10:26:08.562277: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or dire

Epoch 1/50
264/264 - 12s - loss: 1.0408 - accuracy: 0.6026
Epoch 1/50
264/264 - 12s - loss: 0.6850 - accuracy: 0.6719
Epoch 1/50
264/264 - 12s - loss: 2.7956 - accuracy: 0.5608
Epoch 1/50
264/264 - 12s - loss: 0.6499 - accuracy: 0.6915
Epoch 1/50
264/264 - 12s - loss: 1.0495 - accuracy: 0.5771
Epoch 1/50
264/264 - 13s - loss: 0.6782 - accuracy: 0.6796
Epoch 1/50
264/264 - 13s - loss: 2.6865 - accuracy: 0.5577
Epoch 1/50
264/264 - 14s - loss: 1.1346 - accuracy: 0.5890
Epoch 2/50
264/264 - 12s - loss: 0.6820 - accuracy: 0.6876
Epoch 2/50
264/264 - 12s - loss: 1.4789 - accuracy: 0.6519
Epoch 2/50
264/264 - 12s - loss: 0.4641 - accuracy: 0.7312
Epoch 2/50
264/264 - 12s - loss: 0.4371 - accuracy: 0.7679
Epoch 2/50
264/264 - 13s - loss: 0.6875 - accuracy: 0.6626
Epoch 2/50
264/264 - 12s - loss: 1.5052 - accuracy: 0.6350
Epoch 2/50
264/264 - 13s - loss: 0.4733 - accuracy: 0.7314
Epoch 2/50
264/264 - 13s - loss: 0.6690 - accuracy: 0.6925
Epoch 3/50
264/264 - 11s - loss: 0.5682 - accuracy: 0.72

2021-09-21 10:35:19.450736: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 16532100 exceeds 10% of free system memory.


Epoch 50/50
264/264 - 11s - loss: 0.1637 - accuracy: 0.9297
Epoch 50/50
264/264 - 11s - loss: 0.1340 - accuracy: 0.9492


2021-09-21 10:35:20.876386: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 16532100 exceeds 10% of free system memory.


Epoch 50/50
264/264 - 11s - loss: 0.2845 - accuracy: 0.8580
Epoch 50/50
264/264 - 10s - loss: 0.2229 - accuracy: 0.8917
Epoch 50/50
264/264 - 10s - loss: 0.2714 - accuracy: 0.8681


2021-09-21 10:35:21.709245: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 16532100 exceeds 10% of free system memory.


132/132 - 3s - loss: 0.2736 - accuracy: 0.8860
132/132 - 2s - loss: 0.2412 - accuracy: 0.9127
132/132 - 2s - loss: 0.2144 - accuracy: 0.9126
Epoch 50/50
264/264 - 10s - loss: 0.2549 - accuracy: 0.8709
132/132 - 2s - loss: 0.3372 - accuracy: 0.8425


2021-09-21 10:35:23.416347: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 33079900 exceeds 10% of free system memory.
2021-09-21 10:35:23.558251: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 33064200 exceeds 10% of free system memory.


132/132 - 2s - loss: 0.2873 - accuracy: 0.8691


2021-09-21 10:35:23.693318: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 33079900 exceeds 10% of free system memory.
2021-09-21 10:35:23.843977: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 33079900 exceeds 10% of free system memory.


132/132 - 2s - loss: 0.3337 - accuracy: 0.8433
Epoch 50/50
264/264 - 9s - loss: 0.2077 - accuracy: 0.9089


2021-09-21 10:35:23.936784: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 16532100 exceeds 10% of free system memory.
2021-09-21 10:35:24.327152: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 33079900 exceeds 10% of free system memory.


132/132 - 1s - loss: 0.2763 - accuracy: 0.8718
132/132 - 1s - loss: 0.2885 - accuracy: 0.8689


2021-09-21 10:35:24.872021: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 33079900 exceeds 10% of free system memory.
2021-09-21 10:35:25.713663: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 33064200 exceeds 10% of free system memory.


Epoch 1/50
264/264 - 10s - loss: 3.0803 - accuracy: 0.5434
Epoch 1/50
527/527 - 12s - loss: 1.7580 - accuracy: 0.6602
Epoch 1/50
527/527 - 12s - loss: 1.6199 - accuracy: 0.6472
Epoch 1/50
264/264 - 12s - loss: 0.6243 - accuracy: 0.6819
Epoch 1/50
527/527 - 13s - loss: 1.6696 - accuracy: 0.6213
Epoch 1/50
264/264 - 12s - loss: 0.6082 - accuracy: 0.6730
Epoch 1/50
264/264 - 15s - loss: 0.5918 - accuracy: 0.6834
Epoch 1/50
264/264 - 17s - loss: 0.9476 - accuracy: 0.5575
Epoch 2/50
264/264 - 11s - loss: 1.4981 - accuracy: 0.6445
Epoch 2/50
264/264 - 11s - loss: 0.4456 - accuracy: 0.7545
Epoch 2/50
527/527 - 12s - loss: 0.5321 - accuracy: 0.7118
Epoch 2/50
527/527 - 13s - loss: 0.5562 - accuracy: 0.7086
Epoch 2/50
264/264 - 11s - loss: 0.4452 - accuracy: 0.7318
Epoch 2/50
527/527 - 13s - loss: 0.5421 - accuracy: 0.7010
Epoch 2/50
264/264 - 11s - loss: 0.4508 - accuracy: 0.7437
Epoch 2/50
264/264 - 11s - loss: 0.7147 - accuracy: 0.6353
Epoch 3/50
264/264 - 11s - loss: 1.1106 - accuracy: 0.65

2021-09-21 10:45:06.608155: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 16532100 exceeds 10% of free system memory.


Epoch 44/50
527/527 - 13s - loss: 0.2177 - accuracy: 0.9046
132/132 - 3s - loss: 0.2455 - accuracy: 0.8899
132/132 - 2s - loss: 0.3199 - accuracy: 0.8348
Epoch 50/50
264/264 - 10s - loss: 0.2526 - accuracy: 0.8879
Epoch 44/50
527/527 - 12s - loss: 0.2418 - accuracy: 0.8742


2021-09-21 10:45:09.260040: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 33079900 exceeds 10% of free system memory.


132/132 - 3s - loss: 0.2938 - accuracy: 0.8784
Epoch 50/50
264/264 - 11s - loss: 0.1263 - accuracy: 0.9468


2021-09-21 10:45:10.596874: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 33064200 exceeds 10% of free system memory.


132/132 - 2s - loss: 0.3142 - accuracy: 0.8463
132/132 - 2s - loss: 0.2496 - accuracy: 0.8946


2021-09-21 10:45:11.961475: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 33079900 exceeds 10% of free system memory.
2021-09-21 10:45:12.515464: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 33079900 exceeds 10% of free system memory.


Epoch 45/50
527/527 - 10s - loss: 0.2521 - accuracy: 0.8770
Epoch 45/50
527/527 - 10s - loss: 0.2260 - accuracy: 0.9051
Epoch 45/50
527/527 - 8s - loss: 0.2616 - accuracy: 0.8695
Epoch 1/50
264/264 - 13s - loss: 0.9020 - accuracy: 0.5657
Epoch 1/50
264/264 - 14s - loss: 0.8561 - accuracy: 0.5710
Epoch 1/50
264/264 - 14s - loss: 3.0166 - accuracy: 0.5423
Epoch 1/50
264/264 - 15s - loss: 3.4146 - accuracy: 0.5553
Epoch 46/50
527/527 - 14s - loss: 0.2531 - accuracy: 0.8775
Epoch 46/50
527/527 - 13s - loss: 0.2795 - accuracy: 0.8619
Epoch 46/50
527/527 - 13s - loss: 0.2125 - accuracy: 0.9065
Epoch 1/50
264/264 - 18s - loss: 3.0657 - accuracy: 0.5396
Epoch 2/50
264/264 - 12s - loss: 0.6986 - accuracy: 0.6440
Epoch 2/50
264/264 - 12s - loss: 0.6655 - accuracy: 0.6559
Epoch 2/50
264/264 - 12s - loss: 1.9591 - accuracy: 0.6097
Epoch 2/50
264/264 - 12s - loss: 2.3274 - accuracy: 0.5985
Epoch 2/50
264/264 - 12s - loss: 2.1443 - accuracy: 0.5857
Epoch 47/50
527/527 - 13s - loss: 0.2427 - accuracy

2021-09-21 10:46:24.063280: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 16532100 exceeds 10% of free system memory.


Epoch 6/50
264/264 - 12s - loss: 0.9963 - accuracy: 0.6849
264/264 - 3s - loss: 0.2540 - accuracy: 0.8803
264/264 - 3s - loss: 0.2870 - accuracy: 0.8405
264/264 - 3s - loss: 0.2827 - accuracy: 0.8615


2021-09-21 10:46:28.615033: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 33064200 exceeds 10% of free system memory.
2021-09-21 10:46:28.786205: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 33079900 exceeds 10% of free system memory.


Epoch 6/50
264/264 - 10s - loss: 0.8530 - accuracy: 0.6996


2021-09-21 10:46:29.331940: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 33079900 exceeds 10% of free system memory.


Epoch 7/50
264/264 - 9s - loss: 0.4636 - accuracy: 0.7598
Epoch 7/50
264/264 - 9s - loss: 0.4922 - accuracy: 0.7561
Epoch 7/50
264/264 - 9s - loss: 0.7334 - accuracy: 0.7089
Epoch 7/50
264/264 - 9s - loss: 0.8444 - accuracy: 0.6958
Epoch 7/50
264/264 - 7s - loss: 0.8347 - accuracy: 0.6830
Epoch 8/50
264/264 - 11s - loss: 0.4652 - accuracy: 0.7556
Epoch 8/50
264/264 - 11s - loss: 0.4631 - accuracy: 0.7570
Epoch 8/50
264/264 - 11s - loss: 0.7323 - accuracy: 0.6800
Epoch 1/50
264/264 - 15s - loss: 1.5484 - accuracy: 0.6412
Epoch 8/50
264/264 - 11s - loss: 0.7625 - accuracy: 0.7076
Epoch 1/50
264/264 - 16s - loss: 1.8191 - accuracy: 0.6420
Epoch 1/50
264/264 - 17s - loss: 1.4805 - accuracy: 0.6531
Epoch 8/50
264/264 - 12s - loss: 0.7296 - accuracy: 0.6977
Epoch 9/50
264/264 - 11s - loss: 0.4434 - accuracy: 0.7741
Epoch 9/50
264/264 - 11s - loss: 0.6421 - accuracy: 0.6994
Epoch 9/50
264/264 - 12s - loss: 0.4545 - accuracy: 0.7698
Epoch 2/50
264/264 - 11s - loss: 0.5527 - accuracy: 0.7019
Ep

2021-09-21 10:54:43.111706: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 16532100 exceeds 10% of free system memory.


Epoch 43/50
264/264 - 11s - loss: 0.1952 - accuracy: 0.9084
Epoch 43/50
264/264 - 11s - loss: 0.2525 - accuracy: 0.8704
Epoch 50/50
264/264 - 11s - loss: 0.3320 - accuracy: 0.8262
132/132 - 2s - loss: 0.2830 - accuracy: 0.8661
132/132 - 3s - loss: 0.2965 - accuracy: 0.8642
132/132 - 2s - loss: 0.3860 - accuracy: 0.7979


2021-09-21 10:54:47.336794: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 33079900 exceeds 10% of free system memory.


Epoch 50/50
264/264 - 10s - loss: 0.3329 - accuracy: 0.8263


2021-09-21 10:54:47.783929: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 16532100 exceeds 10% of free system memory.


Epoch 43/50
264/264 - 10s - loss: 0.2168 - accuracy: 0.8951
Epoch 50/50
264/264 - 10s - loss: 0.3444 - accuracy: 0.8144
132/132 - 2s - loss: 0.3942 - accuracy: 0.7958
132/132 - 2s - loss: 0.3680 - accuracy: 0.8177


2021-09-21 10:54:51.851231: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 33079900 exceeds 10% of free system memory.


Epoch 44/50
264/264 - 8s - loss: 0.2170 - accuracy: 0.9046
Epoch 44/50
264/264 - 8s - loss: 0.2254 - accuracy: 0.8984
Epoch 44/50
264/264 - 8s - loss: 0.2162 - accuracy: 0.9065
Epoch 45/50
264/264 - 10s - loss: 0.2037 - accuracy: 0.8975
Epoch 1/50
527/527 - 15s - loss: 0.8378 - accuracy: 0.6089
Epoch 1/50
527/527 - 15s - loss: 0.7583 - accuracy: 0.6182
Epoch 45/50
264/264 - 10s - loss: 0.2365 - accuracy: 0.8794
Epoch 1/50
527/527 - 15s - loss: 0.7774 - accuracy: 0.6084
Epoch 45/50
264/264 - 12s - loss: 0.2042 - accuracy: 0.9027
Epoch 1/50
527/527 - 20s - loss: 0.6540 - accuracy: 0.6372
Epoch 1/50
527/527 - 20s - loss: 0.6749 - accuracy: 0.6388
Epoch 46/50
264/264 - 12s - loss: 0.1814 - accuracy: 0.9165
Epoch 46/50
264/264 - 12s - loss: 0.2220 - accuracy: 0.8946
Epoch 2/50
527/527 - 14s - loss: 0.5611 - accuracy: 0.7205
Epoch 2/50
527/527 - 14s - loss: 0.5574 - accuracy: 0.7023
Epoch 2/50
527/527 - 14s - loss: 0.5745 - accuracy: 0.7076
Epoch 46/50
264/264 - 12s - loss: 0.1950 - accuracy

2021-09-21 11:04:31.247506: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 16547800 exceeds 10% of free system memory.


Epoch 49/50
527/527 - 12s - loss: 0.1237 - accuracy: 0.9521
264/264 - 3s - loss: 0.2687 - accuracy: 0.8843
Epoch 50/50
527/527 - 12s - loss: 0.1427 - accuracy: 0.9402
264/264 - 2s - loss: 0.2313 - accuracy: 0.9003
Epoch 44/50
527/527 - 11s - loss: 0.1316 - accuracy: 0.9397
Epoch 50/50
527/527 - 10s - loss: 0.1365 - accuracy: 0.9411
Epoch 50/50
527/527 - 10s - loss: 0.1186 - accuracy: 0.9482
264/264 - 2s - loss: 0.2761 - accuracy: 0.8708
Epoch 50/50
527/527 - 9s - loss: 0.1154 - accuracy: 0.9487
264/264 - 1s - loss: 0.2513 - accuracy: 0.8909
264/264 - 1s - loss: 0.2512 - accuracy: 0.8927
Epoch 45/50
527/527 - 5s - loss: 0.1366 - accuracy: 0.9407
Epoch 46/50
527/527 - 3s - loss: 0.1417 - accuracy: 0.9407
Epoch 47/50
527/527 - 3s - loss: 0.1194 - accuracy: 0.9549
Epoch 48/50
527/527 - 3s - loss: 0.1397 - accuracy: 0.9459
Epoch 49/50
527/527 - 3s - loss: 0.1162 - accuracy: 0.9449
Epoch 50/50
527/527 - 3s - loss: 0.1227 - accuracy: 0.9430
264/264 - 1s - loss: 0.2088 - accuracy: 0.9117
Epoch

In [164]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.9037946462631226 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.8699, std=0.001315 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.9038, std=0.01255 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.8402, std=0.003854 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.8608, std=0.01629 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.8877, std=0.006786 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.8589, std=0.008919 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.8038, std=0.009841 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.8737, std=0.01417 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.8851, std=0.01203 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.8984, std=0.009405 using {'lr': 0.0001, 'init_mode': 'uniform', 'ba

## Train with best parameters

In [71]:
#Best Accuracy 0.9037946462631226 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
def create_model( init_mode='glorot_normal', lr = 0.001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,25), kernel_initializer=init_mode)) # 157
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [72]:
seed = 7
np.random.seed(seed)

In [73]:
model = create_model()

2021-09-21 12:17:59.343454: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-09-21 12:17:59.343939: I tensorflow/compiler/jit/xla_gpu_device.cc:99] Not creating XLA devices, tf_xla_enable_xla_devices not set


In [74]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [75]:
import datetime, os

In [76]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [77]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-21 12:17:59.582572: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-21 12:17:59.582625: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-21 12:17:59.636616: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [78]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [79]:
history = model.fit(X_train, y_train, batch_size=8, epochs=500, validation_data=(X_val, y_val),
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

2021-09-21 12:17:59.730160: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2021-09-21 12:17:59.753175: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2299965000 Hz


Epoch 1/500
 22/395 [>.............................] - ETA: 2s - loss: 1.4423 - accuracy: 0.4584

2021-09-21 12:18:00.200411: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-21 12:18:00.200440: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-21 12:18:00.209855: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-21 12:18:00.212224: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-21 12:18:00.215880: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210921-121759/train/plugins/profile/2021_09_21_12_18_00
2021-09-21 12:18:00.216608: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210921-121759/train/plugins/profile/2021_09_21_12_18_00/helemanc-Latitude-5410.trace.json.gz
2021-09-21 12:18:00.222715: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210921-121759/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500

Epoch 00010: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500

Epoch 00018: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500

Epoch 00022: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500

Epoch 00032: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500

Epoch 00036: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500

Epoch 00040: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.
Epoch 41/500
Ep

In [80]:
%tensorboard --logdir logs

In [84]:
model.evaluate(X_test, y_test, batch_size=4)



[0.6245795488357544, 0.698235273361206]

In [82]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.66      0.62      0.64       728
           1       0.73      0.76      0.74       972

    accuracy                           0.70      1700
   macro avg       0.69      0.69      0.69      1700
weighted avg       0.70      0.70      0.70      1700



## Save best model 

In [83]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_3/model_3_11")

2021-09-21 12:21:01.532581: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_3/model_3_11/assets


# Experiment 3.12:  RAVDESS - TESS - SAVEE - CREMA noise

## Read dataframes

In [236]:
preprocess_path_rav = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/ravdess"
preprocess_path_savee = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/savee"
preprocess_path_tess = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/tess"
preprocess_path_crema = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/crema"

df_train_rav = pd.read_csv(os.path.join(preprocess_path_rav,"df_train.csv"))
df_val_rav = pd.read_csv(os.path.join(preprocess_path_rav,"df_val.csv"))
df_test_rav = pd.read_csv(os.path.join(preprocess_path_rav,"df_test.csv"))  

df_train_tess = pd.read_csv(os.path.join(preprocess_path_tess,"df_train.csv"))
df_test_tess= pd.read_csv(os.path.join(preprocess_path_tess,"df_test.csv"))  

df_train_savee = pd.read_csv(os.path.join(preprocess_path_savee,"df_train.csv"))
df_val_savee = pd.read_csv(os.path.join(preprocess_path_savee,"df_val.csv"))
df_test_savee = pd.read_csv(os.path.join(preprocess_path_savee,"df_test.csv"))  

df_train_crema = pd.read_csv(os.path.join(preprocess_path_crema,"df_train.csv"))
df_val_crema = pd.read_csv(os.path.join(preprocess_path_crema,"df_val.csv"))
df_test_crema = pd.read_csv(os.path.join(preprocess_path_crema,"df_test.csv")) 

In [237]:
df_train = pd.concat([df_train_rav, df_train_savee, df_train_tess, df_train_crema])
df_val = pd.concat([df_val_rav, df_val_savee, df_train_crema, df_train_crema])
df_test = pd.concat([df_test_rav, df_test_savee, df_test_tess])
#df_test = pd.concat([df_test_rav, df_test_savee, df_test_crema ])
#df_test = pd.concat([df_test_rav, df_test_savee ])

In [238]:
df_train.reset_index(drop = True, inplace = True) 
df_val.reset_index(drop = True, inplace = True)
df_test.reset_index(drop = True, inplace = True)

## Feature Extraction

In [239]:
X_train, y_train, X_val, y_val, X_test, y_test = feature_extractor(df_train, df_val, df_test, 26) # 13

100%|██████████████████████████████████████| 6320/6320 [00:44<00:00, 142.63it/s]
100%|███████████████████████████████████████| 6320/6320 [01:27<00:00, 71.96it/s]
100%|██████████████████████████████████████| 1520/1520 [00:04<00:00, 358.09it/s]
100%|███████████████████████████████████████| 1520/1520 [00:22<00:00, 67.96it/s]
100%|██████████████████████████████████████| 1640/1640 [00:03<00:00, 419.40it/s]
100%|███████████████████████████████████████| 1640/1640 [00:23<00:00, 70.78it/s]


In [240]:
X_train[0].shape

(157, 25)

In [241]:
y_train, y_val, y_test = encode_labels(y_train, y_val, y_test)

In [242]:
np.size(y_val)

1520

In [243]:
X_train, X_val, X_test, fitted_scaler = standard_scaling(X_train, X_val, X_test)

In [244]:
X_train.shape

(6320, 157, 25)

## Shuffle training data

In [245]:
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)

## Save Scaler

In [246]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_3/scaler_3_12.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [95]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [96]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-21 12:26:13.176233: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-21 12:26:13.176555: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-21 12:26:13.201266: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-21 12:26:13.201426: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-21 12:26:13.226709: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or dire

Epoch 1/50
527/527 - 22s - loss: 0.6622 - accuracy: 0.6513
Epoch 1/50
527/527 - 22s - loss: 0.9647 - accuracy: 0.5794
Epoch 1/50
527/527 - 22s - loss: 0.9176 - accuracy: 0.5934
Epoch 1/50
527/527 - 22s - loss: 0.6296 - accuracy: 0.6658
Epoch 1/50
527/527 - 22s - loss: 0.8889 - accuracy: 0.5987
Epoch 1/50
527/527 - 23s - loss: 0.6588 - accuracy: 0.6602
Epoch 1/50
527/527 - 23s - loss: 2.3885 - accuracy: 0.5625
Epoch 1/50
527/527 - 24s - loss: 2.6048 - accuracy: 0.5735
Epoch 2/50
527/527 - 21s - loss: 0.4678 - accuracy: 0.7185
Epoch 2/50
527/527 - 21s - loss: 0.6320 - accuracy: 0.6487
Epoch 2/50
527/527 - 21s - loss: 0.6176 - accuracy: 0.6597
Epoch 2/50
527/527 - 21s - loss: 0.4721 - accuracy: 0.7090
Epoch 2/50
527/527 - 22s - loss: 0.6400 - accuracy: 0.6506
Epoch 2/50
527/527 - 21s - loss: 1.0681 - accuracy: 0.6240
Epoch 2/50
527/527 - 22s - loss: 0.4689 - accuracy: 0.7062
Epoch 2/50
527/527 - 22s - loss: 1.1255 - accuracy: 0.6276
Epoch 3/50
527/527 - 22s - loss: 0.4431 - accuracy: 0.72

2021-09-21 12:44:39.053006: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 33079900 exceeds 10% of free system memory.


Epoch 50/50
527/527 - 22s - loss: 0.1955 - accuracy: 0.9134


2021-09-21 12:44:40.240669: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 33079900 exceeds 10% of free system memory.


Epoch 50/50
527/527 - 22s - loss: 0.2194 - accuracy: 0.9037


2021-09-21 12:44:40.662636: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 33064200 exceeds 10% of free system memory.


Epoch 50/50
527/527 - 23s - loss: 0.2077 - accuracy: 0.9091


2021-09-21 12:44:41.077321: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 33079900 exceeds 10% of free system memory.


Epoch 50/50
527/527 - 22s - loss: 0.2403 - accuracy: 0.8785


2021-09-21 12:44:41.587810: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 33079900 exceeds 10% of free system memory.


264/264 - 4s - loss: 0.2955 - accuracy: 0.8609
264/264 - 4s - loss: 0.2495 - accuracy: 0.8766


2021-09-21 12:44:44.443841: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 66159800 exceeds 10% of free system memory.
2021-09-21 12:44:44.584951: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 33079900 exceeds 10% of free system memory.


Epoch 50/50
527/527 - 21s - loss: 0.3287 - accuracy: 0.8206
264/264 - 4s - loss: 0.2632 - accuracy: 0.8875
Epoch 50/50
527/527 - 21s - loss: 0.3195 - accuracy: 0.8263


2021-09-21 12:44:44.742985: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 33079900 exceeds 10% of free system memory.


264/264 - 4s - loss: 0.2292 - accuracy: 0.9022


2021-09-21 12:44:45.090748: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 66144100 exceeds 10% of free system memory.
2021-09-21 12:44:45.111030: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 66144100 exceeds 10% of free system memory.


Epoch 50/50
527/527 - 20s - loss: 0.2470 - accuracy: 0.8835
264/264 - 4s - loss: 0.2767 - accuracy: 0.8757


2021-09-21 12:44:45.298041: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 33064200 exceeds 10% of free system memory.
2021-09-21 12:44:45.485226: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 66159800 exceeds 10% of free system memory.
2021-09-21 12:44:45.741369: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 66144100 exceeds 10% of free system memory.


264/264 - 2s - loss: 0.3441 - accuracy: 0.8073
264/264 - 2s - loss: 0.3418 - accuracy: 0.8059
264/264 - 2s - loss: 0.2831 - accuracy: 0.8856


2021-09-21 12:44:47.908447: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 66144100 exceeds 10% of free system memory.
2021-09-21 12:44:47.915629: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 66159800 exceeds 10% of free system memory.
2021-09-21 12:44:49.136925: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 66144100 exceeds 10% of free system memory.


Epoch 1/50
527/527 - 19s - loss: 2.4939 - accuracy: 0.5662
Epoch 1/50
527/527 - 23s - loss: 0.5880 - accuracy: 0.6625
Epoch 1/50
1054/1054 - 24s - loss: 1.1308 - accuracy: 0.6119
Epoch 1/50
1054/1054 - 26s - loss: 1.1156 - accuracy: 0.6069
Epoch 1/50
1054/1054 - 27s - loss: 1.0950 - accuracy: 0.6149
Epoch 1/50
527/527 - 26s - loss: 0.5863 - accuracy: 0.6523
Epoch 1/50
527/527 - 27s - loss: 0.5887 - accuracy: 0.6666
Epoch 1/50
527/527 - 28s - loss: 0.8340 - accuracy: 0.5775
Epoch 2/50
527/527 - 23s - loss: 1.1078 - accuracy: 0.6236
Epoch 2/50
527/527 - 22s - loss: 0.4668 - accuracy: 0.7230
Epoch 2/50
1054/1054 - 26s - loss: 0.5225 - accuracy: 0.6603
Epoch 2/50
527/527 - 23s - loss: 0.4643 - accuracy: 0.7054
Epoch 2/50
1054/1054 - 26s - loss: 0.5451 - accuracy: 0.6660
Epoch 2/50
527/527 - 23s - loss: 0.4629 - accuracy: 0.7121
Epoch 2/50
1054/1054 - 26s - loss: 0.5445 - accuracy: 0.6431
Epoch 2/50
527/527 - 23s - loss: 0.6643 - accuracy: 0.6418
Epoch 3/50
527/527 - 23s - loss: 0.7777 - ac

2021-09-21 13:03:48.777791: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 33079900 exceeds 10% of free system memory.


264/264 - 4s - loss: 0.3468 - accuracy: 0.8186
Epoch 50/50
527/527 - 23s - loss: 0.1986 - accuracy: 0.9167
264/264 - 4s - loss: 0.2433 - accuracy: 0.8965


2021-09-21 13:03:50.972941: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 33064200 exceeds 10% of free system memory.
2021-09-21 13:03:51.466055: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 66144100 exceeds 10% of free system memory.
2021-09-21 13:03:52.567378: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 66159800 exceeds 10% of free system memory.


264/264 - 3s - loss: 0.2358 - accuracy: 0.9060
Epoch 44/50
1054/1054 - 23s - loss: 0.3640 - accuracy: 0.7646
Epoch 44/50
1054/1054 - 25s - loss: 0.3646 - accuracy: 0.7695


2021-09-21 13:03:53.793629: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 66144100 exceeds 10% of free system memory.


264/264 - 3s - loss: 0.2636 - accuracy: 0.8808
Epoch 50/50
527/527 - 20s - loss: 0.2592 - accuracy: 0.8721
Epoch 44/50
1054/1054 - 23s - loss: 0.3936 - accuracy: 0.7534


2021-09-21 13:03:55.260062: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 33079900 exceeds 10% of free system memory.
2021-09-21 13:03:55.551478: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 66144100 exceeds 10% of free system memory.


264/264 - 3s - loss: 0.2947 - accuracy: 0.8647


2021-09-21 13:04:00.356855: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 66159800 exceeds 10% of free system memory.


Epoch 45/50
1054/1054 - 23s - loss: 0.3532 - accuracy: 0.7764
Epoch 45/50
1054/1054 - 23s - loss: 0.3533 - accuracy: 0.7746
Epoch 1/50
527/527 - 25s - loss: 0.8719 - accuracy: 0.5611
Epoch 1/50
527/527 - 24s - loss: 0.8804 - accuracy: 0.5683
Epoch 45/50
1054/1054 - 23s - loss: 0.3814 - accuracy: 0.7600
Epoch 1/50
527/527 - 26s - loss: 2.9708 - accuracy: 0.5298
Epoch 1/50
527/527 - 27s - loss: 2.5304 - accuracy: 0.5542
Epoch 1/50
527/527 - 29s - loss: 2.6651 - accuracy: 0.5479
Epoch 2/50
527/527 - 23s - loss: 0.6809 - accuracy: 0.6247
Epoch 2/50
527/527 - 23s - loss: 0.6719 - accuracy: 0.6260
Epoch 46/50
1054/1054 - 26s - loss: 0.3957 - accuracy: 0.7726
Epoch 2/50
527/527 - 23s - loss: 1.6709 - accuracy: 0.5953
Epoch 46/50
1054/1054 - 26s - loss: 0.3613 - accuracy: 0.7765
Epoch 46/50
1054/1054 - 26s - loss: 0.3987 - accuracy: 0.7346
Epoch 2/50
527/527 - 23s - loss: 1.5049 - accuracy: 0.6003
Epoch 2/50
527/527 - 23s - loss: 1.5164 - accuracy: 0.6039
Epoch 3/50
527/527 - 23s - loss: 0.602

2021-09-21 13:06:26.001350: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 33064200 exceeds 10% of free system memory.


Epoch 50/50
1054/1054 - 26s - loss: 0.3627 - accuracy: 0.7750


2021-09-21 13:06:27.780183: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 33079900 exceeds 10% of free system memory.


Epoch 50/50
1054/1054 - 26s - loss: 0.3727 - accuracy: 0.7643


2021-09-21 13:06:28.866891: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 33079900 exceeds 10% of free system memory.


527/527 - 6s - loss: 0.3586 - accuracy: 0.7607
Epoch 7/50
527/527 - 22s - loss: 0.4906 - accuracy: 0.7282
Epoch 7/50
527/527 - 21s - loss: 0.4864 - accuracy: 0.7454
527/527 - 5s - loss: 0.3861 - accuracy: 0.7371


2021-09-21 13:06:33.622205: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 66144100 exceeds 10% of free system memory.


527/527 - 5s - loss: 0.3950 - accuracy: 0.7409


2021-09-21 13:06:34.711954: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 66144100 exceeds 10% of free system memory.


Epoch 7/50
527/527 - 21s - loss: 0.6090 - accuracy: 0.6777


2021-09-21 13:06:35.441615: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 66159800 exceeds 10% of free system memory.


Epoch 7/50
527/527 - 20s - loss: 0.5945 - accuracy: 0.6841
Epoch 7/50
527/527 - 19s - loss: 0.6007 - accuracy: 0.6865
Epoch 8/50
527/527 - 19s - loss: 0.4754 - accuracy: 0.7401
Epoch 8/50
527/527 - 21s - loss: 0.4769 - accuracy: 0.7447
Epoch 8/50
527/527 - 20s - loss: 0.5998 - accuracy: 0.6734
Epoch 8/50
527/527 - 21s - loss: 0.5900 - accuracy: 0.6796
Epoch 1/50
527/527 - 26s - loss: 1.2248 - accuracy: 0.6126
Epoch 1/50
527/527 - 25s - loss: 1.1973 - accuracy: 0.6050
Epoch 1/50
527/527 - 27s - loss: 1.2639 - accuracy: 0.6386
Epoch 8/50
527/527 - 22s - loss: 0.6038 - accuracy: 0.6770
Epoch 9/50
527/527 - 23s - loss: 0.4646 - accuracy: 0.7432
Epoch 9/50
527/527 - 22s - loss: 0.4667 - accuracy: 0.7439
Epoch 9/50
527/527 - 22s - loss: 0.5579 - accuracy: 0.7014
Epoch 9/50
527/527 - 23s - loss: 0.5647 - accuracy: 0.6829
Epoch 2/50
527/527 - 22s - loss: 0.5492 - accuracy: 0.6729
Epoch 2/50
527/527 - 23s - loss: 0.5538 - accuracy: 0.6504
Epoch 9/50
527/527 - 21s - loss: 0.5778 - accuracy: 0.68

2021-09-21 13:22:27.613792: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 33064200 exceeds 10% of free system memory.


Epoch 50/50
527/527 - 22s - loss: 0.3624 - accuracy: 0.7940
264/264 - 4s - loss: 0.2959 - accuracy: 0.8804
264/264 - 4s - loss: 0.2993 - accuracy: 0.8884
Epoch 50/50
527/527 - 21s - loss: 0.3658 - accuracy: 0.7918


2021-09-21 13:22:33.534557: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 66144100 exceeds 10% of free system memory.


Epoch 43/50
527/527 - 21s - loss: 0.3054 - accuracy: 0.8341
264/264 - 4s - loss: 0.3742 - accuracy: 0.7741
Epoch 43/50
527/527 - 20s - loss: 0.3046 - accuracy: 0.8360
Epoch 50/50
527/527 - 20s - loss: 0.3611 - accuracy: 0.8047
Epoch 43/50
527/527 - 20s - loss: 0.2995 - accuracy: 0.8445
264/264 - 3s - loss: 0.3858 - accuracy: 0.7470
264/264 - 3s - loss: 0.3831 - accuracy: 0.7901
Epoch 44/50
527/527 - 18s - loss: 0.3193 - accuracy: 0.8213
Epoch 44/50
527/527 - 19s - loss: 0.3053 - accuracy: 0.8334
Epoch 44/50
527/527 - 20s - loss: 0.2858 - accuracy: 0.8490
Epoch 1/50
1054/1054 - 28s - loss: 0.7617 - accuracy: 0.6057
Epoch 1/50
1054/1054 - 28s - loss: 0.7549 - accuracy: 0.6005
Epoch 1/50
1054/1054 - 30s - loss: 0.7472 - accuracy: 0.6092
Epoch 1/50
1054/1054 - 31s - loss: 0.6247 - accuracy: 0.6473
Epoch 1/50
1054/1054 - 32s - loss: 0.6483 - accuracy: 0.6231
Epoch 45/50
527/527 - 23s - loss: 0.2990 - accuracy: 0.8374
Epoch 45/50
527/527 - 23s - loss: 0.2983 - accuracy: 0.8351
Epoch 45/50
52

In [97]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.9003167549769083 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size': 4}
 mean=0.8741, std=0.01012 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.8888, std=0.0105 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.8106, std=0.005694 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.7462, std=0.01035 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.8945, std=0.0104 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.8778, std=0.009833 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.7704, std=0.01778 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.8369, std=0.01905 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.8926, std=0.00455 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.9003, std=0.001759 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size':

## Train with best parameters

In [98]:
#Best Accuracy 0.9003167549769083 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size': 4}
def create_model( init_mode='uniform', lr = 0.0001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,25), kernel_initializer=init_mode)) # 157, 12
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [99]:
seed = 7
np.random.seed(seed)

In [100]:
model = create_model()

In [101]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [102]:
import datetime, os

In [103]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [104]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-22 12:05:16.170238: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-22 12:05:16.170350: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-22 12:05:16.172112: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [105]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [106]:
history = model.fit(X_train, y_train, batch_size=4, epochs=500, validation_data=(X_val, y_val),
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

Epoch 1/500
  31/1580 [..............................] - ETA: 9s - loss: 0.8171 - accuracy: 0.5250 

2021-09-22 12:05:25.369374: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-22 12:05:25.369398: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-22 12:05:25.433143: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-22 12:05:25.433952: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-22 12:05:25.435197: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210922-120515/train/plugins/profile/2021_09_22_12_05_25
2021-09-22 12:05:25.435890: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210922-120515/train/plugins/profile/2021_09_22_12_05_25/helemanc-Latitude-5410.trace.json.gz
2021-09-22 12:05:25.437013: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210922-120515/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500

Epoch 00007: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-05.
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500

Epoch 00042: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500

Epoch 00057: ReduceLROnPlateau reducing learning rate to 1.249999968422344e-05.
Epoch 58/500
Epoch 59/500
Epoch 60/50

In [107]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 5407), started 1 day, 0:41:57 ago. (Use '!kill 5407' to kill it.)

In [108]:
model.evaluate(X_test, y_test, batch_size=4)



[0.6174464821815491, 0.699999988079071]

In [109]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.67      0.61      0.64       716
           1       0.72      0.77      0.74       924

    accuracy                           0.70      1640
   macro avg       0.70      0.69      0.69      1640
weighted avg       0.70      0.70      0.70      1640



## Save best model 

In [110]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_3/model_3_12")

INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_3/model_3_12/assets
