# Configuration

NOTES: The warnings after the import are referred to the fact that Tensorflow 2.x versions are built to directly look for a GPU in the system. The warning can be forgot if you are not going to use the GPU. 


In [12]:
!source myenv/bin/activate

In [13]:
# samples in 5 seconds of audio, 16 KHz sample rate 
LENGTH_CHOSEN =  80000

In [22]:
import os
import librosa
import numpy as np
from tqdm.notebook import tqdm
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
import seaborn as sns
sns.set_style('whitegrid')
import IPython.display as ipd
import librosa.display
import numpy as np
import pickle
import scipy
import ipywidgets
import math

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import make_pipeline
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score, confusion_matrix
from scipy.cluster.hierarchy import dendrogram
from sklearn.cluster import AgglomerativeClustering
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import KFold, StratifiedKFold


from tqdm import tqdm

import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, Conv2D, AveragePooling1D, MaxPooling2D, Flatten
from tensorflow.keras.optimizers import SGD, Adam 
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras import regularizers

# from livelossplot import PlotLossesKeras
tf.config.list_physical_devices('GPU')

[]

# Utils

In [23]:
def load_files(df):
    X = []
    for i in tqdm(df['path']): 
        X.append(librosa.load(i, res_type='kaiser_fast', sr=16000))
    return X

def extract_samples(X): 
    samples = []
    for ind,i in enumerate(X):
        samples.append(i[0])
    return samples 

def extract_labels(df): 
    labels = df['emotion_label'].copy()
    return labels 

def compute_lengths(samples): 
    lengths = [len(x) for x in samples]
    return lengths 

def check_outliers(lengths):
    # outliers
    lengths = np.array(lengths)
    print((lengths > 300000).sum())
    new_lengths = lengths[lengths < 300000]
    return new_lengths 

def compute_mean_length(lengths): 
    return lengths.mean()


def cut_and_pad(samples, labels, length_chosen = LENGTH_CHOSEN): 
    X_new = []
    y_new = []
    count = 0 
    for ind,i in enumerate(samples):
        if i.shape[0] < 300000:
            if i.shape[0] > length_chosen:
                new = i[:length_chosen]
                X_new.append(new)
            elif i.shape[0] < length_chosen:
                new = np.pad(i,math.ceil((length_chosen-i.shape[0])/2), mode='median')
                X_new.append(new)
            else:
                X_new.append(i)
            y_new.append(labels[count])
        count+=1
    
    return X_new, y_new
    
def compute_mfccs(samples, n_mfcc): 
    mfccs = []
    for i in tqdm(samples):
        mfcc = librosa.feature.mfcc(y=i, sr=16000, n_mfcc=n_mfcc)
        mfcc = mfcc.T
        mfcc = np.array(mfcc)
        mfccs.append(mfcc[:, 1:]) # get rid of the first component 
    mfccs = np.array(mfccs)
    return mfccs

def compute_energy(samples): 
    energy_per_sample = []
    for i in tqdm(samples):
        energy = librosa.feature.rms(i)
        energy = energy.T 
        energy = np.array(energy)
        energy_per_sample.append(energy) 
    return energy_per_sample
       
def feature_extractor(df_train, df_val, df_test, n_mfcc): 
    load_train = load_files(df_train)
    samples_train = extract_samples(load_train)
    labels_train = extract_labels(df_train)
    samples_train, labels_train = cut_and_pad(samples_train, labels_train)
    samples_train = np.array(samples_train)
    labels_train = np.array(labels_train)
    mfccs_train = compute_mfccs(samples_train, n_mfcc = n_mfcc)
    # energy 
    energy_train = compute_energy(samples_train) 
    features_train = []
    for i in range(len(mfccs_train)): 
        if len(mfccs_train) == len(energy_train): 
            conc = np.concatenate((mfccs_train[i], energy_train[i]), axis = 1)
            features_train.append(conc)

    
    
  
    load_val = load_files(df_val)
    samples_val = extract_samples(load_val)
    labels_val = extract_labels(df_val)
    samples_val, labels_val = cut_and_pad(samples_val, labels_val)
    samples_val = np.array(samples_val)
    labels_val = np.array(labels_val)
    mfccs_val = compute_mfccs(samples_val, n_mfcc = n_mfcc)
    # energy 
    energy_val = compute_energy(samples_val) 
    features_val = []
    for i in range(len(mfccs_val)): 
        if len(mfccs_val) == len(energy_val): 
            conc = np.concatenate((mfccs_val[i], energy_val[i]), axis = 1)
            features_val.append(conc)
    
    
    
    
    load_test = load_files(df_test)
    samples_test = extract_samples(load_test)
    labels_test = extract_labels(df_test)
    samples_test, labels_test = cut_and_pad(samples_test, labels_test)
    samples_test = np.array(samples_test)
    labels_test = np.array(labels_test)
    mfccs_test = compute_mfccs(samples_test, n_mfcc = n_mfcc)
    # energy 
    energy_test = compute_energy(samples_test) 
    features_test=[]
    for i in range(len(mfccs_test)): 
        if len(mfccs_test) == len(energy_test): 
            conc = np.concatenate((mfccs_test[i], energy_test[i]), axis = 1)
            features_test.append(conc)
    

    return np.array(features_train), labels_train,  np.array(features_val), labels_val, np.array(features_test), labels_test
    

def feature_extractor_tess(df_train,  df_test, n_mfcc): 
    # we do not have the validation set here 
    load_train = load_files(df_train)
    samples_train = extract_samples(load_train)
    labels_train = extract_labels(df_train)
    samples_train, labels_train = cut_and_pad(samples_train, labels_train)
    samples_train = np.array(samples_train)
    labels_train = np.array(labels_train)
    mfccs_train = compute_mfccs(samples_train, n_mfcc = n_mfcc)
    # energy 
    energy_train = compute_energy(samples_train) 
    features_train = []
    for i in range(len(mfccs_train)): 
        if len(mfccs_train) == len(energy_train): 
            conc = np.concatenate((mfccs_train[i], energy_train[i]), axis = 1)
            features_train.append(conc)

    
    
    load_test = load_files(df_test)
    samples_test = extract_samples(load_test)
    labels_test = extract_labels(df_test)
    samples_test, labels_test = cut_and_pad(samples_test, labels_test)
    samples_test = np.array(samples_test)
    labels_test = np.array(labels_test)
    mfccs_test = compute_mfccs(samples_test, n_mfcc = n_mfcc)
    # energy 
    energy_test = compute_energy(samples_test) 
    features_test=[]
    for i in range(len(mfccs_test)): 
        if len(mfccs_test) == len(energy_test): 
            conc = np.concatenate((mfccs_test[i], energy_test[i]), axis = 1)
            features_test.append(conc)
    
    
    return np.array(features_train), labels_train, np.array(features_test), labels_test
    
def encode_labels(labels_train, labels_val, labels_test): 
    
    emotion_enc = {'fear':1, 'disgust':1, 'neutral':0, 'calm':0,  'happy':0, 'sadness':1, 'surprise':0, 'angry':1}
    y_train = pd.Series(labels_train).replace(emotion_enc)
  
    y_test = pd.Series(labels_test).map(emotion_enc)
    y_val = pd.Series(labels_val).map(emotion_enc)
    return y_train, y_val, y_test 


def encode_labels_tess(labels_train, labels_test): 
    
    emotion_enc = {'fear':1, 'disgust':1, 'neutral':0, 'calm':0,  'happy':0, 'sadness':1, 'surprise':0, 'angry':1}
    y_train = pd.Series(labels_train).replace(emotion_enc)
  
    y_test = pd.Series(labels_test).map(emotion_enc)
    return y_train, y_test
    
def standard_scaling(X_train, X_val, X_test): 
  
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
    X_test = scaler.transform(X_test.reshape(-1, X_test.shape[-1])).reshape(X_test.shape)
    X_val = scaler.transform(X_val.reshape(-1, X_val.shape[-1])).reshape(X_val.shape)
    return X_train, X_val, X_test, scaler 
    
def standard_scaling_tess(X_train, X_test): 
  
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
    X_test = scaler.transform(X_test.reshape(-1, X_test.shape[-1])).reshape(X_test.shape)
    return X_train, X_test, scaler   
    


# Compute dataframes for datasets and split in Train, Val, Test 

In [24]:
main_path = '/media/helemanc/OS/Users/i2CAT/Desktop/Datasets SER/'
TESS = os.path.join(main_path, "tess/TESS Toronto emotional speech set data/") 
RAV = os.path.join(main_path, "ravdess-emotional-speech-audio/audio_speech_actors_01-24")
SAVEE = os.path.join(main_path, "savee/ALL/")
CREMA = os.path.join(main_path, "creamd/AudioWAV/")

## RADVESS

In [25]:
lst = []
emotion = []
voc_channel = []
full_path = []
modality = []
intensity = []
actors = []
phrase =[]

for root, dirs, files in tqdm(os.walk(RAV)):
    for file in files:
        try:
            #Load librosa array, obtain mfcss, store the file and the mfcss information in a new array
            # X, sample_rate = librosa.load(os.path.join(root,file), res_type='kaiser_fast')
            # mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0) 
            # The instruction below converts the labels (from 1 to 8) to a series from 0 to 7
            # This is because our predictor needs to start from 0 otherwise it will try to predict also 0.
           
            modal = int(file[1:2])
            vchan = int(file[4:5])
            lab = int(file[7:8])
            ints = int(file[10:11])
            phr = int(file[13:14])
            act = int(file[18:20])
            # arr = mfccs, lab
            # lst.append(arr)
            
            modality.append(modal)
            voc_channel.append(vchan)
            emotion.append(lab) #only labels
            intensity.append(ints)
            phrase.append(phr)
            actors.append(act)
            
            full_path.append((root, file)) # only files
          # If the file is not valid, skip it
        except ValueError:
            continue

25it [00:00, 512.80it/s]


In [26]:
# 01 = neutral, 02 = calm, 03 = happy, 04 = sad, 05 = angry, 06 = fearful, 07 = disgust, 08 = surprised
# merge neutral and calm
emotions_list = ['neutral', 'neutral', 'happy', 'sadness', 'angry', 'fear', 'disgust', 'surprise']
emotion_dict = {em[0]+1:em[1] for em in enumerate(emotions_list)}

df = pd.DataFrame([emotion, voc_channel, modality, intensity, actors, actors,phrase, full_path]).T
df.columns = ['emotion', 'voc_channel', 'modality', 'intensity', 'actors', 'gender', 'phrase', 'path']
df['emotion'] = df['emotion'].map(emotion_dict)
df['voc_channel'] = df['voc_channel'].map({1: 'speech', 2:'song'})
df['modality'] = df['modality'].map({1: 'full AV', 2:'video only', 3:'audio only'})
df['intensity'] = df['intensity'].map({1: 'normal', 2:'strong'})
df['actors'] = df['actors']
df['gender'] = df['actors'].apply(lambda x: 'female' if x%2 == 0 else 'male')
df['phrase'] = df['phrase'].map({1: 'Kids are talking by the door', 2:'Dogs are sitting by the door'})
df['path'] = df['path'].apply(lambda x: x[0] + '/' + x[1])

In [27]:
# remove files with noise to apply the same noise to all files for data augmentation 
df = df[~df.path.str.contains('noise')]

In [28]:
df.head()

Unnamed: 0,emotion,voc_channel,modality,intensity,actors,gender,phrase,path
0,disgust,speech,audio only,normal,1,male,Dogs are sitting by the door,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2,disgust,speech,audio only,strong,1,male,Kids are talking by the door,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
4,disgust,speech,audio only,strong,1,male,Kids are talking by the door,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
6,disgust,speech,audio only,strong,1,male,Dogs are sitting by the door,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
8,disgust,speech,audio only,strong,1,male,Dogs are sitting by the door,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...


In [29]:
# only speech
RAV_df = df
RAV_df = RAV_df.loc[RAV_df.voc_channel == 'speech']

In [30]:
RAV_df.insert(0, "emotion_label", RAV_df.emotion, True)

In [31]:
RAV_df = RAV_df.drop(['emotion', 'voc_channel', 'modality', 'intensity', 'phrase'], 1)

In [32]:
RAV_df

Unnamed: 0,emotion_label,actors,gender,path
0,disgust,1,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2,disgust,1,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
4,disgust,1,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
6,disgust,1,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
8,disgust,1,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
...,...,...,...,...
2871,neutral,24,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2873,neutral,24,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2875,neutral,24,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2877,neutral,24,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...


In [33]:
RAV_train = []
RAV_val = []
RAV_test = []

In [34]:
for index, row in RAV_df.iterrows():
    if row['actors'] in range(1,21): 
        RAV_train.append(row) 
    elif row['actors'] in range(21,23): 
        RAV_val.append(row)
    elif row['actors'] in range(23,25): 
        RAV_test.append(row)
len(RAV_train), len(RAV_val), len(RAV_test)

(1200, 120, 120)

In [35]:
RAV_train = pd.DataFrame(RAV_train)
RAV_val = pd.DataFrame(RAV_val)
RAV_test = pd.DataFrame(RAV_test)

In [36]:
RAV_train = RAV_train.drop(['actors'], 1)
RAV_val = RAV_val.drop(['actors'], 1)
RAV_test = RAV_test.drop(['actors'], 1)

In [37]:
RAV_train.reset_index(drop=True, inplace = True) 
RAV_val.reset_index(drop=True, inplace = True) 
RAV_test.reset_index(drop=True, inplace = True ) 

## SAVEE

In [38]:
# Get the data location for SAVEE
dir_list = os.listdir(SAVEE)

# parse the filename to get the emotions
emotion=[]
path = []
actors = []
gender = []
for i in dir_list:
    actors.append(i[:2])
    if i[-8:-6]=='_a':
        emotion.append('angry')
        gender.append('male')
    elif i[-8:-6]=='_d':
        emotion.append('disgust')
        gender.append('male')
    elif i[-8:-6]=='_f':
        emotion.append('fear')
        gender.append('male')
    elif i[-8:-6]=='_h':
        emotion.append('happy')
        gender.append('male')
    elif i[-8:-6]=='_n':
        emotion.append('neutral')
        gender.append('male')
    elif i[-8:-6]=='sa':
        emotion.append('sadness')
        gender.append('male')
    elif i[-8:-6]=='su':
        emotion.append('surprise')
        gender.append('male') 
    else:
        emotion.append('Unknown') 
    path.append(SAVEE + i)
    
# Now check out the label count distribution 
SAVEE_df = pd.DataFrame(emotion, columns = ['emotion_label'])
                      
SAVEE_df = pd.concat([SAVEE_df,
                      pd.DataFrame(actors, columns = ['actors']),
                      pd.DataFrame(gender, columns = ['gender']), 
                      pd.DataFrame(path, columns = ['path'])], axis = 1)
SAVEE_df.emotion_label.value_counts()

neutral     120
sadness      60
surprise     60
happy        60
disgust      60
fear         60
angry        60
Name: emotion_label, dtype: int64

In [39]:
SAVEE_df.head()

Unnamed: 0,emotion_label,actors,gender,path
0,neutral,DC,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
1,sadness,KL,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2,sadness,KL,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
3,sadness,KL,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
4,sadness,KL,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...


In [40]:
SAVEE_train = []
SAVEE_val = []
SAVEE_test = []

In [41]:
#DC, JE, JK, KL
for index, row in SAVEE_df.iterrows(): 
    if row['actors'] == 'DC' or row ['actors'] == 'JE':
        SAVEE_train.append(row)
    elif row['actors'] == 'JK': 
        SAVEE_val.append(row)
    else: 
        SAVEE_test.append(row)
len(SAVEE_train), len(SAVEE_val), len(SAVEE_test)

(240, 120, 120)

In [42]:
SAVEE_train = pd.DataFrame(SAVEE_train)
SAVEE_val = pd.DataFrame(SAVEE_val)
SAVEE_test = pd.DataFrame(SAVEE_test)

In [43]:
SAVEE_train = SAVEE_train.drop(['actors'], 1)
SAVEE_val = SAVEE_val.drop(['actors'], 1)
SAVEE_test = SAVEE_test.drop(['actors'], 1)

In [44]:
SAVEE_train = SAVEE_train.reset_index(drop=True) 
SAVEE_val = SAVEE_val.reset_index(drop=True) 
SAVEE_test = SAVEE_test.reset_index(drop=True) 

## TESS

In [45]:
dir_list = os.listdir(TESS)
dir_list.sort()
dir_list

path = []
emotion = []
gender = []
actors = []

for i in dir_list:
    fname = os.listdir(TESS + i)
    for f in fname:
        if i == 'OAF_angry':
            emotion.append('angry')
            gender.append('female')
            actors.append('OAF')
        elif i == 'YAF_angry': 
            emotion.append('angry')
            gender.append('female')
            actors.append('YAF')
            
            
        elif i == 'OAF_disgust' :
            emotion.append('disgust')
            gender.append('female')
            actors.append('OAF')
        elif i == 'YAF_disgust': 
            emotion.append('disgust')
            gender.append('female')
            actors.append('YAF')
            
            
        elif i == 'OAF_Fear':
            emotion.append('fear')
            gender.append('female')
            actors.append('OAF')
        elif i == 'YAF_fear': 
            emotion.append('fear')
            gender.append('female')
            actors.append('YAF') 
            
            
        elif i == 'OAF_happy' :
            emotion.append('happy')
            gender.append('female')
            actors.append('OAF')
        elif i == 'YAF_happy': 
            emotion.append('angry')
            gender.append('female')
            actors.append('YAF')            
            
        elif i == 'OAF_neutral':
            emotion.append('neutral')
            gender.append('female')
            actors.append('OAF')   
        elif i == 'YAF_neutral': 
            emotion.append('neutral')
            gender.append('female')
            actors.append('YAF')      
            
                
        elif i == 'OAF_Pleasant_surprise':
            emotion.append('surprise')
            gender.append('female')
            actors.append('OAF')
        
        elif i == 'YAF_pleasant_surprised': 
            emotion.append('surprise')
            gender.append('female')
            actors.append('YAF')            
            
        elif i == 'OAF_Sad':
            emotion.append('sadness')
            gender.append('female')
            actors.append('OAF')
        elif i == 'YAF_sad': 
            emotion.append('sadness')
            gender.append('female')
            actors.append('YAF')            
        else:
            emotion.append('Unknown')
        path.append(TESS + i + "/" + f)

TESS_df = pd.DataFrame(emotion, columns = ['emotion_label'])
TESS_df = pd.concat([TESS_df, pd.DataFrame(gender, columns = ['gender']), 
                     pd.DataFrame(actors, columns= ['actors']),
                     pd.DataFrame(path, columns = ['path'])],axis=1)
TESS_df.emotion_label.value_counts()

angry       1200
fear         800
surprise     800
sadness      800
disgust      800
neutral      800
happy        400
Name: emotion_label, dtype: int64

In [46]:
TESS_df= TESS_df[~TESS_df.path.str.contains('noise')]

In [47]:
TESS_train = []
TESS_test = []

In [48]:
for index, row in TESS_df.iterrows(): 
    if row['actors'] == 'YAF': 
        TESS_train.append(row)
    else: 
        TESS_test.append(row)
len(TESS_train), len(TESS_test)

(1400, 1400)

In [49]:
TESS_train = pd.DataFrame(TESS_train)
TESS_test = pd.DataFrame(TESS_test)

In [50]:
TESS_train = TESS_train.reset_index(drop=True) 
TESS_test  = TESS_test.reset_index(drop=True) 

## CREMA-D

In [51]:
males = [1,
5,
11,
14,
15,
16,
17,
19,
22,
23,
26,
27,
31,
32,
33,
34,
35,
36,
38,
39,
41,
42,
44,
45,
48,
50,
51,
57,
59, 
62, 
64,
65, 
66,
67,
68,
69,
70,
71,
77, 
80, 
81, 
83, 
85, 
86, 
87,
88, 
90]

In [52]:
females = [ 2,
3,
4,
6,
7,
8,
9,
10,
12,
13,
18,
20,
21,
24,
25,
28,
29,
30,
37,
40,
43,
46,
47,
49,
52,
53,
54,
55,
56, 
58, 
60,
61,
63,
72, 
73, 
74, 
75, 
76, 
78, 
79, 
82, 
84, 
89, 
91]

In [53]:
crema_directory_list = os.listdir(CREMA)

file_emotion = []
file_path = []
actors = []
gender = []




for file in crema_directory_list:

    # storing file emotions
    part=file.split('_')
    
    # use only high intensity files
    if "HI" in part[3] :
        actor = part[0][2:]
        actors.append(actor)
        if int(actor) in males:
            gender.append('male')
        else: 
            gender.append('female')
    
        # storing file paths
        file_path.append(CREMA + file)
        if part[2] == 'SAD':
            file_emotion.append('sadness')
        elif part[2] == 'ANG':
            file_emotion.append('angry')
        elif part[2] == 'DIS':
            file_emotion.append('disgust')
        elif part[2] == 'FEA':
            file_emotion.append('fear')
        elif part[2] == 'HAP':
            file_emotion.append('happy')
        elif part[2] == 'NEU':
            file_emotion.append('neutral')
        else:
            file_emotion.append('Unknown')

# dataframe for emotion of files
emotion_df = pd.DataFrame(file_emotion, columns=['emotion_label'])

# dataframe for path of files.
path_df = pd.DataFrame(file_path, columns=['path'])
actors_df = pd.DataFrame(actors, columns=['actors'])
gender_df = pd.DataFrame(gender, columns=['gender'])                      
Crema_df = pd.concat([emotion_df, actors_df, gender_df, path_df], axis=1)
Crema_df.head()

Unnamed: 0,emotion_label,actors,gender,path
0,happy,91,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
1,sadness,91,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2,angry,91,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
3,disgust,91,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
4,fear,91,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...


In [54]:
Crema_df.shape

(455, 4)

In [55]:
actor_files = {}

for index, row in Crema_df.iterrows():
    actor = row['actors']
    if actor not in actor_files.keys(): 
        actor_files[actor] = 1
    else: 
        actor_files[actor]+=1

In [56]:
actor_files

{'91': 5,
 '90': 5,
 '89': 5,
 '88': 5,
 '87': 5,
 '86': 5,
 '85': 5,
 '84': 5,
 '83': 5,
 '82': 5,
 '81': 5,
 '80': 5,
 '79': 5,
 '78': 5,
 '77': 5,
 '76': 5,
 '75': 5,
 '74': 5,
 '73': 5,
 '72': 5,
 '71': 5,
 '70': 5,
 '69': 5,
 '68': 5,
 '67': 5,
 '66': 5,
 '65': 5,
 '64': 5,
 '63': 5,
 '62': 5,
 '61': 5,
 '60': 5,
 '59': 5,
 '58': 5,
 '57': 5,
 '56': 5,
 '55': 5,
 '54': 5,
 '53': 5,
 '52': 5,
 '51': 5,
 '50': 5,
 '49': 5,
 '48': 5,
 '47': 5,
 '46': 5,
 '45': 5,
 '44': 5,
 '43': 5,
 '42': 5,
 '41': 5,
 '40': 5,
 '39': 5,
 '38': 5,
 '37': 5,
 '36': 5,
 '35': 5,
 '34': 5,
 '33': 5,
 '32': 5,
 '31': 5,
 '30': 5,
 '29': 5,
 '28': 5,
 '27': 5,
 '26': 5,
 '25': 5,
 '24': 5,
 '23': 5,
 '22': 5,
 '21': 5,
 '20': 5,
 '19': 5,
 '18': 5,
 '17': 5,
 '16': 5,
 '15': 5,
 '14': 5,
 '13': 5,
 '12': 5,
 '11': 5,
 '10': 5,
 '09': 5,
 '08': 5,
 '07': 5,
 '06': 5,
 '05': 5,
 '04': 5,
 '03': 5,
 '02': 5,
 '01': 5}

In [57]:
count_males = 0 
count_females = 0 
male_list = []
for index, row in Crema_df.iterrows(): 
    gender = row['gender']
    actor = row['actors']
    if gender == 'male':
        count_males +=1
        if actor not in male_list: 
            male_list.append(actor)
    else: 
        count_females +=1

In [58]:
count_males, count_females

(235, 220)

Since there are more males than females we will remove randomly 3 male actors (since there are exactly 5 audio files per actor)

In [59]:
import random 
random.seed(42)
males_to_remove = random.sample(male_list, 3)
males_to_remove

['17', '80', '88']

In [60]:
new_df = []
for index, row in Crema_df.iterrows(): 
    if row['actors'] not in males_to_remove: 
        new_df.append(row)

In [61]:
CREMA_df = pd.DataFrame(new_df)

In [62]:
for index, row in CREMA_df.iterrows(): 
    if row['actors'] == '17': 
        print("Elements not removed")

In [63]:
count_males = 0 
count_females = 0 
male_list = []
female_list = []
for index, row in CREMA_df.iterrows(): 
    gender = row['gender']
    actor = row['actors']
    if gender == 'male':
        count_males +=1
        if actor not in male_list: 
            male_list.append(actor)
    else: 
        count_females +=1
        if actor not in female_list: 
            female_list.append(actor)

In [64]:
count_males, count_females

(220, 220)

In [65]:
len(female_list)

44

In [66]:
len(male_list)

44

In [67]:
CREMA_train = []
CREMA_val = []
CREMA_test = []

In [68]:
females_train = random.sample(female_list, 32)
males_train = random.sample(male_list, 32)

# remove the elements assigned to train 
for element in females_train:
    if element in female_list:
        female_list.remove(element)
        
for element in males_train:
    if element in male_list:
        male_list.remove(element)

         
females_val = random.sample(female_list, 6) 
males_val = random.sample(male_list, 6) 

# remove the elements assigned to val
for element in females_val:
    if element in female_list:
        female_list.remove(element)
        
for element in males_val:
    if element in male_list:
        male_list.remove(element)
        
females_test = random.sample(female_list, 6) 
males_test = random.sample(male_list, 6)        

In [69]:
females_train, males_train, females_val, males_val, females_test, males_test

(['54',
  '56',
  '58',
  '74',
  '76',
  '13',
  '78',
  '29',
  '84',
  '89',
  '09',
  '60',
  '04',
  '55',
  '52',
  '91',
  '02',
  '07',
  '46',
  '49',
  '37',
  '10',
  '20',
  '75',
  '21',
  '53',
  '06',
  '28',
  '18',
  '63',
  '30',
  '03'],
 ['57',
  '69',
  '65',
  '45',
  '77',
  '81',
  '41',
  '15',
  '44',
  '23',
  '59',
  '86',
  '34',
  '01',
  '85',
  '66',
  '31',
  '33',
  '05',
  '48',
  '50',
  '67',
  '51',
  '22',
  '36',
  '87',
  '71',
  '39',
  '42',
  '11',
  '32',
  '14'],
 ['43', '61', '40', '47', '73', '24'],
 ['62', '68', '64', '83', '70', '26'],
 ['08', '79', '12', '25', '72', '82'],
 ['16', '19', '38', '35', '27', '90'])

In [70]:
train = females_train + males_train 
val = females_val + males_val 
test = females_test + males_test

In [71]:
for index, row in CREMA_df.iterrows(): 
    gender = row['gender']
    actor = row['actors']
    if actor in train: 
        CREMA_train.append(row)
    elif actor in val: 
        CREMA_val.append(row)
    else:
        CREMA_test.append(row)

In [72]:
CREMA_train = pd.DataFrame(CREMA_train) 
CREMA_val = pd.DataFrame(CREMA_val) 
CREMA_test = pd.DataFrame(CREMA_test)

In [73]:
CREMA_train.shape, CREMA_val.shape, CREMA_test.shape

((320, 4), (60, 4), (60, 4))

In [74]:
CREMA_train = CREMA_train.reset_index(drop=True) 
CREMA_val = CREMA_val.reset_index(drop = True) 

# Model

In [75]:
def create_model( init_mode='glorot_uniform', lr = 0.001, input_dim=(157, 26)):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=input_dim, kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.5))

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

# Experiment 4.1 : RAVDESS

In [89]:
df_train = RAV_train
df_val = RAV_val
df_test = RAV_test

In [90]:
df_train.reset_index(drop = True, inplace = True) 
df_val.reset_index(drop = True, inplace = True)
df_test.reset_index(drop = True, inplace = True)

## Feature Extraction

In [91]:
X_train, y_train, X_val, y_val, X_test, y_test = feature_extractor(df_train, df_val, df_test, 26)

100%|█████████████████████████████████████| 1200/1200 [00:00<00:00, 2149.43it/s]
100%|███████████████████████████████████████| 1200/1200 [00:13<00:00, 89.15it/s]
100%|█████████████████████████████████████| 1200/1200 [00:00<00:00, 1741.22it/s]
100%|███████████████████████████████████████| 120/120 [00:00<00:00, 2542.03it/s]
100%|█████████████████████████████████████████| 120/120 [00:01<00:00, 91.96it/s]
100%|███████████████████████████████████████| 120/120 [00:00<00:00, 1165.31it/s]
100%|███████████████████████████████████████| 120/120 [00:00<00:00, 2175.88it/s]
100%|█████████████████████████████████████████| 120/120 [00:01<00:00, 89.38it/s]
100%|███████████████████████████████████████| 120/120 [00:00<00:00, 1154.95it/s]


In [92]:
y_train, y_val, y_test  = encode_labels(y_train, y_val, y_test)

In [93]:
np.size(y_val)

120

In [94]:
X_train, X_val, X_test, fitted_scaler = standard_scaling(X_train, X_val, X_test)

In [95]:
X_train.shape

(1200, 157, 26)

## Shuffle training data

In [96]:
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)

## Save Scaler

In [97]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_4/scaler_4_1.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [None]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [None]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

In [None]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

## Train with best parameters

In [None]:
#Best Accuracy 0.7074999809265137 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
def create_model( init_mode='glorot_normal', lr = 0.001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,26), kernel_initializer=init_mode)) # 157
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [None]:
seed = 7
np.random.seed(seed)

In [None]:
model = create_model()

In [None]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [None]:
import datetime, os

In [None]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [9]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

NameError: name 'logdir' is not defined

In [10]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

NameError: name 'y_train' is not defined

In [11]:
history = model.fit(X_train, y_train, batch_size=8, epochs=500, validation_data=(X_val, y_val),
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

NameError: name 'model' is not defined

In [83]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 5407), started 1 day, 3:46:37 ago. (Use '!kill 5407' to kill it.)

In [79]:
model.evaluate(X_test, y_test, batch_size=8)



[0.28724825382232666, 0.8916666507720947]

In [80]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.86      0.91      0.89        56
           1       0.92      0.88      0.90        64

    accuracy                           0.89       120
   macro avg       0.89      0.89      0.89       120
weighted avg       0.89      0.89      0.89       120



## Save best model 

In [81]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_4/model_4_1")

2021-09-22 16:01:28.193018: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_4/model_4_1/assets


# Experiment 4.2 : RAVDESS noise

## Read dataframes

In [100]:
preprocess_path = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/ravdess"
df_train = pd.read_csv(os.path.join(preprocess_path,"df_train.csv"))
df_val = pd.read_csv(os.path.join(preprocess_path,"df_val.csv"))
df_test = pd.read_csv(os.path.join(preprocess_path,"df_test.csv"))  

## Feature Extraction

In [101]:
X_train, y_train, X_val, y_val, X_test, y_test = feature_extractor(df_train, df_val, df_test, 26) # 13

100%|█████████████████████████████████████| 2400/2400 [00:00<00:00, 2493.20it/s]
100%|███████████████████████████████████████| 2400/2400 [00:31<00:00, 77.11it/s]
100%|█████████████████████████████████████| 2400/2400 [00:01<00:00, 1859.74it/s]
100%|███████████████████████████████████████| 120/120 [00:00<00:00, 2298.42it/s]
100%|█████████████████████████████████████████| 120/120 [00:01<00:00, 65.44it/s]
100%|████████████████████████████████████████| 120/120 [00:00<00:00, 949.56it/s]
100%|███████████████████████████████████████| 120/120 [00:00<00:00, 1927.01it/s]
100%|█████████████████████████████████████████| 120/120 [00:01<00:00, 71.09it/s]
100%|████████████████████████████████████████| 120/120 [00:00<00:00, 952.58it/s]


In [102]:
y_train, y_val, y_test  = encode_labels(y_train, y_val, y_test)

In [103]:
np.size(y_val)

120

In [104]:
X_train, X_val, X_test, fitted_scaler = standard_scaling(X_train, X_val, X_test)

In [105]:
X_train.shape

(2400, 157, 26)

## Shuffle training data

In [106]:
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)

## Save Scaler

In [107]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_4/scaler_4_2.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [91]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [92]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-22 16:10:58.805321: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-22 16:10:58.805326: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-22 16:10:58.805321: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-22 16:10:58.805349: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-22 16:10:58.805349: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your mac

Epoch 1/50
200/200 - 9s - loss: 0.9153 - accuracy: 0.5394
Epoch 1/50
200/200 - 9s - loss: 0.9038 - accuracy: 0.5500
Epoch 1/50
200/200 - 9s - loss: 3.2796 - accuracy: 0.5225
Epoch 1/50
200/200 - 10s - loss: 1.5762 - accuracy: 0.5119
Epoch 1/50
200/200 - 10s - loss: 3.3576 - accuracy: 0.5369
Epoch 1/50
200/200 - 10s - loss: 1.2796 - accuracy: 0.5131
Epoch 1/50
200/200 - 10s - loss: 1.2562 - accuracy: 0.5369
Epoch 1/50
200/200 - 10s - loss: 0.9434 - accuracy: 0.5694
Epoch 2/50
200/200 - 9s - loss: 0.6701 - accuracy: 0.6025
Epoch 2/50
200/200 - 9s - loss: 0.6730 - accuracy: 0.5987
Epoch 2/50
200/200 - 8s - loss: 2.0962 - accuracy: 0.5456
Epoch 2/50
200/200 - 8s - loss: 1.0188 - accuracy: 0.5581
Epoch 2/50
200/200 - 9s - loss: 2.1560 - accuracy: 0.5612
Epoch 2/50
200/200 - 9s - loss: 0.8941 - accuracy: 0.5688
Epoch 2/50
200/200 - 9s - loss: 0.6749 - accuracy: 0.5919
Epoch 2/50
200/200 - 9s - loss: 0.9201 - accuracy: 0.5537
Epoch 3/50
200/200 - 8s - loss: 0.6640 - accuracy: 0.6175
Epoch 3/5

2021-09-22 16:18:03.258083: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 26124800 exceeds 10% of free system memory.
2021-09-22 16:18:03.331496: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 26124800 exceeds 10% of free system memory.
2021-09-22 16:18:03.337440: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 26124800 exceeds 10% of free system memory.


100/100 - 1s - loss: 0.5820 - accuracy: 0.7538


2021-09-22 16:18:03.768229: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 26124800 exceeds 10% of free system memory.
2021-09-22 16:18:04.000516: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 26124800 exceeds 10% of free system memory.
2021-09-22 16:18:04.036527: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 26124800 exceeds 10% of free system memory.


100/100 - 1s - loss: 0.4856 - accuracy: 0.7950
Epoch 1/50
400/400 - 8s - loss: 1.9082 - accuracy: 0.5394
Epoch 1/50
400/400 - 9s - loss: 1.8804 - accuracy: 0.5562
Epoch 1/50
200/200 - 9s - loss: 3.1131 - accuracy: 0.5400
Epoch 1/50
400/400 - 10s - loss: 1.9453 - accuracy: 0.5556
Epoch 1/50
200/200 - 10s - loss: 0.8077 - accuracy: 0.5412
Epoch 1/50
200/200 - 10s - loss: 0.8082 - accuracy: 0.5650
Epoch 1/50
200/200 - 11s - loss: 0.8041 - accuracy: 0.5663
Epoch 1/50
200/200 - 13s - loss: 1.1100 - accuracy: 0.5175
Epoch 2/50
200/200 - 9s - loss: 2.0860 - accuracy: 0.5381
Epoch 2/50
400/400 - 10s - loss: 0.6930 - accuracy: 0.5781
Epoch 2/50
400/400 - 11s - loss: 0.7114 - accuracy: 0.5406
Epoch 2/50
200/200 - 8s - loss: 0.6708 - accuracy: 0.5894
Epoch 2/50
200/200 - 8s - loss: 0.6645 - accuracy: 0.6356
Epoch 2/50
200/200 - 9s - loss: 0.6633 - accuracy: 0.6237
Epoch 2/50
400/400 - 10s - loss: 0.6975 - accuracy: 0.5575
Epoch 2/50
200/200 - 9s - loss: 0.8693 - accuracy: 0.5775
Epoch 3/50
200/20

2021-09-22 16:25:35.768524: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 26124800 exceeds 10% of free system memory.


100/100 - 2s - loss: 0.5686 - accuracy: 0.7275
Epoch 45/50
400/400 - 8s - loss: 0.4013 - accuracy: 0.7944
100/100 - 2s - loss: 0.4783 - accuracy: 0.8138
Epoch 45/50
400/400 - 7s - loss: 0.4274 - accuracy: 0.7794
100/100 - 2s - loss: 0.2601 - accuracy: 0.8825


2021-09-22 16:25:38.224180: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 26124800 exceeds 10% of free system memory.
2021-09-22 16:25:38.277219: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 26124800 exceeds 10% of free system memory.


Epoch 46/50
400/400 - 7s - loss: 0.3464 - accuracy: 0.8319
Epoch 46/50
400/400 - 8s - loss: 0.3843 - accuracy: 0.8125
Epoch 1/50
200/200 - 9s - loss: 1.0918 - accuracy: 0.5288
Epoch 46/50
400/400 - 8s - loss: 0.4317 - accuracy: 0.7713
Epoch 1/50
200/200 - 12s - loss: 1.0311 - accuracy: 0.5437
Epoch 47/50
400/400 - 9s - loss: 0.3452 - accuracy: 0.8325
Epoch 1/50
200/200 - 12s - loss: 3.5810 - accuracy: 0.5188
Epoch 1/50
200/200 - 13s - loss: 3.2351 - accuracy: 0.5206
Epoch 1/50
200/200 - 13s - loss: 3.3358 - accuracy: 0.5181
Epoch 2/50
200/200 - 9s - loss: 0.9261 - accuracy: 0.5512
Epoch 47/50
400/400 - 10s - loss: 0.3912 - accuracy: 0.8031
Epoch 47/50
400/400 - 10s - loss: 0.4360 - accuracy: 0.7650
Epoch 2/50
200/200 - 9s - loss: 0.9076 - accuracy: 0.5400
Epoch 48/50
400/400 - 9s - loss: 0.3216 - accuracy: 0.8469
Epoch 2/50
200/200 - 10s - loss: 2.8189 - accuracy: 0.5244
Epoch 2/50
200/200 - 9s - loss: 2.4250 - accuracy: 0.5506
Epoch 2/50
200/200 - 9s - loss: 2.5843 - accuracy: 0.5306


2021-09-22 16:26:24.865847: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 26124800 exceeds 10% of free system memory.


Epoch 5/50
200/200 - 7s - loss: 1.5065 - accuracy: 0.5669
Epoch 50/50
400/400 - 9s - loss: 0.3934 - accuracy: 0.8019
Epoch 5/50
200/200 - 8s - loss: 1.4539 - accuracy: 0.5606
Epoch 5/50
200/200 - 8s - loss: 1.6942 - accuracy: 0.5419
200/200 - 3s - loss: 0.4444 - accuracy: 0.7750
Epoch 6/50
200/200 - 7s - loss: 0.7030 - accuracy: 0.6012
200/200 - 3s - loss: 0.4752 - accuracy: 0.7563


2021-09-22 16:26:28.531108: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 26124800 exceeds 10% of free system memory.


Epoch 6/50
200/200 - 7s - loss: 0.6703 - accuracy: 0.6069
Epoch 6/50
200/200 - 7s - loss: 1.2260 - accuracy: 0.5838
Epoch 6/50
200/200 - 6s - loss: 1.2809 - accuracy: 0.5838
Epoch 6/50
200/200 - 7s - loss: 1.2196 - accuracy: 0.5587
Epoch 7/50
200/200 - 6s - loss: 0.6850 - accuracy: 0.6112
Epoch 1/50
200/200 - 12s - loss: 1.9912 - accuracy: 0.5594
Epoch 7/50
200/200 - 7s - loss: 0.6510 - accuracy: 0.6206
Epoch 7/50
200/200 - 8s - loss: 1.0908 - accuracy: 0.5825
Epoch 1/50
200/200 - 12s - loss: 1.8180 - accuracy: 0.5512
Epoch 7/50
200/200 - 8s - loss: 1.0839 - accuracy: 0.5713
Epoch 7/50
200/200 - 8s - loss: 1.1864 - accuracy: 0.5813
Epoch 8/50
200/200 - 9s - loss: 0.6360 - accuracy: 0.6463
Epoch 1/50
200/200 - 13s - loss: 1.9865 - accuracy: 0.5512
Epoch 8/50
200/200 - 8s - loss: 0.6597 - accuracy: 0.6175
Epoch 2/50
200/200 - 9s - loss: 0.7227 - accuracy: 0.5825
Epoch 8/50
200/200 - 9s - loss: 1.0143 - accuracy: 0.5863
Epoch 8/50
200/200 - 8s - loss: 1.0109 - accuracy: 0.5550
Epoch 2/50


2021-09-22 16:32:49.864629: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 26124800 exceeds 10% of free system memory.


Epoch 44/50
200/200 - 7s - loss: 0.4212 - accuracy: 0.7925
Epoch 50/50
200/200 - 7s - loss: 0.5769 - accuracy: 0.6831
Epoch 50/50
200/200 - 7s - loss: 0.5547 - accuracy: 0.6988
Epoch 50/50
200/200 - 7s - loss: 0.5756 - accuracy: 0.6831
100/100 - 2s - loss: 0.6426 - accuracy: 0.7000
100/100 - 2s - loss: 0.6424 - accuracy: 0.6450
100/100 - 2s - loss: 0.6459 - accuracy: 0.6750


2021-09-22 16:32:53.458070: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 26124800 exceeds 10% of free system memory.
2021-09-22 16:32:53.468994: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 26124800 exceeds 10% of free system memory.


Epoch 44/50
200/200 - 6s - loss: 0.3248 - accuracy: 0.8494
Epoch 45/50
200/200 - 5s - loss: 0.4045 - accuracy: 0.7962
Epoch 45/50
200/200 - 6s - loss: 0.4111 - accuracy: 0.7937
Epoch 1/50
400/400 - 10s - loss: 0.9775 - accuracy: 0.5069
Epoch 46/50
200/200 - 6s - loss: 0.3724 - accuracy: 0.8300
Epoch 45/50
200/200 - 7s - loss: 0.2880 - accuracy: 0.8775
Epoch 1/50
400/400 - 12s - loss: 0.9879 - accuracy: 0.5456
Epoch 46/50
200/200 - 7s - loss: 0.4011 - accuracy: 0.8075
Epoch 1/50
400/400 - 14s - loss: 1.0055 - accuracy: 0.5550
Epoch 1/50
400/400 - 14s - loss: 0.7553 - accuracy: 0.5512
Epoch 1/50
400/400 - 14s - loss: 0.7516 - accuracy: 0.5600
Epoch 47/50
200/200 - 9s - loss: 0.3730 - accuracy: 0.8087
Epoch 2/50
400/400 - 10s - loss: 0.7244 - accuracy: 0.5869
Epoch 46/50
200/200 - 10s - loss: 0.2750 - accuracy: 0.8900
Epoch 2/50
400/400 - 10s - loss: 0.7742 - accuracy: 0.5700
Epoch 47/50
200/200 - 9s - loss: 0.3924 - accuracy: 0.8163
Epoch 2/50
400/400 - 10s - loss: 0.7692 - accuracy: 0.5

2021-09-22 16:33:42.078637: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 26124800 exceeds 10% of free system memory.


100/100 - 3s - loss: 0.4349 - accuracy: 0.7912
Epoch 50/50
200/200 - 7s - loss: 0.2589 - accuracy: 0.8931
Epoch 5/50
400/400 - 8s - loss: 0.6270 - accuracy: 0.6419
Epoch 5/50
400/400 - 9s - loss: 0.5936 - accuracy: 0.6675
Epoch 5/50
400/400 - 9s - loss: 0.6115 - accuracy: 0.6513
100/100 - 2s - loss: 0.3342 - accuracy: 0.8788
Epoch 6/50
400/400 - 9s - loss: 0.6105 - accuracy: 0.6606
Epoch 6/50
400/400 - 7s - loss: 0.6177 - accuracy: 0.6575
Epoch 6/50
400/400 - 8s - loss: 0.6222 - accuracy: 0.6431
Epoch 6/50
400/400 - 8s - loss: 0.5667 - accuracy: 0.7125
Epoch 1/50
400/400 - 12s - loss: 0.7697 - accuracy: 0.5456
Epoch 6/50
400/400 - 8s - loss: 0.5905 - accuracy: 0.6731
Epoch 7/50
400/400 - 7s - loss: 0.5969 - accuracy: 0.6762
Epoch 7/50
400/400 - 8s - loss: 0.6107 - accuracy: 0.6456
Epoch 7/50
400/400 - 8s - loss: 0.5995 - accuracy: 0.6513
Epoch 7/50
400/400 - 8s - loss: 0.5633 - accuracy: 0.6956
Epoch 2/50
400/400 - 8s - loss: 0.6775 - accuracy: 0.6031
Epoch 7/50
400/400 - 9s - loss: 0.

2021-09-22 16:39:50.732376: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 39187200 exceeds 10% of free system memory.


Epoch 1/50
300/300 - 2s - loss: 0.7496 - accuracy: 0.5763
Epoch 2/50
300/300 - 2s - loss: 0.6493 - accuracy: 0.6250
Epoch 3/50
300/300 - 2s - loss: 0.6221 - accuracy: 0.6533
Epoch 4/50
300/300 - 2s - loss: 0.5973 - accuracy: 0.6658
Epoch 5/50
300/300 - 2s - loss: 0.5510 - accuracy: 0.7033
Epoch 6/50
300/300 - 2s - loss: 0.5339 - accuracy: 0.7138
Epoch 7/50
300/300 - 2s - loss: 0.5109 - accuracy: 0.7354
Epoch 8/50
300/300 - 2s - loss: 0.4911 - accuracy: 0.7471
Epoch 9/50
300/300 - 2s - loss: 0.4609 - accuracy: 0.7738
Epoch 10/50
300/300 - 2s - loss: 0.4446 - accuracy: 0.7862
Epoch 11/50
300/300 - 2s - loss: 0.4229 - accuracy: 0.7896
Epoch 12/50
300/300 - 2s - loss: 0.4172 - accuracy: 0.8008
Epoch 13/50
300/300 - 2s - loss: 0.4000 - accuracy: 0.8150
Epoch 14/50
300/300 - 2s - loss: 0.3846 - accuracy: 0.8279
Epoch 15/50
300/300 - 2s - loss: 0.3685 - accuracy: 0.8283
Epoch 16/50
300/300 - 2s - loss: 0.3379 - accuracy: 0.8454
Epoch 17/50
300/300 - 2s - loss: 0.3258 - accuracy: 0.8454
Epoch 

In [93]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.8837500015894572 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.7996, std=0.003281 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.8696, std=0.01022 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.7321, std=0.01615 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.7933, std=0.03993 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.8838, std=0.004677 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.8083, std=0.004602 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.6733, std=0.02248 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.8254, std=0.03821 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.87, std=0.0108 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.8704, std=0.002125 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size': 

## Train with best parameters

In [94]:
# Best Accuracy 0.8050000071525574 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}

def create_model( init_mode='uniform', lr = 0.001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,26), kernel_initializer=init_mode)) # 157, 12
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [95]:
seed = 7
np.random.seed(seed)

In [96]:
model = create_model()

In [97]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [98]:
import datetime, os

In [99]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [100]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-22 16:45:47.008211: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-22 16:45:47.008306: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-22 16:45:47.009538: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [101]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [102]:
history = model.fit(X_train, y_train, batch_size=8, epochs=500, validation_data=(X_val, y_val),
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

2021-09-22 16:45:59.875324: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 39187200 exceeds 10% of free system memory.


Epoch 1/500
 21/300 [=>............................] - ETA: 2s - loss: 1.2078 - accuracy: 0.4821

2021-09-22 16:46:00.353326: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-22 16:46:00.353349: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-22 16:46:00.411269: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-22 16:46:00.417545: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-22 16:46:00.425145: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210922-164546/train/plugins/profile/2021_09_22_16_46_00
2021-09-22 16:46:00.425896: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210922-164546/train/plugins/profile/2021_09_22_16_46_00/helemanc-Latitude-5410.trace.json.gz
2021-09-22 16:46:00.432863: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210922-164546/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500

Epoch 00015: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500

Epoch 00021: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500

Epoch 00025: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500

Epoch 00029: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500

Epoch 00033: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500

Epoch 00037: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500

E

In [103]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 5407), started 1 day, 4:26:56 ago. (Use '!kill 5407' to kill it.)

In [104]:
model.evaluate(X_test, y_test, batch_size=8)



[0.3349814713001251, 0.8500000238418579]

In [105]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.81      0.89      0.85        56
           1       0.90      0.81      0.85        64

    accuracy                           0.85       120
   macro avg       0.85      0.85      0.85       120
weighted avg       0.85      0.85      0.85       120



## Save best model 

In [106]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_4/model_4_2")

INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_4/model_4_2/assets


# Experiment 4.3: TESS

In [108]:
df_train = TESS_train
df_test = TESS_test

In [109]:
df_train.reset_index(drop = True, inplace = True) 
df_test.reset_index(drop = True, inplace = True)

## Feature Extraction

In [110]:
X_train, y_train, X_test, y_test = feature_extractor_tess(df_train,  df_test, 26)

100%|███████████████████████████████████████| 1400/1400 [00:15<00:00, 88.08it/s]
100%|███████████████████████████████████████| 1400/1400 [00:19<00:00, 71.62it/s]
100%|█████████████████████████████████████| 1400/1400 [00:00<00:00, 1698.28it/s]
100%|█████████████████████████████████████| 1400/1400 [00:00<00:00, 1595.23it/s]
100%|███████████████████████████████████████| 1400/1400 [00:18<00:00, 77.39it/s]
100%|█████████████████████████████████████| 1400/1400 [00:00<00:00, 1812.81it/s]


In [111]:
y_train, y_test = encode_labels_tess(y_train, y_test)

In [112]:
np.size(y_test)

1400

In [113]:
X_train, X_test, fitted_scaler = standard_scaling_tess(X_train,  X_test)

In [114]:
X_train.shape

(1400, 157, 26)

## Shuffle training data

In [115]:
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)

## Save Scaler

In [116]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_4/scaler_4_3.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [115]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [116]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-22 18:35:35.044016: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-22 18:35:35.044020: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-22 18:35:35.044016: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-22 18:35:35.044015: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-22 18:35:35.044016: W tensorflow/stream_executor/platform/default/dso_lo

Epoch 1/50
117/117 - 6s - loss: 0.9549 - accuracy: 0.6552
Epoch 1/50
117/117 - 6s - loss: 0.2084 - accuracy: 0.9260
Epoch 1/50
117/117 - 6s - loss: 0.9349 - accuracy: 0.6924
Epoch 1/50
117/117 - 6s - loss: 0.2428 - accuracy: 0.9165
Epoch 1/50
117/117 - 6s - loss: 2.2707 - accuracy: 0.6313
Epoch 1/50
117/117 - 6s - loss: 0.3064 - accuracy: 0.8992
Epoch 1/50
117/117 - 6s - loss: 2.6732 - accuracy: 0.6174
Epoch 1/50
117/117 - 6s - loss: 0.8358 - accuracy: 0.7031
Epoch 2/50
117/117 - 5s - loss: 0.0334 - accuracy: 0.9904
Epoch 2/50
117/117 - 5s - loss: 0.3406 - accuracy: 0.8597
Epoch 2/50
117/117 - 5s - loss: 0.0202 - accuracy: 0.9957
Epoch 2/50
117/117 - 5s - loss: 0.3052 - accuracy: 0.8810
Epoch 2/50
117/117 - 5s - loss: 0.2545 - accuracy: 0.8982
Epoch 2/50
117/117 - 5s - loss: 0.8249 - accuracy: 0.8114
Epoch 2/50
117/117 - 5s - loss: 0.0308 - accuracy: 0.9893
Epoch 2/50
117/117 - 5s - loss: 1.0163 - accuracy: 0.7974
Epoch 3/50
117/117 - 5s - loss: 0.1904 - accuracy: 0.9261
Epoch 3/50
117

In [117]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 1.0 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=1.0, std=0.0 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=1.0, std=0.0 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=1.0, std=0.0 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=1.0, std=0.0 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=1.0, std=0.0 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=1.0, std=0.0 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=1.0, std=0.0 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=1.0, std=0.0 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=1.0, std=0.0 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=1.0, std=0.0 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size': 4}


## Train with best parameters

In [118]:
#Best Accuracy 1.0 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
def create_model( init_mode='lecun_uniform', lr = 0.0001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,26), kernel_initializer=init_mode)) # 157
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [119]:
seed = 7
np.random.seed(seed)

In [120]:
model = create_model()

In [121]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [122]:
import datetime, os

In [123]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [124]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-22 19:20:37.106276: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-22 19:20:37.106359: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-22 19:20:37.108901: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [125]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [126]:
history = model.fit(X_train, y_train, batch_size=8, epochs=500, 
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

Epoch 1/500
 21/175 [==>...........................] - ETA: 1s - loss: 1.6615 - accuracy: 0.4888

2021-09-22 19:20:38.368280: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-22 19:20:38.368305: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-22 19:20:38.431777: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-22 19:20:38.437836: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-22 19:20:38.443221: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210922-192036/train/plugins/profile/2021_09_22_19_20_38
2021-09-22 19:20:38.443961: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210922-192036/train/plugins/profile/2021_09_22_19_20_38/helemanc-Latitude-5410.trace.json.gz
2021-09-22 19:20:38.450351: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210922-192036/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500

Epoch 00018: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-05.
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500

Epoch 00024: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500

Epoch 00028: ReduceLROnPlateau reducing learning rate to 1.249999968422344e-05.
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500

Epoch 00032: ReduceLROnPlateau reducing learning rate to 6.24999984211172e-06.
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500

Epoch 00036: ReduceLROnPlateau reducing learning rate to 3.12499992105586e-06.
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500

Epoch 00040: ReduceLROnPlateau reducing learning rate to 1.56249996052793e-06.
Epoch 41/500
Epoch

In [127]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 5407), started 1 day, 7:02:10 ago. (Use '!kill 5407' to kill it.)

In [128]:
model.evaluate(X_test, y_test, batch_size=8)



[1.1723048686981201, 0.6549999713897705]

In [129]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.65      0.41      0.51       600
           1       0.66      0.84      0.73       800

    accuracy                           0.66      1400
   macro avg       0.65      0.62      0.62      1400
weighted avg       0.65      0.66      0.64      1400



## Save best model 

In [130]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_4/model_4_3")

INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_4/model_4_3/assets


# Experiment 4.4: TESS noise

## Read dataframes

In [117]:
preprocess_path = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/tess"
df_train = pd.read_csv(os.path.join(preprocess_path,"df_train.csv"))
df_test = pd.read_csv(os.path.join(preprocess_path,"df_test.csv"))  

## Feature Extraction

In [118]:
X_train, y_train, X_test, y_test = feature_extractor_tess(df_train, df_test, 26) # 13

100%|██████████████████████████████████████| 2800/2800 [00:27<00:00, 101.16it/s]
100%|███████████████████████████████████████| 2800/2800 [00:34<00:00, 80.26it/s]
100%|█████████████████████████████████████| 2800/2800 [00:01<00:00, 1759.07it/s]
100%|█████████████████████████████████████| 1400/1400 [00:00<00:00, 3227.83it/s]
100%|███████████████████████████████████████| 1400/1400 [00:18<00:00, 73.94it/s]
100%|█████████████████████████████████████| 1400/1400 [00:00<00:00, 1796.42it/s]


In [119]:
y_train,  y_test = encode_labels_tess(y_train,  y_test)

In [120]:
np.size(y_train)

2800

In [121]:
X_train, X_test, fitted_scaler = standard_scaling_tess(X_train,  X_test)

In [122]:
X_train.shape

(2800, 157, 26)

## Save Scaler

In [123]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_4/scaler_4_4.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Shuffle training data

In [137]:
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)

## Hypeparameter optimization

In [138]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [139]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-23 10:43:11.174940: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-23 10:43:11.174936: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-23 10:43:11.174942: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-23 10:43:11.174936: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-23 10:43:11.174940: W tensorflow/stream_executor/platform/default/dso_lo

Epoch 1/50
234/234 - 11s - loss: 0.2486 - accuracy: 0.9041
Epoch 1/50
234/234 - 11s - loss: 0.7615 - accuracy: 0.7183
Epoch 1/50
234/234 - 11s - loss: 2.0971 - accuracy: 0.6379
Epoch 1/50
234/234 - 11s - loss: 0.6911 - accuracy: 0.7133
Epoch 1/50
234/234 - 12s - loss: 0.2902 - accuracy: 0.8998
Epoch 1/50
234/234 - 12s - loss: 0.2726 - accuracy: 0.9063
Epoch 1/50
234/234 - 13s - loss: 0.7616 - accuracy: 0.7199
Epoch 1/50
234/234 - 14s - loss: 1.9355 - accuracy: 0.6699
Epoch 2/50
234/234 - 10s - loss: 0.0869 - accuracy: 0.9689
Epoch 2/50
234/234 - 10s - loss: 0.2810 - accuracy: 0.8795
Epoch 2/50
234/234 - 10s - loss: 0.7475 - accuracy: 0.8082
Epoch 2/50
234/234 - 10s - loss: 0.0636 - accuracy: 0.9775
Epoch 2/50
234/234 - 10s - loss: 0.2369 - accuracy: 0.8976
Epoch 2/50
234/234 - 11s - loss: 0.1003 - accuracy: 0.9700
Epoch 2/50
234/234 - 10s - loss: 0.2457 - accuracy: 0.8956
Epoch 2/50
234/234 - 10s - loss: 0.7045 - accuracy: 0.8237
Epoch 3/50
234/234 - 10s - loss: 0.1698 - accuracy: 0.92

2021-09-23 10:51:28.073555: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 30484376 exceeds 10% of free system memory.


Epoch 50/50
234/234 - 9s - loss: 0.0340 - accuracy: 0.9952
Epoch 50/50
234/234 - 8s - loss: 0.0085 - accuracy: 0.9963
Epoch 50/50
234/234 - 8s - loss: 0.0079 - accuracy: 0.9979


2021-09-23 10:51:28.814592: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 30468048 exceeds 10% of free system memory.


117/117 - 2s - loss: 0.0085 - accuracy: 0.9968
117/117 - 2s - loss: 0.0042 - accuracy: 0.9979
117/117 - 2s - loss: 0.0049 - accuracy: 0.9968
117/117 - 2s - loss: 0.0049 - accuracy: 0.9968
117/117 - 2s - loss: 0.0038 - accuracy: 0.9979


2021-09-23 10:51:30.014762: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 30484376 exceeds 10% of free system memory.


117/117 - 2s - loss: 7.0226e-04 - accuracy: 1.0000


2021-09-23 10:51:30.244484: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 30484376 exceeds 10% of free system memory.
2021-09-23 10:51:30.393656: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 30484376 exceeds 10% of free system memory.
2021-09-23 10:51:30.561451: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 30484376 exceeds 10% of free system memory.


Epoch 1/50
234/234 - 10s - loss: 2.1106 - accuracy: 0.6626
Epoch 1/50
467/467 - 11s - loss: 1.0190 - accuracy: 0.8708
Epoch 1/50
234/234 - 10s - loss: 0.2062 - accuracy: 0.9218
Epoch 1/50
467/467 - 12s - loss: 1.1339 - accuracy: 0.8554
Epoch 1/50
234/234 - 12s - loss: 0.1960 - accuracy: 0.9191
Epoch 1/50
234/234 - 13s - loss: 0.2086 - accuracy: 0.9143
Epoch 1/50
234/234 - 13s - loss: 0.7935 - accuracy: 0.6479
Epoch 1/50
467/467 - 14s - loss: 1.0313 - accuracy: 0.8602
Epoch 2/50
234/234 - 10s - loss: 0.9220 - accuracy: 0.7954
Epoch 2/50
234/234 - 10s - loss: 0.0721 - accuracy: 0.9753
Epoch 2/50
467/467 - 12s - loss: 0.2393 - accuracy: 0.9528
Epoch 2/50
234/234 - 10s - loss: 0.0664 - accuracy: 0.9780
Epoch 2/50
467/467 - 11s - loss: 0.1597 - accuracy: 0.9630
Epoch 2/50
234/234 - 11s - loss: 0.0543 - accuracy: 0.9866
Epoch 2/50
234/234 - 11s - loss: 0.3841 - accuracy: 0.8130
Epoch 2/50
467/467 - 11s - loss: 0.1978 - accuracy: 0.9636
Epoch 3/50
234/234 - 10s - loss: 0.4258 - accuracy: 0.86

2021-09-23 11:00:24.257538: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 30484376 exceeds 10% of free system memory.
2021-09-23 11:00:24.453700: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 30484376 exceeds 10% of free system memory.


Epoch 45/50
467/467 - 15s - loss: 0.0335 - accuracy: 0.9968


2021-09-23 11:00:24.505636: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 30468048 exceeds 10% of free system memory.


Epoch 46/50
467/467 - 7s - loss: 1.5455e-05 - accuracy: 1.0000
Epoch 46/50
467/467 - 8s - loss: 0.0317 - accuracy: 0.9973
Epoch 46/50
467/467 - 9s - loss: 3.7094e-04 - accuracy: 0.9995
Epoch 1/50
234/234 - 12s - loss: 2.4749 - accuracy: 0.5833
Epoch 1/50
234/234 - 13s - loss: 0.7924 - accuracy: 0.6272
Epoch 1/50
234/234 - 13s - loss: 0.7971 - accuracy: 0.6261
Epoch 1/50
234/234 - 14s - loss: 2.6961 - accuracy: 0.5831
Epoch 1/50
234/234 - 14s - loss: 2.1142 - accuracy: 0.6069
Epoch 47/50
467/467 - 12s - loss: 0.0672 - accuracy: 0.9968
Epoch 47/50
467/467 - 12s - loss: 0.0157 - accuracy: 0.9973
Epoch 47/50
467/467 - 12s - loss: 0.0014 - accuracy: 0.9995
Epoch 2/50
234/234 - 10s - loss: 1.2507 - accuracy: 0.7268
Epoch 2/50
234/234 - 10s - loss: 0.3875 - accuracy: 0.8200
Epoch 2/50
234/234 - 10s - loss: 0.3646 - accuracy: 0.8238
Epoch 2/50
234/234 - 9s - loss: 1.3277 - accuracy: 0.7229
Epoch 2/50
234/234 - 11s - loss: 1.1383 - accuracy: 0.7349
Epoch 48/50
467/467 - 11s - loss: 0.0602 - acc

2021-09-23 11:01:45.069000: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 30468048 exceeds 10% of free system memory.
2021-09-23 11:01:53.472960: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 30484376 exceeds 10% of free system memory.


Epoch 7/50
234/234 - 186s - loss: 0.2491 - accuracy: 0.9106
234/234 - 194s - loss: 0.0193 - accuracy: 0.9968
Epoch 8/50
234/234 - 186s - loss: 0.2500 - accuracy: 0.9116
Epoch 8/50
234/234 - 186s - loss: 0.0780 - accuracy: 0.9711
Epoch 8/50
234/234 - 186s - loss: 0.0640 - accuracy: 0.9743
Epoch 8/50
234/234 - 187s - loss: 0.2521 - accuracy: 0.9185
Epoch 8/50
234/234 - 10s - loss: 0.2397 - accuracy: 0.9197
Epoch 9/50
234/234 - 11s - loss: 0.2164 - accuracy: 0.9250
Epoch 1/50
234/234 - 17s - loss: 1.0171 - accuracy: 0.8281
Epoch 1/50
234/234 - 17s - loss: 1.0411 - accuracy: 0.8564
Epoch 9/50
234/234 - 12s - loss: 0.0615 - accuracy: 0.9786
Epoch 9/50
234/234 - 11s - loss: 0.2234 - accuracy: 0.9239
Epoch 9/50
234/234 - 12s - loss: 0.0658 - accuracy: 0.9738
Epoch 9/50
234/234 - 12s - loss: 0.1935 - accuracy: 0.9266
Epoch 1/50
234/234 - 18s - loss: 0.9559 - accuracy: 0.8607
Epoch 2/50
234/234 - 11s - loss: 0.1630 - accuracy: 0.9464
Epoch 2/50
234/234 - 12s - loss: 0.2034 - accuracy: 0.9496
Ep

2021-09-23 11:13:34.478262: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 30468048 exceeds 10% of free system memory.
2021-09-23 11:13:35.128645: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 30484376 exceeds 10% of free system memory.
2021-09-23 11:13:35.337560: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 30468048 exceeds 10% of free system memory.


117/117 - 2s - loss: 0.0095 - accuracy: 0.9968


2021-09-23 11:13:36.746639: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 30484376 exceeds 10% of free system memory.


Epoch 43/50
234/234 - 7s - loss: 0.0048 - accuracy: 0.9984
Epoch 44/50
234/234 - 7s - loss: 0.0259 - accuracy: 0.9963
Epoch 44/50
234/234 - 8s - loss: 0.0048 - accuracy: 0.9984
Epoch 44/50
234/234 - 11s - loss: 0.0063 - accuracy: 0.9989
Epoch 1/50
467/467 - 13s - loss: 0.3865 - accuracy: 0.8189
Epoch 1/50
467/467 - 15s - loss: 0.5255 - accuracy: 0.7649
Epoch 1/50
467/467 - 16s - loss: 0.4773 - accuracy: 0.7792
Epoch 45/50
234/234 - 11s - loss: 0.0094 - accuracy: 0.9989
Epoch 1/50
467/467 - 17s - loss: 0.5426 - accuracy: 0.7627
Epoch 45/50
234/234 - 11s - loss: 2.5638e-04 - accuracy: 1.0000
Epoch 1/50
467/467 - 17s - loss: 0.4283 - accuracy: 0.7874
Epoch 45/50
234/234 - 11s - loss: 0.0069 - accuracy: 0.9979
Epoch 2/50
467/467 - 13s - loss: 0.1088 - accuracy: 0.9620
Epoch 46/50
234/234 - 11s - loss: 2.2309e-04 - accuracy: 1.0000
Epoch 2/50
467/467 - 13s - loss: 0.1622 - accuracy: 0.9341
Epoch 2/50
467/467 - 13s - loss: 0.1538 - accuracy: 0.9330
Epoch 46/50
234/234 - 11s - loss: 1.3575e-0

2021-09-23 11:14:53.116709: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 30484376 exceeds 10% of free system memory.


117/117 - 3s - loss: 0.0013 - accuracy: 1.0000
Epoch 6/50
467/467 - 13s - loss: 0.0162 - accuracy: 0.9957
Epoch 6/50
467/467 - 12s - loss: 0.0339 - accuracy: 0.9866
Epoch 6/50
467/467 - 12s - loss: 0.0345 - accuracy: 0.9855
117/117 - 2s - loss: 0.0127 - accuracy: 0.9968
Epoch 6/50
467/467 - 12s - loss: 0.0368 - accuracy: 0.9866
Epoch 6/50
467/467 - 11s - loss: 0.0225 - accuracy: 0.9936
Epoch 7/50
467/467 - 10s - loss: 0.0187 - accuracy: 0.9925
Epoch 7/50
467/467 - 10s - loss: 0.0342 - accuracy: 0.9861
Epoch 7/50
467/467 - 10s - loss: 0.0310 - accuracy: 0.9882
Epoch 7/50
467/467 - 10s - loss: 0.0316 - accuracy: 0.9877
Epoch 7/50
467/467 - 10s - loss: 0.0215 - accuracy: 0.9930
Epoch 1/50
467/467 - 14s - loss: 0.3857 - accuracy: 0.8131
Epoch 8/50
467/467 - 10s - loss: 0.0148 - accuracy: 0.9952
Epoch 8/50
467/467 - 11s - loss: 0.0225 - accuracy: 0.9925
Epoch 8/50
467/467 - 10s - loss: 0.0172 - accuracy: 0.9941
Epoch 8/50
467/467 - 10s - loss: 0.0176 - accuracy: 0.9946
Epoch 8/50
467/467 - 

2021-09-23 11:22:31.569280: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 45718400 exceeds 10% of free system memory.


Epoch 1/50
700/700 - 3s - loss: 0.3182 - accuracy: 0.8500
Epoch 2/50
700/700 - 3s - loss: 0.0660 - accuracy: 0.9793
Epoch 3/50
700/700 - 3s - loss: 0.0393 - accuracy: 0.9864
Epoch 4/50
700/700 - 3s - loss: 0.0303 - accuracy: 0.9871
Epoch 5/50
700/700 - 3s - loss: 0.0228 - accuracy: 0.9914
Epoch 6/50
700/700 - 3s - loss: 0.0121 - accuracy: 0.9964
Epoch 7/50
700/700 - 3s - loss: 0.0127 - accuracy: 0.9957
Epoch 8/50
700/700 - 3s - loss: 0.0094 - accuracy: 0.9957
Epoch 9/50
700/700 - 3s - loss: 0.0062 - accuracy: 0.9971
Epoch 10/50
700/700 - 3s - loss: 0.0045 - accuracy: 0.9986
Epoch 11/50
700/700 - 3s - loss: 0.0036 - accuracy: 0.9989
Epoch 12/50
700/700 - 3s - loss: 0.0090 - accuracy: 0.9971
Epoch 13/50
700/700 - 3s - loss: 0.0030 - accuracy: 0.9989
Epoch 14/50
700/700 - 3s - loss: 0.0049 - accuracy: 0.9986
Epoch 15/50
700/700 - 3s - loss: 0.0041 - accuracy: 0.9979
Epoch 16/50
700/700 - 3s - loss: 0.0047 - accuracy: 0.9979
Epoch 17/50
700/700 - 3s - loss: 9.5326e-04 - accuracy: 1.0000
Ep

In [140]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.9996427297592163 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size': 4}
 mean=0.9982, std=0.001337 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.9982, std=0.0005058 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.9971, std=0.0005044 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.9971, std=0.001337 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.9982, std=0.002526 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.9993, std=0.001011 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.9964, std=0.001334 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.9989, std=0.001516 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.9982, std=0.001822 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.9996, std=0.0005053 using {'lr': 0.0001, 'init_mode': 'uniform', '

## Train with best parameters

In [141]:
#Best Accuracy 0.9996427297592163 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size': 4}
def create_model( init_mode='uniform', lr = 0.0001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,26), kernel_initializer=init_mode)) # 157, 12
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [142]:
seed = 7
np.random.seed(seed)

In [143]:
model = create_model()

In [144]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [145]:
import datetime, os

In [146]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [147]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-23 12:09:08.901446: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-23 12:09:08.901529: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-23 12:09:08.902717: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [148]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [150]:
history = model.fit(X_train, y_train, batch_size=4, epochs=500,
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

2021-09-23 12:09:19.922174: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 45718400 exceeds 10% of free system memory.


Epoch 1/500
 40/700 [>.............................] - ETA: 3s - loss: 0.0291 - accuracy: 0.9937

2021-09-23 12:09:20.197974: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-23 12:09:20.197997: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-23 12:09:20.219244: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-23 12:09:20.220007: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-23 12:09:20.221115: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210923-120908/train/plugins/profile/2021_09_23_12_09_20
2021-09-23 12:09:20.221834: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210923-120908/train/plugins/profile/2021_09_23_12_09_20/helemanc-Latitude-5410.trace.json.gz
2021-09-23 12:09:20.222828: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210923-120908/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500

Epoch 00016: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-05.
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500

Epoch 00021: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500

Epoch 00025: ReduceLROnPlateau reducing learning rate to 1.249999968422344e-05.
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500

Epoch 00029: ReduceLROnPlateau reducing learning rate to 6.24999984211172e-06.
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500

Epoch 00033: ReduceLROnPlateau reducing learning rate to 3.12499992105586e-06.
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500

Epoch 00037: ReduceLROnPlateau reducing learning rate to 1.56249996052793e-06.
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500

Epoc

In [151]:
%tensorboard --logdir logs

In [152]:
model.evaluate(X_test, y_test, batch_size=8)



[4.756372928619385, 0.6592857241630554]

In [153]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.70      0.36      0.48       600
           1       0.65      0.88      0.75       800

    accuracy                           0.66      1400
   macro avg       0.67      0.62      0.61      1400
weighted avg       0.67      0.66      0.63      1400



## Save best model 

In [154]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_4/model_4_4")

INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_4/model_4_4/assets


# Experiment 4.5: SAVEE

In [124]:
df_train = SAVEE_train
df_val = SAVEE_val
df_test = SAVEE_test

In [125]:
df_train.reset_index(drop = True, inplace = True) 
df_val.reset_index(drop = True, inplace = True)
df_test.reset_index(drop = True, inplace = True)

## Feature Extraction

In [126]:
X_train, y_train, X_val, y_val, X_test, y_test = feature_extractor(df_train, df_val, df_test, 26)

100%|█████████████████████████████████████████| 240/240 [00:08<00:00, 28.96it/s]
100%|█████████████████████████████████████████| 240/240 [00:02<00:00, 85.93it/s]
100%|███████████████████████████████████████| 240/240 [00:00<00:00, 1052.83it/s]
100%|█████████████████████████████████████████| 120/120 [00:04<00:00, 28.48it/s]
100%|█████████████████████████████████████████| 120/120 [00:01<00:00, 77.10it/s]
100%|███████████████████████████████████████| 120/120 [00:00<00:00, 1023.26it/s]
100%|█████████████████████████████████████████| 120/120 [00:03<00:00, 36.84it/s]
100%|█████████████████████████████████████████| 120/120 [00:01<00:00, 95.12it/s]
100%|███████████████████████████████████████| 120/120 [00:00<00:00, 1091.05it/s]


In [127]:
y_train, y_val, y_test  = encode_labels(y_train, y_val, y_test)

In [128]:
np.size(y_val)

120

In [129]:
X_train, X_val, X_test, fitted_scaler = standard_scaling(X_train, X_val, X_test)

In [130]:
X_train.shape

(240, 157, 26)

## Shuffle training data

In [131]:
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)

## Save Scaler

In [132]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_4/scaler_4_5.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [163]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [164]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-23 12:32:17.046591: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-23 12:32:17.046829: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-23 12:32:17.060640: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-23 12:32:17.060779: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-23 12:32:17.067041: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or dire

Epoch 1/50
20/20 - 1s - loss: 1.4646 - accuracy: 0.5188
Epoch 1/50
20/20 - 2s - loss: 1.3770 - accuracy: 0.5250
Epoch 1/50
20/20 - 2s - loss: 1.5135 - accuracy: 0.5437
Epoch 1/50
20/20 - 2s - loss: 1.7425 - accuracy: 0.5125
Epoch 1/50
20/20 - 2s - loss: 1.5822 - accuracy: 0.5312
Epoch 1/50
20/20 - 2s - loss: 3.5642 - accuracy: 0.5688
Epoch 1/50
20/20 - 2s - loss: 1.4275 - accuracy: 0.4500
Epoch 1/50
20/20 - 2s - loss: 4.3986 - accuracy: 0.4688
Epoch 2/50
20/20 - 1s - loss: 1.3058 - accuracy: 0.5562
Epoch 2/50
20/20 - 1s - loss: 0.8947 - accuracy: 0.6313
Epoch 2/50
20/20 - 1s - loss: 1.3653 - accuracy: 0.5000
Epoch 2/50
20/20 - 1s - loss: 1.4838 - accuracy: 0.4313
Epoch 2/50
20/20 - 1s - loss: 1.3036 - accuracy: 0.4875
Epoch 2/50
20/20 - 1s - loss: 1.1355 - accuracy: 0.5375
Epoch 2/50
20/20 - 1s - loss: 3.5792 - accuracy: 0.5188
Epoch 2/50
20/20 - 1s - loss: 3.6059 - accuracy: 0.4875
Epoch 3/50
20/20 - 1s - loss: 1.2903 - accuracy: 0.5312
Epoch 3/50
20/20 - 1s - loss: 0.8158 - accuracy:

In [165]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.8166666626930237 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.7, std=0.01021 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.7833, std=0.04602 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.6667, std=0.04823 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.8167, std=0.02125 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.7625, std=0.027 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.6875, std=0.02041 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.6875, std=0.01021 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.7833, std=0.02569 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.7792, std=0.03584 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.7458, std=0.02569 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size': 4}


## Train with best parameters

In [166]:
#Best Accuracy 0.8166666626930237 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
def create_model( init_mode='he_uniform', lr = 0.001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,26), kernel_initializer=init_mode)) # 157
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [167]:
seed = 7
np.random.seed(seed)

In [168]:
model = create_model()

In [169]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [170]:
import datetime, os

In [171]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [172]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-23 12:46:06.910840: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-23 12:46:06.910868: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-23 12:46:06.910904: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [173]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [174]:
history = model.fit(X_train, y_train, batch_size=4, epochs=500, validation_data=(X_val, y_val),
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

Epoch 1/500

2021-09-23 12:46:13.618792: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-23 12:46:13.618814: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-23 12:46:13.675501: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-23 12:46:13.676271: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-23 12:46:13.677498: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210923-124606/train/plugins/profile/2021_09_23_12_46_13
2021-09-23 12:46:13.678215: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210923-124606/train/plugins/profile/2021_09_23_12_46_13/helemanc-Latitude-5410.trace.json.gz
2021-09-23 12:46:13.679319: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210923-124606/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500

Epoch 00006: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500

Epoch 00010: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500

Epoch 00014: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500

Epoch 00018: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500

Epoch 00022: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500

Epoch 00026: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500

Epoch 00030: ReduceLROnPlateau reducing learning rate to 7.812500371073838e-06.
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500

Epoch 00034

In [175]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 144355), started 0:32:37 ago. (Use '!kill 144355' to kill it.)

In [176]:
model.evaluate(X_test, y_test, batch_size=8)



[0.6844130754470825, 0.5833333134651184]

In [177]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.56      0.77      0.65        60
           1       0.63      0.40      0.49        60

    accuracy                           0.58       120
   macro avg       0.60      0.58      0.57       120
weighted avg       0.60      0.58      0.57       120



## Save best model 

In [178]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_4/model_4_5")

INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_4/model_4_5/assets


# Experiment 4.6: SAVEE noise

## Read dataframes

In [133]:
preprocess_path = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/savee"
df_train = pd.read_csv(os.path.join(preprocess_path,"df_train.csv"))
df_val = pd.read_csv(os.path.join(preprocess_path,"df_val.csv"))
df_test = pd.read_csv(os.path.join(preprocess_path,"df_test.csv"))  

## Feature Extraction

In [134]:
X_train, y_train, X_val, y_val, X_test, y_test = feature_extractor(df_train, df_val, df_test, 26) # 13

100%|█████████████████████████████████████████| 480/480 [00:16<00:00, 29.99it/s]
100%|█████████████████████████████████████████| 480/480 [00:05<00:00, 86.49it/s]
100%|███████████████████████████████████████| 480/480 [00:00<00:00, 1493.61it/s]
100%|█████████████████████████████████████████| 120/120 [00:03<00:00, 33.05it/s]
100%|█████████████████████████████████████████| 120/120 [00:01<00:00, 90.06it/s]
100%|███████████████████████████████████████| 120/120 [00:00<00:00, 1031.98it/s]
100%|█████████████████████████████████████████| 120/120 [00:03<00:00, 38.79it/s]
100%|█████████████████████████████████████████| 120/120 [00:01<00:00, 98.27it/s]
100%|███████████████████████████████████████| 120/120 [00:00<00:00, 1109.18it/s]


In [135]:
y_train, y_val, y_test  = encode_labels(y_train, y_val, y_test)

In [136]:
np.size(y_val)

120

In [137]:
X_train, X_val, X_test, fitted_scaler = standard_scaling(X_train, X_val, X_test)

In [138]:
X_train.shape

(480, 157, 26)

## Shuffle training data

In [139]:
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)

## Save Scaler

In [140]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_4/scaler_4_6.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [186]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [187]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-23 12:57:18.306850: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-23 12:57:18.307061: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-23 12:57:18.366581: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-23 12:57:18.366581: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-23 12:57:18.366606: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your mac

Epoch 1/50
40/40 - 1s - loss: 1.1568 - accuracy: 0.5281
Epoch 2/50
40/40 - 1s - loss: 1.1992 - accuracy: 0.5125
Epoch 1/50
40/40 - 2s - loss: 4.1681 - accuracy: 0.4875
Epoch 1/50
40/40 - 3s - loss: 1.4347 - accuracy: 0.5281
Epoch 1/50
40/40 - 3s - loss: 3.7406 - accuracy: 0.4875
Epoch 3/50
40/40 - 2s - loss: 1.2169 - accuracy: 0.5063
Epoch 1/50
40/40 - 3s - loss: 1.6884 - accuracy: 0.4875
Epoch 1/50
40/40 - 3s - loss: 1.7564 - accuracy: 0.4781
Epoch 1/50
40/40 - 3s - loss: 1.4058 - accuracy: 0.5063
Epoch 1/50
40/40 - 4s - loss: 1.6122 - accuracy: 0.5219
Epoch 2/50
40/40 - 2s - loss: 3.5437 - accuracy: 0.5031
Epoch 2/50
40/40 - 2s - loss: 3.3475 - accuracy: 0.5344
Epoch 2/50
40/40 - 2s - loss: 0.7994 - accuracy: 0.5781
Epoch 2/50
40/40 - 2s - loss: 0.6990 - accuracy: 0.5969
Epoch 4/50
40/40 - 2s - loss: 0.9110 - accuracy: 0.5406
Epoch 2/50
40/40 - 2s - loss: 1.4711 - accuracy: 0.5094
Epoch 2/50
40/40 - 2s - loss: 1.0908 - accuracy: 0.5562
Epoch 2/50
40/40 - 2s - loss: 1.2879 - accuracy:

In [188]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.8208333253860474 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.7625, std=0.05328 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.8208, std=0.02619 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.6604, std=0.07259 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.775, std=0.03104 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.8125, std=0.027 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.7521, std=0.01559 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.6396, std=0.02412 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.7792, std=0.03398 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.7938, std=0.02841 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.8042, std=0.02902 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size'

## Train with best parameters

In [191]:
#Best Accuracy 0.8208333253860474 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
def create_model( init_mode='glorot_normal', lr = 0.001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,26), kernel_initializer=init_mode)) # 157, 12
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [192]:
seed = 7
np.random.seed(seed)

In [193]:
model = create_model()

In [194]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [195]:
import datetime, os

In [196]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [197]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-23 13:30:25.687160: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-23 13:30:25.687250: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-23 13:30:25.687421: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [198]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [199]:
history = model.fit(X_train, y_train, batch_size=8, epochs=500, validation_data=(X_val, y_val),
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

Epoch 1/500

2021-09-23 13:30:30.712721: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-23 13:30:30.712747: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-23 13:30:30.779557: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-23 13:30:30.780339: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-23 13:30:30.781606: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210923-133025/train/plugins/profile/2021_09_23_13_30_30
2021-09-23 13:30:30.782333: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210923-133025/train/plugins/profile/2021_09_23_13_30_30/helemanc-Latitude-5410.trace.json.gz
2021-09-23 13:30:30.783707: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210923-133025/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500

Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500

Epoch 00013: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500

Epoch 00017: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500

Epoch 00021: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500

Epoch 00025: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500

Epoch 00029: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500

Epoch 00033: ReduceLROnPlateau reducing learning rate to 7.812500371073838e-06.
Epoch 34/500
Epoch 35/500

In [200]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 144355), started 1:16:59 ago. (Use '!kill 144355' to kill it.)

In [201]:
model.evaluate(X_test, y_test, batch_size=4)



[0.7091980576515198, 0.5249999761581421]

In [202]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       1.00      0.05      0.10        60
           1       0.51      1.00      0.68        60

    accuracy                           0.53       120
   macro avg       0.76      0.53      0.39       120
weighted avg       0.76      0.53      0.39       120



## Save best model 

In [203]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_4/model_4_6")

INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_4/model_4_6/assets


# Experiment 4.7: CREMA 

In [141]:
df_train = CREMA_train
df_val = CREMA_val
df_test = CREMA_test

In [142]:
df_train.reset_index(drop = True, inplace = True) 
df_val.reset_index(drop = True, inplace = True)
df_test.reset_index(drop = True, inplace = True)

## Feature Extraction

In [143]:
X_train, y_train, X_val, y_val, X_test, y_test = feature_extractor(df_train, df_val, df_test, 26)

100%|████████████████████████████████████████| 320/320 [00:00<00:00, 974.65it/s]
100%|█████████████████████████████████████████| 320/320 [00:04<00:00, 76.74it/s]
100%|███████████████████████████████████████| 320/320 [00:00<00:00, 1386.56it/s]
100%|██████████████████████████████████████████| 60/60 [00:00<00:00, 800.20it/s]
100%|███████████████████████████████████████████| 60/60 [00:00<00:00, 80.34it/s]
100%|██████████████████████████████████████████| 60/60 [00:00<00:00, 968.27it/s]
100%|██████████████████████████████████████████| 60/60 [00:00<00:00, 904.12it/s]
100%|███████████████████████████████████████████| 60/60 [00:00<00:00, 75.79it/s]
100%|██████████████████████████████████████████| 60/60 [00:00<00:00, 972.08it/s]


In [144]:
y_train, y_val, y_test  = encode_labels(y_train, y_val, y_test)

In [145]:
np.size(y_val)

60

In [146]:
X_train, X_val, X_test, fitted_scaler = standard_scaling(X_train, X_val, X_test)

In [147]:
X_train.shape

(320, 157, 26)

## Shuffle training data

In [148]:
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)

## Save Scaler

In [149]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_4/scaler_4_7.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [212]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [213]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-23 13:35:24.740142: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-23 13:35:24.740409: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-23 13:35:24.773966: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-23 13:35:24.774268: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-23 13:35:24.786303: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or dire

Epoch 1/50
27/27 - 2s - loss: 1.4294 - accuracy: 0.5493
Epoch 1/50
27/27 - 2s - loss: 1.9114 - accuracy: 0.4860
Epoch 1/50
27/27 - 2s - loss: 1.5480 - accuracy: 0.6150
Epoch 1/50
27/27 - 2s - loss: 1.4985 - accuracy: 0.5962
Epoch 2/50
27/27 - 1s - loss: 1.2520 - accuracy: 0.5258
Epoch 2/50
27/27 - 1s - loss: 1.5635 - accuracy: 0.4507
Epoch 2/50
27/27 - 1s - loss: 1.2674 - accuracy: 0.5607
Epoch 1/50
27/27 - 2s - loss: 1.3650 - accuracy: 0.5540
Epoch 2/50
27/27 - 1s - loss: 1.0634 - accuracy: 0.5728
Epoch 3/50
27/27 - 1s - loss: 1.3551 - accuracy: 0.5587
Epoch 3/50
27/27 - 1s - loss: 1.1936 - accuracy: 0.5446
Epoch 2/50
27/27 - 1s - loss: 1.3746 - accuracy: 0.6526
Epoch 3/50
27/27 - 1s - loss: 1.1899 - accuracy: 0.6075
Epoch 1/50
27/27 - 3s - loss: 2.1098 - accuracy: 0.5981
Epoch 1/50
27/27 - 3s - loss: 4.7805 - accuracy: 0.5540
Epoch 1/50
27/27 - 4s - loss: 4.5852 - accuracy: 0.4836
Epoch 3/50
27/27 - 1s - loss: 0.9786 - accuracy: 0.6338
Epoch 4/50
27/27 - 1s - loss: 1.3831 - accuracy:

In [214]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.8032093048095703 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.7249, std=0.02043 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.8032, std=0.03282 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.7313, std=0.02824 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.7563, std=0.01987 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.7875, std=0.02491 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.7157, std=0.07179 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.6156, std=0.01683 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.7688, std=0.01837 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.7656, std=0.02094 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.8, std=0.009302 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size

## Train with best parameters

In [215]:
#Best Accuracy 0.8032093048095703 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
def create_model( init_mode='glorot_normal', lr = 0.001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,26), kernel_initializer=init_mode)) # 157
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [216]:
seed = 7
np.random.seed(seed)

In [217]:
model = create_model()

In [218]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [219]:
import datetime, os

In [220]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [221]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-23 13:40:53.333346: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-23 13:40:53.333426: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-23 13:40:53.333569: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [222]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [223]:
history = model.fit(X_train, y_train, batch_size=8, epochs=500, validation_data=(X_val, y_val),
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

Epoch 1/500

2021-09-23 13:40:56.608005: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-23 13:40:56.608035: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-23 13:40:56.677400: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-23 13:40:56.678224: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-23 13:40:56.679520: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210923-134053/train/plugins/profile/2021_09_23_13_40_56
2021-09-23 13:40:56.680276: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210923-134053/train/plugins/profile/2021_09_23_13_40_56/helemanc-Latitude-5410.trace.json.gz
2021-09-23 13:40:56.681376: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210923-134053/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500

Epoch 00005: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500

Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500

Epoch 00013: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500

Epoch 00017: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500

Epoch 00021: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500

Epoch 00025: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500

Epoch 00029: ReduceLROnPlateau reducing learning rate to 7.812500371073838e-06.
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500

Epoch 00033: ReduceLROnP

In [224]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 144355), started 1:27:24 ago. (Use '!kill 144355' to kill it.)

In [225]:
model.evaluate(X_test, y_test, batch_size=8)



[0.40318310260772705, 0.8999999761581421]

In [226]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.80      0.67      0.73        12
           1       0.92      0.96      0.94        48

    accuracy                           0.90        60
   macro avg       0.86      0.81      0.83        60
weighted avg       0.90      0.90      0.90        60



## Save best model 

In [227]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_4/model_4_7")

INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_4/model_4_7/assets


# Experiment 4.8: CREMA - noise

In [150]:
preprocess_path = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/crema"
df_train = pd.read_csv(os.path.join(preprocess_path,"df_train.csv"))
df_val = pd.read_csv(os.path.join(preprocess_path,"df_val.csv"))
df_test = pd.read_csv(os.path.join(preprocess_path,"df_test.csv"))  

## Feature Extraction

In [151]:
X_train, y_train, X_val, y_val, X_test, y_test = feature_extractor(df_train, df_val, df_test, 26) # 13

100%|███████████████████████████████████████| 640/640 [00:00<00:00, 1870.73it/s]
100%|█████████████████████████████████████████| 640/640 [00:08<00:00, 72.73it/s]
100%|███████████████████████████████████████| 640/640 [00:00<00:00, 1540.59it/s]
100%|█████████████████████████████████████████| 60/60 [00:00<00:00, 3074.44it/s]
100%|███████████████████████████████████████████| 60/60 [00:00<00:00, 73.21it/s]
100%|██████████████████████████████████████████| 60/60 [00:00<00:00, 824.58it/s]
100%|█████████████████████████████████████████| 60/60 [00:00<00:00, 1687.29it/s]
100%|███████████████████████████████████████████| 60/60 [00:00<00:00, 69.47it/s]
100%|██████████████████████████████████████████| 60/60 [00:00<00:00, 847.86it/s]


In [152]:
y_train, y_val, y_test  = encode_labels(y_train, y_val, y_test)

In [153]:
np.size(y_val)

60

In [154]:
X_train, X_val, X_test, fitted_scaler = standard_scaling(X_train, X_val, X_test)

In [155]:
X_train.shape

(640, 157, 26)

## Shuffle training data

In [156]:
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)

## Save Scaler

In [157]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_4/scaler_4_8.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [235]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [236]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-23 13:52:44.486860: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-23 13:52:44.487949: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-23 13:52:44.615021: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-23 13:52:44.615187: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-23 13:52:44.759918: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or dire

Epoch 1/50
54/54 - 2s - loss: 1.4508 - accuracy: 0.5681
Epoch 1/50
54/54 - 3s - loss: 1.3127 - accuracy: 0.5691
Epoch 1/50
54/54 - 3s - loss: 1.3952 - accuracy: 0.5714
Epoch 1/50
54/54 - 4s - loss: 1.3267 - accuracy: 0.6033
Epoch 1/50
54/54 - 3s - loss: 1.7564 - accuracy: 0.4941
Epoch 1/50
54/54 - 4s - loss: 1.2455 - accuracy: 0.6019
Epoch 2/50
54/54 - 2s - loss: 1.4761 - accuracy: 0.5446
Epoch 2/50
54/54 - 2s - loss: 1.0986 - accuracy: 0.5808
Epoch 1/50
54/54 - 5s - loss: 3.8148 - accuracy: 0.5761
Epoch 2/50
54/54 - 2s - loss: 0.7604 - accuracy: 0.6417
Epoch 2/50
54/54 - 2s - loss: 0.7362 - accuracy: 0.6362
Epoch 2/50
54/54 - 3s - loss: 1.0861 - accuracy: 0.5574
Epoch 1/50
54/54 - 6s - loss: 3.9844 - accuracy: 0.4718
Epoch 3/50
54/54 - 2s - loss: 1.0341 - accuracy: 0.5691
Epoch 2/50
54/54 - 2s - loss: 0.8794 - accuracy: 0.6136
Epoch 3/50
54/54 - 2s - loss: 1.1555 - accuracy: 0.5376
Epoch 2/50
54/54 - 2s - loss: 3.4970 - accuracy: 0.5176
Epoch 3/50
54/54 - 2s - loss: 0.6083 - accuracy:

In [237]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.8608953754107157 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.778, std=0.04796 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.8609, std=0.03137 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.7155, std=0.05592 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.8437, std=0.02139 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.8546, std=0.03156 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.725, std=0.03842 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.7031, std=0.01016 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.8422, std=0.006169 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.8249, std=0.04468 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.8453, std=0.017 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size'

## Train with best parameters

In [238]:
#Best Accuracy 0.8608953754107157 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
def create_model( init_mode='glorot_normal', lr = 0.001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,26), kernel_initializer=init_mode)) # 157, 12
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [239]:
seed = 7
np.random.seed(seed)

In [240]:
model = create_model()

In [241]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [242]:
import datetime, os

In [243]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [244]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-23 14:02:48.581723: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-23 14:02:48.581751: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-23 14:02:48.581797: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [245]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [246]:
history = model.fit(X_train, y_train, batch_size=8, epochs=500, validation_data=(X_val, y_val),
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

Epoch 1/500

2021-09-23 14:02:52.549866: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-23 14:02:52.549894: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-23 14:02:52.608574: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-23 14:02:52.609342: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-23 14:02:52.610571: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210923-140247/train/plugins/profile/2021_09_23_14_02_52
2021-09-23 14:02:52.611361: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210923-140247/train/plugins/profile/2021_09_23_14_02_52/helemanc-Latitude-5410.trace.json.gz
2021-09-23 14:02:52.612486: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210923-140247/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500

Epoch 00020: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500

Epoch 00024: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500

Epoch 00028: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500

Epoch 00032: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500

Epoch 00036: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500

Epoch 00040: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.
Epoch 41/500
Ep

In [247]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 144355), started 1:49:31 ago. (Use '!kill 144355' to kill it.)

In [248]:
model.evaluate(X_test, y_test, batch_size=8)



[0.4743385314941406, 0.8166666626930237]

In [249]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       1.00      0.08      0.15        12
           1       0.81      1.00      0.90        48

    accuracy                           0.82        60
   macro avg       0.91      0.54      0.53        60
weighted avg       0.85      0.82      0.75        60



## Save best model 

In [250]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_4/model_4_8")

INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_4/model_4_8/assets


# Experiment 4.9: RAVDESS - TESS - SAVEE

In [158]:
df_train = pd.concat([RAV_train, SAVEE_train, TESS_train])
df_val = pd.concat([RAV_val, SAVEE_val])
#df_test = pd.concat([RAV_test, SAVEE_test, TESS_test])
df_test = pd.concat([RAV_train, SAVEE_test])

In [159]:
df_train.reset_index(drop = True, inplace = True) 
df_val.reset_index(drop = True, inplace = True)
df_test.reset_index(drop = True, inplace = True)

## Feature Extraction

In [160]:
X_train, y_train, X_val, y_val, X_test, y_test = feature_extractor(df_train, df_val, df_test, 26)

100%|██████████████████████████████████████| 2840/2840 [00:21<00:00, 130.54it/s]
100%|███████████████████████████████████████| 2840/2840 [00:33<00:00, 84.66it/s]
100%|█████████████████████████████████████| 2840/2840 [00:01<00:00, 1899.10it/s]
100%|█████████████████████████████████████████| 240/240 [00:03<00:00, 63.41it/s]
100%|█████████████████████████████████████████| 240/240 [00:02<00:00, 82.69it/s]
100%|███████████████████████████████████████| 240/240 [00:00<00:00, 1261.64it/s]
100%|██████████████████████████████████████| 1320/1320 [00:03<00:00, 365.29it/s]
100%|███████████████████████████████████████| 1320/1320 [00:15<00:00, 83.82it/s]
100%|█████████████████████████████████████| 1320/1320 [00:00<00:00, 1769.41it/s]


In [161]:
y_train, y_val, y_test  = encode_labels(y_train, y_val, y_test)

In [162]:
np.size(y_test)

1320

In [163]:
X_train, X_val, X_test, fitted_scaler = standard_scaling(X_train, X_val, X_test)

In [164]:
X_train.shape

(2840, 157, 26)

## Shuffle training data

In [165]:
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)

## Save Scaler

In [166]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_4/scaler_4_9.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [259]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [260]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-23 14:07:45.798758: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-23 14:07:45.798968: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-23 14:07:45.833965: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-23 14:07:45.834115: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-23 14:07:45.860594: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or dire

Epoch 1/50
237/237 - 11s - loss: 0.7088 - accuracy: 0.7021
Epoch 1/50
237/237 - 11s - loss: 1.0660 - accuracy: 0.5935
Epoch 1/50
237/237 - 11s - loss: 2.4586 - accuracy: 0.6144
Epoch 1/50
237/237 - 12s - loss: 0.9826 - accuracy: 0.6202
Epoch 1/50
237/237 - 12s - loss: 1.1379 - accuracy: 0.5948
Epoch 1/50
237/237 - 12s - loss: 0.7033 - accuracy: 0.6895
Epoch 1/50
237/237 - 12s - loss: 0.6553 - accuracy: 0.6999
Epoch 1/50
237/237 - 12s - loss: 2.5589 - accuracy: 0.5800
Epoch 2/50
237/237 - 10s - loss: 0.4342 - accuracy: 0.7676
Epoch 2/50
237/237 - 10s - loss: 1.5483 - accuracy: 0.6593
Epoch 2/50
237/237 - 10s - loss: 0.4482 - accuracy: 0.7614
Epoch 2/50
237/237 - 10s - loss: 0.6528 - accuracy: 0.7010
Epoch 2/50
237/237 - 10s - loss: 0.4401 - accuracy: 0.7670
Epoch 2/50
237/237 - 10s - loss: 0.6737 - accuracy: 0.7036
Epoch 2/50
237/237 - 11s - loss: 0.6818 - accuracy: 0.6859
Epoch 2/50
237/237 - 10s - loss: 1.5058 - accuracy: 0.6476
Epoch 3/50
237/237 - 10s - loss: 0.4015 - accuracy: 0.78

2021-09-23 14:16:15.877344: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 30925232 exceeds 10% of free system memory.


119/119 - 2s - loss: 0.2054 - accuracy: 0.9186
119/119 - 2s - loss: 0.3073 - accuracy: 0.8490


2021-09-23 14:16:16.848416: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 30908904 exceeds 10% of free system memory.
2021-09-23 14:16:16.888807: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 30925232 exceeds 10% of free system memory.


119/119 - 2s - loss: 0.2961 - accuracy: 0.8522
Epoch 50/50
237/237 - 8s - loss: 0.1814 - accuracy: 0.9165
119/119 - 2s - loss: 0.2198 - accuracy: 0.9133
119/119 - 1s - loss: 0.2324 - accuracy: 0.9039


2021-09-23 14:16:17.396489: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 30908904 exceeds 10% of free system memory.


119/119 - 1s - loss: 0.2345 - accuracy: 0.9029
Epoch 1/50
237/237 - 10s - loss: 2.5616 - accuracy: 0.5903
Epoch 1/50
474/474 - 12s - loss: 1.7284 - accuracy: 0.6762
Epoch 1/50
474/474 - 11s - loss: 1.5110 - accuracy: 0.6572
Epoch 1/50
237/237 - 12s - loss: 0.5823 - accuracy: 0.7084
Epoch 1/50
474/474 - 13s - loss: 1.7945 - accuracy: 0.6499
Epoch 1/50
237/237 - 12s - loss: 0.6203 - accuracy: 0.7116
Epoch 1/50
237/237 - 13s - loss: 0.5919 - accuracy: 0.7064
Epoch 1/50
237/237 - 16s - loss: 0.9735 - accuracy: 0.5747
Epoch 2/50
237/237 - 11s - loss: 1.3679 - accuracy: 0.6753
Epoch 2/50
237/237 - 10s - loss: 0.4228 - accuracy: 0.7670
Epoch 2/50
237/237 - 11s - loss: 0.4178 - accuracy: 0.7744
Epoch 2/50
474/474 - 12s - loss: 0.4858 - accuracy: 0.7485
Epoch 2/50
474/474 - 12s - loss: 0.5414 - accuracy: 0.7242
Epoch 2/50
237/237 - 11s - loss: 0.4149 - accuracy: 0.7761
Epoch 2/50
474/474 - 12s - loss: 0.5223 - accuracy: 0.7249
Epoch 2/50
237/237 - 10s - loss: 0.7506 - accuracy: 0.6461
Epoch 3/5

2021-09-23 14:25:16.224249: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 30908904 exceeds 10% of free system memory.


119/119 - 2s - loss: 0.2744 - accuracy: 0.8691
Epoch 45/50
474/474 - 9s - loss: 0.2191 - accuracy: 0.8859
Epoch 45/50
474/474 - 8s - loss: 0.1751 - accuracy: 0.9324
Epoch 45/50
474/474 - 8s - loss: 0.1757 - accuracy: 0.9219
Epoch 46/50
474/474 - 10s - loss: 0.2237 - accuracy: 0.8769
Epoch 1/50
237/237 - 12s - loss: 0.8754 - accuracy: 0.5755
Epoch 1/50
237/237 - 13s - loss: 0.9612 - accuracy: 0.5711
Epoch 1/50
237/237 - 12s - loss: 3.3866 - accuracy: 0.5420
Epoch 1/50
237/237 - 13s - loss: 3.1487 - accuracy: 0.5277
Epoch 46/50
474/474 - 12s - loss: 0.1659 - accuracy: 0.9276
Epoch 46/50
474/474 - 12s - loss: 0.1614 - accuracy: 0.9303
Epoch 1/50
237/237 - 17s - loss: 2.9288 - accuracy: 0.5549
Epoch 2/50
237/237 - 11s - loss: 0.6666 - accuracy: 0.6605
Epoch 2/50
237/237 - 11s - loss: 2.1484 - accuracy: 0.6281
Epoch 2/50
237/237 - 11s - loss: 0.7278 - accuracy: 0.6556
Epoch 47/50
474/474 - 12s - loss: 0.2305 - accuracy: 0.8785
Epoch 2/50
237/237 - 11s - loss: 2.2282 - accuracy: 0.5880
Epoch

2021-09-23 14:26:21.734481: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 30908904 exceeds 10% of free system memory.


Epoch 6/50
237/237 - 10s - loss: 0.9340 - accuracy: 0.6973
237/237 - 3s - loss: 0.2277 - accuracy: 0.8933
237/237 - 3s - loss: 0.2166 - accuracy: 0.8985


2021-09-23 14:26:26.557464: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 30925232 exceeds 10% of free system memory.


Epoch 6/50
237/237 - 9s - loss: 0.8759 - accuracy: 0.6959
Epoch 7/50
237/237 - 7s - loss: 0.4578 - accuracy: 0.7666
Epoch 7/50
237/237 - 8s - loss: 0.7972 - accuracy: 0.7184
Epoch 7/50
237/237 - 7s - loss: 0.8280 - accuracy: 0.6899
Epoch 7/50
237/237 - 9s - loss: 0.4820 - accuracy: 0.7491
Epoch 7/50
237/237 - 9s - loss: 0.7694 - accuracy: 0.7022
Epoch 1/50
237/237 - 14s - loss: 1.6062 - accuracy: 0.6566
Epoch 8/50
237/237 - 9s - loss: 0.4456 - accuracy: 0.7756
Epoch 8/50
237/237 - 10s - loss: 0.6716 - accuracy: 0.7301
Epoch 8/50
237/237 - 10s - loss: 0.7778 - accuracy: 0.7211
Epoch 8/50
237/237 - 10s - loss: 0.4637 - accuracy: 0.7554
Epoch 1/50
237/237 - 16s - loss: 1.6475 - accuracy: 0.6646
Epoch 1/50
237/237 - 16s - loss: 1.6827 - accuracy: 0.6499
Epoch 8/50
237/237 - 10s - loss: 0.7013 - accuracy: 0.7138
Epoch 2/50
237/237 - 10s - loss: 0.5303 - accuracy: 0.7221
Epoch 9/50
237/237 - 10s - loss: 0.4192 - accuracy: 0.7914
Epoch 9/50
237/237 - 10s - loss: 0.6749 - accuracy: 0.7338
Epoc

2021-09-23 14:34:00.442239: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 30908904 exceeds 10% of free system memory.


Epoch 50/50
237/237 - 9s - loss: 0.2150 - accuracy: 0.8975


2021-09-23 14:34:01.101732: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 30908904 exceeds 10% of free system memory.


Epoch 50/50
237/237 - 10s - loss: 0.3098 - accuracy: 0.8453
119/119 - 3s - loss: 0.3675 - accuracy: 0.8015
119/119 - 1s - loss: 0.2726 - accuracy: 0.8691
Epoch 44/50
237/237 - 7s - loss: 0.2069 - accuracy: 0.9076
Epoch 45/50
237/237 - 7s - loss: 0.1844 - accuracy: 0.9192
119/119 - 2s - loss: 0.3424 - accuracy: 0.8626
Epoch 44/50
237/237 - 7s - loss: 0.1802 - accuracy: 0.9166
Epoch 45/50
237/237 - 6s - loss: 0.1954 - accuracy: 0.9149
Epoch 46/50
237/237 - 7s - loss: 0.1839 - accuracy: 0.9107
Epoch 45/50
237/237 - 8s - loss: 0.1650 - accuracy: 0.9282
Epoch 1/50
474/474 - 14s - loss: 0.8078 - accuracy: 0.6313
Epoch 1/50
474/474 - 15s - loss: 0.7949 - accuracy: 0.6244
Epoch 1/50
474/474 - 15s - loss: 0.8355 - accuracy: 0.6125
Epoch 1/50
474/474 - 16s - loss: 0.6513 - accuracy: 0.6429
Epoch 46/50
237/237 - 11s - loss: 0.1894 - accuracy: 0.9155
Epoch 47/50
237/237 - 11s - loss: 0.1602 - accuracy: 0.9303
Epoch 1/50
474/474 - 18s - loss: 0.6268 - accuracy: 0.6540
Epoch 46/50
237/237 - 11s - lo

2021-09-23 14:34:57.265751: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 30925232 exceeds 10% of free system memory.


Epoch 4/50
474/474 - 12s - loss: 0.4013 - accuracy: 0.8014
Epoch 50/50
237/237 - 9s - loss: 0.1733 - accuracy: 0.9192
119/119 - 3s - loss: 0.3424 - accuracy: 0.8669
Epoch 5/50
474/474 - 11s - loss: 0.4106 - accuracy: 0.7887
Epoch 50/50
237/237 - 9s - loss: 0.1540 - accuracy: 0.9293
Epoch 5/50
474/474 - 11s - loss: 0.3950 - accuracy: 0.8046
Epoch 5/50
474/474 - 11s - loss: 0.4112 - accuracy: 0.7866
119/119 - 3s - loss: 0.2159 - accuracy: 0.9112
Epoch 5/50
474/474 - 12s - loss: 0.3689 - accuracy: 0.8056
Epoch 5/50
474/474 - 10s - loss: 0.3693 - accuracy: 0.8088
Epoch 1/50
474/474 - 17s - loss: 0.6599 - accuracy: 0.6478
Epoch 6/50
474/474 - 9s - loss: 0.3733 - accuracy: 0.8093
Epoch 6/50
474/474 - 10s - loss: 0.3782 - accuracy: 0.7998
Epoch 6/50
474/474 - 10s - loss: 0.3907 - accuracy: 0.7904
Epoch 6/50
474/474 - 10s - loss: 0.3566 - accuracy: 0.8146
Epoch 6/50
474/474 - 10s - loss: 0.3462 - accuracy: 0.8236
Epoch 2/50
474/474 - 9s - loss: 0.4776 - accuracy: 0.7508
Epoch 7/50
474/474 - 9s

In [261]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.9204278389612833 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.9025, std=0.00902 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.9081, std=0.007476 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.8641, std=0.01915 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.8849, std=0.0158 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.9204, std=0.01345 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.8775, std=0.01189 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.8261, std=0.02633 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.8909, std=0.01824 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.9102, std=0.001472 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.9134, std=0.01137 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size': 

## Train with best parameters

In [262]:
# Best Accuracy 0.9204278389612833 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
def create_model( init_mode='uniform', lr = 0.001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,26), kernel_initializer=init_mode)) # 157
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [263]:
seed = 7
np.random.seed(seed)

In [264]:
model = create_model()

In [265]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [266]:
import datetime, os

In [267]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [268]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-23 15:15:09.289770: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-23 15:15:09.289809: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-23 15:15:09.289877: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [269]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [270]:
history = model.fit(X_train, y_train, batch_size=8, epochs=500, validation_data=(X_val, y_val),
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

Epoch 1/500
 20/355 [>.............................] - ETA: 3s - loss: 1.0825 - accuracy: 0.5467

2021-09-23 15:15:11.659212: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-23 15:15:11.659235: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-23 15:15:11.716709: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-23 15:15:11.717478: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-23 15:15:11.718724: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210923-151508/train/plugins/profile/2021_09_23_15_15_11
2021-09-23 15:15:11.719442: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210923-151508/train/plugins/profile/2021_09_23_15_15_11/helemanc-Latitude-5410.trace.json.gz
2021-09-23 15:15:11.720531: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210923-151508/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500

Epoch 00018: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500

Epoch 00022: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500

Epoch 00026: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500

Epoch 00030: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500

Epoch 00038: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500

Epoch 00042: ReduceLROnPlateau reducing learning rate to 1.56250007421

In [271]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 144355), started 3:03:51 ago. (Use '!kill 144355' to kill it.)

In [272]:
model.evaluate(X_test, y_test, batch_size=8)



[0.19151173532009125, 0.9439393877983093]

In [273]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.94      0.94      0.94       620
           1       0.95      0.95      0.95       700

    accuracy                           0.94      1320
   macro avg       0.94      0.94      0.94      1320
weighted avg       0.94      0.94      0.94      1320



## Save best model 

In [274]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_4/model_4_9")

INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_4/model_4_9/assets


# Experiment 4.10: RAVDESS - TESS - SAVEE noise

## Read dataframes

In [167]:
preprocess_path_rav = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/ravdess"
preprocess_path_savee = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/savee"
preprocess_path_tess = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/tess"

df_train_rav = pd.read_csv(os.path.join(preprocess_path_rav,"df_train.csv"))
df_val_rav = pd.read_csv(os.path.join(preprocess_path_rav,"df_val.csv"))
df_test_rav = pd.read_csv(os.path.join(preprocess_path_rav,"df_test.csv"))  

df_train_tess = pd.read_csv(os.path.join(preprocess_path_tess,"df_train.csv"))
df_test_tess= pd.read_csv(os.path.join(preprocess_path_tess,"df_test.csv"))  

df_train_savee = pd.read_csv(os.path.join(preprocess_path_savee,"df_train.csv"))
df_val_savee = pd.read_csv(os.path.join(preprocess_path_savee,"df_val.csv"))
df_test_savee = pd.read_csv(os.path.join(preprocess_path_savee,"df_test.csv"))  

In [168]:
df_train = pd.concat([df_train_rav, df_train_savee, df_train_tess])
df_val = pd.concat([df_val_rav, df_val_savee])
#df_test = pd.concat([df_test_rav, df_test_savee, df_test_tess])
df_test = pd.concat([RAV_train, SAVEE_test])

In [169]:
df_train.reset_index(drop = True, inplace = True) 
df_val.reset_index(drop = True, inplace = True)
df_test.reset_index(drop = True, inplace = True)

## Feature Extraction

In [170]:
X_train, y_train, X_val, y_val, X_test, y_test = feature_extractor(df_train, df_val, df_test, 26) # 13

100%|██████████████████████████████████████| 5680/5680 [00:41<00:00, 135.39it/s]
100%|███████████████████████████████████████| 5680/5680 [01:10<00:00, 81.07it/s]
100%|█████████████████████████████████████| 5680/5680 [00:03<00:00, 1817.69it/s]
100%|█████████████████████████████████████████| 240/240 [00:03<00:00, 64.55it/s]
100%|█████████████████████████████████████████| 240/240 [00:02<00:00, 81.86it/s]
100%|███████████████████████████████████████| 240/240 [00:00<00:00, 1223.94it/s]
100%|██████████████████████████████████████| 1320/1320 [00:03<00:00, 368.56it/s]
100%|███████████████████████████████████████| 1320/1320 [00:15<00:00, 84.82it/s]
100%|█████████████████████████████████████| 1320/1320 [00:00<00:00, 1781.58it/s]


In [171]:
y_train, y_val, y_test  = encode_labels(y_train, y_val, y_test)

In [172]:
np.size(y_val)

240

In [173]:
X_train, X_val, X_test, fitted_scaler = standard_scaling(X_train, X_val, X_test)

In [174]:
X_train.shape

(5680, 157, 26)

## Shuffle training data

In [175]:
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)

## Save Scaler

In [176]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_4/scaler_4_10.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [65]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [66]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-24 13:17:20.237013: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-24 13:17:20.237267: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-24 13:17:20.239630: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-24 13:17:20.239779: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-24 13:17:20.310593: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or dire

Epoch 1/50
474/474 - 19s - loss: 0.8589 - accuracy: 0.6173
Epoch 1/50
474/474 - 19s - loss: 0.8419 - accuracy: 0.6163
Epoch 1/50
474/474 - 20s - loss: 0.5829 - accuracy: 0.7079
Epoch 1/50
474/474 - 20s - loss: 0.6112 - accuracy: 0.6942
Epoch 1/50
474/474 - 21s - loss: 2.0402 - accuracy: 0.5809
Epoch 1/50
474/474 - 21s - loss: 0.8318 - accuracy: 0.6158
Epoch 1/50
474/474 - 21s - loss: 0.5806 - accuracy: 0.7135
Epoch 1/50
474/474 - 21s - loss: 1.8782 - accuracy: 0.5895
Epoch 2/50
474/474 - 19s - loss: 0.5872 - accuracy: 0.6937
Epoch 2/50
474/474 - 20s - loss: 0.5859 - accuracy: 0.6896
Epoch 2/50
474/474 - 20s - loss: 0.4292 - accuracy: 0.7382
Epoch 2/50
474/474 - 18s - loss: 0.5577 - accuracy: 0.7058
Epoch 2/50
474/474 - 19s - loss: 1.0115 - accuracy: 0.6567
Epoch 2/50
474/474 - 20s - loss: 0.4226 - accuracy: 0.7407
Epoch 2/50
474/474 - 20s - loss: 0.4156 - accuracy: 0.7571
Epoch 2/50
474/474 - 20s - loss: 0.9755 - accuracy: 0.6503
Epoch 3/50
474/474 - 19s - loss: 0.5131 - accuracy: 0.73

2021-09-24 14:25:55.144608: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-09-24 14:25:55.145015: I tensorflow/compiler/jit/xla_gpu_device.cc:99] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-09-24 14:25:55.264249: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2021-09-24 14:25:55.284155: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2299965000 Hz


Epoch 1/50
1420/1420 - 5s - loss: 0.6361 - accuracy: 0.6769
Epoch 2/50
1420/1420 - 5s - loss: 0.4719 - accuracy: 0.7525
Epoch 3/50
1420/1420 - 5s - loss: 0.4176 - accuracy: 0.7748
Epoch 4/50
1420/1420 - 5s - loss: 0.3788 - accuracy: 0.7935
Epoch 5/50
1420/1420 - 5s - loss: 0.3570 - accuracy: 0.8090
Epoch 6/50
1420/1420 - 5s - loss: 0.3317 - accuracy: 0.8243
Epoch 7/50
1420/1420 - 5s - loss: 0.3185 - accuracy: 0.8289
Epoch 8/50
1420/1420 - 5s - loss: 0.3042 - accuracy: 0.8403
Epoch 9/50
1420/1420 - 5s - loss: 0.2859 - accuracy: 0.8526
Epoch 10/50
1420/1420 - 5s - loss: 0.2856 - accuracy: 0.8484
Epoch 11/50
1420/1420 - 5s - loss: 0.2708 - accuracy: 0.8643
Epoch 12/50
1420/1420 - 5s - loss: 0.2669 - accuracy: 0.8683
Epoch 13/50
1420/1420 - 5s - loss: 0.2560 - accuracy: 0.8741
Epoch 14/50
1420/1420 - 5s - loss: 0.2456 - accuracy: 0.8831
Epoch 15/50
1420/1420 - 5s - loss: 0.2283 - accuracy: 0.8921
Epoch 16/50
1420/1420 - 5s - loss: 0.2243 - accuracy: 0.8938
Epoch 17/50
1420/1420 - 5s - loss

In [67]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.924473226070404 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.906, std=0.00478 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.9148, std=0.01296 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.8877, std=0.006613 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.8916, std=0.008866 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.9164, std=0.002472 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.9035, std=0.002236 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.8548, std=0.00788 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.9116, std=0.001065 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.9245, std=0.005586 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.9234, std=0.009369 using {'lr': 0.0001, 'init_mode': 'uniform', 'ba

## Train with best parameters

In [68]:
#Best Accuracy 0.9042315085728964 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
def create_model( init_mode='glorot_normal', lr = 0.0001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,26), kernel_initializer=init_mode)) # 157, 12
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [69]:
seed = 7
np.random.seed(seed)

In [70]:
model = create_model()

In [71]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [72]:
import datetime, os

In [73]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [74]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-24 15:03:44.607988: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-24 15:03:44.608090: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-24 15:03:44.664416: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [75]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [76]:
history = model.fit(X_train, y_train, batch_size=4, epochs=500, validation_data=(X_val, y_val),
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

Epoch 1/500
  47/1420 [..............................] - ETA: 6s - loss: 1.5324 - accuracy: 0.4037

2021-09-24 15:03:45.895297: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-24 15:03:45.895322: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-24 15:03:45.905234: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-24 15:03:45.907677: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-24 15:03:45.911280: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210924-150344/train/plugins/profile/2021_09_24_15_03_45
2021-09-24 15:03:45.912000: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210924-150344/train/plugins/profile/2021_09_24_15_03_45/helemanc-Latitude-5410.trace.json.gz
2021-09-24 15:03:45.919226: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210924-150344/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500

Epoch 00010: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-05.
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500

Epoch 00014: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500

Epoch 00018: ReduceLROnPlateau reducing learning rate to 1.249999968422344e-05.
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500

Epoch 00022: ReduceLROnPlateau reducing learning rate to 6.24999984211172e-06.
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500

Epoch 00026: ReduceLROnPlateau reducing learning rate to 3.12499992105586e-06.
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500

Epoch 00030: ReduceLROnPlateau reducing learning rate to 1.56249996052793e-06.
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500

Epoch 00034: ReduceLROnPlateau reducing learning rate to 1e-06.
Epoch 35/500
Epoch 36/500
Epoch

In [77]:
%tensorboard --logdir logs

In [78]:
model.evaluate(X_test, y_test, batch_size=4)



[0.4390370547771454, 0.8545454740524292]

In [79]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.87      0.81      0.84       620
           1       0.84      0.89      0.87       700

    accuracy                           0.85      1320
   macro avg       0.86      0.85      0.85      1320
weighted avg       0.86      0.85      0.85      1320



## Save best model 

In [80]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_4/model_4_10")

2021-09-24 15:09:06.195749: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_4/model_4_10/assets


# Experiment 4.11: RAVDESS - TESS - SAVEE - CREMA

In [177]:
df_train = pd.concat([RAV_train, SAVEE_train, TESS_train, CREMA_train])
df_val = pd.concat([RAV_val, SAVEE_val, CREMA_val])
#df_test = pd.concat([RAV_test, SAVEE_test, TESS_test, CREMA_test])
df_test = pd.concat([RAV_train, SAVEE_test])

In [178]:
df_train.reset_index(drop = True, inplace = True) 
df_val.reset_index(drop = True, inplace = True)
df_test.reset_index(drop = True, inplace = True)

## Feature Extraction

In [179]:
X_train, y_train, X_val, y_val, X_test, y_test = feature_extractor(df_train, df_val, df_test, 26)

100%|██████████████████████████████████████| 3160/3160 [00:21<00:00, 145.13it/s]
100%|███████████████████████████████████████| 3160/3160 [00:38<00:00, 83.00it/s]
100%|█████████████████████████████████████| 3160/3160 [00:01<00:00, 1822.15it/s]
100%|█████████████████████████████████████████| 300/300 [00:03<00:00, 79.49it/s]
100%|█████████████████████████████████████████| 300/300 [00:03<00:00, 78.67it/s]
100%|███████████████████████████████████████| 300/300 [00:00<00:00, 1348.13it/s]
100%|██████████████████████████████████████| 1320/1320 [00:03<00:00, 377.72it/s]
100%|███████████████████████████████████████| 1320/1320 [00:18<00:00, 72.52it/s]
100%|█████████████████████████████████████| 1320/1320 [00:00<00:00, 1685.90it/s]


In [180]:
y_train, y_val, y_test  = encode_labels(y_train, y_val, y_test)

In [181]:
np.size(y_test)

1320

In [182]:
X_train, X_val, X_test, fitted_scaler = standard_scaling(X_train, X_val, X_test)

In [183]:
X_train.shape

(3160, 157, 26)

## Shuffle training data

In [184]:
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)

## Save Scaler

In [185]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_4/scaler_4_11.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [146]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [147]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-24 15:23:37.730984: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-24 15:23:37.731528: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-24 15:23:37.805751: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-24 15:23:37.805899: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-24 15:23:37.870186: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or dire

Epoch 1/50
264/264 - 11s - loss: 0.9672 - accuracy: 0.5836
Epoch 1/50
264/264 - 11s - loss: 0.9680 - accuracy: 0.6165
Epoch 1/50
264/264 - 11s - loss: 0.6404 - accuracy: 0.7010
Epoch 1/50
264/264 - 11s - loss: 0.6685 - accuracy: 0.7089
Epoch 1/50
264/264 - 12s - loss: 0.6783 - accuracy: 0.6972
Epoch 1/50
264/264 - 12s - loss: 0.9087 - accuracy: 0.6137
Epoch 1/50
264/264 - 12s - loss: 2.2387 - accuracy: 0.5923
Epoch 1/50
264/264 - 12s - loss: 2.0604 - accuracy: 0.5973
Epoch 2/50
264/264 - 11s - loss: 0.6094 - accuracy: 0.6923
Epoch 2/50
264/264 - 10s - loss: 1.3531 - accuracy: 0.6540
Epoch 2/50
264/264 - 11s - loss: 0.6717 - accuracy: 0.6891
Epoch 2/50
264/264 - 11s - loss: 0.4349 - accuracy: 0.7556
Epoch 2/50
264/264 - 11s - loss: 0.4572 - accuracy: 0.7537
Epoch 2/50
264/264 - 11s - loss: 0.4458 - accuracy: 0.7545
Epoch 2/50
264/264 - 11s - loss: 0.5966 - accuracy: 0.6986
Epoch 2/50
264/264 - 11s - loss: 1.3059 - accuracy: 0.6553
Epoch 3/50
264/264 - 11s - loss: 0.5542 - accuracy: 0.72

In [148]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.9022166530291239 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.8905, std=0.01363 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.9022, std=0.004811 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.8566, std=0.01163 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.8801, std=0.00551 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.8943, std=0.01244 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.8778, std=0.01183 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.8266, std=0.01793 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.901, std=0.01807 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.8971, std=0.006497 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.8997, std=0.005001 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_

## Train with best parameters

In [149]:
#Best Accuracy 0.8876572251319885 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
def create_model( init_mode='glorot_normal', lr = 0.001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,26), kernel_initializer=init_mode)) # 157
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [150]:
seed = 7
np.random.seed(seed)

In [151]:
model = create_model()

In [152]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [153]:
import datetime, os

In [154]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [155]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-24 16:09:09.646452: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-24 16:09:09.646481: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-24 16:09:09.646522: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [156]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [157]:
history = model.fit(X_train, y_train, batch_size=4, epochs=500, validation_data=(X_val, y_val),
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

Epoch 1/500
 29/790 [>.............................] - ETA: 4s - loss: 3.0549 - accuracy: 0.5363

2021-09-24 16:09:10.636200: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-24 16:09:10.636224: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-24 16:09:10.693869: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-24 16:09:10.694630: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-24 16:09:10.695887: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210924-160909/train/plugins/profile/2021_09_24_16_09_10
2021-09-24 16:09:10.696646: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210924-160909/train/plugins/profile/2021_09_24_16_09_10/helemanc-Latitude-5410.trace.json.gz
2021-09-24 16:09:10.697725: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210924-160909/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500

Epoch 00011: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500

Epoch 00016: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500

Epoch 00025: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500

Epoch 00029: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500

Epoch 00033: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500

Epoch 00037: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500

E

In [177]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 6943), started 1 day, 22:53:02 ago. (Use '!kill 6943' to kill it.)

In [172]:
model.evaluate(X_test, y_test, batch_size=4)



[0.6379664540290833, 0.6666666865348816]

In [160]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.82      0.96      0.89       620
           1       0.96      0.81      0.88       700

    accuracy                           0.88      1320
   macro avg       0.89      0.89      0.88      1320
weighted avg       0.90      0.88      0.88      1320



## Save best model 

In [161]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_4/model_4_11")

INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_4/model_4_11/assets


# Experiment 4.12:  RAVDESS - TESS - SAVEE - CREMA noise

## Read dataframes

In [186]:
preprocess_path_rav = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/ravdess"
preprocess_path_savee = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/savee"
preprocess_path_tess = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/tess"
preprocess_path_crema = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/crema"

df_train_rav = pd.read_csv(os.path.join(preprocess_path_rav,"df_train.csv"))
df_val_rav = pd.read_csv(os.path.join(preprocess_path_rav,"df_val.csv"))
df_test_rav = pd.read_csv(os.path.join(preprocess_path_rav,"df_test.csv"))  

df_train_tess = pd.read_csv(os.path.join(preprocess_path_tess,"df_train.csv"))
df_test_tess= pd.read_csv(os.path.join(preprocess_path_tess,"df_test.csv"))  

df_train_savee = pd.read_csv(os.path.join(preprocess_path_savee,"df_train.csv"))
df_val_savee = pd.read_csv(os.path.join(preprocess_path_savee,"df_val.csv"))
df_test_savee = pd.read_csv(os.path.join(preprocess_path_savee,"df_test.csv"))  

df_train_crema = pd.read_csv(os.path.join(preprocess_path_crema,"df_train.csv"))
df_val_crema = pd.read_csv(os.path.join(preprocess_path_crema,"df_val.csv"))
df_test_crema = pd.read_csv(os.path.join(preprocess_path_crema,"df_test.csv")) 

In [187]:
df_train = pd.concat([df_train_rav, df_train_savee, df_train_tess, df_train_crema])
df_val = pd.concat([df_val_rav, df_val_savee, df_train_crema, df_train_crema])
#df_test = pd.concat([df_test_rav, df_test_savee, df_test_tess])
#df_test = pd.concat([df_test_rav, df_test_savee, df_test_crema ])
df_test = pd.concat([df_test_rav, df_test_savee ])

In [188]:
df_train.reset_index(drop = True, inplace = True) 
df_val.reset_index(drop = True, inplace = True)
df_test.reset_index(drop = True, inplace = True)

## Feature Extraction

In [189]:
X_train, y_train, X_val, y_val, X_test, y_test = feature_extractor(df_train, df_val, df_test, 26) # 13

100%|██████████████████████████████████████| 6320/6320 [00:42<00:00, 149.84it/s]
100%|███████████████████████████████████████| 6320/6320 [01:19<00:00, 79.51it/s]
100%|█████████████████████████████████████| 6320/6320 [00:03<00:00, 1884.28it/s]
100%|██████████████████████████████████████| 1520/1520 [00:04<00:00, 372.24it/s]
100%|███████████████████████████████████████| 1520/1520 [00:19<00:00, 77.11it/s]
100%|█████████████████████████████████████| 1520/1520 [00:00<00:00, 1870.95it/s]
100%|█████████████████████████████████████████| 240/240 [00:03<00:00, 77.80it/s]
100%|█████████████████████████████████████████| 240/240 [00:03<00:00, 73.47it/s]
100%|███████████████████████████████████████| 240/240 [00:00<00:00, 1253.39it/s]


In [190]:
y_train, y_val, y_test  = encode_labels(y_train, y_val, y_test)

In [191]:
np.size(y_val)

1520

In [192]:
X_train, X_val, X_test, fitted_scaler = standard_scaling(X_train, X_val, X_test)

In [193]:
X_train.shape

(6320, 157, 26)

## Shuffle training data

In [194]:
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)

## Save Scaler

In [195]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_4/scaler_4_12.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [173]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [174]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-24 16:20:42.363999: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-24 16:20:42.364213: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-24 16:20:42.488146: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-24 16:20:42.488458: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-24 16:20:42.531146: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or dire

Epoch 1/50
527/527 - 20s - loss: 0.8890 - accuracy: 0.5977
Epoch 1/50
527/527 - 22s - loss: 0.9081 - accuracy: 0.5930
Epoch 1/50
527/527 - 22s - loss: 0.9937 - accuracy: 0.5882
Epoch 1/50
527/527 - 22s - loss: 0.6555 - accuracy: 0.6587
Epoch 1/50
527/527 - 23s - loss: 0.6603 - accuracy: 0.6628
Epoch 1/50
527/527 - 23s - loss: 0.6469 - accuracy: 0.6618
Epoch 1/50
527/527 - 24s - loss: 2.5474 - accuracy: 0.5709
Epoch 1/50
527/527 - 24s - loss: 2.2749 - accuracy: 0.5737
Epoch 2/50
527/527 - 22s - loss: 0.6159 - accuracy: 0.6734
Epoch 2/50
527/527 - 22s - loss: 0.6162 - accuracy: 0.6706
Epoch 2/50
527/527 - 22s - loss: 0.6269 - accuracy: 0.6535
Epoch 2/50
527/527 - 22s - loss: 0.4645 - accuracy: 0.7209
Epoch 2/50
527/527 - 21s - loss: 0.4547 - accuracy: 0.7140
Epoch 2/50
527/527 - 22s - loss: 0.4645 - accuracy: 0.7142
Epoch 2/50
527/527 - 22s - loss: 1.1203 - accuracy: 0.6380
Epoch 2/50
527/527 - 22s - loss: 1.0641 - accuracy: 0.6397
Epoch 3/50
527/527 - 21s - loss: 0.5720 - accuracy: 0.68

2021-09-24 16:39:37.429745: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 34403096 exceeds 10% of free system memory.
2021-09-24 16:39:37.779501: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 68806192 exceeds 10% of free system memory.
2021-09-24 16:39:38.008571: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 68789864 exceeds 10% of free system memory.
2021-09-24 16:39:38.180170: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 68806192 exceeds 10% of free system memory.


264/264 - 3s - loss: 0.2673 - accuracy: 0.8922
Epoch 50/50
527/527 - 20s - loss: 0.2977 - accuracy: 0.8374
264/264 - 2s - loss: 0.3245 - accuracy: 0.8524


2021-09-24 16:39:40.579955: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 68806192 exceeds 10% of free system memory.


264/264 - 2s - loss: 0.3355 - accuracy: 0.8049


2021-09-24 16:39:41.973642: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 68789864 exceeds 10% of free system memory.


Epoch 1/50
527/527 - 22s - loss: 2.2459 - accuracy: 0.5683
Epoch 1/50
527/527 - 23s - loss: 0.5846 - accuracy: 0.6729
Epoch 1/50
527/527 - 23s - loss: 0.5808 - accuracy: 0.6755
Epoch 1/50
1054/1054 - 26s - loss: 1.0240 - accuracy: 0.6406
Epoch 1/50
1054/1054 - 27s - loss: 1.0683 - accuracy: 0.6197
Epoch 1/50
1054/1054 - 28s - loss: 1.1206 - accuracy: 0.6222
Epoch 1/50
527/527 - 30s - loss: 0.5950 - accuracy: 0.6711
Epoch 1/50
527/527 - 29s - loss: 0.9162 - accuracy: 0.5592
Epoch 2/50
527/527 - 24s - loss: 1.0017 - accuracy: 0.6293
Epoch 2/50
527/527 - 23s - loss: 0.4477 - accuracy: 0.7285
Epoch 2/50
527/527 - 23s - loss: 0.4410 - accuracy: 0.7380
Epoch 2/50
1054/1054 - 27s - loss: 0.5242 - accuracy: 0.6924
Epoch 2/50
1054/1054 - 27s - loss: 0.5195 - accuracy: 0.6770
Epoch 2/50
1054/1054 - 27s - loss: 0.5128 - accuracy: 0.6794
Epoch 2/50
527/527 - 24s - loss: 0.4527 - accuracy: 0.7186
Epoch 2/50
527/527 - 23s - loss: 0.6780 - accuracy: 0.6394
Epoch 3/50
527/527 - 23s - loss: 0.7279 - ac

2021-09-24 16:59:13.849501: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 34386768 exceeds 10% of free system memory.


Epoch 44/50
1054/1054 - 26s - loss: 0.3327 - accuracy: 0.8094
264/264 - 4s - loss: 0.2260 - accuracy: 0.9084
Epoch 50/50
527/527 - 22s - loss: 0.3034 - accuracy: 0.8448
264/264 - 4s - loss: 0.2472 - accuracy: 0.8955


2021-09-24 16:59:19.985128: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 68789864 exceeds 10% of free system memory.


Epoch 44/50
1054/1054 - 25s - loss: 0.3408 - accuracy: 0.7817
264/264 - 3s - loss: 0.3387 - accuracy: 0.8248


2021-09-24 16:59:22.037442: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 68789864 exceeds 10% of free system memory.


Epoch 44/50
1054/1054 - 23s - loss: 0.3292 - accuracy: 0.8101
Epoch 50/50
527/527 - 20s - loss: 0.2393 - accuracy: 0.8882
264/264 - 5s - loss: 0.2966 - accuracy: 0.8600
Epoch 45/50
1054/1054 - 21s - loss: 0.3299 - accuracy: 0.8077


2021-09-24 16:59:38.433298: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 68806192 exceeds 10% of free system memory.


Epoch 1/50
527/527 - 25s - loss: 0.8941 - accuracy: 0.5443
Epoch 45/50
1054/1054 - 23s - loss: 0.3533 - accuracy: 0.7750
Epoch 1/50
527/527 - 26s - loss: 0.8508 - accuracy: 0.5755
Epoch 45/50
1054/1054 - 23s - loss: 0.3119 - accuracy: 0.8179
Epoch 1/50
527/527 - 27s - loss: 2.4638 - accuracy: 0.5545
Epoch 1/50
527/527 - 30s - loss: 2.8101 - accuracy: 0.5516
Epoch 46/50
1054/1054 - 25s - loss: 0.3308 - accuracy: 0.8066
Epoch 2/50
527/527 - 24s - loss: 0.6699 - accuracy: 0.6316
Epoch 2/50
527/527 - 24s - loss: 0.6482 - accuracy: 0.6578
Epoch 1/50
527/527 - 31s - loss: 2.8244 - accuracy: 0.5565
Epoch 46/50
1054/1054 - 27s - loss: 0.3539 - accuracy: 0.7765
Epoch 2/50
527/527 - 24s - loss: 1.4463 - accuracy: 0.6029
Epoch 46/50
1054/1054 - 26s - loss: 0.3342 - accuracy: 0.8106
Epoch 2/50
527/527 - 24s - loss: 1.6999 - accuracy: 0.6024
Epoch 47/50
1054/1054 - 25s - loss: 0.3352 - accuracy: 0.8075
Epoch 3/50
527/527 - 24s - loss: 0.5916 - accuracy: 0.6774
Epoch 3/50
527/527 - 24s - loss: 0.581

2021-09-24 17:01:57.073051: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 68789864 exceeds 10% of free system memory.


Epoch 50/50
1054/1054 - 25s - loss: 0.3344 - accuracy: 0.7940
Epoch 50/50
1054/1054 - 25s - loss: 0.3189 - accuracy: 0.8225
Epoch 7/50
527/527 - 21s - loss: 0.4908 - accuracy: 0.7308
527/527 - 6s - loss: 0.3521 - accuracy: 0.7764
Epoch 7/50
527/527 - 20s - loss: 0.5965 - accuracy: 0.6883
Epoch 6/50
527/527 - 21s - loss: 0.6521 - accuracy: 0.6730


2021-09-24 17:02:05.396389: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 68789864 exceeds 10% of free system memory.


Epoch 7/50
527/527 - 21s - loss: 0.4883 - accuracy: 0.7364
527/527 - 6s - loss: 0.3185 - accuracy: 0.7888
Epoch 7/50
527/527 - 19s - loss: 0.6136 - accuracy: 0.6829
Epoch 8/50
527/527 - 19s - loss: 0.4762 - accuracy: 0.7446
Epoch 1/50
527/527 - 25s - loss: 1.0179 - accuracy: 0.6352
Epoch 7/50
527/527 - 21s - loss: 0.6047 - accuracy: 0.6853
Epoch 8/50
527/527 - 22s - loss: 0.5846 - accuracy: 0.6860
Epoch 8/50
527/527 - 21s - loss: 0.4572 - accuracy: 0.7504
Epoch 8/50
527/527 - 21s - loss: 0.5825 - accuracy: 0.6971
Epoch 1/50
527/527 - 28s - loss: 1.2071 - accuracy: 0.6245
Epoch 1/50
527/527 - 28s - loss: 1.1708 - accuracy: 0.6355
Epoch 9/50
527/527 - 23s - loss: 0.4498 - accuracy: 0.7553
Epoch 2/50
527/527 - 23s - loss: 0.5257 - accuracy: 0.6736
Epoch 9/50
527/527 - 22s - loss: 0.5670 - accuracy: 0.6945
Epoch 8/50
527/527 - 24s - loss: 0.5922 - accuracy: 0.6825
Epoch 9/50
527/527 - 23s - loss: 0.4484 - accuracy: 0.7589
Epoch 9/50
527/527 - 23s - loss: 0.5664 - accuracy: 0.6976
Epoch 2/5

2021-09-24 17:18:38.930690: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 68789864 exceeds 10% of free system memory.


Epoch 43/50
527/527 - 19s - loss: 0.2458 - accuracy: 0.8792
Epoch 44/50
527/527 - 18s - loss: 0.2421 - accuracy: 0.8863
Epoch 50/50
527/527 - 20s - loss: 0.3449 - accuracy: 0.8094
264/264 - 5s - loss: 0.3723 - accuracy: 0.8386
Epoch 45/50
527/527 - 21s - loss: 0.2794 - accuracy: 0.8490


2021-09-24 17:18:59.144437: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 68789864 exceeds 10% of free system memory.


Epoch 1/50
1054/1054 - 29s - loss: 0.7670 - accuracy: 0.6174
Epoch 44/50
527/527 - 22s - loss: 0.2466 - accuracy: 0.8813
Epoch 1/50
1054/1054 - 29s - loss: 0.7818 - accuracy: 0.6169
Epoch 1/50
1054/1054 - 30s - loss: 0.7625 - accuracy: 0.6122
Epoch 45/50
527/527 - 22s - loss: 0.2489 - accuracy: 0.8861
Epoch 1/50
1054/1054 - 33s - loss: 0.6445 - accuracy: 0.6328
Epoch 46/50
527/527 - 23s - loss: 0.2664 - accuracy: 0.8607
Epoch 45/50
527/527 - 23s - loss: 0.2541 - accuracy: 0.8818
Epoch 2/50
1054/1054 - 27s - loss: 0.5505 - accuracy: 0.7016
Epoch 2/50
1054/1054 - 27s - loss: 0.5531 - accuracy: 0.7028
Epoch 2/50
1054/1054 - 27s - loss: 0.5630 - accuracy: 0.6993
Epoch 1/50
1054/1054 - 36s - loss: 0.6487 - accuracy: 0.6364
Epoch 46/50
527/527 - 24s - loss: 0.2441 - accuracy: 0.8877
Epoch 2/50
1054/1054 - 27s - loss: 0.5125 - accuracy: 0.7221
Epoch 47/50
527/527 - 24s - loss: 0.2725 - accuracy: 0.8547
Epoch 46/50
527/527 - 25s - loss: 0.2350 - accuracy: 0.8880
Epoch 3/50
1054/1054 - 27s - lo

2021-09-24 17:21:05.777420: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 68806192 exceeds 10% of free system memory.


Epoch 50/50
527/527 - 23s - loss: 0.2331 - accuracy: 0.8960
Epoch 6/50
1054/1054 - 23s - loss: 0.4104 - accuracy: 0.7793
264/264 - 5s - loss: 0.2617 - accuracy: 0.8937
Epoch 6/50
1054/1054 - 26s - loss: 0.4172 - accuracy: 0.7660
Epoch 6/50
1054/1054 - 26s - loss: 0.4065 - accuracy: 0.7703
Epoch 50/50
527/527 - 22s - loss: 0.2510 - accuracy: 0.8813
Epoch 5/50
1054/1054 - 25s - loss: 0.3971 - accuracy: 0.7821
264/264 - 4s - loss: 0.2625 - accuracy: 0.8761
Epoch 6/50
1054/1054 - 25s - loss: 0.3863 - accuracy: 0.7935
Epoch 1/50
1054/1054 - 30s - loss: 0.6539 - accuracy: 0.6374
Epoch 7/50
1054/1054 - 23s - loss: 0.4004 - accuracy: 0.7845
Epoch 7/50
1054/1054 - 21s - loss: 0.3951 - accuracy: 0.7828
Epoch 7/50
1054/1054 - 21s - loss: 0.3927 - accuracy: 0.7864
Epoch 6/50
1054/1054 - 20s - loss: 0.3842 - accuracy: 0.7921
Epoch 7/50
1054/1054 - 21s - loss: 0.3672 - accuracy: 0.7999
Epoch 2/50
1054/1054 - 21s - loss: 0.5269 - accuracy: 0.7103
Epoch 8/50
1054/1054 - 21s - loss: 0.3846 - accuracy: 

In [175]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.9096513191858927 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size': 4}
 mean=0.8864, std=0.007932 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.8907, std=0.002225 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.8274, std=0.01946 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.7843, std=0.005657 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.903, std=0.005452 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.8752, std=0.01106 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.8131, std=0.0311 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.8763, std=0.01415 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.904, std=0.0035 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.9097, std=0.003616 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size':

## Train with best parameters

In [178]:
#Best Accuracy 0.8767395615577698 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size': 4}
def create_model( init_mode='uniform', lr = 0.0001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,26), kernel_initializer=init_mode)) # 157, 12
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [179]:
seed = 7
np.random.seed(seed)

In [180]:
model = create_model()

In [1]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [2]:
import datetime, os

In [183]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [184]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-26 14:05:13.525700: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-26 14:05:13.525776: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-26 14:05:13.525890: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [185]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [186]:
history = model.fit(X_train, y_train, batch_size=4, epochs=500, validation_data=(X_val, y_val),
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

Epoch 1/500
  28/1580 [..............................] - ETA: 10s - loss: 0.7927 - accuracy: 0.7147

2021-09-26 14:05:14.538961: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-26 14:05:14.538985: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-26 14:05:14.594344: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-26 14:05:14.595198: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-26 14:05:14.596516: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210926-140513/train/plugins/profile/2021_09_26_14_05_14
2021-09-26 14:05:14.597244: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210926-140513/train/plugins/profile/2021_09_26_14_05_14/helemanc-Latitude-5410.trace.json.gz
2021-09-26 14:05:14.598444: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210926-140513/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500

Epoch 00019: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-05.
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500

Epoch 00049: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500

Epoch 00059: ReduceLROnPlateau reducing learning rate to 1.249999968422344e-05.
Epoch 60/50

In [3]:
%tensorboard --logdir logs

In [188]:
model.evaluate(X_test, y_test, batch_size=4)



[0.9847053289413452, 0.6708333492279053]

In [189]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.74      0.49      0.59       116
           1       0.64      0.84      0.72       124

    accuracy                           0.67       240
   macro avg       0.69      0.67      0.66       240
weighted avg       0.69      0.67      0.66       240



## Save best model 

In [190]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_4/model_4_12")

INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_4/model_4_12/assets
