# Configuration

NOTES: The warnings after the import are referred to the fact that Tensorflow 2.x versions are built to directly look for a GPU in the system. The warning can be forgot if you are not going to use the GPU. 

In [1]:
!source myenv/bin/activate

In [2]:
# samples in 5 seconds of audio, 16 KHz sample rate 
LENGTH_CHOSEN =  80000

In [3]:
import os
import librosa
import numpy as np
from tqdm.notebook import tqdm
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
import seaborn as sns
sns.set_style('whitegrid')
import IPython.display as ipd
import librosa.display
import numpy as np
import pickle
import scipy
import ipywidgets
import math

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import make_pipeline
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score, confusion_matrix
from scipy.cluster.hierarchy import dendrogram
from sklearn.cluster import AgglomerativeClustering
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import KFold, StratifiedKFold


from tqdm import tqdm

import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, Conv2D, AveragePooling1D, MaxPooling2D, Flatten
from tensorflow.keras.optimizers import SGD, Adam 
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras import regularizers

# from livelossplot import PlotLossesKeras
tf.config.list_physical_devices('GPU')

2021-09-29 14:57:47.190628: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-29 14:57:47.190651: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-29 14:57:48.782676: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-09-29 14:57:48.786713: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2021-09-29 14:57:48.873317: E tensorflow/stream_executor/cuda/cuda_driver.cc:328] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2021-09-29 14:57:48.873349: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (helemanc-Latitude-5410): /pr

[]

# Utils

In [4]:
def load_files(df):
    X = []
    for i in tqdm(df['path']): 
        X.append(librosa.load(i, res_type='kaiser_fast', sr=16000))
    return X

def extract_samples(X): 
    samples = []
    for ind,i in enumerate(X):
        samples.append(i[0])
    return samples 

def extract_labels(df): 
    labels = df['emotion_label'].copy()
    return labels 

def compute_lengths(samples): 
    lengths = [len(x) for x in samples]
    return lengths 

def check_outliers(lengths):
    # outliers
    lengths = np.array(lengths)
    print((lengths > 300000).sum())
    new_lengths = lengths[lengths < 300000]
    return new_lengths 

def compute_mean_length(lengths): 
    return lengths.mean()

def cut_and_pad(samples, labels, length_chosen = LENGTH_CHOSEN): 
    X_new = []
    y_new = []
    count = 0 
    for ind,i in enumerate(samples):
        if i.shape[0] < 300000:
            if i.shape[0] > length_chosen:
                new = i[:length_chosen]
                X_new.append(new)
            elif i.shape[0] < length_chosen:
                new = np.pad(i,math.ceil((length_chosen-i.shape[0])/2), mode='median')
                X_new.append(new)
            else:
                X_new.append(i)
            y_new.append(labels[count])
        count+=1
    
    return X_new, y_new
    
def compute_mfccs(samples, n_mfcc): 
    mfccs = []
    for i in tqdm(samples):
        mfcc = librosa.feature.mfcc(y=i, sr=16000, n_mfcc=n_mfcc)
        mfcc = mfcc.T
        mfcc = np.array(mfcc)
        mfccs.append(mfcc[:, 1:]) # get rid of the first component 
    mfccs = np.array(mfccs)
    return mfccs


def feature_extractor(df_train, df_val, df_test, n_mfcc): 
    load_train = load_files(df_train)
    samples_train = extract_samples(load_train)
    labels_train = extract_labels(df_train)
    samples_train, labels_train = cut_and_pad(samples_train, labels_train)
    samples_train = np.array(samples_train)
    labels_train = np.array(labels_train)
    mfccs_train = compute_mfccs(samples_train, n_mfcc = n_mfcc)
    
    
  
    load_val = load_files(df_val)
    samples_val = extract_samples(load_val)
    labels_val = extract_labels(df_val)
    samples_val, labels_val = cut_and_pad(samples_val, labels_val)
    samples_val = np.array(samples_val)
    labels_val = np.array(labels_val)
    mfccs_val = compute_mfccs(samples_val, n_mfcc = n_mfcc)
    
    
    load_test = load_files(df_test)
    samples_test = extract_samples(load_test)
    labels_test = extract_labels(df_test)
    samples_test, labels_test = cut_and_pad(samples_test, labels_test)
    samples_test = np.array(samples_test)
    labels_test = np.array(labels_test)
    mfccs_test = compute_mfccs(samples_test, n_mfcc = n_mfcc)
    

    return mfccs_train, labels_train,  mfccs_val, labels_val, mfccs_test, labels_test
    

def feature_extractor_tess(df_train,  df_test, n_mfcc): 
    # we do not have the validation set here 
    load_train = load_files(df_train)
    samples_train = extract_samples(load_train)
    labels_train = extract_labels(df_train)
    samples_train, labels_train = cut_and_pad(samples_train, labels_train)
    samples_train = np.array(samples_train)
    labels_train = np.array(labels_train)
    mfccs_train = compute_mfccs(samples_train, n_mfcc = n_mfcc)
    
    
    
    load_test = load_files(df_test)
    samples_test = extract_samples(load_test)
    labels_test = extract_labels(df_test)
    samples_test, labels_test = cut_and_pad(samples_test, labels_test)
    samples_test = np.array(samples_test)
    labels_test = np.array(labels_test)
    mfccs_test = compute_mfccs(samples_test, n_mfcc = n_mfcc)
    
    return mfccs_train, labels_train, mfccs_test, labels_test
    
def encode_labels(labels_train, labels_val, labels_test): 
    
    emotion_enc = {'fear':1, 'disgust':1, 'neutral':0, 'calm':0,  'happy':0, 'sadness':1, 'surprise':0, 'angry':1}
    y_train = pd.Series(labels_train).replace(emotion_enc)
  
    y_test = pd.Series(labels_test).map(emotion_enc)
    y_val = pd.Series(labels_val).map(emotion_enc)
    return y_train, y_val, y_test 


def encode_labels_tess(labels_train, labels_test): 
    
    emotion_enc = {'fear':1, 'disgust':1, 'neutral':0, 'calm':0,  'happy':0, 'sadness':1, 'surprise':0, 'angry':1}
    y_train = pd.Series(labels_train).replace(emotion_enc)
  
    y_test = pd.Series(labels_test).map(emotion_enc)
    return y_train, y_test
    
def standard_scaling(X_train, X_val, X_test): 
  
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
    X_test = scaler.transform(X_test.reshape(-1, X_test.shape[-1])).reshape(X_test.shape)
    X_val = scaler.transform(X_val.reshape(-1, X_val.shape[-1])).reshape(X_val.shape)
    return X_train, X_val, X_test, scaler 
    
def standard_scaling_tess(X_train, X_test): 
  
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
    X_test = scaler.transform(X_test.reshape(-1, X_test.shape[-1])).reshape(X_test.shape)
    return X_train, X_test, scaler    
    
# Data Augmentation 
'''
def noise(data):
    noise_amp = 0.035*np.random.uniform()*np.amax(data)
    data = data + noise_amp*np.random.normal(size=data.shape[0])
    return data

# Data Augmentation 
def pitch(data, sampling_rate, pitch_factor=0.7):
    return librosa.effects.pitch_shift(data, sampling_rate, pitch_factor)


def compute_mfccs_augmentation(samples, labels): 
    mfccs = []
    counter = 0 
    for i in tqdm(samples):

       # Weiner Filtering on original noise 
        samples_weiner = scipy.signal.wiener(i)
        is_fin = np.isfinite(samples_weiner).all()


        # Data Augmentation - Noise 
        noise_audio = noise(samples_weiner)

        # Data Augmentation - Pitch 
        pitch_audio = pitch(samples_weiner, sampling_rate=16000)


        # Data Augmentation -  pitch + noise 
        pn = pitch(noise_audio, sampling_rate = 16000)


        if is_fin: 
          # MFCC

          mfcc = librosa.feature.mfcc(y=i, sr=16000, n_mfcc=13)
          mfcc = mfcc.T
          mfccs.append(mfcc[:, 1:])

          mfcc_augmented = librosa.feature.mfcc(y=samples_weiner, sr=16000, n_mfcc=13)
          mfcc_augmented = mfcc_augmented.T
          mfccs.append(mfcc_augmented[:, 1:])

          mfcc_augmented_pitch = librosa.feature.mfcc(y=noise_audio, sr=16000, n_mfcc=13)
          mfcc_augmented_pitch = mfcc_augmented_pitch.T
          mfccs.append(mfcc_augmented_pitch[:, 1:])

          mfcc_augmented_p = librosa.feature.mfcc(y=pitch_audio, sr=16000, n_mfcc=13)
          mfcc_augmented_p = mfcc_augmented_p.T
          mfccs.append(mfcc_augmented_p[:, 1:]) 

          mfcc_augmented_pn = librosa.feature.mfcc(y=pn, sr=16000, n_mfcc=13)
          mfcc_augmented_pn = mfcc_augmented_pn.T
          mfccs.append(mfcc_augmented_pn[:, 1:]) 
    
    mfccs = np.array(mfccs)
    
    # Copy labels 
    y_prov = []
    y = labels 
    for i in range(len(y)): 
      y_prov.append(y[i])
      y_prov.append(y[i])
      y_prov.append(y[i])
      y_prov.append(y[i])
      y_prov.append(y[i])
    y = np.asarray(y_prov)

    return mfccs, y 


'''


'\ndef noise(data):\n    noise_amp = 0.035*np.random.uniform()*np.amax(data)\n    data = data + noise_amp*np.random.normal(size=data.shape[0])\n    return data\n\n# Data Augmentation \ndef pitch(data, sampling_rate, pitch_factor=0.7):\n    return librosa.effects.pitch_shift(data, sampling_rate, pitch_factor)\n\n\ndef compute_mfccs_augmentation(samples, labels): \n    mfccs = []\n    counter = 0 \n    for i in tqdm(samples):\n\n       # Weiner Filtering on original noise \n        samples_weiner = scipy.signal.wiener(i)\n        is_fin = np.isfinite(samples_weiner).all()\n\n\n        # Data Augmentation - Noise \n        noise_audio = noise(samples_weiner)\n\n        # Data Augmentation - Pitch \n        pitch_audio = pitch(samples_weiner, sampling_rate=16000)\n\n\n        # Data Augmentation -  pitch + noise \n        pn = pitch(noise_audio, sampling_rate = 16000)\n\n\n        if is_fin: \n          # MFCC\n\n          mfcc = librosa.feature.mfcc(y=i, sr=16000, n_mfcc=13)\n          mf

# Compute dataframes for datasets and split in Train, Val, Test 

In [5]:
main_path = '/media/helemanc/OS/Users/i2CAT/Desktop/Datasets SER/'
TESS = os.path.join(main_path, "tess/TESS Toronto emotional speech set data/") 
RAV = os.path.join(main_path, "ravdess-emotional-speech-audio/audio_speech_actors_01-24")
SAVEE = os.path.join(main_path, "savee/ALL/")
CREMA = os.path.join(main_path, "creamd/AudioWAV/")

## RADVESS

In [6]:
lst = []
emotion = []
voc_channel = []
full_path = []
modality = []
intensity = []
actors = []
phrase =[]

for root, dirs, files in tqdm(os.walk(RAV)):
    for file in files:
        try:
            #Load librosa array, obtain mfcss, store the file and the mfcss information in a new array
            # X, sample_rate = librosa.load(os.path.join(root,file), res_type='kaiser_fast')
            # mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0) 
            # The instruction below converts the labels (from 1 to 8) to a series from 0 to 7
            # This is because our predictor needs to start from 0 otherwise it will try to predict also 0.
           
            modal = int(file[1:2])
            vchan = int(file[4:5])
            lab = int(file[7:8])
            ints = int(file[10:11])
            phr = int(file[13:14])
            act = int(file[18:20])
            # arr = mfccs, lab
            # lst.append(arr)
            
            modality.append(modal)
            voc_channel.append(vchan)
            emotion.append(lab) #only labels
            intensity.append(ints)
            phrase.append(phr)
            actors.append(act)
            
            full_path.append((root, file)) # only files
          # If the file is not valid, skip it
        except ValueError:
            continue

25it [00:00, 1526.07it/s]


In [8]:
# 01 = neutral, 02 = calm, 03 = happy, 04 = sad, 05 = angry, 06 = fearful, 07 = disgust, 08 = surprised
# merge neutral and calm
emotions_list = ['neutral', 'neutral', 'happy', 'sadness', 'angry', 'fear', 'disgust', 'surprise']
emotion_dict = {em[0]+1:em[1] for em in enumerate(emotions_list)}

df = pd.DataFrame([emotion, voc_channel, modality, intensity, actors, actors,phrase, full_path]).T
df.columns = ['emotion', 'voc_channel', 'modality', 'intensity', 'actors', 'gender', 'phrase', 'path']
df['emotion'] = df['emotion'].map(emotion_dict)
df['voc_channel'] = df['voc_channel'].map({1: 'speech', 2:'song'})
df['modality'] = df['modality'].map({1: 'full AV', 2:'video only', 3:'audio only'})
df['intensity'] = df['intensity'].map({1: 'normal', 2:'strong'})
df['actors'] = df['actors']
df['gender'] = df['actors'].apply(lambda x: 'female' if x%2 == 0 else 'male')
df['phrase'] = df['phrase'].map({1: 'Kids are talking by the door', 2:'Dogs are sitting by the door'})
df['path'] = df['path'].apply(lambda x: x[0] + '/' + x[1])

In [9]:
# remove files with noise to apply the same noise to all files for data augmentation 
df = df[~df.path.str.contains('noise')]

In [10]:
df.head()

Unnamed: 0,emotion,voc_channel,modality,intensity,actors,gender,phrase,path
0,disgust,speech,audio only,normal,1,male,Dogs are sitting by the door,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2,disgust,speech,audio only,strong,1,male,Kids are talking by the door,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
4,disgust,speech,audio only,strong,1,male,Kids are talking by the door,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
6,disgust,speech,audio only,strong,1,male,Dogs are sitting by the door,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
8,disgust,speech,audio only,strong,1,male,Dogs are sitting by the door,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...


In [11]:
# only speech
RAV_df = df
RAV_df = RAV_df.loc[RAV_df.voc_channel == 'speech']

In [12]:
RAV_df.insert(0, "emotion_label", RAV_df.emotion, True)

In [13]:
RAV_df = RAV_df.drop(['emotion', 'voc_channel', 'modality', 'intensity', 'phrase'], 1)

In [14]:
RAV_df

Unnamed: 0,emotion_label,actors,gender,path
0,disgust,1,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2,disgust,1,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
4,disgust,1,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
6,disgust,1,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
8,disgust,1,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
...,...,...,...,...
2871,neutral,24,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2873,neutral,24,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2875,neutral,24,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2877,neutral,24,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...


In [15]:
RAV_train = []
RAV_val = []
RAV_test = []

In [16]:
for index, row in RAV_df.iterrows():
    if row['actors'] in range(1,21): 
        RAV_train.append(row) 
    elif row['actors'] in range(21,23): 
        RAV_val.append(row)
    elif row['actors'] in range(23,25): 
        RAV_test.append(row)
len(RAV_train), len(RAV_val), len(RAV_test)

(1200, 120, 120)

In [17]:
RAV_train = pd.DataFrame(RAV_train)
RAV_val = pd.DataFrame(RAV_val)
RAV_test = pd.DataFrame(RAV_test)

In [18]:
RAV_train = RAV_train.drop(['actors'], 1)
RAV_val = RAV_val.drop(['actors'], 1)
RAV_test = RAV_test.drop(['actors'], 1)

In [19]:
RAV_train.reset_index(drop=True, inplace = True) 
RAV_val.reset_index(drop=True, inplace = True) 
RAV_test.reset_index(drop=True, inplace = True ) 

## SAVEE

In [20]:
# Get the data location for SAVEE
dir_list = os.listdir(SAVEE)

# parse the filename to get the emotions
emotion=[]
path = []
actors = []
gender = []
for i in dir_list:
    actors.append(i[:2])
    if i[-8:-6]=='_a':
        emotion.append('angry')
        gender.append('male')
    elif i[-8:-6]=='_d':
        emotion.append('disgust')
        gender.append('male')
    elif i[-8:-6]=='_f':
        emotion.append('fear')
        gender.append('male')
    elif i[-8:-6]=='_h':
        emotion.append('happy')
        gender.append('male')
    elif i[-8:-6]=='_n':
        emotion.append('neutral')
        gender.append('male')
    elif i[-8:-6]=='sa':
        emotion.append('sadness')
        gender.append('male')
    elif i[-8:-6]=='su':
        emotion.append('surprise')
        gender.append('male') 
    else:
        emotion.append('Unknown') 
    path.append(SAVEE + i)
    
# Now check out the label count distribution 
SAVEE_df = pd.DataFrame(emotion, columns = ['emotion_label'])
                      
SAVEE_df = pd.concat([SAVEE_df,
                      pd.DataFrame(actors, columns = ['actors']),
                      pd.DataFrame(gender, columns = ['gender']), 
                      pd.DataFrame(path, columns = ['path'])], axis = 1)
SAVEE_df.emotion_label.value_counts()

neutral     120
sadness      60
surprise     60
happy        60
disgust      60
fear         60
angry        60
Name: emotion_label, dtype: int64

In [21]:
SAVEE_df.head()

Unnamed: 0,emotion_label,actors,gender,path
0,neutral,DC,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
1,sadness,KL,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2,sadness,KL,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
3,sadness,KL,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
4,sadness,KL,male,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...


In [22]:
SAVEE_train = []
SAVEE_val = []
SAVEE_test = []

In [23]:
#DC, JE, JK, KL
for index, row in SAVEE_df.iterrows(): 
    if row['actors'] == 'DC' or row ['actors'] == 'JE':
        SAVEE_train.append(row)
    elif row['actors'] == 'JK': 
        SAVEE_val.append(row)
    else: 
        SAVEE_test.append(row)
len(SAVEE_train), len(SAVEE_val), len(SAVEE_test)

(240, 120, 120)

In [24]:
SAVEE_train = pd.DataFrame(SAVEE_train)
SAVEE_val = pd.DataFrame(SAVEE_val)
SAVEE_test = pd.DataFrame(SAVEE_test)

In [25]:
SAVEE_train = SAVEE_train.drop(['actors'], 1)
SAVEE_val = SAVEE_val.drop(['actors'], 1)
SAVEE_test = SAVEE_test.drop(['actors'], 1)

In [26]:
SAVEE_train = SAVEE_train.reset_index(drop=True) 
SAVEE_val = SAVEE_val.reset_index(drop=True) 
SAVEE_test = SAVEE_test.reset_index(drop=True) 

## TESS

In [27]:
dir_list = os.listdir(TESS)
dir_list.sort()
dir_list

path = []
emotion = []
gender = []
actors = []

for i in dir_list:
    fname = os.listdir(TESS + i)
    for f in fname:
        if i == 'OAF_angry':
            emotion.append('angry')
            gender.append('female')
            actors.append('OAF')
        elif i == 'YAF_angry': 
            emotion.append('angry')
            gender.append('female')
            actors.append('YAF')
            
            
        elif i == 'OAF_disgust' :
            emotion.append('disgust')
            gender.append('female')
            actors.append('OAF')
        elif i == 'YAF_disgust': 
            emotion.append('disgust')
            gender.append('female')
            actors.append('YAF')
            
            
        elif i == 'OAF_Fear':
            emotion.append('fear')
            gender.append('female')
            actors.append('OAF')
        elif i == 'YAF_fear': 
            emotion.append('fear')
            gender.append('female')
            actors.append('YAF') 
            
            
        elif i == 'OAF_happy' :
            emotion.append('happy')
            gender.append('female')
            actors.append('OAF')
        elif i == 'YAF_happy': 
            emotion.append('angry')
            gender.append('female')
            actors.append('YAF')            
            
        elif i == 'OAF_neutral':
            emotion.append('neutral')
            gender.append('female')
            actors.append('OAF')   
        elif i == 'YAF_neutral': 
            emotion.append('neutral')
            gender.append('female')
            actors.append('YAF')      
            
                
        elif i == 'OAF_Pleasant_surprise':
            emotion.append('surprise')
            gender.append('female')
            actors.append('OAF')
        
        elif i == 'YAF_pleasant_surprised': 
            emotion.append('surprise')
            gender.append('female')
            actors.append('YAF')            
            
        elif i == 'OAF_Sad':
            emotion.append('sadness')
            gender.append('female')
            actors.append('OAF')
        elif i == 'YAF_sad': 
            emotion.append('sadness')
            gender.append('female')
            actors.append('YAF')            
        else:
            emotion.append('Unknown')
        path.append(TESS + i + "/" + f)

TESS_df = pd.DataFrame(emotion, columns = ['emotion_label'])
TESS_df = pd.concat([TESS_df, pd.DataFrame(gender, columns = ['gender']), 
                     pd.DataFrame(actors, columns= ['actors']),
                     pd.DataFrame(path, columns = ['path'])],axis=1)
TESS_df.emotion_label.value_counts()

angry       1200
fear         800
surprise     800
sadness      800
disgust      800
neutral      800
happy        400
Name: emotion_label, dtype: int64

In [28]:
TESS_df= TESS_df[~TESS_df.path.str.contains('noise')]

In [29]:
TESS_train = []
TESS_test = []

In [30]:
for index, row in TESS_df.iterrows(): 
    if row['actors'] == 'YAF': 
        TESS_train.append(row)
    else: 
        TESS_test.append(row)
len(TESS_train), len(TESS_test)

(1400, 1400)

In [31]:
TESS_train = pd.DataFrame(TESS_train)
TESS_test = pd.DataFrame(TESS_test)

In [32]:
TESS_train = TESS_train.reset_index(drop=True) 
TESS_test  = TESS_test.reset_index(drop=True) 

## CREMA-D

In [33]:
males = [1,
5,
11,
14,
15,
16,
17,
19,
22,
23,
26,
27,
31,
32,
33,
34,
35,
36,
38,
39,
41,
42,
44,
45,
48,
50,
51,
57,
59, 
62, 
64,
65, 
66,
67,
68,
69,
70,
71,
77, 
80, 
81, 
83, 
85, 
86, 
87,
88, 
90]

In [34]:
females = [ 2,
3,
4,
6,
7,
8,
9,
10,
12,
13,
18,
20,
21,
24,
25,
28,
29,
30,
37,
40,
43,
46,
47,
49,
52,
53,
54,
55,
56, 
58, 
60,
61,
63,
72, 
73, 
74, 
75, 
76, 
78, 
79, 
82, 
84, 
89, 
91]

In [35]:
crema_directory_list = os.listdir(CREMA)

file_emotion = []
file_path = []
actors = []
gender = []




for file in crema_directory_list:

    # storing file emotions
    part=file.split('_')
    
    # use only high intensity files
    if "HI" in part[3] :
        actor = part[0][2:]
        actors.append(actor)
        if int(actor) in males:
            gender.append('male')
        else: 
            gender.append('female')
    
        # storing file paths
        file_path.append(CREMA + file)
        if part[2] == 'SAD':
            file_emotion.append('sadness')
        elif part[2] == 'ANG':
            file_emotion.append('angry')
        elif part[2] == 'DIS':
            file_emotion.append('disgust')
        elif part[2] == 'FEA':
            file_emotion.append('fear')
        elif part[2] == 'HAP':
            file_emotion.append('happy')
        elif part[2] == 'NEU':
            file_emotion.append('neutral')
        else:
            file_emotion.append('Unknown')

# dataframe for emotion of files
emotion_df = pd.DataFrame(file_emotion, columns=['emotion_label'])

# dataframe for path of files.
path_df = pd.DataFrame(file_path, columns=['path'])
actors_df = pd.DataFrame(actors, columns=['actors'])
gender_df = pd.DataFrame(gender, columns=['gender'])                      
Crema_df = pd.concat([emotion_df, actors_df, gender_df, path_df], axis=1)
Crema_df.head()

Unnamed: 0,emotion_label,actors,gender,path
0,happy,91,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
1,sadness,91,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
2,angry,91,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
3,disgust,91,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...
4,fear,91,female,/media/helemanc/OS/Users/i2CAT/Desktop/Dataset...


In [36]:
Crema_df.shape

(455, 4)

In [37]:
actor_files = {}

for index, row in Crema_df.iterrows():
    actor = row['actors']
    if actor not in actor_files.keys(): 
        actor_files[actor] = 1
    else: 
        actor_files[actor]+=1

In [38]:
actor_files

{'91': 5,
 '90': 5,
 '89': 5,
 '88': 5,
 '87': 5,
 '86': 5,
 '85': 5,
 '84': 5,
 '83': 5,
 '82': 5,
 '81': 5,
 '80': 5,
 '79': 5,
 '78': 5,
 '77': 5,
 '76': 5,
 '75': 5,
 '74': 5,
 '73': 5,
 '72': 5,
 '71': 5,
 '70': 5,
 '69': 5,
 '68': 5,
 '67': 5,
 '66': 5,
 '65': 5,
 '64': 5,
 '63': 5,
 '62': 5,
 '61': 5,
 '60': 5,
 '59': 5,
 '58': 5,
 '57': 5,
 '56': 5,
 '55': 5,
 '54': 5,
 '53': 5,
 '52': 5,
 '51': 5,
 '50': 5,
 '49': 5,
 '48': 5,
 '47': 5,
 '46': 5,
 '45': 5,
 '44': 5,
 '43': 5,
 '42': 5,
 '41': 5,
 '40': 5,
 '39': 5,
 '38': 5,
 '37': 5,
 '36': 5,
 '35': 5,
 '34': 5,
 '33': 5,
 '32': 5,
 '31': 5,
 '30': 5,
 '29': 5,
 '28': 5,
 '27': 5,
 '26': 5,
 '25': 5,
 '24': 5,
 '23': 5,
 '22': 5,
 '21': 5,
 '20': 5,
 '19': 5,
 '18': 5,
 '17': 5,
 '16': 5,
 '15': 5,
 '14': 5,
 '13': 5,
 '12': 5,
 '11': 5,
 '10': 5,
 '09': 5,
 '08': 5,
 '07': 5,
 '06': 5,
 '05': 5,
 '04': 5,
 '03': 5,
 '02': 5,
 '01': 5}

In [39]:
count_males = 0 
count_females = 0 
male_list = []
for index, row in Crema_df.iterrows(): 
    gender = row['gender']
    actor = row['actors']
    if gender == 'male':
        count_males +=1
        if actor not in male_list: 
            male_list.append(actor)
    else: 
        count_females +=1

In [40]:
count_males, count_females

(235, 220)

Since there are more males than females we will remove randomly 3 male actors (since there are exactly 5 audio files per actor)

In [41]:
import random 
random.seed(42)
males_to_remove = random.sample(male_list, 3)
males_to_remove

['17', '80', '88']

In [42]:
new_df = []
for index, row in Crema_df.iterrows(): 
    if row['actors'] not in males_to_remove: 
        new_df.append(row)

In [43]:
CREMA_df = pd.DataFrame(new_df)

In [44]:
for index, row in CREMA_df.iterrows(): 
    if row['actors'] == '17': 
        print("Elements not removed")

In [45]:
count_males = 0 
count_females = 0 
male_list = []
female_list = []
for index, row in CREMA_df.iterrows(): 
    gender = row['gender']
    actor = row['actors']
    if gender == 'male':
        count_males +=1
        if actor not in male_list: 
            male_list.append(actor)
    else: 
        count_females +=1
        if actor not in female_list: 
            female_list.append(actor)

In [46]:
count_males, count_females

(220, 220)

In [47]:
len(female_list)

44

In [48]:
len(male_list)

44

In [49]:
CREMA_train = []
CREMA_val = []
CREMA_test = []

In [50]:
females_train = random.sample(female_list, 32)
males_train = random.sample(male_list, 32)

# remove the elements assigned to train 
for element in females_train:
    if element in female_list:
        female_list.remove(element)
        
for element in males_train:
    if element in male_list:
        male_list.remove(element)

         
females_val = random.sample(female_list, 6) 
males_val = random.sample(male_list, 6) 

# remove the elements assigned to val
for element in females_val:
    if element in female_list:
        female_list.remove(element)
        
for element in males_val:
    if element in male_list:
        male_list.remove(element)
        
females_test = random.sample(female_list, 6) 
males_test = random.sample(male_list, 6)        

In [51]:
females_train, males_train, females_val, males_val, females_test, males_test

(['54',
  '56',
  '58',
  '74',
  '76',
  '13',
  '78',
  '29',
  '84',
  '89',
  '09',
  '60',
  '04',
  '55',
  '52',
  '91',
  '02',
  '07',
  '46',
  '49',
  '37',
  '10',
  '20',
  '75',
  '21',
  '53',
  '06',
  '28',
  '18',
  '63',
  '30',
  '03'],
 ['57',
  '69',
  '65',
  '45',
  '77',
  '81',
  '41',
  '15',
  '44',
  '23',
  '59',
  '86',
  '34',
  '01',
  '85',
  '66',
  '31',
  '33',
  '05',
  '48',
  '50',
  '67',
  '51',
  '22',
  '36',
  '87',
  '71',
  '39',
  '42',
  '11',
  '32',
  '14'],
 ['43', '61', '40', '47', '73', '24'],
 ['62', '68', '64', '83', '70', '26'],
 ['08', '79', '12', '25', '72', '82'],
 ['16', '19', '38', '35', '27', '90'])

In [52]:
train = females_train + males_train 
val = females_val + males_val 
test = females_test + males_test

In [53]:
for index, row in CREMA_df.iterrows(): 
    gender = row['gender']
    actor = row['actors']
    if actor in train: 
        CREMA_train.append(row)
    elif actor in val: 
        CREMA_val.append(row)
    else:
        CREMA_test.append(row)

In [54]:
CREMA_train = pd.DataFrame(CREMA_train) 
CREMA_val = pd.DataFrame(CREMA_val) 
CREMA_test = pd.DataFrame(CREMA_test)

In [55]:
CREMA_train.shape, CREMA_val.shape, CREMA_test.shape

((320, 4), (60, 4), (60, 4))

In [56]:
CREMA_train = CREMA_train.reset_index(drop=True) 
CREMA_val = CREMA_val.reset_index(drop = True) 

# Model

In [57]:
def create_model( init_mode='glorot_uniform', lr = 0.001, input_dim=(157, 12)):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=input_dim, kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.5))

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

# Experiment 1.1 : RAVDESS

In [58]:
df_train = RAV_train
df_val = RAV_val
df_test = RAV_test

In [59]:
df_train.reset_index(drop = True, inplace = True) 
df_val.reset_index(drop = True, inplace = True)
df_test.reset_index(drop = True, inplace = True)

## Feature Extraction

In [60]:
X_train, y_train, X_val, y_val, X_test, y_test = feature_extractor(df_train, df_val, df_test, 13)

100%|██████████████████████████████████████| 1200/1200 [00:01<00:00, 810.31it/s]
100%|██████████████████████████████████████| 1200/1200 [00:11<00:00, 101.15it/s]
100%|███████████████████████████████████████| 120/120 [00:00<00:00, 1532.68it/s]
100%|████████████████████████████████████████| 120/120 [00:01<00:00, 101.27it/s]
100%|███████████████████████████████████████| 120/120 [00:00<00:00, 1363.43it/s]
100%|████████████████████████████████████████| 120/120 [00:01<00:00, 101.07it/s]


In [61]:
y_train, y_val, y_test = encode_labels(y_train, y_val, y_test)

In [62]:
np.size(y_val)

120

In [64]:
X_train, X_val, X_test, fitted_scaler = standard_scaling(X_train, X_val, X_test)

In [None]:
X_train.shape

## Save Scaler

In [65]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_1/scaler_1_1.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [151]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [155]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-01 13:19:32.512921: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2021-09-01 13:19:32.532392: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2299965000 Hz
2021-09-01 13:19:32.549236: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2021-09-01 13:19:32.549722: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2299965000 Hz


Epoch 1/50
100/100 - 5s - loss: 0.8241 - accuracy: 0.5938
Epoch 1/50
100/100 - 5s - loss: 1.2473 - accuracy: 0.5387
Epoch 1/50
100/100 - 5s - loss: 1.1164 - accuracy: 0.5412
Epoch 1/50
100/100 - 5s - loss: 0.8522 - accuracy: 0.5512
Epoch 1/50
100/100 - 5s - loss: 1.2074 - accuracy: 0.5088
Epoch 1/50
100/100 - 5s - loss: 0.7896 - accuracy: 0.5987
Epoch 1/50
100/100 - 6s - loss: 2.9165 - accuracy: 0.5225
Epoch 1/50
100/100 - 6s - loss: 3.0430 - accuracy: 0.5387
Epoch 2/50
100/100 - 4s - loss: 0.6237 - accuracy: 0.6725
Epoch 2/50
100/100 - 4s - loss: 0.9934 - accuracy: 0.5875
Epoch 2/50
100/100 - 4s - loss: 0.8510 - accuracy: 0.6025
Epoch 2/50
100/100 - 4s - loss: 0.6380 - accuracy: 0.6438
Epoch 2/50
100/100 - 4s - loss: 0.9801 - accuracy: 0.5425
Epoch 2/50
100/100 - 4s - loss: 0.6459 - accuracy: 0.6150
Epoch 2/50
100/100 - 4s - loss: 2.2540 - accuracy: 0.5725
Epoch 2/50
100/100 - 4s - loss: 2.5056 - accuracy: 0.5475
Epoch 3/50
100/100 - 4s - loss: 0.5978 - accuracy: 0.6775
Epoch 3/50
100

Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/scikit_learn-0.24.2-py3.8-linux-x86_64.egg/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/helemanc/.local/lib/python3.8/site-packages/tensorflow/python/keras/wrappers/scikit_learn.py", line 223, in fit
    return super(KerasClassifier, self).fit(x, y, **kwargs)
  File "/home/helemanc/.local/lib/python3.8/site-packages/tensorflow/python/keras/wrappers/scikit_learn.py", line 166, in fit
    history = self.model.fit(x, y, **fit_args)
  File "/home/helemanc/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py", line 1050, in fit
    data_handler = data_adapter.DataHandler(
  File "/home/helemanc/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/data_adapter.py", line 1117, in __init__
    dataset = dataset.map(_make_class_weight_map_fn(class_weight))
  File "/home/helemanc/.local/lib/

In [156]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.7024999856948853 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.6783, std=0.007169 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.6975, std=0.008898 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.6517, std=0.02656 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.67, std=0.01814 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.7025, std=0.009354 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.6908, std=0.01007 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.6825, std=0.005401 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.7008, std=0.02125 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.6833, std=0.01007 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.69, std=0.01061 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size': 4

## Train with best parameters

In [166]:
# Best Accuracy for 0.875 using {'lr': 0.001, 'init_mode': 'glorot_uniform', 'batch_size': 8}
def create_model( init_mode='glorot_uniform', lr = 0.001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,12), kernel_initializer=init_mode)) # 157
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [167]:
seed = 7
np.random.seed(seed)

In [168]:
model = create_model()

In [169]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [170]:
import datetime, os

In [171]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [172]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-01 15:17:53.180562: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-01 15:17:53.180608: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-01 15:17:53.180661: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [173]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [174]:
history = model.fit(X_train, y_train, batch_size=8, epochs=500, validation_data=(X_val, y_val),
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

Epoch 1/500
 19/150 [==>...........................] - ETA: 1s - loss: 0.8691 - accuracy: 0.4791

2021-09-01 15:17:54.771655: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-01 15:17:54.771679: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-01 15:17:54.849128: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-01 15:17:54.849863: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-01 15:17:54.851096: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210901-151752/train/plugins/profile/2021_09_01_15_17_54
2021-09-01 15:17:54.851821: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210901-151752/train/plugins/profile/2021_09_01_15_17_54/helemanc-Latitude-5410.trace.json.gz
2021-09-01 15:17:54.852912: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210901-151752/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500

Epoch 00013: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500

Epoch 00017: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500

Epoch 00025: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500

Epoch 00029: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500

Epoch 00033: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500

Epoch 00037: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500

E

In [175]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 12069), started 2:34:24 ago. (Use '!kill 12069' to kill it.)

In [176]:
model.evaluate(X_test, y_test, batch_size=8)



[0.3355533480644226, 0.8666666746139526]

In [177]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.81      0.93      0.87        56
           1       0.93      0.81      0.87        64

    accuracy                           0.87       120
   macro avg       0.87      0.87      0.87       120
weighted avg       0.87      0.87      0.87       120



## Save best model 

In [87]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_1/model_1_1")

2021-09-01 12:45:52.760534: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_1/model_1_1/assets


# Experiment 1.2 : RAVDESS noise

## Read dataframes

In [67]:
preprocess_path = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/ravdess"
df_train = pd.read_csv(os.path.join(preprocess_path,"df_train.csv"))
df_val = pd.read_csv(os.path.join(preprocess_path,"df_val.csv"))
df_test = pd.read_csv(os.path.join(preprocess_path,"df_test.csv"))  

## Feature Extraction

In [68]:
X_train, y_train, X_val, y_val, X_test, y_test = feature_extractor(df_train, df_val, df_test, 13) # 13

100%|█████████████████████████████████████| 2400/2400 [00:01<00:00, 1847.40it/s]
100%|███████████████████████████████████████| 2400/2400 [00:28<00:00, 84.50it/s]
100%|███████████████████████████████████████| 120/120 [00:00<00:00, 1643.78it/s]
100%|█████████████████████████████████████████| 120/120 [00:01<00:00, 81.84it/s]
100%|███████████████████████████████████████| 120/120 [00:00<00:00, 1417.07it/s]
100%|█████████████████████████████████████████| 120/120 [00:01<00:00, 87.29it/s]


In [69]:
y_train, y_val, y_test = encode_labels(y_train, y_val, y_test)

In [70]:
np.size(y_val)

120

In [71]:
X_train, X_val, X_test, fitted_scaler = standard_scaling(X_train, X_val, X_test)

In [72]:
X_train.shape

(2400, 157, 12)

## Save Scaler

In [73]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_1/scaler_1_2.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [282]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [283]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-07 16:39:48.697852: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-07 16:39:48.698127: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-07 16:39:48.709846: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-07 16:39:48.710044: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-07 16:39:48.710668: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or dire

Epoch 1/50
200/200 - 9s - loss: 3.0051 - accuracy: 0.5375
Epoch 1/50
200/200 - 9s - loss: 0.8132 - accuracy: 0.5500
Epoch 1/50
200/200 - 9s - loss: 1.2389 - accuracy: 0.5263
Epoch 1/50
200/200 - 9s - loss: 2.8671 - accuracy: 0.5325
Epoch 1/50
200/200 - 9s - loss: 0.8080 - accuracy: 0.5412
Epoch 1/50
200/200 - 9s - loss: 1.2681 - accuracy: 0.5181
Epoch 1/50
200/200 - 9s - loss: 0.8223 - accuracy: 0.5656
Epoch 1/50
200/200 - 9s - loss: 1.2588 - accuracy: 0.5275
Epoch 2/50
200/200 - 8s - loss: 2.1727 - accuracy: 0.5412
Epoch 2/50
200/200 - 8s - loss: 0.6933 - accuracy: 0.5806
Epoch 2/50
200/200 - 8s - loss: 0.8832 - accuracy: 0.5569
Epoch 2/50
200/200 - 8s - loss: 2.0209 - accuracy: 0.5400
Epoch 2/50
200/200 - 8s - loss: 0.6525 - accuracy: 0.6150
Epoch 2/50
200/200 - 8s - loss: 0.9065 - accuracy: 0.5600
Epoch 2/50
200/200 - 9s - loss: 0.6843 - accuracy: 0.5931
Epoch 2/50
200/200 - 8s - loss: 0.8220 - accuracy: 0.5894
Epoch 3/50
200/200 - 8s - loss: 0.7641 - accuracy: 0.5700
Epoch 3/50
200

In [284]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.7900000015894572 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.7196, std=0.05893 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.7783, std=0.04887 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.68, std=0.03466 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.6942, std=0.006947 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.7896, std=0.06704 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.7263, std=0.04157 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.6483, std=0.06044 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.7292, std=0.04323 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.79, std=0.05179 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.7896, std=0.04572 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size

## Train with best parameters

In [285]:
#Best Accuracy 0.815416693687439 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size': 4}
#Best Accuracy 0.7900000015894572 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
def create_model( init_mode='glorot_normal', lr = 0.0001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,12), kernel_initializer=init_mode)) # 157, 12
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [286]:
seed = 7
np.random.seed(seed)

In [287]:
model = create_model()

In [288]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [289]:
import datetime, os

In [290]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [291]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-07 17:11:26.651758: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-07 17:11:26.651858: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-07 17:11:26.652051: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [292]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [293]:
history = model.fit(X_train, y_train, batch_size=4, epochs=500, validation_data=(X_val, y_val),
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

Epoch 1/500
 33/600 [>.............................] - ETA: 3s - loss: 1.3337 - accuracy: 0.4009

2021-09-07 17:11:31.541073: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-07 17:11:31.541098: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-07 17:11:31.600932: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-07 17:11:31.601735: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-07 17:11:31.603029: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210907-171125/train/plugins/profile/2021_09_07_17_11_31
2021-09-07 17:11:31.603868: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210907-171125/train/plugins/profile/2021_09_07_17_11_31/helemanc-Latitude-5410.trace.json.gz
2021-09-07 17:11:31.605178: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210907-171125/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500

Epoch 00015: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-05.
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500

Epoch 00021: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500

Epoch 00025: ReduceLROnPlateau reducing learning rate to 1.249999968422344e-05.
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500

Epoch 00029: ReduceLROnPlateau reducing learning rate to 6.24999984211172e-06.
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500

Epoch 00033: ReduceLROnPlateau reducing learning rate to 3.12499992105586e-06.
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500

Epoch 00038: ReduceLROnPlateau reducing learning rate to 1.56249996052793e-06.
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch

In [294]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 33221), started 5 days, 4:47:18 ago. (Use '!kill 33221' to kill it.)

In [295]:
model.evaluate(X_test, y_test, batch_size=4)



[0.4222056269645691, 0.8500000238418579]

In [296]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.80      0.91      0.85        56
           1       0.91      0.80      0.85        64

    accuracy                           0.85       120
   macro avg       0.85      0.85      0.85       120
weighted avg       0.86      0.85      0.85       120



## Save best model 

In [297]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_1/model_1_2")

INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_1/model_1_2/assets


# Experiment 1.3: TESS

In [74]:
df_train = TESS_train
df_test = TESS_test

In [75]:
df_train.reset_index(drop = True, inplace = True) 
df_test.reset_index(drop = True, inplace = True)

## Feature Extraction

In [76]:
X_train, y_train, X_test, y_test = feature_extractor_tess(df_train,  df_test, 13)

100%|███████████████████████████████████████| 1400/1400 [00:15<00:00, 92.52it/s]
100%|███████████████████████████████████████| 1400/1400 [00:17<00:00, 78.95it/s]
100%|█████████████████████████████████████| 1400/1400 [00:00<00:00, 2547.13it/s]
100%|███████████████████████████████████████| 1400/1400 [00:19<00:00, 70.32it/s]


In [77]:
y_train, y_test = encode_labels_tess(y_train, y_test)

In [78]:
np.size(y_test)

1400

In [79]:
X_train, X_test, fitted_scaler = standard_scaling_tess(X_train,  X_test)

In [80]:
X_train.shape

(1400, 157, 12)

## Save Scaler

In [81]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_1/scaler_1_3.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [314]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [315]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-08 09:48:44.741839: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-08 09:48:44.742119: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-08 09:48:44.754399: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-08 09:48:44.754521: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-08 09:48:44.845314: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or dire

Epoch 1/50
117/117 - 5s - loss: 1.1270 - accuracy: 0.5863
Epoch 1/50
117/117 - 5s - loss: 0.9143 - accuracy: 0.6720
Epoch 1/50
117/117 - 5s - loss: 0.7108 - accuracy: 0.7741
Epoch 1/50
117/117 - 6s - loss: 0.4311 - accuracy: 0.8071
Epoch 1/50
117/117 - 6s - loss: 2.6329 - accuracy: 0.6099
Epoch 1/50
117/117 - 6s - loss: 2.9699 - accuracy: 0.5520
Epoch 1/50
117/117 - 6s - loss: 0.3801 - accuracy: 0.8703
Epoch 1/50
117/117 - 7s - loss: 0.4394 - accuracy: 0.8929
Epoch 2/50
117/117 - 4s - loss: 0.5415 - accuracy: 0.7599
Epoch 2/50
117/117 - 5s - loss: 0.5111 - accuracy: 0.8103
Epoch 2/50
117/117 - 5s - loss: 0.2798 - accuracy: 0.8929
Epoch 2/50
117/117 - 4s - loss: 1.4711 - accuracy: 0.7406
Epoch 2/50
117/117 - 5s - loss: 0.0882 - accuracy: 0.9625
Epoch 2/50
117/117 - 4s - loss: 1.4878 - accuracy: 0.7020
Epoch 2/50
117/117 - 4s - loss: 0.0102 - accuracy: 0.9979
Epoch 2/50
117/117 - 4s - loss: 0.0997 - accuracy: 0.9700
Epoch 3/50
117/117 - 4s - loss: 0.2835 - accuracy: 0.8767
Epoch 3/50
117

In [316]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.7160703738530477 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.6189, std=0.3558 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.6918, std=0.3869 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.6368, std=0.3572 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.6197, std=0.3555 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.6803, std=0.3729 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.6396, std=0.3624 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.5747, std=0.3479 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.7161, std=0.3555 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.6796, std=0.3813 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.6718, std=0.3743 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size': 4}


## Train with best parameters

In [317]:
#Best Accuracy 0.7160703738530477 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
def create_model( init_mode='he_normal', lr = 0.001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,12), kernel_initializer=init_mode)) # 157
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [318]:
seed = 7
np.random.seed(seed)

In [327]:
model = create_model()

In [328]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [329]:
import datetime, os

In [330]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [331]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-08 10:12:53.208196: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-08 10:12:53.208295: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-08 10:12:53.208389: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [332]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [333]:
history = model.fit(X_train, y_train, batch_size=8, epochs=500, 
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

Epoch 1/500
 30/175 [====>.........................] - ETA: 1s - loss: 4.8003 - accuracy: 0.6082

2021-09-08 10:12:54.404823: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-08 10:12:54.404853: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-08 10:12:54.431915: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-08 10:12:54.432667: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-08 10:12:54.433873: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210908-101253/train/plugins/profile/2021_09_08_10_12_54
2021-09-08 10:12:54.434579: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210908-101253/train/plugins/profile/2021_09_08_10_12_54/helemanc-Latitude-5410.trace.json.gz
2021-09-08 10:12:54.435584: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210908-101253/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500

Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500

Epoch 00022: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500

Epoch 00026: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500

Epoch 00030: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500

Epoch 00034: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500

Epoch 00038: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.
Epoch 39/500
Epoch 40/500
Epoch 41/500
Ep

In [334]:
%tensorboard --logdir logs

In [335]:
model.evaluate(X_test, y_test, batch_size=8)



[5.148990154266357, 0.4828571379184723]

In [336]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.08      0.02      0.03       600
           1       0.53      0.83      0.65       800

    accuracy                           0.48      1400
   macro avg       0.30      0.42      0.34      1400
weighted avg       0.34      0.48      0.38      1400



## Save best model 

In [337]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_1/model_1_3")

INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_1/model_1_3/assets


# Experiment 1.4: TESS noise

## Read dataframes

In [91]:
preprocess_path = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/tess"
df_train = pd.read_csv(os.path.join(preprocess_path,"df_train.csv"))
df_test = pd.read_csv(os.path.join(preprocess_path,"df_test.csv"))  

## Feature Extraction

In [82]:
X_train, y_train, X_test, y_test = feature_extractor_tess(df_train, df_test, 13) # 13

100%|███████████████████████████████████████| 1400/1400 [00:14<00:00, 93.40it/s]
100%|███████████████████████████████████████| 1400/1400 [00:19<00:00, 72.91it/s]
100%|█████████████████████████████████████| 1400/1400 [00:00<00:00, 2594.50it/s]
100%|███████████████████████████████████████| 1400/1400 [00:18<00:00, 74.98it/s]


In [83]:
y_train,  y_test = encode_labels_tess(y_train,  y_test)

In [84]:
np.size(y_train)

1400

In [85]:
X_train, X_test, fitted_scaler = standard_scaling_tess(X_train,  X_test)

In [86]:
X_train.shape

(1400, 157, 12)

## Save Scaler

In [87]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_1/scaler_1_4.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [346]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [347]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-08 10:34:48.999062: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-08 10:34:48.999332: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-08 10:34:49.083228: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-08 10:34:49.083386: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-08 10:34:49.168769: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or dire

Epoch 1/50
234/234 - 9s - loss: 0.9760 - accuracy: 0.6251
Epoch 1/50
234/234 - 10s - loss: 0.8394 - accuracy: 0.6931
Epoch 1/50
234/234 - 10s - loss: 0.9416 - accuracy: 0.6093
Epoch 1/50
234/234 - 11s - loss: 0.5387 - accuracy: 0.7572
Epoch 1/50
234/234 - 11s - loss: 2.8422 - accuracy: 0.5456
Epoch 1/50
234/234 - 11s - loss: 0.4638 - accuracy: 0.7895
Epoch 1/50
234/234 - 11s - loss: 2.1143 - accuracy: 0.6026
Epoch 1/50
234/234 - 11s - loss: 0.3203 - accuracy: 0.8913
Epoch 2/50
234/234 - 10s - loss: 0.5699 - accuracy: 0.7333
Epoch 2/50
234/234 - 9s - loss: 0.3236 - accuracy: 0.8666
Epoch 2/50
234/234 - 10s - loss: 0.6212 - accuracy: 0.6940
Epoch 2/50
234/234 - 9s - loss: 0.2223 - accuracy: 0.9036
Epoch 2/50
234/234 - 9s - loss: 0.2874 - accuracy: 0.8591
Epoch 2/50
234/234 - 9s - loss: 1.3961 - accuracy: 0.6613
Epoch 2/50
234/234 - 9s - loss: 0.0699 - accuracy: 0.9764
Epoch 2/50
234/234 - 10s - loss: 1.0992 - accuracy: 0.7081
Epoch 3/50
234/234 - 9s - loss: 0.2102 - accuracy: 0.9111
Epoc

In [349]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.8613794445991516 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.8392, std=0.1345 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.8614, std=0.1448 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.8303, std=0.1329 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.8564, std=0.1214 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.8103, std=0.1404 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.8246, std=0.16 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.8096, std=0.1263 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.8457, std=0.1341 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.8328, std=0.1659 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.8153, std=0.1456 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size': 4}


## Train with best parameters

In [350]:
#Best Accuracy 0.8613794445991516 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
def create_model( init_mode='glorot_normal', lr = 0.001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,12), kernel_initializer=init_mode)) # 157, 12
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [351]:
seed = 7
np.random.seed(seed)

In [352]:
model = create_model()

In [353]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [354]:
import datetime, os

In [355]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [356]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-08 11:26:40.939946: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-08 11:26:40.940039: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-08 11:26:40.940209: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [357]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [361]:
history = model.fit(X_train, y_train, batch_size=8, epochs=500,
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

Epoch 1/500
 28/350 [=>............................] - ETA: 2s - loss: 0.1815 - accuracy: 0.9107

2021-09-08 11:27:17.360468: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-08 11:27:17.360498: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-08 11:27:17.380910: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-08 11:27:17.381532: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-08 11:27:17.382619: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210908-112640/train/plugins/profile/2021_09_08_11_27_17
2021-09-08 11:27:17.383438: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210908-112640/train/plugins/profile/2021_09_08_11_27_17/helemanc-Latitude-5410.trace.json.gz
2021-09-08 11:27:17.384633: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210908-112640/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500

Epoch 00031: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500

Epoch 00044: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500

Epoch 00048: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500

Epoch 00056: ReduceLROnPlateau reducing learning 

In [362]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 52834), started 1:16:23 ago. (Use '!kill 52834' to kill it.)

In [365]:
model.evaluate(X_test, y_test, batch_size=8)



[13.615291595458984, 0.5707142949104309]

In [366]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00       600
           1       0.57      1.00      0.73       800

    accuracy                           0.57      1400
   macro avg       0.29      0.50      0.36      1400
weighted avg       0.33      0.57      0.42      1400



## Save best model 

In [367]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_1/model_1_4")

INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_1/model_1_4/assets


# Experiment 1.5: SAVEE

In [88]:
df_train = SAVEE_train
df_val = SAVEE_val
df_test = SAVEE_test

In [89]:
df_train.reset_index(drop = True, inplace = True) 
df_val.reset_index(drop = True, inplace = True)
df_test.reset_index(drop = True, inplace = True)

## Feature Extraction

In [90]:
X_train, y_train, X_val, y_val, X_test, y_test = feature_extractor(df_train, df_val, df_test, 13)

100%|█████████████████████████████████████████| 240/240 [00:07<00:00, 31.59it/s]
100%|█████████████████████████████████████████| 240/240 [00:03<00:00, 66.30it/s]
100%|█████████████████████████████████████████| 120/120 [00:04<00:00, 29.99it/s]
100%|█████████████████████████████████████████| 120/120 [00:01<00:00, 81.23it/s]
100%|█████████████████████████████████████████| 120/120 [00:03<00:00, 35.54it/s]
100%|█████████████████████████████████████████| 120/120 [00:01<00:00, 64.39it/s]


In [91]:
y_train, y_val, y_test = encode_labels(y_train, y_val, y_test)

In [92]:
np.size(y_val)

120

In [93]:
X_train, X_val, X_test, fitted_scaler = standard_scaling(X_train, X_val, X_test)

In [94]:
X_train.shape

(240, 157, 12)

## Save Scaler

In [95]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_1/scaler_1_5.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [104]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [105]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-10 09:18:38.108892: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-10 09:18:38.109209: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-10 09:18:38.112755: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-10 09:18:38.112896: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-10 09:18:38.155235: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or dire

Epoch 1/50
20/20 - 2s - loss: 1.5270 - accuracy: 0.4812
Epoch 1/50
20/20 - 2s - loss: 2.9679 - accuracy: 0.5375
Epoch 1/50
20/20 - 2s - loss: 1.2272 - accuracy: 0.5375
Epoch 1/50
20/20 - 2s - loss: 1.7773 - accuracy: 0.4062
Epoch 1/50
20/20 - 2s - loss: 3.6308 - accuracy: 0.4750
Epoch 1/50
20/20 - 2s - loss: 1.3372 - accuracy: 0.5063
Epoch 1/50
20/20 - 2s - loss: 1.3905 - accuracy: 0.5750
Epoch 1/50
20/20 - 2s - loss: 1.2101 - accuracy: 0.5437
Epoch 2/50
20/20 - 1s - loss: 3.1501 - accuracy: 0.4812
Epoch 2/50
20/20 - 1s - loss: 1.4426 - accuracy: 0.4875
Epoch 2/50
20/20 - 1s - loss: 0.7178 - accuracy: 0.5437
Epoch 2/50
20/20 - 1s - loss: 1.6265 - accuracy: 0.4688
Epoch 2/50
20/20 - 1s - loss: 0.6796 - accuracy: 0.5750
Epoch 2/50
20/20 - 1s - loss: 3.6106 - accuracy: 0.4688
Epoch 2/50
20/20 - 1s - loss: 1.0958 - accuracy: 0.5875
Epoch 2/50
20/20 - 1s - loss: 0.7841 - accuracy: 0.4875
Epoch 3/50
20/20 - 1s - loss: 0.6605 - accuracy: 0.6187
Epoch 3/50
20/20 - 1s - loss: 0.6018 - accuracy:

2021-09-10 09:21:19.242205: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-09-10 09:21:19.242639: I tensorflow/compiler/jit/xla_gpu_device.cc:99] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-09-10 09:21:19.468482: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2021-09-10 09:21:19.486070: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2299965000 Hz


Epoch 1/50
30/30 - 0s - loss: 0.8937 - accuracy: 0.5542
Epoch 2/50
30/30 - 0s - loss: 0.6893 - accuracy: 0.5417
Epoch 3/50
30/30 - 0s - loss: 0.6663 - accuracy: 0.6000
Epoch 4/50
30/30 - 0s - loss: 0.6260 - accuracy: 0.6583
Epoch 5/50
30/30 - 0s - loss: 0.6408 - accuracy: 0.6583
Epoch 6/50
30/30 - 0s - loss: 0.6070 - accuracy: 0.6917
Epoch 7/50
30/30 - 0s - loss: 0.6162 - accuracy: 0.6583
Epoch 8/50
30/30 - 0s - loss: 0.5212 - accuracy: 0.7292
Epoch 9/50
30/30 - 0s - loss: 0.5255 - accuracy: 0.7042
Epoch 10/50
30/30 - 0s - loss: 0.4708 - accuracy: 0.7958
Epoch 11/50
30/30 - 0s - loss: 0.4848 - accuracy: 0.7458
Epoch 12/50
30/30 - 0s - loss: 0.4667 - accuracy: 0.7583
Epoch 13/50
30/30 - 0s - loss: 0.4656 - accuracy: 0.7750
Epoch 14/50
30/30 - 0s - loss: 0.5019 - accuracy: 0.7667
Epoch 15/50
30/30 - 0s - loss: 0.4135 - accuracy: 0.8167
Epoch 16/50
30/30 - 0s - loss: 0.3988 - accuracy: 0.8167
Epoch 17/50
30/30 - 0s - loss: 0.3883 - accuracy: 0.8042
Epoch 18/50
30/30 - 0s - loss: 0.3467 - 

In [106]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.5666666626930237 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.5042, std=0.07169 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.5167, std=0.07795 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.4583, std=0.07728 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.5417, std=0.04125 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.5667, std=0.08559 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.4458, std=0.03118 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.4042, std=0.0598 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.5167, std=0.04823 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.4333, std=0.09483 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.4583, std=0.03118 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size': 4}

## Train with best parameters

In [107]:
# Best Accuracy 0.5666666626930237 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
def create_model( init_mode='uniform', lr = 0.001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,12), kernel_initializer=init_mode)) # 157
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [108]:
seed = 7
np.random.seed(seed)

In [109]:
model = create_model()

In [110]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [111]:
import datetime, os

In [112]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [113]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-10 09:26:18.097233: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-10 09:26:18.097270: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-10 09:26:18.157189: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [114]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [115]:
history = model.fit(X_train, y_train, batch_size=8, epochs=500, validation_data=(X_val, y_val),
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

Epoch 1/500

2021-09-10 09:26:24.600596: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-10 09:26:24.600619: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-10 09:26:24.611402: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-10 09:26:24.614297: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-10 09:26:24.618019: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210910-092617/train/plugins/profile/2021_09_10_09_26_24
2021-09-10 09:26:24.618719: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210910-092617/train/plugins/profile/2021_09_10_09_26_24/helemanc-Latitude-5410.trace.json.gz
2021-09-10 09:26:24.623781: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210910-092617/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500

Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500

Epoch 00013: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500

Epoch 00017: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500

Epoch 00021: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500

Epoch 00025: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500

Epoch 00029: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500

Epoch 00033: ReduceLROnPlateau reducing learning rate to 7.812500371073838e-06.
Epoch 34/500
Epoch 35/500

In [116]:
%tensorboard --logdir logs

In [117]:
model.evaluate(X_test, y_test, batch_size=8)



[0.6990429759025574, 0.574999988079071]

In [118]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.55      0.85      0.67        60
           1       0.67      0.30      0.41        60

    accuracy                           0.57       120
   macro avg       0.61      0.57      0.54       120
weighted avg       0.61      0.57      0.54       120



## Save best model 

In [119]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_1/model_1_5")

2021-09-10 09:27:10.000130: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_1/model_1_5/assets


# Experiment 1.6: SAVEE noise

## Read dataframes

In [121]:
preprocess_path = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/savee"
df_train = pd.read_csv(os.path.join(preprocess_path,"df_train.csv"))
df_val = pd.read_csv(os.path.join(preprocess_path,"df_val.csv"))
df_test = pd.read_csv(os.path.join(preprocess_path,"df_test.csv"))  

## Feature Extraction

In [96]:
X_train, y_train, X_val, y_val, X_test, y_test = feature_extractor(df_train, df_val, df_test, 13) # 13

100%|█████████████████████████████████████████| 240/240 [00:07<00:00, 30.88it/s]
100%|█████████████████████████████████████████| 240/240 [00:02<00:00, 82.75it/s]
100%|█████████████████████████████████████████| 120/120 [00:03<00:00, 31.22it/s]
100%|█████████████████████████████████████████| 120/120 [00:01<00:00, 85.81it/s]
100%|█████████████████████████████████████████| 120/120 [00:03<00:00, 34.81it/s]
100%|█████████████████████████████████████████| 120/120 [00:01<00:00, 81.95it/s]


In [97]:
y_train, y_val, y_test = encode_labels(y_train, y_val, y_test)

In [98]:
np.size(y_val)

120

In [99]:
X_train, X_val, X_test, fitted_scaler = standard_scaling(X_train, X_val, X_test)

In [100]:
X_train.shape

(240, 157, 12)

## Save Scaler

In [101]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_1/scaler_1_6.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [127]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [128]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-10 10:12:40.207276: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-10 10:12:40.207581: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-10 10:12:40.244465: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-10 10:12:40.244613: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-10 10:12:40.263988: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or dire

Epoch 1/50
40/40 - 1s - loss: 1.3243 - accuracy: 0.4938
Epoch 1/50
40/40 - 1s - loss: 1.5059 - accuracy: 0.4656
Epoch 1/50
40/40 - 2s - loss: 1.1382 - accuracy: 0.4938
Epoch 2/50
40/40 - 1s - loss: 1.2724 - accuracy: 0.4812
Epoch 2/50
40/40 - 1s - loss: 1.3281 - accuracy: 0.4844
Epoch 1/50
40/40 - 3s - loss: 1.0757 - accuracy: 0.4781
Epoch 2/50
40/40 - 2s - loss: 0.8134 - accuracy: 0.4812
Epoch 3/50
40/40 - 1s - loss: 1.0278 - accuracy: 0.5375
Epoch 1/50
40/40 - 3s - loss: 1.0618 - accuracy: 0.4969
Epoch 3/50
40/40 - 2s - loss: 1.2318 - accuracy: 0.5125
Epoch 1/50
40/40 - 4s - loss: 4.1743 - accuracy: 0.4750
Epoch 1/50
40/40 - 4s - loss: 1.6586 - accuracy: 0.5188
Epoch 1/50
40/40 - 4s - loss: 3.4505 - accuracy: 0.4688
Epoch 2/50
40/40 - 1s - loss: 0.8040 - accuracy: 0.5562
Epoch 3/50
40/40 - 1s - loss: 0.7287 - accuracy: 0.5844
Epoch 4/50
40/40 - 2s - loss: 1.0622 - accuracy: 0.5531
Epoch 2/50
40/40 - 2s - loss: 0.8132 - accuracy: 0.5719
Epoch 4/50
40/40 - 2s - loss: 1.0734 - accuracy:

In [129]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.743749996026357 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.6292, std=0.03474 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.7042, std=0.05598 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.5646, std=0.05311 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.6208, std=0.06131 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.7437, std=0.04082 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.6, std=0.05179 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.5854, std=0.08297 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.6083, std=0.09483 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.6792, std=0.02946 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.7, std=0.05376 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size': 4}


## Train with best parameters

In [134]:
#Best Accuracy 0.743749996026357 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
def create_model( init_mode='uniform', lr = 0.001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,12), kernel_initializer=init_mode)) # 157, 12
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [135]:
seed = 7
np.random.seed(seed)

In [136]:
model = create_model()

In [137]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [138]:
import datetime, os

In [139]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [140]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-10 10:20:10.080966: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-10 10:20:10.081082: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-10 10:20:10.082825: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [141]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [142]:
history = model.fit(X_train, y_train, batch_size=4, epochs=500, validation_data=(X_val, y_val),
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

Epoch 1/500

2021-09-10 10:20:17.112602: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-10 10:20:17.112629: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-10 10:20:17.167517: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-10 10:20:17.168279: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-10 10:20:17.169536: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210910-102009/train/plugins/profile/2021_09_10_10_20_17
2021-09-10 10:20:17.170270: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210910-102009/train/plugins/profile/2021_09_10_10_20_17/helemanc-Latitude-5410.trace.json.gz
2021-09-10 10:20:17.171351: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210910-102009/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500

Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500

Epoch 00012: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500

Epoch 00016: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500

Epoch 00020: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500

Epoch 00024: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500

Epoch 00028: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500

Epoch 00032: ReduceLROnPlateau reducing learning rate to 7.812500371073838e-06.
Epoch 33/500
Epoch 34/500
Epoch 35/500

In [143]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 6399), started 0:54:04 ago. (Use '!kill 6399' to kill it.)

In [295]:
model.evaluate(X_test, y_test, batch_size=4)



[0.4222056269645691, 0.8500000238418579]

In [296]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.80      0.91      0.85        56
           1       0.91      0.80      0.85        64

    accuracy                           0.85       120
   macro avg       0.85      0.85      0.85       120
weighted avg       0.86      0.85      0.85       120



## Save best model 

In [144]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_1/model_1_6")

INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_1/model_1_6/assets


# Experiment 1.7: CREMA 

In [106]:
df_train = CREMA_train
df_val = CREMA_val
df_test = CREMA_test

In [107]:
df_train.reset_index(drop = True, inplace = True) 
df_val.reset_index(drop = True, inplace = True)
df_test.reset_index(drop = True, inplace = True)

## Feature Extraction

In [108]:
X_train, y_train, X_val, y_val, X_test, y_test = feature_extractor(df_train, df_val, df_test, 13)

100%|████████████████████████████████████████| 320/320 [00:00<00:00, 962.02it/s]
100%|█████████████████████████████████████████| 320/320 [00:04<00:00, 64.13it/s]
100%|██████████████████████████████████████████| 60/60 [00:00<00:00, 546.02it/s]
100%|███████████████████████████████████████████| 60/60 [00:01<00:00, 53.76it/s]
100%|██████████████████████████████████████████| 60/60 [00:00<00:00, 523.32it/s]
100%|███████████████████████████████████████████| 60/60 [00:00<00:00, 67.93it/s]


In [109]:
y_train, y_val, y_test = encode_labels(y_train, y_val, y_test)

In [110]:
np.size(y_val)

60

In [111]:
X_train, X_val, X_test, fitted_scaler = standard_scaling(X_train, X_val, X_test)

In [112]:
X_train.shape

(320, 157, 12)

## Save Scaler

In [114]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_1/scaler_1_7.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [152]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [153]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-10 10:31:06.854972: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-10 10:31:06.855208: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-10 10:31:06.873335: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-10 10:31:06.873490: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-10 10:31:06.913873: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or dire

Epoch 1/50
27/27 - 1s - loss: 1.5430 - accuracy: 0.5587
Epoch 1/50
27/27 - 1s - loss: 1.5086 - accuracy: 0.5258
Epoch 1/50
27/27 - 2s - loss: 1.6615 - accuracy: 0.4720
Epoch 2/50
27/27 - 1s - loss: 1.2190 - accuracy: 0.5681
Epoch 2/50
27/27 - 1s - loss: 1.1776 - accuracy: 0.6056
Epoch 2/50
27/27 - 1s - loss: 1.2817 - accuracy: 0.5701
Epoch 1/50
27/27 - 2s - loss: 3.6491 - accuracy: 0.5446
Epoch 1/50
27/27 - 2s - loss: 1.3793 - accuracy: 0.5794
Epoch 1/50
27/27 - 2s - loss: 1.5902 - accuracy: 0.6103
Epoch 3/50
27/27 - 1s - loss: 1.2589 - accuracy: 0.5634
Epoch 3/50
27/27 - 1s - loss: 1.1772 - accuracy: 0.6432
Epoch 3/50
27/27 - 1s - loss: 1.0854 - accuracy: 0.5794
Epoch 1/50
27/27 - 3s - loss: 4.3283 - accuracy: 0.5634
Epoch 2/50
27/27 - 1s - loss: 2.9038 - accuracy: 0.5540
Epoch 1/50
27/27 - 3s - loss: 1.5459 - accuracy: 0.5211
Epoch 2/50
27/27 - 1s - loss: 0.6190 - accuracy: 0.6822
Epoch 2/50
27/27 - 1s - loss: 0.6501 - accuracy: 0.6150
Epoch 4/50
27/27 - 1s - loss: 0.9548 - accuracy:

In [203]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.6489180326461792 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.6239, std=0.05571 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.6246, std=0.03508 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.6204, std=0.03424 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.5834, std=0.03396 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.6373, std=0.06265 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.6088, std=0.03533 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.5602, std=0.01671 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.6454, std=0.0756 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.6489, std=0.06294 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.6165, std=0.03023 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_si

## Train with best parameters

In [204]:
#Best Accuracy 0.6489180326461792 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
def create_model( init_mode='glorot_normal', lr = 0.0001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,12), kernel_initializer=init_mode)) # 157
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [205]:
seed = 7
np.random.seed(seed)

In [206]:
model = create_model()

In [207]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [208]:
import datetime, os

In [209]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [210]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-10 13:08:43.881370: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-10 13:08:43.881457: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-10 13:08:43.881573: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [211]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [212]:
history = model.fit(X_train, y_train, batch_size=4, epochs=500, validation_data=(X_val, y_val),
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

Epoch 1/500
 32/710 [>.............................] - ETA: 3s - loss: 1.0350 - accuracy: 0.4478

2021-09-10 13:08:54.569679: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-10 13:08:54.569707: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-10 13:08:54.626547: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-10 13:08:54.627340: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-10 13:08:54.628607: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210910-130843/train/plugins/profile/2021_09_10_13_08_54
2021-09-10 13:08:54.629406: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210910-130843/train/plugins/profile/2021_09_10_13_08_54/helemanc-Latitude-5410.trace.json.gz
2021-09-10 13:08:54.630524: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210910-130843/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500

Epoch 00010: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-05.
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500

Epoch 00022: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500

Epoch 00026: ReduceLROnPlateau reducing learning rate to 1.249999968422344e-05.
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500

Epoch 00030: ReduceLROnPlateau reducing learning rate to 6.24999984211172e-06.
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500

Epoch 00034: ReduceLROnPlateau reducing learning rate to 3.12499992105586e-06.
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500

Epoch 00039: ReduceLROnPlateau reducing learning rate to 1.56249996052793e-06.
Epoch 40/500
Epoch 41/500
Epoch

In [213]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 11919), started 1:40:19 ago. (Use '!kill 11919' to kill it.)

In [166]:
model.evaluate(X_test, y_test, batch_size=8)



[0.33261042833328247, 0.8166666626930237]

In [167]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.54      0.58      0.56        12
           1       0.89      0.88      0.88        48

    accuracy                           0.82        60
   macro avg       0.72      0.73      0.72        60
weighted avg       0.82      0.82      0.82        60



## Save best model 

In [168]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_1/model_1_7")

INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_1/model_1_7/assets


# Experiment 1.8: CREMA - noise

In [115]:
preprocess_path = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/crema"
df_train = pd.read_csv(os.path.join(preprocess_path,"df_train.csv"))
df_val = pd.read_csv(os.path.join(preprocess_path,"df_val.csv"))
df_test = pd.read_csv(os.path.join(preprocess_path,"df_test.csv"))  

## Feature Extraction

In [116]:
X_train, y_train, X_val, y_val, X_test, y_test = feature_extractor(df_train, df_val, df_test, 13) # 13

100%|███████████████████████████████████████| 640/640 [00:00<00:00, 1972.82it/s]
100%|█████████████████████████████████████████| 640/640 [00:09<00:00, 64.53it/s]
100%|██████████████████████████████████████████| 60/60 [00:00<00:00, 820.70it/s]
100%|███████████████████████████████████████████| 60/60 [00:00<00:00, 60.62it/s]
100%|█████████████████████████████████████████| 60/60 [00:00<00:00, 1530.68it/s]
100%|███████████████████████████████████████████| 60/60 [00:00<00:00, 68.62it/s]


In [117]:
y_train, y_val, y_test = encode_labels(y_train, y_val, y_test)

In [118]:
np.size(y_val)

60

In [119]:
X_train, X_val, X_test, fitted_scaler = standard_scaling(X_train, X_val, X_test)

In [120]:
X_train.shape

(640, 157, 12)

## Save Scaler

In [121]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_1/scaler_1_8.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [175]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [176]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-10 11:16:26.896100: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-10 11:16:26.896332: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-10 11:16:26.937978: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-10 11:16:26.938169: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-10 11:16:26.957703: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or dire

Epoch 1/50
54/54 - 2s - loss: 1.2651 - accuracy: 0.5493
Epoch 1/50
54/54 - 2s - loss: 1.4704 - accuracy: 0.5152
Epoch 1/50
54/54 - 3s - loss: 1.2374 - accuracy: 0.5657
Epoch 1/50
54/54 - 3s - loss: 4.7883 - accuracy: 0.5258
Epoch 1/50
54/54 - 3s - loss: 1.0772 - accuracy: 0.5902
Epoch 1/50
54/54 - 3s - loss: 1.0516 - accuracy: 0.5948
Epoch 1/50
54/54 - 3s - loss: 1.4125 - accuracy: 0.5433
Epoch 1/50
54/54 - 3s - loss: 3.8248 - accuracy: 0.5386
Epoch 2/50
54/54 - 2s - loss: 1.0528 - accuracy: 0.5902
Epoch 2/50
54/54 - 2s - loss: 1.2710 - accuracy: 0.5282
Epoch 2/50
54/54 - 2s - loss: 0.8196 - accuracy: 0.6315
Epoch 2/50
54/54 - 2s - loss: 3.4557 - accuracy: 0.5329
Epoch 2/50
54/54 - 2s - loss: 0.8709 - accuracy: 0.6276
Epoch 2/50
54/54 - 2s - loss: 0.6004 - accuracy: 0.7026
Epoch 2/50
54/54 - 2s - loss: 1.6024 - accuracy: 0.5105
Epoch 3/50
54/54 - 2s - loss: 0.9759 - accuracy: 0.6159
Epoch 3/50
54/54 - 2s - loss: 1.1658 - accuracy: 0.5704
Epoch 2/50
54/54 - 2s - loss: 2.8855 - accuracy:

In [177]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.8687347372372946 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.7358, std=0.06502 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.8687, std=0.01342 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.5954, std=0.02883 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.8249, std=0.02495 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.8484, std=0.01926 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.6782, std=0.02387 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.6328, std=0.06405 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.8062, std=0.0296 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.7937, std=0.04832 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.8047, std=0.02577 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_siz

## Train with best parameters

In [178]:
#Best Accuracy 0.8687347372372946 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
def create_model( init_mode='glorot_normal', lr = 0.001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,12), kernel_initializer=init_mode)) # 157, 12
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [179]:
seed = 7
np.random.seed(seed)

In [180]:
model = create_model()

In [181]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [182]:
import datetime, os

In [183]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [184]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-10 11:38:06.753504: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-10 11:38:06.753603: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-10 11:38:06.753765: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [185]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [187]:
history = model.fit(X_train, y_train, batch_size=8, epochs=500, validation_data=(X_val, y_val),
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

Epoch 1/500

2021-09-10 11:38:13.346236: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-10 11:38:13.346261: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-10 11:38:13.382736: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-10 11:38:13.383801: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-10 11:38:13.385082: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210910-113806/train/plugins/profile/2021_09_10_11_38_13
2021-09-10 11:38:13.385843: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210910-113806/train/plugins/profile/2021_09_10_11_38_13/helemanc-Latitude-5410.trace.json.gz
2021-09-10 11:38:13.386894: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210910-113806/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500

Epoch 00007: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500

Epoch 00013: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500

Epoch 00017: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500

Epoch 00021: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500

Epoch 00025: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500

Epoch 00029: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500

Epoch 00033: ReduceLROnPlateau reducing learning rate to 7.812500371073838e-06.
Epoch 34/500
Epoch 35/500

In [188]:
%tensorboard --logdir logs

In [189]:
model.evaluate(X_test, y_test, batch_size=8)



[0.3305894434452057, 0.8666666746139526]

In [190]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.67      0.67      0.67        12
           1       0.92      0.92      0.92        48

    accuracy                           0.87        60
   macro avg       0.79      0.79      0.79        60
weighted avg       0.87      0.87      0.87        60



## Save best model 

In [191]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_1/model_1_8")

INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_1/model_1_8/assets


# Experiment 1.9: RAVDESS - TESS - SAVEE

In [122]:
df_train = pd.concat([RAV_train, SAVEE_train, TESS_train])
df_val = pd.concat([RAV_val, SAVEE_val])
df_test = pd.concat([RAV_test, SAVEE_test, TESS_test])
#df_test = pd.concat([RAV_train, SAVEE_test])

In [123]:
df_train.reset_index(drop = True, inplace = True) 
df_val.reset_index(drop = True, inplace = True)
df_test.reset_index(drop = True, inplace = True)

## Feature Extraction

In [124]:
X_train, y_train, X_val, y_val, X_test, y_test = feature_extractor(df_train, df_val, df_test, 13)

100%|██████████████████████████████████████| 2840/2840 [00:23<00:00, 123.42it/s]
100%|███████████████████████████████████████| 2840/2840 [00:40<00:00, 70.14it/s]
100%|█████████████████████████████████████████| 240/240 [00:04<00:00, 50.79it/s]
100%|█████████████████████████████████████████| 240/240 [00:03<00:00, 65.30it/s]
100%|██████████████████████████████████████| 1640/1640 [00:04<00:00, 378.41it/s]
100%|███████████████████████████████████████| 1640/1640 [00:22<00:00, 72.86it/s]


In [125]:
y_train, y_val, y_test = encode_labels(y_train, y_val, y_test)

In [126]:
np.size(y_test)

1640

In [127]:
X_train, X_val, X_test, fitted_scaler = standard_scaling(X_train, X_val, X_test)

In [128]:
X_train.shape

(2840, 157, 12)

## Save Scaler

In [129]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_1/scaler_1_9.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [200]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [201]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-10 12:26:51.496383: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-10 12:26:51.496919: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-10 12:26:51.552357: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-10 12:26:51.552689: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-10 12:26:51.603473: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or dire

Epoch 1/50
237/237 - 11s - loss: 1.0643 - accuracy: 0.6070
Epoch 1/50
237/237 - 12s - loss: 1.0573 - accuracy: 0.5578
Epoch 1/50
237/237 - 12s - loss: 2.8850 - accuracy: 0.5108
Epoch 1/50
237/237 - 12s - loss: 0.5158 - accuracy: 0.7702
Epoch 1/50
237/237 - 13s - loss: 3.0564 - accuracy: 0.5832
Epoch 1/50
237/237 - 13s - loss: 0.9937 - accuracy: 0.5924
Epoch 1/50
237/237 - 13s - loss: 0.6660 - accuracy: 0.6499
Epoch 1/50
237/237 - 14s - loss: 0.6593 - accuracy: 0.6603
Epoch 2/50
237/237 - 11s - loss: 0.6168 - accuracy: 0.7285
Epoch 2/50
237/237 - 11s - loss: 0.7148 - accuracy: 0.6318
Epoch 2/50
237/237 - 11s - loss: 0.3042 - accuracy: 0.8669
Epoch 2/50
237/237 - 11s - loss: 1.5201 - accuracy: 0.5985
Epoch 2/50
237/237 - 11s - loss: 1.6404 - accuracy: 0.6746
Epoch 2/50
237/237 - 11s - loss: 0.5258 - accuracy: 0.6954
Epoch 2/50
237/237 - 11s - loss: 0.6820 - accuracy: 0.6668
Epoch 2/50
237/237 - 11s - loss: 0.4561 - accuracy: 0.7644
Epoch 3/50
237/237 - 10s - loss: 0.6045 - accuracy: 0.68

In [202]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.6489180326461792 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.6239, std=0.05571 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.6246, std=0.03508 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.6204, std=0.03424 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.5834, std=0.03396 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.6373, std=0.06265 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.6088, std=0.03533 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.5602, std=0.01671 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.6454, std=0.0756 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.6489, std=0.06294 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.6165, std=0.03023 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_si

## Train with best parameters

In [166]:
# Best Accuracy for 0.875 using {'lr': 0.001, 'init_mode': 'glorot_uniform', 'batch_size': 8}
def create_model( init_mode='glorot_uniform', lr = 0.001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,12), kernel_initializer=init_mode)) # 157
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [167]:
seed = 7
np.random.seed(seed)

In [168]:
model = create_model()

In [169]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [170]:
import datetime, os

In [171]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [172]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-01 15:17:53.180562: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-01 15:17:53.180608: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-01 15:17:53.180661: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [173]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [174]:
history = model.fit(X_train, y_train, batch_size=8, epochs=500, validation_data=(X_val, y_val),
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

Epoch 1/500
 19/150 [==>...........................] - ETA: 1s - loss: 0.8691 - accuracy: 0.4791

2021-09-01 15:17:54.771655: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-01 15:17:54.771679: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-01 15:17:54.849128: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-01 15:17:54.849863: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-01 15:17:54.851096: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210901-151752/train/plugins/profile/2021_09_01_15_17_54
2021-09-01 15:17:54.851821: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210901-151752/train/plugins/profile/2021_09_01_15_17_54/helemanc-Latitude-5410.trace.json.gz
2021-09-01 15:17:54.852912: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210901-151752/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500

Epoch 00013: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500

Epoch 00017: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500

Epoch 00025: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500

Epoch 00029: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500

Epoch 00033: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500

Epoch 00037: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500

E

In [214]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 11919), started 1:59:44 ago. (Use '!kill 11919' to kill it.)

In [225]:
model.evaluate(X_test, y_test, batch_size=8)



[0.36300212144851685, 0.8909090757369995]

In [226]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.83      0.96      0.89       620
           1       0.96      0.83      0.89       700

    accuracy                           0.89      1320
   macro avg       0.90      0.89      0.89      1320
weighted avg       0.90      0.89      0.89      1320



## Save best model 

In [227]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_1/model_1_9")

INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_1/model_1_9/assets


# Experiment 1.10: RAVDESS - TESS - SAVEE noise

## Read dataframes

In [130]:
preprocess_path_rav = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/ravdess"
preprocess_path_savee = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/savee"
preprocess_path_tess = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/tess"

df_train_rav = pd.read_csv(os.path.join(preprocess_path_rav,"df_train.csv"))
df_val_rav = pd.read_csv(os.path.join(preprocess_path_rav,"df_val.csv"))
df_test_rav = pd.read_csv(os.path.join(preprocess_path_rav,"df_test.csv"))  

df_train_tess = pd.read_csv(os.path.join(preprocess_path_tess,"df_train.csv"))
df_test_tess= pd.read_csv(os.path.join(preprocess_path_tess,"df_test.csv"))  

df_train_savee = pd.read_csv(os.path.join(preprocess_path_savee,"df_train.csv"))
df_val_savee = pd.read_csv(os.path.join(preprocess_path_savee,"df_val.csv"))
df_test_savee = pd.read_csv(os.path.join(preprocess_path_savee,"df_test.csv"))  

In [131]:
df_train = pd.concat([df_train_rav, df_train_savee, df_train_tess])
df_val = pd.concat([df_val_rav, df_val_savee])
df_test = pd.concat([df_test_rav, df_test_savee, df_test_tess])
#df_test = pd.concat([RAV_train, SAVEE_test])

In [132]:
df_train.reset_index(drop = True, inplace = True) 
df_val.reset_index(drop = True, inplace = True)
df_test.reset_index(drop = True, inplace = True)

## Feature Extraction

In [133]:
X_train, y_train, X_val, y_val, X_test, y_test = feature_extractor(df_train, df_val, df_test, 13) # 13

100%|██████████████████████████████████████| 5680/5680 [00:48<00:00, 117.52it/s]
100%|███████████████████████████████████████| 5680/5680 [01:23<00:00, 67.62it/s]
100%|█████████████████████████████████████████| 240/240 [00:08<00:00, 29.43it/s]
100%|█████████████████████████████████████████| 240/240 [00:06<00:00, 36.69it/s]
100%|██████████████████████████████████████| 1640/1640 [00:09<00:00, 172.12it/s]
100%|███████████████████████████████████████| 1640/1640 [00:28<00:00, 58.37it/s]


In [134]:
y_train, y_val, y_test = encode_labels(y_train, y_val, y_test)

In [135]:
np.size(y_val)

240

In [136]:
X_train, X_val, X_test, fitted_scaler = standard_scaling(X_train, X_val, X_test)

In [137]:
X_train.shape

(5680, 157, 12)

## Save Scaler

In [138]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_1/scaler_1_10.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [236]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [237]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-10 14:10:58.956242: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-10 14:10:58.956242: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-10 14:10:58.956269: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-10 14:10:58.957099: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-10 14:10:58.960942: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or dire

Epoch 1/50
474/474 - 17s - loss: 1.0267 - accuracy: 0.5529
Epoch 1/50
474/474 - 18s - loss: 0.9907 - accuracy: 0.5767
Epoch 1/50
474/474 - 19s - loss: 2.5446 - accuracy: 0.5747
Epoch 1/50
474/474 - 19s - loss: 0.5593 - accuracy: 0.7205
Epoch 1/50
474/474 - 19s - loss: 0.6908 - accuracy: 0.6084
Epoch 1/50
474/474 - 19s - loss: 0.8429 - accuracy: 0.6249
Epoch 1/50
474/474 - 19s - loss: 0.6646 - accuracy: 0.6295
Epoch 1/50
474/474 - 20s - loss: 2.1408 - accuracy: 0.5529
Epoch 2/50
474/474 - 17s - loss: 0.6835 - accuracy: 0.6150
Epoch 2/50
474/474 - 17s - loss: 0.5794 - accuracy: 0.7073
Epoch 2/50
474/474 - 17s - loss: 0.6821 - accuracy: 0.6150
Epoch 2/50
474/474 - 18s - loss: 0.5819 - accuracy: 0.6414
Epoch 2/50
474/474 - 18s - loss: 0.3891 - accuracy: 0.8027
Epoch 2/50
474/474 - 18s - loss: 1.1000 - accuracy: 0.6524
Epoch 2/50
474/474 - 17s - loss: 0.5158 - accuracy: 0.7029
Epoch 2/50
474/474 - 18s - loss: 1.0565 - accuracy: 0.6116
Epoch 3/50
474/474 - 17s - loss: 0.4952 - accuracy: 0.75

In [239]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.66180948416392 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.6361, std=0.1271 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.607, std=0.1476 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.5905, std=0.1357 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.5417, std=0.1786 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.6618, std=0.1103 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.6173, std=0.1448 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.571, std=0.1397 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.5796, std=0.1592 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.6503, std=0.1322 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.6305, std=0.162 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size': 4}


## Train with best parameters

In [240]:
#Best Accuracy 0.66180948416392 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
def create_model( init_mode='uniform', lr = 0.001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,12), kernel_initializer=init_mode)) # 157, 12
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [241]:
seed = 7
np.random.seed(seed)

In [242]:
model = create_model()

In [243]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [244]:
import datetime, os

In [245]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [246]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-10 15:19:00.678555: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-10 15:19:00.678636: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-10 15:19:00.678765: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [247]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [248]:
history = model.fit(X_train, y_train, batch_size=4, epochs=500, validation_data=(X_val, y_val),
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

Epoch 1/500
  33/1420 [..............................] - ETA: 8s - loss: 1.1035 - accuracy: 0.4661 

2021-09-10 15:19:02.287199: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-10 15:19:02.287227: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-10 15:19:02.350503: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-10 15:19:02.351314: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-10 15:19:02.352589: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210910-151900/train/plugins/profile/2021_09_10_15_19_02
2021-09-10 15:19:02.353311: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210910-151900/train/plugins/profile/2021_09_10_15_19_02/helemanc-Latitude-5410.trace.json.gz
2021-09-10 15:19:02.354523: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210910-151900/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500

Epoch 00007: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500

Epoch 00015: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500

Epoch 00021: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500

Epoch 00025: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500

Epoch 00029: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500

Epoch 00033: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500

Epoch 00037: ReduceLROnPlateau reducing learning rate

In [249]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 11919), started 3:32:35 ago. (Use '!kill 11919' to kill it.)

In [250]:
model.evaluate(X_test, y_test, batch_size=8)



[0.6961687803268433, 0.5487805008888245]

In [251]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.49      0.70      0.57       716
           1       0.65      0.44      0.52       924

    accuracy                           0.55      1640
   macro avg       0.57      0.57      0.55      1640
weighted avg       0.58      0.55      0.54      1640



## Save best model 

In [252]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_1/model_1_10")

INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_1/model_1_10/assets


# Experiment 1.11: RAVDESS - TESS - SAVEE - CREMA

In [139]:
df_train = pd.concat([RAV_train, SAVEE_train, TESS_train, CREMA_train])
df_val = pd.concat([RAV_val, SAVEE_val, CREMA_val])
df_test = pd.concat([RAV_test, SAVEE_test, TESS_test, CREMA_test])
#df_test = pd.concat([RAV_train, SAVEE_test])

In [140]:
df_train.reset_index(drop = True, inplace = True) 
df_val.reset_index(drop = True, inplace = True)
df_test.reset_index(drop = True, inplace = True)

## Feature Extraction

In [141]:
X_train, y_train, X_val, y_val, X_test, y_test = feature_extractor(df_train, df_val, df_test, 13)

100%|██████████████████████████████████████| 3160/3160 [00:22<00:00, 137.85it/s]
100%|███████████████████████████████████████| 3160/3160 [01:22<00:00, 38.08it/s]
100%|█████████████████████████████████████████| 300/300 [00:06<00:00, 45.63it/s]
100%|█████████████████████████████████████████| 300/300 [00:05<00:00, 51.85it/s]
100%|██████████████████████████████████████| 1700/1700 [00:04<00:00, 375.56it/s]
100%|███████████████████████████████████████| 1700/1700 [00:44<00:00, 38.61it/s]


In [142]:
y_train, y_val, y_test = encode_labels(y_train, y_val, y_test)

In [143]:
np.size(y_test)

1700

In [144]:
X_train, X_val, X_test, fitted_scaler = standard_scaling(X_train, X_val, X_test)

In [145]:
X_train.shape

(3160, 157, 12)

## Shuffle training data

In [146]:
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)

## Save Scaler

In [147]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_1/scaler_1_11.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [262]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [263]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-10 15:38:14.020623: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-10 15:38:14.020856: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-10 15:38:14.137895: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-10 15:38:14.138037: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-10 15:38:14.145236: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or dire

Epoch 1/50
264/264 - 11s - loss: 0.7044 - accuracy: 0.6383
Epoch 1/50
264/264 - 11s - loss: 1.1751 - accuracy: 0.5418
Epoch 1/50
264/264 - 11s - loss: 0.6808 - accuracy: 0.6360
Epoch 1/50
264/264 - 11s - loss: 0.7118 - accuracy: 0.6477
Epoch 1/50
264/264 - 11s - loss: 2.5962 - accuracy: 0.5430
Epoch 1/50
264/264 - 11s - loss: 1.1999 - accuracy: 0.5577
Epoch 1/50
264/264 - 11s - loss: 3.0316 - accuracy: 0.5446
Epoch 1/50
264/264 - 12s - loss: 1.1374 - accuracy: 0.5354
Epoch 2/50
264/264 - 10s - loss: 0.7462 - accuracy: 0.6320
Epoch 2/50
264/264 - 10s - loss: 0.4880 - accuracy: 0.7293
Epoch 2/50
264/264 - 9s - loss: 1.4910 - accuracy: 0.5952
Epoch 2/50
264/264 - 10s - loss: 0.4605 - accuracy: 0.7584
Epoch 2/50
264/264 - 10s - loss: 0.4973 - accuracy: 0.7262
Epoch 2/50
264/264 - 10s - loss: 0.7989 - accuracy: 0.6184
Epoch 2/50
264/264 - 10s - loss: 1.7941 - accuracy: 0.5907
Epoch 2/50
264/264 - 10s - loss: 0.7626 - accuracy: 0.6213
Epoch 3/50
264/264 - 10s - loss: 0.4389 - accuracy: 0.756

In [265]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.8838609457015991 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.8573, std=0.007544 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.8804, std=0.0195 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.8402, std=0.01114 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.8392, std=0.01183 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.8839, std=0.01629 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.85, std=0.005385 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.8386, std=0.006913 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.8661, std=0.01482 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.882, std=0.01054 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.8813, std=0.01011 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size': 4}

## Train with best parameters

In [266]:
#Best Accuracy 0.8838609457015991 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
def create_model( init_mode='uniform', lr = 0.001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,12), kernel_initializer=init_mode)) # 157
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6))

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [267]:
seed = 7
np.random.seed(seed)

In [268]:
model = create_model()

In [269]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [270]:
import datetime, os

In [271]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [272]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-10 16:20:46.955187: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-10 16:20:46.955218: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-10 16:20:46.955266: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [273]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [274]:
history = model.fit(X_train, y_train, batch_size=8, epochs=500, validation_data=(X_val, y_val),
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

Epoch 1/500
 20/395 [>.............................] - ETA: 3s - loss: 1.0719 - accuracy: 0.5520

2021-09-10 16:20:52.484094: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-10 16:20:52.484119: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-10 16:20:52.553089: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-10 16:20:52.553900: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-10 16:20:52.555184: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210910-162046/train/plugins/profile/2021_09_10_16_20_52
2021-09-10 16:20:52.555946: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210910-162046/train/plugins/profile/2021_09_10_16_20_52/helemanc-Latitude-5410.trace.json.gz
2021-09-10 16:20:52.557081: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210910-162046/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500

Epoch 00010: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500

Epoch 00014: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500

Epoch 00020: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500

Epoch 00024: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500

Epoch 00028: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500

Epoch 00032: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500

Epoch 00036: ReduceLROnPlateau reducing learning rate to 7.8125003

In [275]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 11919), started 4:32:32 ago. (Use '!kill 11919' to kill it.)

In [276]:
model.evaluate(X_test, y_test, batch_size=8)



[0.7401905655860901, 0.5664705634117126]

In [277]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.49      0.48      0.48       728
           1       0.62      0.63      0.63       972

    accuracy                           0.57      1700
   macro avg       0.56      0.56      0.56      1700
weighted avg       0.56      0.57      0.57      1700



## Save best model 

In [278]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_1/model_1_11")

INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_1/model_1_11/assets


# Experiment 1.12:  RAVDESS - TESS - SAVEE - CREMA noise

## Read dataframes

In [148]:
preprocess_path_rav = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/ravdess"
preprocess_path_savee = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/savee"
preprocess_path_tess = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/tess"
preprocess_path_crema = "/home/helemanc/Desktop/Binary_Model/df_csv_noise/crema"

df_train_rav = pd.read_csv(os.path.join(preprocess_path_rav,"df_train.csv"))
df_val_rav = pd.read_csv(os.path.join(preprocess_path_rav,"df_val.csv"))
df_test_rav = pd.read_csv(os.path.join(preprocess_path_rav,"df_test.csv"))  

df_train_tess = pd.read_csv(os.path.join(preprocess_path_tess,"df_train.csv"))
df_test_tess= pd.read_csv(os.path.join(preprocess_path_tess,"df_test.csv"))  

df_train_savee = pd.read_csv(os.path.join(preprocess_path_savee,"df_train.csv"))
df_val_savee = pd.read_csv(os.path.join(preprocess_path_savee,"df_val.csv"))
df_test_savee = pd.read_csv(os.path.join(preprocess_path_savee,"df_test.csv"))  

df_train_crema = pd.read_csv(os.path.join(preprocess_path_crema,"df_train.csv"))
df_val_crema = pd.read_csv(os.path.join(preprocess_path_crema,"df_val.csv"))
df_test_crema = pd.read_csv(os.path.join(preprocess_path_crema,"df_test.csv")) 

In [149]:
df_train = pd.concat([df_train_rav, df_train_savee, df_train_tess, df_train_crema])
df_val = pd.concat([df_val_rav, df_val_savee, df_train_crema, df_train_crema])
#df_test = pd.concat([df_test_rav, df_test_savee, df_test_tess])
#df_test = pd.concat([df_test_rav, df_test_savee, df_test_crema ])
df_test = pd.concat([df_test_rav, df_test_savee ])

In [150]:
df_train.reset_index(drop = True, inplace = True) 
df_val.reset_index(drop = True, inplace = True)
df_test.reset_index(drop = True, inplace = True)

## Feature Extraction

In [151]:
X_train, y_train, X_val, y_val, X_test, y_test = feature_extractor(df_train, df_val, df_test, 13) # 13

100%|███████████████████████████████████████| 6320/6320 [01:11<00:00, 88.92it/s]
100%|███████████████████████████████████████| 6320/6320 [02:43<00:00, 38.63it/s]
100%|██████████████████████████████████████| 1520/1520 [00:05<00:00, 265.61it/s]
100%|███████████████████████████████████████| 1520/1520 [00:30<00:00, 50.24it/s]
100%|█████████████████████████████████████████| 240/240 [00:03<00:00, 61.14it/s]
100%|█████████████████████████████████████████| 240/240 [00:04<00:00, 56.52it/s]


In [152]:
X_train[0].shape

(157, 12)

In [153]:
y_train, y_val, y_test = encode_labels(y_train, y_val, y_test)

In [154]:
np.size(y_val)

1520

In [155]:
X_train, X_val, X_test, fitted_scaler = standard_scaling(X_train, X_val, X_test)

In [156]:
X_train.shape

(6320, 157, 12)

## Save Scaler

In [157]:
pkl_filename = "/home/helemanc/Desktop/Binary_Model/scalers_experiments/Experiment_1/scaler_1_12.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(fitted_scaler, file)

## Hypeparameter optimization

In [287]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=45, 
                                              verbose=1)

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [288]:
%%time

# set reproducibility 
seed = 7
np.random.seed(seed)

batch_size = 4
epochs = 50

model_CV = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model, epochs=epochs, 
                           batch_size=batch_size, verbose=2)
# define the grid search parameters
init_mode = ['uniform', 'lecun_uniform', 'glorot_uniform', 'glorot_normal', 'he_normal', 'he_uniform']
batches = [4,8,16]
lr = [0.001, 0.0001, 0.00005]

param_grid = dict(init_mode=init_mode, lr = lr, batch_size = batches)
grid = RandomizedSearchCV(estimator=model_CV, param_distributions=param_grid, n_jobs=-1, cv=KFold(3))
#grid_result = grid.fit(X_train, y_train, callbacks=[reduce_lr, early_stop], class_weight = class_weights)
grid_result =  grid.fit(X_train, y_train,class_weight = class_weights)

2021-09-13 10:27:30.700024: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-13 10:27:30.700277: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-09-13 10:27:30.704344: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-13 10:27:30.704344: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-13 10:27:30.704367: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your mac

Epoch 1/50
527/527 - 21s - loss: 0.9913 - accuracy: 0.5374
Epoch 1/50
527/527 - 21s - loss: 2.2675 - accuracy: 0.5775
Epoch 1/50
527/527 - 21s - loss: 0.7146 - accuracy: 0.5830
Epoch 1/50
527/527 - 21s - loss: 0.9287 - accuracy: 0.6205
Epoch 1/50
527/527 - 21s - loss: 0.9860 - accuracy: 0.5529
Epoch 1/50
527/527 - 21s - loss: 0.6703 - accuracy: 0.6082
Epoch 1/50
527/527 - 21s - loss: 2.4049 - accuracy: 0.5267
Epoch 1/50
527/527 - 22s - loss: 0.5606 - accuracy: 0.7202
Epoch 2/50
527/527 - 19s - loss: 0.6043 - accuracy: 0.6357
Epoch 2/50
527/527 - 20s - loss: 1.0160 - accuracy: 0.6492
Epoch 2/50
527/527 - 20s - loss: 0.6938 - accuracy: 0.6082
Epoch 2/50
527/527 - 20s - loss: 0.6962 - accuracy: 0.5917
Epoch 2/50
527/527 - 20s - loss: 0.5403 - accuracy: 0.6837
Epoch 2/50
527/527 - 20s - loss: 0.5865 - accuracy: 0.7040
Epoch 2/50
527/527 - 20s - loss: 1.0992 - accuracy: 0.5616
Epoch 2/50
527/527 - 20s - loss: 0.3999 - accuracy: 0.7985
Epoch 3/50
527/527 - 20s - loss: 0.5527 - accuracy: 0.68

In [289]:
# print results
print(f'Best Accuracy {grid_result.best_score_} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print(f' mean={mean:.4}, std={stdev:.4} using {param}')

Best Accuracy 0.6966690222422282 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size': 4}
 mean=0.6429, std=0.1247 using {'lr': 0.0001, 'init_mode': 'lecun_uniform', 'batch_size': 8}
 mean=0.5807, std=0.1364 using {'lr': 0.001, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.6169, std=0.1255 using {'lr': 0.0001, 'init_mode': 'he_uniform', 'batch_size': 8}
 mean=0.5599, std=0.1689 using {'lr': 0.001, 'init_mode': 'he_uniform', 'batch_size': 4}
 mean=0.6231, std=0.1367 using {'lr': 0.001, 'init_mode': 'uniform', 'batch_size': 8}
 mean=0.6706, std=0.09697 using {'lr': 5e-05, 'init_mode': 'glorot_normal', 'batch_size': 8}
 mean=0.625, std=0.1168 using {'lr': 5e-05, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.5778, std=0.1613 using {'lr': 0.001, 'init_mode': 'he_normal', 'batch_size': 8}
 mean=0.6927, std=0.1053 using {'lr': 0.0001, 'init_mode': 'glorot_normal', 'batch_size': 4}
 mean=0.6967, std=0.1012 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size': 4}


## Train with best parameters

In [290]:
#Best Accuracy 0.6966690222422282 using {'lr': 0.0001, 'init_mode': 'uniform', 'batch_size': 4}
def create_model( init_mode='uniform', lr = 0.0001):
    model = Sequential()

    model.add(layers.Conv1D(256, 5,padding='same',
                     input_shape=(157,12), kernel_initializer=init_mode)) # 157, 12
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Conv1D(128, 5,padding='same', kernel_initializer=init_mode))
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling1D(pool_size=(4)))
    model.add(layers.Dropout(0.6)) #0.6

    model.add(layers.Flatten())
    model.add(layers.Dense(64, kernel_initializer=init_mode))
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))
    
    # compile model
    model.compile(loss='binary_crossentropy', 
                  optimizer=Adam(lr = lr) , 
                  metrics=['accuracy'])
    return model

In [291]:
seed = 7
np.random.seed(seed)

In [292]:
model = create_model()

In [293]:
# Load the TensorBoard notebook extension
%reload_ext tensorboard

In [294]:
import datetime, os

In [295]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [296]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

2021-09-13 11:54:08.987049: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-13 11:54:08.987082: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-13 11:54:08.987144: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [298]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', 
                                                 factor=0.5, patience=4, 
                                                 verbose=1, mode='max', 
                                                 min_lr=0.000001)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=45, 
                                              verbose=1, restore_best_weights = True )

# classweight 
from sklearn.utils import class_weight
class_weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weights = {l:c for l,c in zip(np.unique(y_train), class_weights)}

In [299]:
history = model.fit(X_train, y_train, batch_size=4, epochs=500, validation_data=(X_val, y_val),
           callbacks=[reduce_lr, early_stop, tensorboard_callback], class_weight = class_weights)

Epoch 1/500
  33/1580 [..............................] - ETA: 8s - loss: 0.8216 - accuracy: 0.4616 

2021-09-13 11:54:23.201137: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-09-13 11:54:23.201163: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-09-13 11:54:23.256401: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-09-13 11:54:23.257183: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-09-13 11:54:23.258468: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210913-115408/train/plugins/profile/2021_09_13_11_54_23
2021-09-13 11:54:23.259187: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to logs/20210913-115408/train/plugins/profile/2021_09_13_11_54_23/helemanc-Latitude-5410.trace.json.gz
2021-09-13 11:54:23.260289: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: logs/20210913-115408/train/plugins/p

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500

Epoch 00017: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-05.
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500

Epoch 00030: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500

Epoch 00035: ReduceLROnPlateau reducing learning rate to 1.249999968422344e-05.
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500

Epoch 00056: ReduceLROnPlateau reducing learning 

In [300]:
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 11919), started 3 days, 0:55:29 ago. (Use '!kill 11919' to kill it.)

In [329]:
model.evaluate(X_test, y_test, batch_size=4)



[0.59771329164505, 0.6833333373069763]

In [339]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
pred = [1 * (x[0]>=0.50) for x in predictions] #0.5 o 0.52? 
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.68      0.65      0.66       116
           1       0.68      0.72      0.70       124

    accuracy                           0.68       240
   macro avg       0.68      0.68      0.68       240
weighted avg       0.68      0.68      0.68       240



## Save best model 

In [310]:
model.save("/home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_1/model_1_12")

INFO:tensorflow:Assets written to: /home/helemanc/Desktop/Binary_Model/models_experiments/Experiment_1/model_1_12/assets
