# Hyperparam Tuning

In [1]:
from keras.models import Sequential
from keras.layers import Activation, Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.optimizers import SGD
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from kerastuner.tuners import RandomSearch
from kerastuner.engine.hyperparameters import HyperParameters
import pickle
import librosa
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import time

%matplotlib inline

In [2]:
LOG_DIR = f"{int(time.time())}"

audio_data = np.load("../data/audio_data.npy")
labels = np.load("../data/wav_labels.npy")

# labels: modality-vocal channel-emotion-emotional intensity-statement-repetition-actor
# emotions: 01 = neutral, 02 = calm, 03 = happy, 04 = sad, 05 = angry, 06 = fearful, 07 = disgust, 08 = surprised
# odd number actors = male, even = female

# 1440 files: 24 speakers, 60 recordings per speaker
audio_data = audio_data.reshape(1440, 9480)

features = []

for i in range(1440):
    
    if (labels[i][6]%2 == 0):
        label = "Female"
    else:
        label = "Male"

    if (labels[i][2] == 1):
        em = 0
    elif (labels[i][2] == 2):
        em = 1
    elif (labels[i][2] == 3):
        em = 2
    elif (labels[i][2] == 4):
        em = 3
    elif (labels[i][2] == 5):
        em = 4
    elif (labels[i][2] == 6):
        em = 5
    elif (labels[i][2] == 7):
        em = 6
    elif (labels[i][2] == 8):
        em = 7
    
    features.append([audio_data[i], label, em])
    
feature_df = pd.DataFrame(features, columns = ["mfcc", "gender", "emotion"])

feature_df.head()


Unnamed: 0,mfcc,gender,emotion
0,"[-710.0553588867188, -709.9026489257812, -711....",Female,0
1,"[-547.765625, -548.0353393554688, -548.6129760...",Female,2
2,"[-616.4595336914062, -615.7564697265625, -615....",Female,7
3,"[-739.8626098632812, -738.7739868164062, -735....",Female,1
4,"[-698.0630493164062, -697.3838500976562, -696....",Female,4


In [3]:
#split data - males vs females
mal = feature_df.loc[feature_df['gender'] == "Male"]
fem = feature_df.loc[feature_df['gender'] == "Female"]

In [4]:
## ===== Females ===== ##
X_females_unscaled = np.array(fem.mfcc.tolist())
y_females = np.array(fem.emotion.tolist())

scaler = MinMaxScaler(feature_range=(-1,1))
scaler.fit(X_females_unscaled)
X_females_scaled = scaler.transform(X_females_unscaled)

#20-80 train-test split
X_train_females, X_test_females, y_train_females, y_test_females = train_test_split(X_females_scaled, y_females, test_size=0.20, random_state=0)

In [5]:
# Reshape for CNN input
X_train_females = np.array([x.reshape( (20, 474, 1) ) for x in X_train_females])
X_test_females = np.array([x.reshape( (20, 474, 1) ) for x in X_test_females])

# One-Hot encoding for classes
y_train_females = np.array(to_categorical(y_train_females, 8))
y_test_females = np.array(to_categorical(y_test_females, 8))

In [6]:
def build_model(hp):
    model = Sequential()

    MAX_SIZE = 128

    kernel_size = hp.Int('kernel_size', min_value=8, max_value=MAX_SIZE, step=8)
    stride_def = hp.Int('stride_def', min_value=2, max_value=8, step=1)
    model.add(Conv2D(kernel_size, (stride_def,stride_def), input_shape=X_test_females.shape[1:]))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))

    model.add(Conv2D(hp.Int('conv_1_units', min_value=8, max_value=128, step=8), (3,3)))
    model.add(Activation('relu'))
    
    model.add(Conv2D(hp.Int('conv_2_units', min_value=8, max_value=128, step=8), (3,3)))
    model.add(Activation('relu'))
    model.add(Dropout(rate=hp.Int(f"dropout_val_2", 0, 5, 1) * 0.1))

    model.add(Conv2D(hp.Int('conv_3_units', min_value=8, max_value=128, step=8), (3,3)))
    model.add(Activation('relu'))
    model.add(Dropout(rate=hp.Int(f"dropout_val_3", 0, 5, 1) * 0.1))

    model.add(Flatten())
    model.add(Dense(64))
    model.add(Activation('relu'))

    model.add(Dense(8))
    model.add(Activation('softmax'))

    model.compile(
        optimizer="adam",
        loss="categorical_crossentropy",
        metrics=['accuracy'])

    return model

In [7]:
# model.fit(
# 	x=X_train_females,
# 	y=y_train_females,
#     epochs=50,
#     batch_size=32,
#     validation_split=0.15,
# 	verbose=0
# )

In [8]:
tuner = RandomSearch(
    build_model,
    objective = "val_accuracy",
    max_trials = 3,
    executions_per_trial = 1,
    directory = LOG_DIR
)

tuner.search(
    x=X_train_females,
    y=y_train_females,
    epochs=35,
    batch_size=32,
    validation_data=(X_test_females, y_test_females)
)

Traceback (most recent call last):
  File "/home/garrett/.local/lib/python3.6/site-packages/kerastuner/engine/hypermodel.py", line 104, in build
    model = self.hypermodel.build(hp)
  File "<ipython-input-6-7626c723787c>", line 2, in build_model
    model = Sequential()
  File "/home/garrett/.local/lib/python3.6/site-packages/tensorflow/python/training/tracking/base.py", line 457, in _method_wrapper
    result = method(self, *args, **kwargs)
  File "/home/garrett/.local/lib/python3.6/site-packages/tensorflow/python/keras/engine/sequential.py", line 117, in __init__
    name=name, autocast=False)
  File "/home/garrett/.local/lib/python3.6/site-packages/tensorflow/python/training/tracking/base.py", line 457, in _method_wrapper
    result = method(self, *args, **kwargs)
  File "/home/garrett/.local/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py", line 308, in __init__
    self._init_batch_counters()
  File "/home/garrett/.local/lib/python3.6/site-packages/tensorfl

RuntimeError: Too many failed attempts to build model.

In [18]:
with open(f"tuner_{int(time.time())}.pkl", "wb") as f:
    pickle.dump(tuner, f)

print(tuner.get_best_hyperparameters()[0].values)

IndexError: list index out of range