In [1]:
from datetime import datetime
from dotenv import load_dotenv
from hyperas import optim
from hyperas.distributions import choice, uniform
from hyperopt import Trials, STATUS_OK, tpe
import numpy as np
import os
import pandas as pd
from pathlib import Path
import random
from sklearn.model_selection import train_test_split
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import Model, Input
from tensorflow.keras.layers import Dense, BatchNormalization, Activation, Dropout


Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
load_dotenv()
path = Path(os.getenv('PATH_DATASET1')).joinpath('usertracks_csv')

In [3]:
track_names = np.genfromtxt(path.joinpath('track_names.csv'), delimiter=',', dtype='U')
categories = np.genfromtxt(path.joinpath('label_categories.csv'), delimiter=',', dtype='U')

In [4]:
tot_num = len(track_names)
categ = len(categories)
print(' Total number of tracks:', tot_num)
print(' Total number of categories/playlists:', categ)

 Total number of tracks: 335
 Total number of categories/playlists: 18


In [5]:
# read_csv files and create a list of data arrays and data labels
path_ = path.joinpath('track_arrays')
data_arr = [np.genfromtxt(path_.joinpath('track_inp_{}.csv'.format(i)), delimiter=',') for i in range(tot_num)]
path_ = path.joinpath('track_labels')
data_lab = [np.genfromtxt(path_.joinpath('track_lab_{}.csv'.format(i)), delimiter=',') for i in range(tot_num)]

In [6]:
print('Sample track:' , track_names[120])
print('Input array:' , data_arr[120])
print('Label array:' , data_lab[120])
print('Label category: ', categories[data_lab[120]>0])

Sample track: Manticore
Input array: [ 0.533  0.788  0.636 ...  0.098  0.186 -0.099]
Label array: [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Label category:  ['Our old school trance 138']


In [7]:
# zip track names with input arrays before shuffling and splitting train/test sets
data = [i for i in zip(track_names, data_arr)]

In [8]:
data_train, data_test, y_train, y_test = train_test_split(data, data_lab, test_size = 0.25, random_state=17)

In [9]:
'''
data_val = data_train[-50:]
y_val = y_train[-50:]
data_train = data_train[:-50]
y_train = y_train[:-50]
'''

'\ndata_val = data_train[-50:]\ny_val = y_train[-50:]\ndata_train = data_train[:-50]\ny_train = y_train[:-50]\n'

In [10]:
print('Number of tracks in training', len(data_train))
#print('Number of tracks in validation:', len(data_val))
print('Number of tracks in testing:', len(data_test))

Number of tracks in training 251
Number of tracks in testing: 84


In [11]:
x_train = [x[1] for x in data_train]
track_train = [x[0] for x in data_train]
#x_val = [x[1] for x in data_val]
#track_val = [x[0] for x in data_val]
x_test = [x[1] for x in data_test]
track_test = [x[0] for x in data_test]

In [12]:
y_test

[array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
        0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        1.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
        0.]),
 array([0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
        0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
        0.]),
 array([0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.]),
 array([0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
        0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
        0.]),
 array([0.

In [13]:
# cast lists to numpy arrays
x_train = np.array(x_train)
x_test = np.array(x_test)
y_train = np.array(y_train)
y_test = np.array(y_test)

In [14]:
def data():
    return x_train, y_train, x_test, y_test

In [15]:
data()

(array([[ 0.306,  0.381,  0.273, ..., -0.152,  0.053, -0.123],
        [ 0.124,  0.171,  0.063, ...,  0.129,  0.176,  0.022],
        [ 0.14 ,  0.065,  0.094, ...,  0.012, -0.001, -0.025],
        ...,
        [ 0.103,  0.084,  0.309, ..., -0.087, -0.053, -0.144],
        [ 0.029,  0.453,  1.   , ..., -0.119, -0.031, -0.034],
        [ 0.883,  0.695,  0.502, ..., -0.145, -0.081,  0.084]]),
 array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 1., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 array([[ 0.594,  0.264,  1.   , ..., -0.298,  0.166, -0.426],
        [ 0.388,  0.449,  0.52 , ..., -0.012,  0.017,  0.221],
        [ 1.   ,  0.561,  0.526, ...,  0.214,  0.176, -0.227],
        ...,
        [ 0.12 ,  0.424,  0.095, ...,  0.113, -0.088, -0.058],
        [ 0.259,  0.293,  0.399, ...,  0.191,  0.027,  0.014],
        [ 0.985,  0.968,  0.76

In [16]:
def create_model(x_train, y_train, x_test, y_test):
    
    # classes/playlists in the dataset
    classes = len(y_train[0])
    
    # Load encoder model
    loaded_enc = keras.models.load_model(filepath=Path(os.getenv('PATH_MODELS')).joinpath('encoder'))
    
    # Freeze and rename all the layers
    for layer in loaded_enc.layers[:]:
        layer.trainable = False
        layer._name = str('enc_') + layer.name 
    
    # Add dense layers for the classifier
    x = Dense({{choice([40,30,20])}})(loaded_enc.layers[-1].output)
    x = Activation({{choice([tf.nn.leaky_relu, 'relu', 'tanh'])}})(x)
    x = BatchNormalization()(x)
    x = Dropout({{uniform(0, 1)}})(x)
    
    x = Dense({{choice([40,30,20])}})(x)
    x = Activation({{choice([tf.nn.leaky_relu, 'relu', 'tanh'])}})(x)
    x = BatchNormalization()(x)
    x = Dropout({{uniform(0, 1)}})(x)
    
    x = Dense({{choice([40,30,20])}})(x)
    x = Activation({{choice([tf.nn.leaky_relu, 'relu', 'tanh'])}})(x)
    x = BatchNormalization()(x)
    x = Dropout({{uniform(0, 1)}})(x)
    
    x = Dense({{choice([40,30,20])}})(x)
    x = Activation({{choice([tf.nn.leaky_relu, 'relu', 'tanh'])}})(x)
    x = BatchNormalization()(x)
    x = Dropout({{uniform(0, 1)}})(x)
    
    # Set nodes of last dense layer as number of classes
    outputs = Dense(classes, activation='softmax')(x)
    
    # create model
    model = Model(inputs=loaded_enc.input, outputs=outputs, name='classifier')
    
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], \
                  optimizer='adam')
    
    result = model.fit(x_train, y_train,
              batch_size={{choice([8, 16, 32])}},
              epochs=3,
              verbose=2,
              validation_split=0.12)
    
    validation_acc = np.amax(result.history['val_acc'])
    print('Best validation acc of epoch:', validation_acc)
    
    return {'loss': -validation_acc, 'status': STATUS_OK, 'model': model}
    

In [18]:
best_run, best_model = optim.minimize(model=create_model, data=data, algo=tpe.suggest,\
                                      max_evals=5, trials=Trials(), notebook_name='create_classifier_nn-Copy1')

print("Evalutation of best performing model:")
print(best_model.evaluate(x_test, y_test))
print("Best performing model chosen hyper-parameters:")
print(best_run)

>>> Imports:
#coding=utf-8

try:
    from datetime import datetime
except:
    pass

try:
    from dotenv import load_dotenv
except:
    pass

try:
    from hyperas import optim
except:
    pass

try:
    from hyperas.distributions import choice, uniform
except:
    pass

try:
    from hyperopt import Trials, STATUS_OK, tpe
except:
    pass

try:
    import numpy as np
except:
    pass

try:
    import os
except:
    pass

try:
    import pandas as pd
except:
    pass

try:
    from pathlib import Path
except:
    pass

try:
    import random
except:
    pass

try:
    from sklearn.model_selection import train_test_split
except:
    pass

try:
    import tensorflow as tf
except:
    pass

try:
    import tensorflow.keras as keras
except:
    pass

try:
    from tensorflow.keras import Model, Input
except:
    pass

try:
    from tensorflow.keras.layers import Dense, BatchNormalization, Activation, Dropout
except:
    pass

>>> Hyperas search space:

def get_space():
    return {
      

job exception: name 'y_train' is not defined



  0%|                                                                   | 0/5 [00:00<?, ?trial/s, best loss=?]


NameError: name 'y_train' is not defined

In [None]:
def create_classifier(base_model, nodes, dropout=True, new_idx=True):
    
    def dense_layer(x, nodes_, activation=tf.nn.leaky_relu, dropout=dropout, name_idx=None):
    
        # define a dense layer section
        x = Dense(nodes_, name='dense_{}'.format(name_idx))(x)
        x = Activation(activation, name='act_{}'.format(name_idx))(x)
        x = BatchNormalization(name='bn_{}'.format(name_idx))(x)
        if dropout:
            x = Dropout(0.25, name='dropout_{}'.format(name_idx))(x)
            
        return x
    
    # classes/playlists in the dataset
    classes = len(y_train[0])
    
    if new_idx:
        # create new index for naming layers
        idx = int(base_model.layers[-1].name[-1]) + 1
    else:
        idx = 0
    # Create classifier by adding dense layers to the base model
    x = dense_layer(base_model.layers[-1].output, nodes[0], activation='tanh', name_idx=idx + 0 )
   
    for i in range(1, len(nodes)):
        x = dense_layer(x, nodes[i], name_idx=idx + i)
    
    # Set nodes of last dense layer as number of classes
    outputs = Dense(classes, activation='softmax', name='dense_{}'.format(idx + len(nodes)))(x)
    
    # create model
    model = Model(inputs=base_model.input, outputs=outputs, name='classifier')
    
    return  model

In [None]:
tf.keras.backend.clear_session()

In [None]:
# Load encoder model
loaded_enc = keras.models.load_model(filepath=Path(os.getenv('PATH_MODELS')).joinpath('encoder'))
    
# Freeze and rename all the layers
for layer in loaded_enc.layers[:]:
    layer.trainable = False
    layer._name = str('enc_') + layer.name
    
# Set number of units/nodes for the dense layers added in the classifier

In [None]:

inputlength = 1200
inputs = Input(shape=(inputlength,), name='base_input')
x = Dense(800, name='base_dense')(inputs)
x = Activation(tf.nn.leaky_relu, name='base_act')(x)
outputs = BatchNormalization(name='base_bn')(x)

basemodel = Model(inputs, outputs, name='base_model')

In [None]:
nodes = [400, 100, 200, 100, 50, 40, 30, 20]
cls_nn = create_classifier(basemodel, nodes, dropout=True, new_idx=False)

In [None]:
#cls_nn.summary()

In [None]:
cls_nn.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

In [None]:
BATCH_SIZE = 16
TRAIN_STEPS_PER_EPOCH = np.ceil(len(x_train)/ BATCH_SIZE)       #np.ceil(TRAIN_COUNT/BATCH_SIZE)
VAL_STEPS_PER_EPOCH = np.ceil(len(x_val)/BATCH_SIZE)        #np.ceil(VAL_COUNT/BATCH_SIZE)
TEST_STEPS = len(x_test)
EPOCHS = 500

In [None]:
callback = tf.keras.callbacks.EarlyStopping(monitor='val_acc', patience=20)
result = cls_nn.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              verbose=1,
              epochs=EPOCHS,
              validation_split=0.12,
              callbacks=[callback])

In [None]:
len(result.history['loss'])

In [None]:
result.history

In [None]:
int('3')