In [1]:
import numpy as np 
import pandas as pd 
import seaborn as sns 
import tensorflow as tf
from tensorboard.plugins.hparams import api as hp
from tensorflow.keras import layers
from sklearn import preprocessing

import datetime
import math 

In [2]:
def process_data():
    """Load the Spotify train/test CSV files and return them as NumPy arrays.

    Reads ``train_data.csv`` and ``test_data.csv`` from the working directory,
    drops the first column of each file, and separates the remaining columns
    into inputs (every column not named ``'target'``) and targets (the column
    at position 13). Each split is also written to ``Spotify_data_train.npz`` /
    ``Spotify_data_test.npz`` and read back from that archive.

    Returns:
        tuple: ``(train_inputs, train_targets, test_inputs, test_targets)``.
        Inputs are float arrays and targets are integer arrays with a single
        column. Only the test inputs are standardised here; the training inputs
        are returned unscaled (``crossFold`` scales them per fold).
    """
    def _load_split(csv_path, npz_stem):
        # Read one CSV, cache it as <npz_stem>.npz and return (inputs, targets).
        frame = pd.read_csv(csv_path, sep=",", engine='python')
        frame = frame.drop(frame.columns[0], axis=1)

        # NOTE(review): targets are picked by position (13) while inputs are
        # picked by name ('target'); this assumes column 13 is 'target' -- confirm.
        targets = frame.iloc[:, [13]]
        inputs = frame.loc[:, frame.columns != 'target']

        np.savez(npz_stem, inputs=inputs, targets=targets)

        # The `np.float` / `np.int` aliases were removed from NumPy; the builtin
        # types select the same dtypes. The context manager closes the archive.
        with np.load(npz_stem + '.npz') as npz:
            return npz['inputs'].astype(float), npz['targets'].astype(int)

    train_inputs, train_targets = _load_split(r'train_data.csv', 'Spotify_data_train')
    test_inputs, test_targets = _load_split(r'test_data.csv', 'Spotify_data_test')

    # NOTE(review): the test set is standardised with its own mean/std rather
    # than statistics taken from the training data.
    test_inputs = preprocessing.scale(test_inputs)

    return train_inputs,train_targets,test_inputs,test_targets

# Number of cross-validation folds.
k = 5

train_inputs, train_targets, test_inputs, test_targets = process_data()

# Rows per fold, rounded up so that k folds always cover every training row.
total_samples = len(train_inputs)
split = math.ceil(total_samples / k)

In [3]:
def crossFold(train_inputs,train_targets,i):
    """Return the training and validation parts of cross-validation fold ``i``.

    Rows ``i*split`` up to ``(i+1)*split`` (``split`` is the module-level fold
    size) form the validation fold; all other rows form the training part.
    Fold index 4 is treated as the last fold and extends to the end of the data.
    The training inputs and the validation inputs are each standardised
    separately with ``preprocessing.scale``; targets are returned unscaled.

    Args:
        train_inputs: array of input rows.
        train_targets: array of targets, row-aligned with ``train_inputs``.
        i: zero-based fold index.

    Returns:
        tuple: ``(inputs, targets, validation_inputs, validation_targets)``.
    """
    fold_start = i * split
    # Fold 4 runs to the end of the data, so nothing is left after it.
    fold_end = len(train_inputs) if i == 4 else fold_start + split

    validation_inputs = preprocessing.scale(train_inputs[fold_start:fold_end])
    validation_targets = train_targets[fold_start:fold_end]

    # Everything before and after the held-out rows is the training part.
    remaining_inputs = np.concatenate((train_inputs[:fold_start], train_inputs[fold_end:]))
    inputs = preprocessing.scale(remaining_inputs)
    targets = np.concatenate((train_targets[:fold_start], train_targets[fold_end:]))

    return inputs,targets,validation_inputs,validation_targets

In [10]:
# Hyperparameter search space and metric declarations for the HParams sweep.

# Model size.
HP_NUM_LAYERS = hp.HParam('num_layers', hp.Discrete([1, 2, 5]))
HP_NUM_NODES = hp.HParam('num_nodes', hp.Discrete([10, 19, 30]))

# Exponentially decaying learning rate. Not used by the optimizers listed
# below, which are passed to Keras by name.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-2,
    decay_steps=10000,
    decay_rate=0.9,
)
optimizers = ['adam', 'sgd', 'RMSprop']

# Training procedure.
HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(optimizers))
HP_BATCH_SIZE = hp.HParam('batch_size', hp.Discrete([10, 50, 100]))
HP_MAX_EPOCHS = hp.HParam('max_epochs', hp.Discrete([5, 10, 25]))

# L2 regularisation strength, declared as a continuous interval.
HP_L2 = hp.HParam('l2 regularizer', hp.RealInterval(0.001, 0.01))

# Tag under which the per-fold accuracy is logged.
METRIC_ACCURACY = 'accuracy'

# Train/validation metric descriptions, stored as a NumPy array.
# Not passed to hparams_config below.
METRICS = np.asarray([
    hp.Metric("val_accuracy", group="validation", display_name="accuracy (val.)"),
    hp.Metric("val_loss", group="validation", display_name="loss (val.)"),
    hp.Metric("accuracy", group="train", display_name="accuracy (train)"),
    hp.Metric("loss", group="train", display_name="loss (train)"),
])

# Register the search space and the reported metric for the experiment.
with tf.summary.create_file_writer('logs/hparam_tuning').as_default():
    hp.hparams_config(
        hparams=[
            HP_NUM_LAYERS,
            HP_NUM_NODES,
            HP_OPTIMIZER,
            HP_BATCH_SIZE,
            HP_MAX_EPOCHS,
            HP_L2,
        ],
        metrics=[hp.Metric(METRIC_ACCURACY, display_name='Accuracy')],
    )
   
        
def train_test_model(run_dir,hparams,t_inputs,t_targets,v_inputs,v_targets):
    """Build, train and score one binary classifier for a single fold.

    The network is a stack of ``hparams[HP_NUM_LAYERS]`` ReLU dense layers of
    ``hparams[HP_NUM_NODES]`` units followed by one sigmoid output unit, trained
    with binary cross-entropy.

    Args:
        run_dir: directory the TensorBoard callback writes its logs to.
        hparams: mapping from the ``HP_*`` objects to the values for this trial.
        t_inputs: 2-D array of training inputs, shape (samples, features).
        t_targets: training targets.
        v_inputs: validation inputs, used for early stopping and for scoring.
        v_targets: validation targets.

    Returns:
        tuple: ``(loss, accuracy)`` of the trained model on the validation fold.
    """
    # Take the input width from the data instead of hard-coding it.
    input_size = np.shape(t_inputs)[1]
    output_size = 1

    num_nodes = hparams[HP_NUM_NODES]
    l2_strength = hparams[HP_L2]

    model = tf.keras.Sequential()
    # NOTE(review): the first hidden layer has no L2 regulariser, so HP_L2 has
    # no effect when num_layers == 1 -- confirm whether that is intended.
    model.add(layers.Dense(num_nodes, activation='relu', input_shape=(input_size,)))
    for _ in range(hparams[HP_NUM_LAYERS] - 1):
        model.add(layers.Dense(num_nodes,
                               kernel_regularizer=tf.keras.regularizers.l2(l2_strength),
                               activation='relu'))
    model.add(layers.Dense(output_size, activation='sigmoid'))

    model.compile(optimizer=hparams[HP_OPTIMIZER],
                  loss=tf.keras.losses.BinaryCrossentropy(),
                  metrics=['accuracy'])

    callbacksList = [
        # Per-epoch histograms and per-batch scalar logging for TensorBoard.
        tf.keras.callbacks.TensorBoard(run_dir,histogram_freq=1,update_freq=1,embeddings_freq=1),
        # Stop when validation accuracy has not improved by 0.001 for 5 epochs.
        tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', min_delta=0.001, patience=5, mode='max',baseline=None),
        # Stop when validation loss has not improved on the 0.6 baseline
        # (by at least 0.001) for 7 epochs.
        tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0.001, patience=7, verbose=0, mode='min',baseline=0.6)
    ]

    model.fit(t_inputs, t_targets,
              batch_size=hparams[HP_BATCH_SIZE],
              epochs=hparams[HP_MAX_EPOCHS],
              validation_data=(v_inputs, v_targets),
              callbacks=callbacksList,
              verbose=0)

    # Score on the held-out fold passed in by the caller. Scoring on the global
    # test set here would tune hyperparameters on test data and make the
    # function depend on notebook globals.
    val_loss, val_accuracy = model.evaluate(v_inputs, v_targets)

    return val_loss, val_accuracy
    
def run(run_dir,hparams,t_inputs,t_targets,v_inputs,v_targets,fold=None,step=None):
    """Train one fold of a trial and log its hyperparameters and scores.

    Logs are written to ``<run_dir>/Kfold<fold>``.

    Args:
        run_dir: base log directory of the trial.
        hparams: mapping from the ``HP_*`` objects to the values for this trial.
        t_inputs, t_targets: training data of the fold.
        v_inputs, v_targets: validation data of the fold.
        fold: fold index used in the log directory name. When omitted, the
            notebook-level loop variable ``i`` is used, as before.
        step: step attached to the logged scalars. When omitted, the
            notebook-level counter ``session_num`` is used, as before.

    Returns:
        The accuracy reported by ``train_test_model`` for this fold.
    """
    # Fall back to the sweep cell's globals so existing calls keep working.
    if fold is None:
        fold = i
    if step is None:
        step = session_num

    fold_dir = run_dir + "/Kfold" + str(fold)
    with tf.summary.create_file_writer(fold_dir).as_default():
        hp.hparams(hparams)  # record the values used in this trial
        loss, acc = train_test_model(fold_dir,hparams,t_inputs,t_targets,v_inputs,v_targets)
        tf.summary.scalar("loss", loss, step=step)
        tf.summary.scalar(METRIC_ACCURACY, acc, step=step)

    return acc
        

In [9]:
# Number of combinations of the five discrete hyperparameters.
# NOTE: the L2 values and the cross-validation folds are not part of this count.
print(np.prod([
    len(hparam.domain.values)
    for hparam in (HP_NUM_LAYERS, HP_NUM_NODES, HP_OPTIMIZER, HP_BATCH_SIZE, HP_MAX_EPOCHS)
]))


243


In [11]:
# Grid search: every trial is trained on each of the k folds and the fold
# accuracies are averaged.
session_num = 0
start = datetime.datetime.now()

# Every combination of the discrete hyperparameters, crossed with the two end
# points of the L2 interval. The clause order matches the original nested
# loops, so trials run in the same sequence.
search_grid = [
    {
        HP_NUM_LAYERS: num_layers,
        HP_NUM_NODES: num_nodes,
        HP_OPTIMIZER: optimizer,
        HP_BATCH_SIZE: batch_size,
        HP_MAX_EPOCHS: max_epoch,
        HP_L2: l2,
    }
    for num_layers in HP_NUM_LAYERS.domain.values
    for num_nodes in HP_NUM_NODES.domain.values
    for optimizer in HP_OPTIMIZER.domain.values
    for batch_size in HP_BATCH_SIZE.domain.values
    for max_epoch in HP_MAX_EPOCHS.domain.values
    for l2 in (HP_L2.domain.min_value, HP_L2.domain.max_value)
]

for hparams in search_grid:
    run_name = "run-%d" % session_num
    run_dir = 'logs/hparam_tuning/' + run_name
    combined_acc = 0

    start_time_run = datetime.datetime.now()

    # The fold variable must be called `i`: run() reads `i` and `session_num`
    # from the notebook namespace.
    for i in range(k):
        start_time_fold = datetime.datetime.now()
        t_inputs,t_targets,v_inputs,v_targets = crossFold(train_inputs,train_targets,i)
        print('----------- Starting trial: %s' % run_name + '-----------')
        print({h.name: hparams[h] for h in hparams})
        print("Fold nr "+str(i+1))

        accuracy = run(run_dir, hparams,t_inputs,t_targets,v_inputs,v_targets)
        combined_acc += accuracy

        finished_time = datetime.datetime.now()

        print('Total time elapsed: ', finished_time-start,'\n')
        print('Time elapsed for fold: ', finished_time-start_time_fold ,'\n')

    combined_acc = combined_acc/k
    # tf.summary.scalar only records when a default writer is active, so the
    # averaged accuracy is written under a writer for this trial's directory.
    with tf.summary.create_file_writer(run_dir).as_default():
        tf.summary.scalar('combined_accuracy', combined_acc, step=session_num)
    print("Combined accuracy: ",combined_acc,'\n')

    print('Time elapsed for run: ',finished_time-start_time_run)

    session_num += 1

print("----- Session finished -----")


----------- Starting trial: run-0-----------
{'num_layers': 1, 'num_nodes': 10, 'optimizer': 'RMSprop', 'batch_size': 10, 'max_epochs': 5, 'l2 regularizer': 0.001}
Fold nr 1
Instructions for updating:
use `tf.profiler.experimental.stop` instead.
Total time elapsed:  0:00:13.557606 

Time elapsed for fold:  0:00:13.557606 

----------- Starting trial: run-0-----------
{'num_layers': 1, 'num_nodes': 10, 'optimizer': 'RMSprop', 'batch_size': 10, 'max_epochs': 5, 'l2 regularizer': 0.001}
Fold nr 2
Total time elapsed:  0:00:26.575654 

Time elapsed for fold:  0:00:13.012772 

----------- Starting trial: run-0-----------
{'num_layers': 1, 'num_nodes': 10, 'optimizer': 'RMSprop', 'batch_size': 10, 'max_epochs': 5, 'l2 regularizer': 0.001}
Fold nr 3
Total time elapsed:  0:00:38.914447 

Time elapsed for fold:  0:00:12.336771 

----------- Starting trial: run-0-----------
{'num_layers': 1, 'num_nodes': 10, 'optimizer': 'RMSprop', 'batch_size': 10, 'max_epochs': 5, 'l2 regularizer': 0.001}
Fold 

KeyboardInterrupt: 