# Evaluation of the methods chosen after hyperparameter tuning

Press Kernel -> Restart & Run All

In [None]:
import numpy as np 
import pandas as pd 
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix,recall_score,precision_score,f1_score
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers
import math 

In [None]:
def process_data():
    """Load the cleaned train/test CSV files and return them as NumPy arrays.

    Reads ``../train_data_clean`` and ``../test_data_clean``, drops the first
    column of each file, separates the target column from the input columns,
    round-trips both sets through ``.npz`` files in the working directory and
    standardises the test inputs with ``sklearn.preprocessing.scale``.

    The training inputs are returned unscaled on purpose: ``crossFold`` scales
    them separately for every fold.

    Returns:
        Tuple ``(train_inputs, train_targets, test_inputs, test_targets)``.
        Inputs are float arrays, targets are int arrays.
    """
    df_test = pd.read_csv(r'../test_data_clean', sep=",", engine='python')
    df_train = pd.read_csv(r'../train_data_clean', sep=",", engine='python')

    # Removing unwanted column
    test = df_test.drop(df_test.columns[0], axis=1)
    train = df_train.drop(df_train.columns[0], axis=1)

    # Separating "target" into its own dataframe.
    # NOTE(review): the targets are picked by position (column 13) while the
    # inputs exclude the column *named* 'target' -- presumably the same
    # column; confirm against the CSV header.
    test_targets = test.iloc[:, [13]]
    test_inputs = test.loc[:, test.columns != 'target']
    train_targets = train.iloc[:, [13]]
    train_inputs = train.loc[:, train.columns != 'target']

    # Saving the data is not necessary every time, but it is kept in the
    # method to show the entire process.
    np.savez('Spotify_data_train', inputs=train_inputs, targets=train_targets)
    np.savez('Spotify_data_test', inputs=test_inputs, targets=test_targets)

    # The np.float / np.int aliases were removed from NumPy; the builtin
    # float / int are what they referred to, so the resulting dtypes are
    # unchanged. The context manager closes the .npz file once the arrays
    # have been copied out by astype().
    with np.load('Spotify_data_train.npz') as npz:
        train_inputs = npz['inputs'].astype(float)
        train_targets = npz['targets'].astype(int)

    with np.load('Spotify_data_test.npz') as npz:
        test_inputs = npz['inputs'].astype(float)
        test_targets = npz['targets'].astype(int)

    # Preprocessing using the sklearn method. Only the test inputs are scaled
    # here; the training inputs are scaled per fold in crossFold.
    test_inputs = preprocessing.scale(test_inputs)

    return train_inputs, train_targets, test_inputs, test_targets

# Load the data once; the functions below read these module-level arrays.
train_inputs, train_targets, test_inputs, test_targets = process_data()

# Number of cross-validation folds and the number of samples per fold.
# The fold size is rounded up, so the last fold may hold fewer samples.
k = 5
total_samples = len(train_inputs)
split = math.ceil(total_samples / k)

In [None]:
def build_model(n_nodes,n_layers,regulizer_const):
    """Build an uncompiled fully connected network with a sigmoid output.

    Args:
        n_nodes: Number of units in every hidden layer.
        n_layers: Number of hidden layers.
        regulizer_const: L2 factor applied to the kernels of the hidden
            layers after the first one.

    Returns:
        A ``tf.keras.Sequential`` model taking 17 input features and
        producing a single sigmoid output.

    Note:
        The first hidden layer is created without a kernel regularizer, so
        ``regulizer_const`` has no effect when ``n_layers`` is 1.
    """
    input_size = 17
    output_size = 1

    network = tf.keras.Sequential()

    # Passing input_shape to the first Dense layer lets Keras add the input
    # layer automatically.
    first_hidden = layers.Dense(n_nodes, activation='relu', input_shape=(input_size,))
    network.add(first_hidden)

    # Remaining hidden layers: same width, each with its own L2 penalty.
    for _ in range(1, n_layers):
        penalty = tf.keras.regularizers.l2(regulizer_const)
        network.add(layers.Dense(n_nodes, kernel_regularizer=penalty, activation='relu'))

    network.add(layers.Dense(output_size, activation='sigmoid'))
    return network

def crossFold(train_inputs,train_targets,i):
    """Split the training data into one cross-validation fold.

    Fold ``i`` (0-based) is the validation set and everything else is the
    training set. The fold size comes from the module-level ``split``; fold 4
    runs to the end of the data.

    The inputs are not scaled before this point: the training part and the
    validation part are each standardised separately with
    ``preprocessing.scale`` after they have been separated, to prevent data
    leakage.

    Args:
        train_inputs: Array of all training inputs.
        train_targets: Array of all training targets.
        i: Index of the fold to hold out for validation.

    Returns:
        Tuple ``(inputs, targets, validation_inputs, validation_targets)``.
    """
    lower = i*split
    # The last fold has no upper bound so that it absorbs the remainder.
    upper = None if i == 4 else (i+1)*split
    held_out = slice(lower, upper)

    validation_inputs = preprocessing.scale(train_inputs[held_out])
    validation_targets = train_targets[held_out]

    if i == 0:
        # Validation fold is at the front: train on everything after it.
        raw_inputs = train_inputs[split:]
        targets = train_targets[split:]
    elif i == 4:
        # Validation fold is at the back: train on everything before it.
        raw_inputs = train_inputs[:lower]
        targets = train_targets[:lower]
    else:
        # Validation fold is in the middle: stitch the two outer parts together.
        raw_inputs = np.concatenate((train_inputs[:lower], train_inputs[upper:]))
        targets = np.concatenate((train_targets[:lower], train_targets[upper:]))

    inputs = preprocessing.scale(raw_inputs)
    return inputs,targets,validation_inputs,validation_targets
    
def compile_model(model,chosen_optimizer):
    """Compile ``model`` in place with binary cross-entropy loss.

    Args:
        model: The Keras model to compile.
        chosen_optimizer: Optimizer passed straight to ``model.compile``.

    Accuracy is tracked as the only metric. Nothing is returned.
    """
    loss_fn = tf.keras.losses.BinaryCrossentropy()
    model.compile(
        optimizer=chosen_optimizer,
        loss=loss_fn,
        metrics=['accuracy'],
    )
    
def fit_model(model,b_size,m_epochs,t_inputs,t_targets,v_inputs,v_targets):
    """Fit ``model`` on the training data while monitoring a validation set.

    Args:
        model: Object exposing a Keras-style ``fit`` method.
        b_size: Batch size.
        m_epochs: Number of epochs to train for.
        t_inputs: Training inputs.
        t_targets: Training targets.
        v_inputs: Validation inputs.
        v_targets: Validation targets.

    Returns:
        Whatever ``model.fit`` returns.
    """
    validation_pair = (v_inputs, v_targets)
    # verbose=0 is passed so the fit call does not log its progress.
    return model.fit(
        t_inputs,
        t_targets,
        batch_size=b_size,
        epochs=m_epochs,
        validation_data=validation_pair,
        verbose=0,
    )

def evaluate_model(model):
    """Evaluate ``model`` on the module-level test set and print the result.

    Args:
        model: A compiled model whose ``evaluate`` call yields a
            ``(loss, accuracy)`` pair.

    Returns:
        Tuple ``(test_loss, test_accuracy)`` with the accuracy as a fraction.
    """
    scores = model.evaluate(test_inputs, test_targets)
    test_loss, test_accuracy = scores
    # Accuracy is reported as a percentage but returned as a fraction.
    accuracy_percent = test_accuracy*100.
    print('\nTest loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(test_loss, accuracy_percent))
    return test_loss,test_accuracy
    
    
def predict(model):
    """Predict class labels for the module-level test inputs.

    Each model output is thresholded at 0.5: values strictly greater than
    0.5 become class 1, everything else class 0.

    Args:
        model: Object exposing a ``predict`` method.

    Returns:
        A list with one single-element list (``[1]`` or ``[0]``) per
        prediction.
    """
    raw_outputs = model.predict(test_inputs)
    return [[1] if output > 0.5 else [0] for output in raw_outputs]

def build_single_model(n_nodes,n_layers,regulizer_const,optimizer,b_size,m_epochs):
    """Build, train and evaluate one model and return it with its predictions.

    The model is built and compiled once, then fitted on each of the five
    cross-validation folds of the module-level training data in turn. It is
    then evaluated on the test set and used to predict the test labels.

    NOTE(review): the same model instance is fitted on every fold, so it is
    not rebuilt between folds and each fold's validation samples are used as
    training samples in the other folds -- confirm this is intended.

    Args:
        n_nodes: Units per hidden layer.
        n_layers: Number of hidden layers.
        regulizer_const: L2 regularization factor passed to ``build_model``.
        optimizer: Optimizer passed to ``compile_model``.
        b_size: Batch size used for fitting.
        m_epochs: Number of epochs per fold.

    Returns:
        Tuple ``(model, predictions)``.
    """
    model = build_model(n_nodes,n_layers,regulizer_const)
    compile_model(model,optimizer)

    for fold in range(5):
        print('Fold nr:',str(fold+1))
        fold_data = crossFold(train_inputs,train_targets,fold)
        # fold_data is (inputs, targets, validation_inputs, validation_targets)
        fit_model(model,b_size,m_epochs,*fold_data)

    print('----Finished----')
    evaluate_model(model)

    return model, predict(model)

def print_results(predictions,modelnr):
    """Print classification metrics for one model on the test set.

    Prints the confusion matrix, F1 score, recall and precision of
    ``predictions`` against the module-level ``test_targets``.

    Args:
        predictions: Predicted labels for the test set.
        modelnr: Number used to label the output.
    """
    print('Model nr: ',str(modelnr) )

    # Each metric is computed and printed in turn, in this order.
    report = (
        (' \nConfusion Matrix: \n', confusion_matrix),
        ('\nF1 Score:', f1_score),
        ('\nRecall Score:', recall_score),
        ('\nPrecision Score:', precision_score),
    )
    for heading, metric in report:
        print(heading, metric(test_targets, predictions))

In [None]:
# Train and evaluate the three configurations, then print their test metrics.
model1, predictions_model1 = build_single_model(
    n_nodes=30, n_layers=1, regulizer_const=0.01,
    optimizer='adam', b_size=50, m_epochs=25)
print_results(predictions_model1, 1)

model2, predictions_model2 = build_single_model(
    n_nodes=30, n_layers=2, regulizer_const=0.001,
    optimizer='adam', b_size=10, m_epochs=25)
print_results(predictions_model2, 2)

model3, predictions_model3 = build_single_model(
    n_nodes=30, n_layers=1, regulizer_const=0.01,
    optimizer='RMSprop', b_size=5, m_epochs=50)
print_results(predictions_model3, 3)

In [None]:
# Model.summary() prints the table itself and returns None. Passing it to
# print() would emit the table first and then "Model N:  None", so the label
# is printed first and summary() is called as a separate statement.
for number, trained_model in enumerate((model1, model2, model3), start=1):
    if number > 1:
        print('----')
    print('Model {}: '.format(number))
    trained_model.summary()
    print()