In [1]:
import numpy as np 
import pandas as pd 
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix,recall_score,precision_score,f1_score
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers

import math 

In [2]:
def process_data():
    df_test = pd.read_csv(r'test_data.csv', sep = ",")
    df_train = pd.read_csv(r'train_data.csv', sep = ",")

    test = df_test.drop(df_test.columns[0],axis=1)

    test_targets = test.iloc[:,[13]]
    test_inputs = test.loc[:,test.columns != 'target']

    train = df_train.drop(df_train.columns[0],axis=1)
    train_targets = train.iloc[:,[13]]
    train_inputs = train.loc[:,train.columns != 'target']
    
    np.savez('Spotify_data_train', inputs=train_inputs, targets=train_targets)
    np.savez('Spotify_data_test', inputs=test_inputs, targets=test_targets)

    npz = np.load('Spotify_data_train.npz')
    train_inputs,train_targets = npz['inputs'].astype(np.float),npz['targets'].astype(np.int)

    npz = np.load('Spotify_data_test.npz')
    test_inputs, test_targets = npz['inputs'].astype(np.float), npz['targets'].astype(np.int)

    test_inputs = preprocessing.scale(test_inputs)
    train_inputs = preprocessing.scale(train_inputs)
    
    return train_inputs,train_targets,test_inputs,test_targets

train_inputs,train_targets,test_inputs,test_targets = process_data()

total_samples = len(train_inputs)
split = math.ceil(total_samples/5)
k = 5

# Evaluering av metoden med hyperparametre fra ModelCross

Forklarende tekst

In [3]:
def build_model(n_nodes,n_layers):
    
    input_size = 18
    output_size = 1
    
    model = tf.keras.Sequential()
    #Specifying input shape in the dense layer, Keras automaticly adds an input layer.
    model.add(layers.Dense(n_nodes, activation='relu',input_shape=(input_size,)))
    for i in range(n_layers-1):
        model.add(layers.Dense(n_nodes,kernel_regularizer=tf.keras.regularizers.l2(0.001), activation='relu'))
    #model.add(layers.Dense(output_size, activation='softmax'))
    model.add(layers.Dense(output_size, activation='sigmoid'))
    
    return model
    
def compile_model(model,chosen_optimizer):
    #Decreases the learning rate with a factor of 0.9, every 10 000 steps. 
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=1e-2,
        decay_steps=10000,
        decay_rate=0.9)
    optimizer = tf.keras.optimizers.SGD(learning_rate=lr_schedule)
    #model.compile(optimizer=chosen_optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.compile(optimizer=optimizer, loss=tf.keras.losses.BinaryCrossentropy(), metrics=['accuracy'])
    
def fit_model(model,b_size,m_epochs):
    
    batch_size = b_size
    max_epochs = m_epochs
    
    history = model.fit(  train_inputs,train_targets,
                          batch_size=batch_size,
                          epochs=max_epochs,
                          validation_split = 0.2,
                          #validation_data=(v_inputs, v_targets),
                          #callbacks=[
                                #tf.keras.callbacks.TensorBoard(run_dir,histogram_freq=1,update_freq=1,embeddings_freq=1),
                                #tf.keras.callbacks.EarlyStopping(
                                    #monitor='val_accuracy', min_delta=0.001, patience=5, mode='max',
                                    #baseline=None),
                                #tf.keras.callbacks.EarlyStopping(
                                    #monitor='val_loss', min_delta=0.001, patience=7, verbose=0, mode='min',
                                    #baseline=0.6)
                                #],
                              verbose = 1) 
    return history


def plot_history(history):
    
    training_loss = history.history['loss']
    validation_loss = history.history['val_loss']
    accuracy = history.history['accuracy']
    epoch_count = range(1, len(training_loss) + 1)

    
    plt.plot(epoch_count, training_loss, 'r--')
    plt.plot(epoch_count, validation_loss, 'b-')
    plt.plot(epoch_count,accuracy)
    plt.legend(['Training Loss', 'Validation Loss','Accuracy'])
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.show();
    
def evaluate_model(model):
    test_loss, test_accuracy = model.evaluate(test_inputs, test_targets)
    print('\nTest loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(test_loss, test_accuracy*100.))
    
def predict(model,inputs):
    predictions = model.predict(inputs)
    predictions_arr = np.argmax(predictions,axis=1)
    return predictions,predictions_arr
  
    
def plot_value_array(i, predictions_array, true_label):
    true_label = test_targets[i]
    plt.grid(False)

    thisplot = plt.bar(range(2), predictions_array, color="#777777")
    plt.ylim([0, 1])
    predicted_label = np.argmax(predictions_array)
    
    if predicted_label == true_label:
        color = 'blue'
    else:
        color = 'red'
    
    plt.ylabel("{:2.0f}%".format(100*np.max(predictions_array),color=color))

    
    thisplot[predicted_label].set_color('red')
    thisplot[true_label[0]].set_color('blue')
    
def plot_result(rows,cols,predictions):
    plt.figure(figsize=(2*2*cols, 2*rows))
    for i in range(rows*cols):
        plt.subplot(rows, 2*cols, 2*i+2)
        plot_value_array(i, predictions[i], test_targets)
    plt.tight_layout()
    plt.show()
    
def get_wrong_guess_data(predictions):
    wrong_guess = []
    for i in range(100):
        index = np.argmax(predictions[i])
        if(index!=test_targets[i]):
            arr = np.concatenate((test_inputs[i],test_targets[i])) 
            wrong_guess.append(arr)
            #wrong_guess.append(test_targets[i])
    return wrong_guess

In [4]:
def create_model(n_nodes,n_layers,optimizer,b_size,m_epochs):
    model = build_model(n_nodes,n_layers)
    compile_model(model,optimizer)
    history = fit_model(model,b_size,m_epochs)
    evaluate_model(model)
    
    predictions,predictions_arr = predict(model,test_inputs)
    plot_result(2,2,predictions)
    wrong_guesses = get_wrong_guess_data(predictions)
    print(' \nConfusion Matrix: \n',confusion_matrix(test_targets, predictions_arr))
    print('\nF1 Score:',f1_score(test_targets,predictions_arr))
    print('\nRecall Score:',recall_score(test_targets,predictions_arr))
    print('\nPrecision Score:',precision_score(test_targets,predictions_arr))

In [None]:
create_model(5,5,'adam',200,10)

In [4]:
model = build_model(5,2)
compile_model(model,'RMSprop')
history = fit_model(model,5,15)
evaluate_model(model)
predictions,predictions_arr = predict(model,test_inputs)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15

Test loss: 0.51. Test accuracy: 74.62%


In [None]:
model.predict(test_inputs)

In [None]:
%load_ext tensorboard.notebook