In [71]:
## Load the TensorBoard notebook extension; it provides the %tensorboard magic used further down.
%load_ext tensorboard

In [59]:
## Third-party and standard-library imports used throughout the notebook.
import numpy as np
from pathlib import Path
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import sys, os
from loguru import logger
import tensorflow as tf
from tensorflow import keras
## NOTE(review): `tensorflow.python.keras` is TensorFlow's private namespace; the
## public equivalent is `tensorflow.keras`. Consider switching the four
## `tensorflow.python.keras` imports in this cell - confirm against the installed TF version.
from tensorflow.python.keras import layers
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import RandomForestClassifier
from tensorflow.python.keras.layers import Dense, Flatten, Input
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.callbacks import EarlyStopping, TensorBoard

## Put the parent directory on the import path so the project modules below
## (`definitions`, `src.*`) can be imported - presumably the notebook lives one
## level below the project root; verify if the notebook is moved.
sys.path.append('..')

from definitions import get_project_root
from src.data.make_dataset import create_train_test_validation
from src.visualization.visualize import plot_results
from src.models.train_model import simple_baseline

## Project root; used below to build the log and model output paths.
root = get_project_root()


In [25]:
## Build the train / validation / test splits ...
(
    x_train, x_valid, x_test,
    y_train, y_valid, y_test,
) = create_train_test_validation()

## ... and display the shape of every split as a quick sanity check.
tuple(split.shape for split in (x_train, y_train, x_valid, y_valid, x_test, y_test))

2022-02-12 16:23:55.368 | INFO     | src.data.make_dataset:create_train_test_validation:73 - found file labeled_data.csv, procceed with creating train, test and validation sets


((61711, 23), (61711, 1), (13225, 23), (13225, 1), (13224, 23), (13224, 1))

In [26]:
## Containers filled in by the cells below:
##   result -> what model.fit() returned, keyed by model name
##   score  -> test-set score, keyed by model name
result, score = dict(), dict()

<h2>Simple model</h2>

In [28]:
## Simple model / baseline: constant prediction.
## yhat = 'Rock'

score['simple_baseline'] = simple_baseline()

score['simple_baseline']

## So if we always predict the genre to be 'Rock', we'd have an accuracy of 38.4%. That's due to a significant class imbalance, as we already observed during the EDA.

0.3842256503327284

In [57]:
## Random forest baseline (an ensemble of 100 decision trees, fixed seed for reproducibility).
rf_clf = RandomForestClassifier(n_estimators=100, random_state=0)

## y_train is a column vector of shape (n_samples, 1); scikit-learn expects a 1-d
## target and emits a DataConversionWarning otherwise, so flatten it before fitting.
rf_clf.fit(x_train, np.ravel(y_train))

  rf_clf.fit(x_train,y_train)


RandomForestClassifier(random_state=0)

In [58]:
## Mean accuracy of the random forest on (x_test, y_test), stored next to the other scores.
score['randomforest'] = rf_clf.score(x_test,y_test)

<h2>Baseline model</h2>

In [30]:
## First neural network: a small fully connected classifier.

## Stop when the monitored validation loss has not improved for 5 epochs and
## roll the model back to the best weights seen.
early_stop = EarlyStopping(patience=5, restore_best_weights=True)

## Each model gets its own run sub-directory so its TensorBoard curves are not
## mixed with those of other models that log under src/logs.
tensorboard_callback = TensorBoard(log_dir=str(root / 'src' / 'logs' / 'base'), histogram_freq=1)

## Number of input features, read from the data instead of from the first row.
n_features = x_train.shape[1]

base_model = Sequential(
    [
        ## Despite its name this is the first Dense (hidden) layer; the input
        ## itself is declared through input_shape.
        Dense(23, activation='relu', name='input', input_shape=(n_features,)),
        Dense(100, activation='relu', name='hidden_1'),
        ## One softmax unit per class; 15 is presumably the number of genres - TODO confirm.
        Dense(15, activation='softmax', name='output')
    ]
)

base_model.summary()

## Sparse categorical cross-entropy: labels are passed as integer class ids, not one-hot.
base_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

## Keep the training history for plotting and the [loss, accuracy] test result for comparison.
result['base'] = base_model.fit(
    x_train,
    y_train,
    epochs=100,
    validation_data=(x_valid, y_valid),
    callbacks=[early_stop, tensorboard_callback],
    verbose=1,
)
score['base'] = base_model.evaluate(x_test, y_test)

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (Dense)                (None, 23)                552       
_________________________________________________________________
hidden_1 (Dense)             (None, 100)               2400      
_________________________________________________________________
output (Dense)               (None, 15)                1515      
Total params: 4,467
Trainable params: 4,467
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100


In [31]:
## Test-set result of the first network as returned by evaluate(): [loss, accuracy].
score['base']

[1.3466856479644775, 0.5502873659133911]

In [74]:
## let's have a look at the results
##plot_results(result,ymin=0,ymax=1,yscale="linear")
%tensorboard


ERROR: Could not find `tensorboard`. Please ensure that your PATH
contains an executable `tensorboard` program, or explicitly specify
the path to a TensorBoard binary by setting the `TENSORBOARD_BINARY`
environment variable.

<h2>Adding more complexity to the baseline model</h2>

In [32]:
## Second neural network: same idea as the first one, but deeper and wider.

## Fresh callbacks for this run: a separate early-stopping object so this cell does
## not depend on callback state from another cell, and a separate TensorBoard run
## sub-directory so the curves of this model are not mixed with other runs.
early_stop_deep = EarlyStopping(patience=5, restore_best_weights=True)
tensorboard_callback = TensorBoard(log_dir=str(root / 'src' / 'logs' / 'base_deep'), histogram_freq=1)

## Number of input features, read from the data instead of from the first row.
n_features = x_train.shape[1]

base_model_deep = Sequential(
    [
        ## Despite its name this is the first Dense (hidden) layer; the input
        ## itself is declared through input_shape.
        Dense(23, activation='relu', name='input', input_shape=(n_features,)),
        Dense(150, activation='relu'),
        Dense(50, activation='relu'),
        Dense(50, activation='relu'),
        Dense(50, activation='relu'),
        ## One softmax unit per class; 15 is presumably the number of genres - TODO confirm.
        Dense(15, activation='softmax', name='output')
    ]
)

## No explicit build() call: the model is already built because the first layer
## declares input_shape.
base_model_deep.summary()

base_model_deep.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

## Keep the training history as well, in line with result['base'] for the first network.
result['base_deep'] = base_model_deep.fit(
    x_train,
    y_train,
    epochs=100,
    validation_data=(x_valid, y_valid),
    callbacks=[early_stop_deep, tensorboard_callback],
    verbose=1,
)

score['base_deep'] = base_model_deep.evaluate(x_test, y_test)

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (Dense)                (None, 23)                552       
_________________________________________________________________
dense_4 (Dense)              (None, 150)               3600      
_________________________________________________________________
dense_5 (Dense)              (None, 50)                7550      
_________________________________________________________________
dense_6 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_7 (Dense)              (None, 50)                2550      
_________________________________________________________________
output (Dense)               (None, 15)                765       
Total params: 17,567
Trainable params: 17,567
Non-trainable params: 0
__________________________________________________

In [None]:
## Show every score collected so far, keyed by model name.
score

In [36]:
## Persist the first network under <root>/src/models so it can be reloaded later.
file_model = root.joinpath('src', 'models', 'best_base_model.model')
base_model.save(file_model)

INFO:tensorflow:Assets written to: c:\Users\huube\OneDrive\Master of Informatics\Machine Learning\Eindopdracht\src\models\best_base_model.model\assets


{'dummy_baseline': 0.3842256503327284,
 'base': [1.3469150066375732, 0.5529340505599976],
 'base2': [1.3545866012573242, 0.5489261746406555]}