In [1]:
import keras.losses
import numpy as np
import pandas as pd

In [2]:
from src.notebooks import config
from src.helpers.visualisation import Visualiser

In [3]:
model_filepath = '../../data/models/tsne'

In [4]:
dataset = pd.read_pickle(config.tsne_result_dir, compression='bz2')

In [5]:
metadata = dataset[config.metadata_columns]
target = dataset[['cluster']]
target

Unnamed: 0,cluster
0,5
1,5
2,6
3,4
4,4
...,...
420,7
421,5
422,0
423,6


In [6]:
from sklearn.preprocessing import OneHotEncoder
encoder = OneHotEncoder(sparse_output=False)
target_one_hot = encoder.fit_transform(target)
target_one_hot

array([[0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       ...,
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 1., 0., ..., 0., 0., 0.]])

In [7]:
target_one_hot.shape[1]

8

In [8]:
dataset = dataset.drop(['TSNE1', 'TSNE2', 'cluster'] + config.metadata_columns, axis=1)
dataset

Unnamed: 0,zero_crossings_mean,zero_crossings_var,bpm,spectral_centroid_mean,spectral_centroid_var,spectral_rolloff_mean,spectral_rolloff_var,spectral_flux_mean,spectral_flux_var,spectral_flatness_mean,...,mfcc_mean_6,mfcc_var_6,mfcc_mean_7,mfcc_var_7,mfcc_mean_8,mfcc_var_8,mfcc_mean_9,mfcc_var_9,mfcc_mean_10,mfcc_var_10
0,0.030815,0.029865,143.554688,728.505121,164591.144472,1044.706810,1.492931e+06,0.606103,0.621713,0.000768,...,-1.621859,92.088676,-5.985138,59.754135,-7.222707,53.279484,-6.638159,63.120949,-7.461281,58.827705
1,0.028196,0.027401,103.359375,615.425486,95544.686241,892.440162,8.328338e+05,0.683619,0.792420,0.000813,...,1.046833,60.844543,-3.295691,66.532372,-7.709404,67.271782,-8.138650,51.361748,-8.201083,51.866173
2,0.052121,0.049405,143.554688,1053.924804,248527.612506,1937.848230,1.480721e+06,0.761462,0.734870,0.002109,...,-1.518074,97.376602,0.252557,113.514336,-4.448510,74.239128,-2.306997,76.906097,-2.640234,74.699959
3,0.044240,0.042283,161.499023,619.260455,49458.448746,981.976649,2.497879e+05,0.836040,0.742858,0.000305,...,-5.978751,111.920937,0.971603,109.742783,3.967615,61.414219,4.160887,66.464058,-0.379875,82.376328
4,0.048969,0.046571,161.499023,677.808914,72961.813450,1098.534181,4.831254e+05,0.978895,1.435121,0.000446,...,-5.143861,121.065163,-0.958290,75.191750,-0.318088,72.683647,-0.499459,70.440079,-3.557541,71.182747
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
420,0.050498,0.047948,99.384014,1690.752815,996877.763828,3788.666509,6.661065e+06,1.573207,6.421918,0.012454,...,0.559879,71.918671,6.266844,85.660057,-0.473025,55.165321,1.394155,64.965172,-3.817915,56.059143
421,0.019365,0.018990,151.999081,500.969118,144860.433188,764.697603,1.021311e+06,0.754525,0.569333,0.001192,...,10.077353,55.430840,7.092884,42.235184,1.522114,30.496746,-3.054044,27.486416,-6.413583,28.495449
422,0.053039,0.050226,129.199219,1618.810406,687875.800819,3493.059627,5.250681e+06,0.973384,0.490438,0.009676,...,2.584433,58.274635,2.368633,62.413822,-2.939895,52.945293,2.519716,61.408394,-2.011139,51.967083
423,0.042857,0.041020,129.199219,1234.479968,276949.182834,2474.291523,1.718215e+06,1.057050,1.088975,0.003614,...,2.503661,131.399948,1.199921,118.341034,-1.348665,86.386292,1.038753,90.882286,-4.584059,92.075935


In [9]:
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, BatchNormalization
from sklearn.model_selection import train_test_split

In [10]:
model = Sequential()
model.add(Dense(units=64, activation='relu', input_dim=len(dataset.columns)))
model.add(BatchNormalization())
model.add(Dropout(0.2))
model.add(Dense(units=32, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.2))
model.add(Dense(units=16, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.2))
model.add(Dense(units=target_one_hot.shape[1], activation='softmax'))

In [11]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics='accuracy')

In [12]:
x_train, x_test, y_train, y_test = train_test_split(dataset, target_one_hot, test_size=0.3)
y_train

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.]])

In [13]:
history = model.fit(x_train, y_train, epochs=4000, batch_size=20)

Epoch 1/4000
Epoch 2/4000
Epoch 3/4000
Epoch 4/4000
Epoch 5/4000
Epoch 6/4000
Epoch 7/4000
Epoch 8/4000
Epoch 9/4000
Epoch 10/4000
Epoch 11/4000
Epoch 12/4000
Epoch 13/4000
Epoch 14/4000
Epoch 15/4000
Epoch 16/4000
Epoch 17/4000
Epoch 18/4000
Epoch 19/4000
Epoch 20/4000
Epoch 21/4000
Epoch 22/4000
Epoch 23/4000
Epoch 24/4000
Epoch 25/4000
Epoch 26/4000
Epoch 27/4000
Epoch 28/4000
Epoch 29/4000
Epoch 30/4000
Epoch 31/4000
Epoch 32/4000
Epoch 33/4000
Epoch 34/4000
Epoch 35/4000
Epoch 36/4000
Epoch 37/4000
Epoch 38/4000
Epoch 39/4000
Epoch 40/4000
Epoch 41/4000
Epoch 42/4000
Epoch 43/4000
Epoch 44/4000
Epoch 45/4000
Epoch 46/4000
Epoch 47/4000
Epoch 48/4000
Epoch 49/4000
Epoch 50/4000
Epoch 51/4000
Epoch 52/4000
Epoch 53/4000
Epoch 54/4000
Epoch 55/4000
Epoch 56/4000
Epoch 57/4000
Epoch 58/4000
Epoch 59/4000
Epoch 60/4000
Epoch 61/4000
Epoch 62/4000
Epoch 63/4000
Epoch 64/4000
Epoch 65/4000
Epoch 66/4000
Epoch 67/4000
Epoch 68/4000
Epoch 69/4000
Epoch 70/4000
Epoch 71/4000
Epoch 72/4000
E

In [14]:
y_pred = model.predict(x_test)



In [15]:
np.round(y_pred, 0)

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [16]:
y_test

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 1., 0.]])

In [17]:
model.evaluate(x_test, y_test)



[1.7571464776992798, 0.4140625]

In [18]:
model.save(model_filepath)

INFO:tensorflow:Assets written to: ../../data/models/tsne\assets
