In [1]:
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow.python import keras
from tensorflow.python.keras import layers
from tensorflow.python.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import confusion_matrix, accuracy_score

In [2]:
# Column names are supplied explicitly through `names=`, so the first line of
# the file is read as data rather than as a header.
column_names = [
    'ID',
    'Danceability',
    'Acousticness',
    'Energy',
    'Loudness',
    'Tempo',
    'Valence',
    'Category',
]
dataset_path = 'dataset.csv'

# "?" cells are parsed as NaN; anything after a tab character on a line is
# treated as a comment and ignored by the parser.
raw_dataset = pd.read_csv(
    dataset_path,
    names=column_names,
    na_values="?",
    comment='\t',
    skipinitialspace=True,
)

# Keep the parsed frame untouched and do all further work on a copy.
dataset = raw_dataset.copy()
dataset.iloc[0]

ID              TRIQYKR128F931D388
Danceability                 0.467
Acousticness                   0.2
Energy                       0.244
Loudness                   -14.588
Tempo                      123.884
Valence                      0.223
Category                     happy
Name: 0, dtype: object

In [3]:
# Turn the textual category column into a pandas categorical and store its
# integer codes in a new `Label` column (pandas assigns code -1 to missing
# values).
dataset['Category'] = dataset['Category'].astype('category')
dataset['Label'] = dataset['Category'].cat.codes

# Class distribution of the integer labels.
dataset['Label'].value_counts()

4    3115
2    2297
1     868
0     531
3     386
Name: Label, dtype: int64

In [4]:
# Six audio features plus the integer class label.
model_variables = [
    'Danceability', 'Acousticness', 'Energy',
    'Loudness', 'Tempo', 'Valence',
    'Label',
]

dataset_relevant = dataset.loc[:, model_variables]
# NOTE(review): get_dummies only expands object/categorical columns; the
# selected columns appear to be numeric, so this looks like a pass-through.
# Confirm before removing.
dataset_relevant_encoded = pd.get_dummies(dataset_relevant)

# Separate the target column from the feature matrix.
training_target = dataset_relevant_encoded['Label']
training_features = dataset_relevant_encoded.drop('Label', axis='columns')

# Standardise every feature column to zero mean / unit variance.
# NOTE(review): the scaler is fitted on ALL rows, so any later train/test split
# of `train_dataset` has been scaled with statistics that include the test rows.
std = StandardScaler().fit(training_features)
train_dataset = std.transform(training_features)

train_dataset.shape

(7197, 6)

In [5]:
def baseline_model(n_features=6, n_classes=5, dropout_rate=0.5):
    """Build and compile the feed-forward classifier.

    Layer stack:
        Dense(128, relu) -> BatchNorm -> Dropout
        -> Dense(64, relu) -> BatchNorm -> Dropout
        -> Dense(64, relu) -> Dense(n_classes, softmax)

    Called with no arguments it builds exactly the original network
    (6 inputs, 5 output classes, dropout 0.5), so it can still be passed
    unchanged as a zero-argument ``build_fn``.

    Args:
        n_features: Number of input features per sample.
        n_classes: Number of target classes (width of the softmax layer).
        dropout_rate: Dropout rate applied after each of the first two
            hidden blocks.

    Returns:
        A compiled ``keras.Sequential`` model using the Adam optimizer,
        sparse categorical cross-entropy loss and the accuracy metric.
    """
    model = keras.Sequential([
        layers.Dense(128, input_dim=n_features, activation=tf.nn.relu),
        layers.BatchNormalization(),
        layers.Dropout(dropout_rate),
        layers.Dense(64, activation=tf.nn.relu),
        layers.BatchNormalization(),
        layers.Dropout(dropout_rate),
        layers.Dense(64, activation=tf.nn.relu),
        layers.Dense(n_classes, activation=tf.nn.softmax),
    ])

    # The sparse loss takes integer class ids as targets (not one-hot vectors).
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model


In [6]:
estimator = KerasClassifier(build_fn=baseline_model, epochs=100, batch_size=128, verbose=0)
# Five independent stratified 80/20 random splits (the splitter is a
# StratifiedShuffleSplit, so test sets of different splits may overlap).
kfold = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=7)

# The splitter yields positional row indices. Index plain arrays so rows are
# selected by position regardless of the pandas index labels.
features_array = training_features.values
target_array = training_target.values

for train_index, test_index in kfold.split(features_array, target_array):
    # Fit the scaler on the training rows of this split only and reuse its
    # statistics for the held-out rows, so no test information leaks into
    # the preprocessing.
    fold_scaler = StandardScaler()
    X_tr = fold_scaler.fit_transform(features_array[train_index])
    X_tes = fold_scaler.transform(features_array[test_index])
    y_tr, y_tes = target_array[train_index], target_array[test_index]
    estimator.fit(X_tr, y_tr)

    # Evaluate on the held-out rows of this split.
    y_pred = estimator.predict(X_tes)
    acc = accuracy_score(y_tes, y_pred)
    cnf_matrix = confusion_matrix(y_tes, y_pred)
    print(acc)
    print(cnf_matrix)

Instructions for updating:
keep_dims is deprecated, use keepdims instead
0.5944444444444444
[[  0  14  25   0  67]
 [  0  84  48   0  42]
 [  0  27 301   0 132]
 [  0   2   8   0  67]
 [  0  31 121   0 471]]
