In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow import keras 
import tensorflow as tf
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import numpy as np
import keras_tuner as kt
import sklearn as skl

In [None]:
# Load the data into a Pandas dataframe
df = pd.read_csv('/content/final_genre_12k.csv')


In [None]:
df.drop(columns={'Unnamed: 0','track_id','track','artist','primary_genre'}, inplace=True)

In [None]:
df

Unnamed: 0,genre,duration_ms,key,mode,time_signature,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,valence,tempo
0,"['R&B', 'Pop', 'Electro', 'Electro-Pop', 'Danc...",207320,1,1,4,0.22900,0.717,0.653,0.000000,0.1010,-5.634,0.0658,0.412,106.966
1,"['R&B', 'Pop', 'Contemporary R&B', 'Electronic...",201661,6,1,4,0.29700,0.752,0.488,0.000009,0.0936,-7.050,0.0705,0.533,136.041
2,"['Rap', 'Memes', 'Progressive Pop', 'Texas Rap...",312820,8,1,4,0.00513,0.834,0.730,0.000000,0.1240,-3.714,0.2220,0.446,155.008
3,"['R&B', 'Pop', 'Piano', 'New Wave', 'Christian...",241107,7,1,4,0.16400,0.335,0.625,0.000000,0.0708,-7.462,0.0386,0.346,150.277
4,"['Pop', 'Holiday', 'Cover', 'Christmas']",133547,0,1,4,0.68500,0.665,0.398,0.000000,0.1720,-11.886,0.0300,0.864,140.456
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11709,"['Pop', 'Pop-Rock', 'Sixties']",198600,7,1,4,0.42900,0.541,0.334,0.000017,0.5070,-12.667,0.0283,0.596,110.025
11710,['Pop'],130227,9,1,4,0.00849,0.650,0.800,0.155000,0.0678,-8.202,0.0321,0.956,136.200
11711,"['Rock', 'Jazz Fusion', 'Blue-Eyed Soul', 'Pop...",175636,0,0,4,0.72300,0.652,0.440,0.000006,0.1120,-11.474,0.0315,0.711,106.863
11712,"['Pop', 'UK']",198453,2,1,4,0.15100,0.360,0.298,0.000003,0.3400,-10.468,0.0275,0.266,79.550


In [None]:
df3 = df.iloc[:, 0:].copy()

In [None]:
df3

Unnamed: 0,genre,duration_ms,key,mode,time_signature,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,valence,tempo
0,"['R&B', 'Pop', 'Electro', 'Electro-Pop', 'Danc...",207320,1,1,4,0.22900,0.717,0.653,0.000000,0.1010,-5.634,0.0658,0.412,106.966
1,"['R&B', 'Pop', 'Contemporary R&B', 'Electronic...",201661,6,1,4,0.29700,0.752,0.488,0.000009,0.0936,-7.050,0.0705,0.533,136.041
2,"['Rap', 'Memes', 'Progressive Pop', 'Texas Rap...",312820,8,1,4,0.00513,0.834,0.730,0.000000,0.1240,-3.714,0.2220,0.446,155.008
3,"['R&B', 'Pop', 'Piano', 'New Wave', 'Christian...",241107,7,1,4,0.16400,0.335,0.625,0.000000,0.0708,-7.462,0.0386,0.346,150.277
4,"['Pop', 'Holiday', 'Cover', 'Christmas']",133547,0,1,4,0.68500,0.665,0.398,0.000000,0.1720,-11.886,0.0300,0.864,140.456
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11709,"['Pop', 'Pop-Rock', 'Sixties']",198600,7,1,4,0.42900,0.541,0.334,0.000017,0.5070,-12.667,0.0283,0.596,110.025
11710,['Pop'],130227,9,1,4,0.00849,0.650,0.800,0.155000,0.0678,-8.202,0.0321,0.956,136.200
11711,"['Rock', 'Jazz Fusion', 'Blue-Eyed Soul', 'Pop...",175636,0,0,4,0.72300,0.652,0.440,0.000006,0.1120,-11.474,0.0315,0.711,106.863
11712,"['Pop', 'UK']",198453,2,1,4,0.15100,0.360,0.298,0.000003,0.3400,-10.468,0.0275,0.266,79.550


In [None]:
df3

In [None]:
# Preprocess the data
X = df3.drop(columns=['genre'])
y = df3['genre'].values

# One-hot encode the target variable
y_one_hot = pd.get_dummies(y)

X_train, X_test, y_train, y_test = train_test_split(X, y_one_hot, random_state=78)

# Create scaler instance
X_scaler = skl.preprocessing.StandardScaler()

# Fit the scaler
X_scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [None]:
X_train.shape[1]

13

In [None]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])
    
    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=10,
        step=2), activation=activation, input_dim=X_train.shape[1]))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 6)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=10,
            step=2),
            activation=activation))
    
    # Since we are now performing multi-class classification, we need to use
    # a softmax activation function in the output layer
    nn_model.add(tf.keras.layers.Dense(units=y_train.shape[1], activation="softmax"))

    # Compile the model
    nn_model.compile(loss="categorical_crossentropy", optimizer='adam', metrics=["accuracy"])
    
    return nn_model

In [None]:
tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=20,
    hyperband_iterations=2)

tuner.search(X_train_scaled,y_train,epochs=20,validation_data=(X_test_scaled,y_test))


Trial 72 Complete [00h 01m 23s]
val_accuracy: 0.23933082818984985

Best val_accuracy So Far: 0.24274496734142303
Total elapsed time: 00h 07m 44s


In [None]:
# Get best model hyperparameters
best_hyper = tuner.get_best_hyperparameters(1)[0]
best_hyper.values

{'activation': 'tanh',
 'first_units': 7,
 'num_layers': 2,
 'units_0': 9,
 'units_1': 1,
 'units_2': 1,
 'units_3': 5,
 'units_4': 5,
 'units_5': 5,
 'tuner/epochs': 7,
 'tuner/initial_epoch': 0,
 'tuner/bracket': 1,
 'tuner/round': 0}

In [None]:
# Evaluate best model against full test data
best_model = tuner.get_best_models(1)[0]
model_loss, model_accuracy = best_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

92/92 - 1s - loss: 8.2711 - accuracy: 0.2427 - 1s/epoch - 11ms/step
Loss: 8.27107048034668, Accuracy: 0.24274496734142303


In [None]:
# # Save the model
# model.save('genre_predicting_model.h5')