In [1]:
# Import dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
import tensorflow as tf

In [2]:
# Build the SQLAlchemy engine that points at the local SQLite database file
from sqlalchemy import create_engine

database_url = "sqlite:///spotify.sqlite"
engine = create_engine(database_url)

In [3]:
# Open a connection and read every row of the `songs` table into a DataFrame;
# the connection is closed again when the `with` block exits
with engine.connect() as connection:
    songs_df = pd.read_sql(sql="SELECT * FROM songs", con=connection)

In [4]:
# Preview the first five rows of the raw songs table
songs_df.head(n=5)

Unnamed: 0,track_id,explicit,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,hit,genre
0,4pR4oQJULf7FDB54TleMyH,0,0.443,0.778,-7.564,0.266,0.241,0.0,0.215,0.628,128.25,1.0,Hip_Hop_R&B
1,4Ds4bq6aqOSAp1T7DikOi4,0,0.58,0.8,-7.528,0.345,0.281,0.0,0.0708,0.483,81.303,0.0,Hip_Hop_R&B
2,7nYbKsvhM88mHCmFsAIQVp,0,0.805,0.746,-5.211,0.185,0.238,0.0,0.215,0.717,127.922,0.0,Hip_Hop_R&B
3,7gVNP7rI9UBZndge0ulKfL,0,0.582,0.66,-4.988,0.0544,0.399,0.0,0.127,0.484,83.789,0.0,Hip_Hop_R&B
4,1hLvWelTny8vttEEZIXVjw,0,0.776,0.314,-9.513,0.034,0.881,0.000723,0.139,0.476,124.055,0.0,Hip_Hop_R&B


In [5]:
# Create a OneHotEncoder instance.
# The encoder is left at its default (sparse) output and densified below with
# `.toarray()`: the old `sparse=False` keyword was renamed to `sparse_output`
# in scikit-learn 1.2 and later removed, so this form works on either side of
# that change.
enc = OneHotEncoder()

# Fit and transform the single categorical column (`genre`); the encoder needs
# 2-D input, hence the reshape to one column
genre_encoded = enc.fit_transform(songs_df.genre.values.reshape(-1, 1)).toarray()

# Reuse the songs_df index so that an index-based merge with songs_df lines up
# row for row even if songs_df does not carry a default 0..n-1 index
encode_df = pd.DataFrame(genre_encoded, index=songs_df.index)

# Add the encoded variable names ("genre_<category>") to the dataframe.
# `get_feature_names_out` replaces `get_feature_names`, which was deprecated in
# scikit-learn 1.0 and removed in 1.2.
encode_df.columns = enc.get_feature_names_out(["genre"])
encode_df.head()



Unnamed: 0,genre_Cinema,genre_Classical,genre_Country,genre_Dance,genre_Foreign,genre_Hip_Hop_R&B,genre_Indie,genre_Jazz_Blues,genre_Kids,genre_Latin,genre_Pop,genre_Reggae,genre_Rock
0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
# Merge the one-hot genre columns onto the songs by row index, then drop the
# original text `genre` column.
# `drop(columns=...)` is used instead of the positional axis (`drop("genre", 1)`),
# which pandas deprecated and removed in 2.0.
# NOTE: this reassigns songs_df, so the cell is not idempotent - re-running it
# on its own fails because `genre` has already been dropped; re-run from the
# data-loading cell instead.
songs_df = songs_df.merge(encode_df, left_index=True, right_index=True).drop(columns="genre")

  


In [7]:
# Preview the first five rows now that the genre indicator columns are merged in
songs_df.head(n=5)

Unnamed: 0,track_id,explicit,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,...,genre_Dance,genre_Foreign,genre_Hip_Hop_R&B,genre_Indie,genre_Jazz_Blues,genre_Kids,genre_Latin,genre_Pop,genre_Reggae,genre_Rock
0,4pR4oQJULf7FDB54TleMyH,0,0.443,0.778,-7.564,0.266,0.241,0.0,0.215,0.628,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,4Ds4bq6aqOSAp1T7DikOi4,0,0.58,0.8,-7.528,0.345,0.281,0.0,0.0708,0.483,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,7nYbKsvhM88mHCmFsAIQVp,0,0.805,0.746,-5.211,0.185,0.238,0.0,0.215,0.717,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,7gVNP7rI9UBZndge0ulKfL,0,0.582,0.66,-4.988,0.0544,0.399,0.0,0.127,0.484,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1hLvWelTny8vttEEZIXVjw,0,0.776,0.314,-9.513,0.034,0.881,0.000723,0.139,0.476,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
# Split our preprocessed data into our features and target arrays.
# Target: the `hit` column.
y = songs_df["hit"].values
# Features: every other column except the target itself and the `track_id`
# identifier. `columns=` is used because passing the axis positionally
# (`drop([...], 1)`) was deprecated and then removed in pandas 2.0.
X = songs_df.drop(columns=["hit", "track_id"]).values

# Split the preprocessed data into a training and testing dataset.
# `stratify=y` keeps the hit/non-hit proportions the same in both splits and
# `random_state=0` makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, stratify=y)

  This is separate from the ipykernel package so we can avoid doing imports until


In [9]:
# Standardise the features: the scaling statistics are learned from the
# training split only, and that same fitted scaler is then applied to both
# the training and the testing split
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [10]:
# Seed Python, NumPy and TensorFlow before any weights are created so that the
# weight initialisation below (and the shuffling done later during training)
# starts from the same random state on every Restart & Run All.
tf.keras.utils.set_random_seed(0)

# Define the model - deep neural net, i.e., the number of input features and
# hidden nodes for each layer.
number_input_features = X_train.shape[1]  # one input per feature column
hidden_nodes_layer1 = 54
hidden_nodes_layer2 = 30
hidden_nodes_layer3 = 15

# Three hidden layers followed by a single sigmoid unit, which outputs a value
# in (0, 1) suitable for a binary (hit / not hit) target
nn = tf.keras.models.Sequential(
    [
        # First hidden layer (also declares the input width)
        tf.keras.layers.Dense(
            units=hidden_nodes_layer1,
            input_dim=number_input_features,
            activation="sigmoid",
        ),
        # Second hidden layer
        tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"),
        # Third hidden layer
        tf.keras.layers.Dense(units=hidden_nodes_layer3, activation="relu"),
        # Output layer
        tf.keras.layers.Dense(units=1, activation="sigmoid"),
    ]
)

# Check the structure of the model
nn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 54)                1296      
                                                                 
 dense_1 (Dense)             (None, 30)                1650      
                                                                 
 dense_2 (Dense)             (None, 15)                465       
                                                                 
 dense_3 (Dense)             (None, 1)                 16        
                                                                 
Total params: 3,427
Trainable params: 3,427
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Configure training: Adam optimizer, binary cross-entropy loss, and accuracy
# reported as the tracked metric
nn.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [12]:
# Checkpoint dependencies
import os
from tensorflow.keras.callbacks import ModelCheckpoint

# Filename template for the saved weights; `{epoch:02d}` is filled in with the
# zero-padded epoch number when a checkpoint is written
checkpoint_path = "checkpoints/weights.{epoch:02d}.hdf5"

# Make sure the target folder exists (no error if it is already there)
os.makedirs(name="checkpoints/", exist_ok=True)

In [13]:
# Create a callback that saves the model's weights every 5 epochs.
# The deprecated `period` argument is replaced by an integer `save_freq`.
# Keras counts an integer `save_freq` in batches, not epochs, so "every 5
# epochs" is expressed as 5 * (batches per epoch).
BATCH_SIZE = 32  # Keras' default batch size for array inputs; keep in sync with the batch size used by nn.fit
SAVE_EVERY_N_EPOCHS = 5
# Ceiling division: a final partial batch still counts as one training step
steps_per_epoch = -(-len(X_train_scaled) // BATCH_SIZE)

cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True,
    save_freq=SAVE_EVERY_N_EPOCHS * steps_per_epoch,
)



In [14]:
# Train for 100 epochs on the scaled training data; the checkpoint callback
# writes weights to disk as training progresses. The returned history object
# is kept in `fit_model`.
fit_model = nn.fit(
    x=X_train_scaled,
    y=y_train,
    epochs=100,
    callbacks=[cp_callback],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 5: saving model to checkpoints\weights.05.hdf5
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 10: saving model to checkpoints\weights.10.hdf5
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 15: saving model to checkpoints\weights.15.hdf5
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 20: saving model to checkpoints\weights.20.hdf5
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 25: saving model to checkpoints\weights.25.hdf5
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 30: saving model to checkpoints\weights.30.hdf5
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 35: saving model to checkpoints\weights.35.hdf5
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 40: saving model to checkpoints\weights.40.hdf5
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 

Epoch 64/100
Epoch 65/100
Epoch 65: saving model to checkpoints\weights.65.hdf5
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 70: saving model to checkpoints\weights.70.hdf5
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 75: saving model to checkpoints\weights.75.hdf5
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 80: saving model to checkpoints\weights.80.hdf5
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 85: saving model to checkpoints\weights.85.hdf5
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 90: saving model to checkpoints\weights.90.hdf5
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 95: saving model to checkpoints\weights.95.hdf5
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Epoch 100: saving model to checkpoints\weights.100.hdf5


In [15]:
# Score the trained model on the held-out, scaled test split and report the
# resulting loss and accuracy
test_metrics = nn.evaluate(X_test_scaled, y_test, verbose=2)
model_loss, model_accuracy = test_metrics
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

735/735 - 1s - loss: 0.5145 - accuracy: 0.7538 - 815ms/epoch - 1ms/step
Loss: 0.514484703540802, Accuracy: 0.7538297772407532


In [16]:
# Persist the full trained model to a single HDF5 file in the working directory
nn.save(filepath="Spotify_ml.h5")