In [32]:
import pandas as pd
import numpy as np
import joblib
import tensorflow as tf
from keras import models, layers
from keras.layers import Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import os

from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [2]:
df = pd.read_csv('/content/drive/MyDrive/ai/datasets/spotify.csv')

In [3]:
df.columns

Index(['Unnamed: 0', 'track_id', 'artists', 'album_name', 'track_name',
       'popularity', 'duration_ms', 'explicit', 'danceability', 'energy',
       'key', 'loudness', 'mode', 'speechiness', 'acousticness',
       'instrumentalness', 'liveness', 'valence', 'tempo', 'time_signature',
       'track_genre'],
      dtype='object')

In [4]:
df['Unnamed: 0'].value_counts()

0         1
75997     1
76008     1
76007     1
76006     1
         ..
37995     1
37994     1
37993     1
37992     1
113999    1
Name: Unnamed: 0, Length: 114000, dtype: int64

In [5]:
df = df.drop('Unnamed: 0', axis=1)

In [6]:
df.head()

Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,track_genre
0,5SuOikwiRyPMVoIQDJUgSV,Gen Hoshino,Comedy,Comedy,73,230666,False,0.676,0.461,1,-6.746,0,0.143,0.0322,1e-06,0.358,0.715,87.917,4,acoustic
1,4qPNDBW1i3p13qLCt0Ki3A,Ben Woodward,Ghost (Acoustic),Ghost - Acoustic,55,149610,False,0.42,0.166,1,-17.235,1,0.0763,0.924,6e-06,0.101,0.267,77.489,4,acoustic
2,1iJBSr7s7jYXzM8EGcbK5b,Ingrid Michaelson;ZAYN,To Begin Again,To Begin Again,57,210826,False,0.438,0.359,0,-9.734,1,0.0557,0.21,0.0,0.117,0.12,76.332,4,acoustic
3,6lfxq3CG4xtTiEg7opyCyx,Kina Grannis,Crazy Rich Asians (Original Motion Picture Sou...,Can't Help Falling In Love,71,201933,False,0.266,0.0596,0,-18.515,1,0.0363,0.905,7.1e-05,0.132,0.143,181.74,3,acoustic
4,5vjLSffimiIP26QG5WcN2K,Chord Overstreet,Hold On,Hold On,82,198853,False,0.618,0.443,2,-9.681,1,0.0526,0.469,0.0,0.0829,0.167,119.949,4,acoustic


In [7]:
df = df.drop(['track_id', 'artists', 'album_name', 'track_name'], axis=1)

In [8]:
df['explicit'].value_counts()

False    104253
True       9747
Name: explicit, dtype: int64

In [9]:
df['explicit'] = df['explicit'].astype(float)

In [10]:
df['track_genre'].value_counts()

acoustic             1000
punk-rock            1000
progressive-house    1000
power-pop            1000
pop                  1000
                     ... 
folk                 1000
emo                  1000
electronic           1000
electro              1000
world-music          1000
Name: track_genre, Length: 114, dtype: int64

In [11]:
df['track_genre'].unique()

array(['acoustic', 'afrobeat', 'alt-rock', 'alternative', 'ambient',
       'anime', 'black-metal', 'bluegrass', 'blues', 'brazil',
       'breakbeat', 'british', 'cantopop', 'chicago-house', 'children',
       'chill', 'classical', 'club', 'comedy', 'country', 'dance',
       'dancehall', 'death-metal', 'deep-house', 'detroit-techno',
       'disco', 'disney', 'drum-and-bass', 'dub', 'dubstep', 'edm',
       'electro', 'electronic', 'emo', 'folk', 'forro', 'french', 'funk',
       'garage', 'german', 'gospel', 'goth', 'grindcore', 'groove',
       'grunge', 'guitar', 'happy', 'hard-rock', 'hardcore', 'hardstyle',
       'heavy-metal', 'hip-hop', 'honky-tonk', 'house', 'idm', 'indian',
       'indie-pop', 'indie', 'industrial', 'iranian', 'j-dance', 'j-idol',
       'j-pop', 'j-rock', 'jazz', 'k-pop', 'kids', 'latin', 'latino',
       'malay', 'mandopop', 'metal', 'metalcore', 'minimal-techno', 'mpb',
       'new-age', 'opera', 'pagode', 'party', 'piano', 'pop-film', 'pop',
       'pow

In [12]:
df = df[~df['track_genre'].isin(['singer-songwriter', 'songwriter', 'study'])]

In [13]:
df['track_genre'] = df['track_genre'].replace({
    'acoustic': 'emotional',
    'ambient': 'emotional',
    'chill': 'emotional',
    'classical': 'emotional',
    'gospel': 'emotional',
    'groove': 'emotional',
    'happy': 'emotional',
    'jazz': 'emotional',
    'new-age': 'emotional',
    'opera': 'emotional',
    'piano': 'emotional',
    'romance': 'emotional',
    'sad': 'emotional',
    'sleep': 'emotional',

    'afrobeat': 'ethnic',
    'bluegrass': 'ethnic',
    'brazil': 'ethnic',
    'british': 'ethnic',
    'folk': 'ethnic',
    'forro': 'ethnic',
    'french': 'ethnic',
    'german': 'ethnic',
    'indian': 'ethnic',
    'iranian': 'ethnic',
    'latin': 'ethnic',
    'latino': 'ethnic',
    'malay': 'ethnic',
    'pagode': 'ethnic',
    'reggae': 'ethnic',
    'reggaeton': 'ethnic',
    'salsa': 'ethnic',
    'samba': 'ethnic',
    'sertanejo': 'ethnic',
    'ska': 'ethnic',
    'spanish': 'ethnic',
    'swedish': 'ethnic',
    'tango': 'ethnic',
    'turkish': 'ethnic',

    'alt-rock': 'rock-metal',
    'death-metal': 'rock-metal',
    'emo': 'rock-metal',
    'black-metal': 'rock-metal',
    'garage': 'rock-metal',
    'goth': 'rock-metal',
    'grindcore': 'rock-metal',
    'grunge': 'rock-metal',
    'guitar': 'rock-metal',
    'hard-rock': 'rock-metal',
    'hardcore': 'rock-metal',
    'hardstyle': 'rock-metal',
    'heavy-metal': 'rock-metal',
    'j-rock': 'rock-metal',
    'metal': 'rock-metal',
    'metalcore': 'rock-metal',
    'psych-rock': 'rock-metal',
    'punk-rock': 'rock-metal',
    'punk': 'rock-metal',
    'rock-n-roll': 'rock-metal',
    'rock': 'rock-metal',
    'rockabilly': 'rock-metal',

    'alternative': 'pop',
    'anime': 'pop',
    'cantopop': 'pop',
    'children': 'pop',
    'comedy': 'pop',
    'disney': 'pop',
    'country': 'pop',
    'funk': 'pop',
    'honky-tonk': 'pop',
    'indie-pop': 'pop',
    'industrial': 'pop',
    'indie': 'pop',
    'j-dance': 'pop',
    'j-idol': 'pop',
    'j-pop': 'pop',
    'k-pop': 'pop',
    'kids': 'pop',
    'mandopop': 'pop',
    'mpb': 'pop',
    'pop-film': 'pop',
    'pop': 'pop',
    'power-pop': 'pop',
    'show-tunes': 'pop',
    'synth-pop': 'pop',
    'world-music': 'pop',

    'breakbeat': 'electronic',
    'chicago-house': 'electronic',
    'club': 'electronic',
    'dance': 'electronic',
    'dancehall': 'electronic',
    'deep-house': 'electronic',
    'detroit-techno': 'electronic',
    'disco': 'electronic',
    'drum-and-bass': 'electronic',
    'dub': 'electronic',
    'dubstep': 'electronic',
    'edm': 'electronic',
    'electro': 'electronic',
    'electronic': 'electronic',
    'house': 'electronic',
    'idm': 'electronic',
    'party': 'electronic',
    'minimal-techno': 'electronic',
    'progressive-house': 'electronic',
    'techno': 'electronic',
    'trance': 'electronic',

    'hip-hop': 'hiphop',
    'r-n-b': 'hiphop',
    'blues': 'hiphop',
    'soul': 'hiphop',
    'trip-hop': 'hiphop'
})

In [14]:
df['track_genre'].value_counts()

pop           25000
ethnic        24000
rock-metal    22000
electronic    21000
emotional     14000
hiphop         5000
Name: track_genre, dtype: int64

In [15]:
equivalent_map = {}
class_amount = len(df['track_genre'].unique())
num = float(0)

for genre in df['track_genre'].unique():
  equivalent_map[genre] = num
  num += float(1)

df['track_genre'] = df['track_genre'].map(equivalent_map)

In [16]:
df.head()

Unnamed: 0,popularity,duration_ms,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,track_genre
0,73,230666,0.0,0.676,0.461,1,-6.746,0,0.143,0.0322,1e-06,0.358,0.715,87.917,4,0.0
1,55,149610,0.0,0.42,0.166,1,-17.235,1,0.0763,0.924,6e-06,0.101,0.267,77.489,4,0.0
2,57,210826,0.0,0.438,0.359,0,-9.734,1,0.0557,0.21,0.0,0.117,0.12,76.332,4,0.0
3,71,201933,0.0,0.266,0.0596,0,-18.515,1,0.0363,0.905,7.1e-05,0.132,0.143,181.74,3,0.0
4,82,198853,0.0,0.618,0.443,2,-9.681,1,0.0526,0.469,0.0,0.0829,0.167,119.949,4,0.0


In [17]:
y = df.iloc[:, -1]
del df['track_genre']
x = df.iloc[:,:-1]
del df

In [18]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.fit_transform(x_test)

In [19]:
x_train = tf.constant(x_train, dtype=tf.float32)
x_test = tf.constant(x_test, dtype=tf.float32)
y_train = tf.constant(y_train, dtype=tf.float32)
y_test = tf.constant(y_test, dtype=tf.float32)

In [31]:
model = models.Sequential()
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(class_amount, activation='softmax'))

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(x_train, y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7bcb906e3190>

In [30]:
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f'test loss = {test_loss}\ntest accuracy = {test_acc}')

test loss = 1.126686453819275
test accuracy = 0.5777027010917664


In [33]:
root_path = '/content/drive/MyDrive/ai/models'
model_name = 'spotAI_58_acc.joblib'
absolute_path = os.path.join(root_path, model_name)

joblib.dump(model, absolute_path)

['/content/drive/MyDrive/ai/models/spotAI_58_acc.joblib']