# Hit prediction

In [1]:
# Import libraries
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, recall_score, precision_score, confusion_matrix, f1_score
pd.options.mode.chained_assignment = None

In [2]:
# Load data
all_songs = pd.read_csv('all_time_billboard_wrap_up_cleaned_spotify.csv')
songs = all_songs[all_songs['Year'] > 2008]
# display(songs)

In [3]:
songs['Is hit'] = [1 if i <= 10 else 0 for i in songs['Position']]

In [4]:
# Count hits and non-hits
songs['Is hit'].value_counts()

0    1042
1     133
Name: Is hit, dtype: int64

In [5]:
# Split data into train and test subsets
X = songs[['acousticness', 'danceability', 'duration_ms', 'energy', 'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'speechiness', 'tempo', 'time_signature', 'valence']]
y = songs['Is hit']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 55)

print('Train data shape: {}'.format(X_train.shape))
print('Test data shape: {}'.format(X_test.shape))

Train data shape: (881, 13)
Test data shape: (294, 13)


In [6]:
# Handling imbalanced train data with smote
oversample = SMOTE()
X_smote, y_smote = oversample.fit_resample(X_train, y_train)

In [7]:
print(X_smote.shape, y_smote.shape)

(1560, 13) (1560,)


In [8]:
random_forest = RandomForestClassifier(n_estimators = 100, random_state = 3)
random_forest.fit(X_smote, y_smote)
y_pred = random_forest.predict(X_test)

In [9]:
# Count predicted values
pd.Series(y_pred).value_counts()

0    271
1     23
dtype: int64

In [10]:
print("Train accuracy: ", random_forest.score(X_smote, y_smote))
print("Test accuracy: ", random_forest.score(X_test, y_test))

Train accuracy:  0.9814102564102564
Test accuracy:  0.8537414965986394


In [11]:
# Load data from the last week
last_week_chart = pd.read_csv('last_week_chart.csv')
display(last_week_chart)

Unnamed: 0,Position,Artist,Song Title,Split Names,Spotify ID,danceability,energy,key,loudness,mode,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,1,Harry Styles,As It Was,Harry Styles,4LRPiXqCikLlN15c3yImP7,0.520,0.731,6,-5.338,0,...,0.3110,0.662,173.930,audio_features,4LRPiXqCikLlN15c3yImP7,spotify:track:4LRPiXqCikLlN15c3yImP7,https://api.spotify.com/v1/tracks/4LRPiXqCikLl...,https://api.spotify.com/v1/audio-analysis/4LRP...,167303,4
1,2,Jack Harlow,First Class,Jack Harlow,1rDQ4oMwGJI7B4tovsBOxc,0.905,0.563,8,-6.135,1,...,0.1130,0.324,106.998,audio_features,1rDQ4oMwGJI7B4tovsBOxc,spotify:track:1rDQ4oMwGJI7B4tovsBOxc,https://api.spotify.com/v1/tracks/1rDQ4oMwGJI7...,https://api.spotify.com/v1/audio-analysis/1rDQ...,173948,4
2,3,Glass Animals,Heat Waves,Glass Animals,3USxtqRwSYz57Ewm6wWRMp,0.761,0.525,11,-6.900,1,...,0.0921,0.531,80.870,audio_features,3USxtqRwSYz57Ewm6wWRMp,spotify:track:3USxtqRwSYz57Ewm6wWRMp,https://api.spotify.com/v1/tracks/3USxtqRwSYz5...,https://api.spotify.com/v1/audio-analysis/3USx...,238805,4
3,4,Latto,Big Energy,Latto,6Zu3aw7FfjAF9WA0fA81Oq,0.935,0.807,11,-3.838,0,...,0.3490,0.813,106.017,audio_features,6Zu3aw7FfjAF9WA0fA81Oq,spotify:track:6Zu3aw7FfjAF9WA0fA81Oq,https://api.spotify.com/v1/tracks/6Zu3aw7FfjAF...,https://api.spotify.com/v1/audio-analysis/6Zu3...,173182,4
4,5,Imagine Dragons X JID,Enemy,Imagine Dragons,1r9xUipOqoNwggBpENDsvJ,0.728,0.783,11,-4.424,0,...,0.4340,0.555,77.011,audio_features,1r9xUipOqoNwggBpENDsvJ,spotify:track:1r9xUipOqoNwggBpENDsvJ,https://api.spotify.com/v1/tracks/1r9xUipOqoNw...,https://api.spotify.com/v1/audio-analysis/1r9x...,173381,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93,96,Gunna Featuring Drake,P Power,Gunna,0lEjxUUlKqjqXrVlIHFduD,0.828,0.591,1,-6.217,1,...,0.3610,0.202,119.251,audio_features,0lEjxUUlKqjqXrVlIHFduD,spotify:track:0lEjxUUlKqjqXrVlIHFduD,https://api.spotify.com/v1/tracks/0lEjxUUlKqjq...,https://api.spotify.com/v1/audio-analysis/0lEj...,193347,4
94,97,Yeat,Money So Big,Yeat,7BbaIYAdi3pg4MGl6PHwPv,0.693,0.575,2,-7.633,1,...,0.1040,0.353,138.016,audio_features,7BbaIYAdi3pg4MGl6PHwPv,spotify:track:7BbaIYAdi3pg4MGl6PHwPv,https://api.spotify.com/v1/tracks/7BbaIYAdi3pg...,https://api.spotify.com/v1/audio-analysis/7Bba...,160052,4
95,98,Coi Leray & Nicki Minaj,Blick Blick!,Coi Leray,7LczcBaamU9pTkV4Cl9NyX,0.916,0.824,8,-4.444,0,...,0.0661,0.870,139.926,audio_features,7LczcBaamU9pTkV4Cl9NyX,spotify:track:7LczcBaamU9pTkV4Cl9NyX,https://api.spotify.com/v1/tracks/7LczcBaamU9p...,https://api.spotify.com/v1/audio-analysis/7Lcz...,178413,4
96,99,Bailey Zimmerman,Fall In Love,Bailey Zimmerman,5gVCfYmQRPy1QJifP8f5gg,0.524,0.643,9,-6.055,1,...,0.2210,0.397,135.962,audio_features,5gVCfYmQRPy1QJifP8f5gg,spotify:track:5gVCfYmQRPy1QJifP8f5gg,https://api.spotify.com/v1/tracks/5gVCfYmQRPy1...,https://api.spotify.com/v1/audio-analysis/5gVC...,232059,4


In [12]:
# Delete songs from last week which appeared in year-end chart in 2021
# cond = last_week_chart['Spotify ID'].isin(songs['Spotify ID'])
# last_week_chart.drop(last_week_chart[cond].index, inplace = True)
# display(last_week_chart)

In [13]:
# Results
chart_audio_features = last_week_chart[['acousticness', 'danceability', 'duration_ms', 'energy', 'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'speechiness', 'tempo', 'time_signature', 'valence']]
last_week_chart['Predict as hit'] = random_forest.predict(chart_audio_features)
display(last_week_chart[last_week_chart['Predict as hit'] == 1])

Unnamed: 0,Position,Artist,Song Title,Split Names,Spotify ID,danceability,energy,key,loudness,mode,...,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature,Predict as hit
2,3,Glass Animals,Heat Waves,Glass Animals,3USxtqRwSYz57Ewm6wWRMp,0.761,0.525,11,-6.9,1,...,0.531,80.87,audio_features,3USxtqRwSYz57Ewm6wWRMp,spotify:track:3USxtqRwSYz57Ewm6wWRMp,https://api.spotify.com/v1/tracks/3USxtqRwSYz5...,https://api.spotify.com/v1/audio-analysis/3USx...,238805,4,1
8,10,Justin Bieber,Ghost,Justin Bieber,6I3mqTwhRpn34SLVafSH7G,0.601,0.741,2,-5.569,1,...,0.441,153.96,audio_features,6I3mqTwhRpn34SLVafSH7G,spotify:track:6I3mqTwhRpn34SLVafSH7G,https://api.spotify.com/v1/tracks/6I3mqTwhRpn3...,https://api.spotify.com/v1/audio-analysis/6I3m...,153190,4,1
11,13,GAYLE,abcdefu,GAYLE,4fouWK6XVHhzl78KzQ1UjL,0.695,0.54,4,-5.692,1,...,0.415,121.932,audio_features,4fouWK6XVHhzl78KzQ1UjL,spotify:track:4fouWK6XVHhzl78KzQ1UjL,https://api.spotify.com/v1/tracks/4fouWK6XVHhz...,https://api.spotify.com/v1/audio-analysis/4fou...,168602,4,1
16,18,The Weeknd & Ariana Grande,Save Your Tears,The Weeknd,5QO79kh1waicV47BqGRL3g,0.68,0.826,0,-5.487,1,...,0.644,118.051,audio_features,5QO79kh1waicV47BqGRL3g,spotify:track:5QO79kh1waicV47BqGRL3g,https://api.spotify.com/v1/tracks/5QO79kh1waic...,https://api.spotify.com/v1/audio-analysis/5QO7...,215627,4,1
19,21,Dua Lipa,Levitating,Dua Lipa,5nujrmhLynf4yMoMtj8AQF,0.702,0.825,6,-3.787,0,...,0.915,102.977,audio_features,5nujrmhLynf4yMoMtj8AQF,spotify:track:5nujrmhLynf4yMoMtj8AQF,https://api.spotify.com/v1/tracks/5nujrmhLynf4...,https://api.spotify.com/v1/audio-analysis/5nuj...,203064,4,1
30,33,Olivia Rodrigo,Good 4 U,Olivia Rodrigo,4ZtFanR9U6ndgddUvNcjcG,0.563,0.664,9,-5.044,1,...,0.688,166.928,audio_features,4ZtFanR9U6ndgddUvNcjcG,spotify:track:4ZtFanR9U6ndgddUvNcjcG,https://api.spotify.com/v1/tracks/4ZtFanR9U6nd...,https://api.spotify.com/v1/audio-analysis/4ZtF...,178147,4,1
32,35,Lauren Spencer-Smith,Fingers Crossed,Lauren Spencer-Smith,3yMC1KsTwh0ceXdIe4QQAQ,0.56,0.473,5,-7.23,1,...,0.441,109.414,audio_features,3yMC1KsTwh0ceXdIe4QQAQ,spotify:track:3yMC1KsTwh0ceXdIe4QQAQ,https://api.spotify.com/v1/tracks/3yMC1KsTwh0c...,https://api.spotify.com/v1/audio-analysis/3yMC...,175345,4,1
47,50,Lizzo,About Damn Time,Lizzo,1PckUlxKqWQs3RlWXVBLw3,0.836,0.743,10,-6.305,0,...,0.722,108.966,audio_features,1PckUlxKqWQs3RlWXVBLw3,spotify:track:1PckUlxKqWQs3RlWXVBLw3,https://api.spotify.com/v1/tracks/1PckUlxKqWQs...,https://api.spotify.com/v1/audio-analysis/1Pck...,191822,4,1
