# Hit prediction

In [1]:
# Import libraries
import pandas as pd
import os
import re
import billboard
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from langdetect import detect
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, recall_score, precision_score, confusion_matrix, f1_score
pd.options.mode.chained_assignment = None

In [2]:
# Spotify authorization
spotify_client_id = os.environ.get('spotify_client_id')
spotify_client_secret = os.environ.get('spotify_client_secret')
client_credentials_manager = SpotifyClientCredentials(client_id = spotify_client_id, client_secret = spotify_client_secret)
sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager)

In [3]:
# Download data from last week
newest_chart = billboard.ChartData('hot-100')

In [4]:
# Creating Data Frame for data from last week
song_titles, artists, positions = [[] for i in range(3)]

position = 0
for i in newest_chart:
    position += 1
    positions.append(position)
    song = str(i).split(' by ')
    song_titles.append(song[0].strip("\'"))
    artists.append(song[1])

In [5]:
billboard_weekly_wrap_up = pd.DataFrame({'Position': positions, 'Artist': artists, 'Song Title': song_titles})
display(billboard_weekly_wrap_up)

Unnamed: 0,Position,Artist,Song Title
0,1,Harry Styles,As It Was
1,2,Jack Harlow,First Class
2,3,Glass Animals,Heat Waves
3,4,Latto,Big Energy
4,5,Imagine Dragons X JID,Enemy
...,...,...,...
95,96,Imagine Dragons,Bones
96,97,Queen Naija & Big Sean,Hate Our Love
97,98,Lucky Daye,Over
98,99,"Fivio Foreign, Kanye West & Alicia Keys",City Of Gods


In [6]:
# Spliting authors by 'and', 'featuring', 'feat.', Featuring', 'Feat.', '&' to make api searching easier and more efficient
artists = billboard_weekly_wrap_up['Artist'].to_list()
split_artists = []
for artist in artists:
    if any(re.findall(r'and|featuring|feat.|Featuring|Feat.|&|X', str(artist))):
        result = re.split(r'and|featuring|feat.|Featuring|Feat.|&|X', str(artist))[0].strip()
        split_artists.append(result)
    else:
        split_artists.append(artist)

billboard_weekly_wrap_up['Split Names'] = split_artists
display(billboard_weekly_wrap_up)

Unnamed: 0,Position,Artist,Song Title,Split Names
0,1,Harry Styles,As It Was,Harry Styles
1,2,Jack Harlow,First Class,Jack Harlow
2,3,Glass Animals,Heat Waves,Glass Animals
3,4,Latto,Big Energy,Latto
4,5,Imagine Dragons X JID,Enemy,Imagine Dragons
...,...,...,...,...
95,96,Imagine Dragons,Bones,Imagine Dragons
96,97,Queen Naija & Big Sean,Hate Our Love,Queen Naija
97,98,Lucky Daye,Over,Lucky Daye
98,99,"Fivio Foreign, Kanye West & Alicia Keys",City Of Gods,"Fivio Foreign, Kanye West"


In [7]:
# Get Spotify ID
def get_spotify_ID(artist, track):
    sp_id_list = []
    music_genre_list = []
    audio_features = {}
    try:
        track_id = sp.search(q = 'artist:' + artist + ' track:' + track, type = 'track')
        sp_id = track_id['tracks']['items'][0]['id']
    except IndexError:
        sp_id = ''
    return sp_id
        
spotify_ids = []
for i in billboard_weekly_wrap_up.index:
    data = get_spotify_ID(billboard_weekly_wrap_up['Split Names'][i], billboard_weekly_wrap_up['Song Title'][i])
    spotify_ids.append(data)
    
billboard_weekly_wrap_up['Spotify ID'] = spotify_ids

In [8]:
sum(billboard_weekly_wrap_up['Spotify ID'] == '')

2

In [9]:
billboard_weekly_wrap_up = billboard_weekly_wrap_up[billboard_weekly_wrap_up['Spotify ID'] != '']
display(billboard_weekly_wrap_up)

Unnamed: 0,Position,Artist,Song Title,Split Names,Spotify ID
0,1,Harry Styles,As It Was,Harry Styles,4LRPiXqCikLlN15c3yImP7
1,2,Jack Harlow,First Class,Jack Harlow,1rDQ4oMwGJI7B4tovsBOxc
2,3,Glass Animals,Heat Waves,Glass Animals,3USxtqRwSYz57Ewm6wWRMp
3,4,Latto,Big Energy,Latto,6Zu3aw7FfjAF9WA0fA81Oq
4,5,Imagine Dragons X JID,Enemy,Imagine Dragons,1r9xUipOqoNwggBpENDsvJ
...,...,...,...,...,...
95,96,Imagine Dragons,Bones,Imagine Dragons,0HqZX76SFLDz2aW8aiqi7G
96,97,Queen Naija & Big Sean,Hate Our Love,Queen Naija,3pEINn37iH5fx8TpXYNGIO
97,98,Lucky Daye,Over,Lucky Daye,1DOgkaR5Gqa8JtPEIbQORC
98,99,"Fivio Foreign, Kanye West & Alicia Keys",City Of Gods,"Fivio Foreign, Kanye West",4huBDGP4I3S0pYI0EaRN1c


In [10]:
# Function returns audio features for specific Spotify ID
def get_audio_features(spotify_id):
    af = pd.DataFrame(sp.audio_features(spotify_id))
    return af

temp_audio_features_list = []
for i in billboard_weekly_wrap_up['Spotify ID']:
    temp_song_audio_features = get_audio_features(i)
    temp_audio_features_list.append(temp_song_audio_features)
temp_audio_features_df = pd.concat(temp_audio_features_list)
billboard_weekly_wrap_up = billboard_weekly_wrap_up.merge(temp_audio_features_df, how = 'inner', left_on = 'Spotify ID', right_on = 'id')
display(billboard_weekly_wrap_up)

Unnamed: 0,Position,Artist,Song Title,Split Names,Spotify ID,danceability,energy,key,loudness,mode,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,1,Harry Styles,As It Was,Harry Styles,4LRPiXqCikLlN15c3yImP7,0.520,0.731,6,-5.338,0,...,0.3110,0.662,173.930,audio_features,4LRPiXqCikLlN15c3yImP7,spotify:track:4LRPiXqCikLlN15c3yImP7,https://api.spotify.com/v1/tracks/4LRPiXqCikLl...,https://api.spotify.com/v1/audio-analysis/4LRP...,167303,4
1,2,Jack Harlow,First Class,Jack Harlow,1rDQ4oMwGJI7B4tovsBOxc,0.905,0.563,8,-6.135,1,...,0.1130,0.324,106.998,audio_features,1rDQ4oMwGJI7B4tovsBOxc,spotify:track:1rDQ4oMwGJI7B4tovsBOxc,https://api.spotify.com/v1/tracks/1rDQ4oMwGJI7...,https://api.spotify.com/v1/audio-analysis/1rDQ...,173948,4
2,3,Glass Animals,Heat Waves,Glass Animals,3USxtqRwSYz57Ewm6wWRMp,0.761,0.525,11,-6.900,1,...,0.0921,0.531,80.870,audio_features,3USxtqRwSYz57Ewm6wWRMp,spotify:track:3USxtqRwSYz57Ewm6wWRMp,https://api.spotify.com/v1/tracks/3USxtqRwSYz5...,https://api.spotify.com/v1/audio-analysis/3USx...,238805,4
3,4,Latto,Big Energy,Latto,6Zu3aw7FfjAF9WA0fA81Oq,0.935,0.807,11,-3.838,0,...,0.3490,0.813,106.017,audio_features,6Zu3aw7FfjAF9WA0fA81Oq,spotify:track:6Zu3aw7FfjAF9WA0fA81Oq,https://api.spotify.com/v1/tracks/6Zu3aw7FfjAF...,https://api.spotify.com/v1/audio-analysis/6Zu3...,173182,4
4,5,Imagine Dragons X JID,Enemy,Imagine Dragons,1r9xUipOqoNwggBpENDsvJ,0.728,0.783,11,-4.424,0,...,0.4340,0.555,77.011,audio_features,1r9xUipOqoNwggBpENDsvJ,spotify:track:1r9xUipOqoNwggBpENDsvJ,https://api.spotify.com/v1/tracks/1r9xUipOqoNw...,https://api.spotify.com/v1/audio-analysis/1r9x...,173381,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93,96,Imagine Dragons,Bones,Imagine Dragons,0HqZX76SFLDz2aW8aiqi7G,0.773,0.742,5,-3.678,0,...,0.0754,0.571,114.071,audio_features,0HqZX76SFLDz2aW8aiqi7G,spotify:track:0HqZX76SFLDz2aW8aiqi7G,https://api.spotify.com/v1/tracks/0HqZX76SFLDz...,https://api.spotify.com/v1/audio-analysis/0HqZ...,165265,4
94,97,Queen Naija & Big Sean,Hate Our Love,Queen Naija,3pEINn37iH5fx8TpXYNGIO,0.662,0.655,2,-5.440,1,...,0.1240,0.356,82.079,audio_features,3pEINn37iH5fx8TpXYNGIO,spotify:track:3pEINn37iH5fx8TpXYNGIO,https://api.spotify.com/v1/tracks/3pEINn37iH5f...,https://api.spotify.com/v1/audio-analysis/3pEI...,226374,4
95,98,Lucky Daye,Over,Lucky Daye,1DOgkaR5Gqa8JtPEIbQORC,0.677,0.541,6,-6.057,0,...,0.1250,0.379,103.369,audio_features,1DOgkaR5Gqa8JtPEIbQORC,spotify:track:1DOgkaR5Gqa8JtPEIbQORC,https://api.spotify.com/v1/tracks/1DOgkaR5Gqa8...,https://api.spotify.com/v1/audio-analysis/1DOg...,205276,4
96,99,"Fivio Foreign, Kanye West & Alicia Keys",City Of Gods,"Fivio Foreign, Kanye West",4huBDGP4I3S0pYI0EaRN1c,0.474,0.801,8,-5.978,0,...,0.3220,0.497,147.356,audio_features,4huBDGP4I3S0pYI0EaRN1c,spotify:track:4huBDGP4I3S0pYI0EaRN1c,https://api.spotify.com/v1/tracks/4huBDGP4I3S0...,https://api.spotify.com/v1/audio-analysis/4huB...,256000,4


In [11]:
# Load data
#all_songs = pd.read_csv('spotify_data.csv')
all_songs = pd.read_csv('all_time_billboard_wrap_up_cleaned_spotify.csv')
songs = all_songs[all_songs['Year'] > 2008]
# display(songs)

In [12]:
songs['Is hit'] = [1 if i <= 10 else 0 for i in songs['Position']]

In [13]:
# Count hits and non-hits
songs['Is hit'].value_counts()

0    1042
1     133
Name: Is hit, dtype: int64

In [14]:
# Split data into train and test subsets
X = songs[['acousticness', 'danceability', 'duration_ms', 'energy', 'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'speechiness', 'tempo', 'time_signature', 'valence']]
y = songs['Is hit']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 55)

print('Train data shape: {}'.format(X_train.shape))
print('Test data shape: {}'.format(X_test.shape))

Train data shape: (881, 13)
Test data shape: (294, 13)


In [15]:
# Handling imbalanced train data with smote
oversample = SMOTE()
X_smote, y_smote = oversample.fit_resample(X_train, y_train)

In [16]:
print(X_smote.shape, y_smote.shape)

(1560, 13) (1560,)


In [17]:
random_forest = RandomForestClassifier(n_estimators = 100, random_state = 3)
random_forest.fit(X_smote, y_smote)
y_pred = random_forest.predict(X_test)

In [18]:
# Count predicted values
pd.Series(y_pred).value_counts()

0    270
1     24
dtype: int64

In [19]:
print("Train accuracy: ", random_forest.score(X_smote, y_smote))
print("Test accuracy: ", random_forest.score(X_test, y_test))

Train accuracy:  0.9814102564102564
Test accuracy:  0.8503401360544217


In [20]:
# Data from the last week
display(billboard_weekly_wrap_up)

Unnamed: 0,Position,Artist,Song Title,Split Names,Spotify ID,danceability,energy,key,loudness,mode,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,1,Harry Styles,As It Was,Harry Styles,4LRPiXqCikLlN15c3yImP7,0.520,0.731,6,-5.338,0,...,0.3110,0.662,173.930,audio_features,4LRPiXqCikLlN15c3yImP7,spotify:track:4LRPiXqCikLlN15c3yImP7,https://api.spotify.com/v1/tracks/4LRPiXqCikLl...,https://api.spotify.com/v1/audio-analysis/4LRP...,167303,4
1,2,Jack Harlow,First Class,Jack Harlow,1rDQ4oMwGJI7B4tovsBOxc,0.905,0.563,8,-6.135,1,...,0.1130,0.324,106.998,audio_features,1rDQ4oMwGJI7B4tovsBOxc,spotify:track:1rDQ4oMwGJI7B4tovsBOxc,https://api.spotify.com/v1/tracks/1rDQ4oMwGJI7...,https://api.spotify.com/v1/audio-analysis/1rDQ...,173948,4
2,3,Glass Animals,Heat Waves,Glass Animals,3USxtqRwSYz57Ewm6wWRMp,0.761,0.525,11,-6.900,1,...,0.0921,0.531,80.870,audio_features,3USxtqRwSYz57Ewm6wWRMp,spotify:track:3USxtqRwSYz57Ewm6wWRMp,https://api.spotify.com/v1/tracks/3USxtqRwSYz5...,https://api.spotify.com/v1/audio-analysis/3USx...,238805,4
3,4,Latto,Big Energy,Latto,6Zu3aw7FfjAF9WA0fA81Oq,0.935,0.807,11,-3.838,0,...,0.3490,0.813,106.017,audio_features,6Zu3aw7FfjAF9WA0fA81Oq,spotify:track:6Zu3aw7FfjAF9WA0fA81Oq,https://api.spotify.com/v1/tracks/6Zu3aw7FfjAF...,https://api.spotify.com/v1/audio-analysis/6Zu3...,173182,4
4,5,Imagine Dragons X JID,Enemy,Imagine Dragons,1r9xUipOqoNwggBpENDsvJ,0.728,0.783,11,-4.424,0,...,0.4340,0.555,77.011,audio_features,1r9xUipOqoNwggBpENDsvJ,spotify:track:1r9xUipOqoNwggBpENDsvJ,https://api.spotify.com/v1/tracks/1r9xUipOqoNw...,https://api.spotify.com/v1/audio-analysis/1r9x...,173381,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93,96,Imagine Dragons,Bones,Imagine Dragons,0HqZX76SFLDz2aW8aiqi7G,0.773,0.742,5,-3.678,0,...,0.0754,0.571,114.071,audio_features,0HqZX76SFLDz2aW8aiqi7G,spotify:track:0HqZX76SFLDz2aW8aiqi7G,https://api.spotify.com/v1/tracks/0HqZX76SFLDz...,https://api.spotify.com/v1/audio-analysis/0HqZ...,165265,4
94,97,Queen Naija & Big Sean,Hate Our Love,Queen Naija,3pEINn37iH5fx8TpXYNGIO,0.662,0.655,2,-5.440,1,...,0.1240,0.356,82.079,audio_features,3pEINn37iH5fx8TpXYNGIO,spotify:track:3pEINn37iH5fx8TpXYNGIO,https://api.spotify.com/v1/tracks/3pEINn37iH5f...,https://api.spotify.com/v1/audio-analysis/3pEI...,226374,4
95,98,Lucky Daye,Over,Lucky Daye,1DOgkaR5Gqa8JtPEIbQORC,0.677,0.541,6,-6.057,0,...,0.1250,0.379,103.369,audio_features,1DOgkaR5Gqa8JtPEIbQORC,spotify:track:1DOgkaR5Gqa8JtPEIbQORC,https://api.spotify.com/v1/tracks/1DOgkaR5Gqa8...,https://api.spotify.com/v1/audio-analysis/1DOg...,205276,4
96,99,"Fivio Foreign, Kanye West & Alicia Keys",City Of Gods,"Fivio Foreign, Kanye West",4huBDGP4I3S0pYI0EaRN1c,0.474,0.801,8,-5.978,0,...,0.3220,0.497,147.356,audio_features,4huBDGP4I3S0pYI0EaRN1c,spotify:track:4huBDGP4I3S0pYI0EaRN1c,https://api.spotify.com/v1/tracks/4huBDGP4I3S0...,https://api.spotify.com/v1/audio-analysis/4huB...,256000,4


In [21]:
# Delete songs from last week which appeared in year-end chart in 2021
# cond = billboard_weekly_wrap_up['Spotify ID'].isin(songs['Spotify ID'])
# billboard_weekly_wrap_up.drop(billboard_weekly_wrap_up[cond].index, inplace = True)
# display(billboard_weekly_wrap_up)

In [22]:
# Results
chart_audio_features = billboard_weekly_wrap_up[['acousticness', 'danceability', 'duration_ms', 'energy', 'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'speechiness', 'tempo', 'time_signature', 'valence']]
billboard_weekly_wrap_up['Predict as hit'] = random_forest.predict(chart_audio_features)
display(billboard_weekly_wrap_up[billboard_weekly_wrap_up['Predict as hit'] == 1])

Unnamed: 0,Position,Artist,Song Title,Split Names,Spotify ID,danceability,energy,key,loudness,mode,...,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature,Predict as hit
2,3,Glass Animals,Heat Waves,Glass Animals,3USxtqRwSYz57Ewm6wWRMp,0.761,0.525,11,-6.9,1,...,0.531,80.87,audio_features,3USxtqRwSYz57Ewm6wWRMp,spotify:track:3USxtqRwSYz57Ewm6wWRMp,https://api.spotify.com/v1/tracks/3USxtqRwSYz5...,https://api.spotify.com/v1/audio-analysis/3USx...,238805,4,1
12,13,GAYLE,abcdefu,GAYLE,4fouWK6XVHhzl78KzQ1UjL,0.695,0.54,4,-5.692,1,...,0.415,121.932,audio_features,4fouWK6XVHhzl78KzQ1UjL,spotify:track:4fouWK6XVHhzl78KzQ1UjL,https://api.spotify.com/v1/tracks/4fouWK6XVHhz...,https://api.spotify.com/v1/audio-analysis/4fou...,168602,4,1
15,16,The Weeknd & Ariana Grande,Save Your Tears,The Weeknd,5QO79kh1waicV47BqGRL3g,0.68,0.826,0,-5.487,1,...,0.644,118.051,audio_features,5QO79kh1waicV47BqGRL3g,spotify:track:5QO79kh1waicV47BqGRL3g,https://api.spotify.com/v1/tracks/5QO79kh1waic...,https://api.spotify.com/v1/audio-analysis/5QO7...,215627,4,1
17,18,Dua Lipa,Levitating,Dua Lipa,5nujrmhLynf4yMoMtj8AQF,0.702,0.825,6,-3.787,0,...,0.915,102.977,audio_features,5nujrmhLynf4yMoMtj8AQF,spotify:track:5nujrmhLynf4yMoMtj8AQF,https://api.spotify.com/v1/tracks/5nujrmhLynf4...,https://api.spotify.com/v1/audio-analysis/5nuj...,203064,4,1
31,32,Megan Thee Stallion & Dua Lipa,Sweetest Pie,Megan Thee Stallion,7mFj0LlWtEJaEigguaWqYh,0.814,0.628,7,-7.178,1,...,0.677,123.977,audio_features,7mFj0LlWtEJaEigguaWqYh,spotify:track:7mFj0LlWtEJaEigguaWqYh,https://api.spotify.com/v1/tracks/7mFj0LlWtEJa...,https://api.spotify.com/v1/audio-analysis/7mFj...,201334,4,1
34,35,Olivia Rodrigo,Good 4 U,Olivia Rodrigo,4ZtFanR9U6ndgddUvNcjcG,0.563,0.664,9,-5.044,1,...,0.688,166.928,audio_features,4ZtFanR9U6ndgddUvNcjcG,spotify:track:4ZtFanR9U6ndgddUvNcjcG,https://api.spotify.com/v1/tracks/4ZtFanR9U6nd...,https://api.spotify.com/v1/audio-analysis/4ZtF...,178147,4,1
41,44,Lauren Spencer-Smith,Fingers Crossed,Lauren Spencer-Smith,3yMC1KsTwh0ceXdIe4QQAQ,0.56,0.473,5,-7.23,1,...,0.441,109.414,audio_features,3yMC1KsTwh0ceXdIe4QQAQ,spotify:track:3yMC1KsTwh0ceXdIe4QQAQ,https://api.spotify.com/v1/tracks/3yMC1KsTwh0c...,https://api.spotify.com/v1/audio-analysis/3yMC...,175345,4,1
97,100,Pusha T,Brambleton,Pusha T,7qQMtg44hxQ0ga7NhyyNxR,0.739,0.658,1,-4.145,1,...,0.636,80.955,audio_features,7qQMtg44hxQ0ga7NhyyNxR,spotify:track:7qQMtg44hxQ0ga7NhyyNxR,https://api.spotify.com/v1/tracks/7qQMtg44hxQ0...,https://api.spotify.com/v1/audio-analysis/7qQM...,170911,4,1
