# Get Songs from Billboard

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
from time import sleep

In [2]:
# Billboard Hot 100 chart page; the cells below download and scrape it.
url = "https://www.billboard.com/charts/hot-100"

In [3]:
# Download the chart page. requests applies no timeout by default, so without
# one this cell would block indefinitely if the server never answers.
response = requests.get(url, timeout=30)
response.status_code # 200 status code means OK!

200

In [325]:
# Parse the downloaded chart HTML using BeautifulSoup's "html.parser" backend.
soup = BeautifulSoup(response.content, "html.parser")

In [6]:
# One list per output column; filled in chart order by the loop below.
artist = []
title = []
last_week = []
peak_rank = []
weeks_on_charts = []

# Run the CSS query once. The previous version re-ran it five times per song
# (500 full-document scans) and re-split the same text for every field.
chart_entries = soup.select(".chart-element__information")

for i in range(100):
    # The entry text is read line by line: line 1 is the title, line 2 the
    # artist, and lines 5-7 each start with a value followed by a space
    # (last week's rank, peak rank, weeks on chart).
    # NOTE(review): these offsets depend on the page layout at scrape time.
    fields = chart_entries[i].get_text().split('\n')
    title.append(fields[1])
    artist.append(fields[2])
    last_week.append(fields[5].split(' ')[0])
    peak_rank.append(fields[6].split(' ')[0])
    weeks_on_charts.append(fields[7].split(' ')[0])

In [7]:
# Positions 1..100, used as both the "rank" column and the index of billboard_df.
# Assumes the entries above were scraped in chart order — TODO confirm.
rank = list(range(1,101))

In [8]:
# Combine the scraped lists into one table, indexed by chart position.
billboard_df = pd.DataFrame(
    {
        'rank': rank,
        'artist': artist,
        'title': title,
        'last_week_rank': last_week,
        'peak_rank': peak_rank,
        'weeks_on_charts': weeks_on_charts,
    },
    index=rank,
)

In [9]:
# Turn the '-' placeholder into '0' so the column can be cast to int in the next cell.
# Presumably '-' marks songs that were not on the chart last week — TODO confirm.
billboard_df['last_week_rank']=billboard_df['last_week_rank'].replace('-','0')

In [10]:
# The three chart statistics were scraped as strings; convert them to integers
# in a single astype call.
billboard_df = billboard_df.astype({
    'last_week_rank': int,
    'peak_rank': int,
    'weeks_on_charts': int,
})

In [11]:
billboard_df

Unnamed: 0,rank,artist,title,last_week_rank,peak_rank,weeks_on_charts
1,1,Olivia Rodrigo,Drivers License,1,1,6
2,2,Ariana Grande,34+35,6,2,16
3,3,Lil Tjay Featuring 6LACK,Calling My Phone,0,3,1
4,4,The Weeknd,Blinding Lights,3,1,63
5,5,Cardi B,Up,2,2,2
...,...,...,...,...,...,...
96,96,Jordan Davis,Almost Maybes,96,95,4
97,97,Saweetie Featuring Jhene Aiko,Back To The Streets,75,58,13
98,98,Juice WRLD & Young Thug,Bad Boy,83,22,5
99,99,YBN Nahmir Featuring 21 Savage,Opp Stoppa,0,97,2


# Get Songs from Wikipedia

In [193]:
# Wikipedia article for the Triple J Hottest 100; scraped below as a second song list.
url2 = "https://en.wikipedia.org/wiki/Triple_J_Hottest_100"

In [194]:
# Download the Wikipedia article. requests applies no timeout by default, so
# without one this cell would block indefinitely if the server never answers.
response_ = requests.get(url2, timeout=30)
response_.status_code # 200 status code means OK!

200

In [326]:
# Parse the downloaded Wikipedia HTML using BeautifulSoup's "html.parser" backend.
soup_ = BeautifulSoup(response_.content, "html.parser")

In [269]:
# NOTE(review): `right` is not assigned anywhere in the visible notebook, so this
# cell presumably relies on leftover kernel state and would raise NameError on
# a fresh "Restart & Run All" — confirm, and remove it if it is only a
# debugging leftover.
right

'Never Be like You'

In [283]:
# NOTE: these names shadow the `artist` / `title` lists from the Billboard
# section; billboard_df has already been built from them at this point.
artist = []
title = []

# Run the CSS query once; the previous version re-ran it for every list item.
song_items = soup_.select('table.wikitable > tbody > tr > td > ol > li ')

for item in song_items:
    song = item.get_text()

    # Entries are split at an en dash into artist and title. Split at the FIRST
    # dash only, so a dash inside the title no longer cuts the title short.
    artist_part, dash, title_part = song.partition('–')
    if not dash:
        # Not an "artist – title" entry. Skipping it keeps both lists the same
        # length; the previous version raised IndexError here.
        continue

    # Use the text between the first pair of double quotes as the title. If
    # there are no quotes, keep the whole remainder (explicit check replaces
    # the former bare `except`).
    quoted = title_part.split('"')
    artist.append(artist_part.strip())
    title.append(quoted[1] if len(quoted) > 1 else title_part.strip())

In [286]:
# Build a two-row frame labelled 'title' / 'artist', then transpose it so that
# every song becomes one row and the labels become the column names.
Triple_J_Hottest = pd.DataFrame([title, artist], index=['title', 'artist']).T

# Get Spotify Features

In [12]:
from spotify_client import *
import pandas as pd
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials

In [13]:
# Spotify Web API client authenticated through SpotifyClientCredentials.
# client_id / client_secret are not defined in this notebook; presumably they
# come from the star import of spotify_client — TODO confirm.
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=client_id,
        client_secret=client_secret,
    )
)

In [14]:
def features(tracks):
    """Look up Spotify audio features for each item in ``tracks``.

    Parameters
    ----------
    tracks : sequence of dict
        Each item must provide ``item['track']['id']`` and
        ``item['track']['uri']``.

    Returns
    -------
    dict
        Maps each track id to whatever ``sp.audio_features`` returns for that
        track's URI. One request is made per item; if an id appears more than
        once, the last result wins.
    """
    song_features = {}

    for entry in tracks:
        track = entry['track']
        song_features[track['id']] = sp.audio_features(track['uri'])

    return song_features

# Clustering

In [15]:
import pandas as pd
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

In [343]:
# Load the song table used for clustering.
# Presumably one row per Spotify track, with audio features plus metadata
# (the cells below index it by 'id' and drop the metadata columns) — TODO confirm.
X = pd.read_csv('data.csv')

In [344]:
# Index the table by the 'id' column so rows can later be looked up by id.
X = X.set_index('id')

In [348]:
# Columns removed before scaling and clustering; every remaining column is used as a feature.
to_drop = ['name', 'artists', 'explicit','key', 'mode', 'popularity', 'release_date', 'year', 'duration_ms']

In [349]:
# Keep only the feature columns.
X = X.drop(columns=to_drop)

In [350]:
# Fit the scaler once and reuse it. The previous version fitted two separate
# StandardScaler instances on the same data; using a single fitted object
# guarantees that the training matrix and the songs scaled later in
# recommender() go through exactly the same transformation.
transformer = StandardScaler().fit(X)
X_prep = transformer.transform(X)

In [351]:
K = range(2, 20)
inertia = []

for k in K:
    kmeans = KMeans(n_clusters=k,
                    random_state=1234)
    kmeans.fit(X_prep)
    inertia.append(kmeans.inertia_)

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

plt.figure(figsize=(16,8))
plt.plot(K, inertia, 'bx-')
plt.xlabel('k')
plt.ylabel('inertia')
plt.xticks(np.arange(min(K), max(K)+1, 1.0))
plt.title('Elbow Method showing the optimal k')

KeyboardInterrupt: 

In [15]:
# Silhouette method: fit KMeans for k = 2..19 and score each clustering.
K = range(2, 20)
silhouette = []

for k in K:
    # fit() returns the estimator itself, so construction and fitting chain.
    kmeans = KMeans(random_state=1234, n_clusters=k).fit(X_prep)
    score = silhouette_score(X_prep, kmeans.predict(X_prep))
    silhouette.append(score)


# Plot the silhouette score against k, with one tick per candidate k.
plt.figure(figsize=(16,8))
plt.plot(K, silhouette, 'bx-')
plt.xlabel('k')
plt.ylabel('silhouette score')
plt.xticks(np.arange(K.start, K.stop, 1.0))
plt.title('Silhouette Method showing the optimal k')

KeyboardInterrupt: 

In [352]:
# Final model used by recommender(): 40 clusters with a fixed seed.
# NOTE(review): 40 lies outside the k range (2-19) scanned by the elbow and
# silhouette cells, and both of those recorded runs ended in KeyboardInterrupt —
# confirm how this value was chosen.
kmeans = KMeans(n_clusters=40, random_state=1234)
kmeans.fit(X_prep)

KMeans(n_clusters=40, random_state=1234)

In [353]:
clusters = kmeans.predict(X_prep)
#pd.Series(clusters).value_counts().sort_index()

# recommender() filters X by its "cluster" column, so the labels must be on X
# itself. The previous code wrote them to `pd.DataFrame(X)`, which only reaches
# X when the new frame shares its storage with X; that is pandas-version
# dependent, and where it is not shared the lookup raises KeyError.
# Caveat: X now carries a non-feature column, so re-running the scaling cell
# after this one would include "cluster" as a feature.
X["cluster"] = clusters
X_df = X  # kept as an alias for code that refers to X_df

# Insert Song

In [23]:
# Check whether the song is on the Billboard Hot 100 or the Triple J Hottest 100

In [354]:
def check_Triple_J_Hottest(search):
    """Tell whether ``search`` fuzzily matches a title in ``Triple_J_Hottest``.

    A title matches when its case-insensitive ``SequenceMatcher`` ratio against
    ``search`` is above 0.8. On a match the function prints random songs from
    the same table (with a Spotify link when one is found) and keeps offering
    new samples until the user answers "no".

    Parameters
    ----------
    search : str
        Text typed by the user. It is compared against titles only.

    Returns
    -------
    int
        1 if a matching title exists, otherwise 0.
    """
    from difflib import SequenceMatcher
    from IPython.display import clear_output

    wanted = search.lower()
    # any() stops at the first hit. The previous loop kept scanning after a
    # match and re-ran the whole prompt sequence for every further title that
    # also matched (e.g. duplicate titles), even after the user answered "no".
    in_Triple_J_Hottest = int(any(
        SequenceMatcher(None, wanted, chart_title.lower()).ratio() > 0.8
        for chart_title in Triple_J_Hottest['title']
    ))
    if not in_Triple_J_Hottest:
        return in_Triple_J_Hottest

    print('>> Your Song is in the Triple_J_Hottest Top 100!!! <<')
    print('\nHere are some other hot songs:\n')

    more_songs = 'yes'
    while more_songs != 'no':
        # sample() raises when asked for more rows than the table holds.
        recommendations_df = Triple_J_Hottest.sample(n=min(5, len(Triple_J_Hottest)))

        for song_title, song_artist in zip(recommendations_df['title'],
                                           recommendations_df['artist']):
            print(song_title, ' - ', song_artist)
            try:
                hits = sp.search(song_title, type='track')['tracks']['items']
                if hits:
                    print(hits[0]['external_urls']['spotify'])
            except Exception:
                # The link is best-effort: a failed lookup must not end the
                # session. Unlike a bare `except`, Ctrl-C still interrupts.
                pass
            print()

        more_songs = input('Do you want more songs? [Yes/No]').strip().lower()
        clear_output()

    return in_Triple_J_Hottest

In [355]:
def check_billboard(search):
    """Tell whether ``search`` fuzzily matches a title in ``billboard_df``.

    A title matches when its case-insensitive ``SequenceMatcher`` ratio against
    ``search`` is above 0.8. On a match the function prints random songs from
    the same table (with a Spotify link when one is found) and keeps offering
    new samples until the user answers "no".

    Parameters
    ----------
    search : str
        Text typed by the user. It is compared against titles only.

    Returns
    -------
    int
        1 if a matching title exists, otherwise 0.
    """
    from difflib import SequenceMatcher
    from IPython.display import clear_output

    wanted = search.lower()
    # any() stops at the first hit. The previous loop kept scanning after a
    # match and re-ran the whole prompt sequence for every further title that
    # also matched (e.g. duplicate titles), even after the user answered "no".
    in_billboard = int(any(
        SequenceMatcher(None, wanted, chart_title.lower()).ratio() > 0.8
        for chart_title in billboard_df['title']
    ))
    if not in_billboard:
        return in_billboard

    print('>> Your Song is in the Billboard Top 100!!! <<')
    print('\nHere are some other hot songs:\n')

    more_songs = 'yes'
    while more_songs != 'no':
        # sample() raises when asked for more rows than the table holds.
        recommendations_df = billboard_df.sample(n=min(5, len(billboard_df)))

        for song_title, song_artist in zip(recommendations_df['title'],
                                           recommendations_df['artist']):
            print(song_title, ' - ', song_artist)
            try:
                hits = sp.search(song_title, type='track')['tracks']['items']
                if hits:
                    print(hits[0]['external_urls']['spotify'])
            except Exception:
                # The link is best-effort: a failed lookup must not end the
                # session. Unlike a bare `except`, Ctrl-C still interrupts.
                pass
            print()

        more_songs = input('Do you want more songs? [Yes/No]').strip().lower()
        clear_output()

    return in_billboard

In [359]:
def recommender():
    """Run the interactive song recommender until the user declines another search.

    For each search the user's text is first checked against both chart tables
    (``check_Triple_J_Hottest`` and ``check_billboard``). Only if neither
    reports a match is Spotify searched, the user asked to pick a track, and
    songs from the same KMeans cluster suggested.

    Reads the notebook globals ``sp``, ``transformer``, ``kmeans`` and ``X_df``.
    Takes no arguments and returns None; all interaction goes through
    ``input`` / ``print``.
    """
    from IPython.display import clear_output

    # A loop replaces the former self-recursion, so a long session no longer
    # grows the call stack.
    while True:
        print('\n ======== WELCOME ========\n')
        print('Here you can find similar songs to your favorite hits!!\n')

        search = input('\n\nPlease insert artist and title: ')
        clear_output()

        in_Triple_J_Hottest = check_Triple_J_Hottest(search)
        in_billboard = check_billboard(search)

        if in_billboard == 0 and in_Triple_J_Hottest == 0:
            _recommend_from_cluster(search)

        clear_output()

        new_song = input('\nDo you want to find similar songs for another song? [Yes/No]').lower()
        clear_output()

        if new_song != 'yes':
            print('\nThanks and see you again!\n')
            return


def _choose_track(candidates):
    """Print the numbered candidate tracks and return the one the user picks."""
    for position, track in enumerate(candidates):
        print(position, ' ', track['artists'][0]['name'], '-', track['name'])

    while True:
        answer = input('\nNumber: ')
        try:
            choice = int(answer)
        except ValueError:
            choice = -1
        # Only the listed numbers are accepted; the previous code crashed on
        # non-numeric input and silently accepted negative or unlisted indexes.
        if 0 <= choice < len(candidates):
            return candidates[choice]
        print('Please enter a number between 0 and', len(candidates) - 1)


def _recommend_from_cluster(search):
    """Let the user pick a Spotify track for ``search`` and print songs from its cluster."""
    from IPython.display import clear_output

    feature_columns = ['acousticness', 'danceability', 'energy',
                       'instrumentalness', 'liveness', 'loudness', 'speechiness', 'tempo',
                       'valence']

    print('\n============================================')
    print('Which Song Do You Mean?')
    print('============================================')

    # Search once and reuse the result. The previous code repeated the same
    # request for every printed line and again on every "more" round.
    candidates = sp.search(search, type='track', offset=0, limit=5)['tracks']['items']
    if not candidates:
        print('\nSorry, no matching song was found on Spotify.')
        return

    chosen = _choose_track(candidates)

    search_feature = sp.audio_features(chosen['uri'])
    if not search_feature or search_feature[0] is None:
        print('\nSorry, Spotify has no audio features for this song.')
        return

    search_df = pd.json_normalize(search_feature)
    search_df['id'] = chosen['id']
    # Same columns, in the same order, as listed in the previous version.
    # NOTE(review): they must match the columns the scaler was fitted on — confirm.
    search_df = search_df.set_index('id')[feature_columns]

    # Take the label straight from predict(): the previous `Series[0]` on a
    # string-indexed Series relied on positional fallback.
    cluster = kmeans.predict(transformer.transform(search_df))[0]

    # X_df is the frame the cluster labels were written to; X only carries them
    # when it shares storage with X_df, which is pandas-version dependent.
    cluster_df = X_df.loc[X_df['cluster'] == cluster]
    if cluster_df.empty:
        print('\nSorry, there are no songs in this cluster.')
        return

    more_songs = 'yes'
    while more_songs != 'no':
        # sample() raises when asked for more rows than the cluster holds.
        recommendations_df = cluster_df.sample(n=min(5, len(cluster_df)))

        clear_output()

        print('\n\n>> Here are your recommendations: <<\n')

        for r in recommendations_df.index:
            track = sp.track(r)  # one request per song instead of three
            print(track['name'], ' - ', track['artists'][0]['name'])
            print(track['external_urls']['spotify'])
            print()

        more_songs = input('\n\nDo you want more recommendations?  [Yes/No]').lower()


# Get your songs!

In [360]:
# Start the interactive session; it prompts through input() until the user declines another search.
recommender()


Thanks and see you again!

