In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

In [2]:
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials
import config
from IPython.display import IFrame
from IPython.display import display

In [3]:
import numpy as np
import pickle

In [None]:
# Refer to "Week 6_Project_model-scaler-csv" for original modelling and dataframe creation

In [4]:
def load(filename = "filename.pickle"):
    """Unpickle and return the object stored in ``filename``.

    If the file does not exist, "File not found!" is printed and ``None`` is
    returned instead of raising.

    NOTE: ``pickle.load`` can execute arbitrary code while deserialising, so
    this should only be pointed at files from a trusted source.
    """
    loaded_object = None
    try:
        with open(filename, "rb") as pickle_file:
            loaded_object = pickle.load(pickle_file)
    except FileNotFoundError:
        print("File not found!")
    return loaded_object

In [5]:
# Load the pickled objects used below via scaler.transform(...) and model.predict(...).
scaler = load("spotify_scaler.pickle")
model = load("spotify_24.pickle")

# load() only prints a message and returns None when a file is missing; stop here
# with a clear error instead of failing later with an AttributeError on None.
if scaler is None or model is None:
    raise FileNotFoundError(
        "spotify_scaler.pickle and spotify_24.pickle must both be present in the working directory."
    )

In [6]:
# Spotify Web API client; the client id/secret are read from the local `config` module
# so they are not hard-coded in the notebook.
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=config.client_id,
        client_secret=config.client_secret,
    )
)

In [7]:
# Download the Billboard Hot 100 chart page and parse it for the scraping cell below.
url = "https://www.billboard.com/charts/hot-100/"
# A timeout keeps a stalled connection from hanging the kernel indefinitely.
response = requests.get(url, timeout=30)
if not response.ok:
    # Non-fatal: the page is still parsed, but the chart will most likely come out empty.
    print(f"Warning: Billboard request returned HTTP {response.status_code}; the chart may be empty.")
# Name the parser explicitly so the parse result does not depend on which optional
# parsers happen to be installed (and to avoid bs4's "no parser specified" warning).
soup = BeautifulSoup(response.content, "html.parser")

In [8]:
def extract_billboard_100(soup):
    """Collect chart titles and artists from a parsed page into a DataFrame.

    Args:
        soup: object exposing ``select(css_selector)`` whose results have a
            ``.text`` attribute (a BeautifulSoup document in this notebook).

    Returns:
        DataFrame with columns ``Title`` and ``Artist``; each value is the
        whitespace-stripped text of one matched element, in document order.
        The two selectors are queried independently, so pandas raises if they
        match a different number of elements.
    """
    # NOTE(review): "a-no-trucate" is reproduced as-is; presumably it mirrors the
    # class name used by the site's markup - verify against the live page.
    title_selector = 'h3.c-title.a-no-trucate.a-font-primary-bold-s'
    artist_selector = 'span.c-label.a-no-trucate.a-font-primary-s'

    chart_columns = {
        'Title': [node.text.strip() for node in soup.select(title_selector)],
        'Artist': [node.text.strip() for node in soup.select(artist_selector)],
    }
    return pd.DataFrame(chart_columns)

# Scraped chart kept at module level: play_next_song reads bb100_df as a global.
bb100_df = extract_billboard_100(soup)

In [9]:
# Load the saved song table; later cells use its numeric columns as features and its
# 'id' column to look tracks up on Spotify.
af_df = pd.read_csv('af_df.csv')

In [10]:
# Feature matrix: every numeric column of af_df except duration_ms.
# Built as one chained expression so no frame is mutated in place.
X = af_df.select_dtypes(include=['number']).drop(columns=['duration_ms'])

In [11]:
# Scale the feature columns only. Excluding "cluster" keeps this cell re-runnable:
# it adds that column to X below, and feeding it back into the scaler would fail.
feature_columns = [column for column in X.columns if column != "cluster"]
X_scaled = scaler.transform(X[feature_columns])
X_scaled_df = pd.DataFrame(X_scaled, columns = feature_columns)

labels = model.labels_
clusters = model.predict(X_scaled_df)

# NOTE(review): assigning model.labels_ assumes af_df holds the same rows, in the same
# order, that the model was fitted on - confirm against the modelling notebook.
X["cluster"] = labels

# Last expression of the cell so the per-cluster counts are actually displayed.
pd.Series(clusters).value_counts().sort_index()

In [12]:
def play_user_song():
    """Prompt for a song, look it up on Spotify and show an embedded player.

    The typed text is stripped, lower-cased and has commas replaced by spaces
    before being passed to ``sp.search`` (one track, market "GB").

    Returns:
        The complete response returned by ``sp.search``; its first track is
        the one that was announced and embedded.

    Raises:
        IndexError: if the search returns no tracks.
    """
    user_input = input('Please enter a song name (include artist name for more accuracy): \n').strip().lower().replace(',',' ')

    song_search = sp.search(user_input, type="track", limit=1, market="GB")
    items = song_search["tracks"]["items"]
    if not items:
        # Same exception type the bare items[0] lookup would raise, but with a
        # message that tells the user what went wrong.
        raise IndexError(f"No Spotify track found for {user_input!r}.")

    track = items[0]
    track_id = track["id"]

    print(f"You've chosen: {track['name']}, by {track['artists'][0]['name']}.")

    iframe = IFrame(src="https://open.spotify.com/embed/track/" + track_id,
                    width="320",
                    height="80",
                    frameborder="0",
                    allowtransparency="true",
                    allow="encrypted-media")
    display(iframe)

    return song_search

In [13]:
def _show_track_player(track_id):
    """Display the Spotify embed player for ``track_id`` in the cell output."""
    display(IFrame(src="https://open.spotify.com/embed/track/" + track_id,
                   width="320",
                   height="80",
                   frameborder="0",
                   allowtransparency="true",
                   allow="encrypted-media"))


def play_next_song(song_search):
    """Recommend a follow-up track for the song in ``song_search`` and embed it.

    If the song's title and first artist match a row of ``bb100_df``
    (case-insensitively), a random row of ``bb100_df`` is recommended.
    Otherwise the song's audio features are scaled with ``scaler``, assigned a
    cluster by ``model``, and a random row of ``af_df`` whose ``X['cluster']``
    equals that cluster is recommended.

    Args:
        song_search: response of ``sp.search`` as returned by ``play_user_song``;
            only its first track is used.

    Returns:
        None. The recommendation is printed and shown as an embedded player.
    """
    chosen_track = song_search["tracks"]["items"][0]
    song_artist = chosen_track["artists"][0]["name"].lower()
    song_title = chosen_track["name"].lower()

    # check if song artist and title is from billboard top 100
    bb100_match = bb100_df[(bb100_df['Title'].str.lower() == song_title) & (bb100_df['Artist'].str.lower() == song_artist)]
    if not bb100_match.empty:

        # if yes, play next random song from bb100
        random_bb100 = bb100_df.sample()
        # .iloc[0] extracts plain strings; formatting the one-row Series itself
        # would put its repr (index, dtype, ...) into the search query.
        next_bb100_title = random_bb100['Title'].iloc[0]
        next_bb100_artist = random_bb100['Artist'].iloc[0]
        # f-string so the real title/artist are searched, not the placeholder text.
        bb100_next_track = sp.search(q=f"{next_bb100_title}, {next_bb100_artist}", type="track", limit=1, market="GB")
        bb100_items = bb100_next_track['tracks']['items']
        print("Your song is hot!\n")
        print(f"We can also recommend {next_bb100_title}, by {next_bb100_artist} from the Billboard Hot 100 list.\n")

        if not bb100_items:
            print("Sorry, we couldn't find that track on Spotify to play it here.")
            return
        _show_track_player(bb100_items[0]['id'])

    # else pick next random song from big playlist to play
    else:
        # get and scale audio features of user input song to predict cluster
        track_id = chosen_track["id"]
        features = sp.audio_features(track_id)
        # NOTE(review): the result is expected to be a list whose entry may be None
        # when Spotify has no features for the track - confirm against spotipy docs.
        if not features or features[0] is None:
            print("Sorry, Spotify has no audio features for this track, so we can't recommend a similar song.")
            return
        input_af = pd.DataFrame(features).select_dtypes(include=['number']).drop('duration_ms',axis=1)
        af_scaled = pd.DataFrame(scaler.transform(input_af), columns = input_af.columns)
        predicted_cluster = model.predict(af_scaled)[0]

        # keep only the af_df rows whose index appears in the matching cluster of X
        same_cluster = X[X['cluster'] == predicted_cluster]
        next_song_df = af_df.merge(
            same_cluster['cluster'], left_index=True, right_index=True, how='inner')

        # choose random song from clustered df and play
        next_song = next_song_df.sample()
        next_song_id = str(next_song['id'].values[0])
        next_song_details = sp.track(next_song_id)
        next_song_artist = next_song_details['artists'][0]['name']
        next_song_title = next_song_details['name']

        print("You've got great taste!\n")
        print(f"We can also recommend {next_song_title}, by {next_song_artist} from a similar genre.\n")

        _show_track_player(next_song_id)

In [23]:
# Prompt for a song and keep the raw Spotify search response for the next cell.
song_search = play_user_song()

Please enter a song name (include artist name for more accuracy): 
in the end linkin park
You've chosen: In the End, by Linkin Park.


In [24]:
# play_next_song has no return statement, so next_song is None; the cell is run for
# its printed recommendation and embedded player.
next_song = play_next_song(song_search)

You've got great taste!

We can also recommend Island In The Sun, by Weezer from a similar genre.

