# Music Recommender

## Functions for Spotipy

#### Actions
If song not in scraped top 100 dataframe...
1. get songs and artists from a Spotify playlist (via Spotipy) and conduct the same check
    1. fetch songs, artists, audio features
    2. make dataframe
    3. check
2. if song in spotify playlist, recommend another song from the playlist
    1. get random song
    2. check if it's the same song as the requested one
        1. if yes, get different random song
        2. else, recommend song
3. if song not in playlist, submit song to spotify for audio features

In [7]:
# libraries
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import getpass
import numpy as np
import plotly
import matplotlib.pyplot as plt
import plotly.graph_objs as go
from sklearn.cluster import KMeans
from sklearn import datasets

# LOGIN
# Credentials are taken from the environment (SPOTIPY_CLIENT_ID /
# SPOTIPY_CLIENT_SECRET) and only prompted for when unset, so that no secret
# is stored in this file.
# NOTE(review): the id/secret pair that used to be hard-coded here has been
# exposed in source and should be rotated in the Spotify developer dashboard.
import os

client_id = os.environ.get('SPOTIPY_CLIENT_ID') or str(getpass.getpass('client_id?'))
client_secret = os.environ.get('SPOTIPY_CLIENT_SECRET') or str(getpass.getpass('client_secret?'))
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
)


# formats the dataframe - lowercases and removes special characters and spaces from df
def format_df(df):
    """Return a copy of *df* with every string cell normalised for matching.

    String cells are lower-cased and stripped of every character that is not
    alphanumeric (spaces and punctuation are removed). Cells that are not
    strings (numbers, NaN, None) are passed through unchanged instead of
    raising, so frames with numeric or missing values can be formatted too.

    Parameters:
        df: pandas DataFrame to normalise; it is not modified.

    Returns:
        A new DataFrame with the same shape, index and columns.
    """
    def clean(value):
        # only strings can be lower-cased / filtered character by character
        if not isinstance(value, str):
            return value
        return ''.join(ch for ch in value.lower() if ch.isalnum())

    # column-wise Series.map avoids the deprecated DataFrame.applymap
    return df.apply(lambda column: column.map(clean))



# gets closest Spotipy matches to user query, asks user to choose and returns selection as dict
def get_closest_matches(song, df_clean, df):
    """Search Spotify for *song* and let the user pick the closest match.

    Parameters:
        song: track title to search for.
        df_clean: unused here; kept so existing callers keep working.
        df: unused here; kept so existing callers keep working.

    Returns:
        ``[artist_name, track_name, track_id]`` for the chosen track, or the
        string ``'new_search'`` when the user types "n" or Spotify returns no
        tracks.
    """
    # fetch up to 6 track results from Spotify
    results = sp.search(q=f"track:{song}", limit=6, offset=0, type='track', market=None)
    items = results['tracks']['items']

    if not items:
        print('No matches found on Spotify.')
        return 'new_search'

    # map the menu number (as a string) to [artist name, song name, track id]
    results_dict = {
        str(position): [item['artists'][0]['name'], item['name'], item['id']]
        for position, item in enumerate(items)
    }

    # propose options to user
    print('CLOSEST MATCHES\n')
    for key, (artist, title, _track_id) in results_dict.items():
        print(f'{key}. "{title}" by {artist}')

    # Keep asking until the answer is usable. Only numbers that were actually
    # listed are accepted, so a short result list cannot cause a KeyError, and
    # the selection is always returned to the caller (no lost return value).
    last_option = len(results_dict) - 1
    while True:
        user_response = input(f'''\nENTER the NUMBER (0-{last_option}) of the CLOSEST MATCH for your search, 
    or type "n" to try a new search: \n''').strip()

        if user_response == 'n':
            return 'new_search'
        if user_response in results_dict:
            return results_dict[user_response]
        print("That didn't work...please try again")


# function which handles more than 100 results
def get_playlist_tracks(user_id, playlist_id):
    """Return every item of a playlist, following pagination links.

    The first page is requested via ``sp.user_playlist_tracks``; as long as a
    page carries a ``'next'`` link, the following page is requested with
    ``sp.next`` and its items are added to the same list.
    """
    print('Fetching songs from Spotify...')

    page = sp.user_playlist_tracks(user_id, playlist_id)
    collected = page['items']

    # keep paging until Spotify reports no further page
    while page['next']:
        page = sp.next(page)
        collected += page['items']

    return collected



# gets songs from chosen spotify playlist
def fetch_hot_songs_spotify(match):
    """Build a DataFrame of playlist songs plus the searched song, with audio features.

    The songs of a fixed Spotify playlist are fetched, the user's selected
    track (*match*) is appended as the LAST row, audio features are requested
    for all ids in blocks of 100, and the combined table is written to
    'TopSongs_Spotify.csv'. Today's date is written to
    'last_scrape_spotify.txt'.

    Parameters:
        match: ``[artist_name, track_name, track_id]`` of the searched song.

    Returns:
        DataFrame with columns 'song_ids', 'songs', 'artists' followed by the
        audio-feature columns returned by Spotify; the searched song is the
        last row.

    Raises:
        RuntimeError: if Spotify returns a different number of feature
            entries than ids requested.
        ValueError: if Spotify has no audio features for the searched song.
    """
    # specify playlist
    # ------------------------- ENTER PLAYLIST INFORMATION HERE -------------------------
    # alternatives tried before:
    #   user_id = 'maka_97',        playlist_id = '6mtYuOxzl58vSGnEDtZ9uB'
    #   user_id = 'Peter Endacott', playlist_id = '6brlZremfjqciPBepLAPlW'
    #   user_id = 'Jevacat',        playlist_id = '181lAD9vYEewIW4xoYsFoM'
    user_id = '12174441449'
    playlist_id = '1T2VEWeXUzUzZ6piswWBjZ'

    # get songs from playlist
    playlist = get_playlist_tracks(user_id, playlist_id)

    # Skip entries that carry no track object or no track id; they cannot be
    # looked up for audio features.
    # NOTE(review): presumably these are removed or local-file tracks -- confirm.
    tracks = [
        entry['track'] for entry in playlist
        if entry.get('track') and entry['track'].get('id')
    ]
    song_ids = [track['id'] for track in tracks]
    songs = [track['name'] for track in tracks]
    artists = [track['artists'][0]['name'] for track in tracks]

    # add the searched song as the last row (cluster_and_recommend relies on this)
    song_ids.append(match[2])
    songs.append(match[1])
    artists.append(match[0])

    songs_df = pd.DataFrame({'song_ids': song_ids, 'songs': songs, 'artists': artists})

    # break song_ids into chunks of 100 and fetch audio features per chunk
    blocks = [song_ids[x:x + 100] for x in range(0, len(song_ids), 100)]

    print('Fetching audio features...')
    feat_dicts = []
    for block in tqdm(blocks):
        feat_dicts.extend(sp.audio_features(tracks=block))

    if len(feat_dicts) != len(song_ids):
        raise RuntimeError('Spotify returned an unexpected number of audio-feature entries.')
    if feat_dicts[-1] is None:
        raise ValueError(f'Spotify has no audio features for "{match[1]}" by {match[0]}.')

    # Entries without features come back as None; use an empty dict so the rows
    # stay aligned with songs_df, then drop those rows after joining.
    has_features = [feat is not None for feat in feat_dicts]
    feats_df = pd.DataFrame([feat or {} for feat in feat_dicts])

    # join dataframes side by side and keep only songs that have features
    df = pd.concat([songs_df, feats_df], axis=1)
    df = df.loc[has_features].reset_index(drop=True)

    # export to csv
    df.to_csv('TopSongs_Spotify.csv', index=False)

    # write timestamp to txt file
    timestamp = datetime.today().strftime('%Y-%m-%d')
    with open('last_scrape_spotify.txt', 'w') as f:
        f.write(timestamp)

    print('Fetch completed!\n')
    return df


def cluster_and_recommend(df, n_clusters=25):
    """Cluster the songs by audio features and recommend one from the searched song's cluster.

    The last row of *df* is treated as the searched song. All non-feature
    columns are dropped, KMeans is fitted on the rest, and the cluster labels
    are written to a new 'cluster' column of *df* (the caller's frame is
    modified in place). A random song is then drawn from the rows that share
    the searched song's cluster.

    Parameters:
        df: DataFrame with 'song_ids', 'songs', 'artists', the metadata
            columns dropped below, and the feature columns.
        n_clusters: desired number of clusters; it is capped at the number of
            rows so small frames do not make KMeans fail.

    Returns:
        A one-row DataFrame holding the recommended song.

    Raises:
        ValueError: if *df* contains no song other than the searched one.
    """
    # keep only the feature columns for clustering
    trimmed_df = df.drop(
        ['track_href', 'uri', 'analysis_url', 'id', 'song_ids', 'type', 'songs', 'artists'],
        axis=1,
    )

    # apply KMeans; it cannot build more clusters than there are rows
    k = max(1, min(n_clusters, len(trimmed_df)))
    cluster = KMeans(n_clusters=k).fit(trimmed_df)

    # add cluster number to ORIGINAL dataframe
    df['cluster'] = cluster.labels_

    # last row is the searched song; everything before it is a candidate
    searched = df.iloc[-1]
    candidates = df.iloc[:-1]
    # never recommend the searched song itself if the playlist contains it too
    candidates = candidates.loc[candidates['song_ids'] != searched['song_ids']]
    if candidates.empty:
        raise ValueError('No songs other than the searched one are available to recommend.')

    # prefer songs from the same cluster; fall back to any candidate when the
    # searched song is alone in its cluster
    cluster_df = candidates.loc[candidates['cluster'] == searched['cluster']]
    if cluster_df.empty:
        cluster_df = candidates

    # get random sample
    return cluster_df.sample()


In [8]:
# import tools
from bs4 import BeautifulSoup
import pandas as pd
import requests
from tqdm.notebook import tqdm
from datetime import datetime
import random
from IPython.display import clear_output
from datetime import datetime

### Functions for search

In [9]:
   
# displays exit message
def say_goodbye():
    """Print the BEETz farewell message."""
    farewell = 'Thanks for using BEETz! Come back soon!'
    print(farewell)



# checks for last scrape by reading txt file in which last date was stored
def check_last_scrape():
    """Return the date stored in 'last_scrape.txt'.

    Returns:
        The file's content with surrounding whitespace removed, or an empty
        string when the file does not exist yet (e.g. on the very first run),
        so a comparison against today's date simply fails instead of crashing.
    """
    try:
        with open('last_scrape.txt', 'r') as f:
            # strip so a trailing newline does not break the date comparison
            return f.read().strip()
    except FileNotFoundError:
        return ''
        
    
    
# random excitement
def exclaim():
    """Print one randomly chosen excited remark, preceded by a blank line."""
    options = [
        'SOO hot right now!',
        'So hot its stolen!',
        'Sizzzzzling!',
    ]
    print('\n', random.choice(options))

    
    
# scrapes songs from online billboard, writes to csv, returns as df and records timestamp in txt file
def fetch_hot_songs():
    """Scrape the Billboard Hot 100 page and return its songs and artists.

    The result is also written to 'TopSongs.csv', and today's date is written
    to 'last_scrape.txt'. Nothing is written when the scrape yields no songs,
    so an empty result is never recorded as an up-to-date scrape.

    Returns:
        DataFrame with columns 'songs' and 'artists' (at most 100 rows).

    Raises:
        requests.HTTPError: if the page request returns an error status.
        RuntimeError: if no song entries are found in the page.
    """
    print('Fetching hot songs and artists from the web...')

    url = "https://www.billboard.com/charts/hot-100/"
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # Run each selector once instead of once per loop iteration.
    # NOTE(review): 'a-no-trucate' is the class name the original code used;
    # presumably it mirrors the site's own spelling -- confirm against the live page.
    title_tags = soup.select('h3.c-title.a-no-trucate')
    artist_tags = soup.select('span.c-label.a-no-trucate')

    # pair titles with artists; zip stops at the shorter list, so a page with
    # fewer than 100 entries no longer raises IndexError
    pairs = list(zip(title_tags, artist_tags))[:100]
    if not pairs:
        raise RuntimeError('No songs found on the Billboard page; its layout may have changed.')

    songs = []
    artists = []
    for title_tag, artist_tag in tqdm(pairs):
        songs.append(title_tag.get_text(strip=True))
        artists.append(artist_tag.get_text(strip=True))

    # write to DataFrame
    df = pd.DataFrame({'songs': songs, 'artists': artists})
    df.to_csv('TopSongs.csv', index=False)

    # write timestamp to txt file
    timestamp = datetime.today().strftime('%Y-%m-%d')
    with open('last_scrape.txt', 'w') as f:
        f.write(timestamp)

    print('Fetch completed!\n')
    return df
    
    
    
# gets random song from scraped billboard and checks if identical with requested song
def get_random_song(song_request, df_clean, df):
    """Recommend a random scraped song that differs from the requested one.

    The request and every title in the first column of *df* are normalised the
    same way (lower-cased, non-alphanumeric characters removed) before they
    are compared, so a request such as "bad habit" is recognised inside
    "Bad Habit". Titles containing the request are excluded up front, which
    replaces the original retry-by-recursion and cannot loop forever.

    After printing the recommendation (or a notice that none is available),
    a new search is started via ``run_search``.

    Parameters:
        song_request: the song title the user asked for (raw or cleaned).
        df_clean: cleaned song table, passed on to ``run_search``.
        df: uncleaned song table; column 0 holds titles, column 1 artists.
    """
    def _clean(text):
        return ''.join(ch for ch in str(text).lower() if ch.isalnum())

    request_cleaned = _clean(song_request)

    candidates = df
    # an empty request is contained in every title, so skip filtering for it
    if request_cleaned and not df.empty:
        is_other_song = df.iloc[:, 0].map(lambda title: request_cleaned not in _clean(title))
        candidates = df.loc[is_other_song.astype(bool)]

    if candidates.empty:
        print('BEETz could not find a different hot song to recommend.')
    else:
        # takes sample from unclean dataframe
        sample = candidates.sample()
        recommended_song = str(sample.iloc[0, 0])
        recommended_artist = str(sample.iloc[0, 1])
        print(f'BEETz recommends {recommended_song} by {recommended_artist}!')

    run_search(df_clean, df)
    
        

        
# Based on run_search, asks user to enter song, formats and checks to see if song is in 'hot' songs
def song_search(df_clean, df):
    """Ask for a song title and check it against the scraped 'hot' songs.

    The title is lower-cased and stripped of non-alphanumeric characters
    before it is compared with ``df_clean['songs']``. Input that contains no
    letter or digit is rejected and asked for again, because an empty search
    term would match every song.

    Parameters:
        df_clean: cleaned song table with a 'songs' column.
        df: uncleaned song table, passed on to ``get_random_song``.

    Returns:
        None when the song is among the hot songs (a recommendation has then
        already been handled by ``get_random_song``); otherwise the
        lower-cased title as typed, to be looked up on Spotify by the caller.
    """
    song_cleaned = ''
    while not song_cleaned:
        song_req = input('Please enter the title of a song you enjoy: \n').lower()
        # remove spaces and special characters
        song_cleaned = ''.join(e for e in song_req if e.isalnum())
        if not song_cleaned:
            print('Please enter at least one letter or number.')

    # check if song title is in top hits (plain substring match, no regex needed)
    if df_clean['songs'].str.contains(song_cleaned, regex=False, na=False).any():
        exclaim()
        # pass the CLEANED title: it is compared against cleaned titles there
        get_random_song(song_cleaned, df_clean, df)
        return None

    # if not, switch to spotify
    print('Can\'t find it in the top 100...checking Spotify')
    return song_req
    
    
    
# Based on run_search, asks user to enter artist, formats and checks if artist is in 'hot' artists
def artist_search(df_clean, df):
    """Ask for an artist name and report whether it appears among the 'hot' artists.

    Returns '-' after an exclamation when the cleaned name is contained in
    ``df_clean['artists']``; otherwise prints a notice, starts a new search
    via ``run_search`` and returns None.
    """
    typed_name = input('Please enter the name of an artist you enjoy: \n').lower()
    # keep letters and digits only
    wanted = ''.join(ch for ch in typed_name if ch.isalnum())

    found = df_clean['artists'].str.contains(wanted).any()
    if not found:
        # unknown artist: offer another search
        print("Your artist doesn't seem to be very popular...try again? ")
        run_search(df_clean, df)
        return None

    exclaim()
    return '-'
            

            
# gets user search preference and executes search accordingly
def run_search(df_clean, df):
    """Run the interactive menu: search by song until the user exits.

    For a song that is not among the scraped hot songs, the closest Spotify
    match is chosen by the user, the Spotify playlist is fetched and clustered,
    and a song from the same cluster is recommended.

    Parameters:
        df_clean: cleaned table of scraped songs.
        df: uncleaned table of scraped songs.

    Returns:
        None, once the user has chosen to exit.
    """
    # a loop instead of self-recursion, so long sessions do not deepen the call stack
    while True:
        response = input('''
    Enter 1 to search by song.
    Enter 3 to exit.\n\n''').strip()

        if response == '3':
            say_goodbye()
            return None

        if response != '1':
            print('Hm...that didn\'t work. Please try again.')
            continue

        requested_song = song_search(df_clean, df)

        if requested_song is None:
            # The song was in the top 100: song_search handed over to
            # get_random_song, which recommends a song and runs its own
            # follow-up search session. That session has ended by the time we
            # get here, so stop instead of searching Spotify for "None".
            return None

        # ----------------------- SPOTIPY --------------------------
        match = get_closest_matches(requested_song, df_clean, df)

        # no usable selection: go back to the menu
        if match is None or match == 'new_search':
            continue

        df_spotify = fetch_hot_songs_spotify(match)
        recommendation = cluster_and_recommend(df_spotify)

        rec_artist = recommendation.iloc[0]['artists']
        rec_song = recommendation.iloc[0]['songs']

        print(f'BEETz recommends {rec_song} by {rec_artist}!')
        


        

## Primary function

In [10]:
# the MOTHER function

def beetz():
    """Entry point: greet the user, load today's hot songs and start the search menu.

    The scraped song list is read from 'TopSongs.csv' when the last scrape
    happened today; otherwise (or when a cache file is missing) a fresh scrape
    is started. The list is then normalised and handed to ``run_search``.
    """
    # raw string: the banner contains backslashes that are not escape sequences
    print(r'''
     ____  ______ ______ _______  
 |  _ \|  ____|  ____|__   __| 
 | |_) | |__  | |__     | |____
 |  _ <|  __| |  __|    | |_  /
 | |_) | |____| |____   | |/ / 
 |____/|______|______|  |_/___|
 ''')

    print('HELLO! Welcome to the BEETz song recommender!\n')
    print('BEETz will recommend a song related to the song or artist you enter.\n')

    # checks for last scrape - if not from today, launches new scrape
    today = datetime.today().strftime('%Y-%m-%d')
    df_scraped = None
    try:
        if check_last_scrape() == today:
            df_scraped = pd.read_csv('TopSongs.csv', index_col=False)
    except FileNotFoundError:
        # timestamp file or csv missing (e.g. first run): fall back to scraping
        df_scraped = None

    if df_scraped is not None:
        print('We have an up-to-date list of HOT songs from today on file!')
    else:
        print('We will fetch an up-to-date list of HOT songs from today!')
        df_scraped = fetch_hot_songs()

    # lowercase and remove special characters and spaces from df
    df_copy = df_scraped.copy()
    df_clean = format_df(df_copy)

    # gets user search preference and executes search accordingly
    run_search(df_clean, df_copy)
    
 

In [None]:
# start the interactive recommender; it prompts for input until the user exits
beetz()


     ____  ______ ______ _______  
 |  _ \|  ____|  ____|__   __| 
 | |_) | |__  | |__     | |____
 |  _ <|  __| |  __|    | |_  /
 | |_) | |____| |____   | |/ / 
 |____/|______|______|  |_/___|
 
HELLO! Welcome to the BEETz song recommender!

BEETz will recommend a song related to the song or artist you enter.

We have an up-to-date list of HOT songs from today on file!

    Enter 1 to search by song.
    Enter 3 to exit.

1
Please enter the title of a song you enjoy: 
hello
Can't find it in the top 100...checking Spotify
CLOSEST MATCHES

0. "Hello" by Adele
1. "Hello (feat. A Boogie Wit da Hoodie)" by Pop Smoke
2. "Hello?" by Clairo
3. "Hello" by KAROL G
4. "Hello My Old Heart" by The Oh Hellos
5. "hello!" by ROLE MODEL

ENTER the NUMBER (0-5) of the CLOSEST MATCH for your search, 
    or type "n" to try a new search: 
0
Fetching songs from Spotify...
Fetching audio features...


  0%|          | 0/19 [00:00<?, ?it/s]

Fetch completed!

BEETz recommends Make It Wit Chu by Queens of the Stone Age!


### Apply clustering

In [None]:
import numpy as np
import plotly
import matplotlib.pyplot as plt
import plotly.graph_objs as go
from sklearn.cluster import KMeans
from sklearn import datasets

In [None]:
# NOTE(review): in the visible code `recommendation` is only assigned inside
# run_search; this cell presumably relies on a notebook-level variable -- confirm.
recommendation

In [None]:
# Apply clustering 
# NOTE(review): `trimmed_df` is only a local of cluster_and_recommend in the
# visible code; it must be defined at notebook level before this cell runs -- confirm.
kmeans = KMeans(n_clusters = 24)
cluster = kmeans.fit(trimmed_df)

In [None]:
# store each row's KMeans cluster label in a new 'cluster' column
trimmed_df['cluster'] = cluster.labels_

In [None]:
# preview the first rows, now including the 'cluster' column if the previous cell ran
trimmed_df.head()

In [None]:
# number of rows assigned to each cluster
trimmed_df['cluster'].value_counts()