# Import Statements

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity, pairwise_distances, cosine_distances, euclidean_distances
from spotifyClient import SpotifyAPI


pd.set_option('display.max_columns', None)

# Read in Data

In [2]:
# Load the song catalogue and discard the index column that was written out with the CSV.
df = pd.read_csv('../data/spotify_final.csv').drop(columns='Unnamed: 0')

In [3]:
# Preview the first rows to confirm the columns loaded as expected.
df.head()

Unnamed: 0,song,album,artist,popularity,track_id,track_explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,num_samples,duration,end_of_fade_in,start_of_fade_out,tempo_confidence,time_signature_confidence,key_confidence,mode_confidence,genres,year,decade
0,el paso city,The Essential Marty Robbins 1951-1982,marty robbins,41,4CIaUS9qVxS6RsQBnC37EU,0,0.597,0.472,0,-11.721,1,0.0342,0.828,2e-06,0.144,0.561,107.59,251773,4,5551602,251.77333,0.11723,244.99956,0.767,0.708,0.914,0.788,"['cowboy western', 'nashville sound']",1960,1960
1,faleena from el paso,The Drifter,marty robbins,38,2jqx9Oq9ZErm5ywDblnvHi,0,0.644,0.352,4,-10.562,1,0.0358,0.874,0.0,0.14,0.643,96.896,498387,3,10989426,498.38666,0.24721,492.39075,0.378,0.607,0.89,0.714,"['cowboy western', 'nashville sound']",1960,1960
2,running bear remastered,Golden Selection (Remastered),johnny preston,32,0x6gSfnYA91AHPLvULn5NK,0,0.762,0.419,5,-9.312,0,0.0653,0.744,0.0,0.144,0.765,119.964,158213,4,3488604,158.21333,0.0,151.93398,0.639,1.0,0.766,0.602,"['brill building pop', 'deep adult standards',...",1960,1960
3,running bear,Greatest Hits,johnny preston,36,1RYznli2VNO7FCbW1Hq4KM,0,0.772,0.297,5,-14.679,0,0.053,0.854,8e-06,0.125,0.822,119.987,158200,4,3488310,158.2,1.13624,151.88753,0.7,1.0,0.686,0.585,"['brill building pop', 'deep adult standards',...",1960,1960
4,teen angel,The Lovin' Touch,mark dinning,30,3PymNAkWROfyEVeYq6XtjD,0,0.584,0.0863,0,-15.537,1,0.0403,0.775,0.0,0.212,0.46,101.493,158200,4,3488310,158.2,0.30227,151.19093,0.438,0.767,0.886,0.725,[],1960,1960


# Sample Search

In [28]:
class RecommendSong:
    """Look up a track on Spotify and recommend similar songs from the local catalogue.

    The catalogue (``df``) is read from ``../data/spotify_final.csv`` once, when the
    class is defined. ``recommend()`` fetches the searched track's metadata, audio
    features and audio analysis through ``SpotifyAPI``, appends them to the catalogue
    as one extra row, ranks every row by cosine similarity to that row and prints the
    best matches (one per artist).

    Attributes:
        song_name: Track title to search for. It must equal the title returned by
            the Spotify search exactly (the comparison is case-sensitive).
        song_id: Id of the matched track; ``None`` until ``search()`` has run.
        track_data: Flat list of values for the searched track, in ``col_name`` order.
    """

    # Sentinel returned by find_song() when no search result has the requested title.
    NOT_FOUND = 404

    # Values taken from the track lookup, the audio-features lookup and the
    # audio-analysis lookup respectively. The last two groups double as the
    # dictionary keys read from the API responses.
    _TRACK_COLUMNS = (
        'song', 'album', 'artist', 'popularity', 'track_id', 'track_explicit')
    _FEATURE_KEYS = (
        'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
        'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
        'duration_ms', 'time_signature')
    _ANALYSIS_KEYS = (
        'num_samples', 'duration', 'end_of_fade_in', 'start_of_fade_out',
        'tempo_confidence', 'time_signature_confidence', 'key_confidence',
        'mode_confidence')

    # Catalogue columns that are identifiers or labels and are left out of the
    # similarity computation.
    _NON_FEATURE_COLUMNS = (
        'song', 'album', 'artist', 'track_id', 'year', 'decade', 'genres')

    # Column order of the one-row frame built by search().
    col_name = [*_TRACK_COLUMNS, *_FEATURE_KEYS, *_ANALYSIS_KEYS]

    df = pd.read_csv('../data/spotify_final.csv').drop('Unnamed: 0', axis=1)

    def __init__(self, song_name, *args, **kwargs):
        """Store the title to search for; no lookup happens until search()/recommend().

        Args:
            song_name: Track title to look up.
            *args, **kwargs: Forwarded to the next ``__init__`` in the MRO.
        """
        super().__init__(*args, **kwargs)
        self.song_name = song_name
        self.song_id = None
        self.track_data = []
        self._spotify = None

    def _client(self):
        """Return this instance's SpotifyAPI client, creating it on first use."""
        if getattr(self, '_spotify', None) is None:
            self._spotify = SpotifyAPI()
        return self._spotify

    def find_song(self):
        """Search Spotify for ``song_name`` and return the id of the first exact match.

        Returns:
            The ``'id'`` of the first item in ``['tracks']['items']`` whose ``'name'``
            equals ``song_name``, or ``NOT_FOUND`` (404) when there is no such item.
        """
        results = self._client().search({"track": self.song_name}, search_type="track")
        for item in results['tracks']['items']:
            if item['name'] == self.song_name:
                return item['id']
        return self.NOT_FOUND

    def get_track_data(self):
        """Append the track-level values for ``song_id`` to ``track_data``.

        Appends, in order: name, album name, first artist's name, popularity, id and
        the explicit flag converted to ``int``.
        """
        track = self._client().get_track(self.song_id)
        self.track_data.extend([
            track['name'],
            track['album']['name'],
            track['artists'][0]['name'],
            track['popularity'],
            track['id'],
            int(track['explicit']),
        ])

    def get_track_features(self):
        """Append the audio-feature values for ``song_id`` to ``track_data``."""
        track_features = self._client().get_features(self.song_id)
        self.track_data.extend(track_features[key] for key in self._FEATURE_KEYS)

    def get_track_analysis(self):
        """Append the audio-analysis values for ``song_id`` to ``track_data``.

        The values are read from the ``'track'`` section of the analysis response.
        """
        track_analysis = self._client().get_analysis(self.song_id)['track']
        self.track_data.extend(track_analysis[key] for key in self._ANALYSIS_KEYS)

    def search(self):
        """Resolve ``song_name`` and return its data as a one-row DataFrame.

        Returns:
            A DataFrame with a single row and the columns listed in ``col_name``.

        Raises:
            LookupError: If no search result has exactly the requested title.
                (``LookupError`` is an ``Exception`` subclass, so existing
                ``except Exception`` handlers keep working.)
        """
        self.song_id = self.find_song()
        if self.song_id == self.NOT_FOUND:
            raise LookupError("Song not found.")
        # Start from an empty row every time: without this a second search() on the
        # same instance would append another 27 values and no longer match col_name.
        self.track_data = []
        self.get_track_data()
        self.get_track_features()
        self.get_track_analysis()
        return pd.DataFrame([self.track_data], columns=self.col_name)

    def print_recommendations(self, indi, rec_df, limit=10):
        """Print the recommendations, showing at most one song per artist.

        Args:
            indi: Row positions into ``rec_df``. The first entry is the searched
                track; the remaining entries are candidates, best match first.
            rec_df: Frame containing at least the ``'song'`` and ``'artist'`` columns.
            limit: Maximum number of recommendations to print (default 10).
        """
        if len(indi) == 0:
            return
        artists = rec_df['artist']
        songs = rec_df['song']
        print(f"For the song {self.song_name} by {artists.iloc[indi[0]]}, we recommend you check out:\n")
        seen_artists = set()
        shown = 0
        for position in indi[1:]:
            if shown >= limit:
                break
            artist = artists.iloc[position]
            if artist in seen_artists:
                continue
            shown += 1
            print(f"{shown}. {songs.iloc[position].title()} by {artist.title()}\n")
            seen_artists.add(artist)

    def recommend(self):
        """Fetch the searched track, rank the catalogue against it and print the result.

        Returns:
            None. The recommendations are printed by ``print_recommendations``.

        Raises:
            LookupError: Propagated from ``search()`` when the song is not found.
        """
        search_df = self.search()
        catalogue = self.df  # the class's own copy, not a notebook-level global
        rec_df = pd.concat([catalogue, search_df], ignore_index=True)
        features = [
            column for column in catalogue.columns
            if column not in self._NON_FEATURE_COLUMNS
        ]
        # The searched track was appended last, so its position is known; looking it
        # up by title would return several rows whenever the title is duplicated.
        query_idx = len(rec_df) - 1
        feature_matrix = rec_df[features]
        # NOTE(review): the features are used unscaled, so large-magnitude columns
        # such as num_samples and duration_ms dominate the cosine similarity.
        # Consider standardising them before comparing.
        # Only the query row is compared against the catalogue; the full N x N
        # similarity matrix is never needed.
        scores = cosine_similarity(feature_matrix.iloc[[query_idx]], feature_matrix)[0]
        # Stable descending order keeps ties in catalogue order.
        order = np.argsort(-scores, kind='stable').tolist()
        ranked = [query_idx] + [position for position in order if position != query_idx]
        return self.print_recommendations(ranked, rec_df)

In [29]:
# Create a recommender for the title 'Bad Guy'. The constructor only stores the
# title; the lookup happens when recommend() or search() is called.
test = RecommendSong('Bad Guy')

In [30]:
# recommend() prints the list below. It returns whatever print_recommendations()
# returns, and that method has no return statement, so test_recommendation is None.
test_recommendation = test.recommend()

For the song Bad Guy by Eminem, we recommend you check out:

1. Hotel California  Live On Mtv, 1994 by Eagles

2. One More Try by George Michael

3. Three Times A Lady by Commodores

4. Keep On Truckin by Eddie Kendricks

5. Reunited by Peaches & Herb

6. Hard To Say Im Sorry  Get Away by Chicago

7. Good Times  2018 Remaster by Chic

8. Faleena From El Paso by Marty Robbins

9. Blaze Of Glory by Bon Jovi

10. Love Hangover by Diana Ross



In [None]:
# Shows no output: test_recommendation is None because recommend() only prints.
test_recommendation

In [None]:
# test_df was not defined by any earlier cell, so this cell failed on a fresh kernel.
# Build it from a fresh search for 'Floodgates', the title looked up a few cells below.
# NOTE(review): assumes Spotify returns a track titled exactly 'Floodgates' - confirm.
test_df = RecommendSong('Floodgates').search()
# Append the searched track as the last row of the catalogue.
new_df = pd.concat([df, test_df], ignore_index=True)

In [None]:
# Inspect the combined frame; the searched track is the last row because it was
# concatenated after the catalogue with ignore_index=True.
new_df

In [None]:
# Columns compared by the similarity step: every catalogue column except the
# identifier and label columns listed here.
excluded_columns = ('song', 'album', 'artist', 'track_id', 'year', 'decade', 'genres')
features = [column for column in df.columns if column not in excluded_columns]

In [None]:
# Pairwise cosine similarity between every pair of rows in new_df[features].
# NOTE(review): the result has one row and one column per song, but only the row
# for the searched track is used below; on a large catalogue this is memory-heavy.
cosine_similarities = cosine_similarity(new_df[features])

In [None]:
# Lookup from song title to the row label in new_df (labels are 0..n-1 because the
# frame was built with ignore_index=True).
indicies = pd.Series(new_df.index, index=new_df['song'])

In [None]:
# Row of the searched track.
# NOTE(review): if the title occurs more than once in new_df['song'] this returns a
# Series of rows instead of a single integer - confirm the title is unique.
idx = indicies['Floodgates']

In [None]:
# Pair each row position with its similarity to the searched track.
sim_scores = list(enumerate(cosine_similarities[idx]))

In [None]:
# Rank every row by similarity, highest first, and keep ranks 2-11. The first entry
# is skipped on the assumption that it is the searched track itself.
# Recomputed from cosine_similarities[idx] rather than from the previous sim_scores,
# so re-running this cell does not sort and trim an already-trimmed list.
sim_scores = sorted(enumerate(cosine_similarities[idx]), key=lambda x: x[1], reverse=True)[1:11]

In [None]:
# Keep only the row positions from the (position, score) pairs.
indi = [row_position for row_position, _score in sim_scores]

In [None]:
# Titles of the selected rows, in ranked order.
new_df['song'].iloc[indi]