## Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from numpy import dot
from numpy.linalg import norm

from sklearn.preprocessing import StandardScaler

Functions from first recommeder notebook will be used. As well as importing the necessary dataframe as it is required for the functions to work.

## Import Datasets

In [2]:
#import data
data_df = pd.read_csv('../data/spotify_songs_1922/cleaned_data.csv')

### create neccesary dataframes

In [3]:
#create dataframe only used to apply cosine similarity calculation on
# only take the numerical columns and drop the numerical ones that do not represent audio features
data_df_cosim = data_df.select_dtypes(include=['int64','float64']).drop(columns=['year','popularity','explicit'])

#concat with id so that we can use the grouby function on it
data_df_cosim = pd.concat([data_df_cosim,data_df.id], axis =1)


In [4]:
#this puts the dataset into a matrix and indexed by the id
song_sim_matrix = data_df_cosim.groupby('id').sum()

song_sim_matrix.head()

Unnamed: 0_level_0,valence,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
000G1xMMuwxNHmwVsBdtj1,0.555,0.0131,0.256,182347,0.895,0.000106,2,0.0821,-4.86,1,0.0707,191.307
000GyYHG4uWmlXieKLij8u,0.146,0.469,0.797,180160,0.898,0.0,1,0.0824,-5.922,0,0.52,89.926
000Npgk5e2SgwGaIsN3ztv,0.494,0.98,0.277,206972,0.145,0.879,3,0.111,-19.898,1,0.0845,75.644
000ZxLGm7jDlWCHtcXSeBe,0.854,0.795,0.685,314667,0.483,0.878,2,0.113,-10.202,0,0.0337,97.694
000jBcNljWTnyjB4YO7ojf,0.969,0.656,0.788,179747,0.808,0.0,5,0.154,-6.59,1,0.0395,113.046


In [5]:
#creating scaling function
def scaler(data):
    ss = StandardScaler()
    return ss.fit_transform(data)

#scale our dataframe
song_sim_matrix_scaled = scaler(song_sim_matrix)

#convert our numpy matrix into dataframe
song_sim_matrix_df = pd.DataFrame(song_sim_matrix_scaled, index=song_sim_matrix.index,columns=song_sim_matrix.columns)

## First Recomender Functions

In [8]:
# Use code from first recommender system
#getting the score 

def sim_score_generator(song,artist,number_of_recommedations):
    
    """takes in song and artist to return list of similar songs"""
    
    #takes artist and song name to find song id
    song_id = data_df[(data_df.main_artist == artist)&(data_df.name == song)]['id'].values[0]
    
    song_array = song_sim_matrix_df.loc[song_id].to_numpy()
    
    #tuple of sim score with song_index
    sim_score = []
    #counting the index
    song_index = -1
    
    #iterate through all songs in dataset
    for song in range(len(data_df)):
        
        song_index += 1

        # convert in to array
        other_song_array = song_sim_matrix_df.iloc[song].to_numpy()
        # sim score calculation
        score = dot(song_array,other_song_array)/(norm(song_array)*norm(other_song_array))
        
        #get other song id
        other_song_id = song_sim_matrix_df.iloc[song].name
        
        # append to list
        song_score_and_id = (score,song_index, other_song_id)
    
        # assign the position index of the song with the score
        sim_score.append(song_score_and_id)
    
    
    #list of N recommendations based on highest sim score and corresponding index
    return(sorted(sim_score,reverse=True)[1:number_of_recommedations+1])

def show_recommendations(top_recomendations):
    #unpack tuples in list of recommendation
    for score,song_idx,other_song_id in top_recomendations:
        #find the id of the from the sim matrix 
        
        sim_song_id = song_sim_matrix_df.iloc[song_idx].name
        
        
        #print the corresponding artist and song to that matches the id from the data_df
        print(f"Artist: {data_df[data_df.id.str.match(sim_song_id)]['main_artist'].values[0]}")
        print(f"Song: {data_df[data_df.id.str.match(sim_song_id)]['name'].values[0]}")
        print(f"Year & Popularity: {data_df[data_df.id.str.match(sim_song_id)]['year'].values[0]}, {data_df[data_df.id.str.match(sim_song_id)]['popularity'].values[0]}\n") 


## Playlist Functions

In [30]:
def create_playlist():
    
    playlist_df = pd.DataFrame()
    
    return playlist_df

def add_song_playlist(song, playlist_df):
    
    """take in song from recommendations
    put into dataframe"""
    
    
    sim_score, song_idx, song_id = song
    
    #get pandas row of song
    song_row = song_sim_matrix_df.loc[[song_id]]
    
    #append to playlist dataframe
    playlist_df = playlist_df.append(song_row)
    
    return playlist_df

def playlist_reccomendations(playlist_df, number_of_recommedations):
    
    """takes in playlist dataframe and recommends n songs"""
    
    #create weights between 0.3 to 1
    weights = np.linspace(0.3,1,len(playlist_df))
    #reshape weight to be 2D
    weights = weights.reshape((len(weights),1))

    
    #convert playlist to an array
    playlist_array = playlist_df.to_numpy()
    
    #apply weights to playlist array
    adjusted_playlist_array = playlist_array*weights
    
    #sum all the song vectors and get average to be compared with other song vectors
    playlist_vector = sum(adjusted_playlist_array)/len(adjusted_playlist_array)
    
    #tuple of sim score with song_index
    sim_score = []
    #counting the index
    song_index = -1
    
    for song in range(len(data_df)):
        
        song_index += 1

        # convert in to array
        other_song_array = song_sim_matrix_df.iloc[song].to_numpy()
        # sim score calculation
        score = dot(playlist_vector,other_song_array)/(norm(playlist_vector)*norm(other_song_array))
        
        #get other song id
        other_song_id = song_sim_matrix_df.iloc[song].name
        
        # append to list
        song_score_and_id = (score,song_index, other_song_id)
    
        # assign the position index of the song with the score
        sim_score.append(song_score_and_id)
    
    
    #list of N recommendations based on highest sim score and corresponding index
    return(sorted(sim_score,reverse=True)[1:number_of_recommedations+1])

def show_current_playlist(playlist_df):
    
    """takes in playlist_df, and returns what is in the current playlist"""
    
    for song in playlist_df.index.to_list():
        
        #print the corresponding artist and song to that matches the id from the data_df
        print(f"Artist: {data_df[data_df.id.str.match(song)]['main_artist'].values[0]}")
        print(f"Song: {data_df[data_df.id.str.match(song)]['name'].values[0]}\n")

I plan to make a playlist for my classmates as this project would be the last we work on. This playlist would be created using the functions made. 

In [31]:
#get Vitamin C song ID
song = "Graduation (Friends Forever)"
artist = "Vitamin C"

vitaminc_song_id = data_df[(data_df.main_artist == artist)&(data_df.name == song)]['id'].values[0]

In [75]:
#create new playlist
dsi_playlist = create_playlist()

In [76]:
#append to dsi_playlist dataframe

dsi_playlist = dsi_playlist.append(song_sim_matrix_df.loc[[vitaminc_song_id]])

In [77]:
#get recommendation for song Graduation (Friends Forever) by Vitamin C

#return 10 similar song with raw similiarity scores and assign into list 
recommendations = sim_score_generator(song,artist,10)

#print similar songs with corresponding artist and song name
show_recommendations(recommendations)

Artist: Taylor Swift
Song: All Too Well
Year & Popularity: 2012, 69

Artist: Gladys Knight
Song: Licence To Kill
Year & Popularity: 1989, 50

Artist: Aerosmith
Song: Hole In My Soul
Year & Popularity: 1997, 62

Artist: Tesla
Song: Games People Play
Year & Popularity: 1994, 35

Artist: TV On The Radio
Song: Young Liars
Year & Popularity: 2003, 58

Artist: South Park Mexican
Song: The System
Year & Popularity: 2001, 41

Artist: Brad Paisley
Song: Waitin' On a Woman
Year & Popularity: 2005, 52

Artist: Travis Scott
Song: Pray 4 Love (feat. The Weeknd)
Year & Popularity: 2015, 64

Artist: Chicago
Song: Will You Still Love Me? - 2009 Remaster
Year & Popularity: 1986, 40

Artist: Jamey Johnson
Song: High Cost Of Living
Year & Popularity: 2008, 62



In [78]:
#add song into playlist after looking at recommendation

dsi_playlist = add_song_playlist(recommendations[8],dsi_playlist)

In [79]:
show_current_playlist(dsi_playlist)

Artist: Vitamin C
Song: Graduation (Friends Forever)

Artist: Chicago
Song: Will You Still Love Me? - 2009 Remaster



In [80]:
recommendations = playlist_reccomendations(dsi_playlist,10)

show_recommendations(recommendations)

Artist: Tesla
Song: Games People Play
Year & Popularity: 1994, 35

Artist: Tim McGraw
Song: Please Remember Me
Year & Popularity: 1999, 43

Artist: X Ambassadors
Song: American Oxygen
Year & Popularity: 2015, 59

Artist: Ozzy Osbourne
Song: Dreamer
Year & Popularity: 2001, 65

Artist: Vitamin C
Song: Graduation (Friends Forever)
Year & Popularity: 1999, 55

Artist: Aerosmith
Song: Hole In My Soul
Year & Popularity: 1997, 62

Artist: Porter Robinson
Song: Divinity
Year & Popularity: 2014, 57

Artist: Chris Tomlin
Song: God Of This City
Year & Popularity: 2008, 45

Artist: Boyz II Men
Song: The Color of Love
Year & Popularity: 2002, 50

Artist: Jeff Buckley
Song: Dream Brother
Year & Popularity: 1994, 45



In [81]:
#add song into playlist after looking at recommendation

dsi_playlist = add_song_playlist(recommendations[6],dsi_playlist)

show_current_playlist(dsi_playlist)

Artist: Vitamin C
Song: Graduation (Friends Forever)

Artist: Chicago
Song: Will You Still Love Me? - 2009 Remaster

Artist: Porter Robinson
Song: Divinity



In [82]:
#return 10 similar song with raw similiarity scores and assign into list 
recommendations = playlist_reccomendations(dsi_playlist,10)

#print similar songs with corresponding artist and song name
show_recommendations(recommendations)

Artist: Chicago
Song: Will You Still Love Me? - 2009 Remaster
Year & Popularity: 1986, 40

Artist: Ying Yang Twins
Song: Get Low
Year & Popularity: 2002, 71

Artist: A Foot In Coldwater
Song: (Make Me Do) Anything You Want
Year & Popularity: 1972, 33

Artist: Jhay Cortez
Song: Kobe En LA 2.0
Year & Popularity: 2020, 59

Artist: Chris Tomlin
Song: God Of This City
Year & Popularity: 2008, 45

Artist: Bon Jovi
Song: I'll Be There For You
Year & Popularity: 1988, 68

Artist: Aerosmith
Song: Hole In My Soul
Year & Popularity: 1997, 62

Artist: Tesla
Song: Games People Play
Year & Popularity: 1994, 35

Artist: Jeff Buckley
Song: Dream Brother
Year & Popularity: 1994, 45

Artist: Boyz II Men
Song: The Color of Love
Year & Popularity: 2002, 50



In [83]:
#add song into playlist after looking at recommendation

dsi_playlist = add_song_playlist(recommendations[1],dsi_playlist)

show_current_playlist(dsi_playlist)

Artist: Vitamin C
Song: Graduation (Friends Forever)

Artist: Chicago
Song: Will You Still Love Me? - 2009 Remaster

Artist: Porter Robinson
Song: Divinity

Artist: Ying Yang Twins
Song: Get Low



In [84]:
#return 15 similar song with raw similiarity scores and assign into list 
recommendations = playlist_reccomendations(dsi_playlist,15)

#print similar songs with corresponding artist and song name
show_recommendations(recommendations)

Artist: Ying Yang Twins
Song: Get Low
Year & Popularity: 2002, 71

Artist: Chicago
Song: Will You Still Love Me? - 2009 Remaster
Year & Popularity: 1986, 40

Artist: A Foot In Coldwater
Song: (Make Me Do) Anything You Want
Year & Popularity: 1972, 33

Artist: Jeff Buckley
Song: Dream Brother
Year & Popularity: 1994, 45

Artist: Roy Jones Jr.
Song: Go Hard, Go Home (feat. Giz, Swellz & Choppa)
Year & Popularity: 2004, 59

Artist: Jhay Cortez
Song: Kobe En LA 2.0
Year & Popularity: 2020, 59

Artist: Aerosmith
Song: Hole In My Soul
Year & Popularity: 1997, 62

Artist: Taylor Swift
Song: The Last Time
Year & Popularity: 2012, 57

Artist: Tesla
Song: Games People Play
Year & Popularity: 1994, 35

Artist: Pretenders
Song: Hymn to Her - 2007 Remaster
Year & Popularity: 1986, 53

Artist: Chris Tomlin
Song: God Of This City
Year & Popularity: 2008, 45

Artist: AC/DC
Song: Little Lover
Year & Popularity: 1976, 46

Artist: "Christine DClario"
Song: Gloria en Lo Alto
Year & Popularity: 2012, 55

A

In [85]:
#add song into playlist after looking at recommendation

dsi_playlist = add_song_playlist(recommendations[12],dsi_playlist)

show_current_playlist(dsi_playlist)

Artist: Vitamin C
Song: Graduation (Friends Forever)

Artist: Chicago
Song: Will You Still Love Me? - 2009 Remaster

Artist: Porter Robinson
Song: Divinity

Artist: Ying Yang Twins
Song: Get Low

Artist: "Christine DClario"
Song: Gloria en Lo Alto



In [86]:
#return 10 similar song with raw similiarity scores and assign into list 
recommendations = playlist_reccomendations(dsi_playlist,10)

#print similar songs with corresponding artist and song name
show_recommendations(recommendations)

Artist: Ying Yang Twins
Song: Get Low
Year & Popularity: 2002, 71

Artist: Roy Jones Jr.
Song: Go Hard, Go Home (feat. Giz, Swellz & Choppa)
Year & Popularity: 2004, 59

Artist: Chicago
Song: Will You Still Love Me? - 2009 Remaster
Year & Popularity: 1986, 40

Artist: "Christine DClario"
Song: Gloria en Lo Alto
Year & Popularity: 2012, 55

Artist: A Foot In Coldwater
Song: (Make Me Do) Anything You Want
Year & Popularity: 1972, 33

Artist: Tesla
Song: Games People Play
Year & Popularity: 1994, 35

Artist: AC/DC
Song: Little Lover
Year & Popularity: 1976, 46

Artist: M2M
Song: Pretty Boy
Year & Popularity: 2000, 64

Artist: Highly Suspect
Song: 16
Year & Popularity: 2019, 63

Artist: Jeff Buckley
Song: Dream Brother
Year & Popularity: 1994, 45



In [87]:
#add song into playlist after looking at recommendation

dsi_playlist = add_song_playlist(recommendations[7],dsi_playlist)

show_current_playlist(dsi_playlist)

Artist: Vitamin C
Song: Graduation (Friends Forever)

Artist: Chicago
Song: Will You Still Love Me? - 2009 Remaster

Artist: Porter Robinson
Song: Divinity

Artist: Ying Yang Twins
Song: Get Low

Artist: "Christine DClario"
Song: Gloria en Lo Alto

Artist: M2M
Song: Pretty Boy



In [88]:
#return 15 similar song with raw similiarity scores and assign into list 
recommendations = playlist_reccomendations(dsi_playlist,15)

#print similar songs with corresponding artist and song name
show_recommendations(recommendations)

Artist: Roy Jones Jr.
Song: Go Hard, Go Home (feat. Giz, Swellz & Choppa)
Year & Popularity: 2004, 59

Artist: M2M
Song: Pretty Boy
Year & Popularity: 2000, 64

Artist: "Christine DClario"
Song: Gloria en Lo Alto
Year & Popularity: 2012, 55

Artist: Porter Robinson
Song: Divinity
Year & Popularity: 2014, 57

Artist: Tesla
Song: Games People Play
Year & Popularity: 1994, 35

Artist: Chicago
Song: Will You Still Love Me? - 2009 Remaster
Year & Popularity: 1986, 40

Artist: Highly Suspect
Song: 16
Year & Popularity: 2019, 63

Artist: A Foot In Coldwater
Song: (Make Me Do) Anything You Want
Year & Popularity: 1972, 33

Artist: Boyz II Men
Song: The Color of Love
Year & Popularity: 2002, 50

Artist: Jeff Buckley
Song: Dream Brother
Year & Popularity: 1994, 45

Artist: Aaron Shust
Song: My Savior My God
Year & Popularity: 2005, 55

Artist: Vitamin C
Song: Graduation (Friends Forever)
Year & Popularity: 1999, 55

Artist: Taylor Swift
Song: The Last Time
Year & Popularity: 2012, 57

Artist: AC

In [90]:
#add song into playlist after looking at recommendation

dsi_playlist = add_song_playlist(recommendations[14],dsi_playlist)

show_current_playlist(dsi_playlist)

Artist: Vitamin C
Song: Graduation (Friends Forever)

Artist: Chicago
Song: Will You Still Love Me? - 2009 Remaster

Artist: Porter Robinson
Song: Divinity

Artist: Ying Yang Twins
Song: Get Low

Artist: "Christine DClario"
Song: Gloria en Lo Alto

Artist: M2M
Song: Pretty Boy

Artist: Ozzy Osbourne
Song: Dreamer



In [91]:
#return 15 similar song with raw similiarity scores and assign into list 
recommendations = playlist_reccomendations(dsi_playlist,15)

#print similar songs with corresponding artist and song name
show_recommendations(recommendations)

Artist: Roy Jones Jr.
Song: Go Hard, Go Home (feat. Giz, Swellz & Choppa)
Year & Popularity: 2004, 59

Artist: Tesla
Song: Games People Play
Year & Popularity: 1994, 35

Artist: Chicago
Song: Will You Still Love Me? - 2009 Remaster
Year & Popularity: 1986, 40

Artist: Ying Yang Twins
Song: Get Low
Year & Popularity: 2002, 71

Artist: Ozzy Osbourne
Song: Dreamer
Year & Popularity: 2001, 65

Artist: Highly Suspect
Song: 16
Year & Popularity: 2019, 63

Artist: Porter Robinson
Song: Divinity
Year & Popularity: 2014, 57

Artist: "Christine DClario"
Song: Gloria en Lo Alto
Year & Popularity: 2012, 55

Artist: Boyz II Men
Song: The Color of Love
Year & Popularity: 2002, 50

Artist: A Foot In Coldwater
Song: (Make Me Do) Anything You Want
Year & Popularity: 1972, 33

Artist: The Calling
Song: Stigmatized
Year & Popularity: 2001, 54

Artist: Aaron Shust
Song: My Savior My God
Year & Popularity: 2005, 55

Artist: Chris Tomlin
Song: God Of This City
Year & Popularity: 2008, 45

Artist: Vitamin C


In [94]:
#add song into playlist after looking at recommendation

dsi_playlist = add_song_playlist(recommendations[10],dsi_playlist)

show_current_playlist(dsi_playlist)

Artist: Vitamin C
Song: Graduation (Friends Forever)

Artist: Chicago
Song: Will You Still Love Me? - 2009 Remaster

Artist: Porter Robinson
Song: Divinity

Artist: Ying Yang Twins
Song: Get Low

Artist: "Christine DClario"
Song: Gloria en Lo Alto

Artist: M2M
Song: Pretty Boy

Artist: Ozzy Osbourne
Song: Dreamer

Artist: The Calling
Song: Stigmatized



And thats it! This will be the 8 song in that I would base my DSI 21 Grad playlist on!!

## Recommendations

Implicit
- or get implicit feedback on the number times a user skips recommended suggestion (not the most accurate as clicks can happen by accident)
- what is the ratio of songs added to the playlist against suggested
- the duration of the song you listen to
- how often the song is repeated provided its added into the playlist


Explicit
- have the user for explicit feedback whether they like the recommendation (perceived quality)