## Import pandas and read files 

In [370]:
import pandas as pd 
pd.set_option("display.max_rows", 6)

# read the csv files and make them DataFrames
taylorData = pd.read_csv("taylor_swift_spotify.csv")
spotifyData = pd.read_csv("spotify-tracks-dataset.csv") 

Since Taylor Swift often releases the same titled tracks under different albums. We will get rid of all duplicates to start with a cleaner dataset. 

In [371]:
# drop any duplicate tracks 
# (Taylor sometimes has the same tracks in different albums) 
taylorData = taylorData.drop_duplicates(subset = ["name"])
taylorData = taylorData.reset_index(drop=True)

## Find spotify track data 

In [372]:
def getTrackId(link): 
    # uses a spotify link to access the spotify id of a track 
    arr = link.split("/") 
    id = arr[len(arr)-1] # get the last element 
    arr = id.split("?")
    id = arr[0]
    return id

def findSong(link, dataset=spotifyData): 
    # returns a dataframe with the desired song title
    id = getTrackId(link) # get the id
    song = dataset.loc[dataset["track_id"] == id]
    return song.reset_index(drop=True)


song = findSong("https://open.spotify.com/track/3hUxzQpSfdDqwM3ZTFQY0K?si=42d0177e14ef4cfd")
print(song.to_string())

   Unnamed: 0                track_id       artists album_name track_name  popularity  duration_ms  explicit  danceability  energy  key  loudness  mode  speechiness  acousticness  instrumentalness  liveness  valence   tempo  time_signature track_genre
0       81556  3hUxzQpSfdDqwM3ZTFQY0K  Taylor Swift   folklore     august          86       261922     False         0.532   0.623    5    -9.208     1       0.0331         0.538          0.000073    0.0925    0.403  89.937               4         pop


## Get data from track 

In [373]:
# These are the attributes that will be used in recommending a song
categories = {"danceability", 
              "energy", 
              "loudness", 
              "speechiness", 
              "acousticness", 
              "instrumentalness", 
              "valence",
              "liveness",
              "tempo"}

def buildDict(df, categories): 
    # builds a dictionary with all the given categories 
    if (len(df) < 1): 
        return {}
    else: 
        dict = {} 
        row = df.index.values.tolist()[0]
        for cat in categories: 
            dict.update({cat : df[cat][row]})
        return dict

dict = buildDict(song, categories)
print(dict)

{'loudness': -9.208, 'acousticness': 0.538, 'tempo': 89.937, 'speechiness': 0.0331, 'energy': 0.623, 'danceability': 0.532, 'liveness': 0.0925, 'instrumentalness': 7.28e-05, 'valence': 0.403}


## Generating song similarity 

In [374]:
def calculateDeltaAvg(dict, row):
    sum = 0
    for key in dict.keys(): 
        delta = dict[key] - row[key]
        delta = abs(delta)
        '''
        if (key == "tempo"): 
            delta = delta/100 # average tempo of a song
        elif (key == "loudness"): 
            delta = delta/60 # range of average loudness values (-60 to 0)
        elif (key == "speechiness" or key == "acousticness"): 
            delta = delta * 10
        elif (key == "energy"): 
            delta = delta * 5
        '''
        
        sum += delta
    return sum/len(dict)
taylorDataMatched = taylorData.copy()

# create a column with the "match sum":
# a sum of all the distances between the inputted song's attributes and Taylor's songs
taylorDataMatched["match_sum"] = taylorDataMatched.apply(lambda row: calculateDeltaAvg(dict, row), axis=1) 
taylorDataMatched

Unnamed: 0.1,Unnamed: 0,name,album,release_date,track_number,id,uri,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,popularity,duration_ms,match_sum
0,0,Welcome To New York (Taylor's Version),1989 (Taylor's Version) [Deluxe],2023-10-27,1,4WUepByoeqcedHoYhSNHRt,spotify:track:4WUepByoeqcedHoYhSNHRt,0.009420,0.757,0.610,0.000037,0.3670,-4.840,0.0327,116.998,0.685,72,212600,3.639168
1,1,Blank Space (Taylor's Version),1989 (Taylor's Version) [Deluxe],2023-10-27,2,0108kcWLnn2HlH2kedi1gn,spotify:track:0108kcWLnn2HlH2kedi1gn,0.088500,0.733,0.733,0.000000,0.1680,-5.376,0.0670,96.057,0.701,73,231833,1.235553
2,2,Style (Taylor's Version),1989 (Taylor's Version) [Deluxe],2023-10-27,3,3Vpk1hfMAQme8VJ0SNRSkd,spotify:track:3Vpk1hfMAQme8VJ0SNRSkd,0.000421,0.511,0.822,0.019700,0.0899,-4.785,0.0397,94.868,0.305,74,231000,1.137601
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
326,524,Mary's Song (Oh My My My),Taylor Swift,2006-10-24,10,2QrQCMel6v2JiLxqrg4p2O,spotify:track:2QrQCMel6v2JiLxqrg4p2O,0.017700,0.403,0.627,0.000000,0.1820,-5.280,0.0292,74.900,0.374,59,213080,2.193419
327,528,A Perfectly Good Heart,Taylor Swift,2006-10-24,14,1spLfUJxtyVyiKKTegQ2r4,spotify:track:1spLfUJxtyVyiKKTegQ2r4,0.003490,0.483,0.751,0.000000,0.1280,-5.726,0.0365,156.092,0.268,54,220146,7.835831
328,529,Teardrops on My Guitar - Pop Version,Taylor Swift,2006-10-24,15,4pJi1rVt9GNegU9kywjg4z,spotify:track:4pJi1rVt9GNegU9kywjg4z,0.040200,0.459,0.753,0.000000,0.0863,-3.827,0.0537,199.997,0.483,55,179066,12.916519


## Sorting and cleaning 

In [375]:
# sort the data based on the match sum 
taylorDataMatched = taylorDataMatched.sort_values("match_sum")



If the user inputs a Taylor Swift song, it wouldn't be interesting if the program recommended that very same song. Therefore, we get rid of the suggestion if they're the same. 

In [376]:
firstTSRec = taylorDataMatched["name"][0]
songTitle = song["track_name"][0] # this is assuming that the chosen track exists in the spotify dataset

if (firstTSRec == songTitle): 
    taylorDataMatched = taylorDataMatched.drop([0]) # drop the row with that track
    
taylorDataMatched = taylorDataMatched.reset_index(drop=True)
taylorDataMatched

Unnamed: 0.1,Unnamed: 0,name,album,release_date,track_number,id,uri,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,popularity,duration_ms,match_sum
0,216,august,folklore: the long pond studio sessions (from ...,2020-11-25,8,1UHoF9p75IVsvUt6WdTYhn,spotify:track:1UHoF9p75IVsvUt6WdTYhn,0.5530,0.387,0.634,0.000069,0.0931,-9.222,0.0363,89.567,0.4200,52,261920,0.063978
1,233,august - the long pond studio sessions,folklore: the long pond studio sessions (from ...,2020-11-25,8,4MGexoZc12lqE0hYkq9YYx,spotify:track:4MGexoZc12lqE0hYkq9YYx,0.5000,0.554,0.663,0.000048,0.7910,-8.961,0.0324,89.977,0.5450,58,260000,0.136469
2,186,ivy,evermore (deluxe version),2021-01-07,10,43Ykum9T72UOPhBN31grpN,spotify:track:43Ykum9T72UOPhBN31grpN,0.8550,0.515,0.545,0.000020,0.0921,-9.277,0.0353,88.856,0.5350,70,260440,0.188517
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
326,56,Long Live (Taylor's Version),Speak Now (Taylor's Version),2023-07-07,14,4hqJ4bSlYJOXb6Z4SRmzxs,spotify:track:4hqJ4bSlYJOXb6Z4SRmzxs,0.0122,0.375,0.650,0.000000,0.1890,-4.031,0.0402,203.890,0.0752,78,317960,13.363475
327,287,Soon You’ll Get Better (feat. The Chicks),Lover,2019-08-23,12,4AYtqFyFbX0Xkc2wtcygTr,spotify:track:4AYtqFyFbX0Xkc2wtcygTr,0.9070,0.433,0.182,0.000000,0.1230,-12.566,0.0641,207.476,0.4210,70,201586,13.542841
328,140,State Of Grace (Acoustic Version) (Taylor's Ve...,Red (Taylor's Version),2021-11-12,20,5jAIouBES8LWMiriuNq170,spotify:track:5jAIouBES8LWMiriuNq170,0.6630,0.445,0.131,0.000002,0.1080,-13.778,0.0564,208.918,0.1010,65,321640,13.843986
