In [11]:
import spotipy
import pandas as pd
import numpy as np
from spotipy.oauth2 import SpotifyClientCredentials
from spot_secrets import *

In [28]:
#Authentication
client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager)

### Functions

In [None]:
def normalize(df, columns):
    result = df.copy()
    for feature_name in columns:
        max_value = df[feature_name].max()
        min_value = df[feature_name].min()
        result[feature_name] = (df[feature_name] - min_value) / (max_value - min_value)
    return result

In [16]:
def get_features(dataframe, 
                column, 
                links = [],
                danceability = [],
                energy = [],
                loudness = [],
                speechiness = [],
                acousticness = [],
                instrumentalness = [],
                liveness = [],
                valence = [],
                tempo = []):

    for link in dataframe[column]:  
        
        connection = sp.audio_features(link)[0]
        
        links.append(link)
        danceability.append(connection["danceability"]) if connection["danceability"] is not None else danceability.append(None)
        energy.append(connection["energy"]) if connection["energy"] is not None else energy.append(None)
        loudness.append(connection["loudness"]) if connection["loudness"] is not None else loudness.append(None)
        speechiness.append(connection["speechiness"]) if connection["speechiness"] is not None else speechiness.append(None)
        acousticness.append(connection["acousticness"]) if connection["acousticness"] is not None else acousticness.append(None)
        instrumentalness.append(connection["instrumentalness"]) if connection["instrumentalness"] is not None else instrumentalness.append(None)
        liveness.append(connection["liveness"]) if connection["liveness"] is not None else liveness.append(None)
        valence.append(connection["valence"]) if connection["valence"] is not None else valence.append(None)
        tempo.append(connection["tempo"]) if connection["tempo"] is not None else tempo.append(None)
  
    features_df = pd.DataFrame({"song_url": links,
                                "danceability": danceability,
                                "energy": energy,
                                "loudness": loudness,
                                "speechiness": speechiness,
                                "acousticness": acousticness,
                                "instrumentalness": instrumentalness,
                                "liveness": liveness,
                                "valence": valence,
                                "tempo": tempo})
    return features_df

In [2]:
def call_playlist(creator, playlist_id):

    playlist_features_list = ["artist",
                              "track_name",  
                              "danceability",
                              "energy",
                              "loudness",
                              "speechiness", 
                              "acousticness", 
                              "instrumentalness",
                              "liveness",
                              "valence",
                              "tempo"]
    
    playlist_df = pd.DataFrame(columns = playlist_features_list)

    playlist = sp.user_playlist_tracks(creator, playlist_id)["items"]
    for track in playlist:

        # Create empty dict
        playlist_features = {}

        # Get metadata
        playlist_features["artist"] = track["track"]["album"]["artists"][0]["name"]
        playlist_features["album"] = track["track"]["album"]["name"]
        playlist_features["track_name"] = track["track"]["name"]
        playlist_features["track_id"] = track["track"]["id"]
        
        # Get audio features
        audio_features = sp.audio_features(playlist_features["track_id"])[0]
        for feature in playlist_features_list[2:]:
            playlist_features[feature] = audio_features[feature]
        
        # Concat the dfs
        track_df = pd.DataFrame(playlist_features, index = [0])
        playlist_df = pd.concat([playlist_df, track_df], ignore_index = True)

    
    return playlist_df

### Some custom searching

In [None]:
name = "070 Shake"
results = sp.search(q='artist:' + "Rex Orange County"+ " track:" + "Loving Is Easy (feat. Benny Sings)", type='track')

In [None]:
playlist_link = "https://open.spotify.com/playlist/37i9dQZEVXbNG2KDcFcKOF?si=1333723a6eff4b7f"
playlist_URI = playlist_link.split("/")[-1].split("?")[0]
track_uris = [x["track"]["uri"] for x in sp.playlist_tracks(playlist_URI)["items"]]

In [None]:
sp.playlist_tracks(playlist_URI)['items'][1]

### Downloading characteristics of songs from personal data

In [29]:
path = "/Volumes/HD/GitHub/spotify_app/spotify_data"
streaming_df = pd.read_csv(path + "/streaming_history_concat.csv", index_col=0)

In [30]:
# counting number of plays
plays_count = streaming_df.groupby("trackName")['artistName'].count().reset_index()

In [31]:
# drop duplicates
streaming_df = streaming_df.drop_duplicates(subset=['trackName'])
streaming_df.shape

(6432, 4)

In [32]:
streaming_df = streaming_df.merge(plays_count, on="trackName")

In [33]:
# sort dataframe and choose 500 most popular tracks
streaming_df = streaming_df.sort_values("artistName_y", ascending=False)
streaming_df = streaming_df.head(int(len(streaming_df) * 0.2))

In [34]:
streaming_df = streaming_df.rename(columns = {"artistName_x": "artistName", "artistName_y": "plays_count"})

In [35]:
# download links of songs
songs_links = []
for index, song in streaming_df.iterrows():
    results = sp.search(q='artist:' + streaming_df["artistName"][index] + " track:" + streaming_df["trackName"][index], type='track')
    items = results['tracks']['items']

    if len(items) != 0:
        try:
            link = items[1]['href']
        except:
            link = items[0]['href']
    else:
        link = ""
    songs_links.append(link)

In [36]:
streaming_df['song_url'] = songs_links
streaming_df = streaming_df[streaming_df['song_url'] != ""] 

In [37]:
# get features of the songs
features = get_features(streaming_df, "song_url")

Expected id of type track but found type tracks https://api.spotify.com/v1/tracks/1o0kWPueYo94LIjPYOE5Nf
Expected id of type track but found type tracks https://api.spotify.com/v1/tracks/70AYiGbc4mWZGEqiipBBDb
Expected id of type track but found type tracks https://api.spotify.com/v1/tracks/4CZgQq8KJYWGh1bM3sqGmd
Expected id of type track but found type tracks https://api.spotify.com/v1/tracks/4n1WwhKzKHUX598tvU1wMu
Expected id of type track but found type tracks https://api.spotify.com/v1/tracks/4Dvkj6JhhA12EX05fT7y2e
Expected id of type track but found type tracks https://api.spotify.com/v1/tracks/6sQ1IeoCqOF3RjpCitYDWq
Expected id of type track but found type tracks https://api.spotify.com/v1/tracks/6Ucrht7JfguIXoa4hF9Leo
Expected id of type track but found type tracks https://api.spotify.com/v1/tracks/4XBIzFEVvF4stC7E6IigLl
Expected id of type track but found type tracks https://api.spotify.com/v1/tracks/4k6Uh1HXdhtusDW5y8Gbvy
Expected id of type track but found type tracks https:/

In [38]:
features = features.dropna()

In [39]:
streaming_df = streaming_df.merge(features, on="song_url")
streaming_df = streaming_df.drop(["endTime", "song_url", "sec_played"], axis=1)
streaming_df.to_csv("../spotify_data/personal_data_to_recommend.csv")


In [None]:
streaming_df.head()

### Building recommendation database

In [None]:
playlist_URL = "spotify:playlist:54A6wGeGp7yAra5hwK6xHq"

In [None]:
first_df = call_playlist("spotify", "spotify:playlist:54A6wGeGp7yAra5hwK6xHq").reset_index(drop=True)
second_df = call_playlist("spotify", "spotify:playlist:3IsxzDS04BvejFJcQ0iVyW").reset_index(drop=True)
third_df = call_playlist("spotify", "spotify:playlist:37i9dQZEVXcEQgVh36QNFV").reset_index(drop=True)
fourth_df = call_playlist("spotify", "spotify:playlist:37i9dQZF1DX0YKekzl0blG").reset_index(drop=True)
fifth_df = call_playlist("spotify", "spotify:playlist:37i9dQZF1DX0YKekzl0blG").reset_index(drop=True)
sixth_df = call_playlist("spotify", "spotify:playlist:1coYrjao0tn6XY4HA6AXWV").reset_index(drop=True)

In [None]:
frames = [first_df, second_df, third_df, fourth_df, fifth_df, sixth_df]
recommender_df = pd.concat(frames, ignore_index=True)

In [None]:
recommender_df["artist"][0]

In [None]:
recommender_df.to_csv("spotify_data/playlists_data_to_recommend.csv")

### Creating recommendations

In [6]:
personal_df = pd.read_csv(path + "/personal_data_to_recommend.csv", index_col=0)
recommender_df = pd.read_csv(path + "/playlists_data_to_recommend.csv", index_col=0)

In [None]:
recommender_df.head()

In [None]:
personal_df.head()

In [None]:
# normalizing columns with different scale
recommender_df = normalize(recommender_df, ["loudness", "tempo"])
personal_df = normalize(personal_df, ["loudness", "tempo"])

In [None]:
# drop songs from recommender which are also in the streaming history
recommender_df = recommender_df[~recommender_df["track_name"].isin(personal_df['trackName'].tolist())]

In [None]:
# prepare data for recommender
personal_df = personal_df.drop(["plays_count"], axis=1)
recommender_df = recommender_df.drop(["album", "track_id"], axis=1)

In [None]:
recommender_df.head()

In [None]:
def recommend_me(df_personal, 
                df_recommender, 
                columns_for_vector, 
                results_dict = {"artist_personal":[],
                                "track_personal":[],
                                "artist_database":[],
                                "track_database":[],
                                "distance":[]}
):                                                                        
    df_personal = df_personal.sample(int(df_personal.shape[0]/3))
    df_recommender = df_recommender.sample(int(df_recommender.shape[0]/5))
    
    for i in range (0, df_personal.shape[0]):
        
        for j in range(0, df_recommender.shape[0]):
            
            distance = np.linalg.norm(df_personal[columns_for_vector].iloc[i, ].values - df_recommender[columns_for_vector].iloc[j, ].values)
            
            if all(distance < d for d in results_dict["distance"]) and df_personal["artistName"].iloc[i] not in results_dict["artist_personal"] and df_recommender["track_name"].iloc[j] not in results_dict["track_database"]:            
                results_dict["track_personal"].append(df_personal["trackName"].iloc[i])
                results_dict["artist_personal"].append(df_personal["artistName"].iloc[i])
                results_dict["artist_database"].append(df_recommender["artist"].iloc[j])
                results_dict["track_database"].append(df_recommender["track_name"].iloc[j])
                results_dict["distance"].append(distance)

            if len(results_dict["distance"]) > 5:
                lowest_value = min(results_dict["distance"])
                lowest_index = results_dict["distance"].index(lowest_value)
                
                for key in results_dict:
                    results_dict[key].pop(lowest_index)
                
    return results_dict

In [None]:
distances = recommend_me(personal_df, recommender_df, ["danceability", "energy", "loudness", "speechiness", "acousticness", "instrumentalness", "liveness", "valence", "tempo"])

In [None]:
distances