In [201]:
import sys
import json
sys.path.insert(0, './CPPotify/source/py')
from CPPotify import CPPotify

import numpy as np
import pandas as pd

from utils.utils import cosine_similarity, jaccard_similarity, list_union
from keys import CLIENT_ID, CLIENT_SECRET, REDIRECT_URI, STATE, SCOPE, SHOW_DIALOG

from playlist_str import playlist_str as get_playlists_results

ModuleNotFoundError: No module named 'playlist_str'

In [164]:
pd.options.display.precision = 10

In [165]:
cpp = CPPotify(CLIENT_ID, CLIENT_SECRET, REDIRECT_URI, STATE, SCOPE, SHOW_DIALOG)

In [166]:
cpp.open_auth_url()

In [167]:
cpp.oAuth_flow('https://example.com/callback?code=AQCAV7JQ-ZQ-WSXAUmY8mN3yQTtTszywvtBdyalwo1264SeDGhDQ6SJARL7PABMuhYr6ekt0Qrsq1ghnYrvraI1VzGDX76ep6jLPwSUxxfPeTy3qQMBVnO83foqxs64p-rwtNBYm4s42Wl1GyqKpZsUA4lLlrUnKt_z-Qx8T26wID6YqCWLbToLruPGBlq4Ap-kjjAHi-R3J9-WHwCoCCyKtjU2evdC4EXQQ136kX4hUe9wqNB7xH8v2AUqVChnlJ5Zo2in2VVuEjMAgHtFrTvoBN4ZAKkaKz6kv76Qrp06nxv47tyM3MR7PqskF50yXHbH0OSOCGw0oV5FaHKY8syDXWa_AUYh75iBnmiOx-A&state=34fFs29kd09')

In [168]:
dat = pd.read_csv('audio_features_full/features_full.csv')

In [96]:
def get_song_artist_df(followed_playlists):
    # Take the playlists that a user follows. For each playlist in that list, create a dictionary having keys set to each unique track ID, with values set to 
    # the artists that are featured on that track. 
    
    followed_tracks_artists = {}
    for playlist_id in followed_playlists:
        try:
            res = cpp.get_playlists(False, '', playlist_id, 'tracks')
            for track in res['items']:
                if track['track']['id'] not in followed_tracks_artists.keys():
                    followed_tracks_artists[track['track']['id']] = [art['id'] for art in track['track']['artists']]
                else:
                    for artist in track['track']['artists']:
                        followed_tracks_artists[track['track']['id']].append(artist['id'])
        except Exception as e:
            print(e)
            pass

    # Create a DataFrame containing with each row containing a unique artist-track combinations 

    followed_tracks_df = pd.DataFrame(columns=['track', 'artist'])

    for track in followed_tracks_artists.keys():
        for row in followed_tracks_artists[track]:
            followed_tracks_df = followed_tracks_df.append({'track': track, 'artist': row}, ignore_index=True)
    
    # Now that we have a DataFrame of unique artist-track combinations, we want to find the genres for each unique artist. Spotify does not release track genre information
    # through its API, for this reason we have to find the genre of the artist. We will use this genre information to filter the final DataFrame to match genres with the song/artist
    # that is currently being listened to

    artist_genre = {}
    for artist_id in followed_tracks_df['artist'].unique():
        if artist_id not in artist_genre.keys():
            artist_genre[artist_id] = cpp.get_artists(artist_id)['genres'] if cpp.get_artists(artist_id)['genres'] != [] and 'genres' in cpp.get_artists(artist_id).keys() else None
    
    # For the newly created artist_genre dictionary above, match each artist ID in our previously created DataFrame with its genre in the dictionary. Use this to create a new 'genre'
    # column in the DataFrame. Filter out any artists that do not have genre information

    followed_tracks_df['genres'] = [artist_genre[id] for id in followed_tracks_df['artist']]
    followed_tracks_df = followed_tracks_df[followed_tracks_df['genres'].notna()]

    # For the newly created 'genre' column, split the genres into words and create a union of those words for each artist. The reason we do this is that Spotify contains a lot of unique 
    # genres. Since we have a rather small dataset, we want to split more rare genres such as 'german hip hop' into a set ('german', 'hip', 'hop') so that this song will match with other 
    # german or hip-hop tracks
    
    followed_tracks_df['genre_words'] = [list_union([word for word in [genre_string.split(' ') for genre_string in genre_list]]) for genre_list in followed_tracks_df['genres'].values]

    return followed_tracks_df

In [19]:
def current_song_info(song_id)->tuple:
    # For the current song, get the song's artists and the genres for those artists

    track_info = cpp.get_tracks(song_id)
    
    current_artists = [artist['id'] for artist in track_info['artists']]
    current_genres = {}
    for artist in current_artists:
        try:
            current_genres[artist] = cpp.get_artists(artist)['genres']
        except Exception as e:
            current_genres[artist] = None
                
    return current_genres

In [158]:
def get_audio_features(tracks, existing_data):
    # Create a dictionary of tracks and their audio features. If the song exists in the current dataset, retrieve that information. If it doesn't, retrieve the information from the 
    # Spotify API 

    audio_features = {}
    for id in tracks:
        if id in existing_data['track_id'].values:
            audio_features[id] = existing_data[existing_data['track_id']==id].iloc[:, np.r_[5:16, 21:23]].values.tolist()
        else:
            try:
                audio_features[id] = list({k:v for k,v in cpp.get_tracks(id, 'audio-features').items() if k in existing_data.columns[np.r_[5:16, 21:23]].values}.values())
            except:
                pass 
    
    return audio_features

In [192]:
def song_rec(user_id, get_playlists_results, current_song, stored_songs)->str:
    # Given a user_id, find all playlists that the user follows. Using this logic, we can create a set of tracks listened to by users that created these followed playlists.
    # Since the Spotify API does not let us find followed users, this is our best bet for collaborative filtering

    followed_playlists = [playlist['id'] for playlist in json.loads(get_playlists_results)['items'] if playlist['owner']['id'] != user_id]
    
    # Call the two functions above to get information for tracks contained in 'followed playlists', as well as track information for the current song

    followed_tracks_df = get_song_artist_df(followed_playlists)
    current_genres = current_song_info(current_song)

    # For the genres of the current song/artist(s)
    
    current_genres_words = list_union([word for word in [dict_values.split(' ') for all_genres in current_genres.values() for dict_values in all_genres]])
    
    # Find the set Jaccard distance between the genres of the current song/artist(s) and all the songs in our followed tracks dataset. Since Spotify has unique genre naming conventions,
    # it will be harder to find exact matches for sets of genres 
    
    followed_tracks_df['jac_dist'] = [round(jaccard_similarity(np.array(current_genres_words), np.array(followed_genre)), 2) for followed_genre in followed_tracks_df['genre_words']]

    # Keep songs which genre sets have Jaccard distance greater than or equal to 0.4 
    
    followed_tracks_df = followed_tracks_df[followed_tracks_df['jac_dist'] >= 0.4] 

    # Get the audio features for the songs in the remaining dataset
    
    followed_tracks_audio_features = get_audio_features(followed_tracks_df['track'].unique(), stored_songs)

    # Set the audio_features retrieved above as a new column in the dataframe
    
    followed_tracks_df['audio_features'] = [followed_tracks_audio_features[track] if track in followed_tracks_audio_features else None for track in followed_tracks_df['track']]
    followed_tracks_df = followed_tracks_df[followed_tracks_df['audio_features'].notna()]

    # Get the audio_features of the current song. We will be calculating the cosine similarity of the audio features of the current song to each song in our dataframe
    
    af_list_values = list({k:v for k,v in cpp.get_tracks(current_song, 'audio-features').items() if k in stored_songs.columns[np.r_[5:16, 21:23]].values}.values())

    # Now that we have the audio features of the current song and the songs with closely matching genres, we can calculate cosine similarity. Sort the 
    # DataFrame by cosine similarity and Jaccard distance. Then return the DataFrame

    followed_tracks_df['cosine_sim'] = [cosine_similarity(np.array(af_list_values), stored_songs.iloc[x, np.r_[5:16, 21:23]].values) for x in range(len(followed_tracks_df))]
    followed_tracks_df.sort_values(['jac_dist', 'cosine_sim'], ascending=False)
   
    return followed_tracks_df

In [None]:
def

In [193]:
followed_playlists = song_rec('1252723390', get_playlists_results, '0DAQryfUiFZdWFo76RYBc8', dat)

In [194]:
followed_playlists

Unnamed: 0,track,artist,genres,genre_words,jac_dist,audio_features,cosine_sim
39,3qAyjdhtnU2a14rrRKEkcE,02kJSzxNuaWGqwubyUba0Z,"[hip hop, indie pop rap, oakland hip hop, pop ...","{hop, hip, oakland, rap, indie, pop}",0.43,"[0.836, 0.703, 0, -4.732, 1, 0.107, 0.0301, 0,...",0.9999999626
40,1cTZMwcBJT0Ka3UJPXOeeN,3TVXtAsR1Inumwj472S9r4,"[canadian hip hop, canadian pop, hip hop, pop ...","{hop, canadian, hip, rap, pop, toronto}",0.43,"[0.567, 0.913, 8, -6.471, 1, 0.0736, 0.0934, 0...",0.9999999421
43,6n4U3TlzUGhdSFbUUhTvLP,3TVXtAsR1Inumwj472S9r4,"[canadian hip hop, canadian pop, hip hop, pop ...","{hop, canadian, hip, rap, pop, toronto}",0.43,"[[0.907, 0.633, 2.0, -5.145, 1.0, 0.184, 0.087...",0.9999999835
47,4qKcDkK6siZ7Jp1Jb4m0aL,3TVXtAsR1Inumwj472S9r4,"[canadian hip hop, canadian pop, hip hop, pop ...","{hop, canadian, hip, rap, pop, toronto}",0.43,"[[0.922, 0.581, 10.0, -7.495, 1.0, 0.27, 0.001...",0.9999999826
56,7sO5G9EABYOXQKNPNiE9NR,0iEtIxbK0KxaSlF7G42ZOp,"[hip hop, pop rap, rap, southern hip hop, trap]","{hop, hip, southern, rap, pop, trap}",0.43,"[[0.88, 0.428, 9.0, -8.28, 1.0, 0.206, 0.149, ...",0.9999999768
58,7sO5G9EABYOXQKNPNiE9NR,0iEtIxbK0KxaSlF7G42ZOp,"[hip hop, pop rap, rap, southern hip hop, trap]","{hop, hip, southern, rap, pop, trap}",0.43,"[[0.88, 0.428, 9.0, -8.28, 1.0, 0.206, 0.149, ...",0.9999999556
62,4XoP1AkbOurU9CeZ2rMEz2,3TVXtAsR1Inumwj472S9r4,"[canadian hip hop, canadian pop, hip hop, pop ...","{hop, canadian, hip, rap, pop, toronto}",0.43,"[0.869, 0.687, 1, -6.816, 1, 0.263, 0.0208, 1e...",0.999999991
76,2Xqd0wUttjueBfdcltADOv,02kJSzxNuaWGqwubyUba0Z,"[hip hop, indie pop rap, oakland hip hop, pop ...","{hop, hip, oakland, rap, indie, pop}",0.43,"[0.838, 0.771, 1, -3.791, 1, 0.244, 0.0117, 0,...",0.9999999994
92,6UjfByV1lDLW0SOVQA4NAi,3TVXtAsR1Inumwj472S9r4,"[canadian hip hop, canadian pop, hip hop, pop ...","{hop, canadian, hip, rap, pop, toronto}",0.43,"[0.877, 0.391, 1, -8.196, 1, 0.063, 0.0317, 0....",0.999999984
94,6fwdbPMwP1zVStm8FybmkO,0iEtIxbK0KxaSlF7G42ZOp,"[hip hop, pop rap, rap, southern hip hop, trap]","{hop, hip, southern, rap, pop, trap}",0.43,"[0.835, 0.413, 1, -9.81, 1, 0.396, 0.373, 0.00...",0.9999999885
