In [229]:
# One-time setup: run `%pip install spotipy` in this kernel if the package is missing

In [238]:
import os
from datetime import datetime

import numpy as np
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyOAuth

In [239]:
# Setting up your Spotify API access.
# Credentials are read from the environment instead of being written into the
# notebook, so they are never saved or shared together with the code.
# Export these before starting the kernel:
#   SPOTIPY_CLIENT_ID, SPOTIPY_CLIENT_SECRET and (optionally) SPOTIPY_REDIRECT_URI
# NOTE(review): any client secret that was previously stored in this notebook
# should be regenerated in the Spotify developer dashboard.
scope = 'user-top-read user-read-recently-played'
client_id = os.environ['SPOTIPY_CLIENT_ID']  # KeyError here means the variable is not set
client_secret = os.environ['SPOTIPY_CLIENT_SECRET']
redirect_uri = os.environ.get('SPOTIPY_REDIRECT_URI', 'http://127.0.0.1:9090')

# Asking for authorization
sp = spotipy.Spotify(
    auth_manager=SpotifyOAuth(scope=scope,
                              client_id=client_id,
                              client_secret=client_secret,
                              redirect_uri=redirect_uri))

In [240]:
# Initializing the information shared by every later cell
today = datetime.now()  # timestamp of this data retrieval, stamped on every table
country = 'HK'  # market passed to the recommendations request
# Fetch the profile once and read both fields from the same response
# (avoids a second identical API request).
user_profile = sp.current_user()
user_name = user_profile['display_name']
user_id = user_profile['id']

In [233]:
# Get top tracks and put them into a dictionary by durations
# Short_term - last 4 weeks
# medium_term - last 6 months
# long_term - all time
#
# Result: top_tracks_dict[term] is a DataFrame with one row per top track,
# ordered by rank, carrying track metadata, the retrieval date, the track's
# audio features and the user id.
track_columns = ['ttrack_artist', 'ttrack_artist_id',
                 'ttrack_name', 'ttrack_id',
                 'ttrack_popularity',
                 'ttrack_release_date']
# Bookkeeping fields of the audio-features payload that are not features
track_feature_meta_columns = ['type', 'id', 'uri', 'track_href', 'analysis_url']
# Largest page the top-items endpoint accepts per request (see Spotify Web API docs)
track_page_size = 50

top_tracks_dict = {}
for term in ['short_term', 'medium_term', 'long_term']:
    # Collect plain rows first and build the DataFrame once per term; this
    # also guarantees a fresh (possibly empty) table for every term instead of
    # reusing whatever the previous iteration left behind.
    track_rows = []
    offset_track = 0
    while True:
        top_tracks = sp.current_user_top_tracks(
            limit=track_page_size, offset=offset_track, time_range=term)
        page_items = top_tracks['items']
        if not page_items:
            break  # an empty page means everything has been read
        # Iterate over the returned items themselves: len(top_tracks) is the
        # number of keys in the response dict, not the number of tracks, so
        # indexing by it fails on a page shorter than that count.
        for item in page_items:
            first_artist = item['artists'][0]
            track_rows.append([
                first_artist.get('name', np.nan),            # top_track_artist_name
                first_artist.get('id', np.nan),              # top_track_artist_id
                item.get('name', np.nan),                    # top_track_name
                item.get('id', np.nan),                      # top_track_id
                item.get('popularity', np.nan),              # top_tracks_popularity
                item['album'].get('release_date', np.nan)])  # top_tracks_date
        offset_track += len(page_items)

    top_tracks_full = pd.DataFrame(track_rows, columns=track_columns)
    # Add ranks (1-based, in the order Spotify returned the tracks)
    top_tracks_full.insert(0, 'rank', range(1, len(top_tracks_full) + 1))
    # Add data retrieval dates
    top_tracks_full['Date'] = today
    # Add audio features (skipped when there are no tracks to look up)
    track_ids = list(top_tracks_full['ttrack_id'])
    if track_ids:
        track_features = sp.audio_features(track_ids) or []
        # The API can return None for a track without features; an empty dict
        # keeps that row aligned and filled with NaN.
        top_tracks_audio_features = pd.DataFrame(
            [features or {} for features in track_features]).drop(
            columns=track_feature_meta_columns, errors='ignore')
        top_tracks_full = pd.concat(
            [top_tracks_full, top_tracks_audio_features], axis=1)
    # Add user id columns
    top_tracks_full['user_id'] = user_id
    top_tracks_dict[term] = top_tracks_full

In [234]:
# Get top artists and put them into a dictionary by durations
# Short_term - last 4 weeks
# medium_term - last 6 months
# long_term - all time
#
# Result: top_artists_dict[term] is a DataFrame with one row per top artist,
# ordered by rank, plus the retrieval date and the user id.
artist_columns = ['top_artist', 'top_artist_id',
                  'top_artist_followers', 'top_artist_genres',
                  'top_artist_popularity']
# Largest page the top-items endpoint accepts per request (see Spotify Web API docs)
artist_page_size = 50

top_artists_dict = {}
for term in ['short_term', 'medium_term', 'long_term']:
    # Rows are gathered in a list and turned into a DataFrame once per term,
    # so every term starts from a clean slate even when no artist is returned.
    artist_rows = []
    offset_artist = 0
    while True:
        top_artists = sp.current_user_top_artists(
            limit=artist_page_size, offset=offset_artist, time_range=term)
        page_items = top_artists['items']
        if not page_items:
            break  # an empty page means everything has been read
        # Iterate over this response's own items; the loop must not depend on
        # the size of a variable created in the top-tracks cell.
        for item in page_items:
            artist_rows.append([
                item.get('name', np.nan),
                item.get('id', np.nan),
                item['followers'].get('total', np.nan),
                item.get('genres', np.nan),
                item.get('popularity', np.nan)])
        offset_artist += len(page_items)

    top_artists_full = pd.DataFrame(artist_rows, columns=artist_columns)
    # Add ranks (1-based, in the order Spotify returned the artists)
    top_artists_full.insert(0, 'rank', range(1, len(top_artists_full) + 1))
    # Add data retrieval dates
    top_artists_full['Date'] = today
    # Add user id columns
    top_artists_full['user_id'] = user_id
    top_artists_dict[term] = top_artists_full

In [235]:
# Get recommended songs for each time range, seeded with that range's
# top artists and top tracks.
#
# Result: recommend_dict[term] is a DataFrame with the columns
# Name, Artist, Date and user_id.
recommend_dict = {}
for term in ['short_term', 'medium_term', 'long_term']:
    # Seeds come from the tables of the *current* term. The artist ids go in
    # seed_artists: seed_genres expects genre names, not ids.
    # 3 artists + 2 tracks stays within the 5-seed limit of the endpoint.
    seed_artist_ids = list(top_artists_dict[term]['top_artist_id'])[:3]
    seed_track_ids = list(top_tracks_dict[term]['ttrack_id'])[:2]

    if seed_artist_ids or seed_track_ids:
        recommend_response = sp.recommendations(
            seed_artists=seed_artist_ids,
            seed_tracks=seed_track_ids,
            country=country)
        recommended_tracks = recommend_response['tracks']
    else:
        # Nothing to seed with (no listening history for this term):
        # keep an empty table rather than sending an invalid request.
        recommended_tracks = []

    recommend = pd.DataFrame(
        [(track['name'], track['artists'][0]['name'])
         for track in recommended_tracks],
        columns=['Name', 'Artist'])
    recommend['Date'] = today
    recommend['user_id'] = user_id
    recommend_dict[term] = recommend

In [236]:
# Getting recently played song
# GMT + 0 for played_at
#
# Result: recently_played is a DataFrame with one row per play
# (Name, Artist, id, played_at, user_id, Date) followed by the audio features
# of the played track.
# The raw API response gets its own name so `recently_played` only ever
# refers to the DataFrame.
recently_played_response = sp.current_user_recently_played(limit=50)
recently_played = pd.DataFrame(
    [[play['track']['name'],
      play['track']['artists'][0]['name'],
      play['track']['id'],
      play['played_at']]
     for play in recently_played_response['items']],
    columns=['Name', 'Artist', 'id', 'played_at'])
# Add user id column and data retrieval date
recently_played['user_id'] = user_id
recently_played['Date'] = today
# Get the audio features for recently played stuff too
# (skipped when the history is empty, since there is nothing to look up)
recently_played_ids = list(recently_played['id'])
if recently_played_ids:
    recently_played_features = sp.audio_features(recently_played_ids) or []
    # The API can return None for a track without features; an empty dict
    # keeps that row aligned and filled with NaN.
    recently_played_audio_features = pd.DataFrame(
        [features or {} for features in recently_played_features]).drop(
        columns=['type', 'id', 'uri', 'track_href', 'analysis_url'],
        errors='ignore')
    recently_played = pd.concat(
        [recently_played, recently_played_audio_features], axis=1)