In [1]:
import pprint
import sys, os

import spotipy
import spotipy.util as util

import pandas as pd
from IPython.display import JSON

In [3]:
# Spotify account name and app credentials are read from environment
# variables; a missing variable raises KeyError right here.
USERNAME, SPOTIPY_CLIENT_ID, SPOTIPY_CLIENT_SECRET, SPOTIPY_REDIRECT_URI = (
    os.environ[env_name]
    for env_name in (
        'USERNAME',
        'SPOTIPY_CLIENT_ID',
        'SPOTIPY_CLIENT_SECRET',
        'SPOTIPY_REDIRECT_URI',
    )
)

In [4]:
# OAuth scope requested for the token; the notebook later reads the
# current user's top tracks with the resulting client.
scope = 'user-top-read'

# Every argument is passed by keyword so the call reads unambiguously.
token = util.prompt_for_user_token(
    username=USERNAME,
    scope=scope,
    client_id=SPOTIPY_CLIENT_ID,
    client_secret=SPOTIPY_CLIENT_SECRET,
    redirect_uri=SPOTIPY_REDIRECT_URI,
)

In [5]:
# Accumulator for the parsed track dicts collected by the fetch cell below.
tracks = list()

## Get songs that I like

In [6]:
# Column names for the per-track fields, in the same order in which
# parseTrack fills its result dict.
columns = [
    'album',
    'artist',
    'duration_ms',
    'track_id',
    'track_name',
    'popularity',
    'track_uri',
]

In [7]:
def parseTrack(item):
    """Flatten a track object into a plain dict of the fields used here.

    Args:
        item: Mapping with the keys ``album`` (itself holding ``name``),
            ``artists`` (a non-empty sequence of mappings holding ``name``),
            ``duration_ms``, ``id``, ``name``, ``popularity`` and ``uri``.

    Returns:
        dict with the keys ``album``, ``artist``, ``duration_ms``,
        ``track_id``, ``track_name``, ``popularity`` and ``track_uri``,
        inserted in that order. Only the first listed artist is kept.

    Raises:
        KeyError / IndexError: if an expected key is absent or ``artists``
            is empty.
    """
    first_artist = item['artists'][0]
    return {
        'album': item['album']['name'],
        'artist': first_artist['name'],
        'duration_ms': item['duration_ms'],
        'track_id': item['id'],
        'track_name': item['name'],
        'popularity': item['popularity'],
        'track_uri': item['uri'],
    }

In [8]:
# Authenticated Spotify client built from the token obtained earlier.
sp = spotipy.Spotify(auth=token)
sp.trace = False

# The three time windows for which the user's top tracks are requested.
ranges = ['short_term', 'medium_term', 'long_term']

# Start from an empty list so that re-running this cell does not append the
# same tracks to `tracks` a second time.
tracks = []

# The loop variable is `time_range`, not `range`: rebinding `range` at
# notebook level would break every later cell that calls the builtin.
for time_range in ranges:
    # The top-tracks endpoint documents a maximum of 50 items per request.
    result = sp.current_user_top_tracks(time_range=time_range, limit=50)
    tracks.extend(parseTrack(item) for item in result['items'])

In [9]:
# One row per collected track dict; the dict keys become the columns.
df = pd.DataFrame(tracks)

In [10]:
# Peek at the first row to check the parsed columns.
df.head(n=1)

Unnamed: 0,album,artist,duration_ms,track_id,track_name,popularity,track_uri
0,Hajk,Hajk,233746,7LEmD0e4GaTjJQ2xIOmkmk,Common Sense,15,spotify:track:7LEmD0e4GaTjJQ2xIOmkmk


In [342]:
# A track can show up in more than one time range; keep only the first
# occurrence of each fully identical row.
df = df.drop_duplicates(keep='first')

In [343]:
def getTrackFeatures(df, client=None, batch_size=100):
    """Add Spotify audio-feature columns to a DataFrame of tracks.

    The audio features of every entry in ``df['track_uri']`` are fetched and
    stored in ``df`` under the names listed in ``track_feature_cols`` (an
    existing ``duration_ms`` column is overwritten). Tracks are requested in
    batches instead of one request per row, and each value is matched to its
    column by feature name rather than by position in the response.

    Args:
        df: DataFrame with a ``track_uri`` column. It is modified in place.
        client: Object exposing ``audio_features(list_of_tracks)`` that
            returns one feature dict (or ``None``) per requested track, in
            request order. Defaults to the notebook-level ``sp`` client.
        batch_size: Number of tracks sent per request; must be at least 1.
            The default of 100 is the documented per-request maximum of
            Spotify's audio-features endpoint.

    Returns:
        The same ``df`` object, now carrying the feature columns. Tracks for
        which no features were returned get missing values.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    track_feature_cols = ['danceability','energy','key','loudness','mode','speechiness',
        'acousticness','instrumentalness','liveness','valence','tempo','type','id','uri',
        'track_href','analysis_url','duration_ms','time_signature']

    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")
    if client is None:
        client = sp  # notebook-level Spotify client

    track_uris = df['track_uri'].tolist()
    feature_rows = []
    # Walk the URIs with an explicit cursor; this deliberately does not rely
    # on the builtin `range`, a name that notebook cells can easily rebind.
    cursor = 0
    while cursor < len(track_uris):
        batch = track_uris[cursor:cursor + batch_size]
        for features in client.audio_features(batch):
            # A track without features comes back as None; an empty dict
            # keeps the row aligned and yields missing values for it.
            feature_rows.append(features or {})
        cursor += batch_size

    # Selecting `columns` by name makes the result independent of the key
    # order of the returned dicts; reusing df.index keeps rows aligned.
    features_df = pd.DataFrame(feature_rows, index=df.index, columns=track_feature_cols)
    df[track_feature_cols] = features_df
    return df

In [344]:
# Attach the audio-feature columns to the de-duplicated track table.
df = getTrackFeatures(df=df)

In [345]:
# Peek at the first row now that the feature columns are present.
df.head(n=1)

Unnamed: 0,album,artist,duration_ms,popularity,track_id,track_name,track_uri,danceability,energy,key,...,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,time_signature
0,Fleet Foxes,Fleet Foxes,191493,34,2SHOE3LHVBluhe1WZ7M8ig,Sun It Rises,spotify:track:2SHOE3LHVBluhe1WZ7M8ig,0.577,0.285,7,...,0.0649,0.163,0.0653,112.025,audio_features,2SHOE3LHVBluhe1WZ7M8ig,spotify:track:2SHOE3LHVBluhe1WZ7M8ig,https://api.spotify.com/v1/tracks/2SHOE3LHVBlu...,https://api.spotify.com/v1/audio-analysis/2SHO...,3


In [346]:
# Persist the table of the user's top tracks (the index is written too).
df.to_csv(path_or_buf='my_top_135.csv')

## Get songs that I dislike

### Will use the top-streamed tracks on Spotify

In [11]:
# Load the raw export of the most-streamed tracks.
dislike_df = pd.read_csv(filepath_or_buffer="world_top_135_raw.csv")

In [12]:
# Expose the 'Track URL' column under the name the helper functions expect.
dislike_df = dislike_df.assign(track_uri=dislike_df['Track URL'])

In [13]:
# Keep only the track_uri column; every other raw column is dropped.
dislike_df = dislike_df.reindex(['track_uri'], axis='columns')

In [14]:
# Look up each track through the Spotify client and spread the parsed
# fields over the metadata columns (one request per row).
dislike_df[columns] = dislike_df.apply(
    lambda row: pd.Series(parseTrack(sp.track(row['track_uri']))),
    axis=1,
    result_type="expand",
)

In [369]:
# Attach the audio-feature columns to the most-streamed tracks as well.
dislike_df = getTrackFeatures(df=dislike_df)

In [371]:
# Persist the enriched most-streamed table (the index is written too).
dislike_df.to_csv(path_or_buf="world_top_135.csv")