In [2]:
import numpy as np
import pandas as pd

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials


from sklearn.preprocessing import MinMaxScaler

import keyring
import time
import os

# Load Data

In [3]:
# read and process the charts dataset
charts_df = pd.read_csv('data/spotify_daily_charts.csv')
# transform date column into a datetime column
charts_df['date'] = pd.to_datetime(charts_df['date'])
charts_df.head()

Unnamed: 0,date,position,track_id,track_name,artist,streams
0,2017-01-01,1,0kN8xEmgMW9mh7UmDYHlJP,Versace on the Floor,Bruno Mars,185236
1,2017-01-01,2,5uCax9HTNlzGybIStD3vDh,Say You Won't Let Go,James Arthur,180552
2,2017-01-01,3,7BKLCZ1jbUBVqRi2FVlTVw,Closer,The Chainsmokers,158720
3,2017-01-01,4,2rizacJSyD9S1IQUxUxnsK,All We Know,The Chainsmokers,130874
4,2017-01-01,5,5MFzQMkrl1FOOng9tq6R9r,Don't Wanna Know,Maroon 5,129656


In [4]:
# read and process the tracks dataset
tracks_df = pd.read_csv('data/spotify_daily_charts_tracks.csv')
tracks_df.head()

Unnamed: 0,track_id,track_name,artist_id,artist_name,album_id,duration,release_date,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,0kN8xEmgMW9mh7UmDYHlJP,Versace on the Floor,0du5cEVh5yTK9QJze8zA0C,Bruno Mars,4PgleR09JVnm3zY1fW3XBA,261240,2016-11-17,75,0.578,0.574,2.0,-6.209,1.0,0.0454,0.196,0.0,0.083,0.301,174.152
1,5uCax9HTNlzGybIStD3vDh,Say You Won't Let Go,4IWBUUAFIplrNtaOHcJPRM,James Arthur,7oiJYvEJHsmYtrgviAVIBD,211466,2016-10-28,84,0.358,0.557,10.0,-7.398,1.0,0.059,0.695,0.0,0.0902,0.494,85.043
2,7BKLCZ1jbUBVqRi2FVlTVw,Closer,69GGBxA162lTqCwzJG5jLp,The Chainsmokers,0rSLgV8p5FzfnqlEk4GzxE,244960,2016-07-29,84,0.748,0.524,8.0,-5.599,1.0,0.0338,0.414,0.0,0.111,0.661,95.01
3,2rizacJSyD9S1IQUxUxnsK,All We Know,69GGBxA162lTqCwzJG5jLp,The Chainsmokers,0xmaV6EtJ4M3ebZUPRnhyb,194080,2016-09-29,69,0.662,0.586,0.0,-8.821,1.0,0.0307,0.097,0.00272,0.115,0.296,90.0
4,5MFzQMkrl1FOOng9tq6R9r,Don't Wanna Know,04gDigrS5kc9YWfZHwBETP,Maroon 5,0fvTn3WXF39kQs9i3bnNpP,214480,2016-10-11,0,0.783,0.623,7.0,-6.126,1.0,0.08,0.338,0.0,0.0975,0.447,100.048


In [5]:
# merge the charts and tracks dataset
streams_df = charts_df.merge(tracks_df, on='track_id', how='left')
streams_df = streams_df.drop(columns='track_name_y')
streams_df = streams_df.rename(columns={'track_name_x': 'track_name'})
streams_df['date']=pd.to_datetime(streams_df['date'])
streams_df.set_index("date", inplace=True)

streams_df.head()

Unnamed: 0_level_0,position,track_id,track_name,artist,streams,artist_id,artist_name,album_id,duration,release_date,...,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-01-01,1,0kN8xEmgMW9mh7UmDYHlJP,Versace on the Floor,Bruno Mars,185236,0du5cEVh5yTK9QJze8zA0C,Bruno Mars,4PgleR09JVnm3zY1fW3XBA,261240.0,2016-11-17,...,0.574,2.0,-6.209,1.0,0.0454,0.196,0.0,0.083,0.301,174.152
2017-01-01,1,0kN8xEmgMW9mh7UmDYHlJP,Versace on the Floor,Bruno Mars,185236,0du5cEVh5yTK9QJze8zA0C,Bruno Mars,4PgleR09JVnm3zY1fW3XBA,261240.0,2016-11-17,...,0.574,2.0,-6.209,1.0,0.0454,0.196,0.0,0.083,0.301,174.152
2017-01-01,2,5uCax9HTNlzGybIStD3vDh,Say You Won't Let Go,James Arthur,180552,4IWBUUAFIplrNtaOHcJPRM,James Arthur,7oiJYvEJHsmYtrgviAVIBD,211466.0,2016-10-28,...,0.557,10.0,-7.398,1.0,0.059,0.695,0.0,0.0902,0.494,85.043
2017-01-01,2,5uCax9HTNlzGybIStD3vDh,Say You Won't Let Go,James Arthur,180552,4IWBUUAFIplrNtaOHcJPRM,James Arthur,7oiJYvEJHsmYtrgviAVIBD,211466.0,2016-10-28,...,0.557,10.0,-7.398,1.0,0.059,0.695,0.0,0.0902,0.494,85.043
2017-01-01,3,7BKLCZ1jbUBVqRi2FVlTVw,Closer,The Chainsmokers,158720,69GGBxA162lTqCwzJG5jLp,The Chainsmokers,0rSLgV8p5FzfnqlEk4GzxE,244960.0,2016-07-29,...,0.524,8.0,-5.599,1.0,0.0338,0.414,0.0,0.111,0.661,95.01


In [6]:
tracks_df.head()

Unnamed: 0,track_id,track_name,artist_id,artist_name,album_id,duration,release_date,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,0kN8xEmgMW9mh7UmDYHlJP,Versace on the Floor,0du5cEVh5yTK9QJze8zA0C,Bruno Mars,4PgleR09JVnm3zY1fW3XBA,261240,2016-11-17,75,0.578,0.574,2.0,-6.209,1.0,0.0454,0.196,0.0,0.083,0.301,174.152
1,5uCax9HTNlzGybIStD3vDh,Say You Won't Let Go,4IWBUUAFIplrNtaOHcJPRM,James Arthur,7oiJYvEJHsmYtrgviAVIBD,211466,2016-10-28,84,0.358,0.557,10.0,-7.398,1.0,0.059,0.695,0.0,0.0902,0.494,85.043
2,7BKLCZ1jbUBVqRi2FVlTVw,Closer,69GGBxA162lTqCwzJG5jLp,The Chainsmokers,0rSLgV8p5FzfnqlEk4GzxE,244960,2016-07-29,84,0.748,0.524,8.0,-5.599,1.0,0.0338,0.414,0.0,0.111,0.661,95.01
3,2rizacJSyD9S1IQUxUxnsK,All We Know,69GGBxA162lTqCwzJG5jLp,The Chainsmokers,0xmaV6EtJ4M3ebZUPRnhyb,194080,2016-09-29,69,0.662,0.586,0.0,-8.821,1.0,0.0307,0.097,0.00272,0.115,0.296,90.0
4,5MFzQMkrl1FOOng9tq6R9r,Don't Wanna Know,04gDigrS5kc9YWfZHwBETP,Maroon 5,0fvTn3WXF39kQs9i3bnNpP,214480,2016-10-11,0,0.783,0.623,7.0,-6.126,1.0,0.08,0.338,0.0,0.0975,0.447,100.048


In [7]:
artist_dict=pd.Series(tracks_df.artist_id.values,index=tracks_df.artist_name).to_dict()

#arr_df['track_id'] = arr_df['track_id'].apply(lambda x:bnb_tracks_dict.get(x))
artist_dict.get("Taylor Swift")


'06HL4z0CvFAxyc27GXpf02'

In [8]:
artist_dict

{'Bruno Mars': '0du5cEVh5yTK9QJze8zA0C',
 'James Arthur': '4IWBUUAFIplrNtaOHcJPRM',
 'The Chainsmokers': '69GGBxA162lTqCwzJG5jLp',
 'Maroon 5': '04gDigrS5kc9YWfZHwBETP',
 'Hailee Steinfeld': '5p7f24Rk5HkUZsaS3BLG5F',
 'Alessia Cara': '2wUjUUtkb5lvLKcGKsKqsR',
 'DJ Snake': '540vIaP2JwjQb9dm3aArA4',
 'The Weeknd': '1Xyo4u8uXC1ZmMpatF05PJ',
 'Dawin': '46GXASE9LHzyssNqKOInUu',
 'Major Lazer': '738wLrAtLtCtFOLvQBXOXp',
 'Jonas Blue': '1HBjj22wzbscIZ9sEb5dyf',
 'Machine Gun Kelly': '6TIYQ3jFPwQSRmorSezPxX',
 'ZAYN': '5ZsFI1h6hIdQRw2ti0hz81',
 'The Vamps': '7gAppWoH7pcYmphCVTXkzs',
 'Martin Garrix': '60d24wfXkVzDSfLS6hyCjZ',
 'Ariana Grande': '66CXWjxzNUsdJxJ2JdwvnR',
 'Starley': '02A3cEvlLLCbIMVDrK2GHV',
 'Andy Grammer': '2oX42qP5ineK3hrhBECLmj',
 'Shawn Mendes': '7n2wHs1TKAczGzO7Dd2rGr',
 'Clean Bandit': '6MDME20pz9RveH9rEXvrOM',
 'SUD': '0jKQrxK2zHqxyOAyIMrUQt',
 'Charlie Puth': '6VuMaDnrHyPL1p4EHjYLi7',
 'Steve Aoki': '77AiFEVeAVj2ORpC85QVJs',
 'Little Mix': '3e7awlrlDSwF3iM0WBjGMp',
 'gn

## Setup Spotipy credentials and query wrapper

In [62]:
client_credentials_manager = SpotifyClientCredentials(client_id=keyring.get_password('spotify', 'cid'),
                                                      client_secret=keyring.get_password('spotify', 'secret') )
sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager)

## Artist->Album List

In [10]:
results = sp.artist_albums(artist_dict.get("Ben&Ben"), album_type='album')
#results = sp.artist_albums('4DAcJXcjX0zlQAZAPAx4Zb', album_type='album')


In [11]:
albums_list=[ [r['name'],r['id']] for r in results['items']]
albums_list

[['Pebble House, Vol. 1: Kuwaderno', '0UeeygFdIo8k7PgxpLEGWI'],
 ['LIMASAWA STREET', '7xCcuTA3abKwxj8HwgxP7R']]

### Album List->Tracks

In [12]:
tracks_list=[]

for n,i in enumerate(album_list):
    ii=i[1]
    print("Getting track data for album %s :..." % (album_list[n]), end='' )
    try:
        results=sp.album(ii)
        tracks_in_loop=[ [r['name'],r['id']] for r in results['tracks']['items']]
        tracks_list.append(tracks_in_loop)
        print("   DONE")
    except:
        print("   Aborted")
    #track_list
    
tracks_list

NameError: name 'album_list' is not defined

In [None]:
tracks_list[0]

## Artist-> Top10 Tracks

In [None]:
results=sp.artist_top_tracks(artist_dict.get("John Roa"), country='PH')
#results=sp.artist_top_tracks('7tNO3vJC9zlHy2IJOx34ga', country='PH')


In [None]:
top_tracks_list=[ [r['name'],r['id']] for r in results['tracks']]
top_tracks_list

## Artist->Similar Artists

In [None]:
#Get Spotify catalog information about artists similar to an identified artist. 
#Similarity is based on analysis of the Spotify community’s listening history.

results=sp.artist_related_artists(artist_dict.get("I Belong to the Zoo"))
#results=sp.artist_related_artists("0vnvkzyrIHCyhTUdp3yjcu")


In [None]:
results['artists'][0]['name']
[ [r['name'],r['id']] for r in results['artists']]

## Track ID-> Audio Features

In [66]:
def get_track_data(t_id):                    

    track_features = sp.audio_features(t_id)
    relevant_cols = ['danceability', 'energy', 'key', 'loudness', 'mode',\
                     'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']  
    #print(track_features)
    tf_data = pd.DataFrame(track_features)
    tf_data = tf_data[relevant_cols]
    return tf_data

In [67]:
get_track_data('0HAG6nIiEKd75yGpovuSJQ')

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,0.429,0.464,6,-8.445,0,0.0719,0.783,0,0.0846,0.375,121.17


# Download Artist + Mainstay Top 10 Tracks With Audio Features

In [21]:
tracks_dict=pd.Series(tracks_df.track_id.values,index=(tracks_df.track_name)).to_dict()
artist_dict=pd.Series(tracks_df.artist_name.values,index=(tracks_df.artist_id)).to_dict()

top5_names=["Arthur Nery", "Adie","Zack Tabudlo","Ben&Ben","NOBITA"]
artist_name=["I Belong to the Zoo"]
comb_names=top5_names+artist_name

In [53]:
tracks_list=[]
tracks_df=pd.DataFrame()

for i in comb_names:
    print(i)
    results=sp.artist_top_tracks(artist_dict.get(i), country='PH')
    top_tracks_list= [ [r['name'],r['id'],i] for r in results['tracks']]
    top_tracks_df=pd.DataFrame(top_tracks_list, columns = ['track_name', 'track_id',"artist_name"])
    
    print(top_tracks_list)
    print("")
    #tracks_list.append(top_tracks_list)
    tracks_df=pd.concat([tracks_df,top_tracks_df])

Arthur Nery
[['Isa lang', '0HAG6nIiEKd75yGpovuSJQ', 'Arthur Nery'], ['Pagsamo', '0WZqHCoOBXqgCCIsUZLC2a', 'Arthur Nery'], ['Higa', '7gfrASeOt73YWvpIv1fU8E', 'Arthur Nery'], ['Happy w u', '0EA8jl8Wje9ALRKC8Qlzyt', 'Arthur Nery'], ['Binhi', '092cPwFlwBXdB3QF3X469W', 'Arthur Nery'], ['TAKE ALL THE LOVE', '2v6jmF6VQWS96x6tSg05IC', 'Arthur Nery'], ['Sinag (feat. Sam Benwick)', '5DARNIgi6aaJxGD5SKAUal', 'Arthur Nery'], ['Life Puzzle', '42A9RtOizRYgILd4muMvto', 'Arthur Nery'], ['Cotton Candy', '1t19LlMX4aomJ1U9MDzan8', 'Arthur Nery'], ['Hung Up', '5YHXL4ggZEHZUgQBgLF0Oz', 'Arthur Nery']]

Adie
[['Mahika', '47sq83n5WN22ZzYnGn8aTy', 'Adie'], ['Paraluman', '2jbf9EytR7fzpSrPWAYCf9', 'Adie'], ['Tahanan', '5CUQnKjA6nlteCnxMKsjIu', 'Adie'], ['Dungaw', '3XlzKAS2zostN3UiBJvLW6', 'Adie'], ['Kabado', '7oLe1cVRqLpoH2NozJ78mh', 'Adie'], ["You'll Be Safe Here", '4whLOE7hwNIAykoMcSZmvO', 'Adie'], ['Luha', '2f0T6IDfPT9sNJqqJxEOTR', 'Adie'], ['Tinatangi (feat. Chrstn)', '5Pgssd39LAfVsR9gvUVwU7', 'Adie'], ['Sa

In [58]:
sp.audio_features('0HAG6nIiEKd75yGpovuSJQ')

[{'danceability': 0.429,
  'energy': 0.464,
  'key': 6,
  'loudness': -8.445,
  'mode': 0,
  'speechiness': 0.0719,
  'acousticness': 0.783,
  'instrumentalness': 0,
  'liveness': 0.0846,
  'valence': 0.375,
  'tempo': 121.17,
  'type': 'audio_features',
  'id': '0HAG6nIiEKd75yGpovuSJQ',
  'uri': 'spotify:track:0HAG6nIiEKd75yGpovuSJQ',
  'track_href': 'https://api.spotify.com/v1/tracks/0HAG6nIiEKd75yGpovuSJQ',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/0HAG6nIiEKd75yGpovuSJQ',
  'duration_ms': 269256,
  'time_signature': 3}]

In [54]:
tracks_df

Unnamed: 0,track_name,track_id,artist_name
0,Isa lang,0HAG6nIiEKd75yGpovuSJQ,Arthur Nery
1,Pagsamo,0WZqHCoOBXqgCCIsUZLC2a,Arthur Nery
2,Higa,7gfrASeOt73YWvpIv1fU8E,Arthur Nery
3,Happy w u,0EA8jl8Wje9ALRKC8Qlzyt,Arthur Nery
4,Binhi,092cPwFlwBXdB3QF3X469W,Arthur Nery
5,TAKE ALL THE LOVE,2v6jmF6VQWS96x6tSg05IC,Arthur Nery
6,Sinag (feat. Sam Benwick),5DARNIgi6aaJxGD5SKAUal,Arthur Nery
7,Life Puzzle,42A9RtOizRYgILd4muMvto,Arthur Nery
8,Cotton Candy,1t19LlMX4aomJ1U9MDzan8,Arthur Nery
9,Hung Up,5YHXL4ggZEHZUgQBgLF0Oz,Arthur Nery


In [68]:
af_df=pd.DataFrame()

for i in tracks_df.track_id:
    print(i)
    data=get_track_data(i)
    data["track_id"]=i
    af_df=pd.concat([af_df,data])

In [70]:
tracks_df_af=pd.merge(tracks_df,af_df, on="track_id", how="left")
tracks_df_af

Unnamed: 0,track_name,track_id,artist_name,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,Isa lang,0HAG6nIiEKd75yGpovuSJQ,Arthur Nery,0.429,0.464,6,-8.445,0,0.0719,0.783,0.0,0.0846,0.375,121.17
1,Pagsamo,0WZqHCoOBXqgCCIsUZLC2a,Arthur Nery,0.644,0.315,6,-9.382,1,0.0339,0.706,0.00162,0.0962,0.356,60.001
2,Higa,7gfrASeOt73YWvpIv1fU8E,Arthur Nery,0.656,0.336,9,-10.538,1,0.0434,0.453,0.0,0.106,0.182,125.058
3,Happy w u,0EA8jl8Wje9ALRKC8Qlzyt,Arthur Nery,0.596,0.182,6,-16.004,1,0.0378,0.903,0.0,0.0909,0.268,111.734
4,Binhi,092cPwFlwBXdB3QF3X469W,Arthur Nery,0.46,0.311,5,-10.235,1,0.0319,0.961,0.000819,0.189,0.22,124.193
5,TAKE ALL THE LOVE,2v6jmF6VQWS96x6tSg05IC,Arthur Nery,0.491,0.486,5,-6.588,1,0.0347,0.285,0.0,0.0976,0.262,82.411
6,Sinag (feat. Sam Benwick),5DARNIgi6aaJxGD5SKAUal,Arthur Nery,0.54,0.287,9,-12.92,1,0.0314,0.876,0.00012,0.0803,0.215,75.02
7,Life Puzzle,42A9RtOizRYgILd4muMvto,Arthur Nery,0.719,0.234,4,-13.983,1,0.0884,0.586,0.0,0.0785,0.362,93.924
8,Cotton Candy,1t19LlMX4aomJ1U9MDzan8,Arthur Nery,0.473,0.371,1,-10.269,0,0.128,0.697,0.0,0.103,0.292,179.986
9,Hung Up,5YHXL4ggZEHZUgQBgLF0Oz,Arthur Nery,0.549,0.36,11,-10.244,1,0.0549,0.771,0.000113,0.691,0.258,116.667


In [73]:
#tracks_df_af.to_csv('data_sp/tracks_data.csv')