### Collect track analysis data from the Spotify API using the spotipy library

In [1]:
%load_ext autoreload

In [2]:
from cap_package import SpotipyCollect as sc
from dotenv import load_dotenv
import numpy as np
import os
from pathlib import Path
%autoreload 2

Downloading emoji data ...
... OK (Got response in 0.20 seconds)
Writing emoji data to C:\Users\Administrator\.demoji\codes.json ...
... OK


In [3]:
# Read the Spotify credentials and the dataset location from the environment
# (load_dotenv() is expected to populate it from a local .env file).
# Any variable that is not set comes back as None.
load_dotenv()
CLIENT_ID, CLIENT_SECRET, REDIRECT_URI, USERNAME, PATH = (
    os.environ.get(key)
    for key in (
        'CLIENT_ID',
        'CLIENT_SECRET',
        'REDIRECT_URI',
        'SPOTIFY_USERNAME',
        'PATH_DATASET1.2',
    )
)
# Authorization scope handed to sc.spotipy_userauth below.
SCOPE = 'playlist-read-private'

Hierarchy of the SpotipyCollect (sc) package:
- spotipy_userauth
- Using USER's playlist: get_pl_details >  playlist_id_url > extract_playlists
- create_dataset 

 > get_folder_analysis
   >> get_playlist_analysis 
      >>> get_segments
          >>>> extract_tracks_analysis
               >>>>> extract_tracks
               
Redundant - tracks_analysis, track_genre

In [4]:
# Obtain Spotify authorization; `sp` is the handle passed to the later
# sc.get_pl_details and sc.get_folder_analysis calls.
sp = sc.spotipy_userauth(
    USERNAME,
    SCOPE,
    CLIENT_ID,
    CLIENT_SECRET,
    REDIRECT_URI,
)

In [5]:
# Fetch the user's playlist details: name, id, url and total number of tracks.
pl_Name_, pl_ID_, pl_URL_, pltot_Tracks_ = sc.get_pl_details(sp, USERNAME)
# Preview only the first three names to keep the output short.
print(f'Sample playlist names: {pl_Name_[:3]}')

Sample playlist names: ["Today's Top Hits", 'Deep house', 'Progressive House']


In [6]:
# Filter and sort the playlists, then keep the relevant ones by dropping the
# first two and the last two entries of the result.
# NOTE(review): start=1, pl_range=18 and the [2:-2] trim look specific to this
# account's playlist layout — confirm before reusing with another user.
filsort_pl = sc.filtersort_playlists(
    pl_Name_,
    pl_ID_,
    pl_URL_,
    pltot_Tracks_,
    start=1,
    pl_range=18,
)[2:-2]
print("Let's see a sample:\n")
# Each entry displays as (total tracks, name, id, tracks URL).
filsort_pl[0]

Let's see a sample:



(50,
 'That familiar trance',
 '3PH2J5HkKhhMoxWj3W0jk8',
 'https://api.spotify.com/v1/playlists/3PH2J5HkKhhMoxWj3W0jk8/tracks')

In [7]:
# Collect the audio analysis for every selected playlist, with the segments
# and sections flags switched on and the tempo flag off.
folder_analysis_dict = sc.get_folder_analysis(
    sp,
    filsort_pl,
    segments=True,
    sections=True,
    tempo=False,
)

Names of all playlists:

In [8]:
# Top-level keys of the analysis dict: one entry per playlist.
folder_analysis_dict.keys()

dict_keys(['That familiar trance', 'House-Trance', 'Our old school trance', 'Classic progressive', 'Progressive 4', 'Deep house', 'Progressive 2', 'Our old school trance 2', 'Progressive 3', 'Progressive House', 'Progressive 5', 'Our old school trance 138', 'Progressive 1'])

Within a playlist, each key identifies one track: the track name followed by an underscore and the first 3 characters of the contributing artist(s) name.

In [9]:
# Keys of a single playlist's entry. 'Progressive 1' is a playlist name from
# this account, so the lookup fails for a user without that playlist.
folder_analysis_dict['Progressive 1'].keys()

dict_keys(['London_Kat', 'Burns - Lane 8 Club Mix_Geo', 'Boxed Out_Cub', 'Sunday Maybe_Way', 'Tokyo Night Train - RMX_Cla', 'Kids - RMX_PRO', 'Chanunpa_Sou', 'Breathe_Vin', 'Ascension_Vin', 'Come Home - Mixed_Vin', '8 Bit Eclipse_Qui', 'Iridescent - Forerunners Dub Remix_Sou', "Cristiano - Peter Illias 'Back to Love' Remix_Tim", 'Told You_Cla', 'Campfire 2017 - Sons of Maria Retouch_Din', 'Mimi_Esk', 'Kiwi_Har'])

For each track we have two dataframes:
- Segments (index 0): at least 100 segments (rows), chosen based on certain criteria.
- Sections (index 1): all sections of the track.

In [10]:
# Index 0 of a track's entry is shown here as its segments dataframe.
# The playlist and track keys are hard-coded examples from this account.
print('\nSample track - segments dataframe : \n')
folder_analysis_dict['Progressive 1']['London_Kat'][0]


Sample track - segments dataframe : 



Unnamed: 0,start,start_minute,duration,confidence,pitches,timbre
3,0.54567,00:00:55,0.37737,1.000,"[0.099, 0.135, 0.145, 0.212, 0.289, 0.555, 0.8...","[44.69, 72.975, -59.469, 39.529, 15.246, -114...."
5,1.02172,00:01:02,0.51043,0.870,"[0.11, 0.144, 0.139, 0.197, 0.268, 0.539, 0.83...","[43.2, 74.587, -21.919, 47.153, 28.937, -46.45..."
6,1.53215,00:01:53,0.35451,0.986,"[0.743, 1.0, 0.319, 0.137, 0.083, 0.043, 0.033...","[42.67, 29.638, -49.407, 39.926, -16.55, -139...."
10,2.51320,00:02:51,0.36576,1.000,"[0.711, 1.0, 0.322, 0.136, 0.084, 0.048, 0.031...","[41.307, 13.913, -57.769, 67.452, -24.724, -13..."
26,4.97488,00:04:97,0.34834,1.000,"[0.717, 1.0, 0.322, 0.134, 0.077, 0.035, 0.027...","[39.028, -28.523, -92.945, 109.523, -32.092, -..."
...,...,...,...,...,...,...
1619,285.53516,04:45:54,0.25610,0.968,"[1.0, 0.187, 0.062, 0.066, 0.241, 0.781, 0.125...","[39.551, 179.46, -10.525, -0.761, 101.805, -17..."
1648,290.70804,04:50:71,0.25034,1.000,"[0.694, 1.0, 0.482, 0.195, 0.155, 0.083, 0.046...","[39.112, -35.085, -152.116, 138.637, -28.081, ..."
1652,291.44482,04:51:44,0.25002,0.991,"[1.0, 0.291, 0.119, 0.119, 0.219, 0.61, 0.161,...","[32.912, 313.575, -71.716, 17.621, 50.437, -41..."
1680,297.84244,04:57:84,0.34844,0.707,"[0.373, 0.411, 0.426, 0.518, 0.469, 0.588, 0.5...","[31.054, 331.479, 38.327, 84.965, 88.189, -35...."


In [11]:
# Index 1 of a track's entry is shown here as its sections dataframe.
# The playlist and track keys are hard-coded examples from this account.
print('Sample track - section dataframe : \n')
folder_analysis_dict['Progressive 1']['London_Kat'][1]

Sample track - section dataframe : 



Unnamed: 0,start,duration,confidence,loudness,tempo,tempo_confidence,key,key_confidence,mode,mode_confidence,time_signature,time_signature_confidence
0,0.0,16.67468,1.0,-14.333,121.915,0.562,1,0.0,1,0.0,4,1.0
1,16.67468,15.23954,1.0,-13.096,122.075,0.567,5,0.008,0,0.196,4,1.0
2,31.91422,47.70385,0.753,-9.988,122.069,0.452,0,0.0,1,0.0,4,1.0
3,79.61807,23.12073,0.37,-12.096,122.162,0.428,10,0.356,1,0.364,4,1.0
4,102.7388,24.09149,0.563,-9.533,122.075,0.344,1,0.0,1,0.0,4,1.0
5,126.83029,31.47287,0.494,-7.806,122.149,0.237,5,0.609,0,0.608,4,1.0
6,158.30315,47.2172,0.491,-7.876,121.989,0.198,5,0.495,0,0.538,4,1.0
7,205.52036,53.72078,0.529,-9.484,121.909,0.485,5,0.915,0,0.666,4,1.0
8,259.24115,14.755,0.219,-12.174,122.047,0.801,0,0.03,1,0.359,4,1.0
9,273.99615,28.92228,0.252,-13.912,122.036,0.82,7,0.041,0,0.464,4,1.0


In [12]:
# Confirm that every track has usable segment and section data.
def _has_missing_data(frame):
    """Return True when `frame` is absent, empty, or contains any null cell."""
    # An empty frame holds zero nulls, so a null count alone would let a track
    # with no segments/sections at all slip through this check unnoticed.
    return frame is None or frame.empty or bool(frame.isnull().values.any())


mis_segs = []  # (playlist, track) pairs whose segments frame (index 0) is incomplete
mis_secs = []  # (playlist, track) pairs whose sections frame (index 1) is incomplete
for pl, tracks in folder_analysis_dict.items():
    for track, dfs in tracks.items():
        if _has_missing_data(dfs[0]):
            mis_segs.append((pl, track))
        if _has_missing_data(dfs[1]):
            mis_secs.append((pl, track))
# Should be empty
print('missing segments :', mis_segs)
print('missing sections :', mis_secs)

missing segments : []
missing sections : []


In [None]:
# Define path to the dataset directory
# NOTE(review): currently disabled. PATH comes from os.getenv('PATH_DATASET1.2'),
# which is None when that variable is unset, and Path(None) raises TypeError —
# check PATH before re-enabling this line.
#path = Path(PATH).joinpath('user_playlists')

In [None]:
# Create dataset and save them as parquet files
# NOTE(review): currently disabled. It needs `path`, whose only assignment in
# this notebook is also commented out, so re-enable both lines together.
#sc.create_dataset(folder_analysis_dict, path)