In [1]:
# Import libraries

import pandas as pd
import numpy as np
import os

In [2]:
# Importing Spotify libraries

import spotipy
import spotipy.oauth2 as oauth2
from spotipy.oauth2 import SpotifyOAuth
from spotipy.oauth2 import SpotifyClientCredentials
import time

In [3]:
# Getting token for Spotify authorization
auth_manager = SpotifyClientCredentials(client_id = 'xxxxxxxxxx',
                                        client_secret = 'xxxxxxxx')

sp = spotipy.Spotify(auth_manager=auth_manager)

In [4]:
path = r'/Users/joefeuille/CF Analysis/Spotify Analysis'

In [5]:
# Bringing in au_appended as df

df = pd.read_csv(os.path.join(path, '02 Data', 'Prepared Data', 'au_appended.csv'), index_col = False)

In [6]:
df.head()

Unnamed: 0.1,Unnamed: 0,Position,Track Name,Artist,Streams,URL,Week,Country,Country_Code
0,1,1,STAY (with Justin Bieber),The Kid LAROI,2031338,https://open.spotify.com/track/5PjdY0CKGZdEuoN...,09/30/21,Australia,AU
1,2,2,INDUSTRY BABY (feat. Jack Harlow),Lil Nas X,1506943,https://open.spotify.com/track/5Z9KJZvQzH6PFmb...,09/30/21,Australia,AU
2,3,3,Heat Waves,Glass Animals,1388362,https://open.spotify.com/track/02MWAaffLxlfxAU...,09/30/21,Australia,AU
3,4,4,Cold Heart - PNAU Remix,Elton John,1382069,https://open.spotify.com/track/6zSpb8dQRaw0M1d...,09/30/21,Australia,AU
4,5,5,Bad Habits,Ed Sheeran,1315108,https://open.spotify.com/track/6PQ88X9TkUIAUIZ...,09/30/21,Australia,AU


In [7]:
df['spotify_id'] = df['URL'].str[-22:]

In [8]:
df.drop(columns='Unnamed: 0')

Unnamed: 0,Position,Track Name,Artist,Streams,URL,Week,Country,Country_Code,spotify_id
0,1,STAY (with Justin Bieber),The Kid LAROI,2031338,https://open.spotify.com/track/5PjdY0CKGZdEuoN...,09/30/21,Australia,AU,5PjdY0CKGZdEuoNab3yDmX
1,2,INDUSTRY BABY (feat. Jack Harlow),Lil Nas X,1506943,https://open.spotify.com/track/5Z9KJZvQzH6PFmb...,09/30/21,Australia,AU,5Z9KJZvQzH6PFmb8SNkxuk
2,3,Heat Waves,Glass Animals,1388362,https://open.spotify.com/track/02MWAaffLxlfxAU...,09/30/21,Australia,AU,02MWAaffLxlfxAUY7c5dvx
3,4,Cold Heart - PNAU Remix,Elton John,1382069,https://open.spotify.com/track/6zSpb8dQRaw0M1d...,09/30/21,Australia,AU,6zSpb8dQRaw0M1dK8PBwQz
4,5,Bad Habits,Ed Sheeran,1315108,https://open.spotify.com/track/6PQ88X9TkUIAUIZ...,09/30/21,Australia,AU,6PQ88X9TkUIAUIZJHW2upE
...,...,...,...,...,...,...,...,...,...
1995,196,DON'T LEAVE ME (feat. G Herbo & Lil Durk),The Kid LAROI,212154,https://open.spotify.com/track/4sbyUpMYCeJXwZi...,07/29/21,Australia,AU,4sbyUpMYCeJXwZicQV9YrE
1996,197,Tell The Vision (feat. Kanye West & Pusha T),Pop Smoke,211907,https://open.spotify.com/track/2UwALqx6yOsXTFt...,07/29/21,Australia,AU,2UwALqx6yOsXTFt7zRxnts
1997,198,Shut Up and Dance,WALK THE MOON,211628,https://open.spotify.com/track/4kbj5MwxO1bq9wj...,07/29/21,Australia,AU,4kbj5MwxO1bq9wjT5g9HaA
1998,199,Whoopty,CJ,211527,https://open.spotify.com/track/1t9WgS8FN0534tL...,07/29/21,Australia,AU,1t9WgS8FN0534tLBRwbaxO


# Now, we'll create a new df for only unique track ids

In [9]:
df_id = df.filter(['Track Name', 'Country_Code', 'spotify_id'])

In [10]:
df_id.head()

Unnamed: 0,Track Name,Country_Code,spotify_id
0,STAY (with Justin Bieber),AU,5PjdY0CKGZdEuoNab3yDmX
1,INDUSTRY BABY (feat. Jack Harlow),AU,5Z9KJZvQzH6PFmb8SNkxuk
2,Heat Waves,AU,02MWAaffLxlfxAUY7c5dvx
3,Cold Heart - PNAU Remix,AU,6zSpb8dQRaw0M1dK8PBwQz
4,Bad Habits,AU,6PQ88X9TkUIAUIZJHW2upE


In [11]:
df_id.shape

(2000, 3)

In [12]:
id_unique = df_id.drop_duplicates(subset=['spotify_id'])

In [13]:
id_unique.shape

(388, 3)

In [14]:
id_list = id_unique['spotify_id'].to_list()

# Defining a func that will get the features of tracks with a return statement

In [15]:
def getTrackFeatures(id):
    track_info = sp.track(id)
    features_info = sp.audio_features(id)
    
    # Track info
    name = track_info['name']
    album = track_info['album']['name']
    artist = track_info['album']['artists'][0]['name']
    release_date = track_info['album']['release_date']
    length = track_info['duration_ms']
    popularity = track_info['popularity']

    # Track features
    acousticness = features_info[0]['acousticness']
    danceability = features_info[0]['danceability']
    energy = features_info[0]['energy']
    instrumentalness = features_info[0]['instrumentalness']
    liveness = features_info[0]['liveness']
    loudness = features_info[0]['loudness']
    speechiness = features_info[0]['speechiness']
    tempo = features_info[0]['tempo']
    time_signature = features_info[0]['time_signature']

    track_data = [name, album, artist, release_date, length, popularity, acousticness, danceability, energy,
                    instrumentalness, liveness, loudness, speechiness, tempo, time_signature]

    return track_data

# creating a loop that calls the Spotify API and returns track features

In [16]:
# using time.sleep() to add a delay to the loop and prevent sending too many reqs at once

track_ids = id_list

track_list = []
for i in range(len(track_ids)):
    time.sleep(3)
    track_data = getTrackFeatures(track_ids[i])
    track_list.append(track_data)

song_info = pd.DataFrame(track_list, columns = ['Track Name', 'Album', 'Artist', 'Release_date', 'Length',
                                                       'Popularity', 'Acousticness', 'Danceability', 'Energy',
                                                       'Instrumentalness', 'Liveness', 'Loudness', 'Speechiness',
                                                       'Tempo', 'Time_Signature'])

song_info.head(10)

Unnamed: 0,Track Name,Album,Artist,Release_date,Length,Popularity,Acousticness,Danceability,Energy,Instrumentalness,Liveness,Loudness,Speechiness,Tempo,Time_Signature
0,STAY (with Justin Bieber),F*CK LOVE 3: OVER YOU,The Kid LAROI,2021-07-23,141805,93,0.0383,0.591,0.764,0.0,0.103,-5.484,0.0483,169.928,4
1,INDUSTRY BABY (feat. Jack Harlow),MONTERO,Lil Nas X,2021-09-17,212352,88,0.0221,0.741,0.691,0.0,0.0476,-7.395,0.0672,150.087,4
2,Heat Waves,Dreamland (+ Bonus Levels),Glass Animals,2020-08-06,238805,89,0.44,0.761,0.525,7e-06,0.0921,-6.9,0.0944,80.87,4
3,Cold Heart - PNAU Remix,Cold Heart (PNAU Remix),Elton John,2021-08-13,202735,92,0.034,0.796,0.798,4.2e-05,0.0952,-6.312,0.0317,116.032,4
4,Bad Habits,Bad Habits,Ed Sheeran,2021-06-25,231041,97,0.0469,0.808,0.897,3.1e-05,0.364,-3.712,0.0348,126.026,4
5,Shivers,Shivers,Ed Sheeran,2021-09-13,207853,93,0.281,0.788,0.859,0.0,0.0424,-2.724,0.0856,141.02,4
6,THATS WHAT I WANT,MONTERO,Lil Nas X,2021-09-17,143901,93,0.00614,0.737,0.846,0.0,0.0486,-4.51,0.22,87.981,4
7,good 4 u,SOUR,Olivia Rodrigo,2021-05-21,178146,96,0.335,0.563,0.664,0.0,0.0849,-5.044,0.154,166.928,4
8,Beggin',Chosen,Måneskin,2017-12-08,211560,96,0.127,0.714,0.8,0.0,0.359,-4.808,0.0504,134.002,4
9,MONTERO (Call Me By Your Name),MONTERO,Lil Nas X,2021-09-17,137704,88,0.293,0.593,0.503,0.0,0.405,-6.725,0.22,178.781,4


In [17]:
song_info.shape

(388, 15)

## Success!
## Now we'll merge the track data onto our Japan data set, and we've got one seventh of our work done!

In [18]:
df_complete = df.merge(song_info, on = ['Track Name', 'Artist'], indicator=True, how='outer')

In [19]:
df_complete.head()

Unnamed: 0.1,Unnamed: 0,Position,Track Name,Artist,Streams,URL,Week,Country,Country_Code,spotify_id,...,Acousticness,Danceability,Energy,Instrumentalness,Liveness,Loudness,Speechiness,Tempo,Time_Signature,_merge
0,1,1,STAY (with Justin Bieber),The Kid LAROI,2031338,https://open.spotify.com/track/5PjdY0CKGZdEuoN...,09/30/21,Australia,AU,5PjdY0CKGZdEuoNab3yDmX,...,0.0383,0.591,0.764,0.0,0.103,-5.484,0.0483,169.928,4,both
1,1,1,STAY (with Justin Bieber),The Kid LAROI,2260974,https://open.spotify.com/track/5PjdY0CKGZdEuoN...,09/23/21,Australia,AU,5PjdY0CKGZdEuoNab3yDmX,...,0.0383,0.591,0.764,0.0,0.103,-5.484,0.0483,169.928,4,both
2,1,1,STAY (with Justin Bieber),The Kid LAROI,2277839,https://open.spotify.com/track/5PjdY0CKGZdEuoN...,09/16/21,Australia,AU,5PjdY0CKGZdEuoNab3yDmX,...,0.0383,0.591,0.764,0.0,0.103,-5.484,0.0483,169.928,4,both
3,1,1,STAY (with Justin Bieber),The Kid LAROI,2367836,https://open.spotify.com/track/5PjdY0CKGZdEuoN...,09/09/21,Australia,AU,5PjdY0CKGZdEuoNab3yDmX,...,0.0383,0.591,0.764,0.0,0.103,-5.484,0.0483,169.928,4,both
4,1,1,STAY (with Justin Bieber),The Kid LAROI,2535418,https://open.spotify.com/track/5PjdY0CKGZdEuoN...,09/02/21,Australia,AU,5PjdY0CKGZdEuoNab3yDmX,...,0.0383,0.591,0.764,0.0,0.103,-5.484,0.0483,169.928,4,both


In [21]:
df_complete.drop(columns='Unnamed: 0')

Unnamed: 0,Position,Track Name,Artist,Streams,URL,Week,Country,Country_Code,spotify_id,Album,...,Acousticness,Danceability,Energy,Instrumentalness,Liveness,Loudness,Speechiness,Tempo,Time_Signature,_merge
0,1,STAY (with Justin Bieber),The Kid LAROI,2031338,https://open.spotify.com/track/5PjdY0CKGZdEuoN...,09/30/21,Australia,AU,5PjdY0CKGZdEuoNab3yDmX,F*CK LOVE 3: OVER YOU,...,0.0383,0.591,0.764,0.000000,0.1030,-5.484,0.0483,169.928,4,both
1,1,STAY (with Justin Bieber),The Kid LAROI,2260974,https://open.spotify.com/track/5PjdY0CKGZdEuoN...,09/23/21,Australia,AU,5PjdY0CKGZdEuoNab3yDmX,F*CK LOVE 3: OVER YOU,...,0.0383,0.591,0.764,0.000000,0.1030,-5.484,0.0483,169.928,4,both
2,1,STAY (with Justin Bieber),The Kid LAROI,2277839,https://open.spotify.com/track/5PjdY0CKGZdEuoN...,09/16/21,Australia,AU,5PjdY0CKGZdEuoNab3yDmX,F*CK LOVE 3: OVER YOU,...,0.0383,0.591,0.764,0.000000,0.1030,-5.484,0.0483,169.928,4,both
3,1,STAY (with Justin Bieber),The Kid LAROI,2367836,https://open.spotify.com/track/5PjdY0CKGZdEuoN...,09/09/21,Australia,AU,5PjdY0CKGZdEuoNab3yDmX,F*CK LOVE 3: OVER YOU,...,0.0383,0.591,0.764,0.000000,0.1030,-5.484,0.0483,169.928,4,both
4,1,STAY (with Justin Bieber),The Kid LAROI,2535418,https://open.spotify.com/track/5PjdY0CKGZdEuoN...,09/02/21,Australia,AU,5PjdY0CKGZdEuoNab3yDmX,F*CK LOVE 3: OVER YOU,...,0.0383,0.591,0.764,0.000000,0.1030,-5.484,0.0483,169.928,4,both
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2085,191,Dynamite,BTS,213912,https://open.spotify.com/track/4saklk6nie3yiGe...,07/29/21,Australia,AU,4saklk6nie3yiGePpBwUoc,BE,...,0.0112,0.746,0.765,0.000000,0.0936,-4.410,0.0993,114.044,4,both
2086,193,Mood Swings (feat. Lil Tjay),Pop Smoke,213022,https://open.spotify.com/track/5rZlwNFl01HqLWB...,07/29/21,Australia,AU,5rZlwNFl01HqLWBQGryKSm,Shoot For The Stars Aim For The Moon,...,0.4870,0.480,0.602,0.000000,0.0848,-6.335,0.3680,179.798,4,both
2087,196,DON'T LEAVE ME (feat. G Herbo & Lil Durk),The Kid LAROI,212154,https://open.spotify.com/track/4sbyUpMYCeJXwZi...,07/29/21,Australia,AU,4sbyUpMYCeJXwZicQV9YrE,F*CK LOVE 3: OVER YOU,...,0.0917,0.639,0.727,0.000000,0.1110,-5.230,0.0901,75.992,4,both
2088,197,Tell The Vision (feat. Kanye West & Pusha T),Pop Smoke,211907,https://open.spotify.com/track/2UwALqx6yOsXTFt...,07/29/21,Australia,AU,2UwALqx6yOsXTFt7zRxnts,Faith,...,0.2510,0.662,0.686,0.000006,0.3490,-8.302,0.3320,143.971,4,both


In [22]:
df_complete.shape

(2090, 24)

In [25]:
df_complete.value_counts(['_merge'])

_merge    
both          2090
left_only        0
right_only       0
dtype: int64

In [26]:
# Exporting df_complete as pkl for future append of entire data set

df_complete.to_pickle(os.path.join(path, '02 Data', 'Prepared Data', 'AU_complete.pkl'))