# Playlists from Spotify 

## Libraries and dependencies 

In [1]:
import requests
import base64

In [2]:
import pandas as pd

## Credentials and token

In [3]:

# Set up credentials
#client_id = 'your_client_id'
#client_secret = 'your_client_secret'
#creds = f'{client_id}:{client_secret}'
#creds_b64 = base64.b64encode(creds.encode()).decode()


In [4]:
# Get access token
#auth_url = 'https://accounts.spotify.com/api/token'
#auth_headers = {'Authorization': f'Basic {creds_b64}'}
#auth_data = {'grant_type': 'client_credentials'}
#auth_response = requests.post(auth_url, headers=auth_headers, data=auth_data)
#auth_response.json()['access_token']

In [5]:
# read a txt file with the access token
path_token = '../data/spotify_data.csv'
with open('path_token', 'r') as f:
    access_token = f.read()


FileNotFoundError: [Errno 2] No such file or directory: 'path_token'

In [1]:
access_token = '{access_token}'


## Requests / Getting data

### Get user's playlists and tracks 

In [6]:
# Get list of playlists
playlists_url = 'https://api.spotify.com/v1/me/playlists'
playlists_headers = {'Authorization': f'Bearer {access_token}'}
playlists_response = requests.get(playlists_url, headers=playlists_headers)
playlists = playlists_response.json()['items']

In [7]:
# Initialize an empty list to store the track details
tracks_list = []

In [8]:
# Loop through each playlist
for playlist in playlists:
    # Make a GET request to retrieve the tracks in the playlist
    tracks_url = playlist['tracks']['href']
    tracks_headers = {'Authorization': f'Bearer {access_token}'}
    tracks_response = requests.get(tracks_url, headers=tracks_headers)
    tracks = tracks_response.json()['items']
    
    # Loop through each track
    for track in tracks:
        # Make a GET request to retrieve the track's details
        track_url = track['track']['href']
        track_headers = {'Authorization': f'Bearer {access_token}'}
        track_response = requests.get(track_url, headers=track_headers)
        track_details = track_response.json()
        
        # Store the track details in a dictionary
        track_dict = {
            'id': track_details['id'],
            'name': track_details['name'],
            'artists': [artist['name'] for artist in track_details['artists']],
            'album': track_details['album']['name'],
            'album_id': track_details['album']['id'],
            'album_artists': [artist['name'] for artist in track_details['album']['artists']],
            'album_release_date': track_details['album']['release_date'],
            'duration_ms': track_details['duration_ms'],
            'popularity': track_details['popularity'],
            'explicit': track_details['explicit'],
            'track_number': track_details['track_number'],
            'disc_number': track_details['disc_number'],
            'is_local': track_details['is_local']
        }

        # Append the dictionary to the list
        tracks_list.append(track_dict)



In [9]:
# Convert the list of dictionaries to a pandas DataFrame
df_tracks = pd.DataFrame(tracks_list)
# DROP DUPLICATES SONGS
df_tracks = df_tracks.drop_duplicates(subset=['id'])

In [11]:
df_tracks

Unnamed: 0,id,name,artists,album,album_id,album_artists,album_release_date,duration_ms,popularity,explicit,track_number,disc_number,is_local
0,2iUXsYOEPhVqEBwsqP70rE,Youngblood,[5 Seconds of Summer],Youngblood (Deluxe),2D0Hi3Jj6RFnpWDcSa0Otu,[5 Seconds of Summer],2018-06-15,203417,82,False,1,1,False
1,2v5lLKdZG0PsXGWfvigk55,Rewind.. (But I Love You),[WHIPPED CREAM],Someone You Can Count On,5wj4dMktjUCPq8BfsQCMR2,[WHIPPED CREAM],2023-03-08,173609,60,False,5,1,False
2,5jA67K9o5mEW5NzjRCdAUg,Breath,[Elohim],Breath,4ZjGSYAVcd7TbZfFNUFWM8,[Elohim],2023-04-07,200339,51,False,1,1,False
3,4461Ozpndhv2AjNqe6d0Ic,Forever - Pauline Herr Rework,"[Alison Wonderland, Pauline Herr]",Loner (Remixes),0MOgYRLnYJNdSd8EsqT4dJ,[Alison Wonderland],2023-03-16,170095,48,False,1,1,False
4,46bI9wmq6kDJJ3yAqTvkzH,BACK ONLINE,"[MEMBA, pluko, EVAN GIIA, Biicla]",BACK ONLINE,4Jo971sLoS2lO4bf9hKe2j,"[MEMBA, pluko, EVAN GIIA]",2023-02-20,217846,52,False,1,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
567,5yyEGZbHFcFAXo9V8J79Od,Slow Dive,[CLAVVS],No Saviors (Extended),2zUsbft0MRoEpVvEGtnk4Z,[CLAVVS],2020-02-25,231143,43,False,4,1,False
568,02qBlMtpcv3t4cKhFjilk5,Devils I Know,[CLAVVS],No Saviors (Extended),2zUsbft0MRoEpVvEGtnk4Z,[CLAVVS],2020-02-25,215092,28,False,1,1,False
569,5KhZNvQ9eXo53ZKeQE8G2b,All Your Friends,"[Icarus, Quelle T]",All Your Friends,3eKaMTFMWyFwKIzOKCdJEo,"[Icarus, Quelle T]",2020-10-16,225306,20,False,1,1,False
570,4Bif5TdVlzTiEff3hTwS5W,Enter the Dreamland,[Meresha],Look How Far,4bHyxz0m1csBnNZD1Ta1Jt,[Meresha],2020-09-25,239666,26,False,4,1,False


### Get tracks' audio features 

In [12]:
# get all the unique album ids
tracks_ids = df_tracks['id'].unique()
# now transform into a list 
tracks_ids = tracks_ids.tolist()

In [13]:
audio_features_df = pd.DataFrame(columns=['track_id', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'time_signature'])

In [14]:
for track_id in tracks_ids:
    # Make a GET request to retrieve the track's audio features
    audio_features_url = f'https://api.spotify.com/v1/audio-features/{track_id}'
    audio_features_headers = {'Authorization': f'Bearer {access_token}'}
    audio_features_response = requests.get(audio_features_url, headers=audio_features_headers)
    audio_features = audio_features_response.json()

    # Add the audio features to the DataFrame
    new_row = {
        'track_id': track_id,
        'danceability': audio_features['danceability'],
        'energy': audio_features['energy'],
        'key': audio_features['key'],
        'loudness': audio_features['loudness'],
        'mode': audio_features['mode'],
        'speechiness': audio_features['speechiness'],
        'acousticness': audio_features['acousticness'],
        'instrumentalness': audio_features['instrumentalness'],
        'liveness': audio_features['liveness'],
        'valence': audio_features['valence'],
        'tempo': audio_features['tempo'],
        'time_signature': audio_features['time_signature']
    }
    audio_features_df = pd.concat([audio_features_df, pd.DataFrame(new_row, index=[0])], ignore_index=True)


In [15]:
audio_features_df

Unnamed: 0,track_id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,2iUXsYOEPhVqEBwsqP70rE,0.596,0.854,7,-5.114,0,0.4630,0.016900,0,0.1240,0.1520,120.274,4
1,2v5lLKdZG0PsXGWfvigk55,0.668,0.787,6,-8.346,0,0.0418,0.024800,0.795,0.0821,0.0381,130.034,4
2,5jA67K9o5mEW5NzjRCdAUg,0.776,0.916,0,-3.569,1,0.0589,0.045000,0.000733,0.1000,0.2740,118.039,4
3,4461Ozpndhv2AjNqe6d0Ic,0.448,0.521,11,-6.846,0,0.0328,0.198000,0.000003,0.3490,0.4280,139.787,4
4,46bI9wmq6kDJJ3yAqTvkzH,0.713,0.858,4,-5.787,0,0.0710,0.145000,0.0254,0.0914,0.4280,130.031,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...
557,5yyEGZbHFcFAXo9V8J79Od,0.708,0.614,10,-6.101,0,0.0315,0.200000,0.000067,0.1220,0.6000,140.015,4
558,02qBlMtpcv3t4cKhFjilk5,0.489,0.812,6,-3.616,0,0.0398,0.124000,0.0105,0.1590,0.6320,156.113,4
559,5KhZNvQ9eXo53ZKeQE8G2b,0.640,0.766,5,-6.865,0,0.0498,0.256000,0.169,0.1980,0.3750,98.001,4
560,4Bif5TdVlzTiEff3hTwS5W,0.514,0.781,5,-4.240,0,0.0354,0.000421,0.00672,0.1590,0.3090,119.997,4


### Get user's playlists and tracks 

In [16]:
# Merge the audio features DataFrame with the tracks DataFrame
merged_df = pd.merge(df_tracks, audio_features_df, left_on='id', right_on='track_id')

In [17]:
merged_df

Unnamed: 0,id,name,artists,album,album_id,album_artists,album_release_date,duration_ms,popularity,explicit,...,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,2iUXsYOEPhVqEBwsqP70rE,Youngblood,[5 Seconds of Summer],Youngblood (Deluxe),2D0Hi3Jj6RFnpWDcSa0Otu,[5 Seconds of Summer],2018-06-15,203417,82,False,...,7,-5.114,0,0.4630,0.016900,0,0.1240,0.1520,120.274,4
1,2v5lLKdZG0PsXGWfvigk55,Rewind.. (But I Love You),[WHIPPED CREAM],Someone You Can Count On,5wj4dMktjUCPq8BfsQCMR2,[WHIPPED CREAM],2023-03-08,173609,60,False,...,6,-8.346,0,0.0418,0.024800,0.795,0.0821,0.0381,130.034,4
2,5jA67K9o5mEW5NzjRCdAUg,Breath,[Elohim],Breath,4ZjGSYAVcd7TbZfFNUFWM8,[Elohim],2023-04-07,200339,51,False,...,0,-3.569,1,0.0589,0.045000,0.000733,0.1000,0.2740,118.039,4
3,4461Ozpndhv2AjNqe6d0Ic,Forever - Pauline Herr Rework,"[Alison Wonderland, Pauline Herr]",Loner (Remixes),0MOgYRLnYJNdSd8EsqT4dJ,[Alison Wonderland],2023-03-16,170095,48,False,...,11,-6.846,0,0.0328,0.198000,0.000003,0.3490,0.4280,139.787,4
4,46bI9wmq6kDJJ3yAqTvkzH,BACK ONLINE,"[MEMBA, pluko, EVAN GIIA, Biicla]",BACK ONLINE,4Jo971sLoS2lO4bf9hKe2j,"[MEMBA, pluko, EVAN GIIA]",2023-02-20,217846,52,False,...,4,-5.787,0,0.0710,0.145000,0.0254,0.0914,0.4280,130.031,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
557,5yyEGZbHFcFAXo9V8J79Od,Slow Dive,[CLAVVS],No Saviors (Extended),2zUsbft0MRoEpVvEGtnk4Z,[CLAVVS],2020-02-25,231143,43,False,...,10,-6.101,0,0.0315,0.200000,0.000067,0.1220,0.6000,140.015,4
558,02qBlMtpcv3t4cKhFjilk5,Devils I Know,[CLAVVS],No Saviors (Extended),2zUsbft0MRoEpVvEGtnk4Z,[CLAVVS],2020-02-25,215092,28,False,...,6,-3.616,0,0.0398,0.124000,0.0105,0.1590,0.6320,156.113,4
559,5KhZNvQ9eXo53ZKeQE8G2b,All Your Friends,"[Icarus, Quelle T]",All Your Friends,3eKaMTFMWyFwKIzOKCdJEo,"[Icarus, Quelle T]",2020-10-16,225306,20,False,...,5,-6.865,0,0.0498,0.256000,0.169,0.1980,0.3750,98.001,4
560,4Bif5TdVlzTiEff3hTwS5W,Enter the Dreamland,[Meresha],Look How Far,4bHyxz0m1csBnNZD1Ta1Jt,[Meresha],2020-09-25,239666,26,False,...,5,-4.240,0,0.0354,0.000421,0.00672,0.1590,0.3090,119.997,4


## Export Data

In [18]:
# write to csv in the folder called data 
merged_df.to_csv(f'~/data/spotify_data.csv', index=False)