Take one look in the mirror

Implication so clear

I live life with no fear

Except for the idea

That one day you won't be here

'-' **Tyler, The Creator**  

In [6]:
# Importing necessary libraries

import spotipy
import pandas as pd
import numpy as np
import time
from requests.exceptions import ReadTimeout
from spotipy.exceptions import SpotifyException
from timeit import default_timer as timer
from datetime import timedelta
from pandas.api.types import CategoricalDtype

In [7]:
import configparser
from spotipy.oauth2 import SpotifyClientCredentials

# Create a ConfigParser object
config = configparser.ConfigParser()

# Read the config file
config.read('config.ini')

# Get credentials from the config file
client_id = config.get('credentials', 'Client_ID')
client_secret = config.get('credentials', 'Client_Secret')

client_credentials_manager = SpotifyClientCredentials(
                                client_id = client_id, client_secret = client_secret)

sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager) 

In [10]:
# function to know how long it'll take to scrape the data
def format_time(seconds):
    minutes = seconds // 60
    seconds = seconds % 60
    if minutes > 0:
        return f"{minutes} minutes, {seconds} seconds"
    else:
        return f"{seconds} seconds"

In [None]:
# start time
start_time = timer()

# For tyler, The Creator as a main artist
artist_name = []
track_name = []
track_id = []
album_name = []
album_id = []
release_date = []
popularity = []
explicit = []
danceability = []
duration_ms = []
energy = []
key = []
loudness = []
mode = []
speechiness = []
acousticness = []
instrumentalness = []
liveness = []
valence = []
tempo = []
time_signature = []
featured_artists = []

# loop through results, using offset to get all tracks
for i in range(0, 1000, 50):
    try:
        track_results = sp.search(q='artist: Tyler, The Creator', type='track', limit=50, offset=i)
        for i, t in enumerate(track_results['tracks']['items']):
        
            # get track details
            audio_features = sp.audio_features(item['id'])[0]
            if audio_features is not None:
                danceability.append(audio_features['danceability'])
                duration_ms.append(audio_features['duration_ms'])
                energy.append(audio_features['energy'])
                key.append(audio_features['key'])
                loudness.append(audio_features['loudness'])
                mode.append(audio_features['mode'])
                speechiness.append(audio_features['speechiness'])
                acousticness.append(audio_features['acousticness'])
                instrumentalness.append(audio_features['instrumentalness'])
                liveness.append(audio_features['liveness'])
                valence.append(audio_features['valence'])
                tempo.append(audio_features['tempo'])
                time_signature.append(audio_features['time_signature'])
            else:
                # Append NaNs or suitable placeholders if audio features are missing
                danceability.append(float('nan'))
                duration_ms.append(float('nan'))
                energy.append(float('nan'))
                key.append(float('nan'))
                loudness.append(float('nan'))
                mode.append(float('nan'))
                speechiness.append(float('nan'))
                acousticness.append(float('nan'))
                instrumentalness.append(float('nan'))
                liveness.append(float('nan'))
                valence.append(float('nan'))
                tempo.append(float('nan'))
                time_signature.append(float('nan'))

        

            # get featured artists
            if len(t['artists']) > 1:
                feat_artists = []
                for j in range(1, len(t['artists'])):
                    feat_artists.append(t['artists'][j]['name'])
                featured_artists.append(feat_artists)
            else:
                featured_artists.append([])
    except ReadTimeout as e:
        print(f"Error: {e}. Retrying in 5 seconds...")
        time.sleep(5)  # Retry after a short delay
    except SpotifyException as e:
        print(f"Spotify API Error: {e}")
        break

# create dataframe from lists
df_raw = pd.DataFrame({
    'artist_name': artist_name,
    'track_name': track_name,
    'track_id': track_id,
    'album_name': album_name,
    'album_id': album_id,
    'release_date': release_date,
    'duration_ms': duration_ms,
    'popularity': popularity,
    'explicit': explicit,
    'danceability': danceability,
    'energy': energy,
    'key': key,
    'loudness': loudness,
    'mode': mode,
    'speechiness': speechiness,
    'acousticness': acousticness,
    'instrumentalness': instrumentalness,
    'liveness': liveness,
    'valence': valence,
    'tempo': tempo,
    'time_signature': time_signature,
    'featured_artists': featured_artists
})



# end time and print
end_time = timer()
elapsed_time = int(end_time - start_time)
print(f"Elapsed time: {format_time(elapsed_time)}")



In [11]:
import pandas as pd
import time
import requests
from timeit import default_timer as timer


def collect_spotify_data():
    start_time = timer()
    data = []  # Using a list of dictionaries to collect data

    for i in range(0, 1000, 50):
        try:
            track_results = sp.search(q='artist: Tyler, The Creator', type='track', limit=50, offset=i)
            for item in track_results['tracks']['items']:
                track_info = {
                    'artist_name': item['artists'][0]['name'],
                    'track_name': item['name'],
                    'track_id': item['id'],
                    'album_name': item['album']['name'],
                    'album_id': item['album']['id'],
                    'release_date': item['album']['release_date'],
                    'popularity': item['popularity'],
                    'explicit': item['explicit']
                }
                
                # Get audio features for track
                audio_features = sp.audio_features(item['id'])[0]
                audio_info = {
                    'danceability': audio_features.get('danceability', float('nan')),
                    'duration_ms': audio_features.get('duration_ms', float('nan')),
                    'energy': audio_features.get('energy', float('nan')),
                    'key': audio_features.get('key', float('nan')),
                    'loudness': audio_features.get('loudness', float('nan')),
                    'mode': audio_features.get('mode', float('nan')),
                    'speechiness': audio_features.get('speechiness', float('nan')),
                    'acousticness': audio_features.get('acousticness', float('nan')),
                    'instrumentalness': audio_features.get('instrumentalness', float('nan')),
                    'liveness': audio_features.get('liveness', float('nan')),
                    'valence': audio_features.get('valence', float('nan')),
                    'tempo': audio_features.get('tempo', float('nan')),
                    'time_signature': audio_features.get('time_signature', float('nan'))
                }
                track_info.update(audio_info)

                # Get featured artists
                if len(item['artists']) > 1:
                    featured_artists = [artist['name'] for artist in item['artists'][1:]]
                    track_info['featured_artists'] = featured_artists
                else:
                    track_info['featured_artists'] = []

                data.append(track_info)

        except requests.exceptions.ReadTimeout as e:
            print(f"Timeout error: {e}. Retrying in 5 seconds...")
            time.sleep(5)  # Retry after a short delay
        except requests.exceptions.RequestException as e:
            print(f"API error: {e}")
            break  # Break the loop if there's a persistent error

    df_raw = pd.DataFrame(data)
    end_time = timer()
    elapsed_time = int(end_time - start_time)
    print(f"Elapsed time: {elapsed_time} seconds")
    return df_raw

# Collecting data from Spotify API 
df_spotify = collect_spotify_data()


Timeout error: HTTPSConnectionPool(host='api.spotify.com', port=443): Read timed out. (read timeout=5). Retrying in 5 seconds...
Timeout error: HTTPSConnectionPool(host='api.spotify.com', port=443): Read timed out. (read timeout=5). Retrying in 5 seconds...
Elapsed time: 409 seconds


Unnamed: 0,artist_name,track_name,track_id,album_name,album_id,release_date,popularity,explicit,danceability,duration_ms,...,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,featured_artists
0,"Tyler, The Creator",IFHY (feat. Pharrell),0NjW4SKY3gbfl2orl1p8hr,Wolf,40QTqOBBxCEIQlLNdSjFQB,2013-04-01,72,True,0.358,319253,...,-6.181,1,0.3310,0.1970,0.000000,0.5810,0.275,85.478,4,[Pharrell Williams]
1,"Tyler, The Creator",She,0SA0mMXWh23raZ6xzOCU2V,Goblin,5dbCm8qaCdWnNdVyi3urUO,2011-05-09,77,True,0.350,253387,...,-6.495,0,0.2330,0.2560,0.000034,0.6130,0.459,83.375,4,[Frank Ocean]
2,"Tyler, The Creator",48,2wK7y55bzry7tUDmDqYaz7,Wolf,40QTqOBBxCEIQlLNdSjFQB,2013-04-01,59,True,0.558,247293,...,-8.083,1,0.2650,0.1320,0.000000,0.8900,0.537,119.910,4,[]
3,"Tyler, The Creator",NEW MAGIC WAND,0fv2KH6hac06J86hBUTcSf,IGOR,5zi7WsKlIiUXv09tbGLKsE,2019-05-17,81,True,0.621,195320,...,-5.414,0,0.1070,0.0967,0.000131,0.6730,0.464,139.566,4,[]
4,"Tyler, The Creator",Tamale,60b6B2ULC85t4Y5IjZvP7A,Wolf,40QTqOBBxCEIQlLNdSjFQB,2013-04-01,63,True,0.742,166347,...,-7.207,1,0.0977,0.0557,0.000242,0.2720,0.967,129.986,4,[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
753,TSR,22,02wm4TmFLqHGLyvZpde46h,22,1qGF9oMd4E67uixps7UIG8,2022-08-15,1,False,0.747,120394,...,-9.138,1,0.2670,0.8050,0.000252,0.0593,0.388,146.062,4,[]
754,poku6k,wtf!,38N6POWZEMSjxVn3exezGC,6KWORLD,2cFh16ZEZzZFhqgWeKWNC4,2023-10-27,1,True,0.889,105600,...,-9.077,0,0.4510,0.1010,0.000000,0.3980,0.953,149.949,4,[]
755,Zeno,3:23,1uFKCCYNOP3pJpHgetq71t,3:23,4der0uoPII0nrbT8mQuaty,2023-07-14,7,True,0.607,179168,...,-6.369,1,0.0416,0.2930,0.000000,0.1450,0.525,119.973,4,[]
756,257,Thc,7EPwt0BD5WqLQQ7WZHDIOg,All,73n3SryK1UFwJINDrmSsba,2018-12-27,2,False,0.608,164284,...,-13.235,1,0.0292,0.5870,0.259000,0.1410,0.487,130.023,4,[]
