In [2]:
pip install requests -q

Note: you may need to restart the kernel to use updated packages.


## Libraries

In [2]:
import requests
import pandas as pd
import numpy as np
import base64
import json
import time
import seaborn as sns
import matplotlib.pyplot as plt

## Randomly Sample Spotify API

In [4]:
# Define your credentials and base URL
CLIENT_ID = '' # Removed CLIENT_ID for data privacy
CLIENT_SECRET = '' # Removed CLIENT_SECRET for data privacy
BASE_URL = "https://api.spotify.com/v1"

# Function to get the access token
def get_access_token(client_id, client_secret):
    auth_url = "https://accounts.spotify.com/api/token"
    auth_header = base64.b64encode((client_id + ':' + client_secret).encode('ascii')).decode('ascii')
    auth_data = {
        'grant_type': 'client_credentials'
    }
    headers = {
        'Authorization': 'Basic ' + auth_header
    }
    response = requests.post(auth_url, data=auth_data, headers=headers)
    response_data = response.json()
    if response.status_code != 200:
        raise Exception(f"Error obtaining access token: {response_data}")
    return response_data['access_token']

# Function to get tracks from a genre
def get_pop_tracks(access_token, limit=50, offset=0):
    search_url = BASE_URL + "/search"
    headers = {
        'Authorization': 'Bearer ' + access_token
    }
    params = {
        'q': 'genre:pop',
        'type': 'track',
        'limit': limit,
        'offset': offset
    }
    response = requests.get(search_url, headers=headers, params=params)
    if response.status_code != 200:
        print(f"Error fetching tracks: {response.json()}")
        return []
    return response.json().get('tracks', {}).get('items', [])

# Function to get audio features for a list of track IDs
def get_audio_features(access_token, track_ids):
    features_url = BASE_URL + "/audio-features"
    headers = {
        'Authorization': 'Bearer ' + access_token
    }
    audio_features = []
    for i in range(0, len(track_ids), 100):
        params = {
            'ids': ','.join(track_ids[i:i + 100])
        }
        response = requests.get(features_url, headers=headers, params=params)
        if response.status_code != 200:
            print(f"Error fetching audio features: {response.json()}")
            continue
        audio_features.extend(response.json().get('audio_features', []))
        time.sleep(1)  
    return audio_features

# Main script
try:
    access_token = get_access_token(CLIENT_ID, CLIENT_SECRET)
except Exception as e:
    print(e)
    exit()

# Collect 2000 pop tracks
all_tracks = []
total_tracks = 1500
limit = 50

for i in range(total_tracks // limit):
    tracks_data = get_pop_tracks(access_token, limit=limit, offset=i * limit)
    all_tracks.extend(tracks_data)
    time.sleep(1) 

# Extract track details (ID, popularity, release date, explicit)
track_details = [{'id': track['id'], 'popularity': track['popularity'], 'release_date': track['album']['release_date'], 'explicit': track['explicit']} for track in all_tracks]

# Get audio features for these tracks
track_ids = [track['id'] for track in all_tracks]
audio_features = get_audio_features(access_token, track_ids)

# Convert the audio features to a pandas DataFrame
df_audio_features = pd.DataFrame(audio_features)

# Convert the track details to a DataFrame
df_track_details = pd.DataFrame(track_details)

# Merge the audio features with track details
df_combined = pd.merge(df_audio_features, df_track_details, on='id')

# Display the DataFrame
print(df_combined)

Error fetching tracks: {'error': {'status': 400, 'message': 'Bad request.'}}
Error fetching tracks: {'error': {'status': 400, 'message': 'Bad request.'}}
Error fetching tracks: {'error': {'status': 400, 'message': 'Bad request.'}}
Error fetching tracks: {'error': {'status': 400, 'message': 'Bad request.'}}
Error fetching tracks: {'error': {'status': 400, 'message': 'Bad request.'}}
Error fetching tracks: {'error': {'status': 400, 'message': 'Bad request.'}}
Error fetching tracks: {'error': {'status': 400, 'message': 'Bad request.'}}
Error fetching tracks: {'error': {'status': 400, 'message': 'Bad request.'}}
Error fetching tracks: {'error': {'status': 400, 'message': 'Bad request.'}}
Error fetching tracks: {'error': {'status': 400, 'message': 'Bad request.'}}
      danceability  energy  key  loudness  mode  speechiness  acousticness  \
0            0.612   0.520    7    -6.192     1       0.0353      0.173000   
1            0.643   0.802    9    -5.066     1       0.1450      0.375000

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,...,type,id,uri,track_href,analysis_url,duration_ms,time_signature,popularity,release_date,explicit
0,0.612,0.520,7,-6.192,1,0.0353,0.173000,0.000000,0.0989,0.3610,...,audio_features,7wVp5yvEsSSgsTTUwEhgVe,spotify:track:7wVp5yvEsSSgsTTUwEhgVe,https://api.spotify.com/v1/tracks/7wVp5yvEsSSg...,https://api.spotify.com/v1/audio-analysis/7wVp...,190869,3,0,1983-01-01,False
1,0.643,0.802,9,-5.066,1,0.1450,0.375000,0.000000,0.3280,0.7290,...,audio_features,55t3Y0lsDuhlpNoeYKyrHy,spotify:track:55t3Y0lsDuhlpNoeYKyrHy,https://api.spotify.com/v1/tracks/55t3Y0lsDuhl...,https://api.spotify.com/v1/audio-analysis/55t3...,143733,4,0,1997-06-04,False
2,0.773,0.430,9,-12.956,1,0.0329,0.636000,0.024300,0.0910,0.3970,...,audio_features,5tt5OEdYdgWGZMSk2yfLXX,spotify:track:5tt5OEdYdgWGZMSk2yfLXX,https://api.spotify.com/v1/tracks/5tt5OEdYdgWG...,https://api.spotify.com/v1/audio-analysis/5tt5...,220333,4,0,2009-01-01,False
3,0.570,0.783,0,-8.428,1,0.0410,0.677000,0.000069,0.2580,0.9390,...,audio_features,3Px3kUyygoFleBPFfD5Wci,spotify:track:3Px3kUyygoFleBPFfD5Wci,https://api.spotify.com/v1/tracks/3Px3kUyygoFl...,https://api.spotify.com/v1/audio-analysis/3Px3...,238947,4,0,2018-12-18,False
4,0.231,0.458,3,-10.334,1,0.0377,0.680000,0.000000,0.0931,0.0949,...,audio_features,1hJ0BhY7SIE52GDZEX8Wu3,spotify:track:1hJ0BhY7SIE52GDZEX8Wu3,https://api.spotify.com/v1/tracks/1hJ0BhY7SIE5...,https://api.spotify.com/v1/audio-analysis/1hJ0...,194867,3,0,2014-03-07,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1299,0.218,0.857,2,-7.005,1,0.0665,0.000323,0.000000,0.1010,0.6590,...,audio_features,5WuajYHwpf9fRO6EmENAA6,spotify:track:5WuajYHwpf9fRO6EmENAA6,https://api.spotify.com/v1/tracks/5WuajYHwpf9f...,https://api.spotify.com/v1/audio-analysis/5Wua...,129413,4,3,2010-07-05,True
1300,0.655,0.779,1,-4.733,0,0.0586,0.631000,0.000000,0.1110,0.8510,...,audio_features,14BHU7qhjAqg1rQ0SX284d,spotify:track:14BHU7qhjAqg1rQ0SX284d,https://api.spotify.com/v1/tracks/14BHU7qhjAqg...,https://api.spotify.com/v1/audio-analysis/14BH...,288491,3,40,2020-09-30,False
1301,0.725,0.649,10,-5.083,0,0.0532,0.189000,0.000002,0.1210,0.2060,...,audio_features,4yt3C4Ls2QWZV6HzgNdaKJ,spotify:track:4yt3C4Ls2QWZV6HzgNdaKJ,https://api.spotify.com/v1/tracks/4yt3C4Ls2QWZ...,https://api.spotify.com/v1/audio-analysis/4yt3...,190840,4,3,2023-04-06,False
1302,0.779,0.729,4,-4.323,0,0.2410,0.179000,0.000000,0.1430,0.8990,...,audio_features,5FenfCHU8rxxaMO6JMBfjq,spotify:track:5FenfCHU8rxxaMO6JMBfjq,https://api.spotify.com/v1/tracks/5FenfCHU8rxx...,https://api.spotify.com/v1/audio-analysis/5Fen...,208045,4,13,2020-03-06,True


In [5]:
df_combined.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,...,type,id,uri,track_href,analysis_url,duration_ms,time_signature,popularity,release_date,explicit
0,0.612,0.52,7,-6.192,1,0.0353,0.173,0.0,0.0989,0.361,...,audio_features,7wVp5yvEsSSgsTTUwEhgVe,spotify:track:7wVp5yvEsSSgsTTUwEhgVe,https://api.spotify.com/v1/tracks/7wVp5yvEsSSg...,https://api.spotify.com/v1/audio-analysis/7wVp...,190869,3,0,1983-01-01,False
1,0.643,0.802,9,-5.066,1,0.145,0.375,0.0,0.328,0.729,...,audio_features,55t3Y0lsDuhlpNoeYKyrHy,spotify:track:55t3Y0lsDuhlpNoeYKyrHy,https://api.spotify.com/v1/tracks/55t3Y0lsDuhl...,https://api.spotify.com/v1/audio-analysis/55t3...,143733,4,0,1997-06-04,False
2,0.773,0.43,9,-12.956,1,0.0329,0.636,0.0243,0.091,0.397,...,audio_features,5tt5OEdYdgWGZMSk2yfLXX,spotify:track:5tt5OEdYdgWGZMSk2yfLXX,https://api.spotify.com/v1/tracks/5tt5OEdYdgWG...,https://api.spotify.com/v1/audio-analysis/5tt5...,220333,4,0,2009-01-01,False
3,0.57,0.783,0,-8.428,1,0.041,0.677,6.9e-05,0.258,0.939,...,audio_features,3Px3kUyygoFleBPFfD5Wci,spotify:track:3Px3kUyygoFleBPFfD5Wci,https://api.spotify.com/v1/tracks/3Px3kUyygoFl...,https://api.spotify.com/v1/audio-analysis/3Px3...,238947,4,0,2018-12-18,False
4,0.231,0.458,3,-10.334,1,0.0377,0.68,0.0,0.0931,0.0949,...,audio_features,1hJ0BhY7SIE52GDZEX8Wu3,spotify:track:1hJ0BhY7SIE52GDZEX8Wu3,https://api.spotify.com/v1/tracks/1hJ0BhY7SIE5...,https://api.spotify.com/v1/audio-analysis/1hJ0...,194867,3,0,2014-03-07,False


## Import Kaggle Dataset to increase sample

In [7]:
# Import Kaggle Spotify dataset
kaggle_df = pd.read_csv('dataset.csv', index_col=0)

In [8]:
# Subset Kaggle df to pop genre
kaggle_pop_df = kaggle_df[kaggle_df['track_genre'] == 'pop']

In [9]:
kaggle_pop_df.head()

Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,track_genre
81000,0VjIjW4GlUZAMYd2vXMi3b,The Weeknd,After Hours,Blinding Lights,91,200040,False,0.514,0.73,1,-5.934,1,0.0598,0.00146,9.5e-05,0.0897,0.334,171.005,4,pop
81001,3yV2c3cYhkAwSsxAq6nRiv,Gajendra Verma,Table No. 21 (Original Motion Picture Soundtrack),Mann Mera,74,200120,False,0.535,0.765,1,-7.862,0,0.0444,0.054,0.0,0.0921,0.373,191.827,4,pop
81002,1aL9518P5G72N92b48tuKw,AP Dhillon,Summer High,Summer High,83,177391,False,0.86,0.541,1,-6.51,0,0.0325,0.165,0.163,0.0663,0.952,114.984,4,pop
81003,5IgjP7X4th6nMNDh4akUHb,Chris Brown,Indigo (Extended),Under The Influence,96,184613,True,0.733,0.69,9,-5.529,0,0.0427,0.0635,1e-06,0.105,0.31,116.992,4,pop
81004,7MXVkk9YMctZqd1Srtv4MB,The Weeknd;Daft Punk,Starboy,Starboy,90,230453,True,0.679,0.587,7,-7.015,1,0.276,0.141,6e-06,0.137,0.486,186.003,4,pop


## Function to add 'release_date' column in Kaggle data through API

In [11]:
# Function to get track details from the Spotify API
def get_track_details(access_token, track_id):
    track_url = f"{BASE_URL}/tracks/{track_id}"
    headers = {
        'Authorization': 'Bearer ' + access_token
    }
    response = requests.get(track_url, headers=headers)
    if response.status_code != 200:
        print(f"Error fetching track details for {track_id}: {response.json()}")
        return None
    return response.json()

# Main script
try:
    access_token = get_access_token(CLIENT_ID, CLIENT_SECRET)
except Exception as e:
    print(e)
    exit()


# Add a 'release_date' column to the DataFrame
kaggle_pop_df['release_date'] = None

# Fetch release date for each track and update the DataFrame
for index, row in kaggle_pop_df.iterrows():
    track_id = row['track_id']
    track_details = get_track_details(access_token, track_id)
    if track_details:
        release_date = track_details['album']['release_date']
        kaggle_pop_df.loc[index, 'release_date'] = release_date
    time.sleep(1)  # To avoid hitting rate limits

# Display the updated DataFrame
print(kaggle_pop_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  kaggle_pop_df['release_date'] = None


                     track_id                                    artists  \
81000  0VjIjW4GlUZAMYd2vXMi3b                                 The Weeknd   
81001  3yV2c3cYhkAwSsxAq6nRiv                             Gajendra Verma   
81002  1aL9518P5G72N92b48tuKw                                 AP Dhillon   
81003  5IgjP7X4th6nMNDh4akUHb                                Chris Brown   
81004  7MXVkk9YMctZqd1Srtv4MB                       The Weeknd;Daft Punk   
...                       ...                                        ...   
81995  6mmGbsCqf5e8vxv1WodYIG                              G. V. Prakash   
81996  2WO5nzB7QtKn9ZRc9Jkalt            Harris Jayaraj;Sudha Ragunathan   
81997  5OCFWPgrCCNBukB3YrDD90                                 Raj Barman   
81998  13wIQbwSuQ4YFvDvtQgSVc  Harris Jayaraj;Unnikrishnan;Chinmayi;Mega   
81999  62yaHPUl2vELHwaHb6R5oW                         Yuvan Shankar Raja   

                                              album_name           track_name  \
81000 

In [13]:
# Identify common columns
common_columns = df_combined.columns.intersection(kaggle_pop_df.columns)

# Filter DataFrames to keep only common columns
df_combined_filtered = df_combined[common_columns]
kaggle_pop_df_filtered = kaggle_pop_df[common_columns]

# Concatenate DataFrames
api_df = pd.concat([df_combined_filtered, kaggle_pop_df_filtered], ignore_index=True)

# Display the updated DataFrame
print(api_df)

      danceability  energy  key  loudness  mode  speechiness  acousticness  \
0            0.612   0.520    7    -6.192     1       0.0353         0.173   
1            0.643   0.802    9    -5.066     1       0.1450         0.375   
2            0.773   0.430    9   -12.956     1       0.0329         0.636   
3            0.570   0.783    0    -8.428     1       0.0410         0.677   
4            0.231   0.458    3   -10.334     1       0.0377         0.680   
...            ...     ...  ...       ...   ...          ...           ...   
2299         0.448   0.607    9    -4.606     0       0.0611         0.791   
2300         0.773   0.436    4   -10.972     0       0.0321         0.672   
2301         0.639   0.368    2   -14.096     0       0.0480         0.245   
2302         0.718   0.361    5    -9.740     0       0.0298         0.565   
2303         0.712   0.422    6   -10.688     1       0.0460         0.307   

      instrumentalness  liveness  valence    tempo  duration_ms

In [20]:
# Check for missing values
api_df.isnull().sum()

danceability        0
energy              0
key                 0
loudness            0
mode                0
speechiness         0
acousticness        0
instrumentalness    0
liveness            0
valence             0
tempo               0
duration_ms         0
time_signature      0
popularity          0
release_date        0
explicit            0
dtype: int64

In [134]:
# Export api_df to prevent re-run of Spotify API
api_df.to_csv('api_df.csv')

In [158]:
# Import api_df
df = pd.read_csv('api_df.csv', index_col=0)

## Standardizing Numerical Features 

In [160]:
# Preprocessing steps
df['release_date'] = pd.to_datetime(df['release_date'], errors='coerce')
df['release_year'] = df['release_date'].dt.year
df['duration_ms'] = df['duration_ms'] / 1000
df.rename(columns={'duration_ms': 'duration_s'}, inplace=True)
df['virality'] = (df['popularity'] > 80).astype(int)
df['explicit'] = df['explicit'].astype(int)
df.dropna(subset='release_date', inplace=True)

# Define numerical feature for standardization
numerical_features = ['popularity', 'duration_s', 'danceability', 'energy', 'valence', 'tempo', 'loudness', 'liveness', 'instrumentalness', 'acousticness']

# Standardization
scaler = StandardScaler()
df[numerical_features] = scaler.fit_transform(df[numerical_features])

In [161]:
df.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_s,time_signature,popularity,release_date,explicit,release_year,virality
0,0.109892,-0.399148,7,0.447035,1,0.0353,-0.626147,-0.225815,-0.612151,-0.688519,0.000427,-0.450347,3,-0.787262,1983-01-01,0,1983.0,0
1,0.312524,0.961265,9,0.782909,1,0.145,0.040012,-0.225815,0.775918,0.880122,0.518389,-1.035873,4,-0.787262,1997-06-04,0,1997.0,0
2,1.162273,-0.833323,9,-1.570594,1,0.0329,0.900743,-0.026841,-0.660016,-0.535065,-0.658211,-0.084343,4,-0.787262,2009-01-01,0,2009.0,0
3,-0.164642,0.869606,0,-0.21994,1,0.041,1.035954,-0.225252,0.351803,1.775271,-1.488966,0.146881,4,-0.787262,2018-12-18,0,2018.0,0
4,-2.380525,-0.698246,3,-0.78848,1,0.0377,1.045847,-0.225815,-0.647292,-1.8228,-1.185859,-0.400683,3,-0.787262,2014-03-07,0,2014.0,0


In [162]:
# Check for missing values
df.isnull().sum()

danceability        0
energy              0
key                 0
loudness            0
mode                0
speechiness         0
acousticness        0
instrumentalness    0
liveness            0
valence             0
tempo               0
duration_s          0
time_signature      0
popularity          0
release_date        0
explicit            0
release_year        0
virality            0
dtype: int64

In [163]:
# Export preprocessed_df
df.to_csv('preprocessed_df.csv')

## Additional API to increase number of 'Viral' Tracks

In [13]:
# API to add more 'Viral' tracks to handle imbalance classes for Supervised Learning
CLIENT_ID = '' # Removed CLIENT_ID for data privacy
CLIENT_SECRET = '' # Removed CLIENT_SECRET for data privacy
BASE_URL = "https://api.spotify.com/v1"

# Retrieves an authentication token from Spotify
def get_token(client_id, client_secret):
    auth_url = "https://accounts.spotify.com/api/token"
    auth_string = f"{client_id}:{client_secret}"
    auth_bytes = auth_string.encode("utf-8")
    auth_base64 = base64.b64encode(auth_bytes).decode("utf-8")

    headers = {
        "Authorization": "Basic " + auth_base64,
        "Content-Type": "application/x-www-form-urlencoded"
    }
    payload = {"grant_type": "client_credentials"}

    response = requests.post(auth_url, headers=headers, data=payload)
    json_response = response.json()
    return json_response.get("access_token")

# Retrieves the Spotify ID of an artist by name
def get_artist_id(token, artist_name):
    url = f"{BASE_URL}/search"
    headers = {"Authorization": f"Bearer {token}"}
    params = {
        "q": artist_name,
        "type": "artist",
        "limit": 1
    }
    response = requests.get(url, headers=headers, params=params)
    data = response.json()
    items = data['artists']['items']
    if not items:
        return None
    return items[0]['id']

# Fetches the top tracks of an artist by Spotify ID
def get_tracks(token, artist_id):
    url = f"{BASE_URL}/artists/{artist_id}/top-tracks"
    headers = {"Authorization": f"Bearer {token}"}
    params = {"country": "US"}
    response = requests.get(url, headers=headers, params=params)
    return response.json()['tracks']

# Retrieves audio features for a list of track IDs
def get_audio_features(token, track_ids):
    url = f"{BASE_URL}/audio-features"
    headers = {"Authorization": f"Bearer {token}"}
    params = {"ids": ",".join(track_ids)}
    response = requests.get(url, headers=headers, params=params)
    return response.json()['audio_features']

# Collects detailed information and audio features for all tracks of a given artist
def get_artist_tracks_and_features(token, artist_name):
    artist_id = get_artist_id(token, artist_name)
    if not artist_id:
        return pd.DataFrame() 

    tracks = get_tracks(token, artist_id)
    track_ids = [track['id'] for track in tracks]
    features = get_audio_features(token, track_ids)
    
    track_details = []
    for track, feature in zip(tracks, features):
        stream_count = track.get('stream_count', 'Data not available')  
        track_details.append({
            'Artist': artist_name, 
            'Name': track['name'],
            'Album': track['album']['name'],
            'Release Date': track['album']['release_date'],
            'Popularity': track['popularity'],
            'Duration (ms)': track['duration_ms'],
            'Explicit': track['explicit'],
            'Spotify URL': track['external_urls']['spotify'],
            'Streams': stream_count,  
            'Danceability': feature['danceability'],
            'Energy': feature['energy'],
            'Key': feature['key'],
            'Mode': feature['mode'],
            'Valence': feature['valence'],
            'Tempo': feature['tempo'],
            'Loudness': feature['loudness'],
            'Liveness': feature['liveness'],
            'Instrumentalness': feature['instrumentalness'],
            'Acousticness': feature['acousticness']
        })
    
    return pd.DataFrame(track_details)

# Aggregates track and feature data across multiple artists
def get_multiple_artists_tracks_and_features(token, artist_list):
    dfs = []
    for artist in artist_list:
        df = get_artist_tracks_and_features(token, artist)
        if not df.empty:
            dfs.append(df)
    if dfs:
        return pd.concat(dfs, ignore_index=True)
    else:
        return pd.DataFrame()

In [None]:
# Function to get top artists for a given genre
def get_top_artists_by_genre(client_id, client_secret, genre):
    token = get_token(client_id, client_secret)
    url = "https://api.spotify.com/v1/search"
    headers = {"Authorization": f"Bearer {token}"}
    params = {
        "q": f"genre:{genre}",
        "type": "artist",
        "limit": 50
    }

    response = requests.get(url, headers=headers, params=params)
    data = response.json()
    
    artist_details = []
    for artist in data['artists']['items']:
        monthly_listeners = artist.get('followers', {}).get('total', 'Data not available')
        artist_details.append({
            'Name': artist['name'],
            'Monthly Listeners': monthly_listeners 
        })

    return pd.DataFrame(artist_details), token

genre = 'pop' 
top_artists_df, token = get_top_artists_by_genre(CLIENT_ID, CLIENT_SECRET, genre)

artist_list = top_artists_df['Name'].tolist()
all_tracks_and_features = get_multiple_artists_tracks_and_features(token, artist_list)

In [None]:
# Loop through the top 10 artists and collect their tracks and features
all_artists_tracks = pd.DataFrame()

for index, row in top_artists_df.iterrows():
    artist_name = row['Name']
    artist_tracks = get_artist_tracks_and_features(artist_name)
    all_artists_tracks = pd.concat([all_artists_tracks, artist_tracks], ignore_index=True)

In [None]:
# Loop through the DataFrame and update the popularity
def update_popularity(df):
    token = get_token(CLIENT_ID, CLIENT_SECRET)
    popularities = []
    
    for index, row in df.iterrows():
        track_name = row['Name']
        artist_name = row['Artist']
        track = search_track(token, track_name, artist_name)
        
        if track:
            popularities.append(track['popularity'])
        else:
            popularities.append('Unknown Popularity')
        
        time.sleep(0.1)

    df['Popularity'] = popularities
    return df

# Update the popularity
updated_df = update_popularity(all_artists_tracks)

In [None]:
artists_df = updated_df
artists_df.to_csv('clean_artists_df.csv')

In [25]:
# Import saved csv file
clean_artists_df = pd.read_csv('clean_artists_df.csv', index_col=0)

In [26]:
clean_artists_df.head()

Unnamed: 0,Artist,Name,Album,Release Date,Popularity,Duration (s),Explicit,Danceability,Energy,Key,Mode,Valence,Tempo,Loudness,Liveness,Instrumentalness,Acousticness,Virality
0,Taylor Swift,Fortnight (feat. Post Malone),THE TORTURED POETS DEPARTMENT: THE ANTHOLOGY,2024-04-19,95.0,228.965,0,0.504,0.386,11,1,0.281,192.004,-10.976,0.0961,1.5e-05,0.502,1
1,Taylor Swift,The Tortured Poets Department,THE TORTURED POETS DEPARTMENT: THE ANTHOLOGY,2024-04-19,88.0,293.048,1,0.604,0.428,0,1,0.292,110.259,-8.441,0.126,0.0,0.0483,1
2,Taylor Swift,My Boy Only Breaks His Favorite Toys,THE TORTURED POETS DEPARTMENT: THE ANTHOLOGY,2024-04-19,88.0,203.801,0,0.596,0.563,0,1,0.481,97.073,-7.362,0.302,0.0,0.137,1
3,Taylor Swift,Down Bad,THE TORTURED POETS DEPARTMENT: THE ANTHOLOGY,2024-04-19,90.0,261.228,1,0.541,0.366,11,1,0.168,159.707,-10.412,0.0946,1e-06,0.56,1
4,Taylor Swift,"So Long, London",THE TORTURED POETS DEPARTMENT: THE ANTHOLOGY,2024-04-19,89.0,262.974,0,0.423,0.533,9,1,0.248,160.218,-11.388,0.0816,0.00264,0.73,1
