### Objective: Enrich the existing dataset using the Spotify Web API
### Dataset: Spotify dataset

*Importing Libraries*

In [None]:

import base64
import getpass
import os
import time

import pandas as pd
import requests

Dataset loaded: 8330 tracks


*Function to get the access token using the client id and client secret*

In [None]:

def get_access_token(client_id, client_secret, timeout=10):
    """Request an access token from Spotify using the client-credentials grant.

    Parameters
    ----------
    client_id : str
        Client id of the Spotify application.
    client_secret : str
        Client secret of the Spotify application.
    timeout : float, optional
        Seconds to wait for the token endpoint before giving up (default 10).
        Without a timeout ``requests`` may block indefinitely on a stalled
        connection.

    Returns
    -------
    str
        The ``access_token`` field of the JSON response.

    Raises
    ------
    requests.HTTPError
        If the token endpoint answers with an error status.
    """
    auth_url = "https://accounts.spotify.com/api/token"
    # HTTP Basic credentials: base64("<client_id>:<client_secret>")
    auth_header = base64.b64encode(f"{client_id}:{client_secret}".encode()).decode("ascii")
    headers = {"Authorization": f"Basic {auth_header}"}
    data = {"grant_type": "client_credentials"}

    response = requests.post(auth_url, headers=headers, data=data, timeout=timeout)
    response.raise_for_status()
    return response.json()["access_token"]

# Credentials are read from the environment (or prompted for interactively)
# instead of being hardcoded: a secret stored in a notebook is exposed to
# everyone who can read the file or its history.
# NOTE(review): the client secret that used to be embedded in this cell should
# be rotated in the Spotify developer dashboard.
client_id = os.environ.get("SPOTIFY_CLIENT_ID") or getpass.getpass("Spotify client id: ")
client_secret = os.environ.get("SPOTIFY_CLIENT_SECRET") or getpass.getpass("Spotify client secret: ")


# get access token using client id and client secret
access_token = get_access_token(client_id, client_secret)
# Bearer header reused by every Web API request made later in the notebook.
headers = {"Authorization": f"Bearer {access_token}"}


*API call for data*

In [None]:
def _spotify_get(url, timeout=10, max_retries=3, max_wait=60):
    """GET ``url`` with the notebook's bearer ``headers``, retrying on HTTP 429.

    On a 429 response the call sleeps for the number of seconds given in the
    ``Retry-After`` header (1 second if the header is absent or unparsable) and
    tries again, at most ``max_retries`` times. If the server asks for more than
    ``max_wait`` seconds the 429 response is returned right away so the notebook
    does not stall. Returns the last ``requests.Response`` received.
    """
    attempt = 0
    while True:
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code != 429 or attempt >= max_retries:
            return response
        try:
            delay = float(response.headers.get("Retry-After", 1))
        except (TypeError, ValueError):
            delay = 1.0
        if delay > max_wait:
            return response
        time.sleep(delay)
        attempt += 1


def _fetch_optional(url):
    """Return the JSON body of ``url`` on HTTP 200, otherwise an empty dict."""
    response = _spotify_get(url)
    return response.json() if response.status_code == 200 else {}


def enrich_track_metadata(track_id):
    """Fetch only track, artist, and album metadata (skip audio features).

    The track is looked up first; the id of its first listed artist and the id
    of its album are then used for one artist request and one album request.

    Parameters
    ----------
    track_id : str
        Spotify track id.

    Returns
    -------
    dict
        Keys ``artist_followers``, ``artist_popularity``, ``album_type`` and
        ``label``. A value is ``None`` when the corresponding artist/album
        lookup was skipped (no id on the track), did not return HTTP 200, or
        the field is missing from the response.

    Raises
    ------
    requests.HTTPError
        If the track lookup itself fails. Raising here, instead of carrying on
        with an error payload, avoids requesting ``/artists/None`` and
        ``/albums/None`` and lets the caller report the failure.
    """
    # Track info (mandatory: without it there is nothing to look up)
    track_res = _spotify_get(f"https://api.spotify.com/v1/tracks/{track_id}")
    track_res.raise_for_status()
    track_info = track_res.json()

    # Artist info: `or [{}]` also covers an empty list or a JSON null
    artists = track_info.get("artists") or [{}]
    artist_id = artists[0].get("id")
    artist_info = (
        _fetch_optional(f"https://api.spotify.com/v1/artists/{artist_id}")
        if artist_id
        else {}
    )

    # Album info
    album_id = (track_info.get("album") or {}).get("id")
    album_info = (
        _fetch_optional(f"https://api.spotify.com/v1/albums/{album_id}")
        if album_id
        else {}
    )

    # Collect only new metadata
    return {
        # Artist metadata
        "artist_followers": (artist_info.get("followers") or {}).get("total"),
        "artist_popularity": artist_info.get("popularity"),
        # Album metadata
        "album_type": album_info.get("album_type"),
        "label": album_info.get("label"),
    }


*Calling the API function*

In [None]:

# Columns this notebook adds to the dataset.
metadata_cols = ["artist_followers", "artist_popularity", "album_type", "label"]

save_path = "/content/drive/MyDrive/Spotify_analytics/spotify_final.csv"

# Write a checkpoint after this many newly fetched tracks.
SAVE_EVERY = 10

# Load your dataset (original or partially enriched)
df = pd.read_csv(save_path)

# Ensure new columns exist
for col in metadata_cols:
    if col not in df.columns:
        df[col] = None

# A text column that is still completely empty in the CSV is read back as
# float64 (all NaN); writing a string into it is deprecated by pandas, so the
# two text columns are held as generic objects.
for col in ("album_type", "label"):
    df[col] = df[col].astype(object)

# NOTE(review): a value that is written to the CSV as an empty field (e.g. the
# label '' visible in the logged output) is read back as NaN by read_csv, so
# such rows are fetched again on every re-run - confirm that this is acceptable.

fetched_since_save = 0
failed_track_ids = []

# Loop through rows and enrich only missing metadata
for i, row in df.iterrows():
    track_id = row['track_id']

    # Skip rows already enriched
    if all(pd.notna(row[col]) for col in metadata_cols):
        continue

    print(f"Fetching {track_id}...")

    # Only the API call is best-effort; a failure to write the checkpoint below
    # is not a per-track problem and is allowed to surface.
    try:
        new_data = enrich_track_metadata(track_id)
    except Exception as e:
        print(f" Error for track {track_id}: {e}")
        failed_track_ids.append(track_id)
        continue

    for col in metadata_cols:
        # Only fill if missing
        if pd.isna(row[col]) and new_data.get(col) is not None:
            df.at[i, col] = new_data[col]

    # Save progress every SAVE_EVERY fetched tracks. Counting fetched tracks
    # (rather than testing the row index) keeps checkpoints regular on resumed
    # runs, where most rows are skipped.
    fetched_since_save += 1
    if fetched_since_save >= SAVE_EVERY:
        df.to_csv(save_path, index=False)
        fetched_since_save = 0
        print(f"Progress saved at row {i+1}")

# --- Final save ---
df.to_csv(save_path, index=False)
if failed_track_ids:
    print(f" Dataset saved, but {len(failed_track_ids)} track(s) could not be fetched; see failed_track_ids.")
else:
    print(" Dataset enriched successfully (audio features untouched).")


Fetching 6yIzxXqgJ4fkTI0HJlPAPQ...
data {'artist_followers': 2766239, 'artist_popularity': 0, 'album_type': 'single', 'label': ''}
Progress saved at row 530
Fetching 1bLz3Mcy1pf1W04KZBWrHk...
data {'artist_followers': 148872, 'artist_popularity': 44, 'album_type': 'single', 'label': 'Columbia'}
Fetching 09eSdS5RTgyodJt3krr5AC...
data {'artist_followers': 955488, 'artist_popularity': 73, 'album_type': 'album', 'label': 'Universal Records'}
Fetching 7rBMZvgeWnOTHWUh3Pvw51...
data {'artist_followers': 378579, 'artist_popularity': 45, 'album_type': 'album', 'label': 'Island Mercury'}
Fetching 4ZpmGpw5L8kG0VNr3Zhgji...
data {'artist_followers': 2034885, 'artist_popularity': 66, 'album_type': 'album', 'label': 'Fair Trade/Columbia'}
Progress saved at row 5150
Fetching 165lX6DbMhTzqODVZKLmkm...
data {'artist_followers': 1699729, 'artist_popularity': 69, 'album_type': 'album', 'label': 'Roadrunner Records'}
Fetching 1yFz5j1B9TqIVwblu6QSUi...
data {'artist_followers': 6895260, 'artist_popularit