## Importing the libraries

In [None]:
import os
import pandas as pd
import numpy as np
import json
import spotipy
import spotipy.oauth2 as oauth2
from spotipy.oauth2 import SpotifyOAuth,SpotifyClientCredentials
import re
from tqdm import tqdm
import multiprocessing as mp
import time
import random
import datetime

In [2]:
import os
from dotenv import load_dotenv

# Read the Spotify credentials from the local env file, then build an API
# client that authenticates through the client-credentials manager.
load_dotenv("spotify_secrets.env")
auth_manager = SpotifyClientCredentials(
    client_id=os.getenv('SPOTIPY_CLIENT_ID'),
    client_secret=os.getenv('SPOTIPY_CLIENT_SECRET'),
)
sp = spotipy.client.Spotify(auth_manager=auth_manager)

In [4]:
# Location of the input CSV (one row per track with its URI columns).
csv_path = 'V1.csv'

# Parse the file into an in-memory DataFrame.
df = pd.read_csv(filepath_or_buffer=csv_path)

In [5]:
# Display the column labels of the loaded frame as a quick schema check.
df.columns

Index(['track_uri', 'artist_uri', 'album_uri'], dtype='object')

In [None]:
# De-duplicated track and artist URIs, in order of first appearance in df.
t_uri, a_uri = (df[column].unique() for column in ("track_uri", "artist_uri"))

# Feature extraction

This section uses the Spotify API to extract features, saving the results to a CSV file and any errors to a log file.

I initially used `sp.track`, but I realised that it would take a long time and that I would run into a lot of API rate limits, so I switched to `sp.tracks` and `sp.artists` instead. They accept a list of up to 50 URIs and handle it in a single request, so the extraction took far less time.

In [7]:
# Function to find the last logged URI in the CSV
def find_last_logged_uri(file_path):
    """Return the first-column value of the last row of a header-less CSV log.

    Parameters
    ----------
    file_path : str
        Path to the CSV file that results are appended to.

    Returns
    -------
    The value in the first column of the final row, or ``None`` when the file
    cannot be read (missing, empty, unparsable). The error is printed rather
    than raised so a first run without an existing log still works.
    """
    try:
        # The log is appended with header=False, so every line is data.
        # header=None stops pandas from consuming the first row as column
        # names (which made a one-row log look empty).
        logged = pd.read_csv(file_path, header=None)
        # NOTE(review): assumes the URI is in the first column - confirm
        # against the column order actually written to the log.
        return logged.iloc[-1, 0]
    except Exception as e:
        print(f"Error reading the file: {e}")
        return None

# Find the last URI and determine the starting index.
# start_index stays 0 (full run) unless the last logged value is found in t_uri.
last_uri = find_last_logged_uri('data/audio_features.csv')
start_index = 0
if last_uri and last_uri in t_uri:
    # Look the position up in t_uri, the same array that was checked for
    # membership and that the fetch loop slices; searching a_uri (artist
    # URIs) could never yield a valid track position.
    index_array = np.where(t_uri == last_uri)[0]
    if index_array.size > 0:
        # Resume with the element right after the last one already logged.
        start_index = int(index_array[0]) + 1

In [None]:
# Constants
REQUESTS_PER_MINUTE = 180
SECONDS_PER_MINUTE = 60
DELAY = SECONDS_PER_MINUTE / REQUESTS_PER_MINUTE  # Calculate delay to fit the rate limit
BATCH_SIZE = 100  # number of track URIs sent in one audio_features request
ERROR_BACKOFF_SECONDS = 3  # extra pause after a failed batch
ERROR_LOG_PATH = "audio_features_log.txt"
TIMESTAMP_FORMAT = "%d.%b %Y %H:%M:%S"

e = 0  # number of batches that raised an error

# Fetch audio features batch by batch, starting at start_index, and append
# each batch to the CSV without a header row. The `with` block guarantees the
# CSV handle is closed even if the run is interrupted (e.g. KeyboardInterrupt).
with open('data/audio_features.csv', 'a') as f:
    for i in tqdm(range(start_index, len(t_uri), BATCH_SIZE)):
        try:
            time.sleep(DELAY)
            track_feature = sp.audio_features(t_uri[i:i+BATCH_SIZE])
            track_df = pd.DataFrame(track_feature)
            csv_data = track_df.to_csv(header=False, index=False)
            f.write(csv_data)
        except Exception as error:
            # Best effort: record the failure, back off briefly, and move on
            # to the next batch (the failed batch is not retried).
            e += 1
            with open(ERROR_LOG_PATH, "a") as r:
                r.write(datetime.datetime.now().strftime(TIMESTAMP_FORMAT)+": "+str(error)+'\n')
            time.sleep(ERROR_BACKOFF_SECONDS)
            continue

# Logging the final count of errors to the same log file as the per-batch
# errors (it previously went to a different file, "audio_features.txt").
with open(ERROR_LOG_PATH, "a") as r:
    r.write(datetime.datetime.now().strftime(TIMESTAMP_FORMAT) + " _________________________ " + "Total Number Of Errors : " + str(e) + " _________________________ " + '\n')

# Ignore

In [None]:
# Earlier version of the fetch loop: no request pacing and no resume, it
# always starts at index 0 and appends to the same CSV.
# The `with` blocks close the file handles even when the run is interrupted.
e = 0  # number of batches that raised an error
with open('data/audio_features.csv', 'a') as f:
    for i in tqdm(range(0, len(t_uri), 100)):
        try:
            track_feature = sp.audio_features(t_uri[i:i+100])
            track_df = pd.DataFrame(track_feature)
            csv_data = track_df.to_csv(header=False, index=False)
            f.write(csv_data)
        except Exception as error:
            # Record the failure, wait, then carry on with the next batch.
            e += 1
            with open("audio_features_log.txt", "a") as r:
                r.write(datetime.datetime.now().strftime("%d.%b %Y %H:%M:%S")+": "+str(error)+'\n')
            time.sleep(10)
            continue

# Append the total error count for this run to the log.
with open("audio_features_log.txt", "a") as r:
    r.write(datetime.datetime.now().strftime("%d.%b %Y %H:%M:%S")+" _________________________ "+"Total Number Of Errors : "+str(e)+" _________________________ "+'\n')

  0%|          | 0/22623 [00:00<?, ?it/s]

  9%|▉         | 2002/22623 [08:20<2:18:09,  2.49it/s] Max Retries reached
  9%|▉         | 2003/22623 [08:35<27:52:24,  4.87s/it]Max Retries reached
  9%|▉         | 2004/22623 [08:47<40:11:52,  7.02s/it]Max Retries reached
  9%|▉         | 2005/22623 [08:59<48:46:23,  8.52s/it]Max Retries reached
  9%|▉         | 2006/22623 [09:11<54:54:48,  9.59s/it]Max Retries reached
  9%|▉         | 2007/22623 [09:23<59:06:13, 10.32s/it]Max Retries reached
  9%|▉         | 2008/22623 [09:35<62:02:00, 10.83s/it]Max Retries reached
  9%|▉         | 2009/22623 [09:47<64:05:33, 11.19s/it]Max Retries reached
  9%|▉         | 2009/22623 [09:59<1:42:35,  3.35it/s] 


KeyboardInterrupt: 