In [11]:
import pandas as pd
import requests
import json
from pandas import json_normalize
import logging
import time

logging.basicConfig(filename='data_retrieval.log', level=logging.INFO)


In [12]:
sid = '09dceb980747489297e7ea0a51077a66'
skey = 'f3543385c18c48cd98018f4cc39bf12a'
baseURL = 'https://api.spotify.com'


### Endpoints

In [13]:
def request_access_token(cid = sid, skey = skey ):
    url = "https://accounts.spotify.com/api/token"
    headers = {
        "Content-Type": "application/x-www-form-urlencoded"
    }
    data = {
        "grant_type": "client_credentials",
        "client_id": cid,
        "client_secret": skey
    }

    response = requests.post(url, headers=headers, data=data)
    if response.status_code == 200:
        return response.json().get('access_token')
    else:
        print(f"Failed to obtain token, status code: {response.status_code}")
        return None

def fetch_tracks_info(track_ids):
    ids = ','.join(track_ids)
    url = f"https://api.spotify.com/v1/tracks?ids={ids}"
    
    try:
        response = requests.get(url, headers=headers)
        return response
    except requests.RequestException as e:
        # Log the error or handle it as needed
        print(f"Request failed: {e}")
        # Consider raising an exception here to indicate the failure to the caller
        raise

def fetch_tracks_audiofeatures(track_ids):
    ids = ','.join(track_ids)
    url = f"https://api.spotify.com/v1/audio-features?ids={ids}"
    
    try:
        response = requests.get(url, headers=headers)
        return response
    except requests.RequestException as e:
        print(f"Request failed: {e}", response)
        raise


### Endpoint utils

In [14]:
def new_session_headers():
    headers = {
        "Authorization": f'Bearer {request_access_token()}',
    }

    return headers

In [15]:
headers = new_session_headers()

### Core data

In [16]:
all_files = [f"./data_2022/MyData_streaming_hist/endsong_{i}.json" for i in range(12)]
df_list = [pd.read_json(file) for file in all_files]
df = pd.concat(df_list, ignore_index=True)
df.ts = pd.to_datetime(df.ts)

non_tracks = df.loc[df.spotify_track_uri.str.contains('track') != True].index
df.drop(index=non_tracks, inplace=True)

df = df.sort_values(by='ts').reset_index(drop = True)

df = df[['spotify_track_uri', 'ts']].copy()

### Metadata retrieval

In [17]:
def get_batch_metadata(batch_list, start_id=0, end_id=1):
    try:
        fetched_info = fetch_tracks_info(batch_list)
        fetched_info.raise_for_status()  # Raises an HTTPError if the response was an error
        
        tracks_data = fetched_info.json().get('tracks', [])
        frames = [json_normalize(track) for track in tracks_data]
        df = pd.concat(frames, ignore_index=True)
        return df
    except requests.HTTPError as e:
        logging.error(f"Failed for IDs {start_id} to {end_id}. HTTP Error: {e}")
        raise  # Re-raise the exception after logging it
    except Exception as e:
        logging.error(f"Unexpected error for IDs {start_id} to {end_id}: {e}")
        raise  # Re-raise unexpected exceptions

def get_batch_audiofeatures(batch_list, start_id=0, end_id=1):
    try:
        fetched_info = fetch_tracks_audiofeatures(batch_list)
        fetched_info.raise_for_status()

        tracks_data = fetched_info.json().get('audio_features', [])
        frames = [json_normalize(track) for track in tracks_data]
        df = pd.concat(frames, ignore_index=True)
        return df
    except requests.exceptions.HTTPError as e:
        if e.response.status_code == 429:
            retry_after = int(e.response.headers.get('Retry-After'))
            logging.info(f"Rate limit exceeded. Retrying after {retry_after} seconds.")
            return retry_after
        else:
            logging.error(f"HTTP Error for IDs {start_id} to {end_id}: {e}")
            raise
    except Exception as e:
        logging.error(f"Unexpected error for IDs {start_id} to {end_id}: {e}")
        raise

def save_data(data, start_id, end_id, folder):
    data.to_csv(f'{folder}/track_data_{start_id}.csv', index = False)
    logging.info(f'Data saved from {start_id} to {end_id}')

    return True

In [18]:
def batchit():
    chunk_size = 50
    for start in range(100000, len(df), chunk_size):
        end = start + chunk_size
        chunk = df[start:end]
        batch_list = list(chunk.spotify_track_uri.apply(lambda x: x.split(':')[-1])) #get just spotify id from uri
        data = get_batch_audiofeatures(batch_list, start, end)
        # data = get_batch_metadata(batch_list, start, end)

        if isinstance(data, pd.DataFrame) and not data.empty:
            save_data(data, start, end, 'fetched_audiofeatures')
            print('Finished:', start, batch_list) 
        else:
            print(f'No data returned for batch starting at {start}.')
            return data
            
        time.sleep(1)
    return None

batchresult = batchit()

Finished: 100000 ['1r4LajAwZBMufNXGxG56em', '1r4LajAwZBMufNXGxG56em', '1rzpBug5iLlcjTJcHF5OeL', '1rzpBug5iLlcjTJcHF5OeL', '5AxF9GFsJIMZMBgvxhazMi', '01iglyLhCYZYlCqAxY8Vfz', '3ypgFxGFG23ZAksYMrAps7', '3kwl9ycRoO7qSm6OZblqqB', '6XDdCcpLgQa6WgSmM7wBrG', '4EVWMkJXLVPoKbdYf8QAqI', '4EVWMkJXLVPoKbdYf8QAqI', '0tbTWvzyJgjV43pMyIG0Ng', '0tbTWvzyJgjV43pMyIG0Ng', '3sgWl7edt8ANfm1Z3lsXan', '659CdcGqJ8VxR2aXeLyV2O', '5XxFY11r0YA6Xoyh1O9WsR', '7vqcDtNvoJn5pXhktc8qe8', '1HmAx26E2PIQBWhRv0VSAV', '1HmAx26E2PIQBWhRv0VSAV', '7kyiQ2oxgT0XX8yhzBWE6t', '7kyiQ2oxgT0XX8yhzBWE6t', '4C2vliOPlcMPRMfceLqsKh', '4C2vliOPlcMPRMfceLqsKh', '6VGNzYErt08Rai78mLkIzI', '6VGNzYErt08Rai78mLkIzI', '2NJtgZk8OWI2u58iGLzJNq', '6xkcixqFy3qDbz7EPrn6oy', '4MOCTiC5mMrJuhLFSNjiIM', '2hiYgtiU7j5zzLs7ib5uIt', '1c04ndvRQDQeVzsSPB4xOT', '4FvdRNsf5CwmZqBNomtjFA', '2gNjmvuQiEd2z9SqyYi8HH', '1qCQTy0fTXerET4x8VHyr9', '6Rqn2GFlmvmV4w9Ala0I1e', '0yRbkTprsL0rYr4UId0Ryi', '0yRbkTprsL0rYr4UId0Ryi', '6Rqn2GFlmvmV4w9Ala0I1e', '6Rqn2GFlmvmV4w9Ala0

TypeError: int() argument must be a string, a bytes-like object or a real number, not 'NoneType'

In [20]:
batchresult.headers.get('Retry-after', 1)

NameError: name 'batchresult' is not defined