	# Author: Alexander Staub
	## Last changed: 2025.02.18
	## Purpose: Using the chartmetric IDs to get song level metadata


In [1]:
# importing packages
import time
import requests
import logging
import pandas as pd

In [None]:
# Configure the root logger: records at INFO level and above go to a log
# file, each line carrying a timestamp and the severity of the record.
logging.basicConfig(
    format='%(asctime)s %(levelname)s: %(message)s',
    level=logging.INFO,
    filename='chartmetric_api_metadata.log',
)

In [None]:
# Define API host and your refresh token
HOST = 'https://api.chartmetric.com'

# The refresh token is kept out of the notebook in a local text file.
# "utf-8-sig" reads plain ASCII/UTF-8 and also drops a leading byte-order
# mark, which some editors add and which would otherwise end up in the token.
with open("chartmetric_refresh_token.txt", "r", encoding="utf-8-sig") as f:
    REFRESH_TOKEN = f.read().strip()

# Fail fast with a clear message: an empty token cannot authenticate.
if not REFRESH_TOKEN:
    raise ValueError(
        "chartmetric_refresh_token.txt is empty; expected the Chartmetric refresh token"
    )

In [None]:
# Retrieve an access token using the refresh token.
# The timeout (seconds) keeps the notebook from hanging indefinitely when the
# server never answers; requests applies no timeout unless one is given.
token_response = requests.post(
    f'{HOST}/api/token',
    json={'refreshtoken': REFRESH_TOKEN},
    timeout=30,
)

# Check if the token was retrieved successfully
if token_response.status_code != 200:

    # Log the error and raise an exception
    logging.error(f"Token retrieval error: {token_response.status_code}")
    raise Exception(f"Error: received {token_response.status_code} from /api/token")

# Extract the access token from the response
access_token = token_response.json()['token']

# Define the headers for the API requests
headers = {'Authorization': f'Bearer {access_token}'}

# Defining the get_request

Robust request logic that:
- backs off for a maximum of 26 hours across all retries
- logs all errors it encounters


In [None]:


# --- Robust get_request Function ---
def _refresh_access_token(timeout):
    """Request a fresh access token and store it in the shared ``headers`` dict.

    Args:
        timeout: Seconds to wait for the token endpoint before giving up.

    Raises:
        Exception: If ``/api/token`` answers with a status other than 200.
    """
    token_response = requests.post(
        f'{HOST}/api/token',
        json={'refreshtoken': REFRESH_TOKEN},
        timeout=timeout,
    )
    if token_response.status_code != 200:
        logging.error("Token refresh failed: %s", token_response.status_code)
        raise Exception(f"Token refresh failed with status {token_response.status_code}")
    # Mutate the module-level dict in place so every later request uses the new token.
    headers['Authorization'] = f"Bearer {token_response.json()['token']}"


def _rate_limit_wait(response, attempt, max_retries, backoff, total_wait_limit=26 * 3600):
    """Return how many seconds to wait after a 429 response.

    If the response carries a usable ``X-RateLimit-Reset`` header, wait until
    that time (or ``backoff`` seconds if that time already passed). Otherwise
    fall back to waits that double with every attempt and are scaled so that
    the waits between all ``max_retries`` attempts add up to
    ``total_wait_limit`` seconds (26 hours by default).
    """
    reset_timestamp = response.headers.get("X-RateLimit-Reset")
    if reset_timestamp:
        try:
            wait = int(float(reset_timestamp)) - int(time.time())
        except (TypeError, ValueError, OverflowError):
            # A malformed header must not abort the retry loop; use the fallback.
            logging.warning("Unparseable X-RateLimit-Reset header: %r", reset_timestamp)
        else:
            return wait if wait >= 0 else backoff

    # Waits only happen between attempts, i.e. after attempts 0..max_retries-2.
    # Their weights 2**attempt sum to 2**(max_retries-1) - 1, so dividing by
    # that constant makes the waits total exactly ``total_wait_limit``.
    total_weight = 2 ** (max_retries - 1) - 1
    if total_weight <= 0:
        return backoff
    return total_wait_limit * 2 ** attempt / total_weight


def get_request(endpoint, params=None, max_retries=5, timeout=30):
    """GET ``HOST + endpoint`` and return the decoded JSON, retrying on failure.

    Args:
        endpoint: Path appended to ``HOST``; it must start with ``/``.
        params: Optional query parameters passed to ``requests.get``.
        max_retries: Maximum number of attempts before giving up.
        timeout: Seconds to wait for each HTTP call before treating it as a
            network error.

    Returns:
        The parsed JSON body of the first response with status 200.

    Raises:
        Exception: On a non-retryable status (anything other than 200, 401,
            429 or 5xx), when a token refresh fails, or when every attempt
            has been used up.
    """
    backoff = 1  # initial backoff in seconds (used if header data is missing)
    last_error = None

    for attempt in range(max_retries):
        # Nothing is retried after the last attempt, so sleeping there would
        # only delay the final exception.
        is_last_attempt = attempt == max_retries - 1

        try:
            response = requests.get(
                f"{HOST}{endpoint}", headers=headers, params=params, timeout=timeout
            )
        except requests.exceptions.RequestException as ex:
            last_error = ex
            logging.error("Network error on attempt %s for %s: %s", attempt + 1, endpoint, ex)
            if not is_last_attempt:
                time.sleep(backoff)
            backoff *= 2
            continue

        # Log the response status code and only the rate limit headers.
        rate_limit_headers = {
            name: value
            for name, value in response.headers.items()
            if "ratelimit" in name.lower() or name.lower() == "retry-after"
        }
        logging.info(
            "Request to %s returned %s. RateLimit headers: %s",
            endpoint, response.status_code, rate_limit_headers,
        )

        status = response.status_code
        if status == 200:
            return response.json()

        if status == 401:
            # Token may have expired; refresh it and retry.
            logging.warning("401 error for %s. Refreshing token.", endpoint)
            _refresh_access_token(timeout)
            wait = backoff
        elif status == 429:
            # Rate limit exceeded; wait as instructed by the API or by the fallback plan.
            wait = 0 if is_last_attempt else _rate_limit_wait(
                response, attempt, max_retries, backoff
            )
            logging.warning(
                "429 error for %s. Sleeping for %s seconds (attempt %s/%s).",
                endpoint, wait, attempt + 1, max_retries,
            )
        elif status >= 500:
            # Server error; wait and retry.
            logging.warning(
                "Server error %s for %s. Retrying after %s seconds.",
                status, endpoint, backoff,
            )
            wait = backoff
        else:
            # Any other status is treated as non-retryable.
            logging.error("Error %s for %s: %s", status, endpoint, response.text)
            raise Exception(f"Error: received {status} from {endpoint}")

        if not is_last_attempt:
            time.sleep(wait)
        backoff *= 2

    # If the loop completes without returning, raise an exception
    raise Exception(f"Max retries exceeded for endpoint {endpoint}") from last_error

# Use chartmetric ID to access song characteristics:
- use the chartmetric ID files to get the songs for which we have a chartmetric ID
- loop over the chartmetric IDs, query the track metadata endpoint, and retrieve the relevant song-level metadata


In [None]:
import pandas as pd

# the different lists of ids
chartmetric_ids_spotify_1 = pd.read_csv("Z:/Data_alexander/data/incidental/chartmetric/chartmetric_ids_spotify_sample_1.csv")
chartmetric_ids_spotify_2 = pd.read_csv("Z:/Data_alexander/data/incidental/chartmetric/chartmetric_ids_spotify_sample_2.csv")
# Sample 3 is currently disabled; when re-enabling it, also add it to the concat list below.
#chartmetric_ids_spotify_3 = pd.read_csv("Z:/Data_alexander/data/incidental/chartmetric/chartmetric_ids_spotify_sample_3.csv")

# concatenate the dataframes into one
# Only the samples actually loaded above may be listed here; referencing the
# disabled sample 3 raised a NameError.
chartmetric_ids_spotify = pd.concat([chartmetric_ids_spotify_1, chartmetric_ids_spotify_2])

In [None]:
# trial run with a single ID
from pprint import pprint  # was used without being imported

# input the first value of the chartmetric_id
# NOTE(review): assumes the ID column is named "chartmetric_id" and holds
# integer-like values — confirm against the CSV header.
trial_chartmetric_id = int(chartmetric_ids_spotify["chartmetric_id"].iloc[0])

# The endpoint must start with "/api/" because get_request appends it directly
# to HOST, and the ":id" placeholder has to be replaced by a real ID.
search_enpoint = f'/api/track/{trial_chartmetric_id}'

# The track is identified by the path alone, so no query parameters are sent.
search_params = None

search_output = get_request(search_enpoint, search_params)

pprint(search_output)