# Audio features & Genre data from Spotify

In [None]:
# Author: Soumya Sambeet Mohapatra
# Date: 02-04-2022
# Description: Python script to fetch audio features and genre data from Spotify

## Imports

In [None]:
import requests
import base64
import json
import csv

## Authorization

* Authorization process is required to generate the access token which is needed to authenticate API calls
* The authorization token expires after one hour, so re-evaluate this cell once every hour to renew the token

In [None]:
client_id = '' # Enter your client ID here
client_secret = '' # Enter your client Secret here

# The token request below authenticates with HTTP Basic credentials,
# i.e. the base64 encoding of "<client_id>:<client_secret>"
auth_string = client_id + ':' + client_secret
auth_bytes = auth_string.encode('ascii')
auth_bytes_base64 = base64.b64encode(auth_bytes)
auth_string_base64 = auth_bytes_base64.decode("ascii")

# Requesting access token (client-credentials grant)
url = 'https://accounts.spotify.com/api/token'
body = {'grant_type': 'client_credentials'}
headers = {'Authorization': 'Basic ' + auth_string_base64, 'Content-Type': 'application/x-www-form-urlencoded'}
response = requests.post(url, headers=headers, data=body)
# Fail with the HTTP status (e.g. missing or wrong credentials) instead of an
# opaque KeyError when the response carries no 'access_token' field
response.raise_for_status()
# access_token is read by spotifyAPI() for every subsequent API call
access_token = response.json()['access_token']

## API request function

In [None]:
def spotifyAPI(url):
    """Send an authenticated GET request and return the decoded JSON body.

    Args:
        url: Fully built request URL, including any query string.

    Returns:
        The response body parsed from JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status
            (for example after the access token has expired).
    """
    request_headers = {
        'Content-Type': 'application/json',
        # access_token is the module-level token produced by the Authorization cell
        'Authorization': 'Bearer ' + access_token,
    }
    response = requests.get(url, headers=request_headers)
    # Surface API failures here; otherwise the error body is returned as if it
    # were data and callers fail later when indexing into it
    response.raise_for_status()
    return response.json()

## Genre list

* There are 126 genres in Spotify
* However some genres do not contain any tracks for some years

In [None]:
# Fetch the available genre seeds; the response lists them under the 'genres' key
genre_seed_response = spotifyAPI('https://api.spotify.com/v1/recommendations/available-genre-seeds')
genres = genre_seed_response['genres']

## Fetch genre and audio features data

* Audio feature data is fetched for each of the 126 genres
* The year is set manually due to the API rate limit
* For a particular year, this block of code fetches the audio feature data for a maximum of 6300 tracks (126 genres × up to 50 tracks each)

In [None]:
year = '2022' # Setting manually due to API rate limit
genre_data = []  # One audio-features record per track, tagged with 'year' and 'genre'
for genre in genres:
    # Search tracks for this genre and year; limit=50 caps the result at 50 tracks per genre
    search_url = 'https://api.spotify.com/v1/search?type=track&q=year:'+ year + '%20genre:"' + genre + '"&limit=50'
    search_response = spotifyAPI(search_url)
    # Join the ids with a URL-encoded comma (%2C) for the batch audio-features request
    track_ids = "%2C".join(track['id'] for track in search_response['tracks']['items'])
    if not track_ids:
        print('\x1b[31mError! No tracks found for genre ' + genre + ' in year ' + year )
        continue

    features_url = 'https://api.spotify.com/v1/audio-features?ids=' + track_ids
    features_response = spotifyAPI(features_url)
    for track in features_response['audio_features']:
        # The endpoint returns null for ids it has no audio features for;
        # skip those instead of failing on item assignment to None
        if track is None:
            continue
        track['year'] = year
        track['genre'] = genre
        genre_data.append(track)

## CSV Export

In [None]:
# Column names come from the first record, so genre_data must be non-empty and
# every record is expected to carry the same keys
keys = genre_data[0].keys()
# newline='' hands line-ending control to the csv module; without it, platforms
# that translate '\n' (e.g. Windows) get a blank row after every record
with open('data/csv/genre_data_' + year + '.csv', 'w', newline='') as csv_file:
    csvwriter = csv.DictWriter(csv_file, keys)
    csvwriter.writeheader()
    csvwriter.writerows(genre_data)

## JSON Export

In [None]:
# Dump every collected record for the selected year into a single JSON file
json_path = f'data/json/genre_data_{year}.json'
with open(json_path, 'w') as json_file:
    json.dump(genre_data, json_file)