## Spotify API Artist Search
### This script calls the Spotify API, searches for artists, and stores their metadata (ID, name, URL, genres, etc.) in a dataframe for analysis
### Author: @ajantonik-personal (Anthony J Antonik)
### Date: 2020-04-25

#### Import modules

In [None]:
import csv
import os
import urllib.parse

import pandas as pd
import requests.auth

from IPython.display import display, HTML

#### Specify utility functions

In [None]:
class Utils:
    """Helper routines for the notebook (currently only key/value file parsing)."""

    def __init__(self):
        # Holds the dictionary produced by the most recent text_parse_dict call
        self.return_dict = {}

    def text_parse_dict(self, filepath, separator):
        """
        Function to parse lines of text from a file into a dictionary

        Each line is split on the FIRST occurrence of the separator only, so
        values that themselves contain the separator are kept whole. Lines
        that do not contain the separator (blank lines, stray text) are
        skipped. All spaces are removed from both keys and values.

        :param filepath: path to text file to be parsed ("~" is expanded)
        :param separator: string separating keynames from values in text file
        :return return_dict: dictionary of keys, values from text file
        """
        self.return_dict = {}
        with open(os.path.expanduser(filepath), "r") as filetoparse:
            for line in filetoparse:
                key, found, value = line.partition(separator)
                if not found:
                    # No separator on this line, so there is nothing to store
                    continue
                self.return_dict[key.replace(" ", "")] = (
                    value.replace("\n", "").replace(" ", "")
                )
        return self.return_dict

#### Set constants & variables

In [None]:
# Path of the file containing the Spotify credentials, taken from the
# SPOTIFY_CREDS_FILE environment variable (a KeyError is raised if it is unset)
CREDS_FILE = os.environ['SPOTIFY_CREDS_FILE']

# List of artists to look up.
# NOTE: the search step keeps a result only when its name equals the entry
# here exactly, so every entry must be spelled the way Spotify displays it
# (e.g. "Sunn O)))" uses the letter O, not the digit zero).
ARTIST_LIST = [
    'Radiohead',
    'Flying Lotus',
    'Beyoncé',
    'Arcade Fire',
    'St. Vincent',
    'LCD Soundsystem',
    'Grouper',
    'Nine Inch Nails',
    'Father John Misty',
    'David Byrne',
    'The Cure',
    'Typhoon',
    'Robyn',
    'Tyler, The Creator',
    'Sunn O)))',
    'Godspeed You! Black Emperor',
    'Mogwai',
    'Portugal. The Man',
    'Car Seat Headrest',
    'Japandroids',
]
ARTIST_COUNT = len(ARTIST_LIST)
print("Artists: {}".format(ARTIST_COUNT))

# Endpoint that issues API access tokens
API_TOKEN = "https://accounts.spotify.com/api/token"

# Search endpoint; ends with the query parameter so the search term can be appended
API = "https://api.spotify.com/v1/search?q="

# Query-string suffix appended after the search term to restrict results to artists
ROUTE = "&type=artist"

# Column names of the output dataframe
HEADER = [
    "ARTIST_ID",
    "ARTIST_NAME",
    "SPOTIFY_URL",
    "GENRES",
]

#### Authenticate Spotify API credentials

In [None]:
utils = Utils()

# Retrieve Spotify Client ID & Secret from the credentials file
# (expects "CLIENT_ID=..." and "CLIENT_SECRET=..." entries)
CREDS = utils.text_parse_dict(
    filepath=CREDS_FILE,
    separator='='
)
CLIENT_ID = CREDS['CLIENT_ID']
CLIENT_SECRET = CREDS['CLIENT_SECRET']

# Define data dict for token request payload
PAYLOAD_TOKEN = {'grant_type': 'client_credentials'}

# Request a fresh Spotify API authentication token
print("Authenticating...")
client_auth = requests.auth.HTTPBasicAuth(
    CLIENT_ID,
    CLIENT_SECRET
)
auth = requests.post(
    url=API_TOKEN,
    auth=client_auth,
    data=PAYLOAD_TOKEN,
    timeout=30  # do not hang forever if the token endpoint is unreachable
)

# Check the status BEFORE reading the body: a failed request is not expected
# to carry an 'access_token', and reading it first would raise a KeyError
# instead of showing the message below.
if auth.status_code == 200:
    print("SUCCESS")
else:
    print("ERROR: Check client authentication credentials.")
    # Stop with a non-zero exit status so the failure is visible to callers
    raise SystemExit(1)

token_json = auth.json()
TOKEN = 'Bearer ' + str(token_json['access_token'])

# Define dict sent as the HTTP headers of every search request
PARAMS = {
    'Accept': 'application/json',
    'Content-Type': 'application/json',
    'Authorization': TOKEN
}

#### Retrieve artist info & save to dataframe

In [None]:
print("Searching for artists...")
# Matching artists are collected here and turned into a dataframe in one go,
# which avoids growing the dataframe row by row.
artist_rows = []

for artist_count, a in enumerate(ARTIST_LIST, start=1):
    print("{}|{}".format(artist_count, a))
    # Percent-encode the name so characters such as '&', '#', ',' or accents
    # cannot corrupt the query string.
    response = requests.get(
        url=API + urllib.parse.quote(str(a), safe='') + ROUTE,
        headers=PARAMS,
        timeout=30
    )
    if response.status_code != 200:
        # Error responses have no usable result list; report and move on
        print("ERROR: Search for {} failed (HTTP {})".format(a, response.status_code))
        continue

    items = response.json().get('artists', {}).get('items', [])
    # NOTE: Finds first instance of exact artist name match
    match = next(
        (item for item in items if str(item['name']) == str(a)),
        None
    )
    if match is None:
        print("ERROR: Artist {} not found".format(a))
        continue

    artist_rows.append({
        'ARTIST_ID': str(match['id']),
        'ARTIST_NAME': str(match['name']),
        'SPOTIFY_URL': str(match['external_urls']['spotify']),
        # Stored as the string form of the genre list, one cell per artist
        'GENRES': str(match['genres'])
    })

artists_df = pd.DataFrame(artist_rows, columns=HEADER)
artists_df_rows = len(artists_df)

print("DONE")
display(HTML(artists_df.to_html()))