## Import libraries

In [1]:
import pandas as pd
import numpy as np
import time 

import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', None)  
#pd.set_option('display.max_rows', None)  

## Get artist target list 

In [4]:
all_albums = pd.read_csv('../data/clean/all_550_albums_clean.csv', encoding='latin1')

all_albums.head(2)

Unnamed: 0,rstoprankingorder,rsartist,rsyear,rsalbum,album_artist,rshorriblerankingorder,rsrationale
0,1.0,The Beatles,1967,Sgt. Pepper's Lonely Hearts Club Band,Sgt. Pepper's Lonely Hearts Club Band - The Be...,,
1,2.0,The Beach Boys,1966,Pet Sounds,Pet Sounds - The Beach Boys,,


In [7]:
len(all_albums['rsartist'].unique())

304

## Spotify

In [8]:
#%pip install pyarrow
#%pip install joblib
#%pip install tqdm
#%pip install spotipy
#%pip install python-dotenv
#%pip install requests 

In [10]:
import requests as rq
import pyarrow
from joblib import Parallel, delayed
from tqdm import tqdm
import pymongo
from passwords import *

### Generate token

In [54]:
# Obtener el TOKEN

AUTH_URL = "https://accounts.spotify.com/api/token"

creds = {
    "grant_type": "client_credentials",
    "client_id": Client_ID,
    "client_secret": Client_secret
}

response = rq.post(AUTH_URL, 
                         data=creds, 
                         headers={"Content-Type": "application/x-www-form-urlencoded"})


TOKEN = response.json()['access_token']

# Conexión con la API

url = "https://api.spotify.com/"

headers = {"Authorization": f'Bearer {TOKEN}'}

response = rq.get(url,headers=headers)
response

<Response [200]>

### Get artist information 

In [55]:
#we will do a keyword search 

all_albums['rsartist'].head(3)

0       The Beatles
1    The Beach Boys
2       The Beatles
Name: rsartist, dtype: object

#### Test to understand the API for the info I am interested on 

In [63]:
endpoint = 'https://api.spotify.com/v1/search'

In [64]:
artistname = 'The Beatles'
albumname = "Sgt. Pepper's Lonely Hearts Club Band"

In [69]:
params = {'q': f'album:{albumname} artist:{artistname}','type': 'album','market':'US', 'limit':'1','offset':'0'}

In [70]:
response = rq.get(endpoint, headers=headers, params=params)
response

<Response [200]>

In [71]:
data = response.json()
data.keys()

dict_keys(['albums'])

In [75]:
#getalbumid
data['albums']['items'][0]['id']

'6QaVfG1pHYl1z15ZxkvVDW'

In [76]:
#getalbumname
data['albums']['items'][0]['name']

"Sgt. Pepper's Lonely Hearts Club Band (Remastered)"

In [80]:
#artist name 
data['albums']['items'][0]['artists'][0]['name']

'The Beatles'

In [81]:
#artist id
data['albums']['items'][0]['artists'][0]['id']

'3WrFJ7ztbogyGnTHbHJFl2'

In [82]:
#album release date
data['albums']['items'][0]['release_date']

'1967-06-01'

In [83]:
#total tracks
data['albums']['items'][0]['total_tracks']

13

#### target lists 

In [85]:
#replacing spaces for '+' for the search query 

albumlist = [x.replace(' ', '+') for x in all_albums['rsalbum']]
albumlist[0:3]

["Sgt.+Pepper's+Lonely+Hearts+Club+Band", 'Pet+Sounds', 'Revolver']

In [87]:
artistlist = [x.replace(' ', '+') for x in all_albums['rsartist']]
artistlist[0:3]

['The+Beatles', 'The+Beach+Boys', 'The+Beatles']

In [89]:
#function to get info

data_list = []

def get_album_info(artist_name, album_name):
    try:
        endpoint = 'https://api.spotify.com/v1/search'
        params = {
            'q': f'album:{album_name} artist:{artist_name}',
            'type': 'album',
            'market': 'US',
            'limit': '1',
            'offset': '0',
        }
        response = rq.get(endpoint, headers=headers, params=params)

        if response.status_code == 200:
            data = response.json()
            if data['albums']['items']:
                album_info = data['albums']['items'][0]
                data_list.append({
                    'albumnamers': album_name,
                    'albumidspotify': album_info['id'],
                    'albumnamespotify': album_info['name'],
                    'artistnamespotify': album_info['artists'][0]['name'],
                    'artistidspotify': album_info['artists'][0]['id'],
                    'releasedate': album_info['release_date'],
                    'totaltracks': album_info['total_tracks']
                })
            else:
                data_list.append({
                    'albumnamers': album_name,
                    'albumidspotify': None,
                    'albumnamespotify': None,
                    'artistnamespotify': None,
                    'artistidspotify': None,
                    'releasedate': None,
                    'totaltracks': None
                })
                print(f"No album found for {album_name} by {artist_name}")
        else:
            print(f"Failed to retrieve album information for {album_name} by {artist_name}. Status code: {response.status_code}")
    except Exception as e:
        print(f"An error occurred: {e}")

#execute for the entire list 
for artist, album in zip(artistlist, albumlist):
    get_album_info(artist, album)

# Create a DataFrame
df = pd.DataFrame(data_list)
df.head(3)

No album found for The+Sun+Sessions by Elvis+Presley
No album found for Are+You+Experienced by The+Jimi+Hendrix+Experience
No album found for John+Lennon/Plastic+Ono+Band by John+Lennon+/+Plastic+Ono+Band
No album found for Blue by Joni+Mitchell
No album found for The+Anthology by Muddy+Waters
No album found for At+Fillmore+East by The+Allman+Brothers+Band
No album found for Here's+Little+Richard by Little+Richard
No album found for Meet+The+Beatles! by The+Beatles
No album found for Electric+Ladyland by The+Jimi+Hendrix+Experience
No album found for Chronicle:+The+20+Greatest+Hits by Creedence+Clearwater+Revival
No album found for Trout+Mask+Replica by Captain+Beefheart+&+His+Magic+Band
No album found for Back+to+Mono+(1958-1969) by Phil+Spector
No album found for After+the+Gold+Rush by Neil+Young
No album found for Purple+Rain by Prince+and+the+Revolution
No album found for Harvest by Neil+Young
No album found for Axis:+Bold+as+Love by The+Jimi+Hendrix+Experience
No album found for S

Unnamed: 0,albumnamers,albumidspotify,albumnamespotify,artistnamespotify,artistidspotify,releasedate,totaltracks
0,Sgt.+Pepper's+Lonely+Hearts+Club+Band,6QaVfG1pHYl1z15ZxkvVDW,Sgt. Pepper's Lonely Hearts Club Band (Remaste...,The Beatles,3WrFJ7ztbogyGnTHbHJFl2,1967-06-01,13.0
1,Pet+Sounds,6GphKx2QAPRoVGWE9D7ou8,Pet Sounds (Original Mono & Stereo Mix),The Beach Boys,3oDbviiivRWhXwIE8hxkVV,1966-06-16,27.0
2,Revolver,3PRoXYsngSwjEQWR5PsHWR,Revolver (Remastered),The Beatles,3WrFJ7ztbogyGnTHbHJFl2,1966-08-05,14.0


In [90]:
df.shape

(550, 7)

In [91]:
#we were unable to find information for 87 albums

na_col = df.isnull().sum()
na_col

albumnamers           0
albumidspotify       87
albumnamespotify     87
artistnamespotify    87
artistidspotify      87
releasedate          87
totaltracks          87
dtype: int64

In [92]:
df.to_csv('../data/clean/api_albums_clean.csv', index=False)