### Importing libraries

In [1]:
import pandas as pd
import numpy as np
import time 

import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', None)  # show every column when a DataFrame is displayed
#pd.set_option('display.max_rows', None)  # show every row when a DataFrame is displayed

### Getting target list 

In [2]:
# Load the albums table from data/clean; its album and artist id columns drive the rest of the notebook.
# NOTE(review): the file is decoded as latin1 — if it was written as UTF-8, non-ASCII names will be garbled; confirm.
dataalbums = pd.read_csv('../data/clean/api_albums_clean.csv', encoding='latin1')

In [3]:
dataalbums.head(2)

Unnamed: 0,albumnamers,albumidspotify,albumnamespotify,artistnamespotify,artistidspotify,releasedate,totaltracks
0,Sgt. Pepper's Lonely Hearts Club Band,1x1jpjDbetGqX0IKCUIBNj,Sgt. Pepper's Lonely Hearts Club Band,Bloco do Sargento Pimenta,3wGWCP3E3tYqj5memYV9Vq,2017-12-08,13.0
1,Pet Sounds,2CNEkSE8TADXRT2AzcEt1b,Pet Sounds,The Beach Boys,3oDbviiivRWhXwIE8hxkVV,1966-05-16,13.0


In [4]:
# Distinct Spotify artist ids, kept in order of first appearance
artist_ids = dataalbums['artistidspotify'].drop_duplicates().tolist()

In [5]:
artist_ids[0:2]

['3wGWCP3E3tYqj5memYV9Vq', '3oDbviiivRWhXwIE8hxkVV']

### Spotify Connection 

In [6]:
#%pip install pyarrow
#%pip install joblib
#%pip install tqdm
#%pip install spotipy
#%pip install python-dotenv
#%pip install requests 

In [7]:
import requests as rq
import pyarrow
from joblib import Parallel, delayed
from tqdm import tqdm
import pymongo
from passwords import *

In [47]:
# Request an access token (client-credentials grant)

AUTH_URL = "https://accounts.spotify.com/api/token"

# Client_ID / Client_secret are not defined in this notebook; presumably they
# come from the `passwords` star import — verify.
creds = {
    "grant_type": "client_credentials",
    "client_id": Client_ID,
    "client_secret": Client_secret
}

response = rq.post(AUTH_URL,
                   data=creds,
                   headers={"Content-Type": "application/x-www-form-urlencoded"})

# Stop here with the HTTP status if the token request was rejected, instead of
# failing below with an unhelpful KeyError on 'access_token'.
response.raise_for_status()

TOKEN = response.json()['access_token']

# Connect to the API

url = "https://api.spotify.com/"

# Every later request in the notebook reuses these headers
headers = {"Authorization": f'Bearer {TOKEN}'}

response = rq.get(url, headers=headers)
response

<Response [200]>

### Getting Tracks Information 

In [48]:
# Distinct Spotify album ids, kept in order of first appearance
album_ids = dataalbums['albumidspotify'].drop_duplicates().tolist()

In [49]:
album_ids[0:3]

['1x1jpjDbetGqX0IKCUIBNj', '2CNEkSE8TADXRT2AzcEt1b', '0T1sskJDoybYGvPU5aw5Cf']

In [50]:
#endpoint
#https://api.spotify.com/v1/albums/{id}/tracks

In [51]:
#test 
url = 'https://api.spotify.com/v1/albums/1x1jpjDbetGqX0IKCUIBNj/tracks'

In [58]:
response = rq.get(url, headers=headers)

In [59]:
responsejson = response.json()

In [60]:
type(responsejson)

dict

In [63]:
responsejson.keys()

dict_keys(['href', 'items', 'limit', 'next', 'offset', 'previous', 'total'])

In [68]:
# Show only the field names of one track item rather than dumping the whole item
responsejson['items'][0].keys()

dict_keys(['artists', 'available_markets', 'disc_number', 'duration_ms', 'explicit', 'external_urls', 'href', 'id', 'is_local', 'name', 'preview_url', 'track_number', 'type', 'uri'])

In [69]:
# One album-tracks endpoint per album id, in the same order as album_ids
track_urls = [
    f'https://api.spotify.com/v1/albums/{album_id}/tracks'
    for album_id in album_ids
]

# Pair each album id with its endpoint so the id travels with the request
track_url_list = [
    {'album_id': aid, 'url': endpoint}
    for aid, endpoint in zip(album_ids, track_urls)
]

In [73]:
# Module-level accumulator for the retrieved track data: search_track_info
# appends one dict per track to it. Re-running this cell starts from an empty list.
track_data_list = []

# Function to search and retrieve track information
def search_track_info(track_url):
    """Fetch all tracks of one album and append a row per track to ``track_data_list``.

    Parameters
    ----------
    track_url : dict
        Mapping with the keys ``'album_id'`` and ``'url'``; ``'url'`` is the
        album-tracks endpoint that is requested first.

    Returns
    -------
    None
        Rows are appended to the module-level ``track_data_list``. Each row has
        the keys ``track_id``, ``track_name``, ``album_id``, ``artist_id`` and
        ``artist_name``; the artist fields come from the first entry of the
        track's ``artists`` list.

    Notes
    -----
    The response is a page of results carrying a ``next`` link. Pages are
    followed until ``next`` is empty or missing, so albums with more tracks
    than fit on one page are not truncated.

    Errors are reported with ``print`` and never re-raised, so one failing
    album does not stop the caller's loop. Rows appended before the failure
    are kept.
    """
    try:
        page_url = track_url['url']

        while page_url:
            response = rq.get(page_url, headers=headers)
            response.raise_for_status()  # Check for request errors
            data = response.json()

            for track in data['items']:
                track_data_list.append({
                    'track_id': track['id'],
                    'track_name': track['name'],
                    'album_id': track_url['album_id'],
                    'artist_id': track['artists'][0]['id'],
                    'artist_name': track['artists'][0]['name']
                    # You can add more data points here that you want to fetch directly
                })

            # URL of the following page; falsy on the last page, which ends the loop
            page_url = data.get('next')

    # The library is imported as `rq`; the bare name `requests` is not defined
    # in this notebook and would raise NameError while matching the exception.
    except rq.exceptions.HTTPError as err:
        print(f"HTTP error occurred: {err}")
    except Exception as e:
        print(f"An error occurred: {e}")
        # Handle the error as needed

# Execute the function for each track URL; it returns nothing and collects
# its rows in track_data_list as a side effect
for track_url in track_url_list:
    search_track_info(track_url)

# Create a DataFrame from the list of dictionaries (one row per collected track)
data_tracks = pd.DataFrame(track_data_list)

In [74]:
data_tracks.head()

Unnamed: 0,track_id,track_name,album_id,artist_id,artist_name
0,4ZY1RPoLfezEqu03BI2axd,Sgt. Pepper's Lonely Hearts Club Band,1x1jpjDbetGqX0IKCUIBNj,3wGWCP3E3tYqj5memYV9Vq,Bloco do Sargento Pimenta
1,0QLyr8xasb6tgGkdPWYw8d,With a Little Help From My Friends,1x1jpjDbetGqX0IKCUIBNj,3wGWCP3E3tYqj5memYV9Vq,Bloco do Sargento Pimenta
2,1YFlTlQHKR62Sh39ctI7jX,Lucy in the Sky with Diamonds,1x1jpjDbetGqX0IKCUIBNj,3wGWCP3E3tYqj5memYV9Vq,Bloco do Sargento Pimenta
3,1aBpFQAum7vESFuNnWeZZI,Getting Better,1x1jpjDbetGqX0IKCUIBNj,3wGWCP3E3tYqj5memYV9Vq,Bloco do Sargento Pimenta
4,2OePklaMhBp1kCbbTq5UVk,Fixing a Hole,1x1jpjDbetGqX0IKCUIBNj,3wGWCP3E3tYqj5memYV9Vq,Bloco do Sargento Pimenta


In [76]:
data_tracks.shape

(4818, 5)

In [77]:
# Persist the track table; index=False keeps the DataFrame's row index out of the CSV
data_tracks.to_csv('../data/clean/api_tracks1_clean.csv', index=False)