## Spotify API

In [1]:
#!pip install spotipy

In [None]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pprint

In [None]:
cid = '***'
secret = '***'
client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)

sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [None]:
import requests
import json
import base64
import pandas as pd
import numpy as np

In [None]:
def get_headers(client_id, client_secret):
    endpoint = 'https://accounts.spotify.com/api/token'
    encoded = base64.b64encode((client_id+':'+client_secret).encode('utf-8')).decode('ascii')
    headers = {'Authorization': 'Basic {auth}'.format(auth=encoded)}
    payload = {'grant_type':'client_credentials'}
    r = requests.post(endpoint, data=payload, headers=headers)
    acess_token = json.loads(r.text).get('access_token')
    headers = {'Authorization': 'Bearer {auth}'.format(auth=acess_token)}
    return headers

In [None]:
next_key = 1
new_release = {}

def add_item(dic, value):
    global next_key
    dic[next_key] = value
    next_key += 1

In [None]:
def get_album(album_id):
    endpoint = f'https://api.spotify.com/v1/albums/{album_id}'
    headers = get_headers(client_id, client_secret)
    r = requests.get(endpoint, headers=headers)
    return json.loads(r.text)

def get_artist(artist_id):
    endpoint = f'https://api.spotify.com/v1/artists/{artist_id}'
    headers = get_headers(client_id, client_secret)
    r = requests.get(endpoint, headers=headers)
    return json.loads(r.text)

def get_genres(album_id):
    album = get_album(album_id)
    genres = album.get('genres')
    artists = album['artists']
    for artist in artists:
        artist_id = artist['id']
        artist_info = get_artist(artist_id)
        genres = artist_info['genres']
    return genres

In [None]:
client_id = '***'
client_secret = '***'

def new_release_album(client_id, client_secret):
    endpoint = 'https://api.spotify.com/v1/browse/new-releases'
    headers = get_headers(client_id, client_secret)
    params = {
        'limit':50
    }
    r = requests.get(endpoint, params = params, headers = headers)

    if r.status_code == 200:
        data = json.loads(r.text)
        for d in data.get('albums').get('items'):
            for a in d.get('artists'):
                artist = a['name']
                artist_id = a['id']
            album_name = d.get('name')
            album_id = d.get('id')
            date = d.get('release_date')
            genres = get_genres(album_id)
            add_item(new_release, {'album':album_name, 'album_id': album_id, 'date':date, 'artist': artist, 'artist_id':artist_id, 'genres': genres})
    else:
        print('error! status code: ', r.status_code)
    return new_release

In [None]:
new_release_album(client_id, client_secret)

In [None]:
album_df = pd.DataFrame(new_release)
album_df = album_df.transpose()

In [None]:
album_df.head()

Unnamed: 0,album,album_id,date,artist,artist_id,genres
1,Closer Than This,3auDI7RSdoJr99qMNXimP4,2023-12-22,Jimin,1oSPZhvZMIrWW5I41kPkkY,[k-pop]
2,Lace It (with Eminem & benny blanco),5nOfPCLpHGpayGrUNHAPLt,2023-12-16,benny blanco,5CiGnKThu5ctn9pBxv7DGa,[pop]
3,The End,5yQSR4I6kIx4a7Siq2Q6vZ,2023-12-22,Tom Odell,2txHhyCwHjUEpJjWrEyqyX,[chill pop]
4,Entrapreneur,6VrYcDdRM3W0eqHbaTKcWh,2023-12-21,Central Cee,5H4yInM5zmHqpKIoMNAx4r,"[melodic drill, r&drill]"
5,Saliendo del Planeta,4c9baK6KJXLlly01POapWY,2023-12-22,Saiko,2O8vbr4RYPpk6MRA4fio7u,"[trap latino, urbano espanol, urbano latino]"


In [None]:
album_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 71 entries, 1 to 71
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   album      71 non-null     object
 1   album_id   71 non-null     object
 2   date       71 non-null     object
 3   artist     71 non-null     object
 4   artist_id  71 non-null     object
 5   genres     71 non-null     object
dtypes: object(6)
memory usage: 3.9+ KB


In [None]:
album_df['date'] = pd.to_datetime(album_df['date'], format='%Y-%m-%d', errors='raise')

In [None]:
album_df.to_parquet('album.parquet', index=False)

### local to S3

In [None]:
# !pip3 install boto3

In [None]:
import boto3

def s3_connection():
    try:
        s3 = boto3.client(
            service_name="s3",
            region_name="us-west-2",
            aws_access_key_id="***",
            aws_secret_access_key="***"
        )
    except Exception as e:
        print(e)
    else:
        print("s3 bucket connected!")
        return s3

s3 = s3_connection()

s3 bucket connected!


In [None]:
def s3_put_object(s3, bucket, filepath, access_key):
    try:
        s3.upload_file(
            Filename=filepath,
            Bucket=bucket,
            Key=access_key
        )
    except Exception as e:
        print(e)
    return True

In [None]:
s3_put_object(s3, 'spotify-etl-bk', 'album.parquet', "album.parquet")

True

### read S3

In [None]:
import io

In [None]:
obj = s3.get_object(
    Bucket='spotify-etl-bk',
    Key='album.parquet'
)

result = pd.read_parquet(io.BytesIO(obj['Body'].read()))
print(result)

                                   album                album_id       date  \
0                       Closer Than This  3auDI7RSdoJr99qMNXimP4 2023-12-22   
1   Lace It (with Eminem & benny blanco)  5nOfPCLpHGpayGrUNHAPLt 2023-12-16   
2                                The End  5yQSR4I6kIx4a7Siq2Q6vZ 2023-12-22   
3                           Entrapreneur  6VrYcDdRM3W0eqHbaTKcWh 2023-12-21   
4                   Saliendo del Planeta  4c9baK6KJXLlly01POapWY 2023-12-22   
..                                   ...                     ...        ...   
66         light (with 9lives & midwxst)  7HJ83Qe1wRgbqHJsSo3sK3 2023-12-15   
67                                Novela  7KQdgdXYqfjqnTNsyaVWHd 2023-12-15   
68                 Antes De Que Amanezca  2gg7wgRW4yK27ex7rNvWuW 2023-12-15   
69                             Asteroids  5jPlekW4E787N4lgsC9G3w 2023-10-30   
70                           THINK LATER  0OUOx6rJXtL66AzTnP9KUE 2023-12-08   

          artist               artist_id  \
0      

## Python to S3 (JSON)