# Spotify API data extraction

In [1]:
# Import libraries
import os

import pandas as pd
import requests
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

In [2]:
# Get access token
def get_spotify_access_token(client_id, client_secret, timeout=10):
    """Request an access token from the Spotify Accounts service.

    Posts a ``client_credentials`` grant to
    ``https://accounts.spotify.com/api/token`` and returns the
    ``access_token`` field of the JSON reply.

    Parameters
    ----------
    client_id : str
        Client id sent in the form body.
    client_secret : str
        Client secret sent in the form body.
    timeout : float or tuple, optional
        Passed to ``requests.post`` so a stalled connection cannot hang the
        notebook indefinitely. Defaults to 10 seconds.

    Returns
    -------
    str
        The value of ``access_token`` in the response.

    Raises
    ------
    requests.HTTPError
        If the token endpoint answers with a 4xx/5xx status (for example,
        rejected credentials).
    KeyError
        If a successful response carries no ``access_token`` field.
    """
    auth_url = 'https://accounts.spotify.com/api/token'
    auth_data = {
        'grant_type': 'client_credentials',
        'client_id': client_id,
        'client_secret': client_secret,
    }
    auth_response = requests.post(auth_url, data=auth_data, timeout=timeout)
    # Fail loudly on a rejected request instead of hitting an opaque KeyError
    # on the missing 'access_token' key further down.
    auth_response.raise_for_status()
    return auth_response.json()['access_token']

# Function to extract track info
def search_track(track_id, access_token, timeout=10):
    """Fetch one track from the Spotify Web API and flatten the fields of interest.

    Issues ``GET https://api.spotify.com/v1/tracks/{track_id}`` with a bearer
    token and extracts a handful of fields from the JSON reply.

    Parameters
    ----------
    track_id : str
        Track id interpolated into the request URL.
    access_token : str
        Token sent in the ``Authorization: Bearer`` header.
    timeout : float or tuple, optional
        Passed to ``requests.get`` so a stalled connection cannot block a
        loop over many tracks. Defaults to 10 seconds.

    Returns
    -------
    dict or None
        A dict with keys ``'Track Name'``, ``'Artist(s)'`` (artist names
        joined by ``', '``), ``'Album'``, ``'Release Date'``,
        ``'Popularity'`` and ``'URI'``; ``None`` when the reply contains an
        ``'error'`` key or is not valid JSON.
    """
    track_url = f'https://api.spotify.com/v1/tracks/{track_id}'
    headers = {'Authorization': f'Bearer {access_token}'}
    track_response = requests.get(track_url, headers=headers, timeout=timeout)

    try:
        track_response_data = track_response.json()
    except ValueError:
        # A non-JSON body (e.g. an HTML gateway error page) is treated like
        # any other failed lookup so the caller's "skip on None" logic holds.
        return None

    if 'error' in track_response_data:
        return None

    album = track_response_data['album']

    # Extract relevant information about the track
    return {
        'Track Name': track_response_data['name'],
        'Artist(s)': ', '.join(artist['name'] for artist in track_response_data['artists']),
        'Album': album['name'],
        'Release Date': album['release_date'],
        'Popularity': track_response_data['popularity'],
        'URI': track_response_data['uri'],
    }

In [3]:
# Set up credentials
# Secrets are read from the environment so they never live in the notebook.
# Export SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET before starting the kernel;
# a missing variable raises KeyError here rather than failing later at the API.
# NOTE(review): the key pair that used to be hard-coded in this cell should be
# treated as leaked and rotated in the Spotify developer dashboard.
client_id = os.environ['SPOTIPY_CLIENT_ID']
client_secret = os.environ['SPOTIPY_CLIENT_SECRET']
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
# sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
access_token = get_spotify_access_token(client_id, client_secret)

In [4]:
# Load the Billboard audio-features table
billboard_df = pd.read_csv(
    'billboard_dataset/data/audio_features.csv',
)

# Load the Spotify top-tracks table
spotify_api_df = pd.read_csv(
    'spotify_dataset/data/spotify_top_tracks.csv',
)

In [12]:
# (rows, columns) of the Billboard table read from audio_features.csv
billboard_df.shape

(29503, 22)

In [13]:
# (rows, columns) of the Spotify table read from spotify_top_tracks.csv
spotify_api_df.shape

(50, 11)

In [5]:
# Rename song ID
# Rebind the result instead of using inplace=True: the name ends up pointing at
# the renamed frame either way, but the frame loaded earlier is not mutated.
spotify_api_df = spotify_api_df.rename(columns={'id': 'spotify_track_id'})

# Join data
# Left join: every Billboard row is kept, and the Spotify-side columns are NaN
# where no row shares the same spotify_track_id. Column names present in both
# frames get pandas' default _x (Billboard) / _y (Spotify) suffixes.
merged_df = billboard_df.merge(spotify_api_df, on='spotify_track_id', how='left')

In [11]:
# (rows, columns) after the left join; a row count equal to billboard_df's
# means no Billboard row matched more than one Spotify row
merged_df.shape

(29503, 32)

In [10]:
# Number of missing values in each column of the merged frame
merged_df.isna().sum()

song_id                          0
performer                        0
song                             0
spotify_genre                 1600
spotify_track_id              5106
spotify_track_preview_url    15012
spotify_track_duration_ms     5106
spotify_track_explicit        5106
spotify_track_album           5112
danceability_x                5169
energy_x                      5169
key                           5169
loudness                      5169
mode                          5169
speechiness                   5169
acousticness                  5169
instrumentalness              5169
liveness                      5169
valence                       5169
tempo                         5169
time_signature                5169
spotify_track_popularity      5106
name                         29488
popularity                   29488
duration_ms                  29488
explicit                     29488
album_id                     29488
album_name                   29488
artist_id           

In [6]:
# Extract track IDs into a list
# billboard_tracks = pd.Series(billboard_df.spotify_track_id.unique()).dropna().tolist()
# billboard_tracks[1]

In [7]:
# Use track IDs to pull more variables from Spotify API
# track_data_list = []
# test = ['1fHHq3qHU8wpRKHzhojZ4a']

# # Loop through each song and retrieve track data
# for song in test:
#     track_info = search_track(song, access_token)
#     if track_info:
#         track_data_list.append(track_info)
        
# # Convert to dataframe
# api_df = pd.DataFrame(track_data_list)
# api_df.head()