In [1]:
import numpy as np
import pandas as pd
import requests
from difflib import SequenceMatcher
import base64
from urllib.parse import urlencode
import datetime as dt

import spotify_cred as sc 
from spotify_api_client import SpotifyAPI 

import warnings
warnings.filterwarnings('ignore')

# Collect audio features from Spotify API

In [2]:
# Load the chart listing; the first CSV column is dropped by position
# (the rest of the notebook only uses the remaining columns).
charts_df = pd.read_csv('top_charts.csv').iloc[:, 1:]
charts_df.head()

Unnamed: 0,year,song,artist
0,2013.0,Thrift Shop,Macklemore & Ryan Lewis Featuring Wanz
1,2013.0,Blurred Lines,Robin Thicke Featuring T.I. + Pharrell
2,2013.0,Radioactive,Imagine Dragons
3,2013.0,Harlem Shake,Baauer
4,2013.0,Can't Hold Us,Macklemore & Ryan Lewis Featuring Ray Dalton


In [3]:
# Pull the client id / secret from the local credentials module and
# build the API client with them.
spotify_client_id, spotify_secret = sc.client_id, sc.client_secret
spotify = SpotifyAPI(client_id := spotify_client_id, spotify_secret) if False else SpotifyAPI(spotify_client_id, spotify_secret)

In [4]:
# Verifying that credentials are valid.
# The call's return value is the cell output (True in the recorded run);
# presumably a truthy result means authentication succeeded — confirm
# against spotify_api_client.SpotifyAPI.perform_auth.
spotify.perform_auth()

True

In [5]:
# Look up each charted track on Spotify. get_song_id is unpacked as a
# (song id, popularity score) pair; the id is what the audio-features
# request needs later on.
song_id_col, popularity_col = [], []

for title, performer in zip(charts_df['song'], charts_df['artist']):
    track_id, track_popularity = spotify.get_song_id(title, performer)

    song_id_col.append(track_id)
    popularity_col.append(track_popularity)

In [6]:
# Attach the looked-up popularity scores and song ids as new columns
# (popularity first, then song_id).
charts_df = charts_df.assign(
    popularity=popularity_col,
    song_id=song_id_col,
)

In [7]:
# Collecting audio features: one API call per song id, one row per song.
#
# The rows are gathered in a plain list and turned into a DataFrame once.
# DataFrame.append (used previously) was removed in pandas 2.0, and growing
# a frame row-by-row copies the whole frame on every iteration.
audio_feature_records = []

for song_id in charts_df.song_id:
    audio_feature_dict = spotify.get_audio_features(song_id)
    # Keep exactly one row per song even when the lookup yields nothing,
    # so the positional (axis=1) concat with charts_df stays aligned.
    # NOTE(review): assumes get_audio_features returns a dict-like mapping
    # or a falsy value on failure — confirm against spotify_api_client.
    audio_feature_records.append(audio_feature_dict if audio_feature_dict else {})

audio_features_df = pd.DataFrame(audio_feature_records)

In [8]:
# Place the chart columns and the audio-feature columns side by side,
# then discard the identifier / link fields that are not needed downstream.
unused_columns = ['song_id', 'id', 'uri', 'track_href', 'analysis_url', 'type']
song_info = pd.concat([charts_df, audio_features_df], axis=1).drop(columns=unused_columns)

In [9]:
# Persist the combined table; the index is written as the first column.
song_info_csv_path = 'song_info.csv'
song_info.to_csv(song_info_csv_path)