In [None]:
# Jillian Arthur: 10/1/24: Spotify API Data Report
# Hypothesis: Songs that are more popular have more danceability. In theory this makes sense because songs with upbeat melodies are generally catchy and therefore circulate well on social media. This can be tested by comparing the most popular songs with the most danceable songs and seeing whether they are the same.
# I will be using the endpoints for the 1989 album by Taylor Swift, and Born To Die by Lana Del Rey.
# The data telling us the popularity of an album is reliable, as those are straight statistics based on how many streams it gets; however, the data that states the danceability could be unreliable, as 'danceability' is relatively subjective. Danceability can be defined as how suitable a song is to dance to, but one could argue that any song can be danced to, and that some are more danceable than others depending on the person and their music taste.

In [313]:
import urllib
import requests
import pandas as pd
import json
import base64

In [314]:
def get_session_token(SessionID, SessionKey):
    """Request an access token from Spotify using the client-credentials grant.

    Parameters
    ----------
    SessionID : str
        Client ID of the Spotify application.
    SessionKey : str
        Client secret of the Spotify application.

    Returns
    -------
    str
        The ``access_token`` field of the token endpoint's JSON response.

    Raises
    ------
    requests.HTTPError
        If the token endpoint answers with a 4xx/5xx status (for example,
        wrong credentials), instead of failing later with an opaque
        ``KeyError`` on the missing ``access_token`` field.
    """
    url = 'https://accounts.spotify.com/api/token'
    data = {'grant_type': 'client_credentials'}

    # HTTP Basic credentials are base64("<client id>:<client secret>").
    credentials = '{}:{}'.format(SessionID, SessionKey)
    encoded_key = base64.b64encode(credentials.encode("ascii"))
    header = {'Authorization': 'Basic {}'.format(encoded_key.decode("ascii"))}

    # A timeout keeps the notebook from hanging forever on a stalled connection.
    response = requests.post(url, data=data, headers=header, timeout=10)
    print(response.status_code)

    # Fail loudly on an error status before trying to read the token.
    response.raise_for_status()
    return response.json()['access_token']

In [315]:
# Load the API credentials from a local CSV file; later cells read its
# 'Client_ID' and 'Client_Secret' columns.
keys = pd.read_csv(filepath_or_buffer="keys.txt")

In [316]:
# Exchange the credentials stored in the first row of `keys` for an access token.
access_token = get_session_token(
    SessionID=keys['Client_ID'].iat[0],
    SessionKey=keys['Client_Secret'].iat[0],
)

200


In [317]:
# Spotify Web API endpoints used by the requests in this notebook.
t_features_ep = "https://api.spotify.com/v1/audio-features"  # audio features, queried with ?ids=
tracks_ep = "https://api.spotify.com/v1/tracks"  # track metadata, queried with ?ids=
ab_tracks_ep = "https://api.spotify.com/v1/albums/{}/tracks"  # album ID is filled in via .format()

In [318]:
def api_call(endpoint_url, api_header):
    """Send a GET request to an API endpoint and return the decoded JSON body.

    Parameters
    ----------
    endpoint_url : str
        Full URL to request, including any query string.
    api_header : dict
        HTTP headers to send with the request (e.g. the bearer-token header).

    Returns
    -------
    dict or list
        The response body parsed from JSON.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status, so that an error payload
        is never handed to downstream cells as if it were valid data.
    """
    # A timeout keeps the notebook from hanging forever on a stalled connection.
    response = requests.get(endpoint_url, headers=api_header, timeout=10)
    print(response.status_code)

    # Surface HTTP errors here rather than as a KeyError in a later cell.
    response.raise_for_status()
    return response.json()

In [319]:
# Bearer-token header that is sent with every API request below.
session_header = {'Authorization': f'Bearer {access_token}'}

In [320]:
# Spotify album IDs for the two albums being compared.
ldr_album_id = "3TtsWmvFbChKTWIRfa85lS"  # Lana Del Rey album
ts_album_id = "1o59UpKw81iHR0HPiSkJR0"  # Taylor Swift album

In [321]:
# Sanity check: display the album-tracks URL built from ldr_album_id (no request is sent).
ab_tracks_ep.format(ldr_album_id)

'https://api.spotify.com/v1/albums/3TtsWmvFbChKTWIRfa85lS/tracks'

In [322]:
# Fetch the track listing of each album.
# The album-tracks endpoint is paginated and returns only 20 items by default,
# so request the maximum page size (50) to avoid silently dropping tracks from
# albums that have more than 20 of them.
ldr_album_response = api_call(ab_tracks_ep.format(ldr_album_id) + '?limit=50', session_header)
ts_album_response = api_call(ab_tracks_ep.format(ts_album_id) + '?limit=50', session_header)

200
200


In [323]:
# Tabulate each album's track listing: one row per entry of the response's 'items' list.
ldr_album_df = pd.DataFrame(data=ldr_album_response['items'])
ts_album_df = pd.DataFrame(data=ts_album_response['items'])

In [324]:
# Collapse each album's track IDs into one comma-separated string for the ?ids= query parameter.
ldr_track_ids = ','.join(ldr_album_df['id'])
ts_track_ids = ','.join(ts_album_df['id'])

In [325]:
# Sanity check: display an audio-features URL with the track IDs filled in (no request is sent).
t_features_ep + '?ids={}'.format(ldr_track_ids)

'https://api.spotify.com/v1/audio-features?ids=2MrvoOqj007UVWUtGcCK0F,3vV6IX7lbyQFmOyAuQpdav,6Ce04PNSJO1KkF1WWhSXEi,1GNIYBU1XhMSlAxJXiUBbC,4PLP0yDRmK2x8qbL9b9VfQ,1F96ZPH8sMhxRg0E8Nyzev,7c3GnJoriaSP3Xi1wJ5gMb,04cYZCH74znS6dgdryrWLx,0SG3beN2W0xyWqtyrDJiNB,2uJ3Vh0EKPCHSDd9rb5v49,0l2HaL3nbp9AFJ428p4yaA,2afQfyEKTMRlHDUsnKwDo8,1flpioTD1PIkKyJVvp5Ceq,1MtwMDDHn35PP9eVMK6Dwd,12wkCDbvabkmnJNOLdNQc4'

In [326]:
# Request the audio features (danceability etc.) for every track of each album.
ldr_track_features = api_call(f'{t_features_ep}?market=US&ids={ldr_track_ids}', session_header)
ts_track_features = api_call(f'{t_features_ep}?market=US&ids={ts_track_ids}', session_header)

200
200


In [327]:
# Request the track metadata (name, popularity, ...) for every track of each album.
ldr_track_info = api_call(f'{tracks_ep}?market=US&ids={ldr_track_ids}', session_header)
ts_track_info = api_call(f'{tracks_ep}?market=US&ids={ts_track_ids}', session_header)

200
200


In [328]:
# Tabulate the audio features: one row per entry of the response's 'audio_features' list.
ldr_features_df = pd.DataFrame(data=ldr_track_features['audio_features'])
ts_features_df = pd.DataFrame(data=ts_track_features['audio_features'])

In [329]:
# Tabulate the track metadata: one row per entry of the response's 'tracks' list.
ldr_tracks_df = pd.DataFrame(data=ldr_track_info['tracks'])
ts_tracks_df = pd.DataFrame(data=ts_track_info['tracks'])

# Show track name next to popularity for the Lana Del Rey album.
ldr_tracks_df.loc[:, ['name', 'popularity']]

Unnamed: 0,name,popularity
0,Born To Die,45
1,Off To The Races,43
2,Blue Jeans,43
3,Video Games,43
4,Diet Mountain Dew,40
5,National Anthem,43
6,Dark Paradise,42
7,Radio,40
8,Carmen,40
9,Million Dollar Man,42


In [330]:
# This data frame shows the name of the track and the popularity of each track on Born To Die.

In [331]:
# Join audio features with track metadata on the shared track 'id' column.
ldr_merged = ldr_features_df.merge(ldr_tracks_df, on='id', how='inner')

# Show track name next to danceability.
ldr_merged.loc[:, ['name', 'danceability']]

Unnamed: 0,name,danceability
0,Born To Die,0.18
1,Off To The Races,0.4
2,Blue Jeans,0.559
3,Video Games,0.39
4,Diet Mountain Dew,0.46
5,National Anthem,0.535
6,Dark Paradise,0.586
7,Radio,0.312
8,Carmen,0.487
9,Million Dollar Man,0.2


In [332]:
# This data frame shows the name of each track and the danceability of each track on Born To Die. Now I will analyze this for 1989.

In [333]:
# Join audio features with track metadata on the shared track 'id' column.
# The right-hand frame must be ts_tracks_df: merging ts_features_df with itself
# yields a frame without the 'name' and 'popularity' columns.
ts_merged = pd.merge(ts_features_df, ts_tracks_df, how = 'inner', on = 'id')

In [339]:
# Stack both albums' merged track tables row-wise into a single frame.
artist_compare = pd.concat([ldr_merged, ts_merged], axis = 0)

# Only the last expression of a cell is rendered, so a bare `artist_compare`
# in the middle of the cell would be evaluated and discarded; the table shown
# by this cell is the name/popularity view below.
ts_tracks_df[['name', 'popularity']]

Unnamed: 0,name,popularity
0,Welcome To New York (Taylor's Version),60
1,Blank Space (Taylor's Version),62
2,Style (Taylor's Version),63
3,Out Of The Woods (Taylor's Version),61
4,All You Had To Do Was Stay (Taylor's Version),60
5,Shake It Off (Taylor's Version),60
6,I Wish You Would (Taylor's Version),59
7,Bad Blood (Taylor's Version),59
8,Wildest Dreams (Taylor's Version),60
9,How You Get The Girl (Taylor's Version),59


In [340]:
# This data frame shows the name and popularity of each track on 1989.

In [336]:
# Join audio features with track metadata on the shared track 'id' column.
ts_merged = ts_features_df.merge(ts_tracks_df, on='id', how='inner')

# Show track name next to danceability.
ts_merged.loc[:, ['name', 'danceability']]

Unnamed: 0,name,danceability
0,Welcome To New York (Taylor's Version),0.76
1,Blank Space (Taylor's Version),0.732
2,Style (Taylor's Version),0.514
3,Out Of The Woods (Taylor's Version),0.544
4,All You Had To Do Was Stay (Taylor's Version),0.589
5,Shake It Off (Taylor's Version),0.632
6,I Wish You Would (Taylor's Version),0.671
7,Bad Blood (Taylor's Version),0.624
8,Wildest Dreams (Taylor's Version),0.587
9,How You Get The Girl (Taylor's Version),0.761


In [337]:
# This data frame shows the name and danceability of each track on 1989.