# This section of the code calls the Covid-19 John Hopkins API and merges the cases dataset to the enriched Spotify Top 50 dataset (that contains the info from the Spotify API) by date.

In [1]:
# Import necessary packages for API requests and manipulation

import requests
import pandas as pd
import datetime
import numpy as np
import os

In [2]:
# Get the API data for the past 467 days (or however many days to January 1st 2020 as of today)

response = requests.get("https://disease.sh/v3/covid-19/historical/all?lastdays=467")

In [3]:
# Test API connection

if response.status_code == 200:
    print("API Successful")
else:
    print("API Query Failed")

API Successful


In [4]:
# Load data as JSON

import json
covid_data = json.loads(response.text)

In [5]:
# Isolate only cases key of the larger nested dictionary

covid_cases = covid_data['cases']

In [6]:
# Check data type

type(covid_cases)

dict

In [7]:
# Iteratively move through dictionary and compile to list

df_rows = []
for k,v in covid_cases.items():
    print('date', k)
    print('cases',v)
    df_rows.append([k,v])

date 1/22/20
cases 557
date 1/23/20
cases 655
date 1/24/20
cases 941
date 1/25/20
cases 1433
date 1/26/20
cases 2118
date 1/27/20
cases 2927
date 1/28/20
cases 5578
date 1/29/20
cases 6167
date 1/30/20
cases 8235
date 1/31/20
cases 9927
date 2/1/20
cases 12038
date 2/2/20
cases 16787
date 2/3/20
cases 19887
date 2/4/20
cases 23898
date 2/5/20
cases 27643
date 2/6/20
cases 30803
date 2/7/20
cases 34396
date 2/8/20
cases 37130
date 2/9/20
cases 40160
date 2/10/20
cases 42769
date 2/11/20
cases 44811
date 2/12/20
cases 45229
date 2/13/20
cases 60382
date 2/14/20
cases 66909
date 2/15/20
cases 69052
date 2/16/20
cases 71235
date 2/17/20
cases 73270
date 2/18/20
cases 75152
date 2/19/20
cases 75652
date 2/20/20
cases 76212
date 2/21/20
cases 76841
date 2/22/20
cases 78602
date 2/23/20
cases 78982
date 2/24/20
cases 79546
date 2/25/20
cases 80399
date 2/26/20
cases 81376
date 2/27/20
cases 82736
date 2/28/20
cases 84122
date 2/29/20
cases 86013
date 3/1/20
cases 88394
date 3/2/20
cases 90377

cases 95642222
date 1/19/21
cases 96250345
date 1/20/21
cases 96942928
date 1/21/21
cases 97599840
date 1/22/21
cases 98258825
date 1/23/21
cases 98826681
date 1/24/21
cases 99272581
date 1/25/21
cases 99769921
date 1/26/21
cases 100325930
date 1/27/21
cases 100923844
date 1/28/21
cases 101538493
date 1/29/21
cases 102128615
date 1/30/21
cases 102643720
date 1/31/21
cases 103026115
date 2/1/21
cases 103472860
date 2/2/21
cases 103930596
date 2/3/21
cases 104452768
date 2/4/21
cases 104919624
date 2/5/21
cases 105455017
date 2/6/21
cases 105827142
date 2/7/21
cases 106225026
date 2/8/21
cases 106541152
date 2/9/21
cases 106968967
date 2/10/21
cases 107404483
date 2/11/21
cases 107845702
date 2/12/21
cases 108274938
date 2/13/21
cases 108648026
date 2/14/21
cases 108941674
date 2/15/21
cases 109225259
date 2/16/21
cases 109576107
date 2/17/21
cases 109971474
date 2/18/21
cases 110374973
date 2/19/21
cases 110787608
date 2/20/21
cases 111157877
date 2/21/21
cases 111472126
date 2/22/21
ca

In [8]:
# Make dataframe

covid_data = pd.DataFrame(df_rows, columns=['Date','cases'])

In [9]:
# Convert string date to datetime

covid_data['Date']= pd.to_datetime(covid_data['Date'])

In [10]:
# Check subsample of data to see if formatting is correct

covid_data[(covid_data['Date'] > '2020-12-01') & (covid_data['Date'] < '2020-12-31')].head()

Unnamed: 0,Date,cases
315,2020-12-02,64673997
316,2020-12-03,65368134
317,2020-12-04,66051795
318,2020-12-05,66693750
319,2020-12-06,67232715


In [11]:
# Change directory for this script as well

os.chdir('F:\OneDrive - Central European University\Courses\Winter_Term\Coding 3\Term Project\spotify-covid-analysis\data')

In [12]:
# Read in the enriched Spotify dataset

spotify = pd.read_csv('all_spotify_data.csv')

In [13]:
# Make date field to datetime in the Spotify enriched dataset

spotify['Date']= pd.to_datetime(spotify['Date'])

In [14]:
# Merge Spotify data with Covid data by Date columns --> check result

df = pd.merge(spotify,covid_data,on='Date',how='left')
df.head()

Unnamed: 0,Position,Track Name,Artist,URL,Date,trackID,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,cases
0,1,Pura Pura Lupa,Mahen,https://open.spotify.com/track/6Aw5k0jkFceYaya...,2020-01-02,6Aw5k0jkFceYayaJoRupLT,0.529,0.446,11,-9.167,1,0.0294,0.757,0.000764,0.146,0.14,111.903,246964,4,
1,1,Pura Pura Lupa,Mahen,https://open.spotify.com/track/6Aw5k0jkFceYaya...,2020-01-09,6Aw5k0jkFceYayaJoRupLT,0.529,0.446,11,-9.167,1,0.0294,0.757,0.000764,0.146,0.14,111.903,246964,4,
2,1,Pura Pura Lupa,Mahen,https://open.spotify.com/track/6Aw5k0jkFceYaya...,2020-01-16,6Aw5k0jkFceYayaJoRupLT,0.529,0.446,11,-9.167,1,0.0294,0.757,0.000764,0.146,0.14,111.903,246964,4,
3,2,Pura Pura Lupa,Mahen,https://open.spotify.com/track/6Aw5k0jkFceYaya...,2020-01-23,6Aw5k0jkFceYayaJoRupLT,0.529,0.446,11,-9.167,1,0.0294,0.757,0.000764,0.146,0.14,111.903,246964,4,655.0
4,4,Pura Pura Lupa,Mahen,https://open.spotify.com/track/6Aw5k0jkFceYaya...,2020-01-30,6Aw5k0jkFceYayaJoRupLT,0.529,0.446,11,-9.167,1,0.0294,0.757,0.000764,0.146,0.14,111.903,246964,4,8235.0


In [15]:
# Correctly label missing values as 0 Covid cases (this is our benchmark period - i.e. early January 2020)

df['cases'] = np.where(np.isnan(df['cases']), 0, df['cases'])
df.head()

Unnamed: 0,Position,Track Name,Artist,URL,Date,trackID,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,cases
0,1,Pura Pura Lupa,Mahen,https://open.spotify.com/track/6Aw5k0jkFceYaya...,2020-01-02,6Aw5k0jkFceYayaJoRupLT,0.529,0.446,11,-9.167,1,0.0294,0.757,0.000764,0.146,0.14,111.903,246964,4,0.0
1,1,Pura Pura Lupa,Mahen,https://open.spotify.com/track/6Aw5k0jkFceYaya...,2020-01-09,6Aw5k0jkFceYayaJoRupLT,0.529,0.446,11,-9.167,1,0.0294,0.757,0.000764,0.146,0.14,111.903,246964,4,0.0
2,1,Pura Pura Lupa,Mahen,https://open.spotify.com/track/6Aw5k0jkFceYaya...,2020-01-16,6Aw5k0jkFceYayaJoRupLT,0.529,0.446,11,-9.167,1,0.0294,0.757,0.000764,0.146,0.14,111.903,246964,4,0.0
3,2,Pura Pura Lupa,Mahen,https://open.spotify.com/track/6Aw5k0jkFceYaya...,2020-01-23,6Aw5k0jkFceYayaJoRupLT,0.529,0.446,11,-9.167,1,0.0294,0.757,0.000764,0.146,0.14,111.903,246964,4,655.0
4,4,Pura Pura Lupa,Mahen,https://open.spotify.com/track/6Aw5k0jkFceYaya...,2020-01-30,6Aw5k0jkFceYayaJoRupLT,0.529,0.446,11,-9.167,1,0.0294,0.757,0.000764,0.146,0.14,111.903,246964,4,8235.0


In [16]:
# Write out the final dataset used for analytics to a CSV

df.to_csv('spotify_covid_data.csv', index = False)