# Gathering COVID Data

In [1]:
import csv
import requests
import pandas as pd
import numpy as np
import urllib.request
import codecs

## Define URLs

In [None]:
# Endpoint prefix; a two-letter state code is appended to it for each request.
base_url = 'http://coronavirusapi.com/getTimeSeries/'

# Two-letter postal codes to query: the 50 states plus DC (51 entries).
state = (
    "AL AK AZ AR CA CO CT DC "
    "DE FL GA HI ID IL IN IA "
    "KS KY LA ME MD MA MI MN "
    "MS MO MT NE NV NH NJ NM "
    "NY NC ND OH OK OR PA RI "
    "SC SD TN TX UT VT VA WA "
    "WV WI WY"
).split()

# Accumulator for the parsed rows; filled by the API-call cell.
master_list = []

## Define Functions

In [None]:
def epochConverter(num: float) -> pd.Timestamp:
    """Convert seconds since the Unix epoch into a pandas timestamp.

    The dataset reports each observation time as seconds elapsed since
    1970-01-01 00:00:00, so this turns that number into a datetime.

    Args:
        num: Seconds since the Unix epoch (int or float).

    Returns:
        The corresponding timezone-naive ``pd.Timestamp``.
    """
    # Let pandas interpret the value as seconds directly. Converting to
    # fractional days first and adding a DateOffset pushes the value through
    # two float divisions, which can perturb the sub-second part.
    return pd.to_datetime(num, unit='s')

## Make API Calls

In [None]:
# For every state code, request its time series (returned as CSV) and append
# one dict per data row to master_list.
for state_url in state:
    # The context manager closes the HTTP response once the rows are consumed,
    # including when parsing a row raises.
    with urllib.request.urlopen(base_url + state_url) as state_response:
        # The response yields bytes; decode lazily so csv.reader gets text lines.
        csvfile = csv.reader(codecs.iterdecode(state_response, 'utf-8'))

        for row in csvfile:
            # csv.reader yields an empty list for a blank line; indexing it
            # would raise IndexError, so skip those.
            if not row:
                continue
            # Each response carries its own header line; drop it.
            if row[0] == 'seconds_since_Epoch':
                continue
            master_list.append({
                'State': state_url,
                'Date': epochConverter(float(row[0])),
                'Tested': float(row[1]),
                'Positive': float(row[2]),
                'Deaths': float(row[3]),
            })

## Build Dataframe and Export to CSV File

In [None]:
# Convert the list of row dictionaries into a dataframe.
# The columns are named explicitly so the frame keeps its expected schema even
# when master_list is empty (otherwise it would have no columns and any later
# column lookup would raise KeyError). For non-empty input the result is the
# same as letting pandas infer the columns from the dict keys.
covid_state_df = pd.DataFrame(
    master_list,
    columns=['State', 'Date', 'Tested', 'Positive', 'Deaths'],
)
covid_state_df

In [None]:
# Create a new dataframe holding, for each state, the row with the latest Date.
# A column-wise groupby(...).max() is not equivalent: it takes the largest
# Tested/Positive/Deaths seen on any day, which differs from the most recent
# report whenever a count is revised downward.
latest_row_labels = covid_state_df.groupby('State')['Date'].idxmax()
recent_date_df = covid_state_df.loc[latest_row_labels]
# Renumber the rows 0..n-1; State stays a regular column, one row per state
# in State order (groupby sorts its keys by default).
recent_date_df = recent_date_df.reset_index(drop=True)
recent_date_df.head()

## Calculate Case Fatality Rate

In [None]:
# Deaths expressed as a percentage of positive cases, per state.
deaths_per_positive = recent_date_df["Deaths"].div(recent_date_df["Positive"])
recent_date_df["Mortality Rate"] = deaths_per_positive.mul(100)

# Write the per-state summary (including the dataframe index) to disk.
recent_date_df.to_csv("Resources/covid_api_data.csv")