In [144]:
import numpy as np
import pandas as pd
import folium
import json
from csv import writer
import requests
import seaborn as sns
import datetime
import os
import plotly.express as px
from datetime import date
import matplotlib.pyplot as plt
import matplotlib as mpl

%matplotlib inline

# Raise the row display limit to 300 so larger DataFrames can be inspected without truncation
pd.set_option('display.max_rows', 300)

In [39]:
#helper function to make API call
def make_api_call():
    """Download the location timelines from the tracker API and cache them.

    The decoded payload is written to ``../api_data/timeline_json.json`` and
    today's date is appended to the API call log via ``update_api_log``.

    Returns:
        The decoded JSON payload, or ``None`` when the request, the decoding
        or the cache write fails (the error is printed rather than raised).
    """
    try:
        # The request lives inside the try block so that network and decoding
        # failures are reported the same way as file errors. The timeout keeps
        # the call from hanging forever on an unresponsive server.
        response = requests.get(
            'https://coronavirus-tracker-api.herokuapp.com/v2/locations?timelines=1',
            timeout=60,
        )
        # Refuse to cache (and log as successful) an HTTP error response.
        response.raise_for_status()
        timeline_json = response.json()

        with open('../api_data/timeline_json.json', 'w') as f:
            json.dump(timeline_json, f, ensure_ascii=False)

        #updating API log
        update_api_log(date.today())

        return timeline_json

    except Exception as e:
        print('Error making API call: ', e)
        return None

#helper function to load previously loaded api data
def use_existing_api_data():
    """Load the cached API payload from ``../api_data/timeline_json.json``.

    Returns:
        The decoded JSON content of the cache file, or ``None`` when the file
        cannot be opened or parsed (the error is printed rather than raised).
    """
    try:
        # Context manager guarantees the file handle is closed after parsing.
        with open('../api_data/timeline_json.json') as f:
            return json.load(f)
    except Exception as e:
        print('Error reading existing JSON file: ', e)
        return None

#helper function to update api call log
def update_api_log(date):
    """Append one call date to ``../api_data/api_call_log.csv``.

    Args:
        date: value to record; it is converted with ``str()`` before writing.

    The log is read back by its ``date`` column, so a ``date`` header row is
    written first whenever the log file does not exist yet or is empty.
    Errors are printed rather than raised.
    """
    log_path = '../api_data/api_call_log.csv'
    string_date = str(date)
    try:
        # Decide before opening: append mode creates the file if it is missing.
        needs_header = not os.path.exists(log_path) or os.path.getsize(log_path) == 0
        with open(log_path, 'a', newline='') as write_obj:
            csv_writer = writer(write_obj)
            if needs_header:
                csv_writer.writerow(['date'])
            csv_writer.writerow([string_date])

    except Exception as e:
        print('Error updating API log: ', e)

#helper function to load data (cached or via new API call) for webapp
def get_raw_data():
    """Return the raw API payload, refreshing the cache at most once per day.

    The latest entry of ``../api_data/api_call_log.csv`` is compared with
    today's date: when they match, the cached JSON file is used, otherwise a
    new API call is made. If the preferred source yields ``None``, the other
    source is tried before giving up.

    Returns:
        The payload returned by ``use_existing_api_data`` or
        ``make_api_call``; ``None`` if neither produced data.
    """
    today = str(date.today())
    try:
        log = pd.read_csv('../api_data/api_call_log.csv')
        last = log.loc[:, 'date'].max()
        up_to_date = last == today
    except (FileNotFoundError, pd.errors.EmptyDataError, KeyError):
        # No usable log yet (missing file, empty file, or no 'date' column):
        # treat the cache as stale instead of crashing.
        up_to_date = False

    raw_data = use_existing_api_data() if up_to_date else make_api_call()
    if raw_data is None:
        # Preferred source failed; fall back on the other one.
        raw_data = make_api_call() if up_to_date else use_existing_api_data()
    return raw_data

In [284]:
def _build_location_frame(loc):
    """Build the daily cases/deaths frame for a single API location record.

    Args:
        loc: one entry of the payload's 'locations' list. The keys read here
            are 'timelines', 'country', 'country_code', 'country_population',
            'province' and 'coordinates'.

    Returns:
        DataFrame with one row per timeline date, holding the raw counts, the
        location attributes and the derived per-capita, change and date
        label columns.
    """
    cases = [{'Date': k, 'Cases': v} for k, v in loc['timelines']['confirmed']['timeline'].items()]
    deaths = [{'Date': k, 'Deaths': v} for k, v in loc['timelines']['deaths']['timeline'].items()]

    country_data = pd.merge(
        pd.DataFrame(deaths),
        pd.DataFrame(cases),
        left_on='Date',
        right_on='Date')

    country_data['Country'] = 'United States' if loc['country'] == 'US' else loc['country']
    country_data['Country Code'] = loc['country_code']
    country_data['Population'] = loc['country_population']
    country_data['Province'] = loc['province']
    # Relies on the coordinates mapping yielding exactly two values,
    # latitude first and longitude second.
    country_data['Latitude'], country_data['Longitude'] = [*loc['coordinates'].values()]
    country_data['Cases per 1M'] = (country_data['Cases'] / country_data['Population'] * 1000000).round(1)
    country_data['Deaths per 1M'] = (country_data['Deaths'] / country_data['Population'] * 1000000).round(1)
    country_data['Change in Deaths (n)'] = country_data['Deaths'].diff()
    country_data['Change in Deaths (pct)'] = country_data['Deaths'].pct_change().round(2)
    country_data['Multiple_Territories'] = country_data['Country'].isin(
        ['China', 'Canada', 'United Kingdom', 'France', 'Australia', 'Netherlands', 'Denmark'])

    #Date-related Variables
    # Only the first 10 characters (the calendar date) of each timestamp are kept.
    country_data['Date'] = pd.to_datetime(country_data['Date'].str.slice(0, 10))
    country_data['Month and Year'] = pd.DatetimeIndex(country_data['Date']).strftime("%b %Y")
    # Later joined to dates on first of each month to create data labels.
    # Day of month as an unpadded string; '%-d' is not used because that
    # strftime directive is platform-specific and fails on Windows.
    country_data['Day'] = country_data['Date'].dt.day.astype(str)

    return country_data


def process_raw_data():
    """Turn the raw API payload into one long DataFrame of daily records.

    Reads the ISO-3 lookup table from ``../geodata/ISO3.csv``, builds one
    frame per location in the payload (skipping a fixed list of excluded
    entries), concatenates them and left-joins the ISO-3 table on 'Country'.

    Returns:
        DataFrame with one row per location and date, including the columns
        contributed by the ISO-3 table.
    """
    #import ISO3 data for Dash Plotly Choropleth mapping
    iso3 = pd.read_csv("../geodata/ISO3.csv", index_col=0)
    # get_raw_data is the loader defined in this notebook (cache or fresh API call).
    location_data = get_raw_data()['locations']

    #Remove non-countries and countries with missing data
    excluded = ['MS Zaandam', 'Eritrea', 'Diamond Princess']

    #Extract COVID morbidity and mortality data from COVID JSON
    data_rows = [
        _build_location_frame(loc)
        for loc in location_data
        if loc['country'] not in excluded
    ]

    df_cases = pd.concat(data_rows, axis=0)

    #Merge ISO-3 country codes for choropleth mapping
    processed_data = pd.merge(df_cases, iso3, left_on='Country', right_on='Country', how='left')
    return processed_data

In [293]:
# Group by Country and Date, to sum metrics (cases, deaths, etc.)
# for countries with multiple provinces listed. This allows our graphs 
# to render country-level statistics

def get_chart_ready_df():
    """Aggregate the processed data to one row per country and date.

    Rows from ``process_raw_data`` are grouped by the country-level keys and
    the four metric columns are summed within each group. The result is
    written to ``../chart_ready.csv`` and also returned.

    Returns:
        The aggregated DataFrame with the grouping keys restored as columns.
    """
    group_keys = [
        'Country',
        'Population',
        'Date',
        'ISO-3',
        'Multiple_Territories',
        'Month and Year',
        'Day',
    ]
    # Every metric uses the same reducer; key order fixes the output column order.
    summed_metrics = dict.fromkeys(
        ['Deaths', 'Deaths per 1M', 'Cases', 'Cases per 1M'], 'sum')

    per_country = process_raw_data().groupby(group_keys)
    chart_ready_df = per_country.agg(summed_metrics).reset_index()

    chart_ready_df.to_csv('../chart_ready.csv')
    return chart_ready_df


# some_countries = ['Vietnam', 'Mexico', 'United Arab Emirates', 'Sweden']

# TODO: refactor the map code, then refactor the data table

# #Examine temporal trend in death rate
# filtered = df[df['country'].isin(some_countries)]
# filtered.sort_values('date', inplace = True)
# filtered.reset_index(inplace = True)
# # filtered['month'] = pd.to_datetime(filtered['date']).dt.to_period('M')
# filtered['month'] = filtered.loc[:, 'date'].str.slice(5,7)
# filtered.columns
# dm = filtered[['country', 'deaths_per_1m', 'deaths', 'date']].set_index('country')
# # dm.plot(kind = 'line', 
# #                 x = 'date', 
# #                 y = 'deaths', 
# #                 figsize = (20,10))

# filtered[filtered['country']] == 'Italy'


In [294]:
# Build the chart-ready frame (this also rewrites ../chart_ready.csv) and display it
get_chart_ready_df()

Unnamed: 0,Country,Population,Date,ISO-3,Multiple_Territories,Month and Year,Day,Deaths,Deaths per 1M,Cases,Cases per 1M
0,Afghanistan,37172386,2020-01-22,AFG,False,Jan 2020,22,0,0.0,0,0.0
1,Afghanistan,37172386,2020-01-23,AFG,False,Jan 2020,23,0,0.0,0,0.0
2,Afghanistan,37172386,2020-01-24,AFG,False,Jan 2020,24,0,0.0,0,0.0
3,Afghanistan,37172386,2020-01-25,AFG,False,Jan 2020,25,0,0.0,0,0.0
4,Afghanistan,37172386,2020-01-26,AFG,False,Jan 2020,26,0,0.0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
56605,Zimbabwe,14439018,2020-12-15,ZWE,False,Dec 2020,15,310,21.5,11522,798.0
56606,Zimbabwe,14439018,2020-12-16,ZWE,False,Dec 2020,16,313,21.7,11749,813.7
56607,Zimbabwe,14439018,2020-12-17,ZWE,False,Dec 2020,17,314,21.7,11866,821.8
56608,Zimbabwe,14439018,2020-12-18,ZWE,False,Dec 2020,18,316,21.9,12047,834.3


In [None]:
# Countries to include in the population-adjusted mortality line chart.
notable = ['United States',  
           'Germany', 
           'Switzerland', 
           'Sweden', 
           'Singapore', 
           'Vietnam', 
           'United Arab Emirates', 
           'Italy',
           'United Kingdom']
# NOTE(review): `create_line_chart` and `grouped` are not defined in the cells
# visible here, and 'deaths_per_1m' does not match the 'Deaths per 1M' column
# produced by get_chart_ready_df — confirm both before running on a fresh kernel.
adjusted = create_line_chart(grouped,
                             notable,
                             'deaths_per_1m', 
                             'Population-Adjusted Cumulative COVID-19 Mortality Rate')

In [None]:
#Plotting using Plotly


# adjusted = create_line_chart(grouped, some_countries, 'deaths', 'Crude Mortality in Top 25 Countries by Per Capita GDP' )
# unadjusted = create_line_chart(grouped, top_25, 'adjusted_deaths_per_1m', 'Population- Adjusted Mortality Rate in Top 25 Countries, by Per Capita GDP' )
# adjusted_ylog = create_line_chart(grouped, top_25, 'deaths', 'Crude Mortality in Top 25 Countries by Per Capita GDP', True)
# unadjusted_ylog = create_line_chart(grouped, top_25, 'adjusted_deaths_per_1m', 'Population- Adjusted Mortality Rate in Top 25 Countries, by Per Capita GDP', True )



In [280]:
#Code for looking at Province-level data on a given day
# df = process_raw_data()
# terr_counts = df.loc[df['Date'] == '2020-11-15', ['Country', 'Province', 'Cases']].reset_index(drop = True)
# individual_pops = terr_counts.loc[terr_counts['Country'].isin(('China', 'Canada', 'United Kingdom', 'France', 'Australia', 'Netherlands', 'Denmark'))]
# individual_pops.sort_values(['Country', 'Province', 'Cases'])