In [1]:
import pandas as pd 
import seaborn as sb
import numpy as np
import plotly
import sklearn as sk
import sqlite3
import matplotlib.pyplot as plt
import plotly.offline as offline
import plotly.graph_objs as go 

from plotly.graph_objs import *
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

init_notebook_mode(connected=True) 

%matplotlib inline

1. Build a dataset

In [2]:
# hde school info
hde_school = pd.read_csv("HDE/global-school-closures-covid-19.csv")

# John Hopkins data
jh_cases = pd.read_csv("johns_hopkins_csse/2019-novel-coronavirus-covid-19-2019-ncov-data-repository-confirmed-cases.csv")

jh_deaths = pd.read_csv("johns_hopkins_csse/2019-novel-coronavirus-covid-19-2019-ncov-data-repository-deaths.csv")

jh_rec = pd.read_csv("johns_hopkins_csse/2019-novel-coronavirus-covid-19-2019-ncov-data-repository-recovered.csv")

In [3]:
# making John Hopkins data ready for merge
jh_cases['DateTime'] = pd.to_datetime(jh_cases['date'])
jh_deaths['DateTime'] = pd.to_datetime(jh_deaths['date'])
jh_rec['DateTime'] = pd.to_datetime(jh_rec['date'])

jh_cases.rename(columns={"country_region":"country"},inplace=True)
jh_deaths.rename(columns={"country_region":"country"},inplace=True)
jh_rec.rename(columns={"country_region":"country"},inplace=True)

# joining john hopkins data
john_hop = pd.merge(jh_cases, jh_deaths,on=["country","DateTime","province_state"])
john_hop.drop(columns=["date_x","date_y","lat_x","long_x"])
jh_all = pd.merge(john_hop, jh_rec,how="left",on=["country","DateTime","province_state"])
jh_all.drop(columns=["date_x","date_y","lat_x","long_x"])



Unnamed: 0,province_state,country,confirmed,DateTime,lat_y,long_y,deaths,lat,long,date,recovered
0,,Afghanistan,0,2020-01-22,33,65,0,33.0,65.0,2020-01-22,0.0
1,,Afghanistan,0,2020-01-23,33,65,0,33.0,65.0,2020-01-23,0.0
2,,Afghanistan,0,2020-01-24,33,65,0,33.0,65.0,2020-01-24,0.0
3,,Afghanistan,0,2020-01-25,33,65,0,33.0,65.0,2020-01-25,0.0
4,,Afghanistan,0,2020-01-26,33,65,0,33.0,65.0,2020-01-26,0.0
...,...,...,...,...,...,...,...,...,...,...,...
17915,,Sierra Leone,0,2020-03-27,8,-11,0,8.0,-11.0,2020-03-27,0.0
17916,,Sierra Leone,0,2020-03-28,8,-11,0,8.0,-11.0,2020-03-28,0.0
17917,,Sierra Leone,0,2020-03-29,8,-11,0,8.0,-11.0,2020-03-29,0.0
17918,,Sierra Leone,0,2020-03-30,8,-11,0,8.0,-11.0,2020-03-30,0.0


In [4]:
# HDE School info has country names in a column, but where different regions exist, these follow after a comma
#the following steps ensure a single consistent country field so that school info can be merged to John Hopkins data

def country(x):
    for i in jh_all["country"]:
        if i in x:
            return i

hde_school["country_1"] = hde_school["country"].apply(lambda x: country(x))

hde_school.drop_duplicates(subset=["country_1","date"],inplace=True)

hde_school.drop(columns="country",inplace=True)

hde_school.rename(columns={"country_1":"country"},inplace=True)

hde_school['DateTime'] = pd.to_datetime(hde_school['date'])

In [5]:
hde_school.sort_values(by=["country","DateTime"]).drop_duplicates(subset="country",keep="first",inplace=True)

In [6]:
# sql merge used so that school info can be merged for every date after the school closure
conn = sqlite3.connect(':memory:')

hde_school.to_sql("hde_school",conn,index=False)
jh_all.to_sql("john_hop",conn,index=False)

qry = '''
    select  
        john_hop.*,
        hde_school.scale        
    from
        john_hop left join hde_school on
        hde_school.DateTime <= john_hop.DateTime and 
        hde_school.country = john_hop.country
    '''

joint = pd.read_sql(qry,conn)
joint.shape

(83740, 16)

In [7]:
# Need to dedup by date, province and country
joint.drop_duplicates(subset=["province_state","country","DateTime"],inplace=True)

joint["scale"].fillna("None",inplace=True)



In [8]:
# Where no school closures - the value is set to none
def schools(x):
    if x in ["Localized","National"]:
        return 1
    else:
        return 0
    
joint["schools"] = joint["scale"].apply(lambda x: schools(x))

joint["deathRate"] = joint["deaths"] / joint["confirmed"]
# Fill in where division by 0 causes null
joint["deathRate"].fillna(0,inplace=True)


In [10]:
# Creating the a list of graphical data
data_slider = []

for day in joint["date_x"].unique():
    
    samp = joint[joint["date_x"] == day]
    
    for col in samp.columns:  # I transform the columns into string type so I can:
        samp[col] = samp[col].astype(str)
        
    samp["text"] = samp["country"] + "Cases: " + samp["confirmed"] + " Deaths:" + samp["deaths"] + " School Closures: " + samp["scale"] + " Recovered: " +samp["recovered"]
                    
    
    data_one_year = dict(
            type='choropleth', # type of map-plot
            colorscale = "Reds",
            reversescale = True,
            locations = samp['country'], # the column with the country
            locationmode = "country names",
            z = samp['deathRate'].astype(float)*100, # the variable I want to color-code
            text = samp['text'], # hover text
            marker = dict(     # for the lines separating states
                        line = dict (
                                  color = 'rgb(255,255,255)', 
                                  width = 2) ),               
            colorbar = dict(
                        title = "Virus Death Rate")
            ) 
       
    
    data_slider.append(data_one_year)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [11]:
# Creating a slider based on the date
steps = []

for i in range(len(data_slider)):
    step = dict(method='restyle',
                args=['visible', [False] * len(data_slider)],
                label='Date {}'.format(joint["date_x"].unique()[i])) # label to be displayed for each step (year)
    step['args'][1][i] = True
    steps.append(step)
joint["date_x"].unique()

##  I create the 'sliders' object from the 'steps' 

sliders = [dict(active=0, pad={"t": 1}, steps=steps)]  
    

In [12]:
layout = dict(geo=dict(
        showframe=False,
        showcoastlines=False,
        projection_type='equirectangular'),
        sliders=sliders
        )

In [13]:
fig = go.Figure(data=data_slider, layout=layout)

# fig = go.Figure( data=data, layout=layout)
plot(fig,validate=False)

'temp-plot.html'