In [136]:
import altair as alt
import pandas as pd
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

alt.data_transformers.disable_max_rows()

def days_between(d1, d2):
    if (isinstance(d1, str) and isinstance(d2, str)):
        d1 = datetime.strptime(d1, "%m-%d-%Y")
        d2 = datetime.strptime(d2, "%m-%d-%Y")
        return int((d2 - d1).days)
    return None;

In [191]:
total_cases_df       = pd.read_csv("../91-DIVOC/pages/covid-visualization/jhu-data.csv",index_col=0)
empty_rows           = total_cases_df[(total_cases_df["Confirmed"]==0)&(total_cases_df["Recovered"]==0)&(total_cases_df["Active"]==0)&(total_cases_df["Deaths"]==0)].index
total_cases_df       = total_cases_df.drop(empty_rows, axis=0)

# Tabulate the top K countries with the highest total number of Confirmed cases
K              = 20  
topKcountries  = list(total_cases_df.groupby("Country_Region").sum()["Confirmed"].nlargest(20).index)
cases_df       = total_cases_df[total_cases_df["Country_Region"].isin(topKcountries)]

# Compute days since N = 50 confirmed cases; drop those with no N confirmed cases
N              = 50
days_since_N                   = cases_df[cases_df["Confirmed"]>N].groupby("Country_Region")["Date"].min().to_dict()
cases_df['Date N Confirmed']   = cases_df.apply  (lambda x: days_since_N.get(x['Country_Region']),axis=1)
cases_df                       = cases_df.dropna (subset=['Date N Confirmed'])
cases_df['Days_from_N']        = cases_df.apply  (lambda x: days_between(x["Date N Confirmed"], x["Date"]), axis = 1)
cases_df

Unnamed: 0,Country_Region,Province_State,Confirmed,Recovered,Active,Deaths,Date,Date N Confirmed,Days_from_N
34,United States,Washington,1.0,0.0,1.0,0.0,01-22-2020,03-03-2020,-41
3,China,,547.0,28.0,502.0,17.0,01-22-2020,01-22-2020,0
4,South Korea,,1.0,0.0,1.0,0.0,01-22-2020,02-20-2020,-29
34,United States,Washington,1.0,0.0,1.0,0.0,01-23-2020,03-03-2020,-40
4,Japan,,1.0,0.0,1.0,0.0,01-23-2020,02-19-2020,-27
...,...,...,...,...,...,...,...,...,...
43,Denmark,,1572.0,24.0,1524.0,24.0,03-23-2020,03-09-2020,14
85,South Korea,,8961.0,3166.0,5684.0,111.0,03-23-2020,02-20-2020,32
109,Netherlands,,4764.0,3.0,4547.0,214.0,03-23-2020,03-05-2020,18
146,Switzerland,,8795.0,131.0,8544.0,120.0,03-23-2020,03-03-2020,20


In [192]:
# Adding in information about lockdowns & possibly drop rows without lockdowns

quarantine_df   = pd.read_csv("quarantine-activity.csv",index_col=0).set_index('Country_Region')
full_lockdown   = quarantine_df[quarantine_df['Lockdown Type'] == 'Full'].groupby("Country_Region")['Date Enacted'].min().to_dict()

# Add date of full lockdown
cases_df['Date of Full Lockdown']     = cases_df.apply  (lambda x: full_lockdown.get(x['Country_Region']),axis=1)
cases_df['lockdown_day_from_N']    = cases_df.apply  (lambda x: days_between(x["Date N Confirmed"], x["Date of Full Lockdown"]), axis = 1)

# Drop rows for those without a full lockdown 
cases_df                              = cases_df.dropna (subset=['Date of Full Lockdown'])

cases_df

Unnamed: 0,Country_Region,Province_State,Confirmed,Recovered,Active,Deaths,Date,Date N Confirmed,Days_from_N,Date of Full Lockdown,lockdown_day_from_N
3,China,,547.0,28.0,502.0,17.0,01-22-2020,01-22-2020,0,1-23-2020,1.0
6,China,,639.0,30.0,591.0,18.0,01-23-2020,01-22-2020,1,1-23-2020,1.0
4,China,,916.0,36.0,854.0,26.0,01-24-2020,01-22-2020,2,1-23-2020,1.0
5,China,,1399.0,39.0,1318.0,42.0,01-25-2020,01-22-2020,3,1-23-2020,1.0
6,Malaysia,,3.0,0.0,3.0,0.0,01-25-2020,03-06-2020,-41,3-16-2020,10.0
...,...,...,...,...,...,...,...,...,...,...,...
125,Norway,,2383.0,1.0,2375.0,7.0,03-22-2020,03-04-2020,18,3-12-2020,8.0
175,United Kingdom,,5741.0,67.0,5392.0,282.0,03-22-2020,03-03-2020,19,3-23-2020,20.0
16,Belgium,,3743.0,401.0,3254.0,88.0,03-23-2020,03-06-2020,17,3-17-2020,11.0
33,China,,81496.0,72819.0,5403.0,3274.0,03-23-2020,01-22-2020,61,1-23-2020,1.0


In [199]:
# Logarithmic Axis (Stephen's prettier version)
chart_df = cases_df.loc[(cases_df.Days_from_N >= 0) * (cases_df.Days_from_N <= 32)]
for country in full_lockdown:
    if country not in days_since_N:
        continue
    val_to_insert = days_between(days_since_N[country], full_lockdown[country])
    # insert some dummy rows w/ Days_from_N == lockdown_day_from_N to get tooltip_rules w/ mouseover to work properly
    new_row = pd.Series({'Country_Region': country,
                         'lockdown_day_from_N': val_to_insert,
                         'Days_from_N': val_to_insert,
                        })
    chart_df = chart_df.append(new_row, ignore_index=True)


nearest = alt.selection(type='single', nearest=True, on='mouseover',
                        fields=['Days_from_N'], empty='none')

legend_selection = alt.selection_multi(fields=['Country_Region'], bind='legend')


shared_encodings = dict(
    x=alt.X("Days_from_N:Q", scale=alt.Scale(domain=(0,32)), title = "Days Since First 50 Confirmed"),
    y=alt.Y("Confirmed:Q", title="Total Confirmed Cases (Log)", scale=alt.Scale(type='log',domain=(100,100000))),
    color=alt.Color("Country_Region"),
    tooltip=["Country_Region"],
    opacity=alt.condition(legend_selection, alt.value(1), alt.value(0.2)),
)

chart = alt.Chart(chart_df, width=1000, height=500)
lines = chart.mark_line(size=3).encode(
    **shared_encodings
).transform_filter(
    'datum.Confirmed !== null'
)
points = chart.mark_point(size=90, filled=True).encode(
    **shared_encodings
)
# Transparent selectors across the chart. This is what tells us
# the x-value of the cursor
selectors = chart.mark_point().encode(
    x='Days_from_N:Q',
    opacity=alt.value(0),
).add_selection(
    nearest
)

# Draw points on the line, and highlight based on selection
tooltip_points = lines.mark_point().encode(
    opacity=alt.condition(nearest, alt.value(1), alt.value(0))
)
# Draw text labels near the points, and highlight based on selection
tooltip_text = lines.mark_text(align='left', dx=5, dy=-5).encode(
    text=alt.condition(nearest, 'tooltip_text:N', alt.value(' '))
).transform_calculate(
    tooltip_text='datum.Country_Region + ": " + datum.Confirmed'
)
# Draw a rule at the location of the selection
tooltip_rules = chart.mark_rule(color='gray').encode(
    x='Days_from_N:Q',
).transform_filter(
    nearest
)
lockdown_rules = chart.mark_rule(strokeDash=[7,3]).encode(
        x=alt.X("Days_from_N:Q"),
        color=alt.Color("Country_Region"),
        opacity=alt.condition(legend_selection, alt.value(1), alt.value(0.1)),
).transform_filter(
    'datum.Days_from_N == datum.lockdown_day_from_N'
)
lockdown_tooltip=lockdown_rules.mark_text(align='left', dx=5, dy=-220).encode(
    text=alt.condition(nearest, 'lockdown_tooltip_text:N', alt.value(' '))
).transform_calculate(
    lockdown_tooltip_text='datum.Country_Region + " locked down"'
)
alt.layer(
    lines,
    selectors,
    tooltip_points,
    tooltip_text,
    tooltip_rules,
    lockdown_rules,
    lockdown_tooltip,
    points
).add_selection(legend_selection)

In [149]:
chart_df['diff'] = (chart_df.Days_from_N - chart_df.lockdown_day_from_N).abs()
chart_df['diff'] = (chart_df.Days_from_N - chart_df.lockdown_day_from_N).abs()

In [150]:
chart_df.

Country_Region
Belgium           0.0
China             0.0
Denmark           0.0
France            1.0
Italy             0.0
Malaysia          0.0
Norway            2.0
Spain             0.0
United Kingdom    1.0
Name: diff, dtype: float64