## Set up


In [21]:
!wget https://raw.githubusercontent.com/jajsmith/cannpi-cmaj/develop/data/npi_canada_apr19.csv

--2020-06-15 02:49:04--  https://raw.githubusercontent.com/jajsmith/cannpi-cmaj/develop/data/npi_canada_apr19.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 9503275 (9.1M) [text/plain]
Saving to: ‘npi_canada_apr19.csv.1’


2020-06-15 02:49:05 (37.6 MB/s) - ‘npi_canada_apr19.csv.1’ saved [9503275/9503275]



### Load Data

In [22]:
import pandas as pd
from datetime import datetime, date, timedelta
import numpy as np

# get NPI data
# Relative path: read the file the wget cell saved into the current working
# directory (on Colab that directory is /content, so the same file is read).
npis_csv = "npi_canada_apr19.csv"
raw_data = pd.read_csv(npis_csv)

# remove any rows that don't have a start_date, region, or intervention_category
# .copy() gives df its own data, so the column assignment below neither touches
# raw_data nor triggers pandas' SettingWithCopyWarning.
required_cols = ['start_date', 'region', 'intervention_category']
df = raw_data.dropna(how='any', subset=required_cols).copy()
# use one spelling for the province name
df['region'] = df['region'].replace('Newfoundland', 'Newfoundland and Labrador')
num_rows_removed = len(raw_data) - len(df)
print("Number of rows removed: {}".format(num_rows_removed))

# get all regions (sorted so the order is the same on every run)
regions = sorted(df['region'].unique())
print("Number of unique regions: {}".format(len(regions)))

# get all intervention categories
# NOTE: despite its name, num_cats holds the category labels; the count is
# num_interventions.
num_cats = sorted(df['intervention_category'].unique())
num_interventions = len(num_cats)
print("Number of unique intervention categories: {}".format(num_interventions))

Number of rows removed: 60
Number of unique regions: 14
Number of unique intervention categories: 62


### Keep only public event size restrictions


In [0]:
# per = public event restrictions
per_cats = [
    "Public event size restriction (<=250)",
    "Public event size restriction (<=50)",
    "Public event size restriction (<=25)",
    "Public event size restriction (<=10)",
    "Public event size restriction (<=5)",
    "Public event size restriction (<=2)",
]
# coi = columns of interest
coi = ["start_date", "region", "subregion", "intervention_category"]

# grabbing only public event restrictions that name a subregion, and only the
# columns in coi (boolean masks instead of a row-by-row Python loop)
is_per = df["intervention_category"].isin(per_cats)
has_subregion = df["subregion"].notna()

df_per_only = (
    df.loc[is_per & has_subregion, coi]
    # fresh 0..n-1 index, which is also what ends up in the CSV's index column
    .reset_index(drop=True)
    # sorting by start_date
    .sort_values("start_date")
    # dropping ALL duplicate values: keep=False removes every copy of a row that
    # occurs more than once, not just the extra copies.
    # NOTE(review): confirm this is intended; keep="first" would retain one copy.
    .drop_duplicates(subset=coi, keep=False)
    # add max_event_size: the number inside "(<=N)".
    # A regex is used because str.strip() treats its argument as a set of
    # characters to remove, not as a prefix.
    .assign(
        max_event_size=lambda d: d["intervention_category"]
        .str.extract(r"\(<=(\d+)\)", expand=False)
        .astype("int64")
    )
)

# output the table to csv
df_per_only.to_csv('figure5_table.csv')

## Visualize 

In [24]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.figure_factory as ff
import matplotlib
from plotly.offline import init_notebook_mode, plot_mpl
import matplotlib.pyplot as plt

def visualize(full_df, per_only_df):
    """Show public event size restrictions over time, one step line per subregion.

    Parameters
    ----------
    full_df : pandas.DataFrame
        Not used by the plot; kept so existing calls keep working.
    per_only_df : pandas.DataFrame
        Needs the columns 'start_date', 'subregion' and 'max_event_size'.
        The frame is only read; jittered values are computed separately.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    # Reference limits (max. number of people) drawn as grey guide lines,
    # each with a text label placed slightly above the line.
    guide_levels = [250, 50, 10]
    guide_x0, guide_x1 = "2020-03-11", "2020-04-09"
    label_x = '2020-04-07'
    label_offset = 5

    # Create figure with secondary y-axis
    fig = make_subplots(specs=[[{"secondary_y": True}]])

    # add text labels for the guide lines; x, y and text are all built from
    # guide_levels so they always have the same length
    fig.add_trace(go.Scatter(
        x=[label_x] * len(guide_levels),
        y=[level + label_offset for level in guide_levels],
        text=["Max = {} people".format(level) for level in guide_levels],
        mode="text",
    ))

    # assign each subregion a color; the palette is cycled so that more
    # subregions than colors does not raise an IndexError
    palette_colors = ["#8B2423", "#BA5441", "#D19AA9", "#DBBAF9", "#D5B74C", "#CACD5D", "#E1E79F", "#74CCC9", "#117A72", "#365434", "#0C2A66", "#664CA5", "#5552BD", "#9B80A3", "#58303E", "#9B417B"]
    subregions = np.unique(per_only_df["subregion"].values)
    colors = {
        sr: palette_colors[pos % len(palette_colors)]
        for pos, sr in enumerate(subregions)
    }

    # add jitter to fix overlapping points: a point whose (date, size) is already
    # taken is moved up in steps of JITTER_NUM until its position is free
    JITTER_NUM = 5
    occupied = set()
    for sr in subregions:
        subregion_only = per_only_df.loc[per_only_df['subregion'] == sr]

        # jittered sizes are collected in a list, so no slice of the caller's
        # frame is written to
        jittered_sizes = []
        for x, size in zip(subregion_only['start_date'],
                           subregion_only['max_event_size']):
            y = float(size)
            while (x, y) in occupied:
                y += JITTER_NUM
            occupied.add((x, y))
            jittered_sizes.append(y)

        # plot!
        fig.add_trace(
            go.Scatter(
                x=subregion_only['start_date'],
                y=jittered_sizes,
                mode="markers+lines",
                name=sr,
                line=dict(shape="hv"),
                marker=dict(
                    color=colors[sr],
                    size=15,
                    opacity=0.85,
                    line=dict(width=2),
                ),
            ),
            secondary_y=False,
        )

    # one horizontal grey line per guide level
    guide_shapes = [
        dict(
            type='line',
            opacity=0.5,
            yref='y1', y0=level, y1=level,
            xref='x1', x0=guide_x0, x1=guide_x1,
            line=dict(width=1.5, color="#CCCCCC"),
        )
        for level in guide_levels
    ]

    # Font sizes are set through title.font / tickfont rather than the
    # deprecated `titlefont_size` shorthand.
    fig.update_layout(
        autosize=False, width=1200, height=700,
        title=dict(font=dict(size=20)),
        yaxis=dict(
            title=dict(
                text='Public event size restriction <br> (Max. number of people)',
                font=dict(size=16),
            ),
            tickfont=dict(size=14),
        ),
        xaxis=dict(
            title=dict(text='Date', font=dict(size=16)),
            tickfont=dict(size=14),
        ),
        template='plotly_white',
        shapes=guide_shapes,
    )

    fig.show()
# Draw the figure. Only df_per_only is plotted; visualize() does not read its first argument.
visualize(df, df_per_only)