### Primary objectives:
1. Score / weigh effectiveness of each intervention for various countries using a weighted combination of scoring methods
2. Assign a daily aggregated intervention score for each country using the calculated intervention weights - these scores will be used for case count projection

In [None]:
import sys
sys.path.insert(1, 'src')
import config
import intervention_effectiveness_scorer as intv_scorer

import warnings
warnings.simplefilter('ignore')
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import time
import urllib.request
import os
from datetime import datetime, timedelta
from matplotlib import pyplot

# Widen the pandas display limits so large frames are shown in full in the notebook output.
for _option, _value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(_option, _value)

### Fetch latest intervention data for countries

We used the publicly available data from https://oxcgrtportal.azurewebsites.net/api/CSVDownload for our experiments. Feel free to substitute any other, more granular dataset, as long as it follows a similar structure.

In [None]:
# Set this flag to True to download the latest COVID-19 intervention data from its web source
# (config.oxcgrt_intervention_data_online) and overwrite the local offline copy.
# Set it to False to reuse the already-downloaded file, e.g. for subsequent runs on the same day.
LOAD_LATEST_DATA = True


In [None]:
# Refresh the local offline copy of the intervention data when requested.
# This is best-effort: on failure the error is reported and the notebook continues
# with whatever offline copy already exists.
if LOAD_LATEST_DATA:
    url = config.oxcgrt_intervention_data_online
    local_file = os.path.join(config.base_data_dir, config.oxcgrt_intervention_data_offline)
    try:
        # Fetch the whole payload BEFORE opening the destination: opening with 'wb'
        # truncates the file, so a failed download must not wipe the existing copy.
        with urllib.request.urlopen(url) as response:
            data = response.read()  # a `bytes` object
        with open(local_file, 'wb') as out_file:
            out_file.write(data)
    except Exception as e:
        # Report the exception type and its message so the failure cause is visible.
        print('Error while downloading {}: {}: {}'.format(url, e.__class__.__name__, e))
    else:
        print('Downloaded latest data from: {}'.format(url))

### Derive the Effectiveness Score for different interventions 

In [None]:
# --- Scorer input ---
# Local (offline) copy of the intervention data that the scorer reads.
intv_scorer.data_src = os.path.join(config.base_data_dir, config.oxcgrt_intervention_data_offline)

# --- Country / record selection ---
# A country is selected only if it has exceeded this many confirmed cases.
intv_scorer.conf_cases_threshold = 10000
# Only records with confirmed cases >= this value are selected.
intv_scorer.min_case_threshold = 0
# Skip the most recent few days of data, which may contain missing values.
intv_scorer.recent_days_to_skip = 5

# --- Time-series settings ---
# Window length for rolling averages of confirmed case counts.
intv_scorer.smoothing_window_len = 3
# Number of lags used for time-series style modeling of confirmed cases.
intv_scorer.num_lags = 1
# Median incubation period for COVID-19.
intv_scorer.incubation_period = 5

# --- Scoring methods ---
# Value attached to each scoring method; presumably its weight in the weighted
# combination of methods - confirm in intervention_effectiveness_scorer.
fit_stringency_index, fit_conf_cases, fit_intv_effect = 0.5, 0.5, 0.0
intv_scorer.intervention_scoring_methods = dict(
    fit_stringency_index=fit_stringency_index,
    fit_conf_cases=fit_conf_cases,
    fit_intv_effect=fit_intv_effect,
)

# --- Export locations ---
# Destination of the per-intervention scores.
analysis_outcome_export_loc = os.path.join(config.base_data_dir, config.intervention_impacts_loc)
# Destination of the weighted & aggregated intervention scores.
aggregated_intervention_scores_export_loc = os.path.join(config.base_data_dir, config.intervention_scores_loc)

### Score relative weights of individual interventions for each country

In [None]:
# To restrict the analysis to specific countries, assign a list of country codes
# (values of the 'CountryCode' column) to `selected_countries`, for example:
#   selected_countries = ['IND', 'USA', 'GBR', 'ITA', 'JPN', 'SGP', 'NLD', 'ISR', 'BEL', 'BRA', 'DEU', 'CUB', 'ESP', 'MEX', 'MYS', 'PHL', 'HUN', 'ZAF']
# With None the scorer picks the countries itself (presumably those passing
# conf_cases_threshold - confirm in intervention_effectiveness_scorer).
selected_countries = None

# Calculate the relative weights/importance of the different interventions.
# The variable is passed through (rather than a hard-coded None) so that editing
# `selected_countries` above actually takes effect. The scorer returns the final
# list of countries it used, which replaces `selected_countries`.
data_all, selected_countries, all_country_intv_scores = intv_scorer.score_interventions(
    selected_countries=selected_countries
)
# Persist the per-intervention scores.
all_country_intv_scores.to_csv(analysis_outcome_export_loc, index=False)


In [None]:
# Every scored intervention becomes a column to carry alongside the country-level fields.
interventions = all_country_intv_scores['intervention'].unique().tolist()
relevant_cols = ['CountryName', 'CountryCode', 'ConfirmedCases', 'ConfirmedDeaths', 'StringencyIndex', *interventions]

# Keep only the selected countries and relevant columns, move the old index into
# a regular column, and replace missing values with 0.
data_filtered = (
    data_all.loc[data_all['CountryCode'].isin(selected_countries), relevant_cols]
    .reset_index()
    .fillna(0)
)

# Assign an aggregated intervention score for each country, each day.
data_filtered = intv_scorer.assign_weighted_aggregations(data_filtered, all_country_intv_scores, selected_countries)

print('* '*50 + '\nComparing the Stringency Index of the data provider (OxCGRT) and our calculated Aggregated Intervention Scores:')

# Per-country summary statistics of both indices, ordered by median StringencyIndex (highest first).
score_summary = (
    data_filtered.groupby('CountryName')[['StringencyIndex', 'aggr_weighted_intv_norm']]
    .aggregate(['mean', 'median', 'std'])
    .sort_values(by=('StringencyIndex', 'median'), ascending=False)
)
display(score_summary.style.background_gradient(cmap='PuBu'))

# Persist the weighted & aggregated intervention scores.
data_filtered.to_csv(aggregated_intervention_scores_export_loc)
