- The development notebook shows that we should expect low R values but statistically significant results.

- **TODO:** add more description here.

- Uncomment and run the cell below to generate the data (macOS or Linux). On Windows, run the script from a terminal instead.

In [1]:
#!python scripts/generate_dataset_for_regression.py

# 0. Imports

In [2]:
from covid_project.regression_funcs import fit_ols_model_single_policy
from covid_project.data_utils import get_all_policies, get_processed_data
from covid_project.policy_mappings import policy_dict_v1
from tqdm.notebook import tqdm
import os
import json

# 1. Run models

In [3]:
# Binning schemes to evaluate; one batch of models is fitted per scheme.
# Each scheme is a list of (start, end) integer pairs.
# NOTE(review): presumably these are day ranges relative to a policy date, with
# 999 acting as an open-ended upper bound — confirm against get_processed_data.
all_bins = [
    [(0, 14), (15, 999)],
    [(0, 14), (15, 28), (29, 999)],
    [(0, 7), (8, 14), (15, 999)],
    [(0, 7), (8, 14), (15, 28), (29, 60), (61, 999)],
]

# Policies to model, as selected by get_all_policies from the v1 policy mapping.
# NOTE(review): min_samples=3 presumably drops policies with fewer than three
# samples — confirm against covid_project.data_utils.get_all_policies.
all_policies = get_all_policies(
    policy_dict=policy_dict_v1,
    min_samples=3,
)

# Dependent-variable names; each one is passed as `dep_var` to the model-fitting
# loop and also becomes the prefix of the results filename.
# NOTE(review): the `_1e6` suffix presumably means "per million" and `_7day` a
# 7-day aggregate — confirm against the dataset generation script.
dep_vars = [
    "new_cases_1e6",
    "new_deaths_1e6",
    "new_cases_7day_1e6",
    "new_deaths_7day_1e6",
]

In [8]:
def run_model_on_policies(bins,
                          all_policies,
                          dep_var,
                          pbar=True):
    """Fit the single-policy OLS model once for every policy in `all_policies`.

    Parameters
    ----------
    bins : list
        Binning scheme, forwarded unchanged to `get_processed_data`.
    all_policies : iterable
        Policy identifiers to loop over; each successfully processed policy
        becomes a key of the returned dict.
    dep_var : str
        Name of the dependent variable, forwarded to
        `fit_ols_model_single_policy`.
    pbar : bool, default True
        Whether to display the tqdm progress bar. (This flag used to be
        accepted but silently ignored; the default keeps the old behaviour.)

    Returns
    -------
    dict
        Maps each policy whose data was read successfully to the value returned
        by `fit_ols_model_single_policy`. Policies whose data read fails are
        reported on stdout and left out of the dict.
    """
    results = {}
    for policy in tqdm(all_policies, desc='running models', disable=not pbar):
        suc, data = get_processed_data(policy, bins)
        if not suc:
            # Best-effort batch: log the failure and carry on with the next policy.
            print(f"[ERROR] data read failed: bins={bins}, policy={policy}, var={dep_var}")
            continue
        # NOTE(review): the trailing positional `True` is an opaque flag of
        # fit_ols_model_single_policy — check its signature and prefer passing
        # it by keyword once the parameter name is confirmed.
        results[policy] = fit_ols_model_single_policy(data,
                                                      policy,
                                                      dep_var,
                                                      True)
    return results

In [9]:
def run_batch_of_models(all_bins,
                        all_policies,
                        dep_vars,
                        save_path="./data/regression_results/"):
    """Run the per-policy models for every (bins, dependent variable) pair and save them.

    For each binning scheme in `all_bins` and each name in `dep_vars`, calls
    `run_model_on_policies` and writes the returned dict to a JSON file named
    ``<dep_var>_bins=<start>-<end>_<start>-<end>....json`` inside `save_path`.
    Existing files with the same name are overwritten.

    Parameters
    ----------
    all_bins : iterable
        Binning schemes; each scheme is a sequence of items whose first two
        elements are used to build the filename.
    all_policies : iterable
        Policies forwarded to `run_model_on_policies`.
    dep_vars : iterable of str
        Dependent-variable names; one results file is written per name and
        binning scheme.
    save_path : str, default "./data/regression_results/"
        Output directory. It is created if missing; a trailing path separator
        is optional.

    Returns
    -------
    None
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(save_path, exist_ok=True)

    for bins_list in tqdm(all_bins, desc="looping through bins"):
        # e.g. [(0, 14), (15, 999)] -> "0-14_15-999"
        bins_tag = "_".join(f"{b[0]}-{b[1]}" for b in bins_list)
        for var in tqdm(dep_vars, desc="looping through dependent variables"):
            results = run_model_on_policies(bins=bins_list,
                                            all_policies=all_policies,
                                            dep_var=var,
                                            pbar=True)
            # os.path.join keeps the file inside save_path even when the caller
            # omits the trailing separator (plain concatenation did not).
            full_path = os.path.join(save_path, f"{var}_bins={bins_tag}.json")

            with open(full_path, "w") as f:
                json.dump(results, f, indent=2)

In [None]:
# Fit every (binning scheme, dependent variable) combination and write one JSON
# results file per combination to the function's default save_path.
run_batch_of_models(
    all_bins=all_bins,
    all_policies=all_policies,
    dep_vars=dep_vars,
)

looping through bins:   0%|          | 0/4 [00:00<?, ?it/s]

looping through dependent variables:   0%|          | 0/4 [00:00<?, ?it/s]

running models:   0%|          | 0/50 [00:00<?, ?it/s]

running models:   0%|          | 0/50 [00:00<?, ?it/s]

running models:   0%|          | 0/50 [00:00<?, ?it/s]

running models:   0%|          | 0/50 [00:00<?, ?it/s]

looping through dependent variables:   0%|          | 0/4 [00:00<?, ?it/s]

running models:   0%|          | 0/50 [00:00<?, ?it/s]

running models:   0%|          | 0/50 [00:00<?, ?it/s]

running models:   0%|          | 0/50 [00:00<?, ?it/s]

running models:   0%|          | 0/50 [00:00<?, ?it/s]