# DRUMS - COVASIM Data Generation
***

#### Setup
Comment out the import of the module you won't be using.

In [1]:
import sys
import joblib
sys.path.append('../../')
# from drums_data_gen import *
from drums_data_gen_multi import *
import Modules.Loaders.DataFormatter as DF
from Modules.Utils.Imports import *

Covasim 3.1.3 (2022-07-19) — © 2020-2022 by IDM


  from .autonotebook import tqdm as notebook_tqdm


- `drums_data_gen` generates data for one simulation.
- `drums_data_gen_multi` generates data for multiple simulations and stores each simulation in a list.

In [2]:
# Compute device used by `to_torch` below. GetLowestGPU is given GPU ids 0-3 to
# pick from (presumably it returns the least-loaded one and falls back to CPU
# when no GPU is usable -- TODO confirm against the helper's implementation).
device = torch.device(GetLowestGPU(pick_from=[0,1,2,3]))
def to_torch(ndarray):
    """Convert array-like data into a float tensor that tracks gradients.

    The tensor is built with dtype ``torch.float``, flagged with
    ``requires_grad``, and then moved to the notebook-level ``device``.

    Args:
        ndarray: data accepted by ``torch.tensor`` (e.g. a NumPy array).

    Returns:
        The gradient-tracking tensor located on ``device``.
    """
    # requires_grad_ works in place and returns the same tensor, so it chains.
    tracked = torch.tensor(ndarray, dtype=torch.float).requires_grad_(True)
    return tracked.to(device)

def to_numpy(x):
    """Return the values of tensor ``x`` as a NumPy array.

    The tensor is detached from the autograd graph and copied to host memory
    before conversion, so it works for gradient-tracking and GPU tensors.
    """
    detached = x.detach()
    on_host = detached.cpu()
    return on_host.numpy()

Device set to cpu


***
#### Set Parameters and generate data

- `population`: Integer value that corresponds to the number of agents in the simulation.
- `test_prob`: Float value that corresponds to the probability of testing given the agent is symptomatic. Currently, the values for the probability of testing in other scenarios are `asymp_prob=0.001`, `symp_quar_prob=0.3`, and `asymp_quar_prob=0.3`. Additionally, the policy for quarantining is `quar_policy='daily'`.
- `trace_lb` and `trace_ub`: These float values correspond to the lower and upper bounds on probability of tracing. Currently, the trace probability for the layers are `h=1.0`, `s=0.5`, `w=0.5`, `c=0.3`.
- `chi_type`: The type of the function that interacts with the tracing probability. Can choose between `constant`, `linear`, `piecewise`, and `sin`.
- `keep_d`: Boolean value that indicates whether or not to include the D (diagnosed) compartment.
- `dynamic`: Boolean value that indicates whether or not to include the interacting term/function.
- `masking`: Integer value that indicates the masking intervention to include.
  - `0`: No masking
  - `1`: Threshold masking intervention logistic function.
  - `2`: Uniform masking with no threshold logistic function.
  - `3`: Normally distributed $\beta_1$ coefficient logistic function.
- `multiple`: Boolean value that indicates whether or not to run multiple simulations.
- `parallelb`: Boolean value indicating whether or not to run in parallel.
- `n_runs`: Integer value indicating the number of simulations to run if `multiple==True`.
- `model_params`: Dictionary containing keys pointing to parameter values, the compartment model's values over the simulation, and more.

In [3]:
# Configuration for the sequential (non-parallel) workflow.
population = int(50e3)   # number of agents in the simulation
test_prob = 0.1          # probability of testing given the agent is symptomatic
trace_lb = 0.0           # lower bound on the tracing probability
trace_ub = 0.3           # upper bound on the tracing probability
chi_type = 'piecewise'   # one of 'constant', 'linear', 'piecewise', 'sin'
keep_d = True            # include the D (diagnosed) compartment
dynamic = True           # include the interacting term/function
masking = 1              # 0: none, 1: threshold, 2: uniform, 3: normal
multiple = True          # run (and later load) several simulations
parallelb = False        # run the simulations in parallel
n_runs = 2               # number of simulations when `multiple` is True

# Pass `population` itself rather than repeating the literal: the same variable
# is used later to build the case name, load the data and rescale it, so a
# second hard-coded value could silently drift out of sync with the simulation.
model_params = ModelParams(population=population,
                           test_prob=test_prob,
                           trace_lb=trace_lb,
                           trace_ub=trace_ub,
                           chi_type=chi_type,
                           keep_d=keep_d,
                           dynamic=dynamic,
                           masking=masking,
                           parallel=parallelb)

Choose whether to use `drums_data_generator` or `drums_data_generator_multi`

In [4]:
# Single-simulation alternative; it needs the `drums_data_gen` import from the
# setup cell to be enabled instead of `drums_data_gen_multi`:
# drums_data_generator(model_params)
# Generate data for `n_runs` simulations using the parameters configured above.
drums_data_generator_multi(model_params, n_runs)

Initializing sim with 50000 people for 182 days
  Running "Sim 0": 2020-02-01 ( 0/182) (0.50 s)  ———————————————————— 1%
  Running "Sim 0": 2020-02-11 (10/182) (3.08 s)  •——————————————————— 6%
  Running "Sim 0": 2020-02-21 (20/182) (5.86 s)  ••—————————————————— 11%
  Running "Sim 0": 2020-03-02 (30/182) (8.66 s)  •••————————————————— 17%
  Running "Sim 0": 2020-03-12 (40/182) (11.36 s)  ••••———————————————— 22%
  Running "Sim 0": 2020-03-22 (50/182) (14.23 s)  •••••——————————————— 28%
  Running "Sim 0": 2020-04-01 (60/182) (17.23 s)  ••••••—————————————— 33%
  Running "Sim 0": 2020-04-11 (70/182) (20.53 s)  •••••••————————————— 39%
  Running "Sim 0": 2020-04-21 (80/182) (23.42 s)  ••••••••———————————— 44%
  Running "Sim 0": 2020-05-01 (90/182) (26.36 s)  •••••••••——————————— 50%
  Running "Sim 0": 2020-05-11 (100/182) (29.30 s)  •••••••••••————————— 55%
  Running "Sim 0": 2020-05-21 (110/182) (32.14 s)  ••••••••••••———————— 61%
  Running "Sim 0": 2020-05-31 (120/182) (35.00 s)  •••••

In [None]:
# Directory that holds the generated Covasim data; figures are saved here too.
path = '../../Data/covasim_data/drums_data/'
retrain = False

# Rebuild the case name that identifies the generated data set.
case_name = get_case_name(population, test_prob, trace_ub, keep_d, dynamic=dynamic, chi_type=chi_type)

# Each masking intervention contributes its own suffix; 0 (no masking) and any
# unrecognised value contribute nothing.
case_name += {1: '_maskingthresh', 2: '_maskinguni', 3: '_maskingnorm'}.get(masking, '')

# Multi-simulation data sets additionally carry the number of runs.
if multiple:
    case_name += '_' + str(n_runs)

params = DF.load_covasim_data(path, population, test_prob, trace_ub, keep_d, case_name, plot=False)

# Example:
# With 50,000 agents, test_prob of 0.1, trace_ub of 0.3, dynamic, diagnosed
# included, piecewise chi type, threshold masking (masking == 1) and 200
# simulations, case_name ends in '_maskingthresh_200', e.g. something like
# '50000_dynamic_piecewise_0.1_0.3_maskingthresh_200' (the exact prefix is
# whatever get_case_name returns).

In [None]:
# Express compartment values and masking counts as fractions of the population.
if multiple:
    # Several simulations were stored: average across them (axis 0) first.
    data = np.mean(params['data'], axis=0) / params['population']
    avg_masking = np.mean(params['avg_masking'], axis=0) / params['population']
else:
    # A single simulation: normalise it and convert the result to a NumPy array.
    data = (params['data'] / params['population']).to_numpy()
    avg_masking = params['avg_masking'] / params['population']

# The raw data is no longer needed inside `params`.
del params['data']

# Time axis as a column vector with one entry per row of `data`.
N = len(data)
t = np.arange(N).reshape(-1, 1)

In [None]:
# Plot the compartments and the masking curve as numbers of people.
#
# The rescaled values live in new arrays (`data_counts`, `masking_counts`)
# instead of being multiplied into `data` / `avg_masking` in place, so this cell
# can be re-run any number of times without scaling the data again.
plot_comps = True
plot_masks = True

# '_avg' marks figures built from the mean over several simulations,
# '_single' marks figures built from one simulation.
fig_suffix = '_avg' if multiple else '_single'

if plot_comps:
    # rescale ratios to the number of people in each compartment
    data_counts = data * population
    n = data_counts.shape[1]
    col_names = list('STEAYDQRF') if keep_d else list('STEAYQRF')
    n_rows = int(np.ceil(n / 3))

    # one subplot per compartment, three per row
    fig = plt.figure(figsize=(15, 15))
    for i in range(1, n + 1):
        ax = fig.add_subplot(n_rows, 3, i)
        ax.plot(t, data_counts[:, i - 1], '.k', label='Covasim Data')
        ax.set_title(col_names[i - 1])
        ax.legend(fontsize=8)

    # Lay out and save once, after every subplot exists, rather than rewriting
    # the same file on each loop iteration.
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
    plt.tight_layout(pad=2)
    plt.savefig(os.path.join(path, case_name + fig_suffix + '.png'))
    # plt.show()
    plt.close(fig)

if plot_masks and masking > 0:
    # rescale ratios to the average number of agents masking
    masking_counts = avg_masking * population

    fig = plt.figure(figsize=(10, 10))
    plt.plot(t, masking_counts, '.k', label='Covasim Data')
    plt.title('Average Number of Agents Masking over Time (days)', fontsize=16)
    plt.legend(fontsize=14)
    plt.tight_layout(pad=4)
    plt.savefig(os.path.join(path, case_name + '_avgmasking' + fig_suffix + '.png'))
    plt.show()

## Running multiple simulations in parallel
- **Note**: The `final_data` matrix is normalized before storage. Therefore, when loading data generated with the parameter `parallel=True`, you must not normalize it. When loading data generated with the parameter `parallel=False`, you must normalize it.
- `num_batches`: Integer value indicating the number of batches of parallel simulating to run.
- `batch_size`: Integer value indicating the size of each batch. This is the number of runs (`n_runs=batch_size`) that Covasim runs under the `MultiSim` object in each iteration. Must not exceed 32.

In [3]:
# Configuration for the batched, parallel workflow.
population = 500_000     # number of agents in the simulation
test_prob = 0.1          # probability of testing given the agent is symptomatic
trace_lb = 0.0           # lower bound on the tracing probability
trace_ub = 0.3           # upper bound on the tracing probability
chi_type = 'piecewise'   # one of 'constant', 'linear', 'piecewise', 'sin'
keep_d = True            # include the D (diagnosed) compartment
dynamic = True           # include the interacting term/function
masking = 1              # 0: none, 1: threshold, 2: uniform, 3: normal
parallelb = True         # run the simulations in parallel
num_batches = 64         # number of batches of parallel simulations
batch_size = 32          # runs per batch; must not exceed 32

# The first eight arguments are passed positionally, in the constructor's order.
model_params = ModelParams(
    population,
    test_prob,
    trace_lb,
    trace_ub,
    chi_type,
    keep_d,
    dynamic,
    masking,
    parallel=parallelb,
    batches=num_batches,
)

In [4]:
# Fail fast on an unusable batch count. The batch loop iterates over
# range(num_batches), so the value has to be an integer as well as positive;
# a value such as 0.5 would otherwise pass this check and only fail later.
if not isinstance(num_batches, (int, np.integer)) or num_batches <= 0:
    raise ValueError(f"`batches` must be a positive integer. Instead, the number of batches passed was: {num_batches}")

In [None]:
# Run `num_batches` batches of `batch_size` simulations, average each batch,
# then average the batch means and store the result as one joblib file.
total_runs = num_batches * batch_size

# File naming is identical for every batch, so build it once up front.
path = '../../Data/covasim_data/drums_data/'
case_name = get_case_name(population, test_prob, trace_ub, keep_d, dynamic=dynamic, chi_type=chi_type)
# masking-specific suffix; 0 (no masking) or an unrecognised value adds nothing
case_name += {1: '_maskingthresh', 2: '_maskinguni', 3: '_maskingnorm'}.get(masking, '')
batch_case_name = case_name + '_' + str(batch_size)

# Per-batch means are collected in lists and combined once after the loop,
# instead of growing arrays with np.concatenate on every iteration.
batch_data = []
batch_masking = []

for i in range(num_batches):
    drums_data_generator_multi(model_params, batch_size)

    # Load the batch back under the batch-level case name (presumably the file
    # the generator call above just wrote -- TODO confirm).
    params = DF.load_covasim_data(path, population, test_prob, trace_ub, keep_d, batch_case_name, plot=False)

    # mean over the runs of this batch, normalised by population
    data = np.mean(params['data'], axis=0) / params['population']  # data of shape [num_days, 9]
    avg_masking = np.mean(params['avg_masking'], axis=0) / params['population']

    batch_data.append(data)
    batch_masking.append(avg_masking)

# Average across batches; the batch index is the last axis of each stack.
final_data = np.mean(np.stack(batch_data, axis=2), axis=2)
params['data'] = final_data.copy()

final_am = np.mean(np.stack(batch_masking, axis=1), axis=1)
params['avg_masking'] = final_am.copy()

# Store the averaged (normalised) result under the total number of runs.
fig_name = case_name + '_' + str(total_runs)
file_name = 'covasim_' + fig_name
file_name += '.joblib'

# same directory the batches were loaded from
file_path = path

joblib.dump(params, os.path.join(file_path, file_name), compress=True)

# Time axis as a column vector with one entry per simulated day.
N = final_data.shape[0]
t = np.arange(N)[:, None]

In [None]:
# Plot the batch-averaged compartments and masking curve as numbers of people.
#
# The rescaled values live in new arrays instead of being multiplied into
# `final_data` in place, so this cell is safe to run more than once.
plot_comps = True
plot_masks = True

if plot_comps:
    # rescale ratios to the number of people in each compartment
    final_counts = final_data * population
    # take the column count from the array that is actually plotted
    n = final_counts.shape[1]
    col_names = list('STEAYDQRF') if keep_d else list('STEAYQRF')
    n_rows = int(np.ceil(n / 3))

    # one subplot per compartment, three per row
    fig = plt.figure(figsize=(15, 15))
    for i in range(1, n + 1):
        ax = fig.add_subplot(n_rows, 3, i)
        ax.plot(t, final_counts[:, i - 1], '.k', label='Covasim Data')
        ax.set_title(col_names[i - 1])
        ax.legend(fontsize=8)

    # Lay out and save once, after every subplot exists, rather than rewriting
    # the same file on each loop iteration.
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
    plt.tight_layout(pad=2)
    plt.savefig(os.path.join(path, case_name + '_' + str(total_runs) + '_avg' + '.png'))
    plt.close(fig)

if plot_masks and masking > 0:
    # Use `final_am`, the masking curve averaged over all batches. `avg_masking`
    # only holds the last batch and would not match the title or file name.
    masking_counts = final_am * population

    fig = plt.figure(figsize=(10, 10))
    plt.plot(t, masking_counts, '.k', label='Covasim Data')
    plt.title('Averaged Number of Agents Masking over Time (days)', fontsize=16)
    plt.legend(fontsize=14)
    plt.tight_layout(pad=4)
    plt.savefig(os.path.join(path, case_name + '_' + str(total_runs) + '_avgmasking_avg' + '.png'))
    plt.show()