# DRUMS - COVASIM Data Generation
***

#### Setup
Comment out the file you won't be using.

In [1]:
import sys
import joblib
sys.path.append('../../')
# from drums_data_gen import *
from drums_data_gen_multi import *
import Modules.Loaders.DataFormatter as DF
from Modules.Utils.Imports import *

Covasim 3.1.3 (2022-07-19) — © 2020-2022 by IDM


  from .autonotebook import tqdm as notebook_tqdm


- `drums_data_gen` generates data for one simulation.
- `drums_data_gen_multi` generates data for multiple simulations and stores each simulation in a list.

In [2]:
def to_torch(ndarray):
    """Convert array-like data to a float tensor on `device` that tracks gradients.

    The tensor is created directly on the target device with
    ``requires_grad=True``. Enabling gradients first and moving the tensor
    afterwards would, on a non-CPU device, hand back a non-leaf copy whose
    ``.grad`` is never populated.

    Args:
        ndarray: Data accepted by ``torch.tensor`` (e.g. a NumPy array).

    Returns:
        A ``torch.float`` leaf tensor on the module-level `device` with
        ``requires_grad`` set to True.
    """
    return torch.tensor(ndarray, dtype=torch.float, device=device, requires_grad=True)

def to_numpy(x):
    """Return the values of tensor `x` as a NumPy array.

    The tensor is first detached from the autograd graph and moved to the
    CPU, so this also works for tensors that require gradients.
    """
    host_tensor = x.detach().cpu()
    return host_tensor.numpy()

# Torch device used by `to_torch`; GetLowestGPU is asked to pick from GPU indices 0-3.
# NOTE(review): presumably this selects the least-loaded GPU and falls back to the CPU
# when none is available (the recorded output of this cell was "Device set to cpu") — confirm.
device = torch.device(GetLowestGPU(pick_from=[0,1,2,3]))

Device set to cpu


***
#### Set Parameters and generate data

- `population`: Integer value that corresponds to the number of agents in the simulation.
- `test_prob`: Float value giving the probability of testing when the agent is symptomatic. Currently, the testing probabilities for the other scenarios are `asymp_prob=0.001`, `symp_quar_prob=0.3`, and `asymp_quar_prob=0.3`. Additionally, the policy for quarantining is `quar_policy='daily'`.
- `trace_lb` and `trace_ub`: These float values correspond to the lower and upper bounds on probability of tracing. Currently, the trace probability for the layers are `h=1.0`, `s=0.5`, `w=0.5`, `c=0.3`.
- `chi_type`: The type of the function that interacts with the tracing probability. Can choose between `constant`, `linear`, `piecewise`, and `sin`.
- `keep_d`: Boolean value that indicates whether or not to include the D (diagnosed) compartment.
- `dynamic`: Boolean value that indicates whether or not to include the interacting term/function.
- `masking`: Integer value that indicates the masking intervention to include.
  - `0`: No masking
  - `1`: Threshold masking intervention logistic function.
  - `2`: Uniform masking with no threshold logistic function.
  - `3`: Normally distributed $\beta_1$ coefficient logistic function.
- `multiple`: Boolean value that indicates whether or not to run multiple simulations.
- `parallelb`: Boolean value indicating whether or not to run in parallel.
- `n_runs`: Integer value indicating the number of simulations to run if `multiple==True`.
- `model_params`: Dictionary containing keys pointing to parameter values, the compartment model's values over the simulation, and more.

In [3]:
# Scenario settings for the serial (non-parallel) workflow.
population = 200_000            # number of agents in the simulation
test_prob = 0.1                 # probability of testing given the agent is symptomatic
trace_lb, trace_ub = 0.0, 0.3   # lower / upper bound on the tracing probability
chi_type = 'piecewise'          # one of 'constant', 'linear', 'piecewise', 'sin'
keep_d = True                   # include the diagnosed (D) compartment
dynamic = True                  # include the interacting term/function
masking = 3                     # 0: none, 1: threshold, 2: uniform, 3: normally distributed beta_1
multiple = True                 # run multiple simulations
parallelb = False               # run in parallel
n_runs = 32                     # number of simulations when `multiple` is True

# Bundle the settings; arguments are passed positionally in this order.
model_params = ModelParams(
    population,
    test_prob,
    trace_lb,
    trace_ub,
    chi_type,
    keep_d,
    dynamic,
    masking,
    parallelb,
)

Choose whether to use `drums_data_generator` or `drums_data_generator_multi`

In [None]:
# Generate the simulation data for the scenario described by `model_params`.
# Single-simulation alternative (presumably requires the `drums_data_gen` import
# from the setup cell to be enabled instead — confirm):
# drums_data_generator(model_params)
# Multi-simulation variant: runs `n_runs` simulations.
drums_data_generator_multi(model_params, n_runs)

In [13]:
# Directory that holds the stored Covasim runs.
# NOTE(review): the parallel section of this notebook uses '../../Data/...';
# confirm which relative root is correct for the notebook's location.
path = '../Data/covasim_data/drums_data/'
retrain = False

# Base name identifying the scenario; the masking mode and the run count are
# appended below.
case_name = get_case_name(population, test_prob, trace_ub, keep_d, dynamic=dynamic, chi_type=chi_type)

# Suffix for each masking mode; 0 (or any unlisted value) adds nothing.
masking_suffixes = {1: '_maskingthresh', 2: '_maskinguni', 3: '_maskingnorm'}
case_name += masking_suffixes.get(masking, '')

# Multi-run data sets carry the number of runs as a trailing suffix.
if multiple:
    case_name += '_' + str(n_runs)

# The load call is the same for single- and multi-run data; only `case_name` differs.
params = DF.load_covasim_data(path, population, test_prob, trace_ub, keep_d, case_name, plot=True)

# Example:
# If the data generated has 50,000 agents, test_prob of 0.1, trace_ub of 0.3, is dynamic,
# includes diagnosed, chi type is piecewise, uses threshold masking (masking=1), and we ran
# 200 simulations, then case_name is the base name returned by get_case_name (presumably
# something like '50000_dynamic_piecewise_0.1_0.3' — verify there) followed by
# '_maskingthresh' and '_200'.

In [14]:
# Take the raw simulation output out of `params`; the remaining entries stay in place.
raw = params.pop('data')

if multiple:
    # Average over the individual runs, then convert counts to population fractions.
    data = np.mean(raw, axis=0) / params['population']
else:
    # Single run: convert counts to fractions and turn the result into a NumPy array.
    data = (raw / params['population']).to_numpy()

# Time axis as a column vector with one entry per row of `data`.
N = len(data)
t = np.arange(N)[:, None]

In [15]:
# rescale data values to represent number of people in state X rather than ratios
data *= population

plot=True
if plot:
    n = data.shape[1]
    col_names = list('STEAYDQRF') if keep_d else list('STEAYQRF')
    # plot compartments
    fig = plt.figure(figsize=(15, 15))
    
    if multiple:
        for i in range(1, n + 1):
            ax = fig.add_subplot(int(np.ceil(n / 3)), 3, i)
            ax.plot(t, data[:, i - 1], '.k', label='Covasim Data')
            ax.set_title(col_names[i - 1])
            ax.legend(fontsize=8)
            fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
            plt.tight_layout(pad=2)
            plt.savefig(os.path.join(path, case_name + '_avg' + '.png') )
            # plt.show()
    else:
        for i in range(1, n + 1):
            ax = fig.add_subplot(int(np.ceil(n / 3)), 3, i)
            ax.plot(t, data[:, i - 1], '.k', label='Covasim Data')
            ax.set_title(col_names[i - 1])
            ax.legend(fontsize=8)
            fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
            plt.tight_layout(pad=2)
            plt.savefig(os.path.join(path, case_name + '_single' + '.png') )
            # plt.show()

## Running multiple simulations in parallel
- **Note**: The `final_data` matrix is normalized before storage. Therefore, when loading data generated with the parameter `parallel=True`, you must not normalize it. When loading data generated with the parameter `parallel=False`, you must normalize it.
- `num_batches`: Integer value indicating the number of batches of parallel simulating to run.
- `batch_size`: Integer value indicating the size of each batch. This is the number of runs (`n_runs=batch_size`) that Covasim runs under the `MultiSim` object in each iteration. Must not exceed 32.

In [4]:
# Scenario settings for the parallel, batched workflow.
population = 200_000            # number of agents in the simulation
test_prob = 0.1                 # probability of testing given the agent is symptomatic
trace_lb, trace_ub = 0.0, 0.3   # lower / upper bound on the tracing probability
chi_type = 'piecewise'          # one of 'constant', 'linear', 'piecewise', 'sin'
keep_d = True                   # include the diagnosed (D) compartment
dynamic = True                  # include the interacting term/function
masking = 0                     # 0: no masking intervention
parallelb = True                # run in parallel

# Bundle the settings; the parallel flag is passed by keyword.
model_params = ModelParams(
    population,
    test_prob,
    trace_lb,
    trace_ub,
    chi_type,
    keep_d,
    dynamic,
    masking,
    parallel=parallelb,
)

In [None]:
num_batches = 32
batch_size = 32
n_runs = num_batches * batch_size

# File naming does not change between batches, so it is set up once here.
path = '../../Data/covasim_data/drums_data/'
case_name = get_case_name(population, test_prob, trace_ub, keep_d, dynamic=dynamic, chi_type=chi_type)

# Suffix for each masking mode; 0 (or any unlisted value) adds nothing.
masking_suffixes = {1: '_maskingthresh', 2: '_maskinguni', 3: '_maskingnorm'}
case_name += masking_suffixes.get(masking, '')

# Every batch is loaded back under the same name, which ends in the batch size.
batch_case_name = case_name + '_' + str(batch_size)

# Normalised per-batch averages, collected in a list and stacked once after
# the loop rather than re-concatenating a growing array on every iteration.
batch_means = []
for i in range(num_batches):
    drums_data_generator_multi(model_params, batch_size)

    params = DF.load_covasim_data(path, population, test_prob, trace_ub, keep_d, batch_case_name, plot=False)
    data = np.mean(params['data'], axis=0)  # average over the runs of this batch
    data = data / params['population']      # counts -> population fractions
    batch_means.append(data)

# Average across batches; the batch index is the third axis of the stack.
final_data = np.mean(np.stack(batch_means, axis=2), axis=2)

# Store the overall (normalised) average in the params of the last loaded batch.
params['data'] = final_data.copy()

fig_name = case_name + '_' + str(n_runs)
file_name = 'covasim_' + fig_name
file_name += '.joblib'

file_path = '../../Data/covasim_data/drums_data'

joblib.dump(params, os.path.join(file_path, file_name), compress=True)

# Time axis as a column vector with one entry per row of `final_data`.
N = final_data.shape[0]
t = np.arange(N)[:, None]

In [None]:
# rescale data values to represent number of people in state X rather than ratios (make sure not to run more than once)
final_data *= population

plot=True
if plot:
    n = data.shape[1]
    col_names = list('STEAYDQRF') if keep_d else list('STEAYQRF')
    # plot compartments
    fig = plt.figure(figsize=(15, 15))
    
    for i in range(1, n + 1):
        ax = fig.add_subplot(int(np.ceil(n / 3)), 3, i)
        ax.plot(t, final_data[:, i - 1], '.k', label='Covasim Data')
        ax.set_title(col_names[i - 1])
        ax.legend(fontsize=8)
        fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
        plt.tight_layout(pad=2)
        plt.savefig(os.path.join(path, case_name + '_' + str(n_runs) + '_avg' + '.png') )