This notebook runs a sensitivity analysis on our DRUMS data using Pat's code.


In [2]:
# Imports
import numpy as np
import os
import sys
sys.path.append('../')
from Modules.Utils.DRUMS_Lasso import DRUMS_Lasso
from Modules.Utils.Imports import *
import Modules.Loaders.DataFormatter as DF

from Modules.Utils.Imports import *
from Modules.Models.BuildBINNs import BINNCovasim
from Modules.Utils.ModelWrapper import ModelWrapper
#from Notebooks.utils import utils
from jordan_scratch_work.utils import get_case_name
#from utils import get_case_name
import warnings
warnings.filterwarnings("ignore", message="Support of Intel(R) Streaming SIMD Extensions 4.2 (Intel(R) SSE4.2) enabled only processors has been deprecated.")



____________ following code copied from BINNCovasimEvaluation_dynamic.ipynb ________________________

In [4]:
# Choose the torch device through the GetLowestGPU helper, passing GPU indices 0-3 as candidates.
# The recorded output of this cell reads "Device set to cpu" for the saved run.
device = torch.device(GetLowestGPU(pick_from=[0,1,2,3]))
# helper functions
def to_torch(x):
    """Wrap a NumPy array as a float32 torch tensor placed on the notebook's `device`."""
    tensor = torch.from_numpy(x)
    return tensor.float().to(device)
def to_numpy(x):
    """Detach a torch tensor from the autograd graph, move it to the CPU and return it as a NumPy array."""
    detached = x.detach()
    return detached.cpu().numpy()

Device set to cpu


In [5]:
# BINN case configuration and data location
path = '../Data/covasim_data/drums_data/'
# path = '../Data/covasim_data/xin_data/'

# settings passed to get_case_name / the data loader
population = 200000
test_prob, trace_prob = 0.1, 0.3
keep_d, dynamic = True, True
chi_type = 'piecewise'

# flags read by the following cells (masking suffix, data layout, run count)
masking = 0
multiple, parallelb = True, True
n_runs = 1024
# NOTE(review): `retrain` is not read anywhere in the visible cells.
retrain = False

case_name = get_case_name(
    population, test_prob, trace_prob, keep_d,
    dynamic=dynamic, chi_type=chi_type,
)
# yita_lb, yita_ub = 0.2, 0.4

In [6]:
# Masking codes 1-3 each add their own suffix to the case name; any other value adds nothing.
case_name = case_name + {
    1: '_maskingthresh',
    2: '_maskinguni',
    3: '_maskingnorm',
}.get(masking, '')

# When `multiple` is set, the name handed to the loader also carries the run count.
params = DF.load_covasim_data(
    path, population, test_prob, trace_prob, keep_d,
    case_name + '_' + str(n_runs) if multiple else case_name,
    plot=False,
)

In [7]:
# Build the `data` array used downstream; the handling depends on how the runs were stored.
if not multiple:
    # single simulation (not normalized, per the original note): divide by the
    # population and convert to a NumPy array
    data = (params['data'] / params['population']).to_numpy()
elif parallelb:
    # multiple runs with parallelb (already a normalized 2d array, per the original note):
    # used as-is
    data = params['data']
else:
    # multiple runs without parallelb (a list, not normalized, per the original note):
    # average over axis 0, then divide by the population
    data = np.mean(params['data'], axis=0) / params['population']

# 'data' is removed from params once it has been extracted
params.pop('data')

# one integer time index per row of data, shaped as a column vector
N = len(data)
t_max = N - 1
t = np.arange(N).reshape(-1, 1)

tracing_array = params['tracing_array']

In [8]:
# Directory of the saved model run; it is joined with case_name to form the ModelWrapper save_name.
# NOTE(review): "lr=1e4" in the run notes below is presumably 1e-4 — confirm.
mydir = '../models/covasim/2023-07-12_11-00-45' # no masking, 200e3 pop, dynamic piecewise, keepd, 1024 avg., 50e3 epochs, lr=1e4

In [11]:
# instantiate BINN model
# Build the network from the loaded params and move it to the selected device.
binn = BINNCovasim(params, t_max, tracing_array, keep_d=keep_d).to(device)
# NOTE(review): `parameters` is not read again in the visible cells.
parameters = binn.parameters()
# Wrap the network; save_name is <mydir>/<case_name>.
# NOTE(review): no visible cell loads saved weights into `binn` after this point (execution
# counts jump from 8 to 11 and `retrain` is never read) — confirm the trained weights are
# actually loaded before the learned functions are evaluated in the later cells.
model = ModelWrapper(binn, None, None, save_name=os.path.join(mydir, case_name))


In [12]:
# Extremes of columns 0, 3 and 4 of `data`, bound to the S, A and Y names respectively.
(s_min, s_max), (a_min, a_max), (y_min, y_max) = [
    (data[:, col].min(), data[:, col].max()) for col in (0, 3, 4)
]
# Extremes of the row-wise sum of those three columns.
say_min = (data[:, 0] + data[:, 3] + data[:, 4]).min()
say_max = (data[:, 0] + data[:, 3] + data[:, 4]).max()
# chi spans zero up to the 'eff_ub' entry of params.
chi_min = 0.0
chi_max = params['eff_ub']
# grab value ranges stored on the wrapped network
yita_lb, yita_ub = model.model.yita_lb, model.model.yita_ub
beta_lb, beta_ub = model.model.beta_lb, model.model.beta_ub
tau_lb, tau_ub = model.model.tau_lb, model.model.tau_ub

In [None]:
# learned contact_rate function
# NOTE(review): this cell has no execution count and the notebook's last recorded output is
# "NameError: name 'contact_rate' is not defined" — run this cell before the fitting cell.
def contact_rate(u):
    """Evaluate the network's eta_func on a NumPy input and return the result as a NumPy array."""
    return to_numpy(binn.eta_func(to_torch(u)))  # [:,[0,3,4]]
# learned tracing rate function
def beta(u):
    """Evaluate the network's beta_func on a NumPy input and return the result as a NumPy array."""
    return to_numpy(binn.beta_func(to_torch(u)))
# learned diagnoses rate of quarantined individuals
def tau(u):
    """Evaluate the network's tau_func on a NumPy input and return the result as a NumPy array."""
    return to_numpy(binn.tau_func(to_torch(u)))


## **Check me:** does it matter that I have references to `tau_func` and `beta_func` here? Isn't that something we were trying to avoid?

In [13]:
def get_samples_ct(u):
    """Build the candidate-term matrix for the contact-rate fit.

    The first three columns of ``u`` are read as s, a and y; the result stacks
    the columns [s, s**2, a, y] side by side, one row per input row.
    """
    s, a, y = (u[:, [k]] for k in range(3))
    return np.concatenate((s, s**2, a, y), axis=1)

def get_samples_beta(u):
    """Return the first two columns of ``u`` (read as drf and chi) side by side as a two-column array."""
    drf = u[:, [0]]
    chi = u[:, [1]]
    return np.concatenate((drf, chi), axis=1)  # , chi**2

def get_samples_tau(u):
    """Return the first two columns of ``u`` (read as a and y) side by side as a two-column array."""
    columns = [u[:, k][:, None] for k in (0, 1)]
    return np.hstack(columns)



# Ten evenly spaced grid points across the computed range of each of S, A and Y.
s_grid, a_grid, y_grid = (
    np.linspace(lo, hi, 10)
    for lo, hi in ((s_min, s_max), (a_min, a_max), (y_min, y_max))
)

#Eta -------------------------
# Every (S, A, Y) grid combination, one per row (10 * 10 * 10 rows, 3 columns).
train_x1 = np.array(np.meshgrid(s_grid, a_grid, y_grid)).T.reshape(-1, 3)

# NOTE(review): data_x1 is not read again in the visible cells.
data_x1 = get_samples_ct(train_x1)

# Network output at each grid point; only column 0 is kept, shaped as a single column.
# NOTE(review): unlike the Tau fit later in this cell, this output is not rescaled with
# yita_lb / yita_ub before fitting — confirm that is intended.
data_y1 = contact_rate(train_x1)[:, 0][:, None]

# Regressors handed to the Lasso fit, keyed by term name.
eta_rhs_values = {
    term: train_x1[:, idx] for idx, term in enumerate(('S', 'A', 'Y'))
}

results_eta = DRUMS_Lasso(input_dict=eta_rhs_values, lhs_values=data_y1)

print(f"Equation for Eta: {results_eta['Equation']!s}")
print(f"Eta MSE: {results_eta['MSE']!s}")
#------------------------------



#Beta--------------------------
# NOTE(review): the SAY range is hard-coded to [0.6, 1.0] although say_min / say_max are
# computed in an earlier cell — confirm the fixed range is intended.
say_grid = np.linspace(0.6, 1.0, 10)
chi_grid = np.linspace(chi_min, chi_max, 10)

# All (SAY, chi) grid combinations; SAY is the sum S + A + Y.
SAY, XX = np.meshgrid(say_grid, chi_grid)

# Flatten both grids and pair them up as the two input columns.
data_x2 = np.column_stack([SAY.ravel(), XX.ravel()])
data_beta = beta(data_x2)

# Regressors handed to the Lasso fit, keyed by term name.
beta_rhs_values = dict(zip(('SAY', 'X'), (data_x2[:, 0], data_x2[:, 1])))

results_beta = DRUMS_Lasso(input_dict=beta_rhs_values, lhs_values=data_beta)

print(f"Equation for Beta: {results_beta['Equation']!s}")
print(f"Beta MSE: {results_beta['MSE']!s}")

#----------------------------------

#Tau-------------------------------
# Ten evenly spaced grid points across the computed range of each of A and Y.
a_grid, y_grid = (
    np.linspace(lo, hi, 10) for lo, hi in ((a_min, a_max), (y_min, y_max))
)
labels = ['A', 'Y']  # NOTE(review): not read again in the visible cells

# All (A, Y) grid combinations, flattened into a two-column input.
AA, YY = np.meshgrid(a_grid, y_grid)
data_x3 = np.column_stack([AA.ravel(), YY.ravel()])
data_tau = tau(data_x3)
print("DATA-TAU")
print(np.shape(data_tau))
#******
#data_tau = data_tau[:,0].reshape(AA.shape)
# Map the network output onto [tau_lb, tau_ub], then round to 4 decimals before fitting.
data_tau = np.round(tau_lb + (tau_ub - tau_lb) * data_tau, decimals=4)

print(np.shape(data_tau))

term_names = ['A', 'Y']  # NOTE(review): not read again in the visible cells

# Regressors handed to the Lasso fit, keyed by term name.
tau_rhs_values = dict(zip(('A', 'Y'), (data_x3[:, 0], data_x3[:, 1])))

results_tau = DRUMS_Lasso(input_dict=tau_rhs_values, lhs_values=data_tau)

print(f"Equation for Tau: {results_tau['Equation']!s}")
print(f"Tau MSE: {results_tau['MSE']!s}")
print(results_tau["Lasso"].coef_)
print(results_tau["Lasso"].intercept_)


NameError: name 'contact_rate' is not defined