In [15]:
import sys
sys.path.append('../')
import joblib
from re import finditer

from Modules.Utils.Imports import *
from Modules.Utils.DRUMSLasso import *
from Modules.Utils.GetLowestGPU import *
import Modules.Loaders.DataFormatter as DF
from Modules.Models.BuildBINNs import AdaMaskBINNCovasim
from Modules.Models.BuildBINNs import chi
from Modules.Utils.ModelWrapper import ModelWrapper
from Modules.Utils.PruneEquation import PruneEquation

from Notebooks.utils import get_case_name

In [16]:
device = torch.device(GetLowestGPU(pick_from=[0,1,2,3]))
# helper functions
def to_torch(x):
    return torch.from_numpy(x).float().to(device)
def to_numpy(x):
    return x.detach().cpu().numpy()

Device set to cpu


In [17]:
# instantiate BINN model parameters and path
path = '../Data/covasim_data/drums_data/'

population = int(500e3)
test_prob = 0.1
trace_prob = 0.3
keep_d = True
retrain = False
dynamic = True
masking = 1
multiple = True
parallelb = True
n_runs = 64
chi_type = 'piecewise'

# model parameters
maskb = True
masking_learned = False

case_name = get_case_name(population, test_prob, trace_prob, keep_d, dynamic=dynamic, chi_type=chi_type)

In [18]:
if not masking==0:
    if masking==1:
        case_name = case_name + '_maskingdem'
    elif masking==2:
        case_name = case_name + '_maskinguni'
    elif masking==3:
        case_name = case_name + '_maskingnorm'

if multiple:
    params = DF.load_covasim_data(path, population, test_prob, trace_prob, keep_d, case_name + '_' + str(n_runs), plot=False)
else:
    params = DF.load_covasim_data(path, population, test_prob, trace_prob, keep_d, case_name, plot=False)

In [19]:
# multiple==True and parallelb==False means that data is a list and not normalized
if multiple and not parallelb:
    data = np.mean(params['data'], axis=0)
    data = (data / params['population'])
    avg_masking = np.mean(params['avg_masking'], axis=0)
    avg_masking = (avg_masking / params['population'])
# multiple==True and parallelb==True means that the data is a 2d array and normalized
elif multiple and parallelb:
    data = params['data'] # parallel simulations store normalized data
    avg_masking = params['avg_masking']
# otherwise, the data is from a single simulation and is not normalized
else:
    data = params['data']
    data = (data / params['population']).to_numpy()
    avg_masking = params['avg_masking']
    avg_masking = (avg_masking / params['population'])
    
params.pop('data')

N = len(data)
t_max = N - 1
t = np.arange(N)[:,None]

tracing_array = params['tracing_array']

In [20]:
model_path = '../models/covasim'
if maskb:
    model_path += '/mask'
    if masking_learned:
        model_path += '/learned_masking'
    else:
        model_path += '/observed_masking'
else:
    model_path += '/no_mask'

In [21]:
#--------------------no masking----------------------#
# model_folder = '/2023-07-20_17-53-03' # no masking, 500e3 pop, dynamic piecewise, 64 avg., 800e3 epochs, lr=1e-5
# model_folder = '/2023-07-21_18-42-24' # no masking, 500e3 pop, dynamic piecewise, 64 avg., 1e6 epochs, lr=1e-6
# model_folder = '/2023-07-22_10-20-01' # no masking, 500e3 pop, dynamic piecewise, 64 avg., 1e6 epochs, lr=5e-6
# model_folder = '/2023-07-23_00-48-24' # no masking, 500e3 pop, dynamic piecewise, 64 avg., 1e6 epochs, lr=9e-6
# model_folder = '/2023-07-23_15-17-23' # no masking, 500e3 pop, dynamic piecewise, 64 avg., 1e6 epochs, lr=9e-6

#------------------normal masking--------------------#
# model_folder = '/2023-07-20_18-13-01' # masking-norm, observed M, 500e3, dynamic piecewise, 64 avg., 800e3, lr=5e-5
# model_folder = '/2023-07-21_18-41-30' # masking-norm, observed M, 500e3, dynamic piecewise, 64 avg., 800e3, lr=5e-5
# model_folder = '/2023-07-21_21-48-16' # masking-norm, observed M, 1e6, dynamic piecewise, 64 avg., 800e3, lr=5e-5
# model_folder = '/2023-07-22_10-16-47' # masking-norm, observed M, 1e6, dynamic piecewise, 64 avg., 800e3, lr=5e-5
# model_folder = '/2023-07-22_12-30-47' # masking-norm, observed M, 1e6, dynamic piecewise, 64 avg., 800e3, lr=5e-5
# model_folder = '/2023-07-23_00-01-28' # masking-norm, observed M, 1e6, dynamic piecewise, 64 avg., 800e3, lr=5e-5
# model_folder = '/2023-07-23_00-27-07' # masking-norm, observed M, 1e6, dynamic piecewise, 64 avg., 800e3, lr=5e-5
# model_folder = '/2023-07-23_15-14-54' # masking-norm, observed M, 1e6, dynamic piecewise, 64 avg., 800e3, lr=4e-5

#---------------demographic masking------------------#
# model_folder = '/2023-07-20_22-20-10' # masking-dem, observed M, 500e3, dynamic piecewise, 64 avg., 600e3, lr=5e-5
# model_folder = '/2023-07-23_15-15-56' # masking-dem, observed M, 500e3, dynamic piecewise, 64 avg., 600e3, lr=3e-6
# model_folder = '/2023-07-24_23-09-21' # masking-dem, observed M, 500e3, dynamic piecewise, 64 avg., 600e3, lr=3e-6
# model_folder = '/2023-07-24_23-09-34' # masking-dem, observed M, 500e3, dynamic piecewise, 64 avg., 600e3, lr=3e-6
model_folder = '/2023-07-25_20-44-25' # masking-dem, observed M, 500e3, dynamic piecewise, 64 avg., 700e3, lr=4e-6

mydir = model_path + model_folder

In [22]:
yita_lb = 0.0
yita_ub = 1.0
beta_lb = 0.0
beta_ub = 0.5
tau_lb = 0.0
tau_ub = 0.5
eta_deep = True
beta_deep = True
tau_deep = False

In [23]:
binn = AdaMaskBINNCovasim(params, 
                t_max_real=t_max, 
                tracing_array=tracing_array, 
                yita_lb=None, 
                yita_ub=None,
                beta_lb=None,
                beta_ub=None,
                tau_lb=tau_lb,
                tau_ub=tau_ub, 
                chi_type=chi_type,
                eta_deep=eta_deep,
                beta_deep=beta_deep,
                tau_deep=tau_deep,
                maskb=maskb,
                masking_learned=masking_learned).to(device)
parameters = binn.parameters()
model = ModelWrapper(binn, None, None, save_name=os.path.join(mydir, case_name))

In [24]:
# load model weights
# model.save_name = '../Weights/'
# model.save_name += case_name
if retrain:
    model.save_name += '_retrain'
model.save_name += '_best_val'
model.load(model.save_name + '_model', device=device)
save_path = model.save_folder
# grab initial condition
u0 = data[0, :].copy()

In [25]:
# grab value ranges
yita_lb, yita_ub = model.model.yita_lb, model.model.yita_ub
beta_lb, beta_ub = model.model.beta_lb, model.model.beta_ub
tau_lb, tau_ub = model.model.tau_lb, model.model.tau_ub

In [26]:
# learned contact_rate function
def contact_rate(u):
    res = binn.eta_func(to_torch(u)) # [:,[0,3,4]]
    return to_numpy(res)

# learned effective tracing rate function
def beta(u):
    res = binn.beta_func(to_torch(u))
    return to_numpy(res)

# learned diagnosis of quarantined rate function
def tau(u):
    res = binn.tau_func(to_torch(u))
    return to_numpy(res)

def chi_func(t):
    chi_t = chi(1 + to_torch(t) * t_max, trace_prob, chi_type)
    return chi_t

### Get the values of $\eta, \beta, \tau$ evaluated on the observed data.

In [27]:
if masking > 0:
    all_data = np.concatenate([data, avg_masking[:,None]], axis=1) # STEAYDQRFM
else:
    all_data = data

if maskb:
    eta_input = np.concatenate([data[:,[0,3,4]], avg_masking[:,None]], axis=1) #SAYM
else:
    eta_input = np.concatenate([data[:,[0,3,4]]], axis=1) #SAY
eta0 = contact_rate(eta_input) # eta(S,A,Y,M)
eta_values = yita_lb + (yita_ub - yita_lb) * eta0[:, 0][:, None]

chi_t = to_numpy(chi_func(t))
beta_input = np.concatenate([np.sum(data[:,[0,3,4]], axis=1)[:,None], chi_t], axis=1)
beta_values = beta(beta_input)

tau_input = data[:,[3,4]]
tau0 = tau(tau_input)
tau_values = tau_lb + (tau_ub - tau_lb) * tau0

### Load the results of LASSO on $\eta, \beta, \tau$ for a desired number of components.

In [28]:
eta_dl = joblib.load(mydir + '/' + case_name + '/eta_eq_coef/' + case_name + '_' + str(n_runs) + '_sparse_coef_11comps')
beta_dl = joblib.load(mydir + '/' + case_name + '/beta_eq_coef/' + case_name + '_' + str(n_runs) + '_sparse_coef_4comps')
tau_dl = joblib.load(mydir + '/' + case_name + '/tau_eq_coef/' + case_name + '_' + str(n_runs) + '_sparse_coef_1comps')

In [29]:
eta_lasso = eta_dl['Lasso']
beta_lasso = beta_dl['Lasso']
tau_lasso = tau_dl['Lasso']

eta_eq = eta_dl['Equation']
beta_eq = beta_dl['Equation']
tau_eq = tau_dl['Equation']

eta_coef = np.append(eta_lasso.intercept_, eta_lasso.coef_)
beta_coef = np.append(beta_lasso.intercept_, beta_lasso.coef_)
tau_coef = np.append(tau_lasso.intercept_, tau_lasso.coef_)

In [30]:
eta_reg_coef = eta_coef[eta_coef.nonzero()]
beta_reg_coef = beta_coef[beta_coef.nonzero()]
tau_reg_coef = tau_coef[tau_coef.nonzero()]
#tau_reg_coef = np.where(np.abs(tau_coef) < float(1e-6), 0, tau_coef)
#tau_reg_coef = tau_reg_coef[tau_reg_coef.nonzero()]

eta_rhs = eta_eq[4:]
beta_rhs = beta_eq[4:]
tau_rhs = tau_eq[4:]

### Get the feature names from the equations

In [31]:
eta_features = []
eta_rhs_split = eta_rhs.split('*')
for i, elem in enumerate(eta_rhs_split):
    if i == 0:
        continue
    else:
        sl = elem.split(' +')
        eta_features.append(sl[0])
        
beta_features = []
beta_rhs_split = beta_rhs.split('*')
for i, elem in enumerate(beta_rhs_split):
    if i == 0:
        continue
    else:
        sl = elem.split(' +')
        beta_features.append(sl[0])
        
tau_features = []
tau_rhs_split = tau_rhs.split('*')
for i, elem in enumerate(tau_rhs_split):
    if i == 0:
        continue
    else:
        sl = elem.split(' +')
        tau_features.append(sl[0])

### Transform the data to be up to degreee 2 and initialize dictionary to store information

In [32]:
poly = PolynomialFeatures(2)
if maskb:
    all_data = np.concatenate((data, avg_masking[:,None]), axis=1)
else:
    all_data = data
X = poly.fit_transform(all_data)

eta_theta0 = X[:,eta_coef.nonzero()[0]]
beta_theta0 = X[:,beta_coef.nonzero()[0]]
tau_theta0 = X[:,tau_coef.nonzero()[0]]

eta_theta_od = dict()
beta_theta_od = dict()
tau_theta_od = dict()

eta_theta_od['features'] = eta_features
eta_theta_od['theta'] = eta_theta0

beta_theta_od['features'] = beta_features
beta_theta_od['theta'] = beta_theta0

tau_theta_od['features'] = tau_features
tau_theta_od['theta'] = tau_theta0

### Run the pruning algorithm

In [36]:
eta_theta_nd = PruneEquation(eta_theta_od, eta_values, alpha=0.1, max_pruning=5)
#beta_theta_nd = PruneEquation(beta_theta_od, beta_values, alpha=0.1, max_pruning=5)
#tau_theta_nd = PruneEquation(tau_theta_od, tau_values, alpha=0.1, max_pruning=5)

In [42]:
print(f"The original components of eta are: {eta_theta_nd['old_features']}")
print()
print(f"The components of eta after pruning are: {eta_theta_nd['features']}")

The original components of eta are: ['S', 'A', 'Y', 'M', 'S^2', 'S A', 'S Y', 'S M', 'A M', 'Y M', 'M^2']

The components of eta after pruning are: ['S Y', 'S M', 'A M', 'Y M', 'M^2']


### Evaluate

In [437]:
print(np.array_equal(eta_theta_nd['old_features'], eta_theta_nd['features']))
eta_theta_nd['features']

False


['R',
 'M',
 'S^2',
 'S T',
 'S E',
 'S A',
 'S Y',
 'S D',
 'S R',
 'S M',
 'E M',
 'R^2',
 'R M',
 'M^2']

In [438]:
beta_theta_od['features'] = beta_features
beta_theta_od['theta'] = beta_theta0

beta_theta_nd = PruneEquation(beta_theta_od, beta_values, alpha=0.1, max_pruning=5)

print(np.array_equal(beta_theta_nd['old_features'], beta_theta_nd['features']))
beta_theta_nd['features']

False


['S E', 'S A', 'S Y', 'S R', 'T R']

In [442]:
tau_theta_od['features'] = tau_features
tau_theta_od['theta'] = tau_theta0

tau_theta_nd = PruneEquation(tau_theta_od, tau_values, alpha=0.1, max_pruning=5)

print(np.array_equal(tau_theta_nd['old_features'], tau_theta_nd['features']))
tau_theta_nd['features']

TypeError: cannot unpack non-iterable NoneType object