In [1]:
import sys
sys.path.append('../')
import joblib

from scipy.signal import savgol_filter

from Modules.Utils.Imports import *
from Modules.Utils.DRUMS_Lasso import *
from Modules.Utils.GetLowestGPU import *
import Modules.Loaders.DataFormatter as DF

from Notebooks.utils import get_case_name
from queue import PriorityQueue

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Compute device chosen by GetLowestGPU among the listed GPU indices.
device = torch.device(
    GetLowestGPU(pick_from=[0, 1, 2, 3])
)

# helper functions
def to_torch(x):
    """Turn a NumPy array into a float tensor that lives on `device`."""
    tensor = torch.from_numpy(x)
    return tensor.float().to(device)
def to_numpy(x):
    """Detach a tensor from autograd, bring it to the CPU and return it as a NumPy array."""
    host_tensor = x.detach().cpu()
    return host_tensor.numpy()

Device set to cpu


In [3]:
# Settings that identify which stored simulation case is loaded below.
path = '../Data/covasim_data/drums_data/'

# values passed to get_case_name / the data loader
population = 500000
test_prob = 0.1
trace_prob = 0.3
keep_d = True
dynamic = True
chi_type = 'piecewise'

# flags and options read by the following cells
retrain = False
masking = 3        # 0 adds no suffix; 1/2/3 select the '_masking*' variant of the case name
multiple = True
parallelb = True
n_runs = 64

case_name = get_case_name(
    population,
    test_prob,
    trace_prob,
    keep_d,
    dynamic=dynamic,
    chi_type=chi_type,
)

In [4]:
# Append the suffix for the chosen masking variant; any other value (including 0) adds nothing.
masking_suffixes = {1: '_maskingdem', 2: '_maskinguni', 3: '_maskingnorm'}
case_name = case_name + masking_suffixes.get(masking, '')

# When several runs were stored together, the data name also carries the run count.
params = DF.load_covasim_data(
    path,
    population,
    test_prob,
    trace_prob,
    keep_d,
    (case_name + '_' + str(n_runs)) if multiple else case_name,
    plot=False,
)

In [5]:
# Extract `data` and `avg_masking` from the loaded params and bring both to
# per-capita scale, depending on how the simulations were stored.

# multiple==True and parallelb==False means that data is a list and not normalized
if multiple and not parallelb:
    # average over the runs first, then scale by the population size
    data = np.mean(params['data'], axis=0)
    data = (data / params['population'])
    avg_masking = np.mean(params['avg_masking'], axis=0)
    avg_masking = (avg_masking / params['population'])
# multiple==True and parallelb==True means that the data is a 2d array and normalized
elif multiple and parallelb:
    data = params['data'] # parallel simulations store normalized data
    avg_masking = params['avg_masking']
# otherwise, the data is from a single simulation and is not normalized
else:
    data = params['data']
    data = (data / params['population']).to_numpy()
    avg_masking = params['avg_masking']
    # Normalize by the population, exactly as in the first branch. The previous
    # code divided avg_masking by itself, which yields a constant series of ones
    # (or NaN where the value is 0) and therefore a meaningless derivative later.
    # NOTE(review): if avg_masking is a pandas object here, a .to_numpy() may be
    # wanted as for `data` -- confirm against the loader.
    avg_masking = (avg_masking / params['population'])

# `data` has been extracted above; remove it from params.
# NOTE: this makes the cell non-rerunnable without reloading params.
params.pop('data')

N = len(data)              # number of rows in data
t_max = N - 1
t = np.arange(N)[:,None]   # column vector 0..N-1

tracing_array = params['tracing_array']

In [6]:
# Estimate the first derivative of the average-masking series with a
# Savitzky-Golay filter, then move the result to the torch device.
window_size = 15
degree = 3
mt = to_torch(
    savgol_filter(
        avg_masking,
        window_length=window_size,
        polyorder=degree,
        deriv=1,
        axis=0,
    )
)
#plt.plot(t, mt)

In [7]:
# Map each compartment letter to its column of `data`.
comps = list('STEAYDQRF')
X_dict = dict(zip(comps, data.T))

# Sweep the Lasso penalty and keep one entry per distinct equation, ordered by MSE.
alphas_list = np.linspace(1e-7, 1e-5, num=5000)
eq_set = set()
eq_q = PriorityQueue()

for alpha in alphas_list:
    lasso_dict = DRUMS_Lasso(X_dict, mt, intercept=True, alphas=np.array([alpha]))
    equation = lasso_dict['Equation']
    if equation in eq_set:
        # this equation was already produced by an earlier alpha
        continue
    eq_set.add(equation)
    # alpha is unique per entry, so tuple comparison never reaches the dict
    eq_q.put((lasso_dict['MSE'], alpha, equation, lasso_dict))

  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd

In [8]:
def _pop_best_with_max_terms(queue, max_terms):
    """Pop entries from `queue` until one has at most `max_terms` components.

    Entries are the (MSE, alpha, equation, lasso_dict) tuples stored in the
    queue, so they come out in order of increasing MSE. Entries that are
    rejected are discarded, as before.

    Raises:
        RuntimeError: if the queue runs out before a match is found. The
            previous code called `get()` on the empty queue in that case,
            which blocks forever and hangs the kernel.
    """
    while not queue.empty():
        candidate = queue.get()
        # Count the '+'-separated pieces of the equation string and subtract one
        # (the fits above use intercept=True, which adds a trailing intercept term).
        num_components = len(candidate[2].split('+')) - 1
        if num_components <= max_terms:
            return candidate
    raise RuntimeError(
        'no remaining candidate equation has <= %d components' % max_terms
    )


# Lowest-MSE candidate with at most five components.
five_comp_tuple = _pop_best_with_max_terms(eq_q, 5)

# The search continues in the remaining (higher-MSE) entries, so this is
# always a different candidate from five_comp_tuple.
four_comp_tuple = _pop_best_with_max_terms(eq_q, 4)

In [9]:
# Directory for the saved sparse-regression results. Build the path once with
# os.path.join (the previous string concatenation produced a doubled '/' because
# `path` already ends with one) and let makedirs handle the "already exists"
# case instead of a separate check-then-create.
file_path = os.path.join(path, 'sparse_coef_masking')
os.makedirs(file_path, exist_ok=True)

# File names for the four- and five-component fits.
four_file_name = case_name + '_' + str(n_runs) +  '_sparse_coef_4comps'
five_file_name = case_name + '_' + str(n_runs) +  '_sparse_coef_5comps'

In [10]:
# Persist the result dictionary (last element of each tuple) of both selected fits.
joblib.dump(
    value=four_comp_tuple[-1],
    filename=os.path.join(file_path, four_file_name),
    compress=True,
)

joblib.dump(
    value=five_comp_tuple[-1],
    filename=os.path.join(file_path, five_file_name),
    compress=True,
)

['../Data/covasim_data/drums_data//sparse_coef_masking\\500000_0.1_0.3_dynamic_piecewise_maskingnorm_64_sparse_coef_5comps']

In [17]:
# Reload the saved four-component fit and pull out its non-zero Lasso coefficients.
masking_lasso_dict = joblib.load(f'{file_path}/{four_file_name}')
masking_coef_indices = np.nonzero(masking_lasso_dict['Lasso'].coef_)
masking_coef = masking_lasso_dict['Lasso'].coef_[masking_coef_indices]
print(masking_coef)

[ 0.03451494 -0.43577385  2.39221827 -0.00652447]


In [36]:
# Flat array holding the intercept first, followed by the non-zero coefficients.
arr = np.concatenate((
    np.ravel(masking_lasso_dict['Lasso'].intercept_),
    np.ravel(masking_coef),
))

In [38]:
# Display the equation string stored under 'Equation' in the reloaded result dictionary.
masking_lasso_dict['Equation']

'f = 0.03451*S + -0.43577*T + 2.39222*Y + -0.00652*R + -0.03254'