In [1]:
import sys
sys.path.append('../../')

from scipy.signal import savgol_filter

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split

from Modules.Utils.Imports import *
from Modules.Utils.DRUMS_Lasso import *
from Modules.Utils.GetLowestGPU import *
import Modules.Loaders.DataFormatter as DF

from Notebooks.utils import get_case_name

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Resolve the compute device once for the whole notebook. GetLowestGPU is asked to
# choose among GPU indices 0-3 (NOTE(review): presumably the least-loaded one; the
# recorded output shows it can also resolve to "cpu").
gpu_candidates = [0, 1, 2, 3]
device = torch.device(GetLowestGPU(pick_from=gpu_candidates))
# helper functions
def to_torch(x):
    """Convert NumPy array ``x`` to a float32 tensor on the notebook-level ``device``."""
    tensor = torch.from_numpy(x)
    return tensor.to(device=device, dtype=torch.float32)
def to_numpy(x):
    """Detach tensor ``x`` from autograd, move it to the CPU and return it as a NumPy array."""
    host_tensor = x.detach().cpu()
    return host_tensor.numpy()

Device set to cpu


In [3]:
# --- BINN case configuration: model parameters and data path ---

# Directory holding the stored simulation data.
path = '../../Data/covasim_data/drums_data/'

# Settings that identify the simulated case (all are passed to get_case_name below).
population = 500_000
test_prob, trace_prob = 0.1, 0.3
keep_d, dynamic = True, True
chi_type = 'piecewise'

# Masking variant: 0 adds no suffix to the case name; 1/2/3 select the
# '_maskingthresh' / '_maskinguni' / '_maskingnorm' suffix in the loading cell.
masking = 3

# Layout of the stored data: `multiple` selects the '<case>_<n_runs>' file,
# `parallelb` decides whether the loaded data is treated as already normalised.
multiple, parallelb = True, True
n_runs = 2048

retrain = False

case_name = get_case_name(
    population, test_prob, trace_prob, keep_d,
    dynamic=dynamic, chi_type=chi_type,
)

In [4]:
# Append the masking-variant suffix to the case name.
# The endswith() guard keeps this cell idempotent: previously every re-run appended
# the suffix again ('..._maskingnorm_maskingnorm'), so the loader received a wrong name.
masking_suffixes = {1: '_maskingthresh', 2: '_maskinguni', 3: '_maskingnorm'}
masking_suffix = masking_suffixes.get(masking, '')  # 0 (or an unknown value) -> no suffix
if masking_suffix and not case_name.endswith(masking_suffix):
    case_name = case_name + masking_suffix

# Multi-run data sets are loaded under '<case_name>_<n_runs>', single runs under the bare case name.
file_name = (case_name + '_' + str(n_runs)) if multiple else case_name
params = DF.load_covasim_data(path, population, test_prob, trace_prob, keep_d, file_name, plot=False)

In [5]:
# Bring the compartment data and the masking series into normalised form,
# depending on how the simulation output was stored.
#
# Fix: the two non-parallel branches used to compute
# `avg_masking / params['avg_masking']`, i.e. the series divided by itself, which is
# 1 everywhere (NaN where the series is 0). They now divide by the population,
# mirroring what is done for `data`.
# NOTE(review): this assumes avg_masking is stored on the same un-normalised scale
# as `data`; if it is already a fraction, remove the division instead.
if multiple and not parallelb:
    # multiple==True and parallelb==False means that data is a list and not normalized:
    # average over the runs (axis 0), then scale by the population size.
    data = np.mean(params['data'], axis=0)
    data = data / params['population']
    avg_masking = np.mean(params['avg_masking'], axis=0)
    avg_masking = avg_masking / params['population']
elif multiple and parallelb:
    # multiple==True and parallelb==True means that the data is a 2d array and normalized
    data = params['data']  # parallel simulations store normalized data
    avg_masking = params['avg_masking']
else:
    # otherwise, the data is from a single simulation and is not normalized
    data = (params['data'] / params['population']).to_numpy()
    avg_masking = params['avg_masking'] / params['population']

# Drop the raw data from params now that the working copy is in `data`.
params.pop('data')

# Time grid: one entry per row of `data`, shaped (N, 1).
N = len(data)
t_max = N - 1
t = np.arange(N)[:, None]

tracing_array = params['tracing_array']

In [6]:
#plt.plot(t, avg_masking)

In [7]:
# Choose the polynomial degree for avg_masking(t) by 10-fold cross-validation.
#
# Changes from the previous version:
#   * train_test_split gets a fixed random_state, so the split (and therefore the
#     selected degree) is reproducible across kernel restarts;
#   * the "best so far" sentinel is +inf instead of 1e6, and best_degree/best_model
#     are initialised, so they are always defined after the loop;
#   * the PolynomialFeatures transformer is fit once per degree and reused for the
#     test split instead of being fit twice.
x_train, x_test, y_train, y_test = train_test_split(t, avg_masking, random_state=0)

degree = 12  # exclusive upper bound: degrees 1 .. degree-1 are evaluated
training_error = []
cross_validation_error = []

best_error = float('inf')
best_degree = None
best_model = None

for d in range(1, degree):
    # generate polynomial features up to degree d
    poly = PolynomialFeatures(degree=d)
    x_poly_train = poly.fit_transform(x_train)
    x_poly_test = poly.transform(x_test)

    # fit_intercept=False: PolynomialFeatures already emits the constant (bias) column by default
    lr = LinearRegression(fit_intercept=False)
    model = lr.fit(x_poly_train, y_train)

    # training-set predictions and their MSE
    y_train_pred = model.predict(x_poly_train)
    mse_train = mean_squared_error(y_train, y_train_pred)

    # scores are negated MSEs, hence the absolute value below
    cve = cross_validate(lr, x_poly_train, y_train, scoring='neg_mean_squared_error', cv=10, return_train_score=True)
    cv_error = np.mean(np.absolute(cve['test_score']))

    training_error.append(mse_train)  # save MSE from training
    cross_validation_error.append(cv_error)  # save the CV error

    # keep the degree with the lowest mean CV error
    if cv_error < best_error:
        best_degree = d
        best_model = model
        best_error = cv_error

# CV error versus degree; the figure is closed right away, so it is not rendered inline.
fig, ax = plt.subplots(figsize=(6, 6))
ax.plot(range(1, degree), cross_validation_error)
ax.set_xlabel('Degree', fontsize=20)
ax.set_ylabel('MSE', fontsize=20)
ax.set_title('MSE VS Degree', fontsize=25)
plt.close(fig)

In [8]:
# Polynomial design matrix of the full time grid at the cross-validated degree.
best_poly = PolynomialFeatures(degree=best_degree)
t_poly = best_poly.fit_transform(t)
#plt.plot(t, best_model.predict(t_poly))

In [9]:
# Savitzky-Golay settings used to differentiate the masking series.
# NOTE: `degree` is rebound here; earlier it held the bound of the polynomial-degree search.
window_size = 15
degree = 3

# First derivative (deriv=1) of avg_masking along axis 0, moved to a torch tensor on `device`.
masking_derivative = savgol_filter(
    avg_masking,
    window_length=window_size,
    polyorder=degree,
    deriv=1,
    axis=0,
)
mt = to_torch(masking_derivative)

#plt.plot(t, mt)

In [10]:
# One key per compartment letter, mapped to the matching column of `data`
# (rows of data.T). zip stops at the shorter input, so surplus letters or columns are dropped.
comps = list('STEAYDQRF')
X_dict = dict(zip(comps, data.T))

In [41]:
# Sweep the Lasso regularisation strength over a linear grid and keep the fit with
# the lowest reported MSE, recording every MSE and every distinct equation on the way.
#
# NOTE(review): the MSE comes from the same data the model is fit on, so the
# selection favours the weakest penalty -- the recorded run picked alpha = 1e-06,
# the lower edge of the grid. Consider a held-out or cross-validated criterion.
best_mse = float('inf')  # +inf so the first fit is always accepted (was a finite 1e6 sentinel)
best_dict = {}
best_alpha = -1

mse_list = []
equation_set = set()
alphas_list = np.linspace(1e-6, 1e-4, num=5000)

for alpha in alphas_list:
    # DRUMS_Lasso is given a one-element alpha array, i.e. one penalty per call
    lasso_dict = DRUMS_Lasso(X_dict, mt, intercept=False, alphas=np.array([alpha]))
    mse = lasso_dict['MSE']

    mse_list.append(mse)
    equation_set.add(lasso_dict['Equation'])  # set.add ignores duplicates; no membership test needed

    if mse < best_mse:
        best_dict, best_mse, best_alpha = lasso_dict, mse, alpha

  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descen

In [42]:
# Report the selected equation, its MSE and the alpha that produced it, one per line.
print(best_dict['Equation'], best_dict['MSE'], best_alpha, sep='\n')

f = -0.03827*T + -0.01819*R + 0.00132*S^2 + 0.00000
4.6584778687588884e-07
1e-06


In [43]:
# Spacing of the alpha grid: absolute difference between its first two entries.
np.abs(alphas_list[1] - alphas_list[0])

1.9803960792158524e-08

In [46]:
# Coefficient vector (coef_) of the object stored under "Lasso" in the best result
# (presumably the fitted Lasso estimator -- confirm against DRUMS_Lasso).
best_dict["Lasso"].coef_

array([ 0.        , -0.03826626,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        , -0.01818799, -0.        ,  0.0013249 ,
       -0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        , -0.        , -0.        , -0.        ,  0.        ,
        0.        ,  0.        , -0.        ,  0.        , -0.        ,
       -0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        , -0.        ,  0.        ,  0.        ,
        0.        ,  0.        , -0.        , -0.        ,  0.        ,
        0.        ,  0.        , -0.        , -0.        ,  0.        ,
        0.        , -0.        , -0.        ,  0.        , -0.        ,
       -0.        , -0.        , -0.        ,  0.        ])