In [2]:
import pickle
import numpy as np
import torch
import matplotlib.pyplot as plt
import os
import sys
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import pandas as pd

# --- Notebook configuration --------------------------------------------------
# NOTE(review): every path below is a Windows-style relative path (".\\...")
# and is resolved against the kernel's working directory.

# Serialized model that the later cells evaluate.
MODEL_NAME = ".\\{}_{}_10_res_loss_10_power_features.pt".format("Positive","OFT")

# Pickled feature dictionaries, one train / validation pair per dataset.
EPM_TRAIN_FILE = ".\\EPM_model\\EPM_train_dict_May_17.pkl"
EPM_VAL_FILE = ".\\EPM_model\\EPM_val_dict_May_17.pkl"
OFT_TRAIN_FILE_NAME = ".\\OFT_model\\OFT_train_dict_old_features_hand_picked.pkl"
OFT_VAL_FILE_NAME = ".\\OFT_model\\OFT_validation_dict_old_features_hand_picked.pkl"
FLX_TRAIN_FILE = ".\\FLX_model\\FLX_train_dict_old_features.pkl"
FLX_VAL_FILE = ".\\FLX_model\\FLX_validation_dict_old_features.pkl"

# Generic names for the same OFT train / validation pickles.
TRAIN_FILE_NAME = OFT_TRAIN_FILE_NAME
VALIDATION_FILE_NAME = OFT_VAL_FILE_NAME

# Dictionary keys of the feature arrays and the scalar each one is multiplied
# by when the design matrices are stacked (both lists share the same order).
FEATURE_LIST = ['X_power_1_2','X_coh_1_2','X_gc_1_2']
FEATURE_VECTOR = FEATURE_LIST  # second name bound to the same list object
FEATURE_WEIGHTS = [10,1,1]

# Inserted into the projection CSV file names written by later cells.
NETWORK_CONSTRAINT = "Positive"

#For Consistency
RANDOM_STATE = 42

# Extend the import path first; presumably the two project modules imported
# below live in this folder - confirm.
UMC_PATH = ".\\Universal-Mouse-Code\\"
sys.path.append(UMC_PATH)
import umc_data_tools as umc_dt
from dCSFA_NMF import dCSFA_NMF  # presumably required to unpickle the saved model - confirm

# NOTE(review): `device` is detected and reported here, but the model below is
# always mapped to the CPU, so the message can read "cuda:0" while inference
# actually runs on the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
print("Using device: %s"%(device))

# torch.load unpickles the file; only load checkpoints from a trusted source.
model = torch.load(MODEL_NAME,map_location='cpu')
model.device = "cpu"

Using device: cuda:0


In [3]:
# Load the pickled OFT train / validation dictionaries.
# NOTE: pickle.load can execute arbitrary code - only open trusted files.
with open(TRAIN_FILE_NAME,'rb') as f:
    train_dict = pickle.load(f)
with open(VALIDATION_FILE_NAME,'rb') as f:
    val_dict = pickle.load(f)

NUM_FREQS = 56
FEATURE_LIST = ['X_power_1_2','X_coh_1_2','X_gc_1_2']

# (begin, end) column offsets of every feature block, in FEATURE_VECTOR order.
feature_groups = []
running_idx = 0
for idx,feature in enumerate(FEATURE_VECTOR):
    f_begin = running_idx
    n_cols = train_dict[feature].shape[1]
    # NOTE(review): the first block's end offset is reduced by one, which also
    # shifts the start of every following block by one column - confirm that
    # this is intended and not an off-by-one.
    f_end = (f_begin + n_cols - 1) if idx == 0 else (f_begin + n_cols)
    feature_groups.append((f_begin,f_end))
    running_idx = f_end

# Total stacked width divided by the NUM_FREQS columns assumed per feature.
NUM_FEATURES = sum(train_dict[feature].shape[1] for feature in FEATURE_VECTOR) // NUM_FREQS
# The values 1..NUM_FREQS repeated once per feature; only referenced by the
# disabled "#*scale_vector" factors below.
scale_vector = np.array([np.arange(1,NUM_FREQS+1)] * NUM_FEATURES).flatten()


def _stack_weighted_features(data_dict):
    """Column-stack the FEATURE_LIST arrays of ``data_dict``, each multiplied by its FEATURE_WEIGHTS entry."""
    return np.hstack([data_dict[name]*weight
                      for name,weight in zip(FEATURE_LIST,FEATURE_WEIGHTS)])


#Train Arrays
X_train = _stack_weighted_features(train_dict)#*scale_vector
y_hc_train = train_dict['y_Homecage'].astype(bool)
y_task_train = ~y_hc_train  # task label is the negation of the homecage flag
(y_ROI_train, y_vel_train,
 y_mouse_train, y_time_train) = (train_dict[key]
                                 for key in ('y_ROI','y_vel','y_mouse','y_time'))

#Validation Arrays
X_val = _stack_weighted_features(val_dict)#*scale_vector
y_hc_val = val_dict['y_Homecage'].astype(bool)
y_task_val = ~y_hc_val
(y_ROI_val, y_vel_val,
 y_mouse_val, y_time_val) = (val_dict[key]
                             for key in ('y_ROI','y_vel','y_mouse','y_time'))

# Dense one-hot encoding of the training mouse identifiers (one column per mouse).
mouse_column = y_mouse_train.reshape(-1,1)
intercept_mask = OneHotEncoder().fit_transform(mouse_column).todense()

In [4]:
# Per-mouse AUCs of the model on the OFT homecage-vs-task labels, followed by a
# projection CSV export - first for the training split, then for validation.
# The validation report used to be printed under the "Training" label; each
# split is now reported under its own name.
#
# Entry layout: (report label, features, task labels, mouse ids,
#                source pickle, CSV path template).
oft_splits = (
    ("Training", X_train, y_task_train, y_mouse_train, OFT_TRAIN_FILE_NAME,
     ".\\July_30_Projections\\{}_OFT_HC_vs_Task_Train_Projection_2022_July_30.csv"),
    ("Validation", X_val, y_task_val, y_mouse_val, OFT_VAL_FILE_NAME,
     ".\\July_30_Projections\\{}_OFT_HC_vs_Task_Val_Projection_2022_July_30.csv"),
)

for split_label, X_split, y_task_split, y_mouse_split, split_file, csv_template in oft_splits:
    y_pred, s = model.predict(X_split,include_scores=True)

    # Without the scores argument the helper reports the 'sklearn_roc_auc'
    # method (see the 'auc_method' entry of the printed dictionary).
    roc_auc_dict = umc_dt.lpne_auc(y_pred,y_task_split,y_mouse_split)
    print("%s ROC AUCs: "%(split_label),roc_auc_dict)

    # Passing the scores plus the trailing True flag presumably selects the
    # Mann-Whitney variant - confirm against umc_data_tools.
    mw_auc_dict = umc_dt.lpne_auc(y_pred,y_task_split,y_mouse_split,s,True)
    print("%s MW AUCs: "%(split_label),mw_auc_dict)

    # A fresh column list is passed on every call, as in the unrolled version.
    df = umc_dt.make_projection_csv(split_file,model,FEATURE_VECTOR,
                    ['y_mouse','y_expDate','y_time','y_Homecage','y_ROI','y_vel'],
                    save_file = csv_template.format(NETWORK_CONSTRAINT),
                    auc_dict=mw_auc_dict)




Training ROC AUCs:  {'auc_method': 'sklearn_roc_auc', 'Mouse04191': 0.9956236117908368, 'Mouse04201': 0.99822695035461, 'Mouse04202': 0.9985875706214689, 'Mouse04205': 0.998442367601246, 'Mouse04215': nan, 'Mouse3191': 0.9833740223337403, 'Mouse3192': nan, 'Mouse3193': 0.985501279181324, 'Mouse3194': nan, 'Mouse3203': nan, 'Mouse39114': 0.9981132075471698, 'Mouse39124': 0.9953891900030112, 'Mouse39133': 0.9981549815498155, 'Mouse69064': 0.9968796173885994, 'Mouse69072': 0.9951456310679612, 'Mouse69074': 0.9866310160427808}
Mouse  Mouse04215  has only one class - AUC cannot be calculated
n_positive samples  506
n_negative samples  0
Mouse  Mouse3192  has only one class - AUC cannot be calculated
n_positive samples  279
n_negative samples  0
Mouse  Mouse3194  has only one class - AUC cannot be calculated
n_positive samples  324
n_negative samples  0
Mouse  Mouse3203  has only one class - AUC cannot be calculated
n_positive samples  500
n_negative samples  0
Training MW AUCs:  {'auc_metho

In [5]:
# Across-mouse summary (mean and spread, as returned by
# umc_dt.get_mean_std_err_auc) of the score-based AUC, for the training split
# and then the validation split.
# Entry layout: (features, task labels, mouse ids, report format).
for X_split, y_task_split, y_mouse_split, report_format in (
    (X_train, y_task_train, y_mouse_train, "train mw auc mean: %0.2f +/- %0.2f"),
    (X_val, y_task_val, y_mouse_val, "val mw auc mean: %0.2f +/- %0.2f"),
):
    y_pred, s = model.predict(X_split,include_scores=True)
    mw_mean,mw_std = umc_dt.get_mean_std_err_auc(y_pred,y_task_split,y_mouse_split,s,True)
    print(report_format%(mw_mean,mw_std))

# Disabled: ROC-based summary (same helper called without the `s, True` arguments).
#roc_mean,roc_std = umc_dt.get_mean_std_err_auc(y_pred,y_task_val,y_mouse_val)
#print("val roc auc mean: %0.2f +/- %0.2f"%(roc_mean,roc_std))
# Disabled: reconstruction plots.
#umc_dt.make_recon_plots(model,X_train,0,task="OFT (HC vs Task) Training",saveFile="oft_train_recon.png")
#umc_dt.make_recon_plots(model,X_val,0,task="OFT (HC vs Task) Val",saveFile="oft_val_recon.png")

Mouse  Mouse04215  has only one class - AUC cannot be calculated
n_positive samples  506
n_negative samples  0
Mouse  Mouse3192  has only one class - AUC cannot be calculated
n_positive samples  279
n_negative samples  0
Mouse  Mouse3194  has only one class - AUC cannot be calculated
n_positive samples  324
n_negative samples  0
Mouse  Mouse3203  has only one class - AUC cannot be calculated
n_positive samples  500
n_negative samples  0
train mw auc mean: 1.00 +/- 0.00
val mw auc mean: 0.84 +/- 0.06


In [6]:
# Load the pickled FLX train / validation dictionaries.
# NOTE: pickle.load can execute arbitrary code - only open trusted files.
with open(FLX_TRAIN_FILE,"rb") as f:
    flx_train_dict = pickle.load(f)
with open(FLX_VAL_FILE,"rb") as f:
    flx_validation_dict = pickle.load(f)

# Feature keys handed to the projection export below (the "_full" variants).
FULL_FEATURE_LIST = ["X_power_1_2_full","X_coh_1_2_full","X_gc_1_2_full"]


def _stack_flx_features(data_dict):
    """Column-stack the FEATURE_VECTOR arrays of ``data_dict``, each multiplied by its FEATURE_WEIGHTS entry."""
    return np.hstack([data_dict[feature]*weight
                      for feature,weight in zip(FEATURE_VECTOR,FEATURE_WEIGHTS)])


# Per-split arrays.
flx_X_train = _stack_flx_features(flx_train_dict)
(flx_y_train, flx_y_mouse_train,
 flx_y_expDate_train, flx_y_time_train) = (flx_train_dict[key]
                                           for key in ('y_flx','y_mouse','y_expDate','y_time'))

flx_X_validation = _stack_flx_features(flx_validation_dict)
(flx_y_validation, flx_y_mouse_validation,
 flx_y_expDate_validation, flx_y_time_validation) = (flx_validation_dict[key]
                                                     for key in ('y_flx','y_mouse','y_expDate','y_time'))

# Train and validation are pooled: the model is only evaluated on FLX here.
flx_X = np.vstack((flx_X_train,flx_X_validation))
flx_y_task = np.hstack((flx_y_train,flx_y_validation))
flx_y_mouse = np.hstack((flx_y_mouse_train,flx_y_mouse_validation))
flx_y_expDate = np.hstack((flx_y_expDate_train,flx_y_expDate_validation))
flx_y_time = np.hstack((flx_y_time_train,flx_y_time_validation))

# Per-mouse AUCs of the model against the 'y_flx' labels.
y_pred, s = model.predict(flx_X,include_scores=True)
roc_auc_dict = umc_dt.lpne_auc(y_pred,flx_y_task,flx_y_mouse)
print("ROC AUCs: ",roc_auc_dict)
mw_auc_dict = umc_dt.lpne_auc(y_pred,flx_y_task,flx_y_mouse,s,True)
print("MW AUCs: ",mw_auc_dict)

# One projection CSV per source pickle; both exports receive the pooled AUCs.
for flx_file, flx_csv_template in (
    (FLX_TRAIN_FILE,
     ".\\July_30_Projections\\{}_OFT_onto_FLX_flx_vs_saline_Train_Projection_2022_July_30.csv"),
    (FLX_VAL_FILE,
     ".\\July_30_Projections\\{}_OFT_onto_FLX_flx_vs_saline_Val_Projection_2022_July_30.csv"),
):
    df = umc_dt.make_projection_csv(flx_file,model,FULL_FEATURE_LIST,
                    ['y_mouse_full','y_expDate_full','y_time_full','y_flx_full'],
                    save_file = flx_csv_template.format(NETWORK_CONSTRAINT),
                    auc_dict=mw_auc_dict,mousevar="y_mouse_full")



ROC AUCs:  {'auc_method': 'sklearn_roc_auc', 'Mouse3191': 0.5458903996669785, 'Mouse3192': 0.6135640384426793, 'Mouse3193': 0.4773608675776547, 'Mouse3194': 0.4236473534448658, 'Mouse3202': nan, 'Mouse3203': 0.4167402952368501, 'Mouse99003': nan}
Mouse  Mouse3202  has only one class - AUC cannot be calculated
n_positive samples  1504
n_negative samples  0
Mouse  Mouse99003  has only one class - AUC cannot be calculated
n_positive samples  1874
n_negative samples  0
MW AUCs:  {'auc_method': 'mannWhitneyU', 'Mouse3191': (0.5615857144623523, 9.622433431733594e-11), 'Mouse3192': (0.6807863219574385, 2.4355634278180347e-78), 'Mouse3193': (0.4446372272148497, 8.302826272090886e-09), 'Mouse3194': (0.39739855897758875, 6.043847322966595e-27), 'Mouse3203': (0.37630105177312606, 1.1445687834739185e-37)}


In [7]:
# Across-mouse summary of the score-based AUC of the loaded model on the pooled
# FLX data (values as returned by umc_dt.get_mean_std_err_auc).
y_pred, s = model.predict(flx_X,include_scores=True)
mw_mean,mw_std = umc_dt.get_mean_std_err_auc(y_pred,flx_y_task,flx_y_mouse,s,True)
# The report label names the OFT model: MODEL_NAME is built with "OFT" and the
# FLX projection CSVs are named "OFT_onto_FLX". It previously read "EPM->FLX".
print("OFT->FLX mw-auc: {:0.2f} +/- {:0.2f}".format(mw_mean,mw_std))

# Disabled: ROC-based summary (same helper called without the `s, True` arguments).
#roc_mean,roc_std = umc_dt.get_mean_std_err_auc(y_pred,flx_y_task,flx_y_mouse)
#print("OFT->FLX roc-auc: {} +/- {}".format(roc_mean,roc_std))
# Disabled: reconstruction plot.
#umc_dt.make_recon_plots(model,flx_X,0,task="OFT onto FLX (FLX vs Saline) Train and Val",saveFile="oft_onto_flx_recon.png")

Mouse  Mouse3202  has only one class - AUC cannot be calculated
n_positive samples  1504
n_negative samples  0
Mouse  Mouse99003  has only one class - AUC cannot be calculated
n_positive samples  1874
n_negative samples  0
EPM->FLX mw-auc: 0.49 +/- 0.06


In [8]:
# Evaluate the loaded model on the EPM data (homecage vs task) and export the
# projection CSVs.
#
# NOTE(review): this cell rebinds X_train, X_val, y_mouse_train, y_mouse_val,
# y_time_train and y_time_val to EPM arrays. The OFT cells earlier in the
# notebook bind the same names, so re-running those cells after this one would
# silently evaluate EPM data under OFT labels.

#Load the data
# NOTE: pickle.load can execute arbitrary code - only open trusted files.
with open(EPM_TRAIN_FILE,"rb") as f:
    epm_train_dict = pickle.load(f)

with open(EPM_VAL_FILE,"rb") as f:
    epm_validation_dict = pickle.load(f)

NUM_FREQS = 56
# Total stacked feature width divided by the NUM_FREQS columns assumed per feature.
NUM_FEATURES = sum(epm_train_dict[feature].shape[1] for feature in FEATURE_VECTOR) // NUM_FREQS
# The values 1..NUM_FREQS repeated once per feature (NUM_FREQS+1 replaces the
# hard-coded 57); only referenced by the disabled "#*SCALE_VECTOR" factors.
SCALE_VECTOR = np.array([np.arange(1,NUM_FREQS+1) for feature in range(NUM_FEATURES)]).flatten()


def _stack_epm_features(data_dict):
    """Column-stack the FEATURE_VECTOR arrays of ``data_dict``, each multiplied by its FEATURE_WEIGHTS entry.

    Every weight is applied (not just the first), matching how the OFT and FLX
    matrices are assembled; with FEATURE_WEIGHTS = [10,1,1] the values are the
    same as weighting the power block alone.
    """
    return np.hstack([data_dict[feature]*weight
                      for feature,weight in zip(FEATURE_VECTOR,FEATURE_WEIGHTS)])


# --- Training split ---
X_train = _stack_epm_features(epm_train_dict)#*SCALE_VECTOR

y_train = (epm_train_dict['y_ROI']%2).astype(bool)  # True where y_ROI is odd
y_in_task_mask_train = ~epm_train_dict['y_Homecage'].astype(bool)
y_mouse_train = epm_train_dict['y_mouse']
y_time_train = epm_train_dict['y_time']
# Keeps rows whose y_ROI is strictly positive (NaN compares False, so NaN rows
# are excluded as well).
train_nan_mask = (epm_train_dict['y_ROI'] > 0)

# In-task rows with a positive ROI; computed once and reused for every array.
train_task_rows = np.logical_and(y_in_task_mask_train,train_nan_mask)
X_train_task = X_train[train_task_rows]
y_train_task = y_train[train_task_rows]
y_mouse_train_task = y_mouse_train[train_task_rows]
y_time_train_task = y_time_train[train_task_rows]

# --- Validation split ---
X_val = _stack_epm_features(epm_validation_dict)#*SCALE_VECTOR

y_val = (epm_validation_dict['y_ROI']%2).astype(bool)
y_in_task_mask_val= ~epm_validation_dict['y_Homecage'].astype(bool)
y_mouse_val = epm_validation_dict['y_mouse']
y_time_val = epm_validation_dict['y_time']
val_nan_mask = (epm_validation_dict['y_ROI'] > 0)

val_task_rows = np.logical_and(y_in_task_mask_val,val_nan_mask)
X_val_task = X_val[val_task_rows]
y_val_task = y_val[val_task_rows]
y_mouse_val_task = y_mouse_val[val_task_rows]
y_time_val_task = y_time_val[val_task_rows]

# --- Pooled evaluation (all rows; the label is "in task" vs homecage) ---
epm_X = np.vstack([X_train,X_val])
epm_y_task = np.hstack([y_in_task_mask_train,y_in_task_mask_val])
epm_y_mouse = np.hstack([y_mouse_train,y_mouse_val])
epm_y_time = np.hstack([y_time_train,y_time_val])

y_pred, s = model.predict(epm_X,include_scores=True)
roc_auc_dict = umc_dt.lpne_auc(y_pred,epm_y_task,epm_y_mouse)
print("ROC AUCs: ",roc_auc_dict)
mw_auc_dict = umc_dt.lpne_auc(y_pred,epm_y_task,epm_y_mouse,s,True)
print("MW AUCs: ",mw_auc_dict)

# One projection CSV per source pickle; both exports receive the pooled AUCs.
for epm_file, epm_csv_template in (
    (EPM_TRAIN_FILE,
     ".\\July_30_Projections\\{}_OFT_onto_EPM_HC_vs_Task_Train_Projection_2022_July_30.csv"),
    (EPM_VAL_FILE,
     ".\\July_30_Projections\\{}_OFT_onto_EPM_HC_vs_Task_Val_Projection_2022_July_30.csv"),
):
    df = umc_dt.make_projection_csv(epm_file,model,FEATURE_VECTOR,
                    ['y_mouse','y_expDate','y_time','y_Homecage','y_ROI','y_vel','y_locx','y_locy'],
                    save_file = epm_csv_template.format(NETWORK_CONSTRAINT),
                    auc_dict=mw_auc_dict)

ROC AUCs:  {'auc_method': 'sklearn_roc_auc', 'Mouse04193': 0.6187636114396895, 'Mouse04201': 0.7455852625958275, 'Mouse04202': 0.7179058694258769, 'Mouse04205': 0.6444444444444444, 'Mouse04215': 0.6180336783678367, 'Mouse0630': 0.4736068087236772, 'Mouse0633': 0.5800378081427067, 'Mouse0634': 0.531364997860505, 'Mouse0642': 0.6173003703124185, 'Mouse0643': 0.582544378698225, 'Mouse1551': 0.5366787435102529, 'Mouse39114': 0.6643113830613832, 'Mouse39124': 0.6836372747460059, 'Mouse39125': 0.6284665356479965, 'Mouse39133': 0.731025461214802, 'Mouse6291': 0.5804691691327899, 'Mouse6292': 0.5761974427493827, 'Mouse6293': 0.6042929292929293, 'Mouse69064': 0.7194332527531561, 'Mouse69065': 0.6966189207195203, 'Mouse69074': 0.6379433819192855, 'Mouse8580': 0.5275973468055126, 'Mouse8581': 0.5036009070294785, 'Mouse8582': 0.5952380952380953, 'Mouse8891': 0.5049049101420462, 'Mouse8894': 0.5902855024100853}
MW AUCs:  {'auc_method': 'mannWhitneyU', 'Mouse04193': (0.6911350144267442, 5.0932675124

In [9]:
# Across-mouse summaries of the model on the pooled EPM data: first the AUC
# from the predictions alone, then the score-based variant (`s, True`).
y_pred, s = model.predict(epm_X,include_scores=True)

roc_summary = umc_dt.get_mean_std_err_auc(y_pred,epm_y_task,epm_y_mouse)
mw_summary = umc_dt.get_mean_std_err_auc(y_pred,epm_y_task,epm_y_mouse,s,True)
roc_mean,roc_std = roc_summary
mw_mean,mw_std = mw_summary

# The ROC line prints at full precision, the MW line at two decimals.
print("OFT->EPM roc-auc: {} +/- {}".format(roc_mean,roc_std))
print("OFT->EPM mw-auc: {:0.2f} +/- {:0.2f}".format(mw_mean,mw_std))

# Disabled: reconstruction plot.
# NOTE(review): this call passes flx_X although the title says EPM - it
# probably should be epm_X before re-enabling.
#umc_dt.make_recon_plots(model,flx_X,0,task="OFT onto EPM (HC vs Task) Train and Val",saveFile="oft_onto_epm_recon.png")

OFT->EPM roc-auc: 0.6119341725532281 +/- 0.014600019701238457
OFT->EPM mw-auc: 0.67 +/- 0.02
