In [1]:
import pickle
import numpy as np
import torch
import matplotlib.pyplot as plt
import os
import sys
import sklearn
RANDOM_STATE=42

In [14]:
# Shell escape: list the contents of the current working directory.
# `dir` is the Windows command; on a POSIX shell this line would need `ls` instead.
!dir

 Volume in drive E is HunterData
 Volume Serial Number is 1C77-F27A

 Directory of e:\Anx-Analyses-March-7\Final_Analyses\EPM_model

03/04/2022  05:52 PM    <DIR>          .
03/04/2022  05:52 PM    <DIR>          ..
02/06/2022  09:23 PM         3,382,895 EPM_FINAL_MODEL_10000_scaled_fixed.pt
02/07/2022  01:07 AM         3,058,799 EPM_FINAL_MODEL_1000_scaled_fixed.pt
02/04/2022  07:21 PM         3,826,095 EPM_FINAL_MODEL_100_scaled.pt
02/07/2022  12:35 AM         3,026,415 EPM_FINAL_MODEL_100_scaled_fixed.pt
02/07/2022  03:49 PM         2,089,263 EPM_FINAL_MODEL_2000_scaled_fixed.pt
02/03/2022  11:26 AM         4,722,671 EPM_FINAL_MODEL_25000_scaled.pt
02/07/2022  12:09 AM         3,023,855 EPM_FINAL_MODEL_30_scaled_fixed.pt
02/04/2022  04:17 PM         5,622,895 EPM_FINAL_MODEL_50000_scaled.pt
02/06/2022  05:52 PM         4,823,215 EPM_FINAL_MODEL_50000_scaled_fixed.pt
02/07/2022  05:13 PM         2,197,295 EPM_FINAL_MODEL_5000_FINAL.pt
02/07/2022  12:07 AM         3,024,943 EPM_FINAL_

In [2]:
# Absolute location of the pickled EPM dataset.
DATA_PATH = "E:\\Anx-Analyses-March-7\\ANX_EPM_data_2022_May_17.pkl"

# DATA_PATH points at a data file, not a package directory, so it is
# deliberately not added to sys.path (module search-path entries must be
# directories or importable archives).

# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only load pickles that come from a trusted source.
with open(DATA_PATH, "rb") as f:
    data_dict = pickle.load(f)

### Dalton's Hand-Picked Train / Validation / Test Splits

In [3]:
# Mouse-level split of the EPM dataset. Each list holds the mouse IDs whose
# rows go into the corresponding split; the IDs are matched against the
# dataset's `y_mouse` entries in the cells that build and save each split.

# Mice used for training.
EPM_TRAIN_MICE = [
    'Mouse04201',
    'Mouse04202',
    'Mouse04205',
    'Mouse04215',
    'Mouse39134',
    'Mouse69064',
    'Mouse69074',
    'Mouse3191',
    'Mouse3193',
    'Mouse3204',
    'Mouse3192',
    'Mouse3194',
    'Mouse3203',
    'Mouse04191',
    'Mouse39114',
    'Mouse39124',
    'Mouse39133',
    'Mouse69072',
    'Mouse0630',
    'Mouse0634',
    'Mouse0643',
    'Mouse1551',
    'Mouse6293',
    'Mouse8580',
    'Mouse8581',
    'Mouse8891',
    'Mouse6291',
    'Mouse6292',
    'Mouse8582',
    'Mouse8894',
]

# Mice held out for validation.
EPM_VAL_MICE = [
    'Mouse04193',
    'Mouse39125',
    'Mouse69065',
    'Mouse0633',
    'Mouse0642',
]

# Mice held out for testing.
EPM_TEST_MICE = [
    'Mouse39115',
    'Mouse39122',
    'Mouse39132',
    'Mouse39135',
    'Mouse69075',
    'Mouse04203',
    'Mouse39121',
    'Mouse69061',
    'Mouse69071',
    'Mouse0631',
    'Mouse0641',
    'Mouse1552',
    'Mouse1553',
    'Mouse6672',
    'Mouse6674',
    'Mouse8893',
]

# Guard against leakage: the lists are maintained by hand, so fail loudly if a
# mouse ID is listed twice or ends up in more than one split.
_split_ids = EPM_TRAIN_MICE + EPM_VAL_MICE + EPM_TEST_MICE
_repeated_ids = sorted({m for m in _split_ids if _split_ids.count(m) > 1})
assert not _repeated_ids, (
    f"Mouse IDs listed more than once across train/val/test splits: {_repeated_ids}"
)

In [8]:
# Show which entries the loaded dataset dictionary provides.
data_dict.keys()

dict_keys(['X_power', 'X_directedSpectrum', 'y_mouse', 'y_ROI', 'y_avg_velocity', 'y_expDate', 'y_Homecage', 'y_time', 'area', 'channel', 'powerFeatures', 'dsFeatures', 's', 'fs', 'preprocessVersion', 'powVersion', 'dsVersion', 'windowLength', 'channelArea', 'fsRaw', 'X_power_1_2', 'X_coh_1_2', 'X_gc_1_2', 'y_locx', 'y_locy'])

In [9]:
# Bind the entries of the loaded dictionary to notebook-level names.

# Feature entries (X_* keys). Note the renames: "X_power" -> X_psd and
# "X_directedSpectrum" -> X_ds.
X_psd, X_ds = data_dict["X_power"], data_dict["X_directedSpectrum"]
X_power_1_2, X_coh_1_2, X_gc_1_2 = (
    data_dict[key] for key in ("X_power_1_2", "X_coh_1_2", "X_gc_1_2")
)

# Label / metadata entries (y_* keys). "y_avg_velocity" is bound to y_vel.
y_mouse, y_ROI, y_vel = (
    data_dict[key] for key in ("y_mouse", "y_ROI", "y_avg_velocity")
)
y_expDate, y_Homecage, y_time = (
    data_dict[key] for key in ("y_expDate", "y_Homecage", "y_time")
)
y_locx, y_locy = data_dict["y_locx"], data_dict["y_locy"]


### Save Training Data

In [10]:
# Build and save the training split: keep every row whose mouse ID is listed
# in EPM_TRAIN_MICE.
# NOTE(review): assumes every array indexed below has one entry per y_mouse
# element along its first axis — confirm against the preprocessing output.

# Boolean row selector. np.isin does the membership test in one call instead
# of OR-ing one equality mask per mouse onto a zeros array.
train_mask = np.isin(np.array(y_mouse), EPM_TRAIN_MICE)

# Feature arrays restricted to training rows.
X_psd_train, X_ds_train = X_psd[train_mask], X_ds[train_mask]
X_power_1_2_train = X_power_1_2[train_mask]
X_coh_1_2_train = X_coh_1_2[train_mask]
X_gc_1_2_train = X_gc_1_2[train_mask]

# Labels / metadata restricted to training rows. The np.array(...) wrappers
# are kept for the entries that were already being converted before indexing.
y_mouse_train = np.array(y_mouse)[train_mask]
y_ROI_train = y_ROI[train_mask]
y_vel_train = y_vel[train_mask]
y_expDate_train = np.array(y_expDate)[train_mask]
y_Homecage_train = y_Homecage[train_mask]
y_time_train = np.array(y_time)[train_mask]
y_locx_train = np.array(y_locx)[train_mask]
y_locy_train = np.array(y_locy)[train_mask]

# Everything that is written to disk for the training split, plus the list of
# mice and a free-text provenance note.
train_dict = {
    "X_psd": X_psd_train,
    "X_ds": X_ds_train,
    "X_power_1_2": X_power_1_2_train,
    "X_coh_1_2": X_coh_1_2_train,
    "X_gc_1_2": X_gc_1_2_train,
    "y_mouse": y_mouse_train,
    "y_locx": y_locx_train,
    "y_locy": y_locy_train,
    "y_ROI": y_ROI_train,
    "y_vel": y_vel_train,
    "y_expDate": y_expDate_train,
    "y_Homecage": y_Homecage_train,
    "y_time": y_time_train,
    "mice": EPM_TRAIN_MICE,
    "info": "This training set was hand selected by Dalton and processed on May 17 2022"
}

# Written relative to the notebook's current working directory.
TRAIN_FILE_NAME = "EPM_train_dict_May_17.pkl"
with open(TRAIN_FILE_NAME, "wb") as f:
    pickle.dump(train_dict, f)

### Save Validation Data

In [11]:
# Build and save the validation split: keep every row whose mouse ID is listed
# in EPM_VAL_MICE.
# NOTE(review): assumes every array indexed below has one entry per y_mouse
# element along its first axis — confirm against the preprocessing output.

# Boolean row selector. np.isin does the membership test in one call instead
# of OR-ing one equality mask per mouse onto a zeros array.
validation_mask = np.isin(np.array(y_mouse), EPM_VAL_MICE)

# Feature arrays restricted to validation rows.
X_psd_validation, X_ds_validation = X_psd[validation_mask], X_ds[validation_mask]
X_power_1_2_validation = X_power_1_2[validation_mask]
X_coh_1_2_validation = X_coh_1_2[validation_mask]
X_gc_1_2_validation = X_gc_1_2[validation_mask]

# Labels / metadata restricted to validation rows. The np.array(...) wrappers
# are kept for the entries that were already being converted before indexing.
y_mouse_validation = np.array(y_mouse)[validation_mask]
y_ROI_validation = y_ROI[validation_mask]
y_vel_validation = y_vel[validation_mask]
y_expDate_validation = np.array(y_expDate)[validation_mask]
y_Homecage_validation = y_Homecage[validation_mask]
y_time_validation = np.array(y_time)[validation_mask]
y_locx_validation = np.array(y_locx)[validation_mask]
y_locy_validation = np.array(y_locy)[validation_mask]

# Everything that is written to disk for the validation split, plus the list
# of mice and a free-text provenance note. The processing date in "info" now
# matches the May 17 input and output file names.
validation_dict = {
    "X_psd": X_psd_validation,
    "X_ds": X_ds_validation,
    "X_power_1_2": X_power_1_2_validation,
    "X_coh_1_2": X_coh_1_2_validation,
    "X_gc_1_2": X_gc_1_2_validation,
    "y_mouse": y_mouse_validation,
    "y_locx": y_locx_validation,
    "y_locy": y_locy_validation,
    "y_ROI": y_ROI_validation,
    "y_vel": y_vel_validation,
    "y_expDate": y_expDate_validation,
    "y_Homecage": y_Homecage_validation,
    "y_time": y_time_validation,
    "mice": EPM_VAL_MICE,
    "info": "This validation set was hand selected by Dalton and processed on May 17 2022"
}

# Written relative to the notebook's current working directory.
VAL_FILE_NAME = "EPM_val_dict_May_17.pkl"
with open(VAL_FILE_NAME, "wb") as f:
    pickle.dump(validation_dict, f)

### Save Test Data

In [12]:
# Build and save the test split: keep every row whose mouse ID is listed in
# EPM_TEST_MICE.
# NOTE(review): assumes every array indexed below has one entry per y_mouse
# element along its first axis — confirm against the preprocessing output.

# Boolean row selector. np.isin does the membership test in one call instead
# of OR-ing one equality mask per mouse onto a zeros array.
test_mask = np.isin(np.array(y_mouse), EPM_TEST_MICE)

# Feature arrays restricted to test rows.
X_psd_test, X_ds_test = X_psd[test_mask], X_ds[test_mask]
X_power_1_2_test = X_power_1_2[test_mask]
X_coh_1_2_test = X_coh_1_2[test_mask]
X_gc_1_2_test = X_gc_1_2[test_mask]

# Labels / metadata restricted to test rows. The np.array(...) wrappers are
# kept for the entries that were already being converted before indexing.
y_mouse_test = np.array(y_mouse)[test_mask]
y_ROI_test = y_ROI[test_mask]
y_vel_test = y_vel[test_mask]
y_expDate_test = np.array(y_expDate)[test_mask]
y_Homecage_test = y_Homecage[test_mask]
y_time_test = np.array(y_time)[test_mask]
y_locx_test = np.array(y_locx)[test_mask]
y_locy_test = np.array(y_locy)[test_mask]

# Everything that is written to disk for the test split, plus the list of mice
# and a free-text provenance note. The processing date in "info" now matches
# the May 17 input and output file names.
test_dict = {
    "X_psd": X_psd_test,
    "X_ds": X_ds_test,
    "X_power_1_2": X_power_1_2_test,
    "X_coh_1_2": X_coh_1_2_test,
    "X_gc_1_2": X_gc_1_2_test,
    "y_mouse": y_mouse_test,
    "y_locx": y_locx_test,
    "y_locy": y_locy_test,
    "y_ROI": y_ROI_test,
    "y_vel": y_vel_test,
    "y_expDate": y_expDate_test,
    "y_Homecage": y_Homecage_test,
    "y_time": y_time_test,
    "mice": EPM_TEST_MICE,
    "info": "This test set was hand selected by Dalton and processed on May 17 2022"
}

# Written relative to the notebook's current working directory.
TEST_FILE_NAME = "EPM_test_dict_May_17.pkl"
with open(TEST_FILE_NAME, "wb") as f:
    pickle.dump(test_dict, f)