In [1]:
import pickle
import numpy as np
import torch
import matplotlib.pyplot as plt
import os
import sys
import sklearn
# Fixed seed for randomized splitting. In this notebook it is referenced only by
# the disabled legacy split code kept in the final cell.
RANDOM_STATE = 42

In [2]:
# Absolute Windows path of the pickled OFT dataset.
# NOTE(review): this path is machine-specific; consider making it configurable.
DATA_PATH = "Z:\\Projects\\lpne\\DATA\\Pickle_Files_For_UMC\\ANX_OFT_data_2022_Feb_13.pkl"

# The former `sys.path.append(DATA_PATH)` was dropped: DATA_PATH names a pickle
# file, not a directory or zip archive, so the import system never used it.

# SECURITY: pickle.load can run arbitrary code while unpickling. Only load this
# file from a trusted location.
with open(DATA_PATH, "rb") as f:
    data_dict = pickle.load(f)

### Dalton's Hand-Picked Train / Validation / Test Splits

In [12]:
# Training mice that are only included for a single experiment date.
# The date is the suffix of each list's name.
DATE_FILTERED_MICE_110918 = ["Mouse3191", "Mouse3193", "Mouse3204"]
DATE_FILTERED_MICE_121218 = ["Mouse3192", "Mouse3194", "Mouse3203"]

# Full training cohort. The date-filtered mice are spliced in from the two
# lists above so each name is declared once; the resulting order is unchanged.
OFT_TRAINING_MICE = (
    [
        "Mouse04201",
        "Mouse04202",
        "Mouse04205",
        "Mouse04215",
        "Mouse39134",
        "Mouse69064",
        "Mouse69074",
    ]
    + DATE_FILTERED_MICE_110918
    + DATE_FILTERED_MICE_121218
    + [
        "Mouse04191",
        "Mouse39114",
        "Mouse39124",
        "Mouse39133",
        "Mouse69072",
    ]
)

# Held-out test cohort.
OFT_TEST_MICE = [
    "Mouse39115", "Mouse39122", "Mouse39132",
    "Mouse39135", "Mouse69075", "Mouse04203",
    "Mouse39121", "Mouse69061", "Mouse69071",
]

# Validation cohort.
OFT_VALIDATION_MICE = ["Mouse04193", "Mouse39125", "Mouse69065"]

In [13]:
# Pull the feature and label collections out of the loaded dataset. The cells
# below index all of them with the same per-window mask.

# Features
X_psd, X_ds = data_dict["X_power"], data_dict["X_directedSpectrum"]

# Identity / session labels
y_mouse, y_expDate = data_dict["y_mouse"], data_dict["y_expDate"]

# Behavioural and task labels
y_ROI, y_vel = data_dict["y_ROI"], data_dict["y_avg_velocity"]
y_Homecage, y_time = data_dict["y_Homecage"], data_dict["y_time"]

### Save Training Data

In [14]:
# Select every window that belongs to a training mouse (restricted to a single
# experiment date for the date-filtered mice) and pickle the subset.

# Convert the label collections once instead of on every loop iteration.
mouse_labels = np.array(y_mouse)
exp_dates = np.array(y_expDate)

# Explicitly boolean mask with the shape of y_time, so the selection does not
# depend on y_time's dtype.
train_mask = np.zeros_like(y_time, dtype=bool)
for mouse in OFT_TRAINING_MICE:
    mouse_idxs = mouse_labels == mouse

    # Some mice contribute only the windows recorded on one experiment date.
    if mouse in DATE_FILTERED_MICE_110918:
        required_date = "110918"
    elif mouse in DATE_FILTERED_MICE_121218:
        required_date = "121218"
    else:
        required_date = None

    if required_date is not None:
        # NOTE(review): assumes y_expDate stores dates as strings such as
        # "110918" - confirm against the source pickle.
        exp_filt_idxs = exp_dates == required_date
        mouse_idxs = np.logical_and(mouse_idxs, exp_filt_idxs)

    train_mask = np.logical_or(train_mask, mouse_idxs)

# Apply the same mask to every feature / label collection.
X_psd_train, X_ds_train = X_psd[train_mask], X_ds[train_mask]
y_mouse_train = mouse_labels[train_mask]
y_ROI_train = y_ROI[train_mask]
y_vel_train = y_vel[train_mask]
y_expDate_train = exp_dates[train_mask]
y_Homecage_train = y_Homecage[train_mask]
y_time_train = np.array(y_time)[train_mask]

train_dict = {
    "X_psd": X_psd_train,
    "X_ds": X_ds_train,
    "y_mouse": y_mouse_train,
    "y_ROI": y_ROI_train,
    "y_vel": y_vel_train,
    "y_expDate": y_expDate_train,
    "y_Homecage": y_Homecage_train,
    "y_time": y_time_train,
    "mice": OFT_TRAINING_MICE,
    "info": "This training set was hand selected by Dalton and processed on Feb 19 2022"
}

# Written relative to the notebook's current working directory.
TRAIN_FILE_NAME = "OFT_train_dict_hand_picked.pkl"
with open(TRAIN_FILE_NAME, "wb") as f:
    pickle.dump(train_dict, f)

### Save Validation Data

In [15]:
# Select every window that belongs to a validation mouse and pickle the subset.

# Boolean membership mask: True where the window's mouse is in the validation
# cohort (equivalent to OR-ing one equality test per mouse).
validation_mouse_labels = np.array(y_mouse)
validation_mask = np.isin(validation_mouse_labels, OFT_VALIDATION_MICE)

# Apply the same mask to every feature / label collection.
X_psd_validation, X_ds_validation = X_psd[validation_mask], X_ds[validation_mask]
y_mouse_validation = validation_mouse_labels[validation_mask]
y_ROI_validation = y_ROI[validation_mask]
y_vel_validation = y_vel[validation_mask]
y_expDate_validation = np.array(y_expDate)[validation_mask]
y_Homecage_validation = y_Homecage[validation_mask]
y_time_validation = np.array(y_time)[validation_mask]

validation_dict = {
    "X_psd": X_psd_validation,
    "X_ds": X_ds_validation,
    "y_mouse": y_mouse_validation,
    "y_ROI": y_ROI_validation,
    "y_vel": y_vel_validation,
    "y_expDate": y_expDate_validation,
    "y_Homecage": y_Homecage_validation,
    "y_time": y_time_validation,
    "mice": OFT_VALIDATION_MICE,
    # Fixed: this description previously said "training set".
    "info": "This validation set was hand selected by Dalton and processed on Feb 19 2022"
}

# Written relative to the notebook's current working directory.
VALIDATION_FILE_NAME = "OFT_validation_dict_hand_picked.pkl"
with open(VALIDATION_FILE_NAME, "wb") as f:
    pickle.dump(validation_dict, f)

### Save Test Data

In [17]:
# Select every window that belongs to a test mouse and pickle the subset.

# Boolean membership mask: True where the window's mouse is in the test cohort
# (equivalent to OR-ing one equality test per mouse).
test_mouse_labels = np.array(y_mouse)
test_mask = np.isin(test_mouse_labels, OFT_TEST_MICE)

# Apply the same mask to every feature / label collection.
X_psd_test, X_ds_test = X_psd[test_mask], X_ds[test_mask]
y_mouse_test = test_mouse_labels[test_mask]
y_ROI_test = y_ROI[test_mask]
y_vel_test = y_vel[test_mask]
y_expDate_test = np.array(y_expDate)[test_mask]
y_Homecage_test = y_Homecage[test_mask]
y_time_test = np.array(y_time)[test_mask]

test_dict = {
    "X_psd": X_psd_test,
    "X_ds": X_ds_test,
    "y_mouse": y_mouse_test,
    "y_ROI": y_ROI_test,
    "y_vel": y_vel_test,
    "y_expDate": y_expDate_test,
    "y_Homecage": y_Homecage_test,
    "y_time": y_time_test,
    "mice": OFT_TEST_MICE,
    # Fixed: this description previously said "training set".
    "info": "This test set was hand selected by Dalton and processed on Feb 19 2022"
}

# Written relative to the notebook's current working directory.
TEST_FILE_NAME = "OFT_test_dict_hand_picked.pkl"
with open(TEST_FILE_NAME, "wb") as f:
    pickle.dump(test_dict, f)

### Old Train Test Split Code

In [None]:
# LEGACY / DISABLED: the triple-quoted string below holds the previous random
# train/validation/test split, kept for reference only. Because it is a string
# literal, none of it executes.
# What the disabled code would do: read data_dict["Old_Mice"], ["New_Mice"] and
# ["Num_Mice"], split the new mice with RANDOM_STATE, and write
# OFT_train_dict.pkl, OFT_validation_dict.pkl and OFT_test_dict.pkl.
# NOTE(review): it calls train_test_split, but the import cell only has
# `import sklearn`; re-enabling it would presumably need
# `from sklearn.model_selection import train_test_split` - confirm.
'''##OLD TRAIN TEST SPLIT CODE
print("Old mice that must be used for training: \n---------------------\n",data_dict["Old_Mice"])
print("\nNew Mice that can be split into training / validation / and test datasets: \n----------------------\n",data_dict["New_Mice"])
print("\nNumber of New Mice: ",np.unique(data_dict["New_Mice"]).shape[0])
print("Number of Old Mice: ",np.unique(data_dict["Old_Mice"]).shape[0])
print("Number of total Mice: ",data_dict["Num_Mice"])

print("Train perc: %0.2f, Validation perc: %0.2f, Holdout perc: %0.2f"%(0.6,0.1,0.3))
print("Num train mice: %d, Num val mice: %d, Num test mice: %d"%(0.6*31+1,0.1*31,0.3*31))

base_training_mice = data_dict["Old_Mice"]
train_mice,test_val_mice = train_test_split(data_dict["New_Mice"],test_size=12,random_state=RANDOM_STATE)
val_mice,test_mice = train_test_split(test_val_mice,test_size=9,random_state=RANDOM_STATE)

print("train_mice:\n-------------\n ",np.hstack([base_training_mice, train_mice]))
print("val_mice:\n-------------\n ",val_mice)
print("test_mice:\n-------------\n ",test_mice)

training_mice = np.hstack([base_training_mice, train_mice])
validation_mice = val_mice
test_mice = test_mice

# Set up the data and split it by training, validation and test mice
# Save a new dictionary in the local directory for the training/validation and test mice

X_psd = data_dict["X_power"]
X_ds = data_dict["X_directedSpectrum"]
y_mouse = data_dict["y_mouse"]
y_ROI = data_dict["y_ROI"]
y_vel = data_dict["y_avg_velocity"]
y_expDate = data_dict["y_expDate"]
y_Homecage = data_dict["y_Homecage"]
y_time = data_dict["y_time"]


#Make masks to isolate relevant windows
#Make corresponding dictionaries
#Save them in this directory
train_mask = np.zeros_like(y_time)
validation_mask = np.zeros_like(y_time)
test_mask = np.zeros_like(y_time)

for mouse in training_mice:
    train_mask = np.logical_or(train_mask,np.array(y_mouse)==mouse)
for mouse in validation_mice:
    validation_mask = np.logical_or(validation_mask,np.array(y_mouse)==mouse)
for mouse in test_mice:
    test_mask = np.logical_or(test_mask,np.array(y_mouse)==mouse)

X_psd_train, X_ds_train = X_psd[train_mask==1], X_ds[train_mask==1]
y_mouse_train = np.array(y_mouse)[train_mask==1]
y_ROI_train = y_ROI[train_mask==1]
y_vel_train = y_vel[train_mask==1]
y_expDate_train = np.array(y_expDate)[train_mask==1]
y_Homecage_train = y_Homecage[train_mask==1]
y_time_train = np.array(y_time)[train_mask==1]

train_dict = {
    "X_psd":X_psd_train,
    "X_ds":X_ds_train,
    "y_mouse":y_mouse_train,
    "y_ROI":y_ROI_train,
    "y_vel":y_vel_train,
    "y_expDate":y_expDate_train,
    "y_Homecage":y_Homecage_train,
    "y_time":y_time_train,
    "mice":training_mice
}

TRAIN_FILE_NAME = "OFT_train_dict.pkl"
with open(TRAIN_FILE_NAME,"wb") as f:
    pickle.dump(train_dict,f)

X_psd_validation, X_ds_validation = X_psd[validation_mask==1], X_ds[validation_mask==1]
y_mouse_validation = np.array(y_mouse)[validation_mask==1]
y_ROI_validation = y_ROI[validation_mask==1]
y_vel_validation = y_vel[validation_mask==1]
y_expDate_validation = np.array(y_expDate)[validation_mask==1]
y_Homecage_validation = y_Homecage[validation_mask==1]
y_time_validation = np.array(y_time)[validation_mask==1]

validation_dict = {
    "X_psd":X_psd_validation,
    "X_ds":X_ds_validation,
    "y_mouse":y_mouse_validation,
    "y_ROI":y_ROI_validation,
    "y_vel":y_vel_validation,
    "y_expDate":y_expDate_validation,
    "y_Homecage":y_Homecage_validation,
    "y_time":y_time_validation,
    "mice":validation_mice
}
VALIDATION_FILE_NAME = "OFT_validation_dict.pkl"
with open(VALIDATION_FILE_NAME,"wb") as f:
    pickle.dump(validation_dict,f)
    
X_psd_test, X_ds_test = X_psd[test_mask==1], X_ds[test_mask==1]
y_mouse_test = np.array(y_mouse)[test_mask==1]
y_ROI_test = y_ROI[test_mask==1]
y_vel_test = y_vel[test_mask==1]
y_expDate_test = np.array(y_expDate)[test_mask==1]
y_Homecage_test = y_Homecage[test_mask==1]
y_time_test = np.array(y_time)[test_mask==1]

test_dict = {
    "X_psd":X_psd_test,
    "X_ds":X_ds_test,
    "y_mouse":y_mouse_test,
    "y_ROI":y_ROI_test,
    "y_vel":y_vel_test,
    "y_expDate":y_expDate_test,
    "y_Homecage":y_Homecage_test,
    "y_time":y_time_test,
    "mice":test_mice
}

TEST_FILE_NAME = "OFT_test_dict.pkl"
with open(TEST_FILE_NAME,"wb") as f:
    pickle.dump(test_dict,f)'''