In [1]:
import pickle
import numpy as np
import torch
import matplotlib.pyplot as plt
import os
import sys
import sklearn
from sklearn.model_selection import train_test_split
# Location of the pickled dataset that the cells below load and split.
# NOTE(review): this is an absolute, machine-specific Windows path (mapped Z: drive);
# the notebook will not run elsewhere without editing it.
DATA_PATH = "Z:\\Projects\\lpne\\DATA\\Pickle_Files_For_UMC\\ANX_FLX_data_2022_Jan_27.pkl"
# Relative location of the Universal-Mouse-Code directory, which is put on the
# module search path below so that `umc_data_tools` can be imported.
UMC_PATH = "..\\Universal-Mouse-Code\\"
# NOTE(review): DATA_PATH names a .pkl file, not a directory of modules; adding it
# to sys.path presumably has no effect on imports -- confirm and consider removing.
sys.path.append(DATA_PATH)
# Must run before the import on the next line.
sys.path.append(UMC_PATH)
# NOTE(review): `umc_dt` is not referenced in the cells visible here -- verify it is still needed.
import umc_data_tools as umc_dt
# Fixed seed kept for consistency between runs.
# NOTE(review): not referenced by any cell visible here.
RANDOM_STATE=42

In [3]:
# Deserialize the dataset dictionary from DATA_PATH.
# NOTE: pickle.load can execute arbitrary code; only point DATA_PATH at trusted files.
with open(DATA_PATH, mode="rb") as dataset_file:
    data_dict = pickle.load(dataset_file)

# Last expression of the cell: show which entries the dataset provides.
data_dict.keys()

dict_keys(['X_power', 'X_directedSpectrum', 'y_mouse', 'y_flx', 'y_saline', 'y_expDate', 'y_time', 'area', 'channel', 'powerFeatures', 'dsFeatures', 's', 'fs', 'preprocessVersion', 'powVersion', 'dsVersion', 'windowLength', 'channelArea', 'fsRaw', 'Old_Mice', 'New_Mice', 'Original_Label_Struct'])

In [6]:
# Pull the two feature matrices and the per-window label vectors out of the
# loaded dictionary under shorter names.
X_psd, X_ds = data_dict["X_power"], data_dict["X_directedSpectrum"]
y_mouse, y_flx = data_dict["y_mouse"], data_dict["y_flx"]
y_expDate, y_time = data_dict["y_expDate"], data_dict["y_time"]

# Isolate the "last 30 minutes" subset: keep windows whose y_time exceeds 30*60.
# NOTE(review): presumably y_time is in seconds from recording start -- confirm.
TIME_FILTER = 30 * 60
time_mask = np.array(y_time) > TIME_FILTER

# The feature matrices are indexed directly; the label vectors are converted to
# arrays first so that boolean-mask indexing works on them.
X_psd_last_30 = X_psd[time_mask]
X_ds_last_30 = X_ds[time_mask]
y_mouse_last_30, y_time_last_30, y_flx_last_30, y_expDate_last_30 = (
    np.array(labels)[time_mask]
    for labels in (y_mouse, y_time, y_flx, y_expDate)
)

In [2]:
# Mouse-level partition used by the save cells below: each mouse ID appears in
# exactly one of the three lists.
FLX_TRAIN_MICE = [
    'Mouse3192', 'Mouse3193', 'Mouse3194',
    'Mouse3202', 'Mouse3203', 'Mouse99003',
]

FLX_VAL_MICE = ['Mouse3191']

FLX_TEST_MICE = ['Mouse69061', 'Mouse99002', 'Mouse99021']

### Fluoxetine Training Data Saving

In [21]:
# --- Full recordings: every window that belongs to a training mouse ---
# Start from an all-zero mask shaped like y_time and OR in one mouse at a time.
all_mouse_ids = np.array(y_mouse)
train_mask = np.zeros_like(y_time)
for train_mouse in FLX_TRAIN_MICE:
    train_mask = np.logical_or(train_mask, all_mouse_ids == train_mouse)
train_rows = train_mask == 1

X_psd_train_full = X_psd[train_rows]
X_ds_train_full = X_ds[train_rows]
y_mouse_train_full = all_mouse_ids[train_rows]
y_expDate_train_full = np.array(y_expDate)[train_rows]
y_time_train_full = np.array(y_time)[train_rows]
y_flx_train_full = np.array(y_flx)[train_rows]

# Bundle features, labels, the mouse list and a descriptive note into one dict.
train_dict_full = dict(
    X_psd=X_psd_train_full,
    X_ds=X_ds_train_full,
    y_mouse=y_mouse_train_full,
    y_expDate=y_expDate_train_full,
    y_time=y_time_train_full,
    mice=FLX_TRAIN_MICE,
    y_flx=y_flx_train_full,
    note="Full dataset of FLX training data",
)

# Written to the current working directory.
TRAIN_FILE_NAME_FULL = "FLX_train_dict_full_recordings.pkl"
with open(TRAIN_FILE_NAME_FULL, "wb") as out_file:
    pickle.dump(train_dict_full, out_file)

# --- Last-30 subset: same selection applied to the *_last_30 arrays ---
last_30_mouse_ids = np.array(y_mouse_last_30)
train_mask_last_30 = np.zeros_like(y_time_last_30)
for train_mouse in FLX_TRAIN_MICE:
    train_mask_last_30 = np.logical_or(train_mask_last_30, last_30_mouse_ids == train_mouse)
train_rows_last_30 = train_mask_last_30 == 1

X_psd_train_last_30 = X_psd_last_30[train_rows_last_30]
X_ds_train_last_30 = X_ds_last_30[train_rows_last_30]
y_mouse_train_last_30 = last_30_mouse_ids[train_rows_last_30]
y_expDate_train_last_30 = np.array(y_expDate_last_30)[train_rows_last_30]
y_time_train_last_30 = np.array(y_time_last_30)[train_rows_last_30]
y_flx_train_last_30 = np.array(y_flx_last_30)[train_rows_last_30]

train_dict_last_30 = dict(
    X_psd=X_psd_train_last_30,
    X_ds=X_ds_train_last_30,
    y_mouse=y_mouse_train_last_30,
    y_expDate=y_expDate_train_last_30,
    y_time=y_time_train_last_30,
    mice=FLX_TRAIN_MICE,
    y_flx=y_flx_train_last_30,
    note="Last 30 minutes of FLX training data",
)

TRAIN_FILE_NAME_LAST_30 = "FLX_train_dict_last_30.pkl"
with open(TRAIN_FILE_NAME_LAST_30, "wb") as out_file:
    pickle.dump(train_dict_last_30, out_file)

### Fluoxetine Validation Data Saving

In [32]:
# --- Full recordings: every window that belongs to a validation mouse ---
# Start from an all-zero mask shaped like y_time and OR in one mouse at a time.
all_mouse_ids = np.array(y_mouse)
validation_mask = np.zeros_like(y_time)
for val_mouse in FLX_VAL_MICE:
    validation_mask = np.logical_or(validation_mask, all_mouse_ids == val_mouse)
val_rows = validation_mask == 1

X_psd_val_full = X_psd[val_rows]
X_ds_val_full = X_ds[val_rows]
y_mouse_val_full = all_mouse_ids[val_rows]
y_expDate_val_full = np.array(y_expDate)[val_rows]
y_time_val_full = np.array(y_time)[val_rows]
y_flx_val_full = np.array(y_flx)[val_rows]

# Bundle features, labels, the mouse list and a descriptive note into one dict.
val_dict_full = dict(
    X_psd=X_psd_val_full,
    X_ds=X_ds_val_full,
    y_mouse=y_mouse_val_full,
    y_expDate=y_expDate_val_full,
    y_time=y_time_val_full,
    mice=FLX_VAL_MICE,
    y_flx=y_flx_val_full,
    note="Full dataset of FLX validation data",
)

# Written to the current working directory.
VALIDATION_FILE_NAME_FULL = "FLX_val_dict_full_recordings.pkl"
with open(VALIDATION_FILE_NAME_FULL, "wb") as out_file:
    pickle.dump(val_dict_full, out_file)


# --- Last-30 subset: same selection applied to the *_last_30 arrays ---
last_30_mouse_ids = np.array(y_mouse_last_30)
val_mask_last_30 = np.zeros_like(y_time_last_30)
for val_mouse in FLX_VAL_MICE:
    val_mask_last_30 = np.logical_or(val_mask_last_30, last_30_mouse_ids == val_mouse)
val_rows_last_30 = val_mask_last_30 == 1

X_psd_val_last_30 = X_psd_last_30[val_rows_last_30]
X_ds_val_last_30 = X_ds_last_30[val_rows_last_30]
y_mouse_val_last_30 = last_30_mouse_ids[val_rows_last_30]
y_expDate_val_last_30 = np.array(y_expDate_last_30)[val_rows_last_30]
y_time_val_last_30 = np.array(y_time_last_30)[val_rows_last_30]
y_flx_val_last_30 = np.array(y_flx_last_30)[val_rows_last_30]

val_dict_last_30 = dict(
    X_psd=X_psd_val_last_30,
    X_ds=X_ds_val_last_30,
    y_mouse=y_mouse_val_last_30,
    y_expDate=y_expDate_val_last_30,
    y_time=y_time_val_last_30,
    mice=FLX_VAL_MICE,
    y_flx=y_flx_val_last_30,
    note="Last 30 minutes of FLX validation data",
)

VALIDATION_FILE_NAME_LAST_30 = "FLX_val_dict_last_30.pkl"
with open(VALIDATION_FILE_NAME_LAST_30, "wb") as out_file:
    pickle.dump(val_dict_last_30, out_file)

### Fluoxetine Test Dataset Saving

In [33]:
# Save the full-length recordings of the held-out test mice.
# Build a mask over all windows: start from zeros shaped like y_time and OR in
# the windows of each test mouse.
test_mask = np.zeros_like(y_time)
for mouse in FLX_TEST_MICE:
    mouse_idxs = np.array(y_mouse)==mouse
    test_mask = np.logical_or(test_mask,mouse_idxs)

X_psd_test_full, X_ds_test_full = X_psd[test_mask==1], X_ds[test_mask==1]
y_mouse_test_full = np.array(y_mouse)[test_mask==1]
y_expDate_test_full = np.array(y_expDate)[test_mask==1]
y_time_test_full = np.array(y_time)[test_mask==1]
y_flx_test_full = np.array(y_flx)[test_mask==1]


test_dict_full = {
    "X_psd":X_psd_test_full,
    "X_ds":X_ds_test_full,
    "y_mouse":y_mouse_test_full,
    "y_expDate":y_expDate_test_full,
    "y_time":y_time_test_full,
    "mice":FLX_TEST_MICE,
    "y_flx":y_flx_test_full,
    "note": "Full dataset of FLX test data"
}

# Written to the current working directory.
TEST_FILE_NAME_FULL = "FLX_test_dict_full_recordings.pkl"
with open(TEST_FILE_NAME_FULL,"wb") as f:
    pickle.dump(test_dict_full,f)

# Save only the last-30 subset of the test mice.
# FIX: this loop previously iterated over FLX_VAL_MICE, so the "test" file was
# filled with the validation mouse's windows while its "mice" entry listed the
# test mice. It must select the test mice.
test_mask_last_30 = np.zeros_like(y_time_last_30)
for mouse in FLX_TEST_MICE:
    mouse_idxs = np.array(y_mouse_last_30)==mouse
    test_mask_last_30 = np.logical_or(test_mask_last_30,mouse_idxs)

X_psd_test_last_30, X_ds_test_last_30 = X_psd_last_30[test_mask_last_30==1], X_ds_last_30[test_mask_last_30==1]
y_mouse_test_last_30 = np.array(y_mouse_last_30)[test_mask_last_30==1]
y_expDate_test_last_30 = np.array(y_expDate_last_30)[test_mask_last_30==1]
y_time_test_last_30 = np.array(y_time_last_30)[test_mask_last_30==1]
y_flx_test_last_30 = np.array(y_flx_last_30)[test_mask_last_30==1]

# Guard against the split regressing: every saved window must come from a test mouse.
assert set(np.unique(y_mouse_test_last_30)).issubset(FLX_TEST_MICE), \
    "last-30 test split contains mice outside FLX_TEST_MICE"

test_dict_last_30 = {
    "X_psd":X_psd_test_last_30,
    "X_ds":X_ds_test_last_30,
    "y_mouse":y_mouse_test_last_30,
    "y_expDate":y_expDate_test_last_30,
    "y_time":y_time_test_last_30,
    "mice":FLX_TEST_MICE,
    "y_flx":y_flx_test_last_30,
    "note": "Last 30 minutes of FLX test data"
}

TEST_FILE_NAME_LAST_30 = "FLX_test_dict_last_30.pkl"
with open(TEST_FILE_NAME_LAST_30,"wb") as f:
    pickle.dump(test_dict_last_30,f)

### Old Train/Test Split Code (disabled: the cell below is a string literal kept for reference only)

In [None]:
'''#OLD TRAIN TEST SPLIT CODE
with open(DATA_PATH,"rb") as f:
    data_dict = pickle.load(f)

data_dict.keys()

##Get info about the data
print("Old mice that must be used for training: \n---------------------\n",data_dict["Old_Mice"])
print("\nNew Mice that can be split into training / validation / and test datasets: \n----------------------\n",data_dict["New_Mice"])
print("\nNumber of New Mice: ",np.unique(data_dict["New_Mice"]).shape[0])
print("Number of Old Mice: ",np.unique(data_dict["Old_Mice"]).shape[0])
print("Number of total Mice: ",np.unique(data_dict["New_Mice"]).shape[0]+np.unique(data_dict["Old_Mice"]).shape[0])

print("Train perc: %0.2f, Validation perc: %0.2f, Holdout perc: %0.2f"%(0.6,0.1,0.3))
print("Num train mice: %d, Num val mice: %d, Num test mice: %d"%(0.6*31+1,0.1*31,0.3*31))

train_mice = ['Mouse3191', 'Mouse3192', 'Mouse3193', 'Mouse3194', 'Mouse3202', 'Mouse3203']
val_mice = ['Mouse99003']
test_mice = ['Mouse69061','Mouse99002','Mouse99021']

print("train_mice:\n-------------\n ",train_mice)
print("val_mice:\n-------------\n ",val_mice)
print("test_mice:\n-------------\n ",test_mice)

#Isolate last 30 minutes of data
TIME_FILTER = 30*60
time_mask = np.array(data_dict['y_time']) > TIME_FILTER
X_psd_last_30 = data_dict['X_power'][time_mask==1]
X_ds_last_30 = data_dict['X_directedSpectrum'][time_mask==1]
y_mouse_last_30 = np.array(data_dict['y_mouse'])[time_mask==1]
y_time_last_30 = np.array(data_dict['y_time'])[time_mask==1]
y_flx_last_30 = np.array(data_dict['y_flx'])[time_mask==1]
y_expDate_last_30 = np.array(data_dict['y_expDate'])[time_mask==1]

X_psd_first_30 = data_dict['X_power'][time_mask==0]
X_ds_first_30 = data_dict['X_directedSpectrum'][time_mask==0]
y_mouse_first_30 = np.array(data_dict['y_mouse'])[time_mask==0]
y_time_first_30 = np.array(data_dict['y_time'])[time_mask==0]
y_flx_first_30 = np.array(data_dict['y_flx'])[time_mask==0]
y_expDate_first_30 = np.array(data_dict['y_expDate'])[time_mask==0]

#Make masks to isolate relevant windows
#Make corresponding dictionaries
#Save them in this directory
train_mask_last_30 = np.zeros_like(y_time_last_30)
validation_mask_last_30 = np.zeros_like(y_time_last_30)
test_mask_last_30 = np.zeros_like(y_time_last_30)

train_mask_first_30 = np.zeros_like(y_time_first_30)
validation_mask_first_30 = np.zeros_like(y_time_first_30)
test_mask_first_30 = np.zeros_like(y_time_first_30)

train_mask = np.zeros_like(data_dict['y_time'])
validation_mask = np.zeros_like(data_dict['y_time'])
test_mask = np.zeros_like(data_dict['y_time'])

for mouse in train_mice:
    train_mask = np.logical_or(train_mask,np.array(data_dict['y_mouse'])==mouse)
for mouse in train_mice:
    train_mask_last_30 = np.logical_or(train_mask_last_30,np.array(y_mouse_last_30)==mouse)
for mouse in train_mice:
    train_mask_first_30 = np.logical_or(train_mask_first_30,np.array(y_mouse_first_30)==mouse)


X_psd_train_full, X_ds_train_full = data_dict['X_power'][train_mask==1], data_dict['X_directedSpectrum'][train_mask==1]
y_mouse_train_full = np.array(data_dict['y_mouse'])[train_mask==1]
y_expDate_train_full = np.array(data_dict['y_expDate'])[train_mask==1]
y_time_train_full = np.array(data_dict['y_time'])[train_mask==1]
y_flx_train_full = np.array(data_dict['y_flx'])[train_mask==1]

X_psd_train, X_ds_train = X_psd_last_30[train_mask_last_30==1], X_ds_last_30[train_mask_last_30==1]
y_mouse_train = np.array(y_mouse_last_30)[train_mask_last_30==1]
y_expDate_train = np.array(y_expDate_last_30)[train_mask_last_30==1]
y_time_train = np.array(y_time_last_30)[train_mask_last_30==1]
y_flx_train = np.array(y_flx_last_30)[train_mask_last_30==1]

X_psd_train_first_30, X_ds_train_first_30 = X_psd_first_30[train_mask_first_30==1], X_ds_first_30[train_mask_first_30==1]
y_mouse_train_first_30 = np.array(y_mouse_first_30)[train_mask_first_30==1]
y_expDate_train_first_30 = np.array(y_expDate_first_30)[train_mask_first_30==1]
y_time_train_first_30 = np.array(y_time_first_30)[train_mask_first_30==1]
y_flx_train_first_30 = np.array(y_flx_first_30)[train_mask_first_30==1]

train_dict = {
    "X_psd":X_psd_train,
    "X_psd_first_30":X_psd_train_first_30,
    "X_ds":X_ds_train,
    "X_ds_first_30":X_ds_train_first_30,
    "y_mouse":y_mouse_train,
    "y_mouse_first_30":y_mouse_train_first_30,
    "y_expDate":y_expDate_train,
    "y_expDate_first_30":y_expDate_train_first_30,
    "y_time":y_time_train,
    "y_time_first_30":y_time_train_first_30,
    "mice":train_mice,
    "y_flx":y_flx_train,
    "y_flx_train_first_30":y_flx_train_first_30,
    "X_psd_full":X_psd_train_full,
    "X_ds_full":X_ds_train_full,
    "y_mouse_full":y_mouse_train_full,
    "y_expDate_full":y_expDate_train_full,
    "y_time_full":y_time_train_full,
    "y_flx_full":y_flx_train_full
}

TRAIN_FILE_NAME="FLX_train_dict.pkl"
with open(TRAIN_FILE_NAME,"wb") as f:
    pickle.dump(train_dict,f)

for mouse in val_mice:
    validation_mask = np.logical_or(validation_mask,np.array(data_dict['y_mouse'])==mouse)
for mouse in val_mice:
    validation_mask_last_30 = np.logical_or(validation_mask_last_30,np.array(y_mouse_last_30)==mouse)
for mouse in val_mice:
    validation_mask_first_30 = np.logical_or(validation_mask_first_30,np.array(y_mouse_first_30)==mouse)


X_psd_validation_full, X_ds_validation_full = data_dict['X_power'][validation_mask==1], data_dict['X_directedSpectrum'][validation_mask==1]
y_mouse_validation_full = np.array(data_dict['y_mouse'])[validation_mask==1]
y_expDate_validation_full = np.array(data_dict['y_expDate'])[validation_mask==1]
y_time_validation_full = np.array(data_dict['y_time'])[validation_mask==1]
y_flx_validation_full = np.array(data_dict['y_flx'])[validation_mask==1]

X_psd_validation, X_ds_validation = X_psd_last_30[validation_mask_last_30==1], X_ds_last_30[validation_mask_last_30==1]
y_mouse_validation = np.array(y_mouse_last_30)[validation_mask_last_30==1]
y_expDate_validation = np.array(y_expDate_last_30)[validation_mask_last_30==1]
y_time_validation = np.array(y_time_last_30)[validation_mask_last_30==1]
y_flx_validation = np.array(y_flx_last_30)[validation_mask_last_30==1]

X_psd_validation_first_30, X_ds_validation_first_30 = X_psd_first_30[validation_mask_first_30==1], X_ds_first_30[validation_mask_first_30==1]
y_mouse_validation_first_30 = np.array(y_mouse_first_30)[validation_mask_first_30==1]
y_expDate_validation_first_30 = np.array(y_expDate_first_30)[validation_mask_first_30==1]
y_time_validation_first_30 = np.array(y_time_first_30)[validation_mask_first_30==1]
y_flx_validation_first_30 = np.array(y_flx_first_30)[validation_mask_first_30==1]

validation_dict = {
    "X_psd":X_psd_validation,
    "X_ds":X_ds_validation,
    "y_mouse":y_mouse_validation,
    "y_expDate":y_expDate_validation,
    "y_time":y_time_validation,
    "mice":val_mice,
    "y_flx":y_flx_validation,
    "X_psd_first_30":X_psd_validation_first_30,
    "X_ds_first_30":X_ds_validation_first_30,
    "y_mouse_first_30":y_mouse_validation_first_30,
    "y_expDate_first_30":y_expDate_validation_first_30,
    "y_time_first_30":y_time_validation_first_30,
    "y_flx_train_first_30":y_flx_validation_first_30,
    "X_psd_full":X_psd_validation_full,
    "X_ds_full":X_ds_validation_full,
    "y_mouse_full":y_mouse_validation_full,
    "y_expDate_full":y_expDate_validation_full,
    "y_time_full":y_time_validation_full,
    "y_flx_full":y_flx_validation_full
}

VALIDATION_FILE_NAME = "FLX_validation_dict.pkl"
with open(VALIDATION_FILE_NAME,"wb") as f:
    pickle.dump(validation_dict,f)


for mouse in test_mice:
    test_mask = np.logical_or(test_mask,np.array(data_dict['y_mouse'])==mouse)
for mouse in test_mice:
    test_mask_last_30 = np.logical_or(test_mask_last_30,np.array(y_mouse_last_30)==mouse)
for mouse in test_mice:
    test_mask_first_30 = np.logical_or(test_mask_first_30,np.array(y_mouse_first_30)==mouse)

X_psd_test_full, X_ds_test_full = data_dict['X_power'][test_mask==1], data_dict['X_directedSpectrum'][test_mask==1]
y_mouse_test_full = np.array(data_dict['y_mouse'])[test_mask==1]
y_expDate_test_full = np.array(data_dict['y_expDate'])[test_mask==1]
y_time_test_full = np.array(data_dict['y_time'])[test_mask==1]
y_flx_test_full = np.array(data_dict['y_flx'])[test_mask==1]

X_psd_test, X_ds_test = X_psd_last_30[test_mask_last_30==1], X_ds_last_30[test_mask_last_30==1]
y_mouse_test = np.array(y_mouse_last_30)[test_mask_last_30==1]
y_expDate_test = np.array(y_expDate_last_30)[test_mask_last_30==1]
y_time_test = np.array(y_time_last_30)[test_mask_last_30==1]
y_flx_test = np.array(y_flx_last_30)[test_mask_last_30==1]

X_psd_test_first_30, X_ds_test_first_30 = X_psd_first_30[test_mask_first_30==1], X_ds_first_30[test_mask_first_30==1]
y_mouse_test_first_30 = np.array(y_mouse_first_30)[test_mask_first_30==1]
y_expDate_test_first_30 = np.array(y_expDate_first_30)[test_mask_first_30==1]
y_time_test_first_30 = np.array(y_time_first_30)[test_mask_first_30==1]
y_flx_test_first_30 = np.array(y_flx_first_30)[test_mask_first_30==1]

test_dict = {
    "X_psd":X_psd_test,
    "X_ds":X_ds_test,
    "y_mouse":y_mouse_test,
    "y_expDate":y_expDate_test,
    "y_time":y_time_test,
    "mice":test_mice,
    "y_flx":y_flx_test,
    "X_psd_first_30":X_psd_test_first_30,
    "X_ds_first_30":X_ds_test_first_30,
    "y_mouse_first_30":y_mouse_test_first_30,
    "y_expDate_first_30":y_expDate_test_first_30,
    "y_time_first_30":y_time_test_first_30,
    "y_flx_train_first_30":y_flx_test_first_30,
    "X_psd_full":X_psd_test_full,
    "X_ds_full":X_ds_test_full,
    "y_mouse_full":y_mouse_test_full,
    "y_expDate_full":y_expDate_test_full,
    "y_time_full":y_time_test_full,
    "y_flx_full":y_flx_test_full
}

TEST_FILE_NAME = "FLX_test_dict.pkl"
with open(TEST_FILE_NAME,"wb") as f:
    pickle.dump(test_dict,f)'''