This notebook semi-manually collects accurate and inaccurate meal data using the clusters produced by the KMeans algorithm.

For each KMeans cluster, we inspect some of its raw data together with the PCA visualization of the clusters, and decide whether that cluster can be considered an accurate-meal cluster.

Then, we train two models on this semi-labelled data to learn the pattern that characterizes an accurate meal.

In [3]:
import sys
# Put ../scripts first on the module search path so modules located there can
# be imported (presumably meal_classifiers, unsupervised_helpers and path --
# confirm).
sys.path.insert(0, '../scripts')

import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
# NOTE(review): the star imports hide which module provides each helper used
# in the cells below (fit_model_single, RNNClassifier, train, ...).
from meal_classifiers import *
from unsupervised_helpers import *
import numpy as np
# Sheet selections passed as `sheets=` when the two workbooks are read below.
from path import rev_cask_raw, rev_ctrl_raw

# NOTE(review): neither threshold is referenced again in the visible cells;
# units and purpose are not shown here -- confirm whether they are still needed.
time_threshold = 60
pellet_count_threshold = 2

In [None]:
# Extract the control-group data from the reversal_ctrl workbook, passing
# rev_ctrl_raw as the sheet selection. The result is indexed by 3, 4 and 5 in
# the clustering cells below (presumably the pellet count of a meal, going by
# the section headings -- confirm).
ctrl_data = extract_data_full_group('../data/reversal_ctrl.xlsx', sheets=rev_ctrl_raw)

In [None]:
# Extract the CASK (experimental) group data from the reversal_cask workbook,
# passing rev_cask_raw as the sheet selection. Indexed by 3, 4 and 5 in the
# clustering cells below (presumably pellet count -- confirm).
exp_data = extract_data_full_group('../data/reversal_cask.xlsx', sheets=rev_cask_raw)

In [6]:
# Convert each group's extracted data into dataset form (presumably flattening
# the per-pellet-count dictionary -- confirm in dictionary2dataset).
# NOTE: ctrl_input / exp_input are rebound to torch tensors in the
# "Model Training" section, and no visible cell reads them before that.
ctrl_input = dictionary2dataset(ctrl_data)
exp_input = dictionary2dataset(exp_data)

#### Control Data 3-Pellet

In [None]:
# Run the elbow diagnostic on ctrl_data[3]; its output is read by eye to pick
# the k used in the next cell (presumably an inertia-vs-k plot -- confirm).
find_k_by_elbow(ctrl_data[3])

In [None]:
# Cluster ctrl_data[3] with k=4. meals_by_category is indexed by an integer
# cluster id in the following cells.
# NOTE: the global name `model` is reused for the classifiers further down.
model, meals_by_category = fit_model_single(ctrl_data[3], k=4)

In [None]:
# Show the first 20 entries of cluster 2 for manual inspection; change the
# index to look at the other clusters.
meals_by_category[2][:20]

In [None]:
# Split the clustered meals into good/bad using the hand-picked cluster ids
# [0, 3] (presumably the clusters judged accurate -- confirm argument meaning).
new_good, new_bad = collect_meals_from_categories(meals_by_category, [0, 3])
# Fold the new meals into the control-group pickles.
# NOTE(review): whether re-running this cell duplicates entries depends on
# update_data, which is not visible here.
update_data('./CASK_ctrl_good.pkl', new_good)
update_data('./CASK_ctrl_bad.pkl', new_bad)

#### Control Data 4-Pellet

In [None]:
# Elbow diagnostic on ctrl_data[4]; the output is read by eye to choose k for
# the next cell.
find_k_by_elbow(ctrl_data[4])

In [None]:
# Cluster ctrl_data[4] with k=7; overwrites `model` and `meals_by_category`
# from the previous section.
model, meals_by_category = fit_model_single(ctrl_data[4], k=7)

In [None]:
# Show the first 20 entries of cluster 1 for manual inspection.
meals_by_category[1][:20]

In [None]:
# Split the meals using the hand-picked cluster ids [1, 4] (presumably the
# clusters judged accurate -- confirm) and fold them into the control pickles.
new_good, new_bad = collect_meals_from_categories(meals_by_category, [1, 4])
update_data('./CASK_ctrl_good.pkl', new_good)
update_data('./CASK_ctrl_bad.pkl', new_bad)

#### Control Data 5-Pellet

In [None]:
# Elbow diagnostic on ctrl_data[5]; the output is read by eye to choose k for
# the next cell.
find_k_by_elbow(ctrl_data[5])

In [None]:
# Cluster ctrl_data[5] with k=8; overwrites `model` and `meals_by_category`
# from the previous section.
model, meals_by_category = fit_model_single(ctrl_data[5], k=8)

In [None]:
# Show the first 20 entries of the last cluster for manual inspection.
# The clustering above uses k=8, and KMeans numbers its clusters 0..k-1, so 7
# is the highest valid id (index 8 would be out of range).
# NOTE(review): assumes meals_by_category is keyed by the KMeans label.
meals_by_category[7][:20]

In [None]:
# Split the meals using the hand-picked cluster ids [1, 3, 7] (presumably the
# clusters judged accurate -- confirm) and fold them into the control pickles.
new_good, new_bad = collect_meals_from_categories(meals_by_category, [1, 3, 7])
update_data('./CASK_ctrl_good.pkl', new_good)
update_data('./CASK_ctrl_bad.pkl', new_bad)

In [None]:
# 463 / 823 ~= 0.563.
# NOTE(review): hard-coded counts -- presumably accurate vs. inaccurate control
# meals collected above; confirm, and consider deriving them from the pickles.
463/(463+360)

#### CASK Data 3-Pellet

In [None]:
# Elbow diagnostic on exp_data[3]; the output is read by eye to choose k for
# the next cell.
find_k_by_elbow(exp_data[3])

In [None]:
# Cluster exp_data[3] with k=6; overwrites `model` and `meals_by_category`
# from the control-group sections.
model, meals_by_category = fit_model_single(exp_data[3], k=6)

In [None]:
# Show the first 10 entries of cluster 5 for manual inspection.
meals_by_category[5][:10]

In [None]:
# Split the meals using the hand-picked cluster ids [0, 3] (presumably the
# clusters judged accurate -- confirm) and fold them into the CASK-group
# pickles (note the _exp_ file names, distinct from the control ones).
new_good, new_bad = collect_meals_from_categories(meals_by_category, [0, 3])
update_data('./CASK_exp_good.pkl', new_good)
update_data('./CASK_exp_bad.pkl', new_bad)

#### CASK Data 4-Pellet

In [None]:
# Elbow diagnostic on exp_data[4]; the output is read by eye to choose k for
# the next cell.
find_k_by_elbow(exp_data[4])

In [None]:
# Cluster exp_data[4] with k=9; overwrites `model` and `meals_by_category`
# from the previous section.
model, meals_by_category = fit_model_single(exp_data[4], k=9)

In [None]:
# Show the first 20 entries of cluster 6 for manual inspection.
meals_by_category[6][:20]

In [None]:
# Split the meals using the hand-picked cluster ids [2, 6] (presumably the
# clusters judged accurate -- confirm) and fold them into the CASK pickles.
new_good, new_bad = collect_meals_from_categories(meals_by_category, [2, 6])
update_data('./CASK_exp_good.pkl', new_good)
update_data('./CASK_exp_bad.pkl', new_bad)

#### CASK Data 5-Pellet

In [None]:
# Elbow diagnostic on exp_data[5]; the output is read by eye to choose k for
# the next cell.
find_k_by_elbow(exp_data[5])

In [None]:
# Cluster exp_data[5] with k=12; overwrites `model` and `meals_by_category`
# from the previous section.
model, meals_by_category = fit_model_single(exp_data[5], k=12)

In [None]:
# Show the first 20 entries of the last cluster for manual inspection.
# The clustering above uses k=12, and KMeans numbers its clusters 0..k-1, so
# 11 is the highest valid id (index 12 would be out of range).
# NOTE(review): assumes meals_by_category is keyed by the KMeans label.
meals_by_category[11][:20]

In [None]:
# Split the meals using the hand-picked cluster ids [1, 3, 6] (presumably the
# clusters judged accurate -- confirm) and fold them into the CASK pickles.
new_good, new_bad = collect_meals_from_categories(meals_by_category, [1, 3, 6])
update_data('./CASK_exp_good.pkl', new_good)
update_data('./CASK_exp_bad.pkl', new_bad)

In [None]:
# 1107 / 2083 ~= 0.531.
# NOTE(review): hard-coded counts -- presumably accurate vs. inaccurate CASK
# meals collected above; confirm, and consider deriving them from the pickles.
1107/(1107+976)

### Model Training

In [4]:
# good meals are class 0
ctrl_X, ctrl_y = create_dataset_single_group(experiment='CASK', ctrl=True)
exp_X, exp_y = create_dataset_single_group(experiment='CASK', ctrl=False)
np.savez('CASK_data.npz', ctrl_X=ctrl_X, ctrl_y=ctrl_y, exp_X=exp_X, exp_y=exp_y)

In [None]:
# Use the GPU when one is available; otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Reload the cached arrays. np.load returns an NpzFile that keeps the archive
# open, so read everything inside a context manager to release the handle.
with np.load('CASK_data.npz') as data:
    ctrl_X, ctrl_y = data['ctrl_X'], data['ctrl_y']
    exp_X, exp_y = data['exp_X'], data['exp_y']

# Combine both groups into one dataset for training.
X, y = merge_dataset(ctrl_X, ctrl_y, exp_X, exp_y)

# Per-group inputs as float32 tensors on the target device; the evaluation
# cells further down consume these.
ctrl_input = torch.tensor(ctrl_X, dtype=torch.float32, device=device)
exp_input = torch.tensor(exp_X, dtype=torch.float32, device=device)

# Hold out 10% of the merged data for testing.
# NOTE(review): the split is unseeded, so it differs on every run; pass
# random_state to train_test_split if reproducible results are needed.
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=True, test_size=0.1)
print(f'Train Size: {len(y_train)},  Test Size: {len(y_test)}')

In [3]:
# Optimisation settings shared by the training cells below.
lr, batch_size, num_epochs = 1e-4, 256, 200

# Labels become int64 tensors; no device is given, so they are created on
# torch's default device.
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Features become float32 tensors; only the test features are created
# directly on `device`.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32, device=device)

# Mini-batch iterator over the training pairs, reshuffled every epoch.
train_dataset = TimeSeriesDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(
    dataset=train_dataset,
    shuffle=True,
    batch_size=batch_size,
)

In [None]:
# Build the two-class recurrent classifier on `device` and train it with the
# current global lr / num_epochs; the held-out tensors are passed along
# (presumably for evaluation during training -- confirm in train()).
# NOTE: `lr` is whatever the last executed cell set. The CNN cell below
# rebinds it to 0.001, so re-run the hyperparameter cell before re-running
# this one out of order.
model = RNNClassifier(input_size=1, hidden_size=400, num_layers=2, num_classes=2).to(device)
model = train(model, lr, num_epochs, train_loader, X_test_tensor, y_test_tensor)

In [None]:
# The CNN uses a larger learning rate than the RNN cell.
# NOTE: this rebinds the global `lr`, which stays at 0.001 for any cell
# executed afterwards.
lr = 0.001
# Build the two-class CNN on `device` and train it on the same loader and
# held-out tensors; `model` now refers to the CNN.
model = CNNClassifier(num_classes=2, maxlen=4).to(device)
model = train(model, lr, num_epochs, train_loader, X_test_tensor, y_test_tensor)

In [None]:
# Report the parameter count (presumably; see count_parameters) of whichever
# model is currently bound to `model` -- the CNN if cells ran top to bottom.
count_parameters(model)

In [None]:
# Rebuild the RNN with the same hyper-parameters as the training cell, then
# restore its saved weights.
model = RNNClassifier(input_size=1, hidden_size=400, num_layers=2, num_classes=2).to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint
# written on a GPU machine still loads when only the CPU is available.
model.load_state_dict(torch.load('../data/LSTM_from_CASK.pth', map_location=device))
# Evaluate per group against the semi-manual labels (good meals are class 0).
evaluate_meals_by_groups(model, ctrl_input, ctrl_y, exp_input, exp_y)

In [None]:
# Rebuild the CNN with the same hyper-parameters as the training cell, then
# restore its saved weights.
model = CNNClassifier(num_classes=2, maxlen=4).to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint
# written on a GPU machine still loads when only the CPU is available.
# NOTE(review): this reads ./CNN_from_CASK.pth (the path the final save cell
# writes), whereas the new-data evaluation cell reads ../data/CNN_from_CASK.pth
# -- confirm both refer to the intended checkpoint.
model.load_state_dict(torch.load('./CNN_from_CASK.pth', map_location=device))
# Evaluate per group against the semi-manual labels (good meals are class 0).
evaluate_meals_by_groups(model, ctrl_input, ctrl_y, exp_input, exp_y)

In [None]:
# Rebuild the RNN and restore its saved weights.
model = RNNClassifier(input_size=1, hidden_size=400, num_layers=2, num_classes=2).to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint
# written on a GPU machine still loads when only the CPU is available.
model.load_state_dict(torch.load('../data/LSTM_from_CASK.pth', map_location=device))
# Run the model on both groups' inputs without labels. The .to(device) calls
# are kept so the cell also works if the inputs are not yet on `device`.
evaluate_meals_on_new_data(model, ctrl_input.to(device), exp_input.to(device))

In [None]:
# Rebuild the CNN and restore its saved weights.
model = CNNClassifier(num_classes=2, maxlen=4).to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint
# written on a GPU machine still loads when only the CPU is available.
# NOTE(review): this reads ../data/CNN_from_CASK.pth, while the labelled
# evaluation cell and the final save cell use ./CNN_from_CASK.pth -- confirm.
model.load_state_dict(torch.load('../data/CNN_from_CASK.pth', map_location=device))
# Run the model on both groups' inputs without labels. The .to(device) calls
# are kept so the cell also works if the inputs are not yet on `device`.
evaluate_meals_on_new_data(model, ctrl_input.to(device), exp_input.to(device))

In [23]:
# Persist the weights of whichever model is currently bound to `model`.
# NOTE(review): the file name says CNN, so run this right after the CNN
# training cell. The file is written to ./ whereas one of the evaluation cells
# loads ../data/CNN_from_CASK.pth -- confirm the intended location.
torch.save(model.state_dict(), './CNN_from_CASK.pth')