In [1]:
import pandas as pd
import numpy as np
import torch
import torch.utils.data
import torch.nn as nn
from torch.autograd import Variable
from sklearn.model_selection import train_test_split
import joblib
from tqdm import tqdm
import matplotlib.pyplot as plt

In [2]:
from isaac.utils import get_cuda_device_if_available
# Resolve the compute device once through the project helper; later cells pass
# this `device` to prepare_dataset and initialise_model.
# NOTE(review): the helper's name suggests a CPU fallback when CUDA is missing —
# confirm in isaac.utils.
device = get_cuda_device_if_available()
print(device)

cuda:0


In [3]:
from isaac.dataset import read_dataset, prepare_dataset, sample_subsequences
from isaac.utils import plot_confusion_matrix
from isaac.models import ComplexRNNModel
from isaac.constants import *
from isaac.training import evaluate, training_loop, evaluate_saved_model
from isaac.sanity import class_proportions

In [4]:
# Run-wide training configuration.
BATCH_SIZE = 128  # forwarded to prepare_dataset as batch_size
EPOCHS = 100  # forwarded to training_loop; also the length of the per-seed "Epoch" column
NORMALISE_DATA = True  # forwarded to prepare_dataset as normalise_data
SEQ_END = 1800  # forwarded to training_loop as seq_end; presumably the last timestep used per trial — TODO confirm

# Define common model, loss and optimizer

In [5]:
# Every feature column either question type may train on: the mass branch of
# TR_COLS uses this same concatenation, the force branch only BASIC_TRAINING_COLS.
ALL_COLS = BASIC_TRAINING_COLS + PUCK_SQUARE_DISTANCES + PUCK_ANGLE_FEATURES

# Load the passive-trial training and validation splits restricted to ALL_COLS.
# NOTE(review): n_trials presumably caps how many trials are read — confirm in isaac.dataset.
train_trials = read_dataset("data/train_passive_trials.h5", n_trials=3500, cols=ALL_COLS)
val_trials = read_dataset("data/val_passive_trials.h5", n_trials=900, cols=ALL_COLS)

100%|██████████| 10/10 [00:00<00:00, 110.01it/s]
100%|██████████| 10/10 [00:00<00:00, 113.38it/s]


In [6]:
# Target question for this run. "mass" selects the mass branch in the cells
# below; any other value falls through to the force branch.
question_type = "mass"

In [7]:
# Per-question configuration: label columns, input feature columns and the
# value later handed to training_loop as step_size.
if question_type != "mass":
    # Force question (any value other than "mass"): basic features only.
    class_columns = FORCE_CLASS_COLS
    TR_COLS = BASIC_TRAINING_COLS
    STEP_SIZE = 4
else:
    # Mass question: basic features plus the puck distance and angle features.
    class_columns = MASS_CLASS_COLS
    TR_COLS = BASIC_TRAINING_COLS + PUCK_SQUARE_DISTANCES + PUCK_ANGLE_FEATURES
    STEP_SIZE = 3

In [8]:
# Architecture hyper-parameters for the recurrent classifier.
INPUT_DIM = len(TR_COLS)    # input dimension: one input per training column
HIDDEN_DIM = 25  # hidden layer dimension (also the input width of the linear head)
N_LAYERS = 4     # number of hidden layers
OUTPUT_DIM = 3   # output dimension (number of logits produced by the head)
DROPOUT = 0.5

# Positional bundle passed to initialise_model; the field order matches the
# leading parameters of TLModel: (input_dim, hidden_dim, n_layers, output_dim, dropout).
network_params = (INPUT_DIM, HIDDEN_DIM, N_LAYERS, OUTPUT_DIM, DROPOUT)

# TRAINING

In [9]:
class TLModel(nn.Module):
    """Transfer-learning classifier: a recurrent layer followed by a new linear head.

    Args:
        input_dim: number of input features per timestep; only used when a
            fresh recurrent layer has to be built.
        hidden_dim: hidden size of the recurrent layer and input width of the head.
        n_layers: number of stacked recurrent layers; only used when a fresh
            recurrent layer has to be built.
        output_dim: number of logits produced by the head.
        dropout: dropout between stacked recurrent layers; only used when a
            fresh recurrent layer has to be built.
        pretrained_layer: recurrent module to reuse as-is. It must return an
            ``(output, state)`` pair whose output can be indexed as
            ``output[:, -1, :]``. When ``None``, a new ``cell_type`` layer is
            created instead.
        cell_type: recurrent module class used for the fallback layer
            (e.g. ``nn.GRU``, ``nn.LSTM``).
    """

    def __init__(self, input_dim, hidden_dim, n_layers, output_dim, dropout=0., pretrained_layer=None, cell_type=nn.GRU):
        super(TLModel, self).__init__()
        if pretrained_layer is None:
            # Without a pretrained layer `rec_layer` would be None and the
            # first forward pass would fail, so build an untrained layer from
            # the given dimensions. batch_first=True matches the
            # `out[:, -1, :]` indexing in forward(). Inter-layer dropout is
            # only meaningful (and only warning-free) for stacked layers.
            pretrained_layer = cell_type(input_dim, hidden_dim, n_layers, batch_first=True,
                                         dropout=dropout if n_layers > 1 else 0.)
        self.rec_layer = pretrained_layer
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Classify each sequence in ``x`` from the recurrent output at the last index of axis 1.

        Assumes the recurrent layer is batch-first, i.e. ``x`` is
        ``(batch, seq, features)``; a supplied pretrained layer must follow
        the same layout.
        """
        out, _ = self.rec_layer(x)
        # Only the final position along axis 1 feeds the classification head.
        out = self.fc(out[:, -1, :])
        return out
    
def initialise_model(question_type, network_params, lr=0.01, seed=0, device=torch.device("cpu")):
    """Build the transfer-learning model together with its loss and optimizer.

    The recurrent layer is taken from the network pretrained on the *other*
    question and a new linear head is attached to it.

    Args:
        question_type: "mass" loads the pretrained force model; any other
            value loads the pretrained mass model.
        network_params: ``(input_dim, hidden_dim, n_layers, output_dim, dropout)``
            used to construct both the pretrained network and the TLModel.
        lr: learning rate of the trainable parameter groups.
        seed: seed applied to NumPy and PyTorch before any module is built.
        device: device the checkpoint is mapped to and the models are moved to.

    Returns:
        Tuple ``(tl_model, error, optimizer)``.
    """
    if question_type == "mass":
        model_type = "force"
    else:
        model_type = "mass"

    # Seed before constructing modules so the new head starts from
    # reproducible weights for a given seed.
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    model_path = "models/passive_"+model_type+"_model.pt"
    pretrained_model = ComplexRNNModel(*network_params).to(device=device)
    # map_location lets a checkpoint saved on a GPU load on whichever device
    # was requested (including the CPU default of this function).
    pretrained_model.load_state_dict(torch.load(model_path, map_location=device))

    tl_model = TLModel(*network_params, pretrained_layer=pretrained_model.rec_layer).to(device=device)

    rec_params = list(tl_model.rec_layer.parameters())
    optimizer = torch.optim.Adam(
        [
            # The first 8 recurrent parameter tensors get a zero learning rate,
            # so the optimizer never moves them.
            # NOTE(review): for a torch GRU that is 4 tensors per layer, i.e.
            # the two lowest layers — confirm against ComplexRNNModel.rec_layer.
            {"params": rec_params[:8], "lr": 0.0},
            {"params": rec_params[8:], "lr": lr},
            {"params": tl_model.fc.parameters(), "lr": lr},
        ])

    error = nn.CrossEntropyLoss()

    return tl_model, error, optimizer

In [10]:
stats_dfs = []

# Load the scaler stored alongside the pretrained (source) model: the mass
# question reuses the force scaler and vice versa, mirroring the checkpoint
# choice made in initialise_model.
if question_type == "mass":
    scaler = joblib.load("scalers/passive_force_scaler.sk")
else:
    scaler = joblib.load("scalers/passive_mass_scaler.sk")

# prepare_dataset returns one loader per trial set (train, validation) plus a
# scaler, which rebinds `scaler` and is what gets saved at the end of the notebook.
loaders, scaler = prepare_dataset([train_trials, val_trials], class_columns=class_columns,
                                  training_columns=TR_COLS, batch_size=BATCH_SIZE,
                                  normalise_data=NORMALISE_DATA, scaler=scaler, device=device)
best_model_overall = None
best_accuracy = 0.

for seed in [0, 42, 72]:
    model, error, optimizer = initialise_model(question_type, network_params, lr=0.01, seed=seed, device=device)
    epoch_losses, epoch_accuracies, best_model = training_loop(model, optimizer, error, loaders[0],
                                                               loaders[1], EPOCHS, seq_end=SEQ_END,
                                                               step_size=STEP_SIZE)

    # epoch_accuracies[0] holds the train curve and [1] the validation curve.
    model_accuracy = max(epoch_accuracies[1])
    # The `is None` guard guarantees a model is kept even if no seed ever gets
    # above 0 validation accuracy; otherwise the save cell would hit None.
    if best_model_overall is None or model_accuracy > best_accuracy:
        best_model_overall = best_model
        best_accuracy = model_accuracy

    # One row per epoch; the seed is stored as a string and broadcast to all rows.
    df = pd.DataFrame({
        "seed": str(seed),
        "Epoch": np.arange(EPOCHS),
        "Loss": epoch_losses,
        "Train Accuracy": epoch_accuracies[0],
        "Val Accuracy": epoch_accuracies[1],
    })
    stats_dfs.append(df)

stats = pd.concat(stats_dfs)

100%|██████████| 10/10 [00:00<00:00, 276.98it/s]
100%|██████████| 10/10 [00:00<00:00, 1073.67it/s]
Train_loss (1.15)	 Train_acc (60.00)	 Val_acc (30.00): 100%|██████████| 2/2 [00:00<00:00,  3.90it/s] 
Train_loss (1.22)	 Train_acc (60.00)	 Val_acc (50.00): 100%|██████████| 2/2 [00:00<00:00,  4.42it/s] 
Train_loss (1.31)	 Train_acc (50.00)	 Val_acc (30.00): 100%|██████████| 2/2 [00:00<00:00,  4.39it/s] 


In [11]:
# Persist the per-seed training curves; the file name carries the question type.
stats.to_hdf(f"transfer_learning_plots/{question_type}_stats.h5", key="stats")

## Save model and scaler

In [12]:
# Write the weights of the best model across seeds, then the scaler returned by
# prepare_dataset, both under a "tl_<question_type>" name.
torch.save(best_model_overall.state_dict(), f"models/tl_{question_type}_model.pt")
joblib.dump(scaler, f"scalers/tl_{question_type}_scaler.sk")

['scalers/tl_mass_scaler.sk']