# Scale Test

In [9]:
import os
import sys
module_path = os.path.abspath(os.path.join('../..'))
if module_path not in sys.path:
    sys.path.append(module_path)
import pickle as pickle
import pandas as pd
import numpy as np

import copy

import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Dataset

from src.trainer import CaseDataSet
from src.model import DLModels
from src.trainer import Trainer

from sklearn.metrics import roc_auc_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

import importlib.util
# Choose the compute backend for this notebook: CUDA takes precedence over
# Apple MPS, and the CPU is used when neither accelerator is available.
# `device_package` is the torch sub-module that matches the chosen backend.
mps_importable = importlib.util.find_spec("torch.backends.mps") is not None
mps_ready = mps_importable and torch.backends.mps.is_available()

if torch.cuda.is_available():
    torch_device, device_package = torch.device("cuda"), torch.cuda
elif mps_ready:
    torch_device, device_package = torch.device("mps"), torch.mps
else:
    # The CPU fallback is kept as the plain string "cpu" rather than a torch.device.
    torch_device, device_package = "cpu", torch.cpu

# Bare expression so the notebook displays the selected device.
torch_device

device(type='cuda')

In [10]:
def train_scale_test_model(datasets_dict, model_hyper_para, trainer_hyper_para,
                       scale, embedding, print_iter=False, file_suffix=""):
    """Train one SimpleLSTM on a single (scale, embedding) data set and save the results.

    Two files are written under ``../../Model/scale/LSTM/``:

    * ``<model_name>_<file_suffix>.LSTM`` -- the object returned by
      ``Trainer.train_model``, stored with ``torch.save``;
    * ``<model_name>_<file_suffix>_stat.pkl`` -- a pickled DataFrame with the
      columns ``TrainingLoss`` and ``ValidationLoss``.

    ``<model_name>`` is ``LSTM_T_h<hidden_size>_l<num_layers>_<embedding>_<scale>``.

    Args:
        datasets_dict: Nested mapping; ``datasets_dict[scale][embedding]`` must
            provide ``"train"`` and ``"val"`` entries, which are handed to
            ``Trainer.train_model``.
        model_hyper_para: Mapping from embedding name to a dict with
            ``"input_size"``, ``"hidden_size"`` and ``"num_layers"``.
        trainer_hyper_para: Dict providing ``"learning_rate"``, ``"batch_size"``,
            ``"max_epoch"``, ``"max_ob_iter"`` and ``"score_margin"``.
        scale: Key selecting the data scale in ``datasets_dict``.
        embedding: Key used in both ``model_hyper_para`` and
            ``datasets_dict[scale]``.
        print_iter: Forwarded unchanged to ``Trainer.train_model``.
        file_suffix: Text appended, after an underscore, to both output names.

    Returns:
        None. The results are only written to disk.

    Raises:
        KeyError: If ``scale`` or ``embedding`` is missing from the mappings, or
            a required hyper-parameter key is absent.
    """
    # Resolve the lookups first so a wrong key fails before a model is built.
    current_model_para = model_hyper_para[embedding]
    target_data_sets = datasets_dict[scale][embedding]

    # Create the output folder *before* training: a missing directory would
    # otherwise only surface at save time and throw away the trained model.
    output_dir = "../../Model/scale/LSTM/"
    os.makedirs(output_dir, exist_ok=True)

    # Backend preference: CUDA over MPS over CPU.
    torch_device = "cpu"
    device_package = torch.cpu
    if importlib.util.find_spec("torch.backends.mps") is not None:
        if torch.backends.mps.is_available():
            torch_device = torch.device("mps")
            device_package = torch.mps
    if torch.cuda.is_available():
        torch_device = torch.device("cuda")
        device_package = torch.cuda

    # Instantiate the model.
    # NOTE(review): the trailing 1 is presumably the output size -- confirm
    # against DLModels.SimpleLSTM.
    model = DLModels.SimpleLSTM(current_model_para["input_size"],
                                current_model_para["hidden_size"],
                                current_model_para["num_layers"],
                                1).to(torch_device)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=trainer_hyper_para["learning_rate"])

    # NOTE(review): the two None arguments fill positional slots that this
    # notebook leaves empty (presumably source-domain data sets) -- confirm
    # against Trainer.train_model.
    trained_model, train_loss, val_loss = Trainer.train_model(
        model, optimizer,
        None, None,
        target_data_sets["train"],
        target_data_sets["val"],
        trainer_hyper_para["batch_size"],
        torch_device,
        device_package,
        Trainer.prefix_weighted_loss,
        trainer_hyper_para["max_epoch"],
        trainer_hyper_para["max_ob_iter"],
        trainer_hyper_para["score_margin"],
        print_iter=print_iter)

    model_name = (f"LSTM_T_h{current_model_para['hidden_size']}"
                  f"_l{current_model_para['num_layers']}_{embedding}_{scale}")
    output_stem = output_dir + model_name + "_" + file_suffix

    torch.save(trained_model, output_stem + ".LSTM")

    # Put the two loss histories side by side, one column each.
    training_stat = pd.DataFrame(columns=["TrainingLoss", "ValidationLoss"],
                                 data=np.hstack([train_loss.reshape((-1, 1)),
                                                 val_loss.reshape((-1, 1))]))
    training_stat.to_pickle(output_stem + "_stat.pkl")

In [11]:
# Settings for the training runs. train_scale_test_model reads max_epoch,
# max_ob_iter, score_margin, learning_rate and batch_size from this dict;
# the remaining entries are not read by that function.
trainer_hyper_para = dict(
    max_epoch=100,
    max_ob_iter=20,
    score_margin=1,
    num_class=1,
    num_layers=1,
    learning_rate=1e-3,
    batch_size=4000,
    training_loss_func="CCM",
    eval_loss_func="CCM",
)

# LSTM dimensions per embedding, as (embedding, input_size, hidden_size);
# every configuration uses a single layer.
model_hyper_para = {
    name: {"input_size": n_input, "hidden_size": n_hidden, "num_layers": 1}
    for name, n_input, n_hidden in (
        ("w2v", 51, 128),
        ("st", 385, 512),
        ("onehot", 48, 128),
    )
}

# Axes along which the data sets are organised.
scale_ratio = ["0.01", "0.05", "0.1", "0.5"]
data_set_type = ["source", "target"]
data_set_cat = ["train", "val"]
embedding_type = ["w2v", "st", "onehot"]
dataset_list_index = [scale_ratio, embedding_type, data_set_type, data_set_cat]

# Options passed to CaseDataSet.CaseDataset when the data sets are loaded.
earliness_requirement = True
folder_path = "../../Data/Training/scale/"

In [12]:
# Build data_sets_list[scale][embedding][category], where category is one of
# the entries of data_set_cat. Every data set is loaded with
# input_data="target"; the scale and category select the data version, and
# the embedding selects the embedding version.
data_sets_list = {}
for scale_num in scale_ratio:
    print("loading data scale: ", scale_num)
    data_sets_list[scale_num] = {
        embedding: {
            d_cat: CaseDataSet.CaseDataset(
                project_data_path=folder_path,
                split_pattern="",
                input_data="target",
                data_version="_" + d_cat + "_" + scale_num,
                embedding_version="_" + embedding,
                earliness_requirement=earliness_requirement,
            )
            for d_cat in data_set_cat
        }
        for embedding in embedding_type
    }

loading data scale:  0.01
loading data scale:  0.05
loading data scale:  0.1
loading data scale:  0.5


In [17]:
# Repetitions 6-9 of the "w2v" model at scale "0.5"; each repetition is saved
# under its own "r<i>" file suffix.
for i in range(6, 10):
    print("Training iteration: ", i)
    train_scale_test_model(
        data_sets_list,
        model_hyper_para,
        trainer_hyper_para,
        scale="0.5",
        embedding="w2v",
        print_iter=False,
        file_suffix=f"r{i}",
    )

Training iteration:  6
Training iteration:  7
Training iteration:  8
Training iteration:  9


In [None]:
# Repetitions 6-9 at scale "0.5" for the other two embeddings, training "st"
# first and "onehot" second within each repetition.
for i in range(6, 10):
    print("Training iteration: ", i)
    run_suffix = f"r{i}"
    for embedding_name in ("st", "onehot"):
        train_scale_test_model(data_sets_list, model_hyper_para, trainer_hyper_para,
                               "0.5", embedding_name,
                               print_iter=False, file_suffix=run_suffix)

In [15]:
# Full sweep: ten repetitions for every scale ratio, and within each
# repetition one model per embedding in the order "st", "w2v", "onehot".
for scale_num in scale_ratio:
    for i in range(10):
        print("Training iteration: ", i)
        run_suffix = f"r{i}"
        for embedding_name in ("st", "w2v", "onehot"):
            train_scale_test_model(data_sets_list, model_hyper_para, trainer_hyper_para,
                                   scale_num, embedding_name,
                                   print_iter=False, file_suffix=run_suffix)
    

Training iteration:  0
Training iteration:  1
Training iteration:  2
Training iteration:  3
Training iteration:  4
Training iteration:  5
Training iteration:  6
Training iteration:  7
Training iteration:  8
Training iteration:  9
Training iteration:  0
Training iteration:  1
Training iteration:  2
Training iteration:  3
Training iteration:  4
Training iteration:  5
Training iteration:  6
Training iteration:  7
Training iteration:  8
Training iteration:  9
Training iteration:  0
Training iteration:  1
Training iteration:  2
Training iteration:  3
Training iteration:  4
Training iteration:  5
Training iteration:  6
Training iteration:  7
Training iteration:  8
Training iteration:  9
Training iteration:  0
Training iteration:  1
Training iteration:  2
Training iteration:  3
Training iteration:  4
Training iteration:  5
Training iteration:  6


KeyboardInterrupt: 