# Import

In [None]:
from func.transformer import TransformerDataset, TransformerModel, MyLoss
from func.execution import eval_epoch, fit
import numpy as np

import torch
from torch.utils.data import DataLoader

import pickle

torch.manual_seed(0)

# Set up device

In [2]:
if(torch.has_mps):
    device = torch.device('mps')
    print('Training on Mac M1! Device was set as "mps"')
else:
    device = torch.device('cpu')
    print('Training on CPU! Device was set as "cpu"') 

Training on Mac M1! Device was set as "mps"


# Parameter dictionary

In [3]:
params = {'batch_size': 64,
          'lr': 0.0001,
          'func':'log10',
          'stat_path': 'stat_test/',
          'version': 'draft',
          'train_percent': 0.7,
          'val_percent':0.15,
          'epoch': 50,
          'max_len_i': 187,
          'max_pos': 187,
          'emb_size': 128,
          'num_heads': 8,
          'num_encoder_layers': 2,
          'num_decoder_layers': 2,
          'dropout_p': 0.1,
         }

# Data

In [4]:
combine = torch.Tensor([])
id2cost = torch.Tensor([])
cost_tensor = torch.Tensor([])

# Loading data

In [11]:
# max_visit=187, which is the default number
data = TransformerDataset(combine)

+ Splitting data

In [12]:
num_patients = len(data)

# divide data into training/validation/testing sets
train_percent = params['train_percent']
val_percent = params['val_percent']

num_train = int(np.around(train_percent * num_patients))
num_val = int(np.around(val_percent * num_patients))
num_test = num_patients - num_train - num_val
print(f"Number of patients for training is: {num_train}")
print(f"Number of patients for validation is: {num_val}")
print(f"Number of patients for testing is: {num_test}")

Number of patients for training is: 3421
Number of patients for validation is: 733
Number of patients for testing is: 733


In [13]:
train, val, test = torch.utils.data.random_split(data, [num_train, num_val, num_test])
print(f"Length for training dataset is: {len(train)}")
print(f"Length for validation dataset is: {len(val)}")
print(f"Length for testing dataset is: {len(test)}")

Length for training dataset is: 3421
Length for validation dataset is: 733
Length for testing dataset is: 733


+ Batchify DataLoader

In [14]:
BATCH_SIZE = params['batch_size']
train_DataLoader = DataLoader(dataset=train, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
val_DataLoader = DataLoader(dataset=val, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
test_DataLoader = DataLoader(dataset=test, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)

# Set up model

In [15]:
# parameter setting
max_len_i = params['max_len_i']

cost_vocab_size = len(id2cost_type)  # 53
age_vocab_size = dict_vocab_size['age']  # 93
gender_vocab_size = dict_vocab_size['gender']  # 2
diff_vocab_size = dict_vocab_size['diff']  # 5714
department_vocab_size = dict_vocab_size['department']  # 15
specialist_vocab_size = dict_vocab_size['specialist']  # 34
visit_type_vocab_size = dict_vocab_size['visit_type']  # 8

max_pos = params['max_pos']
emb_size = params['emb_size']
num_heads = params['num_heads']
num_encoder_layers = params['num_encoder_layers'] 
num_decoder_layers = params['num_decoder_layers']
dropout_p = params['dropout_p']

In [16]:
# model initiation
model = TransformerModel(cost_vocab_size=cost_vocab_size,
                          age_vocab_size=age_vocab_size,
                          gender_vocab_size=gender_vocab_size,
                          diff_vocab_size=diff_vocab_size,
                          department_vocab_size=department_vocab_size,
                          specialist_vocab_size=specialist_vocab_size,
                          visit_type_vocab_size=visit_type_vocab_size,
                          max_pos=max_pos,
                          emb_size=emb_size,
                          num_heads=num_heads,
                          num_encoder_layers=num_encoder_layers,
                          num_decoder_layers=num_decoder_layers,
                          dropout_p=dropout_p,
                         ).to(device)

# Training and validating

In [17]:
loss_function = MyLoss()
lr = params['lr']
optimizer = torch.optim.Adam(model.parameters(), lr = lr)
epochs = params['epoch']

In [18]:
train_summary, val_summary, best_model = fit(train_DataLoader, val_DataLoader, model, optimizer, loss_function, id2cost, cost_tensor, params, device, epochs)

--------------------------------------------- Epoch 1 ---------------------------------------------
train batch: 1/53, train loss: 12.347
top3 acc: 2.01%, top5 acc: 4.31%, top10 acc: 9.48%
MAE: 13352.039459228516
MSE: 179154795.86754605
RMSE: 13384.871903292391
R-Squared: -280.73761408104735
eval batch: 1/11, eval loss: 11.093
top3 acc: 2.66%, top5 acc: 3.19%, top10 acc: 6.38%
MAE: 12508.011657714844
MSE: 156851045.2101695
RMSE: 12524.018732426484
R-Squared: -1557.175367049934
Epoch 1 summary:        
	train -> avg loss: 12.347        
	         MAE:13352.039, MSE:179154795.868, RMSE:13384.872, R2: -280.738         
	         top3 acc: 2.01%, top5 acc: 4.31%, top10 acc: 9.48%        
	val   -> avg loss: 11.093        
	         MAE:12508.012, MSE:156851045.210, RMSE:12524.019, R2: -1557.175         
	         top3 acc: 2.66%, top5 acc: 3.19%, top10 acc: 6.38%        
	time  -> 5.752218961715698s
The best_model with least val loss is in epoch 1:
avg total loss: 11.092607498168945
MAE:12

In [19]:
# Store the best model
PATH_model = params['stat_path'] + 'model_' + params['func'] + '_' + params['version']
torch.save(best_model.state_dict(), PATH_model)

# Test

In [20]:
# Load the best model's state_dict
loaded_model = TransformerModel(cost_vocab_size=cost_vocab_size,
                                 age_vocab_size=age_vocab_size,
                                 gender_vocab_size=gender_vocab_size,
                                 diff_vocab_size=diff_vocab_size,
                                 department_vocab_size=department_vocab_size,
                                 specialist_vocab_size=specialist_vocab_size,
                                 visit_type_vocab_size=visit_type_vocab_size,
                                 max_pos=max_pos,
                                 emb_size=emb_size,
                                 num_heads=num_heads,
                                 num_encoder_layers=num_encoder_layers,
                                 num_decoder_layers=num_decoder_layers,
                                 dropout_p=dropout_p,
                                ).to(device)
loaded_model.load_state_dict(torch.load(PATH_model))

<All keys matched successfully>

In [21]:
test_results = eval_epoch(test_DataLoader, model, loss_function, id2cost, cost_tensor, params, device)

eval batch: 1/11, eval loss: 11.074
top3 acc: 1.45%, top5 acc: 2.02%, top10 acc: 5.78%
MAE: 12439.210876464844
MSE: 155442007.55766794
RMSE: 12467.638411410075
R-Squared: -642.0649244024628


In [22]:
# print results
epoch_loss_test = test_results[0]
epoch_top3_test = test_results[1]
epoch_top5_test = test_results[2]
epoch_top10_test = test_results[3]
epoch_mae_test = test_results[4]
epoch_mse_test = test_results[5]
epoch_rmse_test = test_results[6]
epoch_r2_test = test_results[7]
print(f"Test summary:\
        \n\tavg loss: {epoch_loss_test:.3f}\
        \n\tMAE:{epoch_mae_test:.3f}, MSE:{epoch_mse_test:.3f}, RMSE:{epoch_rmse_test:.3f}, R2: {epoch_r2_test:.3f} \
        \n\ttop3 acc: {epoch_top3_test:.2f}%, top5 acc: {epoch_top5_test:.2f}%, top10 acc: {epoch_top10_test:.2f}%")

Test summary:        
	avg loss: 11.074        
	MAE:12439.211, MSE:155442007.558, RMSE:12467.638, R2: -642.065         
	top3 acc: 1.45%, top5 acc: 2.02%, top10 acc: 5.78%
