## Training Full Model with Vt_Pretrained MLP Regression

In [1]:
import torch
import pandas as pd
import numpy as np
from torch import nn
from torch.utils.data import DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

In [2]:
if torch.cuda.is_available():
    device = 'cuda:1'
else:
    device = 'cpu'

## 1) Load Vt Model

In [3]:
class VtMLP(nn.Module):
  '''
    Multilayer Perceptron for regression.
  '''
  def __init__(self):
    super().__init__()
    self.layers = nn.Sequential(
      nn.Linear(5, 64),
      nn.ReLU(),
      nn.Linear(64, 32),
      nn.ReLU(),
      nn.Linear(32, 1)
    )


  def forward(self, x):
    '''
      Forward pass
    '''
    return self.layers(x)

vt_model = VtMLP()
vt_model.load_state_dict(torch.load('../checkpoint/vt_mlp_model.pt'))
vt_model.eval() # we pretrained this model... so we'll use this model for inference

VtMLP(
  (layers): Sequential(
    (0): Linear(in_features=5, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=1, bias=True)
  )
)

## 2) Make dataset for model

In [4]:
class FullWithVtDataset(torch.utils.data.Dataset):
    def __init__(self, X, y, scale_data=True):
        if not torch.is_tensor(X) and not torch.is_tensor(y):
            if scale_data:
                vt = vt_model(torch.Tensor(X)).detach().numpy()
                X = np.concatenate([X, vt], 1)
                self.scaler_x = StandardScaler().fit(X)
                # self.scaler_y = StandardScaler().fit(y)
                self.scaler_y = MinMaxScaler(feature_range=(0,10)).fit(y)
                X = self.scaler_x.transform(X)
                y = self.scaler_y.transform(y)
            self.X = torch.from_numpy(X)
            self.y = torch.from_numpy(y)
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, i):
        return self.X[i], self.y[i]   

In [5]:
full_df = pd.read_csv('../data_full/full_dataset.csv')
len(full_df)


86832

In [6]:
full_X = full_df[['W','L','T','Vgs','Vds']].to_numpy()
print(len(full_X))
full_y = full_df[['Ids']].to_numpy()
print(len(full_y))
dataset = FullWithVtDataset(full_X, full_y)
print(len(dataset))

86832
86832
86832


In [7]:
train_set, test_set = torch.utils.data.random_split(dataset, [69466,17366])
train_loader = torch.utils.data.DataLoader(train_set, batch_size=32, shuffle=True, num_workers=1)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=32, shuffle=False, num_workers=1)

## 3) Make Full Model

In [8]:
class FullWithVtMLP(nn.Module):
    '''
    Multilayer Perceptron for regression.
    '''
    def __init__(self):
        super().__init__()
        self.layers = nn.Sequential(
        nn.Linear(6, 128),
        nn.ReLU(),
        nn.Linear(128, 256),
        nn.ReLU(),
        nn.Linear(256, 128),
        nn.ReLU(),
        nn.Linear(128, 32),
        nn.ReLU(),
        nn.Linear(32, 1),
        nn.Softplus()
        # nn.Sigmoid()
        )


    def forward(self, x):
        '''
        Forward pass
        '''
        return self.layers(x)

mlp = FullWithVtMLP().to(device)
# mlp.load_state_dict(torch.load('../checkpoint/full_vt_mlp_model.pt'))

## 4) Set Loss and Optim

In [9]:
class NRMSELoss(nn.Module):
    def __init__(self, eps=1e-8):
        super().__init__()
        self.eps = eps
    def forward(self, y_hat, y):
        log_abs_y_hat = torch.log(torch.abs(y_hat)+self.eps)
        log_abs_y = torch.log(torch.abs(y)+self.eps)
        log_abs_delta = log_abs_y_hat - log_abs_y
        delta = y_hat - y
        len_delta = y_hat.size(dim=0)
        return torch.sqrt((1/len_delta)*torch.sum((log_abs_delta/(log_abs_y+self.eps))**2)) + torch.sqrt((1/len_delta)*torch.sum((delta/(y+self.eps))**2))

class NRMSETrainingLoss(nn.Module):
    def __init__(self, eps=1e-8):
        super().__init__()
        self.eps = eps
        self.mse = nn.MSELoss()
    def forward(self, y_hat, y):
        delta = y_hat - y
        len_delta = y_hat.size(dim=0)
        return torch.sqrt((1/len_delta)*torch.sum((delta/(y+self.eps))**2)) + self.mse(y, y_hat)

loss_function = NRMSETrainingLoss()
# loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(mlp.parameters(), lr=1e-1)
# optimizer = torch.optim.SGD(mlp.parameters(), lr=1e-3, momentum=0.9)
# scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=1e-3, 
#                                               step_size_up=5, max_lr=1e-3, 
#                                               gamma=0.5, mode='exp_range')

## 5) Training

In [10]:
# Run the training loop
for epoch in range(0, 20): # 20 epochs at maximum
    
    # Print epoch
    print(f'Starting epoch {epoch+1}')
    
    # Set current loss value
    current_loss = 0.0
    
    # Iterate over the DataLoader for training data
    for i, data in enumerate(train_loader, 0):
        # Get and prepare inputs
        inputs, targets = data
        inputs, targets = inputs.float().to(device), targets.float().to(device)
        targets = targets.reshape((targets.shape[0], 1))
        
        # Zero the gradients
        optimizer.zero_grad()
        
        # Perform forward pass
        outputs = mlp(inputs)
        
        # Compute loss
        loss = loss_function(outputs, targets)
        
        # Perform backward pass
        loss.backward()
        
        # Perform optimization
        optimizer.step()
        
        # Print statistics
        current_loss += loss.item()
        if i % 1000 == 0:
            print('Loss after mini-batch %5d: %.3f' %
                (i + 1, current_loss / (i+1)*32))
            current_loss = 0.0

# Process is complete.
print('Training process has finished.')

Starting epoch 1
Loss after mini-batch     1: 6072560128.000
Loss after mini-batch  1001: 127.208
Loss after mini-batch  2001: 63.761
Starting epoch 2
Loss after mini-batch     1: 124.925
Loss after mini-batch  1001: 127.128
Loss after mini-batch  2001: 64.016
Starting epoch 3
Loss after mini-batch     1: 133.508
Loss after mini-batch  1001: 127.718
Loss after mini-batch  2001: 63.572
Starting epoch 4
Loss after mini-batch     1: 128.240
Loss after mini-batch  1001: 127.315
Loss after mini-batch  2001: 63.954
Starting epoch 5
Loss after mini-batch     1: 144.905
Loss after mini-batch  1001: 127.545
Loss after mini-batch  2001: 63.706
Starting epoch 6
Loss after mini-batch     1: 124.935


KeyboardInterrupt: 

In [None]:
torch.save(mlp.state_dict(), '../checkpoint/full_vt_mlp_model_nrmse.pt')

## 6) Evaluation

In [11]:
predictions = torch.tensor([], dtype=torch.float).to(device)
actual = torch.tensor([], dtype=torch.float).to(device)
with torch.no_grad():
    mlp.eval()
    for data in test_loader:
        inputs, values = data
        inputs, values = inputs.float().to(device), values.float().to(device)
        values = values.reshape((values.shape[0], 1))

        outputs = mlp(inputs)
        # outputs = dataset.scaler_y.inverse_transform(outputs)
        predictions = torch.cat((predictions, outputs), 0)
        # values = dataset.scaler_y.inverse_transform(values)
        actual = torch.cat((actual, values), 0)
    
predictions = predictions.cpu().numpy()
actual = actual.cpu().numpy()

eval_loss = NRMSELoss()

predictions = dataset.scaler_y.inverse_transform(predictions)
actual = dataset.scaler_y.inverse_transform(actual)

nrmse = eval_loss(torch.tensor(predictions), torch.tensor(actual))

print(predictions)
print(actual)

print(nrmse)

[[9.4247e-15]
 [9.4247e-15]
 [9.4247e-15]
 ...
 [9.4247e-15]
 [9.4247e-15]
 [9.4247e-15]]
[[9.424600e-13]
 [3.348400e-14]
 [9.566200e-15]
 ...
 [1.502000e-13]
 [2.052800e-06]
 [7.666499e-12]]
tensor(1.5664)
