## Training Full Model with Vt_Pretrained MLP Regression

In [1]:
import torch
import pandas as pd
import numpy as np
from torch import nn
from torch.utils.data import DataLoader
from sklearn.preprocessing import StandardScaler

In [2]:
if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'

## 1) Load Vt Model

In [3]:
class VtMLP(nn.Module):
  '''
    Multilayer Perceptron for regression.
  '''
  def __init__(self):
    super().__init__()
    self.layers = nn.Sequential(
      nn.Linear(5, 64),
      nn.ReLU(),
      nn.Linear(64, 32),
      nn.ReLU(),
      nn.Linear(32, 1)
    )


  def forward(self, x):
    '''
      Forward pass
    '''
    return self.layers(x)

vt_model = VtMLP()
vt_model.load_state_dict(torch.load('../checkpoint/vt_mlp_model.pt'))
vt_model.eval() # we pretrained this model... so we'll use this model for inference

VtMLP(
  (layers): Sequential(
    (0): Linear(in_features=5, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=1, bias=True)
  )
)

## 2) Make dataset for model

In [4]:
class FullWithVtDataset(torch.utils.data.Dataset):
    def __init__(self, X, y, scale_data=True):
        if not torch.is_tensor(X) and not torch.is_tensor(y):
            if scale_data:
                X = StandardScaler().fit_transform(X)
            vt = vt_model(torch.Tensor(X)).detach().numpy()
            X = np.concatenate([X, vt], 1)
            self.X = torch.from_numpy(X)
            self.y = torch.from_numpy(y)
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, i):
        return self.X[i], self.y[i]   

In [5]:
full_df = pd.read_csv('../data_full/full_dataset.csv')
len(full_df)


86832

In [6]:
full_X = full_df[['W','L','T','Vgs','Vds']].to_numpy()
print(len(full_X))
full_y = full_df[['Ids']].to_numpy()
print(len(full_y))
dataset = FullWithVtDataset(full_X, full_y)
print(len(dataset))

86832
86832
86832


In [7]:
train_set, test_set = torch.utils.data.random_split(dataset, [69466,17366])
train_loader = torch.utils.data.DataLoader(train_set, batch_size=32, shuffle=True, num_workers=1)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=32, shuffle=False, num_workers=1)

## 3) Make Full Model

In [8]:
class FullWithVtMLP(nn.Module):
    '''
    Multilayer Perceptron for regression.
    '''
    def __init__(self):
        super().__init__()
        self.layers = nn.Sequential(
        nn.Linear(6, 64),
        nn.ReLU(),
        nn.Linear(64, 32),
        nn.ReLU(),
        nn.Linear(32, 1)
        )


    def forward(self, x):
        '''
        Forward pass
        '''
        return self.layers(x)

mlp = FullWithVtMLP().to(device)
# mlp.load_state_dict(torch.load('../checkpoint/full_vt_mlp_model.pt'))

<All keys matched successfully>

## 4) Set Loss and Optim

In [9]:
class NRMSELoss(nn.Module):
    def __init__(self, eps=1e-8):
        super().__init__()
        self.eps = eps
    def forward(self, y_hat, y):
        log_abs_y_hat = torch.log(torch.abs(y_hat))
        log_abs_y = torch.log(torch.abs(y))
        log_abs_delta = log_abs_y_hat - log_abs_y
        delta = y_hat - y
        # return torch.sqrt(torch.mean(log_abs_delta/log_abs_y)**2+self.eps)
        return torch.sqrt(torch.mean(log_abs_delta/log_abs_y)**2+self.eps) + torch.sqrt(torch.mean(delta/y)**2+self.eps)

loss_function = NRMSELoss()
# loss_function = nn.MSELoss()
optimizer = torch.optim.SGD(mlp.parameters(), lr=1e-4, momentum=0.9)
scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=1e-4, 
                                              step_size_up=5, max_lr=1e-4, 
                                              gamma=0.5, mode='exp_range')

## 5) Training

In [10]:
# Run the training loop
for epoch in range(0, 100): # 20 epochs at maximum
    
    # Print epoch
    print(f'Starting epoch {epoch+1}')
    
    # Set current loss value
    current_loss = 0.0
    
    # Iterate over the DataLoader for training data
    for i, data in enumerate(train_loader, 0):
        # Get and prepare inputs
        inputs, targets = data
        inputs, targets = inputs.float().to(device), targets.float().to(device)
        targets = targets.reshape((targets.shape[0], 1))
        
        # Zero the gradients
        optimizer.zero_grad()
        
        # Perform forward pass
        outputs = mlp(inputs)
        
        # Compute loss
        loss = loss_function(outputs, targets)
        
        # Perform backward pass
        loss.backward()
        
        # Perform optimization
        optimizer.step()
        
        # Print statistics
        current_loss += loss.item()
        if i % 1000 == 0:
            print('Loss after mini-batch %5d: %.3f' %
                (i + 1, current_loss / 100*32))
            current_loss = 0.0

# Process is complete.
print('Training process has finished.')

Starting epoch 1
Loss after mini-batch     1: 152599019.520
Loss after mini-batch  1001: nan
Loss after mini-batch  2001: nan
Starting epoch 2
Loss after mini-batch     1: nan
Loss after mini-batch  1001: nan
Loss after mini-batch  2001: nan
Starting epoch 3
Loss after mini-batch     1: nan


KeyboardInterrupt: 

In [None]:
torch.save(mlp.state_dict(), '../checkpoint/full_vt_mlp_model_nrmse.pt')

## 6) Evaluation

In [None]:
predictions = torch.tensor([], dtype=torch.float).to(device)
actual = torch.tensor([], dtype=torch.float).to(device)
with torch.no_grad():
    mlp.eval()
    for data in test_loader:
        inputs, values = data
        inputs, values = inputs.float().to(device), values.float().to(device)
        values = values.reshape((values.shape[0], 1))

        outputs = mlp(inputs)
        predictions = torch.cat((predictions, outputs), 0)
        actual = torch.cat((actual, values), 0)
    
# predictions = predictions.cpu().numpy()
# actual = actual.cpu().numpy()

eval_loss = NRMSELoss()

nrmse = eval_loss(predictions, actual)

print(nrmse)
        

tensor(5.4321e+08, device='cuda:0')
