In [1]:
import torch
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler
import torchvision.datasets as dset
import torchvision.transforms as T
import numpy as np

# Mount Google Drive inside the Colab runtime at /content/gdrive/.
from google.colab import drive
drive.mount('/content/gdrive/', force_remount=True)

# Put the Drive notebook folder first on the module search path so the
# project_utilities imports below can be resolved from it.
# NOTE(review): assumes project_utilities.py is stored in that folder -- confirm.
import sys
sys.path.insert(0,'/content/gdrive/My Drive/Colab Notebooks')

## Loss function and efficiency metrics are already provided to you.
from project_utilities import Loss
from project_utilities import efficiency
from project_utilities import ValueSet

Mounted at /content/gdrive/


In [2]:
# Run on the first CUDA GPU when one is visible to PyTorch; otherwise use the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda:0')
else:
    DEVICE = torch.device('cpu')

cd into project data

In [3]:
# Work from the project data folder: later cells open ./Set_<id>.npz and the
# saved ./*.pt checkpoint through paths relative to this directory.
%cd /content/gdrive/MyDrive/Project/data

/content/gdrive/MyDrive/Project/data


In [4]:
# List the folder to confirm the Set_*.npz archives and *.pt checkpoints are present.
%ls

bi_rnn.pt                Set_1.npz   Set_36.npz  Set_52.npz  Set_69.npz
linear.pt                Set_20.npz  Set_37.npz  Set_53.npz  Set_6.npz
RCNN_h64_l3_loss0351.pt  Set_21.npz  Set_38.npz  Set_54.npz  Set_70.npz
RCNN_h64_l3_loss0356.pt  Set_22.npz  Set_39.npz  Set_55.npz  Set_71.npz
RCNN_h64_l3.pt           Set_23.npz  Set_3.npz   Set_56.npz  Set_72.npz
RCNN.pt                  Set_24.npz  Set_40.npz  Set_57.npz  Set_73.npz
res_cnn.pt               Set_25.npz  Set_41.npz  Set_58.npz  Set_74.npz
RNN.pt                   Set_26.npz  Set_42.npz  Set_59.npz  Set_75.npz
Set_10.npz               Set_27.npz  Set_43.npz  Set_5.npz   Set_76.npz
Set_11.npz               Set_28.npz  Set_44.npz  Set_60.npz  Set_77.npz
Set_12.npz               Set_29.npz  Set_45.npz  Set_61.npz  Set_78.npz
Set_13.npz               Set_2.npz   Set_46.npz  Set_62.npz  Set_79.npz
Set_14.npz               Set_30.npz  Set_47.npz  Set_63.npz  Set_7.npz
Set_15.npz               Set_31.npz  Set_48.npz  Set_64.npz  Set_8

In [5]:
class MyDataset(torch.utils.data.Dataset):
    """In-memory dataset backed by a single ``Set_<setID>.npz`` archive.

    The archive must store its inputs under the key ``'X'`` and its targets
    under the key ``'y'``. Both arrays are converted to tensors and every NaN
    entry is replaced by 0.0. Sample ``i`` is the pair ``(X[i], y[i])``.
    """

    def __init__(self, setID, data_dir="."):
        """Load one archive into memory and zero out its NaN entries.

        Args:
            setID: identifier used to build the file name ``Set_<setID>.npz``.
            data_dir: directory that holds the archive. Defaults to the
                current working directory, so ``MyDataset(setID)`` keeps
                opening ``./Set_<setID>.npz``.
        """
        archive_path = f"{data_dir}/Set_{setID}.npz"
        # np.load on an .npz returns a lazily-read archive that keeps the file
        # open until it is closed; the with-block releases the handle as soon
        # as both arrays have been copied into tensors.
        with np.load(archive_path) as npz_files_content:
            self.X_set = torch.tensor(npz_files_content['X'])
            self.y_set = torch.tensor(npz_files_content['y'])

        # Replace all NaNs with 0.0 (in place).
        self.y_set[torch.isnan(self.y_set)] = 0.0
        self.X_set[torch.isnan(self.X_set)] = 0.0

    def __len__(self):
        """Return the total number of samples (the length of the target tensor)."""
        return len(self.y_set)

    def __getitem__(self, index):
        """Return the ``(X, y)`` pair stored at ``index``."""
        X = self.X_set[index]
        y = self.y_set[index]
        return X, y

load loss model

In [7]:
# Shared loss object used by validate().
# NOTE(review): the meaning of the 1e-5 argument is defined by project_utilities.Loss -- confirm there.
loss_model = Loss(1e-5)

rnn model

In [8]:
# Bidirectional recurrent neural network (one output value per time step)
class BiRNN_2(nn.Module):
    """Two stacked bidirectional LSTMs that emit one scalar per time step.

    ``lstm_1`` encodes the sequence into ``2 * hidden_size`` features per
    step; ``lstm_2`` reduces those to a single value per direction, and the
    two directions are averaged.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        """Build the two LSTM stages.

        Args:
            input_size: number of features per time step fed to ``lstm_1``.
            hidden_size: hidden units per direction in ``lstm_1``.
            num_layers: number of stacked layers in ``lstm_1``.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm_1 = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True)
        # Single-layer bidirectional LSTM with one hidden unit per direction.
        self.lstm_2 = nn.LSTM(2*hidden_size, 1, 1, batch_first=True, bidirectional=True)

    def forward(self, x):
        """Run the network on a batch.

        Args:
            x: tensor laid out as (batch, input_size, seq_length); the last
                two axes are swapped before the batch-first LSTMs.

        Returns:
            Tensor of shape (batch, seq_length). The batch axis is always
            kept, including for a batch that holds a single sample.
        """
        # -> (batch, seq_length, input_size)
        x = torch.transpose(x, 2, 1)

        # -> (batch, seq_length, 2 * hidden_size)
        out, _ = self.lstm_1(x)

        # -> (batch, seq_length, 2): one value per direction
        out, _ = self.lstm_2(out)

        # Average the forward and backward directions -> (batch, seq_length).
        # No squeeze(): the result is already 2-D, and squeezing would drop
        # the batch axis whenever a batch contains exactly one sample.
        return (out[:, :, 0] + out[:, :, 1]) / 2

Validation function used for evaluation

In [13]:
def validate(model, dataset_num=11, batch_size=64):
    """Evaluate ``model`` on one stored dataset.

    Args:
        model: network to evaluate. It is switched to eval mode and left in
            that mode when the function returns.
        dataset_num: ID of the ``Set_<dataset_num>.npz`` archive, loaded
            through ``MyDataset``.
        batch_size: number of samples per forward pass (default 64).

    Returns:
        Tuple ``(mean_loss, eff_rate, fp_rate)``: the average of the
        per-batch values returned by ``loss_model.forward``, followed by the
        ``eff_rate`` and ``fp_rate`` attributes of the ``ValueSet``
        accumulated over every sample.
    """
    loss_val = []
    eff = ValueSet(0, 0, 0, 0)

    # Send each batch to the device the model actually lives on; fall back to
    # the notebook-wide DEVICE for a model without parameters.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else DEVICE

    # switch to evaluate mode
    model.eval()
    with torch.no_grad():
        val_set = MyDataset(dataset_num)
        # Fixed sample order: the loss is averaged per batch, so shuffling
        # would change which samples land in the final partial batch and make
        # the reported loss vary from run to run.
        val_generator = torch.utils.data.DataLoader(val_set,
                                                    batch_size=batch_size,
                                                    shuffle=False)
        for X_val, y_val in val_generator:
            # Forward pass
            X_val = X_val.to(device)
            y_val = y_val.to(device)
            val_outputs = model(X_val)
            loss_val.append(loss_model.forward(val_outputs, y_val))
            # The efficiency metric is computed per sample on NumPy arrays.
            for label, output in zip(y_val.cpu().numpy(), val_outputs.cpu().numpy()):
                eff += efficiency(label, output, difference = 5.0,
                                  threshold = 1e-2, integral_threshold = 0.2,
                                  min_width = 3)
    return sum(loss_val)/len(loss_val), eff.eff_rate, eff.fp_rate

creates model

In [14]:
# Build the network (input_size=4, hidden_size=64, num_layers=3) and move its
# parameters to DEVICE in one step; Module.to() returns the module itself.
model = BiRNN_2(input_size=4, hidden_size=64, num_layers=3).to(DEVICE)

loads state_dict

In [15]:
# map_location remaps the stored tensors onto DEVICE, so a checkpoint saved on
# a GPU still loads when this runtime only has a CPU.
# NOTE(review): torch.load unpickles the file -- only open checkpoints from a trusted source.
state_dict = torch.load('./RCNN_h64_l3_loss0351.pt', map_location=DEVICE)
model.load_state_dict(state_dict)
# This notebook only evaluates the model, so put it in eval mode rather than
# train mode. eval() returns the module, so the cell still displays its layout.
model.eval()

BiRNN_2(
  (lstm_1): LSTM(4, 64, num_layers=3, batch_first=True, bidirectional=True)
  (lstm_2): LSTM(128, 1, batch_first=True, bidirectional=True)
)

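Evaluates the model on new data. (Note: the data is assumed to be in Google Drive under Project/data and is expected to have the name Set_81.npz. The cell directly below scores the existing Set_11.npz instead; the "evaluation code" cells further down use Set_81.npz.)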

In [23]:
# Score the loaded model on Set_11.npz; validate() returns (loss, eff_rate, fp_rate).
(loss_val, eff_rate, fp_rate) = validate(dataset_num=11, model=model)

In [24]:
# Report the three validation metrics, one per line.
print(f"Loss: {loss_val}", f"Eff: {eff_rate}", f"FP: {fp_rate}", sep="\n")

Loss: 0.03497587889432907
Eff: 0.8650372336503723
FP: 0.26294741051789644


evaluation code

In [None]:
# Score the loaded model on Set_81.npz; validate() returns (loss, eff_rate, fp_rate).
(loss_val, eff_rate, fp_rate) = validate(dataset_num=81, model=model)

In [None]:
# Report the three evaluation metrics, one per line.
print(f"Loss: {loss_val}", f"Eff: {eff_rate}", f"FP: {fp_rate}", sep="\n")