In [1]:
# Imports
import torch
import torch.nn.functional as F  # Parameterless functions, like (some) activation functions
import torchvision.datasets as datasets  # Standard datasets
import torchvision.transforms as transforms  # Transformations we can perform on our dataset for augmentation
from torch import optim  # For optimizers like SGD, Adam, etc.
from torch import nn  # All neural network modules
from torch.utils.data import (
    DataLoader,
)  # Gives easier dataset managment by creating mini batches etc.
from tqdm import tqdm  # For a nice progress bar!
import sys
import numpy as np
import pandas as pd

from torchviz import make_dot

In [2]:
# Optional model name taken from the command line. Guarded so that launching
# without a second argv entry yields None instead of raising IndexError.
# NOTE(review): under a Jupyter kernel sys.argv[1] is presumably a launcher
# flag rather than a user-supplied name — confirm before relying on model_name.
model_name = sys.argv[1] if len(sys.argv) > 1 else None

# Set device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Hyperparameters
input_size = 28  # features per time step passed to the recurrent layer
hidden_size = 256  # width of the recurrent hidden state
num_layers = 2  # stacked recurrent layers
num_classes = 10  # output logits of the linear head
sequence_length = 28  # time steps per sequence; sizes the linear head of the models
learning_rate = 0.005
batch_size = 64
num_epochs = 3

In [3]:
# Recurrent neural network (many-to-one)
class RNN(nn.Module):
    """Vanilla RNN classifier whose linear head sees the outputs of ALL time steps.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the recurrent hidden state.
        num_layers: Number of stacked recurrent layers.
        num_classes: Number of logits produced by the linear head.
        seq_len: Number of time steps of every input sequence; it fixes the
            in-features of the linear head (hidden_size * seq_len). When
            omitted, the notebook-level global ``sequence_length`` is read at
            construction time, which is what this class did before the
            parameter existed.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, seq_len=None):
        super(RNN, self).__init__()
        if seq_len is None:
            # Backward-compatible fallback to the notebook global.
            seq_len = sequence_length
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size * seq_len, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes).

        ``x`` is indexed as (batch, seq, feature) because the recurrent layer
        is built with ``batch_first=True``.
        """
        # Zero initial hidden state, created on the input's device and dtype so
        # the module does not depend on a global `device` variable.
        h0 = torch.zeros(
            self.num_layers, x.size(0), self.hidden_size, device=x.device, dtype=x.dtype
        )

        # Forward propagate the RNN
        out, _ = self.rnn(x, h0)
        # Flatten the outputs of every time step into one vector per sample
        out = out.reshape(out.shape[0], -1)

        # Map the flattened sequence of hidden states to class logits
        out = self.fc(out)
        return out

In [4]:
# Recurrent neural network with GRU (many-to-one)
class RNN_GRU(nn.Module):
    """GRU classifier whose linear head sees the outputs of ALL time steps.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the GRU hidden state.
        num_layers: Number of stacked GRU layers.
        num_classes: Number of logits produced by the linear head.
        seq_len: Number of time steps of every input sequence; it fixes the
            in-features of the linear head (hidden_size * seq_len). When
            omitted, the notebook-level global ``sequence_length`` is read at
            construction time, which is what this class did before the
            parameter existed.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, seq_len=None):
        super(RNN_GRU, self).__init__()
        if seq_len is None:
            # Backward-compatible fallback to the notebook global.
            seq_len = sequence_length
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size * seq_len, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes).

        ``x`` is indexed as (batch, seq, feature) because the GRU is built
        with ``batch_first=True``.
        """
        # Zero initial hidden state, created on the input's device and dtype so
        # the module does not depend on a global `device` variable.
        h0 = torch.zeros(
            self.num_layers, x.size(0), self.hidden_size, device=x.device, dtype=x.dtype
        )

        # Forward propagate GRU
        out, _ = self.gru(x, h0)
        # Flatten the outputs of every time step into one vector per sample
        out = out.reshape(out.shape[0], -1)

        # Map the flattened sequence of hidden states to class logits
        out = self.fc(out)
        return out

In [5]:
# Recurrent neural network with LSTM (many-to-one)
class RNN_LSTM(nn.Module):
    """LSTM classifier whose linear head sees the outputs of ALL time steps.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden and cell states.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of logits produced by the linear head.
        seq_len: Number of time steps of every input sequence; it fixes the
            in-features of the linear head (hidden_size * seq_len). When
            omitted, the notebook-level global ``sequence_length`` is read at
            construction time, which is what this class did before the
            parameter existed.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, seq_len=None):
        super(RNN_LSTM, self).__init__()
        if seq_len is None:
            # Backward-compatible fallback to the notebook global.
            seq_len = sequence_length
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size * seq_len, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes).

        ``x`` is indexed as (batch, seq, feature) because the LSTM is built
        with ``batch_first=True``.
        """
        # Zero initial hidden and cell states, created on the input's device and
        # dtype so the module does not depend on a global `device` variable.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)

        # Forward propagate LSTM
        out, _ = self.lstm(
            x, (h0, c0)
        )  # out: tensor of shape (batch_size, seq_length, hidden_size)
        # Flatten the outputs of every time step into one vector per sample
        out = out.reshape(out.shape[0], -1)

        # Map the flattened sequence of hidden states to class logits
        out = self.fc(out)
        return out

In [6]:
# Load Data
# Training split: MNIST stored under dataset/ (download=True), images converted with ToTensor
train_dataset = datasets.MNIST(
    root="dataset/",
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Test split: same root and transform, train=False
test_dataset = datasets.MNIST(
    root="dataset/",
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

In [None]:
# Rebuild the GRU classifier with the current hyperparameters and restore its weights.
# map_location=device remaps the checkpoint tensors onto the active device, so a
# checkpoint written on a GPU machine still loads when only the CPU is available.
model = RNN_GRU(input_size, hidden_size, num_layers, num_classes).to(device)
model.load_state_dict(torch.load('./saved/minist_gru_model.pt', map_location=device))
# Switch to evaluation mode; as the last expression this also displays the module
model.eval()

In [None]:
# Display all model layer weights
# One line per learnable tensor of `model`: "<parameter name>: <shape>"
for name, para in model.named_parameters():
    print(f"{name}: {para.shape}")

In [None]:
# First training image, moved to the notebook-wide `device` (instead of a
# hard-coded "cuda") so the cell also runs on CPU-only machines.
# NOTE(review): squeeze(1) only removes dim 1 when it has size 1; presumably the
# image is (1, 28, 28), in which case this is a no-op and the leading dim acts as
# a batch of one — confirm.
x = train_loader.dataset[0][0].to(device).squeeze(1)

In [None]:
# Show the shape of the sample tensor x
x.shape

In [None]:
# Shape of the same training image without the squeeze, for comparison with x.shape.
# Uses the notebook-wide `device` instead of a hard-coded "cuda" so it runs without a GPU.
train_loader.dataset[0][0].to(device).shape

In [None]:
# Forward pass of the loaded model on the sample x; the bare `y` displays the result.
# Gradient tracking is left enabled here: a later cell passes y.mean() to make_dot.
y = model(x)
y

In [None]:
# Visualise the computation behind y.mean() with torchviz; the params dict supplies
# the model's parameter names for the graph.
make_dot(y.mean(), params=dict(model.named_parameters()))

In [None]:
from torch.utils.tensorboard import SummaryWriter

# Trace `model` on the sample input x and write the graph to torchlogs/ for TensorBoard.
# The context manager closes the writer even if add_graph raises; `writer` stays
# bound (closed) after the block, as it was after the explicit close() before.
with SummaryWriter("torchlogs/") as writer:
    writer.add_graph(model, x)

In [None]:
# Display `out`.
# NOTE(review): in this notebook `out` is only assigned by a later cell (the
# `model_mnist_gruNN_028h_01l.gru(mnist_input, h0)` call), so running the cells
# top to bottom on a fresh kernel would raise NameError here — confirm placement.
out

In [None]:
# Index 0 along the first two dims of `out`, keeping the whole last dim.
# NOTE(review): presumably (batch, seq, hidden), i.e. the first time step of the
# first sample — confirm. `out` is only assigned in a later cell of this notebook.
out[0, 0, :]

In [7]:
# Hyperparameters for the 28-hidden-unit, single-layer GRU checkpoint loaded below.
# These rebind the notebook-level names, so every later cell sees these values.
input_size = 28
hidden_size = 28
num_layers = 1
num_classes = 10
sequence_length = 28
learning_rate = 0.005
batch_size = 64
num_epochs = 3

# Rebuild the architecture and restore the saved weights. map_location=device
# remaps the checkpoint tensors onto the active device, so a checkpoint written
# on a GPU machine still loads when only the CPU is available.
model_mnist_gruNN_028h_01l = RNN_GRU(input_size, hidden_size, num_layers, num_classes).to(device)
model_mnist_gruNN_028h_01l.load_state_dict(
    torch.load('./saved/mnist_gruNN_028h_01l.pt', map_location=device)
)
model_mnist_gruNN_028h_01l.eval()

# Display all model layer weights
for name, para in model_mnist_gruNN_028h_01l.named_parameters():
    print(f"{name}: {para.shape}")

gru.weight_ih_l0: torch.Size([84, 28])
gru.weight_hh_l0: torch.Size([84, 28])
gru.bias_ih_l0: torch.Size([84])
gru.bias_hh_l0: torch.Size([84])
fc.weight: torch.Size([10, 784])
fc.bias: torch.Size([10])


In [9]:
# to save pytorch model to onnx
# torch.onnx.export(model_mnist_gruNN_028h_01l,               # model being run
#                   x,                         # model input (or a tuple for multiple inputs)
#                   "./saved/model_mnist_gruNN_028h_01l.onnx",   # where to save the model (can be a file or file-like object)
#                   export_params=True,        # store the trained parameter weights inside the model file
#                   opset_version=10,          # the ONNX version to export the model to
#                   do_constant_folding=True,  # whether to execute constant folding for optimization
#                   input_names = ['input'],   # the model's input names
#                   output_names = ['output'], # the model's output names
#                   dynamic_axes={'input' : {0 : 'batch_size'},    # variable length axes
#                                 'output' : {0 : 'batch_size'}})

In [10]:
# bias_ih_l[k] = the learnable input-hidden bias of the k-th layer (b_ir | b_iz | b_in)
# For this model the parameter listing reports torch.Size([84]) for gru.bias_ih_l0,
# i.e. the three gate biases of hidden_size = 28 concatenated.
model_mnist_gruNN_028h_01l.gru.bias_ih_l0

Parameter containing:
tensor([-0.4000,  0.1476, -0.0735, -0.4648, -0.0993,  0.1795, -0.3264,  0.0839,
         0.1239, -0.2155,  0.2136, -0.0884, -0.2522,  0.1115, -0.5031, -0.2736,
        -0.3979,  0.1080, -0.4118, -0.0069, -0.3473, -0.5394, -0.0344,  0.3104,
         0.0153, -0.3172, -0.0998, -0.1300,  0.3321,  0.3410, -0.3726,  0.3558,
         0.2861,  0.6554,  0.3276,  0.6268, -0.2474,  0.3228,  0.1673,  0.3894,
        -0.1630,  0.0657,  0.0359,  0.1218,  0.0825, -0.0739,  0.4201, -0.2237,
         0.1578,  0.1416,  0.3530,  0.2813, -0.2020, -0.0252, -0.1244, -0.1051,
        -0.0922,  0.1953,  0.2017, -0.0677,  0.0365, -0.5013,  0.1221,  0.2899,
         0.2015, -0.1788,  0.0382, -0.1419,  0.1193,  0.0515,  0.2526,  0.4405,
         0.0326, -0.0390,  0.2030, -0.0430, -0.1003,  0.0746, -0.3898,  0.0556,
         0.0702, -0.1435,  0.1217, -0.1801], device='cuda:0',
       requires_grad=True)

In [11]:
# Hidden-hidden bias of GRU layer 0; the parameter listing reports torch.Size([84]) for it.
# NOTE(review): presumably laid out as (b_hr | b_hz | b_hn), mirroring bias_ih_l0 — confirm against the PyTorch GRU docs.
model_mnist_gruNN_028h_01l.gru.bias_hh_l0

Parameter containing:
tensor([-0.3691,  0.0690, -0.0053, -0.1148, -0.2246, -0.0385, -0.2258,  0.2884,
         0.1681, -0.3050,  0.1956, -0.0092, -0.1746, -0.0905, -0.5100, -0.4589,
        -0.1954,  0.1527, -0.2799, -0.1340, -0.2656, -0.3069,  0.0447,  0.0908,
        -0.0516, -0.3764, -0.0749, -0.1740,  0.2364,  0.0821, -0.3590,  0.2619,
         0.2211,  0.5693,  0.4157,  0.6016,  0.0998,  0.0400,  0.2538,  0.3113,
         0.1467,  0.3964,  0.3508,  0.0704,  0.3375,  0.1091,  0.4836, -0.1224,
         0.0752,  0.0085,  0.4005,  0.0710, -0.1148, -0.0090, -0.0796,  0.0179,
         0.0616,  0.2382, -0.0596,  0.3472,  0.0390, -0.3615, -0.2248,  0.1447,
         0.2577,  0.1948, -0.3932,  0.0247, -0.1228,  0.1300, -0.0088,  0.0054,
         0.2527,  0.1980, -0.1795, -0.0170,  0.0184, -0.0293, -0.1308,  0.2345,
        -0.1236,  0.1183, -0.1797,  0.0976], device='cuda:0',
       requires_grad=True)

In [12]:
# Test-set image 782, moved to the active device.
# NOTE(review): presumably a (1, 28, 28) tensor, so the leading dim acts as a
# batch of one for the batch_first GRU — confirm.
mnist_input = test_loader.dataset[782][0].to(device)
# mnist_input = test_loader.dataset[782][0].double().to(device)
# mnist_input.dtype

#torch.save(mnist_input, f"saved/model_mnist_gruNN_028h_01l_input.pt")

#mnist_input_np = mnist_input.numpy().reshape([28, 28]) #convert to Numpy array
#mnist_input_df = pd.DataFrame(mnist_input_np) #convert to a dataframe
#mnist_input_df.to_csv("./saved/model_mnist_gruNN_028h_01l_input.csv", index=False, header=False) #save to file

#test_np = mnist_input.numpy()
#test_np.reshape([28, 28]).shape

# Run only the recurrent layer, mirroring RNN_GRU.forward up to the GRU call.
# The zero initial state is sized from the model's own num_layers / hidden_size
# attributes and created on the input's device and dtype, so this cell does not
# depend on whichever cell last rebound the notebook-level hyperparameters.
h0 = torch.zeros(
    model_mnist_gruNN_028h_01l.num_layers,
    mnist_input.size(0),
    model_mnist_gruNN_028h_01l.hidden_size,
    device=mnist_input.device,
    dtype=mnist_input.dtype,
)
# Forward propagate GRU
out, _ = model_mnist_gruNN_028h_01l.gru(mnist_input, h0)

NameError: name 'model' is not defined

In [None]:
# Dimensions of a deliberately tiny GRU (these rebind the notebook-level names)
input_size, hidden_size, num_layers = 2, 2, 1
sequence_length, batch_size = 5, 1

# Randomly initialised GRU plus a random input sequence and random initial state;
# the input is laid out (sequence_length, batch_size, input_size)
smallest_gru_model = nn.GRU(input_size, hidden_size, num_layers)
input = torch.randn((sequence_length, batch_size, input_size))
h0 = torch.randn((num_layers, batch_size, hidden_size))
output, hn = smallest_gru_model(input, h0)

# Persist the weights so the following cells can reload exactly this model
torch.save(smallest_gru_model.state_dict(), "saved/the_smallest_gru.pt")
print("The model is saved in ./saved/the_smallest_gru.pt")

In [None]:
# Fresh GRU of the same dimensions, overwritten with the weights saved to disk
smallest_gru_model = nn.GRU(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
)
smallest_gru_model.load_state_dict(torch.load("./saved/the_smallest_gru.pt"))
# Evaluation mode; as the last expression this also displays the module
smallest_gru_model.eval()

In [None]:
# One line per learnable tensor of the tiny GRU: "<parameter name>: <shape>"
for name, para in smallest_gru_model.named_parameters():
    print(f"{name}: {para.shape}")

In [None]:
# Export the tiny GRU to ONNX, tracing it with the example `input`.
# `input` was created as (sequence_length, batch_size, input_size) and the GRU is
# built without batch_first, so dim 0 is the sequence dimension. The dynamic axis
# is therefore labelled 'sequence_length' (it was mislabelled 'batch_size');
# which axis is dynamic is unchanged.
torch.onnx.export(
    smallest_gru_model,                # model being run
    input,                             # model input (or a tuple for multiple inputs)
    "./saved/the_smallest_gru.onnx",   # where to save the model (can be a file or file-like object)
    export_params=True,                # store the trained parameter weights inside the model file
    opset_version=10,                  # the ONNX version to export the model to
    do_constant_folding=True,          # whether to execute constant folding for optimization
    input_names=['input'],             # the model's input names
    output_names=['output'],           # the model's output names
    dynamic_axes={                     # variable length axes
        'input': {0: 'sequence_length'},
        'output': {0: 'sequence_length'},
    },
)

In [None]:
# Print tensors with 24 digits of precision from here on (a global print setting),
# then display the example input with that setting.
torch.set_printoptions(precision=24)
input

In [None]:
# Shape of the example input; it was created as (sequence_length, batch_size, input_size)
input.size()

In [None]:
# Store the example input tensor next to the saved model weights
torch.save(input, "saved/the_smallest_gru_input.pt")

In [None]:
# Read the stored example input back from disk
# (alternative kept for reference: input_test = torch.zeros(sequence_length, batch_size, input_size))
input_test = torch.load("saved/the_smallest_gru_input.pt")

In [None]:
# Display the tensor that was loaded back from disk
input_test

In [None]:
# First entry along dim 0 of the reloaded input
input_test[0]

In [None]:
# Run the reloaded GRU on the whole example sequence; no initial state is passed.
# NOTE(review): per the PyTorch GRU docs the initial hidden state then defaults to zeros — confirm.
smallest_gru_model(input)

In [None]:
# Run the GRU on the first entry along dim 0 of `input` only.
# NOTE(review): input[0] is 2-D; presumably the installed PyTorch treats a 2-D GRU
# input as an unbatched sequence — confirm.
smallest_gru_model(input[0])

In [None]:
# Run the GRU on the second entry along dim 0 of `input` only; no initial state is passed.
# NOTE(review): input[1] is 2-D; presumably treated as an unbatched sequence — confirm.
smallest_gru_model(input[1])

In [None]:
# Run the GRU on the first two entries along dim 0, the sequence dimension
# (`input` was created as (sequence_length, batch_size, input_size)).
smallest_gru_model(input[0:2])

In [None]:
# The first two entries along dim 0 of the example input
input[0:2]

In [None]:
# The first entry along dim 0 of the example input
input[0]

In [None]:
# Full-sequence run again (same call as in an earlier cell), for comparison with the partial runs
smallest_gru_model(input)

In [None]:
# Input-hidden weight matrix of layer 0 of the tiny GRU
# NOTE(review): presumably stacked as (W_ir | W_iz | W_in) per the PyTorch GRU docs — confirm.
smallest_gru_model.weight_ih_l0

In [None]:
# Input-hidden bias of layer 0 of the tiny GRU, laid out as (b_ir | b_iz | b_in)
smallest_gru_model.bias_ih_l0

In [None]:
# Hidden-hidden weight matrix of layer 0 of the tiny GRU
# NOTE(review): presumably stacked as (W_hr | W_hz | W_hn) per the PyTorch GRU docs — confirm.
smallest_gru_model.weight_hh_l0

In [None]:
# Hidden-hidden bias of layer 0 of the tiny GRU
# NOTE(review): presumably laid out as (b_hr | b_hz | b_hn) per the PyTorch GRU docs — confirm.
smallest_gru_model.bias_hh_l0