In [1]:
import torch

## Custom Layers

In [10]:
# At some point we will have to implement our own custom layers. We can incorporate these in models
class FlipSignLayer(torch.nn.Module):
    def __init__(self):
        super().__init__()
    
    # This is activated by call
    def forward(self, X: torch.Tensor) -> torch.Tensor:
        return X * -1.0

In [27]:
class RandomNet(torch.nn.Module):
    def __init__(self, *dims):
        super().__init__()
        self._modules["first_layer"] = torch.nn.Linear(dims[0], dims[1])
        self._modules["second_layer"] = torch.nn.ReLU()
        self._modules["third_layer"] = FlipSignLayer()
        self._modules["fourth_layer"] = torch.nn.Linear(dims[1], dims[2])
        
    def forward(self, inp: torch.Tensor) -> torch.Tensor:
        # Running each of modules upon input
        inp = self._modules["first_layer"](inp)
        inp = self._modules["second_layer"](inp)
        inp = self._modules["third_layer"](inp)
        inp = self._modules["fourth_layer"](inp)
        return inp

In [12]:
X = torch.rand(120, 7) * 12 # Values up to about 20
# Linear fn with Gaussian noise
labels = X @ torch.Tensor([i * 0.1 for i in range(7)]) + 15  + torch.normal(0, 0.3, size = (1, 120))
labels

tensor([[29.3887, 23.0214, 22.8416, 29.6440, 24.3271, 24.4853, 25.8768, 26.1630,
         25.6556, 22.2919, 24.3260, 30.1035, 27.8868, 31.3106, 29.4413, 22.6502,
         26.3820, 28.3579, 24.5227, 27.9262, 27.3116, 29.0480, 24.6664, 23.6390,
         26.8320, 27.0954, 34.8502, 28.8629, 31.3889, 25.1338, 27.6076, 21.0981,
         29.1580, 27.5863, 30.0696, 27.9143, 32.5327, 28.1086, 32.4983, 29.7998,
         26.0591, 25.7864, 33.3250, 27.2345, 34.3764, 29.6239, 28.1153, 28.2970,
         31.9451, 22.5710, 32.2950, 28.4330, 31.8398, 23.6982, 26.0560, 28.7050,
         26.4636, 25.4484, 28.2710, 24.5987, 27.4215, 32.0461, 30.6935, 34.0772,
         28.5402, 22.4663, 28.7440, 26.8397, 25.7816, 26.6278, 26.5492, 24.0636,
         27.6262, 28.7638, 25.9158, 31.8723, 24.3145, 24.2697, 22.7294, 24.8195,
         24.2369, 28.9756, 29.3242, 31.1284, 35.2181, 22.5364, 31.3144, 30.2369,
         30.4376, 31.5393, 31.0419, 29.7808, 32.6896, 24.9879, 27.3114, 25.9619,
         28.7440, 32.6253, 2

In [13]:
def build_random_nn(lr:float):
    model = RandomNet(7, 5, 1)
    model.apply(initialize_parameters) # Initializing parameters for all Linear layers
    loss = torch.nn.MSELoss()
    trainer = torch.optim.AdamW(model.parameters(), lr)
    return model, trainer, loss

def train_random_nn(X: torch.Tensor, labels: torch.Tensor, lr:float, epochs:int):
    model, trainer, loss = build_random_nn(lr)
    for epoch in range(epochs):
        # Initializing graident to 0
        trainer.zero_grad()
        cost = loss(model(X), labels)
        if epoch % 100 == 0:
            print("epoch: ", epoch, ", cost: ", cost)
        cost.backward()
        # Stepping along gradient and updating weights
        trainer.step()
    return model

def initialize_parameters(layer: torch.nn.Module):
    """
    Applicable layer level function that can be applied by a NN to initialize each Linear Layer with Xavier weights 
    + constant bias
    """
    # Initializing all torch.nn.Linear layers with xavier variance maintaining weights + uniform constants
    if isinstance(layer, torch.nn.Linear):
        # Initializing from distribution used to maintain variance so as to avoid exploding and vanishing gradients
        torch.nn.init.xavier_uniform_(layer.weight)
        torch.nn.init.constant(layer.bias, 0.2)
            

In [17]:
model = train_random_nn(X, labels, 0.05, 701) # Cost drastically improves even with flip sign layer

  torch.nn.init.constant(layer.bias, 0.2)


epoch:  0 , cost:  tensor(646.4657, grad_fn=<MseLossBackward>)
epoch:  100 , cost:  tensor(28.1946, grad_fn=<MseLossBackward>)
epoch:  200 , cost:  tensor(22.3361, grad_fn=<MseLossBackward>)
epoch:  300 , cost:  tensor(18.1466, grad_fn=<MseLossBackward>)
epoch:  400 , cost:  tensor(14.3716, grad_fn=<MseLossBackward>)
epoch:  500 , cost:  tensor(10.7341, grad_fn=<MseLossBackward>)
epoch:  600 , cost:  tensor(10.6406, grad_fn=<MseLossBackward>)
epoch:  700 , cost:  tensor(10.6362, grad_fn=<MseLossBackward>)


In [18]:
X = torch.randn(120, 7) * 12 # Attempting same thing with negative labels
# Linear fn with Gaussian noise
labels = X @ torch.Tensor([i * 0.1 for i in range(7)]) + 15  + torch.normal(0, 0.3, size = (1, 120))
labels

tensor([[  5.8525,  22.0420,   6.9948,  14.3416,  15.4060,  10.9594,  14.7379,
          13.7233,  16.6976,  17.7019,   9.8036,  18.0956,  22.7790,  20.9690,
          21.5403,   6.0199,  -5.5988,  39.7358,  11.9324,  39.0403,  17.7365,
          17.6920,   5.4032,  25.6624,  10.3643,  30.1315,  22.2080,   4.5017,
          22.9967,  29.4886,  14.6891,  14.5611,  19.2987,  15.3195,  30.2501,
          16.9690,  21.3254,  10.1256,  30.0962,  27.4048,  15.1011,  -7.2394,
           4.9946,  21.5219,  23.2505, -16.4606,  24.9778,  13.6068,   3.9219,
          16.5412,  20.4664,  28.9199,  18.3090,  38.9800,   2.6729,  10.5015,
           0.9381,  18.9293,   6.5587,   9.8564,   9.9447,  -0.6758,   8.0680,
          10.7872,  25.1162,  15.1156,  22.1917,  -7.8005,  23.3381,  15.6603,
          26.4831,  20.5113, -15.5249,  23.3799,  26.5649,  13.7049,   5.1545,
          12.7119,  10.5934,  -2.1316,   5.2542,   1.6588,   2.9450,  17.9271,
         -14.7203,  11.2312,  16.5431,  13.1107,  35

In [22]:
model = train_random_nn(X, labels, 0.05, 701) # Still gets much better

  torch.nn.init.constant(layer.bias, 0.2)


epoch:  0 , cost:  tensor(512.5497, grad_fn=<MseLossBackward>)
epoch:  100 , cost:  tensor(147.1909, grad_fn=<MseLossBackward>)
epoch:  200 , cost:  tensor(138.0045, grad_fn=<MseLossBackward>)
epoch:  300 , cost:  tensor(137.8608, grad_fn=<MseLossBackward>)
epoch:  400 , cost:  tensor(137.8588, grad_fn=<MseLossBackward>)
epoch:  500 , cost:  tensor(137.8574, grad_fn=<MseLossBackward>)
epoch:  600 , cost:  tensor(137.8563, grad_fn=<MseLossBackward>)
epoch:  700 , cost:  tensor(137.8555, grad_fn=<MseLossBackward>)


Lesson: Can use NN Modules to encode any arbitrary layer we'd like within the scope of a NN

In [80]:
# Defining Parametric Layer
class MyLinearLayer(torch.nn.Module):
    def __init__(self, *layer_dims):
        super().__init__()
        # Initializing weights
        weight = torch.randn(layer_dims)
        
        # Defining properly dimensioned bias
        bias = torch.zeros(layer_dims[-1])
        
        # Casting to a parameter
        self.bias = torch.nn.Parameter(bias)
        
        # Initializing so as to maintain variance
        torch.nn.init.xavier_uniform_(weight)
        
        # Casting to a parameter
        self.weight = torch.nn.Parameter(weight)
        
    def forward(self, data: torch.Tensor) -> torch.Tensor:
        # Getting relu of matrix multiplication + bias
        return torch.nn.functional.relu(data @ self.weight + self.bias) # Mapping to inputs by neuron space

In [84]:
# RandomNet using custom linear layers
class RandomNet2(torch.nn.Module):
    def __init__(self, *dims):
        super().__init__()
        self._modules["first_layer"] = MyLinearLayer(dims[0], dims[1])
        self._modules["second_layer"] = MyLinearLayer(dims[1], dims[2])
    def forward(self, inp: torch.Tensor) -> torch.Tensor:
        # Running each of modules upon input
        inp = self._modules["first_layer"](inp)
        inp = self._modules["second_layer"](inp)
        return inp

In [85]:
def build_random_nn_2(lr:float):
    model = RandomNet2(7, 5, 1)
    loss = torch.nn.MSELoss()
    trainer = torch.optim.AdamW(model.parameters(), lr)
    return model, trainer, loss

def train_random_nn_2(X: torch.Tensor, labels: torch.Tensor, lr:float, epochs:int):
    model, trainer, loss = build_random_nn_2(lr)
    for epoch in range(epochs):
        # Initializing graident to 0
        trainer.zero_grad()
        cost = loss(model(X), labels)
        if epoch % 100 == 0:
            print("epoch: ", epoch, ", cost: ", cost)
        cost.backward()
        # Stepping along gradient and updating weights
        trainer.step()
    return model


In [86]:
train_random_nn_2(X, labels, 0.05, 500) # Still achieves an identical cost! Remember to define all model attributes as parameters

epoch:  0 , cost:  tensor(230.0297, grad_fn=<MseLossBackward>)
epoch:  100 , cost:  tensor(142.0027, grad_fn=<MseLossBackward>)
epoch:  200 , cost:  tensor(137.8823, grad_fn=<MseLossBackward>)
epoch:  300 , cost:  tensor(137.8648, grad_fn=<MseLossBackward>)
epoch:  400 , cost:  tensor(137.8574, grad_fn=<MseLossBackward>)


RandomNet2(
  (first_layer): MyLinearLayer()
  (second_layer): MyLinearLayer()
)

## Checkpointing models

In [87]:
x = torch.randn((2,1))

In [88]:
x

tensor([[-0.7747],
        [-0.3079]])

In [89]:
torch.save(x, 'test-file')

In [90]:
torch.load('test-file') # PyTorch capable of saving intermediary tensors and loading them in real time

tensor([[-0.7747],
        [-0.3079]])

In [91]:
model.eval() # Eval displays neural network

RandomNet(
  (first_layer): Linear(in_features=7, out_features=5, bias=True)
  (second_layer): ReLU()
  (third_layer): FlipSignLayer()
  (fourth_layer): Linear(in_features=5, out_features=1, bias=True)
)

In [92]:
state_dict = model.state_dict()

In [94]:
torch.save(state_dict, 'random-nn')

In [95]:
model.load_state_dict(torch.load('random-nn'))

<All keys matched successfully>

In [98]:
model.eval() # All Keys in State Dict loaded successfully

RandomNet(
  (first_layer): Linear(in_features=7, out_features=5, bias=True)
  (second_layer): ReLU()
  (third_layer): FlipSignLayer()
  (fourth_layer): Linear(in_features=5, out_features=1, bias=True)
)

In [99]:
!nvidia-smi

/bin/bash: nvidia-smi: command not found
