## Parameter initialization

### Why initialization matters
Based on this [blog post](https://towardsdatascience.com/weight-initialization-in-neural-networks-a-journey-from-the-basics-to-kaiming-954fb9b47c79).

In [1]:
import torch
import torch.nn as nn
import numpy as np
torch.manual_seed(0)  # Seed PyTorch's random number generator; the call returns the torch.Generator object.

<torch._C.Generator at 0x7fa425c108b0>

In [4]:
dim = 100
x = torch.randn(dim)  # normalized input ~ N(0, 1)
n_layers = 100

# Every weight entry is a standard-normal sample rescaled by
# std = sqrt(1 / dim), i.e. the entries are drawn from N(0, 1 / dim).
weight_std = np.sqrt(1 / dim)

# There is no activation function between the layers, so the whole stack
# is a purely linear map applied to x.
for i in range(n_layers):
    W = torch.randn(dim, dim) * weight_std
    x = W @ x

print(x.mean(), x.std())

tensor(0.) tensor(0.)


### PyTorch initialization

Out of the box:
https://pytorch.org/docs/stable/generated/torch.nn.Linear.html#torch.nn.Linear

In [9]:
# model = nn.Sequential(nn.Linear(100, 100), nn.Tanh(), nn.Linear(100, 2))

class Model(nn.Module):
    """Toy module contrasting default and explicit parameter initialization.

    ``lin`` is re-initialized in place with Xavier (Glorot) normal, while
    ``conv`` keeps whatever initialization ``nn.Conv2d`` applies when it is
    constructed. No ``forward`` is defined; the class only demonstrates
    initialization.

    Args:
        in_features: Input size of the linear layer.
        out_features: Output size of the linear layer.
        in_channels: Number of input channels of the convolution.
        out_channels: Number of output channels of the convolution.
        kernel_size: Kernel size of the convolution (int or tuple).
    """

    def __init__(
        self,
        in_features=100,
        out_features=100,
        in_channels=3,
        out_channels=16,
        kernel_size=3,
    ):
        super().__init__()

        # nn.Linear and nn.Conv2d have required size arguments; calling them
        # with no arguments raises TypeError, so the sizes are passed in
        # explicitly (with defaults, so `Model()` keeps working).
        self.lin = nn.Linear(in_features, out_features)
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size)

        # Trailing underscore -> the weight tensor is modified in place.
        torch.nn.init.xavier_normal_(self.lin.weight)



Custom: https://stackoverflow.com/a/49433937

In [17]:
def init_weights(m):
    """Initialize an ``nn.Linear`` module in place; leave other modules alone.

    The weight is filled with Xavier (Glorot) uniform values and the bias,
    when the layer has one, is set to zeros. Intended to be passed to
    ``Module.apply``.

    Args:
        m: Module to (possibly) initialize.
    """
    if not isinstance(m, nn.Linear):
        return

    # Trailing underscore on the init functions -> in-place modification.
    torch.nn.init.xavier_uniform_(m.weight)

    bias = m.bias
    if bias is None:
        return
    torch.nn.init.zeros_(bias)

# Two stacked 2 -> 2 linear layers; the second one is built without a bias,
# so init_weights is exercised on a layer whose `bias` is None as well.
net = nn.Sequential(
    nn.Linear(in_features=2, out_features=2),
    nn.Linear(in_features=2, out_features=2, bias=False),
)
# Run init_weights over the modules of `net`; kept as the last expression so
# the notebook cell still displays the returned module.
net.apply(init_weights)

Sequential(
  (0): Linear(in_features=2, out_features=2, bias=True)
  (1): Linear(in_features=2, out_features=2, bias=False)
)

## Regularization

https://rawgit.com/danielkunin/Deeplearning-Visualizations/master/regularization/index.html

In [None]:
# Cross-entropy loss module, bound to `criterion`.
criterion = nn.CrossEntropyLoss()

# SGD over the model's parameters with a non-zero weight_decay, which the
# PyTorch docs describe as an L2 penalty.
# NOTE(review): `model` is not defined in the cells visible here — presumably
# the commented-out nn.Sequential or an instance of Model; confirm before running.
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, weight_decay=1e-3)

# L1 loss module; presumably meant for building an L1 penalty term — TODO
# confirm how it is combined with `criterion`.
l1 = nn.L1Loss()

# Dropout module created with its default arguments; the instance is not
# assigned to a name, so it is only displayed as the cell's result.
nn.Dropout()