# Make Simple Neural Network

In [1]:
import torch
from torch import nn

In [19]:
x = torch.tensor([1.])
model = nn.Linear(1,1)   # 1 linear unit with input node 1, output node 1

# Initialize weight, bias of model as you create it
print(model.weight)
print(model.bias)

y = model(x)
print(y)
print('=='*30)
y = x @ model.weight + model.bias
print(y)

Parameter containing:
tensor([[-0.7029]], requires_grad=True)
Parameter containing:
tensor([-0.3070], requires_grad=True)
tensor([-1.0099], grad_fn=<AddBackward0>)
tensor([-1.0099], grad_fn=<AddBackward0>)


In [30]:
# fully connected layer
fc1 = nn.Linear(1,3)
fc2 = nn.Linear(3,1)

print(fc1.weight)
print(fc1.bias)
print(fc2.weight)
print(fc2.bias)
print('=='*30)
x = torch.tensor([1.])
x = fc1(x)
print(x)
x = fc2(x)
print(x)
print('=='*30)
x = torch.tensor([1.])
print((x@fc1.weight.T + fc1.bias)@fc2.weight.T + fc2.bias)

Parameter containing:
tensor([[-0.8834],
        [ 0.7991],
        [-0.6759]], requires_grad=True)
Parameter containing:
tensor([0.9296, 0.7428, 0.4460], requires_grad=True)
Parameter containing:
tensor([[ 0.1581,  0.0042, -0.3152]], requires_grad=True)
Parameter containing:
tensor([0.1063], requires_grad=True)
tensor([ 0.0463,  1.5420, -0.2298], grad_fn=<AddBackward0>)
tensor([0.1926], grad_fn=<AddBackward0>)
tensor([0.1926], grad_fn=<AddBackward0>)


## Why doesn't nn.Linear() automatically transpose its shape from the start, but instead transpose inside the function and then proceeds with inner product?
 -  https://pytorch.org/docs/stable/generated/torch.nn.Linear.html
### nn.Linear() function want to get the data form (1D data) of "channel" in "number"x"channel"x"row"x"column"
### Node 1 is equal to 1 channel
### If you want a lot of data to pass through, you have to give it in the form of the number x channel
### why .T? it's because they also want to make weight like numberxchannel form
### First of all, "channel" in the weight shape in "number"x"channel" must match the number of pre-channel
### for example, nn.Linear(2,3) means The number of channels in front of the weights is 2, so if you convert it to number x channel format, it is ?x2. And we make 3 number of nodes by using 2 nodes. So it become 3(number)x2(channel).

In [31]:
fc1 = nn.Linear(1,3)
fc2 = nn.Linear(3,1)

nn.Sequential(fc1, fc2) # connect each layers

x = torch.tensor([1.])
print(model(x))

tensor([-1.0099], grad_fn=<AddBackward0>)


In [37]:
model = nn.Sequential(nn.Linear(2,5),     # it means nn.Linear("channel", "channel")
              nn.Linear(5,10),
              nn.Linear(10,3))

x = torch.randn(2)
print(x)
print(model(x))

x = torch.randn(1,2)
print(x)
print(model(x))

print("==="*30)
x = torch.randn(5,2) # randn(number, channel)
print(x)
print(model(x))

print("==="*30)
x = torch.randn(2,3,1,4,5,2)
print(model(x).shape)        # Only the last number is "channel". Everything else is "number"

tensor([-1.1612,  0.3413])
tensor([ 0.4130, -0.3833,  0.9307], grad_fn=<AddBackward0>)
tensor([[-0.4366, -1.5420]])
tensor([[-0.1054, -0.1389,  0.0686]], grad_fn=<AddmmBackward0>)
tensor([[-0.8385,  1.7113],
        [ 2.1997,  0.2398],
        [-0.3846, -1.2734],
        [ 1.2926, -1.0161],
        [ 1.5963,  1.7550]])
tensor([[ 0.7114, -0.3845,  1.2125],
        [ 0.0770,  0.3204, -0.4656],
        [-0.0459, -0.1414,  0.1284],
        [-0.1397,  0.1944, -0.4755],
        [ 0.4961,  0.1195,  0.2364]], grad_fn=<AddmmBackward0>)
torch.Size([2, 3, 1, 4, 5, 3])


In [39]:
class Mymodel1(nn.Module):
    def __init__(self):
        super().__init__()

        self.fc1 = nn.Linear(2,5)
        self.fc2 = nn.Linear(5,10)
        self.fc3 = nn.Linear(10,3)
        self.sig = nn.Sigmoid()
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.sig(x)
        x = self.fc2(x)
        x = self.sig(x)
        x = self.fc3(x)
        x = self.sig(x)
        
        return x

model = Mymodel1()
x = torch.randn(5,2)
y = model(x)
print(y)

tensor([[0.5572, 0.4649, 0.5630],
        [0.5578, 0.4662, 0.5639],
        [0.5596, 0.4694, 0.5644],
        [0.5592, 0.4689, 0.5644],
        [0.5570, 0.4650, 0.5641]], grad_fn=<SigmoidBackward0>)


In [42]:
print(model)
print(model.fc1.weight)
print(model.fc2.bias)

Mymodel(
  (fc1): Linear(in_features=2, out_features=5, bias=True)
  (fc2): Linear(in_features=5, out_features=10, bias=True)
  (fc3): Linear(in_features=10, out_features=3, bias=True)
  (sig): Sigmoid()
)
Parameter containing:
tensor([[ 0.3019, -0.2733],
        [ 0.5543,  0.2998],
        [-0.6582, -0.5331],
        [ 0.1604,  0.3092],
        [ 0.0419,  0.4929]], requires_grad=True)
Parameter containing:
tensor([ 0.2913,  0.2769, -0.0643, -0.2963, -0.2671,  0.0051, -0.1334, -0.1296,
         0.4072, -0.1227], requires_grad=True)


In [60]:
class Mymodel2(nn.Module):
    def __init__(self):
        super().__init__()

        self.linear = nn.Sequential(nn.Linear(2,5),
                                    nn.Sigmoid(),
                                    nn.Linear(5,10),
                                    nn.Sigmoid(),
                                    nn.Linear(10,3),
                                    nn.Sigmoid())
    
    def forward(self, x):
        x = self.linear(x)
        
        return x

model2 = Mymodel2()
x = torch.randn(5,2)
y = model2(x)
print(y)

tensor([[0.4505, 0.4297, 0.5781],
        [0.4481, 0.4295, 0.5769],
        [0.4501, 0.4294, 0.5779],
        [0.4492, 0.4277, 0.5772],
        [0.4480, 0.4280, 0.5766]], grad_fn=<SigmoidBackward0>)


In [68]:
print(model2.linear[0].weight)
print(model2.linear[-2].bias)

Parameter containing:
tensor([[ 0.6794, -0.1821],
        [-0.1408, -0.0258],
        [ 0.0923,  0.1911],
        [-0.4935,  0.3658],
        [-0.0263, -0.1386]], requires_grad=True)
Parameter containing:
tensor([-0.2640, -0.2410,  0.1625], requires_grad=True)


In [69]:
list(model.parameters())

[Parameter containing:
 tensor([[-0.2910, -0.0544],
         [-0.2206,  0.5108],
         [-0.6840,  0.2542],
         [-0.1203,  0.4492],
         [ 0.4058, -0.1481]], requires_grad=True),
 Parameter containing:
 tensor([-0.5269,  0.0564, -0.5545, -0.4782,  0.6395], requires_grad=True),
 Parameter containing:
 tensor([[ 0.1453, -0.0690, -0.3936, -0.2466,  0.1901],
         [-0.2212, -0.2437,  0.1502,  0.3247, -0.0909],
         [ 0.1654, -0.3885, -0.0308,  0.2897,  0.3328],
         [ 0.4319,  0.3137,  0.2287, -0.1159,  0.2526],
         [-0.0755, -0.3308, -0.0101,  0.0570,  0.2057],
         [-0.0274, -0.1680,  0.2784, -0.4458, -0.3221],
         [-0.4331,  0.1470, -0.0375,  0.3969,  0.3828],
         [ 0.0179, -0.0319, -0.2022, -0.0397,  0.2068],
         [ 0.3457,  0.0375, -0.0965, -0.0563, -0.2741],
         [ 0.4264, -0.0435,  0.2967,  0.4086, -0.3839]], requires_grad=True),
 Parameter containing:
 tensor([ 0.0791, -0.2150, -0.1478, -0.4296,  0.0831, -0.3926,  0.1757,  0.4294,
  

In [75]:
# you can get the number of parameters
num = sum([p.numel() for p in model.parameters() if p.requires_grad])
print(num)

108


## Weight Initialization
 - https://pytorch.org/docs/stable/nn.init.html

In [80]:
import torch
from torch import nn

Fin = 5000
Fout = 1000
w = torch.zeros(141, Fin)
nn.init.kaiming_uniform_(w, mode="fan_in", nonlinearity='relu')
print(w.std())
print(torch.sqrt(torch.tensor(2/Fin)))

w = torch.zeros(Fout, 212)
nn.init.kaiming_uniform_(w, mode="fan_out", nonlinearity="relu")
print(w.std())
print(torch.sqrt(torch.tensor(2/Fout)))

# CNN?
N=32
C=64
H=6
W=10
w = torch.zeros(N,C,H,W)
nn.init.kaiming_uniform_(w, mode='fan_in', nonlinearity='relu')
print(w.std())
print(torch.sqrt(torch.tensor(2/(C*H*W))))

tensor(0.0200)
tensor(0.0200)
tensor(0.0447)
tensor(0.0447)
tensor(0.0228)
tensor(0.0228)
