In [1]:
import numpy as np
import torch
from torch import nn
from tqdm import tqdm

np.random.seed(123)
torch.manual_seed(123)


<torch._C.Generator at 0x7f524f7a8f90>

In [2]:
N = 500  # Input size
H = 100  # Hidden layer size
O = 10   # Output size

w1 = np.random.randn(N, H)
b1 = np.random.randn(H)

w2 = np.random.randn(H, O)
b2 = np.random.randn(O)


""" Numpy implementation
"""

def ffpass_np(x):
    a1 = np.dot(x, w1) + b1   # affine
    r = np.maximum(0, a1)    # ReLU
    a2 = np.dot(r, w2) + b2  # affine
    
    exps = np.exp(a2 - np.max(a2))  # softmax
    out = exps / exps.sum()
    return out
    

In [3]:
# x0 = np.random.random((N,))
# out_np    = ffpass_np(x0)
# print(x0.shape)
# print(out_np)
# print(sum(out_np))


In [4]:
# Build a feed-forward network
N = 500  # Input size
H = 100  # Hidden layer size
O = 10   # Output size

model = nn.Sequential(nn.Linear(N, H),
                      nn.ReLU(),
                      nn.Linear(H, O),
                      nn.Softmax(dim=0)
                      )
print(model)

Sequential(
  (0): Linear(in_features=500, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=10, bias=True)
  (3): Softmax(dim=0)
)


In [5]:
# list(model.parameters())

In [6]:
model.state_dict()

OrderedDict([('0.weight',
              tensor([[-0.0182,  0.0015, -0.0222,  ...,  0.0014,  0.0004,  0.0195],
                      [-0.0169,  0.0028,  0.0002,  ...,  0.0229,  0.0109, -0.0024],
                      [-0.0014, -0.0174, -0.0063,  ...,  0.0109, -0.0307, -0.0161],
                      ...,
                      [-0.0127,  0.0186, -0.0414,  ..., -0.0052, -0.0063, -0.0277],
                      [-0.0388,  0.0157, -0.0401,  ..., -0.0091,  0.0390,  0.0034],
                      [ 0.0002,  0.0251, -0.0419,  ...,  0.0051,  0.0196, -0.0079]])),
             ('0.bias',
              tensor([-0.0380, -0.0192, -0.0078, -0.0174, -0.0295, -0.0313, -0.0081,  0.0119,
                       0.0055,  0.0182, -0.0340,  0.0008,  0.0389, -0.0267,  0.0278,  0.0197,
                       0.0309, -0.0046, -0.0140,  0.0253, -0.0049, -0.0114, -0.0316,  0.0275,
                       0.0442, -0.0292, -0.0281, -0.0067,  0.0280,  0.0038, -0.0101,  0.0358,
                      -0.0360, -0.0419, 

In [7]:
model.state_dict().keys()

odict_keys(['0.weight', '0.bias', '2.weight', '2.bias'])

In [8]:
# print(w1)
print(w1.shape)
# print(w2)
print(w2.shape)
print(b1.shape)
print(b2.shape)

(500, 100)
(100, 10)
(100,)
(10,)


In [9]:
print(w1.sum(axis=0))  # numpy sum on axis

[ 22.84378573  12.56628603  -4.49614678  15.97763683  16.36104951
   1.66047005  31.58154144 -24.28073148  11.38918687  33.18315452
 -17.7030545   12.2674793   36.21000578  14.49912131   5.76589725
 -20.26305647  24.57815156  -0.6123041  -30.38395154 -22.63070129
  15.02246324 -23.43098839  -8.92305517 -71.43368671  12.92047565
  -2.00201329 -13.55404491  30.61295723   3.82290092  18.22779126
   0.6141344   52.43367301  -0.47411856  -2.17697884   9.8584961
  15.16443305  37.53068081 -27.12229942 -14.3778997   49.48576712
 -15.17455452 -27.95068053 -15.67491345  13.11250229 -29.75769356
  51.32944554  -9.86068001 -14.14289491  15.49182101  26.29384836
  15.45101129  21.11299288  -8.80564444 -12.90315271  -9.6455751
   4.78768298  -7.59410255 -24.78215372  -1.2330846   21.01129674
 -67.84602423   7.8663358   49.24122734 -11.11620235   3.30115769
  -9.06280129   2.47396198 -11.69213553   1.11795738 -15.99310514
  51.84906532   0.53380093  -0.70615098  -5.28115963  27.69747067
  26.2963512

In [10]:
print(model.state_dict()['0.weight'].shape)
print(model.state_dict()['2.weight'].shape)
print(model.state_dict()['0.bias'].shape)
print(model.state_dict()['2.bias'].shape)

torch.Size([100, 500])
torch.Size([10, 100])
torch.Size([100])
torch.Size([10])


In [11]:
# Customize the weight and bias
model.state_dict()['0.weight'].copy_(torch.from_numpy(w1.transpose()).float())
model.state_dict()['2.weight'].copy_(torch.from_numpy(w2.transpose()).float())
model.state_dict()['0.bias'].copy_(torch.from_numpy(b1.transpose()).float())
model.state_dict()['2.bias'].copy_(torch.from_numpy(b2.transpose()).float())

# list(model.parameters())
# model.state_dict()['0.weight']
# model.state_dict()['2.weight']

tensor([-0.1374,  0.5375,  0.3599, -0.1966, -0.0126,  0.2115, -1.2572, -0.0510,
         0.4954, -0.0551])

### Weights after assignment

In [29]:
model.state_dict()['0.weight'].sum(dim=0)
model.state_dict()['0.weight'].sum(dim=1)

tensor([ 22.8438,  12.5663,  -4.4961,  15.9776,  16.3611,   1.6605,  31.5815,
        -24.2807,  11.3892,  33.1832, -17.7031,  12.2675,  36.2100,  14.4991,
          5.7659, -20.2631,  24.5781,  -0.6123, -30.3839, -22.6307,  15.0225,
        -23.4310,  -8.9231, -71.4337,  12.9205,  -2.0020, -13.5540,  30.6130,
          3.8229,  18.2278,   0.6141,  52.4337,  -0.4741,  -2.1770,   9.8585,
         15.1644,  37.5307, -27.1223, -14.3779,  49.4858, -15.1746, -27.9507,
        -15.6749,  13.1125, -29.7577,  51.3294,  -9.8607, -14.1429,  15.4918,
         26.2939,  15.4510,  21.1130,  -8.8056, -12.9032,  -9.6456,   4.7877,
         -7.5941, -24.7822,  -1.2331,  21.0113, -67.8460,   7.8663,  49.2412,
        -11.1162,   3.3012,  -9.0628,   2.4740, -11.6921,   1.1180, -15.9931,
         51.8491,   0.5338,  -0.7062,  -5.2812,  27.6975,  26.2963,   2.2344,
         -6.9736, -16.8031, -36.6282, -35.9391,  31.9341,  -7.7523,  12.6896,
         -9.4399, -10.7352,  28.9524,  24.7135,   1.3597,  44.47

In [25]:
sum(model.state_dict()['0.weight'].sum(dim=0)) == sum(model.state_dict()['0.weight'].sum(dim=0))


tensor(True)

In [26]:
sum(model.state_dict()['2.weight'].sum(dim=0)) == sum(model.state_dict()['2.weight'].sum(dim=1))

tensor(True)

In [13]:
# xr = x0.reshape((1, x0.size))
# print(xr.shape)
# xr = torch.tensor(xr, dtype=torch.float32)
# print(xr.shape)
# print(xr.dtype)
# # model(xr)

In [14]:
def ffpass_torch(x):
    xr = x.reshape((1, x.size))
    # xr = torch.tensor(xr, dtype=torch.float32)
    xr = torch.from_numpy(xr).float()
    return model(xr)


In [15]:
x0 = np.random.random((N,))

out_np     = ffpass_np(x0)
out_torch_ = ffpass_torch(x0)
out_torch  = out_torch_.detach().numpy()

np.allclose(out_np, out_torch, 1e-8)

False

In [16]:
out_np

array([1.49472312e-238, 3.22041051e-089, 1.93599195e-182, 1.40464542e-105,
       2.90736603e-025, 5.86590900e-086, 2.10487618e-051, 1.00000000e+000,
       5.19955060e-095, 4.19231708e-034])

In [17]:
out_torch

array([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]], dtype=float32)

In [18]:
ffpass_torch(x0)

tensor([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]], grad_fn=<SoftmaxBackward>)