In [1]:
import numpy as np
import torch
from torch import nn

import tensorflow as tf
from tensorflow.keras.layers import Dense

from tqdm import tqdm
from torchsummary import summary

# Seed the global RNG of every framework with one shared value so the random
# weights and inputs generated further down are reproducible.
SEED = 123
np.random.seed(SEED)
torch.manual_seed(SEED)
tf.set_random_seed(SEED)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Layer sizes shared by the NumPy, Keras and PyTorch versions of the network.
N = 500  # Input size
H = 100  # Hidden layer size
O = 10   # Output size

# Reference parameters drawn from the seeded NumPy RNG. The values depend on
# the order of these draws, so the statements must stay in this order.
w1 = np.random.randn(N, H)  # first affine layer weights, shape (N, H)
b1 = np.random.randn(H)     # first affine layer bias, shape (H,)

w2 = np.random.randn(H, O)  # second affine layer weights, shape (H, O)
b2 = np.random.randn(O)     # second affine layer bias, shape (O,)

In [3]:
""" Numpy implementation
"""

def ffpass_np(x, params=None):
    """Forward pass of the two-layer network in plain NumPy.

    Computes affine -> ReLU -> affine -> softmax.

    Args:
        x: one sample of shape (features,) or a batch of shape
            (batch, features).
        params: optional tuple ``(w1, b1, w2, b2)`` with the weights and biases
            of the two affine layers. Defaults to the notebook-level
            ``w1, b1, w2, b2``.

    Returns:
        Array of softmax probabilities with the same leading shape as ``x``;
        every sample (row) sums to one.
    """
    if params is None:
        params = (w1, b1, w2, b2)
    w_hidden, b_hidden, w_out, b_out = params

    hidden = np.maximum(0, np.dot(x, w_hidden) + b_hidden)  # affine + ReLU
    logits = np.dot(hidden, w_out) + b_out                  # affine

    # Softmax over the last axis only. Subtracting the per-row maximum keeps
    # exp() from overflowing; keepdims lets the result broadcast back, so a
    # batch is normalised row by row instead of over the whole array.
    exps = np.exp(logits - np.max(logits, axis=-1, keepdims=True))
    return exps / exps.sum(axis=-1, keepdims=True)

In [4]:
x0 = np.random.random((N,))
out_np    = ffpass_np(x0)
out_np

array([1.49472312e-238, 3.22041051e-089, 1.93599195e-182, 1.40464542e-105,
       2.90736603e-025, 5.86590900e-086, 2.10487618e-051, 1.00000000e+000,
       5.19955060e-095, 4.19231708e-034])

## Create the model

### TensorFlow

In [5]:
""" Keras implementation

1. input_dim versus input_shape
Instead of `input_dim`, `input_shape` can also be used, but it takes a tuple
instead of an integer. Example:

    model_tf.add(Dense(H, activation='relu', use_bias=True, input_dim=N))
    model_tf.add(Dense(O, activation='softmax', use_bias=True, input_dim=O))

`units` (the first parameter) is the output size of the layer.

2. The input shape is not necessary after the first layer.
Instead of:

  model_tf.add(Dense(units=O, activation='softmax', use_bias=True, 
               input_shape=(O,) ))

Use this:

   model_tf.add(Dense(units=O, activation='softmax', use_bias=True))             

3. Unnecessary:
    sess = tf.InteractiveSession()
    sess.run(tf.initialize_all_variables())

    Replace with: tf.global_variables_initializer

See: https://www.tensorflow.org/api_docs/python/tf/keras/layers/Dense
"""
# No explicit session or variable-initialiser call is needed for this Keras model.
# Two Dense layers: N inputs -> H ReLU units -> O softmax outputs.
model_tf = tf.keras.Sequential()
model_tf.add(Dense(units=H, activation='relu', use_bias=True,
                   input_shape=(N,)))  # input shape is declared on the first layer only
model_tf.add(Dense(units=O, activation='softmax', use_bias=True))
    
def ffpass_tf(x):
    """Run `x` through the Keras model as a batch of one.

    `x` is reshaped to (1, x.size) before `model_tf.predict` is called, and
    the first (only) row of the prediction is returned.
    """
    single_batch = x.reshape((1, x.size))
    predictions = model_tf.predict(single_batch)
    return predictions[0]

model_tf.summary()    

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               50100     
_________________________________________________________________
dense_1 (Dense)              (None, 10)                1010      
Total params: 51,110
Trainable params: 51,110
Non-trainable params: 0
_________________________________________________________________


### PyTorch

In [6]:
# Same architecture as the Keras model: affine -> ReLU -> affine -> softmax.
# The modules are passed positionally, so the parameter keys of the two
# Linear layers are '0.*' and '2.*'.
pt_layers = [
    nn.Linear(N, H),
    nn.ReLU(),
    nn.Linear(H, O),
    nn.Softmax(dim=1),  # normalise along dim 1 of the (batch, O) output
]
model_pt = nn.Sequential(*pt_layers)


def ffpass_torch(x, model=None):
    """Forward pass of a PyTorch model on a single NumPy sample.

    Args:
        x: NumPy array holding one sample; it is reshaped to a batch of one
            with shape (1, x.size).
        model: PyTorch module (any callable taking a float32 tensor) to
            evaluate. Defaults to the notebook-level `model_pt`.

    Returns:
        The tensor produced by `model` for the float32 version of the batch.
    """
    if model is None:
        model = model_pt
    batch = x.reshape((1, x.size))
    # from_numpy keeps NumPy's float64 dtype; the layers hold float32 weights.
    return model(torch.from_numpy(batch).float())

print(model_pt)
summary(model_pt, input_size=(N,))

Sequential(
  (0): Linear(in_features=500, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=10, bias=True)
  (3): Softmax(dim=1)
)
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                  [-1, 100]          50,100
              ReLU-2                  [-1, 100]               0
            Linear-3                   [-1, 10]           1,010
           Softmax-4                   [-1, 10]               0
Total params: 51,110
Trainable params: 51,110
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.19
Estimated Total Size (MB): 0.20
----------------------------------------------------------------


## Tensorflow 
### Assign weights and biases

In [7]:
# before assignment
w0_tf = model_tf.get_weights()[0]
w0_tf.shape

(500, 100)

In [8]:
# assign weights and biases to model
# Load the reference NumPy parameters into the Keras model: each Dense layer
# receives its [weights, bias] pair, layer 0 first and layer 1 second.
tf_layer_params = ([w1, b1], [w2, b2])
for layer_idx, layer_params in enumerate(tf_layer_params):
    model_tf.get_layer(index=layer_idx).set_weights(layer_params)

In [9]:
x0 = np.random.random((N,))
# x0 /= sum(x0)

out_np    = ffpass_np(x0)
out_keras = ffpass_tf(x0)

np.allclose(out_np, out_keras, 1e-4)

True

### Get weights from model

In [10]:
# first set of weights
w0_tf = model_tf.get_weights()[0]
w0_tf.shape
print(w0_tf.sum())
w2_tf = model_tf.get_weights()[2]
print(w2_tf.sum())

310.53552
-24.158245


TensorFlow returns the weights as a `500x100` array

In [11]:
print(np.allclose(model_tf.get_weights()[0], w1))
print(np.allclose(model_tf.get_weights()[2], w2))

True
True


### Get biases from model

In [12]:
# b2 is equal to model.get_weights()[3]
print(np.allclose(model_tf.get_weights()[1], b1))
print(np.allclose(model_tf.get_weights()[3], b2))

True
True


In [13]:
# Print the sum of each Keras parameter array: both weight matrices
# (entries 0 and 2) first, then both bias vectors (entries 1 and 3).
tf_params = model_tf.get_weights()
for param_idx in (0, 2, 1, 3):
    print(tf_params[param_idx].sum())

310.53552
-24.158245
20.29108
-0.105651826


## PyTorch

### Assign weights and biases

In [14]:
model_pt.state_dict().keys()

odict_keys(['0.weight', '0.bias', '2.weight', '2.bias'])

In [15]:
print(model_pt.state_dict()['0.weight'].shape)
print(model_pt.state_dict()['2.weight'].shape)
print(model_pt.state_dict()['0.bias'].shape)
print(model_pt.state_dict()['2.bias'].shape)

torch.Size([100, 500])
torch.Size([10, 100])
torch.Size([100])
torch.Size([10])


In [16]:
# Convert the initial NumPy weight matrix to a float32 tensor.
w1_pt_t = torch.from_numpy(w1).float()
print(w1_pt_t.shape)  # torch.Size([500, 100])

# That is not the same as the weight shape in the model, which holds the
# transpose, so swap the two axes of w1_pt_t.
w1_pt_tt = torch.transpose(w1_pt_t, 0, 1)
print(w1_pt_tt.shape)

# Is it now the same shape as the model weights?
print(model_pt.state_dict()['0.weight'].shape == w1_pt_tt.shape)

torch.Size([500, 100])
torch.Size([100, 500])
True


In [17]:
print(w1_pt_tt.shape)
w1_pt_tt.sum(dim=1)

torch.Size([100, 500])


tensor([ 22.8438,  12.5663,  -4.4961,  15.9776,  16.3610,   1.6605,  31.5816,
        -24.2807,  11.3892,  33.1832, -17.7031,  12.2675,  36.2100,  14.4991,
          5.7659, -20.2631,  24.5781,  -0.6123, -30.3839, -22.6307,  15.0225,
        -23.4310,  -8.9231, -71.4337,  12.9205,  -2.0020, -13.5540,  30.6130,
          3.8229,  18.2278,   0.6141,  52.4337,  -0.4741,  -2.1770,   9.8585,
         15.1644,  37.5307, -27.1223, -14.3779,  49.4858, -15.1746, -27.9507,
        -15.6749,  13.1125, -29.7577,  51.3295,  -9.8607, -14.1429,  15.4918,
         26.2938,  15.4510,  21.1130,  -8.8056, -12.9031,  -9.6456,   4.7877,
         -7.5941, -24.7822,  -1.2331,  21.0113, -67.8460,   7.8663,  49.2412,
        -11.1162,   3.3012,  -9.0628,   2.4740, -11.6921,   1.1180, -15.9931,
         51.8490,   0.5338,  -0.7061,  -5.2812,  27.6975,  26.2964,   2.2344,
         -6.9736, -16.8031, -36.6282, -35.9391,  31.9341,  -7.7523,  12.6896,
         -9.4399, -10.7352,  28.9524,  24.7135,   1.3597,  44.47

In [18]:
# assign the input weight #1 to the model
model_pt.state_dict()['0.weight'].copy_(w1_pt_tt)

tensor([[-1.0856,  0.6421,  0.7033,  ...,  1.0819,  1.6859, -1.8758],
        [ 0.9973, -1.9779, -0.5981,  ...,  0.5102,  0.3307, -0.5001],
        [ 0.2830,  0.7123,  2.2007,  ..., -0.0393, -1.3596, -0.4552],
        ...,
        [-1.3635, -0.1109,  0.4157,  ..., -0.6561, -0.1741, -0.4237],
        [ 0.3794, -0.3413,  0.1605,  ..., -0.5363,  0.1477, -1.0752],
        [-0.3792, -0.2179,  0.8198,  ..., -0.8757, -0.3540, -1.4866]])

In [19]:
# did it get assigned to the model?
print(model_pt.state_dict()['0.weight'])

tensor([[-1.0856,  0.6421,  0.7033,  ...,  1.0819,  1.6859, -1.8758],
        [ 0.9973, -1.9779, -0.5981,  ...,  0.5102,  0.3307, -0.5001],
        [ 0.2830,  0.7123,  2.2007,  ..., -0.0393, -1.3596, -0.4552],
        ...,
        [-1.3635, -0.1109,  0.4157,  ..., -0.6561, -0.1741, -0.4237],
        [ 0.3794, -0.3413,  0.1605,  ..., -0.5363,  0.1477, -1.0752],
        [-0.3792, -0.2179,  0.8198,  ..., -0.8757, -0.3540, -1.4866]])


In [20]:
# Transpose the (H, O) NumPy weights so they line up with the model's '2.weight'.
w2_pt_tt = torch.transpose(torch.from_numpy(w2).float(), 0, 1)
w2_pt_tt.shape

torch.Size([10, 100])

In [21]:
# assign the input weight #2 to the model
model_pt.state_dict()['2.weight'].copy_(w2_pt_tt)

tensor([[ 1.1129e+00, -5.1684e-02,  1.5901e+00, -5.2124e-01, -8.7518e-01,
          8.5238e-01,  5.2425e-02, -1.0278e+00, -1.1003e+00,  3.6948e-01,
          6.5560e-01,  2.6299e-01,  1.0152e-01, -1.1134e+00, -3.2101e-01,
          8.5195e-01, -1.6749e+00,  2.0671e-01,  3.5323e-01,  1.5109e+00,
         -1.3764e+00,  4.9770e-01,  1.1479e+00,  2.0785e+00, -8.8207e-01,
         -3.3461e-01, -1.2842e+00, -2.6498e+00,  1.2119e-01,  7.4749e-01,
          6.5733e-01, -2.6226e+00, -2.1981e-01,  7.2719e-01,  6.4081e-01,
          2.5889e+00, -1.1228e+00,  3.8816e-01, -2.8974e-01,  5.0304e-01,
         -3.4724e-02,  5.7725e-01,  1.9024e+00, -4.8744e-01,  6.6559e-01,
         -7.3430e-01, -7.7303e-01, -6.2408e-01, -1.1740e-01, -1.9652e+00,
         -1.7360e-01, -1.4965e-02, -1.3572e+00, -1.0593e+00,  1.1157e+00,
          9.3877e-01,  2.5633e-01,  1.9308e+00, -7.7957e-01, -5.0439e-01,
         -6.0820e-01,  8.4222e-01,  1.1120e-01,  8.0944e-02,  4.0295e-01,
         -1.0022e-01, -4.6749e-01, -9.

In [22]:
# did it get assigned to the model?
print(model_pt.state_dict()['2.weight'])

tensor([[ 1.1129e+00, -5.1684e-02,  1.5901e+00, -5.2124e-01, -8.7518e-01,
          8.5238e-01,  5.2425e-02, -1.0278e+00, -1.1003e+00,  3.6948e-01,
          6.5560e-01,  2.6299e-01,  1.0152e-01, -1.1134e+00, -3.2101e-01,
          8.5195e-01, -1.6749e+00,  2.0671e-01,  3.5323e-01,  1.5109e+00,
         -1.3764e+00,  4.9770e-01,  1.1479e+00,  2.0785e+00, -8.8207e-01,
         -3.3461e-01, -1.2842e+00, -2.6498e+00,  1.2119e-01,  7.4749e-01,
          6.5733e-01, -2.6226e+00, -2.1981e-01,  7.2719e-01,  6.4081e-01,
          2.5889e+00, -1.1228e+00,  3.8816e-01, -2.8974e-01,  5.0304e-01,
         -3.4724e-02,  5.7725e-01,  1.9024e+00, -4.8744e-01,  6.6559e-01,
         -7.3430e-01, -7.7303e-01, -6.2408e-01, -1.1740e-01, -1.9652e+00,
         -1.7360e-01, -1.4965e-02, -1.3572e+00, -1.0593e+00,  1.1157e+00,
          9.3877e-01,  2.5633e-01,  1.9308e+00, -7.7957e-01, -5.0439e-01,
         -6.0820e-01,  8.4222e-01,  1.1120e-01,  8.0944e-02,  4.0295e-01,
         -1.0022e-01, -4.6749e-01, -9.

In [23]:
b1.shape
torch.from_numpy(b1).float()

tensor([ 1.9451e+00,  4.0798e-01,  2.1048e+00,  8.0404e-01, -1.3158e+00,
        -7.6591e-01, -2.7069e-01,  1.6593e-01,  1.0397e+00, -4.8862e-01,
         6.8756e-01, -1.4139e-02,  1.1679e+00, -1.4800e+00, -6.1002e-01,
         1.9840e+00, -9.6058e-01,  1.1664e+00, -6.0536e-01,  9.5474e-01,
         8.2983e-01,  2.3849e+00, -3.7025e-01, -7.8233e-01,  1.4159e-01,
        -6.7888e-01,  7.7579e-01,  8.7329e-01, -8.4585e-01,  1.6641e+00,
         5.0839e-01,  1.6293e+00, -8.0582e-01, -1.2374e+00,  1.4533e+00,
        -1.1955e+00,  4.1556e-01,  1.7001e+00,  6.6412e-01, -9.8218e-01,
        -4.6998e-01, -1.4745e+00,  4.2507e-01,  5.2342e-02,  1.3419e-01,
        -1.6378e-01, -3.7818e-01, -3.2270e-01, -1.3531e+00, -7.3084e-01,
         7.8842e-01,  4.0034e-01,  1.7838e+00, -5.6231e-01,  1.2295e+00,
         1.6651e+00,  6.6585e-02, -1.3740e-01,  1.1277e+00,  1.2111e+00,
        -1.5957e-01, -1.1463e-02,  1.8059e+00,  2.8191e-01, -8.0577e-01,
         4.8367e-01,  7.0699e-01, -8.8523e-02,  2.5

In [24]:
# Convert both NumPy bias vectors to float32 tensors.
b1_pt_t = torch.from_numpy(b1).float()
b2_pt_t = torch.from_numpy(b2).float()

pt_state = model_pt.state_dict()
# assign the input bias #1 to the model
pt_state['0.bias'].copy_(b1_pt_t)
# assign the input bias #2 to the model (the returned tensor is the cell output)
pt_state['2.bias'].copy_(b2_pt_t)

tensor([-0.1374,  0.5375,  0.3599, -0.1966, -0.0126,  0.2115, -1.2572, -0.0510,
         0.4954, -0.0551])

### Get weights from model

In [25]:
print(model_pt.state_dict()['0.weight'].sum())
print(model_pt.state_dict()['2.weight'].sum())

tensor(310.5356)
tensor(-24.1582)


In [26]:
print(np.allclose(model_pt.state_dict()['0.weight'], w1_pt_tt))
print(np.allclose(model_pt.state_dict()['2.weight'], w2_pt_tt))

True
True


### Get biases from model

In [27]:
print(model_pt.state_dict()['0.bias'].sum())
print(model_pt.state_dict()['2.bias'].sum())

print(np.allclose(model_pt.state_dict()['0.bias'], b1_pt_t))
print(np.allclose(model_pt.state_dict()['2.bias'], b2_pt_t))

tensor(20.2911)
tensor(-0.1057)
True
True


In [28]:
out_np    = ffpass_np(x0)
out_torch = ffpass_torch(x0)

# np.allclose(out_np, out_keras, 1e-4)
print(out_np)
print(out_torch)

[1.55103949e-145 2.20411161e-056 3.73912060e-089 7.52555573e-082
 1.80279742e-038 8.51485967e-029 3.01765540e-086 1.00000000e+000
 1.58494512e-042 3.23202418e-018]
tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.8028e-38, 8.5148e-29,
         0.0000e+00, 1.0000e+00, 1.5849e-42, 3.2320e-18]],
       grad_fn=<SoftmaxBackward>)


In [29]:
# Print the sum of every parameter tensor of model_pt: weights first, then biases.
pt_state = model_pt.state_dict()
for key in ('0.weight', '2.weight', '0.bias', '2.bias'):
    print(pt_state[key].sum())

tensor(310.5356)
tensor(-24.1582)
tensor(20.2911)
tensor(-0.1057)


In [30]:
def ffpass_torch(x, model=None):
    """Forward pass of a PyTorch model on a single NumPy sample.

    NOTE: this re-defines the `ffpass_torch` from the PyTorch model cell
    earlier in the notebook; the later definition is the one in effect.

    Args:
        x: NumPy array holding one sample; it is reshaped to a batch of one
            with shape (1, x.size).
        model: PyTorch module (any callable taking a float32 tensor) to
            evaluate. Defaults to the notebook-level `model_pt`.

    Returns:
        The tensor produced by `model` for the float32 version of the batch.
    """
    if model is None:
        model = model_pt
    batch = x.reshape((1, x.size))
    # from_numpy keeps NumPy's float64 dtype; the layers hold float32 weights.
    return model(torch.from_numpy(batch).float())

In [31]:
x0.shape

(500,)

In [32]:
x0 = np.random.random((N,))
xr = x0.reshape((1, x0.size))
xr = torch.from_numpy(xr).float()
# xr = xr.t()
xr.shape

model_pt(xr)


tensor([[0.0000e+00, 8.0899e-31, 0.0000e+00, 0.0000e+00, 0.0000e+00, 4.0638e-44,
         5.3025e-36, 1.0000e+00, 0.0000e+00, 5.0871e-25]],
       grad_fn=<SoftmaxBackward>)

In [33]:
model_pt.forward(xr)

tensor([[0.0000e+00, 8.0899e-31, 0.0000e+00, 0.0000e+00, 0.0000e+00, 4.0638e-44,
         5.3025e-36, 1.0000e+00, 0.0000e+00, 5.0871e-25]],
       grad_fn=<SoftmaxBackward>)

In [34]:
import torch.nn.functional as F

class LR(nn.Module):
  """Two-layer network: affine -> ReLU -> affine -> softmax.

  Args:
    n_in: number of input features; defaults to the notebook-level `N`.
    n_hidden: width of the hidden layer; defaults to the notebook-level `H`.
    n_out: number of outputs; defaults to the notebook-level `O`.
  """

  def __init__(self, n_in=None, n_hidden=None, n_out=None):
    super().__init__()  # always: nn.Module must be set up before layers are added
    # Fall back to the notebook constants only for sizes that were not given,
    # so a plain `LR()` still builds the original N -> H -> O network.
    n_in = N if n_in is None else n_in
    n_hidden = H if n_hidden is None else n_hidden
    n_out = O if n_out is None else n_out
    self.linear1 = nn.Linear(n_in, n_hidden)
    self.linear2 = nn.Linear(n_hidden, n_out)

  def forward(self, x):
    """Return the softmax output for a batch `x` of shape (batch, n_in)."""
    hidden = F.relu(self.linear1(x))
    # dim=1 normalises each row, so the outputs of every sample sum to one.
    return F.softmax(self.linear2(hidden), dim=1)

model = LR()
print(model)

LR(
  (linear1): Linear(in_features=500, out_features=100, bias=True)
  (linear2): Linear(in_features=100, out_features=10, bias=True)
)


In [35]:
model.state_dict().keys()

odict_keys(['linear1.weight', 'linear1.bias', 'linear2.weight', 'linear2.bias'])

In [36]:
# Print the sum of every parameter tensor of the LR model: weights first, then biases.
lr_state = model.state_dict()
for key in ('linear1.weight', 'linear2.weight', 'linear1.bias', 'linear2.bias'):
    print(lr_state[key].sum())

tensor(4.6353)
tensor(-2.1698)
tensor(0.0626)
tensor(-0.0637)


In [37]:
# Convert the NumPy parameters to float32 tensors; the weight matrices are
# transposed before being copied into the Linear layers.
w1_tt = torch.from_numpy(w1).float().transpose(0, 1)
w2_tt = torch.from_numpy(w2).float().transpose(0, 1)
b1_pt_t = torch.from_numpy(b1).float()
b2_pt_t = torch.from_numpy(b2).float()

# Copy each tensor into the matching state_dict entry of the LR model.
# The result is bound to `_` so the cell displays nothing.
lr_state = model.state_dict()
for key, tensor in (('linear1.weight', w1_tt),
                    ('linear2.weight', w2_tt),
                    ('linear1.bias', b1_pt_t),
                    ('linear2.bias', b2_pt_t)):
    _ = lr_state[key].copy_(tensor)

In [38]:
# Print the parameter sums of the LR model again, now that the NumPy values
# have been copied in: weights first, then biases.
lr_state = model.state_dict()
for key in ('linear1.weight', 'linear2.weight', 'linear1.bias', 'linear2.bias'):
    print(lr_state[key].sum())

tensor(310.5356)
tensor(-24.1582)
tensor(20.2911)
tensor(-0.1057)


In [39]:
x0 = np.random.random((N,))
x0 = x0.reshape((1, x0.size))
x0_t = torch.from_numpy(x0).float()
model_x0_t = model(x0_t)
model_x0_t

tensor([[0.0000e+00, 1.4159e-41, 0.0000e+00, 0.0000e+00, 1.0696e-29, 0.0000e+00,
         0.0000e+00, 1.0000e+00, 0.0000e+00, 1.9758e-43]],
       grad_fn=<SoftmaxBackward>)

In [40]:
# comparing numpy versus model output
out_np    = ffpass_np(x0)
out_torch = ffpass_torch(x0)

np.allclose(out_np, model_x0_t.detach().numpy(), 1e-4)

True

In [41]:
out_np

array([[5.22034914e-187, 1.41593296e-041, 5.71192629e-129,
        2.03546147e-106, 1.06955293e-029, 1.26702997e-050,
        1.82215367e-079, 1.00000000e+000, 3.84671549e-058,
        1.96980493e-043]])

In [42]:
model_x0_t.detach().numpy()

array([[0.0000000e+00, 1.4158720e-41, 0.0000000e+00, 0.0000000e+00,
        1.0695545e-29, 0.0000000e+00, 0.0000000e+00, 1.0000000e+00,
        0.0000000e+00, 1.9758308e-43]], dtype=float32)