In [1]:
# Dependencies.
import torch
import torch.nn as nn
import torch.optim as optim

import numpy as np

In [2]:
# Problem dimensions used by the cells below.
IN_DIM = 5       # width of each input row; passed to the layer as in_features
OUT_DIM = 2      # width of each target row; passed to the layer as out_features
BATCH_SIZE = 4   # number of rows in the random x / y batches

In [3]:
# Build the pieces of the experiment: a single affine layer, an SGD optimizer
# over that layer's parameters, and one random batch of inputs and targets.
model = nn.Linear(IN_DIM, OUT_DIM, bias=True)
optimizer = optim.SGD(model.parameters(), lr=1e-3)
x = torch.randn(BATCH_SIZE, IN_DIM)
y = torch.randn(BATCH_SIZE, OUT_DIM)

In [4]:
# Show the parameters the layer was constructed with, before any cell
# overwrites them.
print(
    '*'*79,
    'model weights:\n{}'.format(model.weight.detach().numpy()),
    '*'*79,
    'model biases:\n{}'.format(model.bias.detach().numpy()),
    sep='\n',
)

*******************************************************************************
model weights:
[[ 0.4377448   0.11098593  0.18216628  0.2425034   0.05848342]
 [ 0.15746039  0.00617778  0.4180209  -0.20695996 -0.01081291]]
*******************************************************************************
model biases:
[-0.43076822 -0.32568341]


In [5]:
# Draw the initialization values as numpy arrays: one random integer in 0..9
# per entry, scaled by 0.1. The shapes (2, 5) and (2,) equal (OUT_DIM, IN_DIM)
# and (OUT_DIM,) from the constants cell.
weights_np = 1e-1*np.random.randint(low=0, high=10, size=(2, 5))
biases_np = 1e-1*np.random.randint(low=0, high=10, size=(2,))
print(
    '*'*79,
    'new weights as np array:\n{}'.format(weights_np),
    '*'*79,
    'new biases as np array:\n{}'.format(biases_np),
    sep='\n',
)

*******************************************************************************
new weights as np array:
[[0.3 0.5 0.2 0.6 0.7]
 [0.  0.7 0.4 0.5 0.9]]
*******************************************************************************
new biases as np array:
[0.3 0. ]


In [6]:
# Overwrite the layer's parameters with the numpy initialization values.
# The values are copied in place under torch.no_grad(), so autograd does not
# record the write and the Parameter objects registered with the optimizer
# stay the same. copy_ converts the numpy data to the parameter's dtype and
# raises if the source cannot be broadcast to the parameter's shape.
with torch.no_grad():
    model.weight.copy_(torch.from_numpy(weights_np))
    model.bias.copy_(torch.from_numpy(biases_np))

In [7]:
# Show the layer's parameters again to confirm they now hold the numpy values.
print(
    '*'*79,
    'model weights after initialization from numpy:\n{}'.format(model.weight.detach().numpy()),
    '*'*79,
    'model biases after initialization from numpy:\n{}'.format(model.bias.detach().numpy()),
    sep='\n',
)

*******************************************************************************
model weights after initialization from numpy:
[[0.3 0.5 0.2 0.6 0.7]
 [0.  0.7 0.4 0.5 0.9]]
*******************************************************************************
model biases after initialization from numpy:
[0.3 0. ]


In [8]:
# One forward/backward pass in training mode. This fills .grad on the layer's
# parameters; no optimizer step is taken in this cell.
model.train()  # train() returns the module itself, so rebinding is not needed
optimizer.zero_grad()
y_pred = model(x)
residual = y - y_pred
loss = residual.pow(2).mean()  # mean squared error over all entries
loss.backward()

In [9]:
# Snapshot the current parameters as independent numpy arrays (clone() gives
# the arrays their own memory, separate from the live parameters).
weights_before_reinit = model.weight.detach().clone().numpy()
biases_before_reinit = model.bias.detach().clone().numpy()

In [10]:
# Report the parameter snapshot taken before the re-initialization.
print(
    '*'*79,
    'model weights before re-initialization:\n{}'.format(weights_before_reinit),
    '*'*79,
    'model biases before re-initialization:\n{}'.format(biases_before_reinit),
    sep='\n',
)

*******************************************************************************
model weights before re-initialization:
[[0.3 0.5 0.2 0.6 0.7]
 [0.  0.7 0.4 0.5 0.9]]
*******************************************************************************
model biases before re-initialization:
[0.3 0. ]


In [11]:
# Snapshot the gradients produced by loss.backward() as independent numpy
# arrays, so they can be compared against the optimizer update later.
d_weights = model.weight.grad.detach().clone().numpy()
d_biases = model.bias.grad.detach().clone().numpy()

In [12]:
# Report the saved gradients.
print(
    '*'*79,
    'delta weights:\n{}'.format(d_weights),
    '*'*79,
    'delta biases:\n{}'.format(d_biases),
    sep='\n',
)

*******************************************************************************
delta weights:
[[ 0.4065137   0.2567979  -0.03359173  0.02399254  0.2703929 ]
 [ 0.10137926 -0.44343656  0.15873618  0.05178709  0.46842384]]
*******************************************************************************
delta biases:
[-0.2774894   0.51822275]


In [13]:
# Draw a second set of numpy values that the parameters will be overwritten
# with: one random integer in 0..9 per entry, scaled by 0.1. The shapes (2, 5)
# and (2,) equal (OUT_DIM, IN_DIM) and (OUT_DIM,) from the constants cell.
weights_np_new = 1e-1*np.random.randint(low=0, high=10, size=(2, 5))
biases_np_new = 1e-1*np.random.randint(low=0, high=10, size=(2,))
print(
    '*'*79,
    'new weights for re-initialization as np array:\n{}'.format(weights_np_new),
    '*'*79,
    'new biases for re-initialization as np array:\n{}'.format(biases_np_new),
    sep='\n',
)

*******************************************************************************
new weights for re-initialization as np array:
[[0.6 0.1 0.5 0.2 0.5]
 [0.7 0.2 0.2 0.9 0.6]]
*******************************************************************************
new biases for re-initialization as np array:
[0.5 0.8]


In [14]:
# Re-initialize the parameters with the second set of numpy values.
# The values are copied in place under torch.no_grad(), so autograd does not
# record the write and the Parameter objects registered with the optimizer
# stay the same. Only the parameter values are written here; the gradients
# stored in .grad by the earlier backward pass are not modified. copy_
# converts the numpy data to the parameter's dtype and raises if the source
# cannot be broadcast to the parameter's shape.
with torch.no_grad():
    model.weight.copy_(torch.from_numpy(weights_np_new))
    model.bias.copy_(torch.from_numpy(biases_np_new))

In [15]:
# Snapshot the re-initialized parameters as independent numpy arrays; these
# are the starting point for the expected-update comparison later on.
weights_after_reinit = model.weight.detach().clone().numpy()
biases_after_reinit = model.bias.detach().clone().numpy()

In [16]:
# Report the parameter snapshot taken right after the re-initialization.
print(
    '*'*79,
    'model weights after re-initialization:\n{}'.format(weights_after_reinit),
    '*'*79,
    'model biases after re-initialization:\n{}'.format(biases_after_reinit),
    sep='\n',
)

*******************************************************************************
model weights after re-initialization:
[[0.6 0.1 0.5 0.2 0.5]
 [0.7 0.2 0.2 0.9 0.6]]
*******************************************************************************
model biases after re-initialization:
[0.5 0.8]


In [17]:
# Step the optimizer.
# The gradients currently stored on the parameters come from the backward
# pass that ran before the re-initialization, while the parameter values are
# the re-initialized ones, so this step applies the old gradients to the new
# values.
optimizer.step()

In [18]:
# Snapshot the parameters once more, now that the optimizer step has run.
weights_after_reinit_and_step = model.weight.detach().clone().numpy()
biases_after_reinit_and_step = model.bias.detach().clone().numpy()

In [19]:
# Report the parameter snapshot taken after re-initialization plus one step.
print(
    '*'*79,
    'model weights after re-initialization and step:\n{}'.format(weights_after_reinit_and_step),
    '*'*79,
    'model biases after re-initialization and step:\n{}'.format(biases_after_reinit_and_step),
    sep='\n',
)

*******************************************************************************
model weights after re-initialization and step:
[[0.5995935  0.0997432  0.5000336  0.19997601 0.4997296 ]
 [0.6998986  0.20044345 0.19984126 0.8999482  0.5995316 ]]
*******************************************************************************
model biases after re-initialization and step:
[0.50027746 0.7994818 ]


In [20]:
# Check that optimizer.step() applied the SGD rule to the re-initialized
# parameters using the gradients saved before the re-initialization:
#     param_after_step == param_after_reinit - lr * grad
# Each printed value is the norm of (observed - expected), so 0.0 means the
# two agree exactly.
# The learning rate is read from the optimizer instead of repeating the
# literal, so the check stays valid if the lr given to optim.SGD is changed.
learning_rate = optimizer.param_groups[0]['lr']
expected_weights = weights_after_reinit - learning_rate*d_weights
expected_biases = biases_after_reinit - learning_rate*d_biases
print('*'*79)
print('weight updates are consistent:\n{}'.format(
    # For a 2-D array, ord=2 is the spectral norm (largest singular value).
    np.linalg.norm(weights_after_reinit_and_step - expected_weights, ord=2),
))
print('*'*79)
print('bias updates are consistent:\n{}'.format(
    # For a 1-D array, ord=2 is the Euclidean norm.
    np.linalg.norm(biases_after_reinit_and_step - expected_biases, ord=2),
))

*******************************************************************************
weight updates are consistent:
0.0
*******************************************************************************
bias updates are consistent:
0.0
