In [17]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np 
import torch.optim as optim 
import sys 
import os 
import matplotlib.pyplot as plt

In [18]:
class Net(nn.Module):
    """Minimal network consisting of a single fully connected layer.

    The layer maps 3 input features to 2 output features, i.e. it
    computes ``x @ W.T + b`` with ``W`` of shape (2, 3) and ``b`` of
    shape (2,).
    """

    def __init__(self):
        super().__init__()
        # The only trainable component: a 3 -> 2 affine transformation.
        self.dense = nn.Linear(in_features=3, out_features=2)

    def forward(self, x):
        """Apply the dense layer to ``x`` and return the result."""
        out = self.dense(x)
        return out

In [19]:
# Instantiate the network; the dense layer starts with PyTorch's default
# random initialisation, so the values printed here differ between runs.
net = Net()

print("Printing dense layer weights...")
# Weight matrix first, then the bias vector, one per line.
print(net.dense.weight.data, net.dense.bias.data, sep="\n")
print("finished printing dense layer weights")

Printing dense layer weights...
tensor([[-0.0117,  0.1319,  0.4879],
        [ 0.4381, -0.2224,  0.4127]])
tensor([-0.1199,  0.4131])
finished printing dense layer weights


<h2>Manually set the layer weights</h2>
Here we set the weights to match those used in the pyunit test case. Setting them explicitly is usually better than
relying on random seeds.

We also compute the expected output directly as

$y_{pred}=xw^{T}+b$

In [25]:
# --- Reference values (pure NumPy) -----------------------------------------
# Weight matrix (out_features x in_features) and bias used as the fixed,
# known starting point for the layer.
model_w = np.array([[1, 3, -1], [0, -4, 2]])
model_b = np.array([-3, 2])

# Input batch: written column-wise and transposed, giving 2 samples x 3 features.
x = np.transpose(np.array([[1, -1], [2, 3], [-1, -2]]))

# Expected prediction computed directly as x @ W^T + b.
yy = np.matmul(x, model_w.T) + model_b

# --- Install the reference values into the network --------------------------
# The layer expects float parameters, so the integer arrays are converted.
w_tensor = torch.from_numpy(model_w).to(torch.float32)
b_tensor = torch.from_numpy(model_b).to(torch.float32)
w = nn.Parameter(w_tensor, requires_grad=True)
b = nn.Parameter(b_tensor, requires_grad=True)
net.dense.weight, net.dense.bias = w, b

# --- Report the setup in a copy-pasteable "np.array(...)" form ---------------
print("#------ ")
print("model_w=np.{}".format(repr(model_w)))
print("model_b=np.{}".format(repr(model_b)))
print("x=np.{}".format(repr(x)))
print("# ---- expected final value (directly computed)----")
print("y-predicted=np.{}".format(repr(yy)))

#------ 
model_w=np.array([[ 1,  3, -1],
       [ 0, -4,  2]])
model_b=np.array([-3,  2])
x=np.array([[ 1,  2, -1],
       [-1,  3, -2]])
# ---- expected final value (directly computed)----
y-predicted=np.array([[  5,  -8],
       [  7, -14]])


In [28]:
# Regression target for the two samples, converted to a float tensor.
y = np.array([[-1, 1], [-3, -1]])
target = torch.from_numpy(y).to(torch.float32)

# Convert the NumPy input batch to a float tensor for the network.
# NOTE: the name `input` shadows the Python builtin within this notebook.
input = torch.from_numpy(x).to(torch.float32)
print("Input Shape:{}".format(input.size()))

# Forward pass through the network with the manually installed parameters.
output = net(input)
print("Output:{}".format(output.data))
print("Target:{}".format(target.data))

Input Shape:torch.Size([2, 3])
Output:tensor([[  5.,  -8.],
        [  7., -14.]])
Target:tensor([[-1.,  1.],
        [-3., -1.]])


In [29]:
# Plain SGD over all network parameters with a small learning rate.
optimizer = optim.SGD(net.parameters(), lr=1e-3)

# Mean squared error, averaged over every element of the output.
criteria = nn.MSELoss(reduction="mean")
loss = criteria(output, target)
print("loss: {}".format(loss))

loss: 96.5


In [30]:
# One manual optimisation step: clear gradients, backpropagate, update.
# The order of these three calls matters.

# Reset the gradients held by the optimizer's parameters so this step
# starts from a clean state.
optimizer.zero_grad()
# Perform a backward pass (backpropagation): fills the .grad attribute of
# the dense layer's weight and bias from the loss computed earlier.
loss.backward()
# Update the parameters in place using the gradients just computed.
optimizer.step()

In [31]:
# Report the dense layer's parameters after the single optimisation step,
# together with the gradients that produced the update.
# Fix: the header message previously misspelled "weight" as "wight".
print("Printing weight and weight gradient after one step")
print("Weight:{}".format(net.dense.weight.data))
print("W Grad:{}".format(net.dense.weight.grad.data))
print("\nPrinting bias and bias gradient after one step")
print("Bias:{}".format(net.dense.bias.data))
print("B grad:{}".format(net.dense.bias.grad.data))

Printing the wight and weight gradient after one step
Weight:tensor([[ 1.0020e+00,  2.9790e+00, -9.8700e-01],
        [-2.0000e-03, -3.9715e+00,  1.9825e+00]])
W Grad:tensor([[ -2.0000,  21.0000, -13.0000],
        [  2.0000, -28.5000,  17.5000]])

Printing bias and bias gradient after one step
Bias:tensor([-3.0080,  2.0110])
B grad:tensor([  8., -11.])
