The corresponding pyunit test case is in 
<b>tests.core.np.TestDenseLayer.DenseLayerStandAlone#test_basic_op</b>

Note that given $w, b$ and $x$, we calculate 

$y=wx+b$ 

whereas you need to provide $x^T$, $w$ and $b$ to PyTorch, which calculates

$y_{torch}=x^{T}w^{T}+b$, in which case $y=y^{T}_{torch}$

Strictly speaking, the shape of $b$ should matter, but I have not yet tested it. 

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np 
import torch.optim as optim 
import sys 
import os 
import matplotlib.pyplot as plt

In [None]:
class Net(nn.Module):
    """Minimal network made of a single fully connected layer (3 -> 2)."""

    def __init__(self):
        super().__init__()
        # The layer is created with its default random initialisation; the
        # notebook overwrites weight and bias afterwards.
        self.dense = nn.Linear(in_features=3, out_features=2)

    def forward(self, x):
        """Apply the dense layer to ``x`` and return its output."""
        prediction = self.dense(x)
        return prediction

In [None]:
# Build the network and dump the randomly initialised layer parameters.
net = Net()
print("Printing dense layer weights...")
# One print call, one tensor per line (weight first, then bias).
print(net.dense.weight.data, net.dense.bias.data, sep="\n")
print("finished printing dense layer weights")

<h2>Manually set the layer weights</h2>
Setting the weights to match those used in the pyunit test case. This is usually better than 
relying on seeding random number generators. 

Also computing the output directly as 

$y_{pred}=xw^{T}+b$

In [None]:
# Fixed parameter values chosen to match the pyunit test case.
model_w = np.array([[1, 3, -1], [0, -4, 2]])
model_b = np.array([-3, 2])

# Convert to float tensors, wrap them as parameters and install them on the
# dense layer (weight first, then bias).
w_tensor = torch.from_numpy(model_w).float()
w = nn.Parameter(w_tensor)
net.dense.weight = w

b_tensor = torch.from_numpy(model_b).float()
b = nn.Parameter(b_tensor)
net.dense.bias = b

# The literal is written column-wise and transposed before use.
x = np.transpose(np.array([[1, -1], [2, 3], [-1, -2]]))

print("#------ ")
print(f"model_w=np.{model_w!r}")
print(f"model_b=np.{model_b!r}")
print(f"x=np.{x!r}")

# Reference prediction computed with plain numpy: x w^T + b.
yy = np.matmul(x, model_w.T) + model_b
print("# ---- expected final value (directly computed)----")
print(f"y-predicted=np.{yy!r}")

In [None]:
# NOTE(review): `input` shadows the Python builtin; the name is kept because it
# is a notebook-level variable that other cells may rely on.
input = torch.from_numpy(x).float()
print(f"Input Shape:{input.shape}")

# Forward pass through the dense layer.
output = net(input)
print(f"Output:{output.data}")

# Target values, written column-wise and transposed like the input.
y = np.transpose(np.array([[-1, 1], [-3, -1]]))
target = torch.from_numpy(y).float()
print(f"Target:{target.data}")

In [None]:
# Mean-squared-error loss between the layer output and the target.
criteria = nn.MSELoss()
loss = criteria(output, target)
print(f"loss: {loss}")

# Plain SGD over all parameters of the network.
optimizer = optim.SGD(net.parameters(), lr=1e-3)

In [None]:
# Run one optimisation step for `net`: reset gradients, backpropagate the loss,
# then let the optimizer update the parameters. The order of the three calls
# matters.
optimizer.zero_grad()
# perform a backward pass (backpropagation)
loss.backward()
# Update the parameters
optimizer.step()

In [None]:
# Print tensors with 8 decimals and without scientific notation so the values
# can be copied into the unit test.
torch.set_printoptions(precision=8, sci_mode=False)

print("Printing wight and weight gradient after one step")
print(f"Weight:{net.dense.weight.data}")
print(f"W Grad:{net.dense.weight.grad.data}")

print("\nPrinting bias and bias gradient after one step")
print(f"Bias:{net.dense.bias.data}")
print(f"B grad:{net.dense.bias.grad.data}")

# Testing test_basic_op_large_matrix

In [None]:
import numpy as np 
# Input matrix, one row per literal line: shape (4, 6).
x = np.array([
    [0.54566752, 0.66921034, 0.35265542, 0.32324271, 0.35036963, 0.05317591],
    [0.97433629, 0.5027976, 0.15637831, 0.72948084, 0.42097552, 0.52522781],
    [0.41793729, 0.48112345, 0.46862087, 0.88918467, 0.48792933, 0.32439625],
    [0.4775774, 0.58105899, 0.35079832, 0.79657794, 0.3910011, 0.72908915],
])
# Weight matrix: shape (3, 4).
w = np.array([
    [0.61013274, 0.86914947, 0.95211922, 0.96385655],
    [0.64290252, 0.2717017, 0.193146, 0.05004571],
    [0.14360354, 0.54256991, 0.90870491, 0.06577582],
])
# Bias as a column vector so that it broadcasts across the columns of w @ x.
b = np.array([[0.76026806], [0.32982798], [0.01258297]])

# Reference prediction y = w x + b, and an all-ones target of the same shape.
y_pred = np.matmul(w, x) + b
y_target = np.ones(y_pred.shape, dtype=y_pred.dtype)
print(f"shape of x:{x.shape}, shape of w:{w.shape}, shape of y_pred:{y_pred.shape}")

In [None]:
class Net2(nn.Module):
    """Single dense layer (4 inputs -> 3 outputs) with settable parameters."""

    def __init__(self):
        super().__init__()
        self.dense = nn.Linear(4, 3)

    def set_weights(self, w_numpy, b_numpy):
        """Replace the dense layer's weight and bias with the given values.

        Args:
            w_numpy: Array-like of shape ``(out_features, in_features)``.
            b_numpy: Array-like holding ``out_features`` values. A column or
                row vector is accepted and flattened to 1-D.

        Raises:
            ValueError: If the weight shape or the bias length does not match
                the layer dimensions.
        """
        # torch.tensor always copies, so later optimizer steps never write
        # back into the caller's arrays (torch.from_numpy(...).float() shares
        # memory when the input is already float32).
        w_tensor = torch.tensor(w_numpy, dtype=torch.float32)
        # The layer expects a 1-D bias; flatten column/row vectors.
        b_tensor = torch.tensor(b_numpy, dtype=torch.float32).reshape(-1)

        expected_shape = (self.dense.out_features, self.dense.in_features)
        if tuple(w_tensor.shape) != expected_shape:
            raise ValueError(
                "weight shape {} does not match layer shape {}".format(
                    tuple(w_tensor.shape), expected_shape
                )
            )
        if b_tensor.numel() != self.dense.out_features:
            raise ValueError(
                "bias has {} elements, expected {}".format(
                    b_tensor.numel(), self.dense.out_features
                )
            )

        self.dense.weight = nn.Parameter(w_tensor)
        self.dense.bias = nn.Parameter(b_tensor)

    def forward(self, x):
        """Apply the dense layer to ``x`` and return its output."""
        return self.dense(x)

In [None]:
# Build the second network and load the numpy parameters; the column-vector
# bias is flattened to 1-D before it is handed over.
net2 = Net2()
net2.set_weights(w, b.ravel())

# The numpy input is transposed before being passed to the layer.
# NOTE(review): `input` shadows the Python builtin; name kept as in the
# original notebook.
input = torch.from_numpy(x.T).float()
input.requires_grad_(True)

output = net2(input)
print(f"y_pred={output.data}")

# All-ones target with the same shape as the output.
target = torch.ones_like(output, requires_grad=True)
print(f"y_target:{target.data}")

In [None]:
# Mean-squared-error loss for the large-matrix case.
criteria = nn.MSELoss()
loss = criteria(output, target)
print(f"loss: {loss}")

# Plain SGD over the parameters of net2.
optimizer = optim.SGD(net2.parameters(), lr=1e-3)

In [None]:
# Print tensors with 8 decimals and without scientific notation.
torch.set_printoptions(precision=8, sci_mode=False)

# --- parameters before the update ---
print("---------------------------------------------------")
print("Printing original weight and bias")
print("---------------------------------------------------")
print(f"Weight:{net2.dense.weight.data}")
print(f"Bias:{net2.dense.bias.data}")

# One optimisation step: reset gradients, backpropagate, update.
optimizer.zero_grad()
loss.backward()
optimizer.step()

# --- parameters and gradients after the update ---
print("---------------------------------------------------")
print("Printing wight and weight gradient after one step")
print("---------------------------------------------------")
print(f"Weight:{net2.dense.weight.data}")
print(f"W Grad:{net2.dense.weight.grad.data}")
print("\nPrinting bias and bias gradient after one step")
print(f"Bias:{net2.dense.bias.data}")
print(f"B grad:{net2.dense.bias.grad.data}")

In [None]:
# Recompute the SGD update by hand from the original numpy weights:
# w_new = w - lr * grad, using the same lr (0.001) passed to the optimizer.
w_tensor = torch.from_numpy(w).to(torch.float32)
w_new = w_tensor - net2.dense.weight.grad * 0.001
print(w_new)