In [5]:
import torch
# Build a 2x2 tensor from literal data. torch.tensor() is the documented
# factory for creating a tensor from existing data; the float literals make
# it infer a floating-point dtype, as the legacy torch.Tensor(...) call did.
a = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
print(a)
# ** on a tensor is applied element-wise.
print(a ** 2)

tensor([[1., 2.],
        [3., 4.]])
tensor([[ 1.,  4.],
        [ 9., 16.]])


In [7]:
from torch.autograd import Variable
# Leaf tensor tracked by autograd. Tensors carry requires_grad themselves, so
# the deprecated Variable wrapper is not needed.
a = torch.tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True)
print(a)

# Scalar objective: the sum of the squared entries of a.
y = torch.sum(a ** 2)
print(y)

# Back-propagate from the scalar; afterwards a.grad holds dy/da = 2 * a.
y.backward()
print(a.grad)

tensor([[1., 2.],
        [3., 4.]], requires_grad=True)
tensor(30., grad_fn=<SumBackward0>)
tensor([[2., 4.],
        [6., 8.]])


In [11]:
import torch.nn as nn
import torch.nn.functional as F

class TwoLayerNet(nn.Module):
    """Two affine layers with a ReLU in between: Linear -> ReLU -> Linear."""

    def __init__(self, D_in, H, D_out):
        """
        Build both nn.Linear layers and store them as attributes so that
        nn.Module registers their parameters.

        D_in: input dimension
        H: dimension of hidden layer
        D_out: output dimension
        """
        super(TwoLayerNet, self).__init__()
        # Layers are created in this order: input->hidden, then hidden->output.
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, D_out)

    def forward(self, x):
        """
        Run the network on x and return the second linear layer's output.

        x is passed through linear1, rectified with ReLU, and then passed
        through linear2; no activation is applied to the final result.
        """
        hidden = self.linear1(x)
        activated = F.relu(hidden)
        return self.linear2(activated)

# N is batch size; D_in is input dimension;
# H is the dimension of the hidden layer; D_out is output dimension.
N, D_in, H, D_out = 32, 100, 50, 10

# Create a random input batch. A plain tensor can be passed to the model
# directly; the deprecated Variable wrapper is not needed.
x = torch.randn(N, D_in)  # dim: 32 x 100
print(x)

# Construct our model by instantiating the class defined above
model = TwoLayerNet(D_in, H, D_out)

# Forward pass: compute predicted y by passing x to the model
y_pred = model(x)   # dim: 32 x 10
print(y_pred)

tensor([[-1.2112,  0.2675, -0.2248,  ..., -1.5242, -0.3984, -0.2108],
        [ 0.7954, -0.6520, -1.6082,  ...,  0.2869, -0.2265,  0.0378],
        [ 0.5676,  0.6612, -0.2878,  ...,  0.3964, -0.1586, -0.5458],
        ...,
        [-1.2087,  0.9428,  0.0563,  ..., -0.3881,  1.7870, -0.2234],
        [ 0.2344, -1.7631,  0.5527,  ..., -0.4263,  2.8437, -0.9065],
        [-0.5487,  0.8993,  0.5907,  ...,  1.2975, -0.0622, -0.5710]])
tensor([[-3.4105e-03,  1.0401e-01, -2.8513e-01,  2.8813e-01,  6.1342e-02,
          4.4717e-01, -1.7314e-01,  1.8734e-02, -7.9513e-02,  4.9780e-02],
        [-1.0882e-01,  2.3368e-01, -3.0610e-01,  1.1576e-01, -3.8320e-02,
          1.1395e-01, -2.4785e-01, -1.9713e-01, -1.1446e-01,  1.6656e-01],
        [-2.0954e-01,  2.2291e-01, -2.6297e-01, -2.2822e-02,  1.1798e-01,
          4.3667e-01, -8.7946e-02,  2.9155e-01, -6.2115e-02,  1.0340e-01],
        [ 1.0110e-01, -1.0658e-03, -3.4622e-01, -1.5339e-02,  7.7534e-04,
          8.1481e-02, -2.7482e-01, -3.9968e-0

In [14]:
# Built-in cross-entropy criterion from torch.nn, constructed with its default arguments.
loss_fn = nn.CrossEntropyLoss()

def myCrossEntropyLoss(outputs, labels):
    """
    Mean cross-entropy loss computed by hand from raw class scores.

    outputs: 2-D tensor of unnormalized scores, one row per example
             (log-softmax is taken over dim 1).
    labels:  integer class indices, one per row of `outputs`; each selects
             the entry of that row along dim 1.

    Returns a scalar tensor: the negative log-probability assigned to each
    row's labelled class, averaged over the rows.
    """
    batch_size = outputs.size(0)                     # number of rows (examples)
    log_probs = F.log_softmax(outputs, dim=1)        # log of softmax values per row
    picked = log_probs[range(batch_size), labels]    # log-prob of the labelled class, per row
    # Divide by the row count so the result is a per-example mean, in line
    # with the default 'mean' reduction of nn.CrossEntropyLoss.
    return -torch.sum(picked) / batch_size


In [15]:
# Option 1: stochastic gradient descent with momentum.
optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=0.01,
    momentum=0.9,
)

# Option 2: Adam. This assignment rebinds `optimizer`, so when the whole cell
# runs, the Adam instance is the one left in effect.
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=0.0001,
)