In [2]:
import torch 
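# Variable wraps a tensor and tracks gradients so we can do gradient descent
# (since PyTorch 0.4 plain tensors track gradients themselves; Variable is kept for compatibility)
# A tensor is an n-dimensional array: scalars, vectors and matrices are the 0-, 1- and 2-D cases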
import torch.nn as nn
from torch.autograd import Variable 
import torch.nn.functional as F

In [4]:
# Perceptron: a single linear neuron
class Net(nn.Module):
    """One-input, one-output linear model: output = weight * x + bias."""

    def __init__(self):
        super().__init__()
        # single fully connected layer holding one weight and one bias
        self.fc1 = nn.Linear(in_features=1, out_features=1)

    def forward(self, x):
        # no activation function is applied, so the output stays linear in x
        return self.fc1(x)

In [10]:
# Instantiate the perceptron and print it; printing an nn.Module lists its
# registered layers, which lets us verify the contents of the network
net = Net()
print(net)

Net(
  (fc1): Linear(in_features=1, out_features=1, bias=True)
)


We can also inspect the parameters of our network. The parameters (weights and biases) are learned automatically during training, whereas hyperparameters such as the learning rate are chosen and tuned by humans.

In [20]:
# inspect network parameters: net.parameters() is consumed into a list so
# that the parameter tensors themselves are printed
print(list(net.parameters())) 

[Parameter containing:
tensor([[-0.9794]], requires_grad=True), Parameter containing:
tensor([-0.8240], requires_grad=True)]


Because our network is linear, the output is Ax + b = ([weight x input] + bias). The values above are the A and b with which our network was initialized:
- -0.9794 (random) weight
- -0.8240 bias 

In [21]:
# create a single random number as a tensor of shape (1, 1, 1):
# three dimensions, each of size one
# requires_grad=True makes autograd record operations on the tensor so that
# gradients with respect to it can be computed (it does not optimize anything)
# The tensor is built directly with torch.randn because the Variable wrapper
# has been deprecated since PyTorch 0.4 and simply returns a tensor.
# NOTE: the name shadows the built-in input(); it is kept because the next
# cell passes `input` to the network.
input = torch.randn(1, 1, 1, requires_grad=True)
print(input)

tensor([[[-0.9028]]], requires_grad=True)


In [22]:
# now, put this number through the network: calling the module runs its
# forward() method, so `out` is the result of the single linear layer
out = net(input)
print(out)

tensor([[[0.0601]]], grad_fn=<AddBackward0>)


In [23]:
# Ax + b = output, checked by hand with the values printed above:
# A = -0.9794 (weight), x = -0.9028 (the random input), b = -0.8240 (bias).
# Rounded to the 4 decimals PyTorch prints; it agrees with the network
# output (0.0601) up to the rounding of the displayed parameters.
round((-0.9794 * -0.9028) + -0.8240, 4)

0.0602

In [18]:
# Ax + b with hard-coded values. These numbers do not match the parameters
# or input printed above, and this cell's execution count (18) predates
# those cells, so they appear to come from an earlier run -- TODO confirm
# or remove.
(-0.0669 * -0.5085) + -0.4478

-0.41378135

In [None]:
# define loss function & optimizer using SGD
import torch.optim as optim 
 
# squared-error loss: squaring makes every error non-negative, so only its
# magnitude matters
def criterion(out, label):
    """Return the squared difference between the target label and the output."""
    error = label - out
    return error ** 2

# On each optimizer step, SGD adjusts the network's parameters based on
# the gradient of the error with respect to each of them; those gradients
# are obtained by back-propagating the error through the network.
# lr is the step size; momentum carries over part of the previous update.
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.5)

__Goal:__ teach the network how to treble a number

In [1]:
# approximate q-values of actions for current state  
from torch import nn 

class Network(nn.Module): # class to track nn architecture
    """Small feed-forward network: linear -> sigmoid -> linear.

    Args:
        n_inputs: size of each input feature vector (default 30).
        n_hidden: number of units in the hidden layer (default 20).
        n_outputs: number of values produced per input (default 1).
    """

    def __init__(self, n_inputs=30, n_hidden=20, n_outputs=1):
        super().__init__()

        # Inputs to hidden layer: linear transformation
        # (30 inputs; 20 outputs by default)
        self.hidden = nn.Linear(n_inputs, n_hidden)

        # Output layer: linear transformation (20 inputs; 1 output by default)
        # Does the size of the output depend on the # of actions?
        # n_outputs is a parameter so this can be changed without editing
        # the class.
        self.output = nn.Linear(n_hidden, n_outputs)

        # Sigmoid activation for the hidden layer. forward() relies on this
        # attribute; without it the call raises AttributeError.
        # No softmax is defined because forward() returns the raw output
        # of the last linear layer.
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run x through hidden layer, sigmoid and output layer, in that order."""
        # Pass the input tensor through each of our operations
        x = self.hidden(x)
        x = self.sigmoid(x)
        x = self.output(x)

        return x