In [8]:
import torch, torchvision
# ResNet-18 instantiated with pretrained weights.
model = torchvision.models.resnet18(pretrained=True)
# Random input tensor of shape (1, 3, 64, 64).
data = torch.rand(1, 3, 64, 64)
# Random target tensor of shape (1, 1000); presumably one value per model output — TODO confirm.
labels = torch.rand(1, 1000)

In [9]:
# forward pass: run the input through the model
prediction = model(data)

In [10]:
# Collapse the element-wise difference between prediction and labels into a scalar.
loss = (prediction - labels).sum()
loss.backward() # backward pass: backpropagate from the scalar loss

None


In [11]:
# SGD optimizer over all model parameters (learning rate 0.01, momentum 0.9).
# NOTE(review): a later cell runs `import torch.optim as optim`, which rebinds this
# name to the module — confirm the shadowing is intended.
optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)

In [12]:
optim.step() # gradient descent: one optimizer update of the model parameters

## Computing tensor gradients with autograd

In [26]:
import torch

# Two leaf tensors whose gradients autograd should track.
a = torch.tensor([2.0, 3.0]).requires_grad_()
b = torch.tensor([6.0, 4.0]).requires_grad_()

In [27]:
# Element-wise Q = 3a^3 - b^2, built from the tracked tensors a and b.
Q = 3 * a.pow(3) - b.pow(2)

In [28]:
# NOTE(review): external_grad is assigned but never passed to backward() below —
# confirm whether Q.backward(gradient=external_grad) was intended.
external_grad = torch.tensor([2., 2.])
# Sum Q down to a scalar first, so backward() is called without an explicit gradient argument.
Q.sum().backward()

In [29]:
# Gradients autograd stored on each leaf tensor, one per line.
print(a.grad, b.grad, sep="\n")
# Hand-derived check for Q = 3a^3 - b^2: dQ/da = 9a^2 and dQ/db = -2b.
print(9 * a ** 2, -2 * b)

tensor([36., 81.])
tensor([-12.,  -8.])
tensor([36., 81.], grad_fn=<MulBackward0>) tensor([-12.,  -8.], grad_fn=<MulBackward0>)


In [32]:
# 2x3 float matrix tracked by autograd; its gradient is filled in one entry at a time.
a = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.float, requires_grad=True)

# Visit every (row, column) position in row-major order.
for i, j in ((row, col) for row in range(a.size(0)) for col in range(a.size(1))):
    # Square of a single entry, so d(out)/d(a[i, j]) = 2 * a[i, j].
    out = a[i, j] * a[i, j]
    print(i, j, out)
    out.backward()
    # a.grad is never reset, so each backward() adds to what earlier iterations stored.
    print(a.grad)

# Final accumulated gradient after all entries have been visited.
print(a.grad)

0 0 tensor(1., grad_fn=<MulBackward0>)
tensor([[2., 0., 0.],
        [0., 0., 0.]])
0 1 tensor(4., grad_fn=<MulBackward0>)
tensor([[2., 4., 0.],
        [0., 0., 0.]])
0 2 tensor(9., grad_fn=<MulBackward0>)
tensor([[2., 4., 6.],
        [0., 0., 0.]])
1 0 tensor(16., grad_fn=<MulBackward0>)
tensor([[2., 4., 6.],
        [8., 0., 0.]])
1 1 tensor(25., grad_fn=<MulBackward0>)
tensor([[ 2.,  4.,  6.],
        [ 8., 10.,  0.]])
1 2 tensor(36., grad_fn=<MulBackward0>)
tensor([[ 2.,  4.,  6.],
        [ 8., 10., 12.]])
tensor([[ 2.,  4.,  6.],
        [ 8., 10., 12.]])


In [35]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions import Categorical

import gym
from tqdm import tqdm_notebook
import numpy as np
from copy import deepcopy

In [None]:
DISCOUNT_FACTOR = 0.99  # discount factor for future utilities
NUM_EPISODES = 1000     # number of episodes to run
MAX_STEPS = 5000        # max steps per episode

# Device to run the model on: CUDA when available, CPU otherwise.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

In [36]:
#Using a neural network to learn our policy parameters
class PolicyNetwork(nn.Module):
    """Two-layer policy network: observations in, action probabilities out.

    Architecture: Linear(observation_space -> hidden_size) -> ReLU ->
    Linear(hidden_size -> action_space) -> softmax.

    Args:
        observation_space (int): Number of input features of the first linear layer.
        action_space (int): Number of output units (one probability per action).
        hidden_size (int): Width of the hidden layer. Defaults to 128.
    """

    def __init__(self, observation_space, action_space, hidden_size=128):
        super().__init__()
        self.input_layer = nn.Linear(observation_space, hidden_size)
        self.output_layer = nn.Linear(hidden_size, action_space)

    def forward(self, x):
        """Compute action probabilities for one observation or a batch of them.

        Args:
            x (torch.Tensor): Tensor whose last dimension has size
                ``observation_space``; leading batch dimensions are optional.

        Returns:
            torch.Tensor: Same leading shape as ``x`` with a last dimension of
            size ``action_space``; values along that dimension sum to 1.
        """
        #hidden representation with relu activation
        hidden = F.relu(self.input_layer(x))

        #unnormalised action scores
        logits = self.output_layer(hidden)

        #softmax over the last (action) axis: identical to dim=1 for (batch, features)
        #input, and also valid for a single unbatched observation
        return F.softmax(logits, dim=-1)

In [37]:
def action_from_uniform_dist(action_space):
    ''' Select an action from a uniform distribution
    Args:
    - action_space (int): Number of actions in the action space of environment

    Return:
    - (int): Action sampled from uniform distribution
    - (float): Probability of action being sampled (1 / action_space)

    '''

    #uniform distribution of all actions in environment, allocated directly on DEVICE
    dist = torch.full((action_space,), 1 / action_space, device=DEVICE)

    #sample action and convert it to a plain Python int once
    action = Categorical(dist).sample().item()

    #return action and probability
    return action, dist[action].item()