# Gym basics

In [1]:
import gym

env=gym.make("CartPole-v0")

# Run 20 episodes of at most 100 random actions each, rendering every step.
# The try/finally guarantees the render window is released even if a step
# raises or the cell is interrupted.
try:
    for episode in range(20):
        env.reset()
        for t in range(100):
            # Sample a random action from the environment's action space.
            action = env.action_space.sample()
            # The third element of the step result is the "episode finished"
            # flag; the starred target absorbs whatever else is returned.
            _observation, _reward, done, *_extra = env.step(action)
            env.render()
            if done:
                # Stepping an environment after it reports done is undefined
                # behaviour, so move on to the next episode (which resets).
                break
finally:
    env.close()
        
# env.step(action) returns a tuple of values, which can be unpacked as:
# observation, reward, done, info = env.step(action)



In [2]:
print(env.action_space)
# Discrete(2)
# means the action space contains 2 discrete actions: 0 and 1

Discrete(2)


# PyTorch basics

In [3]:
import torch

tensor = torch.FloatTensor([[1, 2, 3], [4, 5, 6]])
tensor

tensor([[1., 2., 3.],
        [4., 5., 6.]])

In [4]:
import numpy as np
arr = np.array([[1, 2, 3], [4, 5, 6]])
tensor = torch.tensor(arr)
tensor

tensor([[1, 2, 3],
        [4, 5, 6]])

In [5]:
tensor = torch.tensor(arr, dtype=torch.float32)
tensor

tensor([[1., 2., 3.],
        [4., 5., 6.]])

In [6]:
summed_tensor = tensor.sum()
summed_tensor

tensor(21.)

In [7]:
summed_tensor.item()
# item() converts a single-element tensor to a plain Python number

21.0

In [8]:
tensor.device

device(type='cpu')

In [10]:
# tensor_gpu = tensor.to('cuda')
# tensor_gpu

### A trailing underscore marks an in-place operation: `exp()` returns a new tensor, while `exp_()` modifies the original

In [12]:
tensor.exp()

tensor([[  2.7183,   7.3891,  20.0855],
        [ 54.5982, 148.4132, 403.4288]])

In [13]:
tensor

tensor([[1., 2., 3.],
        [4., 5., 6.]])

In [14]:
tensor.exp_()

tensor([[  2.7183,   7.3891,  20.0855],
        [ 54.5982, 148.4132, 403.4288]])

In [15]:
tensor

tensor([[  2.7183,   7.3891,  20.0855],
        [ 54.5982, 148.4132, 403.4288]])

### Building a network with PyTorch

In [16]:
from torch import nn
import torch

In [17]:
linear_net = nn.Linear(in_features=2, out_features=5)

In [18]:
vector = torch.FloatTensor([1, 2])
linear_net(input=vector)

tensor([-0.2069,  0.1592, -0.6474,  1.3026,  1.2712], grad_fn=<AddBackward0>)

In [20]:
# Small feed-forward stack: 2 -> 5 -> 20 -> 10 features, with ReLU between
# the linear layers and a softmax over dimension 0 at the end.
# NOTE(review): the dropout layer sits directly on the values fed to softmax
# and is active while the module is in training mode, so repeated calls can
# give different outputs; call sequential.eval() for deterministic results.
sequential = nn.Sequential(
    nn.Linear(2, 5),
    nn.ReLU(),
    nn.Linear(5, 20),
    nn.ReLU(),
    nn.Linear(20, 10),
    nn.Dropout(0.5),
    nn.Softmax(dim=0),
)

# Display the module summary as the cell output.
sequential

Sequential(
  (0): Linear(in_features=2, out_features=5, bias=True)
  (1): ReLU()
  (2): Linear(in_features=5, out_features=20, bias=True)
  (3): ReLU()
  (4): Linear(in_features=20, out_features=10, bias=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Softmax(dim=0)
)

In [21]:
output_vector = sequential(input=vector)
output_vector

tensor([0.0959, 0.0959, 0.1598, 0.0510, 0.1014, 0.0959, 0.0959, 0.0959, 0.0959,
        0.1126], grad_fn=<SoftmaxBackward>)

##### Custom layer

In [22]:
class YourLayer(nn.Module):
    """Custom parameter-free layer that multiplies its input by a constant.

    Args:
        scale: Factor applied to the input in ``forward``. Defaults to 100,
            so ``YourLayer()`` behaves exactly as the original layer did.
    """

    def __init__(self, scale=100):
        super().__init__()
        # Plain attribute rather than nn.Parameter: the factor is not trained.
        self.scale = scale

    def forward(self, x):
        """Return ``x`` multiplied element-wise by ``self.scale``."""
        return x * self.scale


# Chain a linear layer, a ReLU and the custom layer, then run the example
# vector through the stack; the result is shown as the cell output.
sequential = nn.Sequential(
    nn.Linear(2, 5),
    nn.ReLU(),
    YourLayer(),
)
sequential(vector)

tensor([68.6444, 48.8254,  0.0000,  0.0000, 34.3281], grad_fn=<MulBackward0>)

##### loss & optimizer

In [23]:
# Mean squared error between a prediction and its target. Only the first
# element differs (by 1), so the loss is 1/3.
mse = nn.MSELoss()
output_tensor = torch.tensor([1, 2, 3], dtype=torch.float32)
y_tensor = torch.tensor([2, 2, 3], dtype=torch.float32)
mse(input=output_tensor, target=y_tensor)

tensor(0.3333)

In [24]:
from torch.optim import Adam