# PyTorch basics for DL newbies


[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/BoostcampAITech/lecture-note-python-basics-for-ai/blob/main/codes/pytorch/torch_basics.ipynb)


## numpy to tensor

In [1]:
import numpy as np
n_array = np.arange(10).reshape(2,5)
n_array

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [2]:
n_array.ndim

2

In [3]:
n_array.shape

(2, 5)

In [4]:
import torch
t_array = torch.FloatTensor(n_array)
t_array

tensor([[0., 1., 2., 3., 4.],
        [5., 6., 7., 8., 9.]])

In [5]:
type(t_array)

torch.Tensor

In [6]:
t_array.ndim

2

In [7]:
print(t_array.shape)
print(t_array.ndim)
print(t_array.size()) # shape

torch.Size([2, 5])
2
torch.Size([2, 5])


In [8]:
t_array[1:]

tensor([[5., 6., 7., 8., 9.]])

In [9]:
t_array[:2, :3]

tensor([[0., 1., 2.],
        [5., 6., 7.]])

## tensor operations

In [10]:
n1 = np.arange(10).reshape(2,5)
n2 = np.arange(10).reshape(5,2)

In [11]:
t1 = torch.FloatTensor(n1)
t2 = torch.FloatTensor(n2)
print('Shape of t1: ', t1.shape)
print('Shape of t2: ', t2.shape)
print(t1.matmul(t2))

Shape of t1:  torch.Size([2, 5])
Shape of t2:  torch.Size([5, 2])
tensor([[ 60.,  70.],
        [160., 195.]])


In [12]:
n1.dot(n2)

array([[ 60,  70],
       [160, 195]])

In [13]:
n1 = np.arange(4).reshape(2,2)
n2 = np.arange(4).reshape(2,2)
t1 = torch.FloatTensor(n1)
t2 = torch.FloatTensor(n2)

t1 * t2

tensor([[0., 1.],
        [4., 9.]])

In [14]:
t1.mul(t2)

tensor([[0., 1.],
        [4., 9.]])

In [15]:
t1 * 5

tensor([[ 0.,  5.],
        [10., 15.]])

In [16]:
n1 = np.arange(10)
t1 = torch.FloatTensor(n1)
t1.mean()

tensor(4.5000)

In [17]:
n1 = np.arange(10).reshape(5,2)
t1 = torch.FloatTensor(n1)
t1.mean(dim=0) # axis

tensor([4., 5.])

In [18]:
t1.mean(dim=1)

tensor([0.5000, 2.5000, 4.5000, 6.5000, 8.5000])

In [19]:
n1 = np.arange(10)
t1 = torch.FloatTensor(n1)
t1.view(-1, 2) # reshape

tensor([[0., 1.],
        [2., 3.],
        [4., 5.],
        [6., 7.],
        [8., 9.]])

In [20]:
n1.reshape(-1, 2)

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

In [21]:
t1.view(-1, 10)

tensor([[0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [22]:
t1.view(-1, 10).shape

torch.Size([1, 10])

In [23]:
t1.view(-1, 10).squeeze() # squeeze() drops the size-1 dimension, i.e. removes one level of [ ]

tensor([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [24]:
t1.view(-1, 10).squeeze().shape

torch.Size([10])

In [25]:
t1.view(-1, 10).squeeze().unsqueeze(dim=0)

tensor([[0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [26]:
t1.view(-1, 10).squeeze().unsqueeze(dim=1)

tensor([[0.],
        [1.],
        [2.],
        [3.],
        [4.],
        [5.],
        [6.],
        [7.],
        [8.],
        [9.]])

## tensor operations for ML/DL formula

In [27]:
import torch
import torch.nn.functional as F

In [28]:
tensor = torch.FloatTensor([0.5, 0.7, 0.1])
h_tensor = F.softmax(tensor, dim=0)
h_tensor

tensor([0.3458, 0.4224, 0.2318])

In [29]:
y = torch.randint(5, (10,5))
y_label = y.argmax(dim=1)
y_label

tensor([2, 1, 1, 3, 4, 4, 0, 3, 0, 0])

In [30]:
# Pin the encoding width to the number of classes (the columns of y).
# Without num_classes, one_hot infers the width as max(label) + 1, so the
# result silently loses columns whenever the highest class is absent from
# this (unseeded) random batch.
F.one_hot(y_label, num_classes=y.shape[1])

tensor([[0, 0, 1, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 0, 0, 1, 0],
        [0, 0, 0, 0, 1],
        [0, 0, 0, 0, 1],
        [1, 0, 0, 0, 0],
        [0, 0, 0, 1, 0],
        [1, 0, 0, 0, 0],
        [1, 0, 0, 0, 0]])

## torch autograd

$$
y = w^2 \\ 
z = 2*y + 5 \\
z = 2*w^2 + 5 
$$

In [31]:
w = torch.tensor(2.0, requires_grad=True)
y = w**2
z = 2*y + 5

In [32]:
z.backward() # differentiate: backpropagate from z to compute dz/dw

In [33]:
w.grad # show the gradient dz/dw accumulated by backward(); dz/dw = 4w = 8 at w = 2

tensor(8.)

$$ Q = 3a^3 - b^2  $$

In [34]:
a = torch.tensor([2., 3.], requires_grad=True)
b = torch.tensor([6., 4.], requires_grad=True)

In [35]:
Q = 3*a**3 - b**2

In [36]:
external_grad = torch.tensor([1., 1.])
Q.backward(gradient=external_grad)

$$ \frac{\partial Q}{\partial a} = 9a^2 $$  

$$ \frac{\partial Q}{\partial b} = -2b $$

In [37]:
a.grad

tensor([36., 81.])

In [38]:
b.grad

tensor([-12.,  -8.])

## AutoGrad for Linear Regression
https://towardsdatascience.com/linear-regression-with-pytorch-eb6dedead817

$$ y=2x+1 $$

In [39]:
import numpy as np
# Synthetic training set sampled from the line y = 2x + 1 at x = 0, 1, ..., 10.
x_values = list(range(11))
y_values = [2 * x + 1 for x in x_values]

# Store both as float32 column vectors of shape (11, 1):
# one row per sample, a single feature/target per row.
x_train = np.array(x_values, dtype=np.float32).reshape(-1, 1)
y_train = np.array(y_values, dtype=np.float32).reshape(-1, 1)

In [40]:
x_train

array([[ 0.],
       [ 1.],
       [ 2.],
       [ 3.],
       [ 4.],
       [ 5.],
       [ 6.],
       [ 7.],
       [ 8.],
       [ 9.],
       [10.]], dtype=float32)

In [41]:
y_train

array([[ 1.],
       [ 3.],
       [ 5.],
       [ 7.],
       [ 9.],
       [11.],
       [13.],
       [15.],
       [17.],
       [19.],
       [21.]], dtype=float32)

In [42]:
import torch
from torch.autograd import Variable
class LinearRegression(torch.nn.Module):
    """Linear regression model: a single fully connected layer.

    Args:
        inputSize: number of input features per sample.
        outputSize: number of predicted values per sample.
    """

    def __init__(self, inputSize, outputSize):
        super().__init__()
        # The only learnable component: weight and bias of one Linear layer.
        self.linear = torch.nn.Linear(inputSize, outputSize)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        return self.linear(x)

In [43]:
inputDim = 1        # number of input features: the single variable 'x'
outputDim = 1       # number of outputs: the single target 'y'
learningRate = 0.01 # step size handed to the SGD optimizer
epochs = 100        # number of training iterations over the full data set

model = LinearRegression(inputDim, outputDim)
# Move the model's parameters to the GPU when CUDA is available.
if torch.cuda.is_available():
    model.cuda()

In [44]:
# Mean squared error between the model's predictions and the labels.
criterion = torch.nn.MSELoss() 
# Stochastic gradient descent over all of the model's parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=learningRate)

In [45]:
# Build the training tensors once, on whichever device the model lives on.
# The conversion does not depend on the epoch, so it is hoisted out of the loop.
# torch.autograd.Variable is deprecated: since PyTorch 0.4 plain tensors carry
# autograd state themselves, so no wrapper is needed.
device = next(model.parameters()).device
inputs = torch.from_numpy(x_train).to(device)
labels = torch.from_numpy(y_train).to(device)

for epoch in range(epochs):
    # Clear the gradient buffers. PyTorch accumulates gradients across
    # backward() calls, so this must be done on every iteration or gradients
    # from the previous epoch would carry forward.
    optimizer.zero_grad()

    # get output from the model, given the inputs
    outputs = model(inputs)

    # get loss for the predicted output
    loss = criterion(outputs, labels)
    # raw loss tensor (its repr also shows the device and grad_fn)
    print(loss)
    # get gradients w.r.t. the parameters
    loss.backward()

    # update parameters
    optimizer.step()

    print('epoch {}, loss {}'.format(epoch, loss.item()))

tensor(112.5228, device='cuda:0', grad_fn=<MseLossBackward>)
epoch 0, loss 112.52279663085938
tensor(9.3575, device='cuda:0', grad_fn=<MseLossBackward>)
epoch 1, loss 9.357502937316895
tensor(0.9406, device='cuda:0', grad_fn=<MseLossBackward>)
epoch 2, loss 0.9406320452690125
tensor(0.2521, device='cuda:0', grad_fn=<MseLossBackward>)
epoch 3, loss 0.2521142065525055
tensor(0.1940, device='cuda:0', grad_fn=<MseLossBackward>)
epoch 4, loss 0.19399531185626984
tensor(0.1873, device='cuda:0', grad_fn=<MseLossBackward>)
epoch 5, loss 0.18731798231601715
tensor(0.1849, device='cuda:0', grad_fn=<MseLossBackward>)
epoch 6, loss 0.1848582625389099
tensor(0.1828, device='cuda:0', grad_fn=<MseLossBackward>)
epoch 7, loss 0.1827639490365982
tensor(0.1807, device='cuda:0', grad_fn=<MseLossBackward>)
epoch 8, loss 0.18072061240673065
tensor(0.1787, device='cuda:0', grad_fn=<MseLossBackward>)
epoch 9, loss 0.17870230972766876
tensor(0.1767, device='cuda:0', grad_fn=<MseLossBackward>)
epoch 10, loss 0

In [46]:
with torch.no_grad(): # we don't need gradients in the testing phase
    # Run the input on the model's own device instead of branching on CUDA.
    # Variable and .data are legacy APIs: inside no_grad() the output does not
    # require grad, so it can be converted to NumPy directly.
    device = next(model.parameters()).device
    # .cpu() is needed because NumPy can only wrap CPU memory; it is a no-op
    # for tensors that are already on the CPU.
    predicted = model(torch.from_numpy(x_train).to(device)).cpu().numpy()
    print(predicted)

[[ 0.52558196]
 [ 2.5939026 ]
 [ 4.662223  ]
 [ 6.7305436 ]
 [ 8.798864  ]
 [10.867185  ]
 [12.935505  ]
 [15.003825  ]
 [17.072145  ]
 [19.140467  ]
 [21.208788  ]]
