In [30]:
import torch
import numpy as np

In [10]:
# viable examples is rand, empty, zeros
# also different kind of dtypes can be applied for a torch

x = torch.zeros(2, 2, 2, 2, dtype=torch.int)
print(x)

tensor([[[[0, 0],
          [0, 0]],

         [[0, 0],
          [0, 0]]],


        [[[0, 0],
          [0, 0]],

         [[0, 0],
          [0, 0]]]], dtype=torch.int32)


In [11]:
print(x.size())

torch.Size([2, 2, 2, 2])


In [13]:
x = torch.tensor([2.3 , 0.1])
print(x)

tensor([2.3000, 0.1000])


In [15]:
x = torch.rand(2, 2)
y = torch.rand(2, 2)

print(x)
print(y)

tensor([[0.7715, 0.3189],
        [0.1398, 0.4551]])
tensor([[0.8221, 0.1515],
        [0.3794, 0.1570]])


In [16]:
z = x + y
print(z)

tensor([[1.5936, 0.4704],
        [0.5192, 0.6121]])


In [17]:
# all of the functions with _ does the operation for specific element
y.add_(x)
print(y)

tensor([[1.5936, 0.4704],
        [0.5192, 0.6121]])


In [19]:
x = torch.rand(5, 3)
print(x)
print(x[:, 0]) # for only the first colum, supports the same manipulation as arrays in np

tensor([[0.9563, 0.3391, 0.7982],
        [0.3381, 0.8495, 0.4383],
        [0.3808, 0.7480, 0.3231],
        [0.9498, 0.4871, 0.8331],
        [0.5974, 0.1904, 0.9855]])
tensor([0.9563, 0.3381, 0.3808, 0.9498, 0.5974])


In [29]:
# reshaping

x = torch.rand(3, 2)
print(x)
y = x.view(2, 3)
print(y)

tensor([[0.5544, 0.0409],
        [0.6090, 0.4307],
        [0.2884, 0.6987]])
tensor([[0.5544, 0.0409, 0.6090],
        [0.4307, 0.2884, 0.6987]])


In [34]:
a = np.zeros(2)
print(a)
b = torch.from_numpy(a)
print(b)

b.add_(2)
print(a)
# here we can see they share the same memory storage

[0. 0.]
tensor([0., 0.], dtype=torch.float64)
[2. 2.]


In [35]:
torch.cuda.is_available()

False

In [36]:
if torch.cuda.is_available():
    device = torch.device('cuda')
    x = torch.ones(5, device=device)
    y = torch.ones(4)
    y = y.to(device=device)
    z = x + y # this will be performed on gpu if its present
    z = z.to('cpu') # this is beacuse transfering to numpy is available only via cpu

In [37]:
x = torch.ones(5, requires_grad=True)
# this by default is False, but if you know that this tensor will have its gradeient calculated later on, so for optimization it should be set as true
print(x)

tensor([1., 1., 1., 1., 1.], requires_grad=True)


# gradients

In [39]:
# this grad variable is important if we want to calculate the backprop and if it is not present grad wont work
x = torch.randn(3, requires_grad=True)
print(x)

tensor([-0.2058, -1.6199, -0.3467], requires_grad=True)


In [40]:
y = x + 2

In [41]:
print(y)

tensor([1.7942, 0.3801, 1.6533], grad_fn=<AddBackward0>)


In [45]:
z = y * y * 2
print(z)

tensor([6.4385, 0.2890, 5.4666], grad_fn=<MulBackward0>)


In [43]:
z = z.mean()
print(z)

tensor(4.0647, grad_fn=<MeanBackward0>)


In [47]:
v = torch.tensor([0.1, 1.0, 0.001], dtype=torch.float32)
z.backward(v) # this is the jacobian product
print(x.grad)

tensor([3.1100, 2.0274, 2.2110])


In [48]:
# requires grad can be removed at all using
# x.requires_grad_
# x.detach()
# with torch.no_grad():

In [52]:
weights = torch.ones(4, requires_grad=True)

for epoch in range(3):
    model_output = (weights*3).sum()
    model_output.backward() # here we calculate the gradients
    print(weights.grad)

    weights.grad.zero_()

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


Gradients yra daliniu isvestiniu vektoriai ir apibreziai kaip funkcija pasikeicia su tam tikrais inputais, ar tai tam tikri features ar tai tam tikri skaicia, kurie ateina kaip input, taip parodant kaip svoriai turi pasikeisti, jog sumazetu loss funkcijos reiksme. 

In [56]:
x = torch.tensor(1.0)
y = torch.tensor(2.0)
w = torch.tensor(1.0, requires_grad=True)

# forward pass and compute the loss

y_hat = w * x
loss = (y_hat - y)**2
print(loss)

# backward pass
loss.backward()
print(w.grad)

# update our wieghts
# next forward and backwards pass for several iteration to optimize wieghts

tensor(1., grad_fn=<PowBackward0>)
tensor(-2.)


# everything from scarth

In [62]:
import numpy as np

# formula is 2 * x
X = np.array([1, 2, 3, 4], dtype=np.float32)
Y = np.array([2, 4, 6, 8], dtype=np.float32)

w = 0.0

# model prediction calculation
def forward(x):
    return w * x

# loss
def loss(y, y_pred):
    return ((y_pred - y) * 2).mean()

# gradients
# MSE = 1/N * (w*x - y)**2
# dJ/dw = 1/N 2x (w*x - y)

def gardient(x, y, y_pred):
    return np.dot(2*x, y_pred-y).mean()

print(f'prediction before training: f(5) = {forward(5):.3f}')

# training
learning_rate = 0.01
epochs = 20

for epoch in range(epochs):
    # pred = foward pass
    y_pred = forward(X)

    # loss
    l = loss(Y, y_pred)

    # gradients
    dw = gardient(X, Y, y_pred)

    # w updates
    w -= learning_rate * dw

    if epoch % 2 == 0:
        print(f'epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}')

print(f'prediction after training: f(5) = {forward(5):.3f}')

prediction before training: f(5) = 0.000
epoch 1: w = 1.200, loss = -10.00000000
epoch 3: w = 1.872, loss = -1.60000014
epoch 5: w = 1.980, loss = -0.25600022
epoch 7: w = 1.997, loss = -0.04096031
epoch 9: w = 1.999, loss = -0.00655347
epoch 11: w = 2.000, loss = -0.00104839
epoch 13: w = 2.000, loss = -0.00016820
epoch 15: w = 2.000, loss = -0.00002688
epoch 17: w = 2.000, loss = -0.00000411
epoch 19: w = 2.000, loss = -0.00000066
prediction after training: f(5) = 10.000


In [88]:
# after the manual calculation we can transfer it to torch


# formula is 2 * x
X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
Y = torch.tensor([2, 4, 6, 8], dtype=torch.float32)

w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

# model prediction calculation
def forward(x):
    return w * x

# loss
def loss(y, y_pred):
    return ((y_pred - y) ** 2).mean()

# gradients
# MSE = 1/N * (w*x - y)**2
# dJ/dw = 1/N 2x (w*x - y)

def gardient(x, y, y_pred):
    return np.dot(2*x, y_pred-y).mean()

print(f'prediction before training: f(5) = {forward(5):.3f}')

# training
learning_rate = 0.01
epochs = 100

for epoch in range(epochs):
    # pred = foward pass
    y_pred = forward(X)

    # loss
    l = loss(Y, y_pred)

    # gradients
    l.backward() # this will calculate the gradient to loss

    # w updates
    with torch.no_grad():
        w -= learning_rate * w.grad

    # we must empty the gradients, so that we wouldnt accumulate them
    w.grad.zero_()

    if epoch % 5 == 0:
        print(f'epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}')

print(f'prediction after training: f(5) = {forward(5):.3f}')

prediction before training: f(5) = 0.000
epoch 1: w = 0.300, loss = 30.00000000
epoch 6: w = 1.246, loss = 5.90623236
epoch 11: w = 1.665, loss = 1.16278565
epoch 16: w = 1.851, loss = 0.22892261
epoch 21: w = 1.934, loss = 0.04506890
epoch 26: w = 1.971, loss = 0.00887291
epoch 31: w = 1.987, loss = 0.00174685
epoch 36: w = 1.994, loss = 0.00034392
epoch 41: w = 1.997, loss = 0.00006770
epoch 46: w = 1.999, loss = 0.00001333
epoch 51: w = 1.999, loss = 0.00000262
epoch 56: w = 2.000, loss = 0.00000052
epoch 61: w = 2.000, loss = 0.00000010
epoch 66: w = 2.000, loss = 0.00000002
epoch 71: w = 2.000, loss = 0.00000000
epoch 76: w = 2.000, loss = 0.00000000
epoch 81: w = 2.000, loss = 0.00000000
epoch 86: w = 2.000, loss = 0.00000000
epoch 91: w = 2.000, loss = 0.00000000
epoch 96: w = 2.000, loss = 0.00000000
prediction after training: f(5) = 10.000


after adjusting gradient computations to autograd, next we will adjust loss function and parameter updates

In [95]:
import torch.nn as nn


# formula is 2 * x
X = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)
Y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32)
x_test = torch.tensor([[5]], dtype=torch.float32)

n_samples, n_features = X.shape

input_size = n_features
output_size = n_features

#model = nn.Linear(input_size, output_size)

class LinearReggression(nn.Module):

    def __init__(self, input_dim, output_dim):
        super(LinearReggression, self).__init__()
        # define layers
        self.lin = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        return self.lin(x)

model = LinearReggression(input_size, output_size)

print(f'prediction before training: f(5) = {model(x_test).item():.3f}')

# training
learning_rate = 0.01
epochs = 200

loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)


for epoch in range(epochs):
    # pred = foward pass
    y_pred = model(X)

    # loss
    l = loss(Y, y_pred)

    # gradients
    l.backward() # this will calculate the gradient to loss

    # w updates
    optimizer.step()

    # we must empty the gradients, so that we wouldnt accumulate them
    optimizer.zero_grad()

    if epoch % 20 == 0:
        print(f'epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}')

print(f'prediction after training: f(5) = {forward(5):.3f}')

prediction before training: f(5) = -0.188
epoch 1: w = 2.000, loss = 31.66109276
epoch 21: w = 2.000, loss = 0.05807181
epoch 41: w = 2.000, loss = 0.03274921
epoch 61: w = 2.000, loss = 0.02903523
epoch 81: w = 2.000, loss = 0.02575358
epoch 101: w = 2.000, loss = 0.02284284
epoch 121: w = 2.000, loss = 0.02026111
epoch 141: w = 2.000, loss = 0.01797112
epoch 161: w = 2.000, loss = 0.01593998
epoch 181: w = 2.000, loss = 0.01413840
prediction after training: f(5) = 10.000
