In [2]:
import torch

In [3]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
my_tensor = torch.tensor([[1,2,3],[4,5,6]], device=device, requires_grad=True, dtype=torch.float)
print(my_tensor)
print(my_tensor.shape)
print(my_tensor.dtype)
print(my_tensor.requires_grad)

tensor([[1., 2., 3.],
        [4., 5., 6.]], requires_grad=True)
torch.Size([2, 3])
torch.float32
True


In [4]:
x = torch.empty(size=(3,3))
print(x)
x = torch.zeros(size=(3,3))
print(x)
x = torch.rand((3,3))
print(x)
x = torch.eye(3)
print(x.shape)
x = torch.arange(1,10,2)
print(x)

tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 9.8091e-45, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00]])
tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
tensor([[0.3861, 0.7243, 0.6378],
        [0.6355, 0.5319, 0.5843],
        [0.1040, 0.9269, 0.5314]])
torch.Size([3, 3])
tensor([1, 3, 5, 7, 9])


## Convert tensors to different types

In [5]:
t = torch.arange(4)
print(t.bool()) # to bool
print(t.float()) #float32
print(t.double()) # float64

tensor([False,  True,  True,  True])
tensor([0., 1., 2., 3.])
tensor([0., 1., 2., 3.], dtype=torch.float64)


## Array to tensor and vice versa

In [6]:
import numpy as np
np_array = np.zeros((3,3))
tensor= torch.from_numpy(np_array) # to tensor
numpy = tensor.numpy() #to numpy
print(tensor)
print(numpy)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]], dtype=torch.float64)
[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


## Tensor math

In [7]:
x = torch.tensor([1,2,3])
y = torch.tensor([4,5,7])
z = torch.empty(3)
torch.add(x,y,out=z)
print(x+y)
print(x/y)
print(z)

tensor([ 5,  7, 10])
tensor([0.2500, 0.4000, 0.4286])
tensor([ 5.,  7., 10.])


## inplace
underscore after operator

In [8]:
x = torch.tensor([1,2,3])
x.add_(x)
print(x)

tensor([2, 4, 6])


In [9]:
## exponent
x = torch.tensor([1,2,3])
y=x**2
print(y)

tensor([1, 4, 9])


In [10]:
#comparasion
x = torch.tensor([1,2,3])
z = x>2
print(z)

tensor([False, False,  True])


In [11]:
# matrix multiplication
x = torch.rand((2,2))
y = torch.rand((2,2))
z = torch.mm(x,y)
print(z)

tensor([[1.2037, 0.4075],
        [0.9940, 0.1616]])


In [12]:
# matrix multiplication
x = torch.ones((2,2))
z = x**2
print(x)

tensor([[1., 1.],
        [1., 1.]])


In [13]:
# elementwise multiplication (Hadaman product)
x = torch.rand((2,2))
y = torch.rand((2,2))
print('x', x)
print('y', y)
print('x*y', x*y)
print('MM', torch.mm(x,y))
print('dot', torch.dot(x[:,0],y[:,0]))


x tensor([[0.4043, 0.2669],
        [0.4329, 0.3080]])
y tensor([[0.8569, 0.6128],
        [0.8669, 0.6849]])
x*y tensor([[0.3464, 0.1635],
        [0.3753, 0.2109]])
MM tensor([[0.5778, 0.4305],
        [0.6380, 0.4762]])
dot tensor(0.7217)


In [14]:
0.8244*0.5079

0.41871276

In [15]:
## Batch matrix multiplication
batch = 2
p = 10
q = 5
r = 6
tensor1 = torch.rand((batch,p,q))
tensor2 = torch.rand((batch,q,r))
tensor3 = torch.bmm(tensor1,tensor2)
print(tensor3)

tensor([[[1.3669, 1.4317, 1.4266, 1.9602, 0.9246, 2.3821],
         [0.9943, 1.2601, 1.0503, 1.6554, 0.8719, 1.6180],
         [0.6277, 1.1764, 0.5245, 0.9831, 1.0619, 1.2530],
         [0.8913, 1.0824, 0.8982, 1.3575, 0.8366, 1.7261],
         [0.9804, 0.9426, 1.1262, 1.6299, 0.4847, 1.4991],
         [1.1019, 1.1951, 1.1214, 1.6683, 0.9324, 2.1028],
         [0.6191, 0.5937, 0.6887, 1.0554, 0.3689, 0.8930],
         [1.2190, 1.6845, 1.1463, 1.8267, 1.4064, 2.3176],
         [0.7256, 0.8091, 0.7527, 0.9627, 0.4572, 1.2068],
         [0.6350, 0.9116, 0.5739, 0.9875, 0.7973, 1.0451]],

        [[1.1294, 0.8574, 0.7147, 1.2580, 0.5445, 0.8397],
         [2.2424, 1.6424, 1.2594, 2.3753, 1.0609, 1.4496],
         [2.8146, 2.2225, 1.6820, 3.1515, 1.5649, 1.9485],
         [2.2723, 1.9786, 1.5930, 2.7220, 1.2241, 1.6923],
         [1.4958, 1.4091, 0.5970, 1.8307, 0.7948, 1.0305],
         [1.2920, 1.1067, 0.5353, 1.4862, 0.7620, 0.8380],
         [1.7591, 1.3844, 0.9675, 1.8833, 1.0300, 0.97

## Broadcasting

In [16]:
x = torch.rand((3,3))
y = torch.rand((1,3))

print('x', x)
print('y', y)
print('x-y', x-y)
print('x**y', x**y)

x tensor([[0.7223, 0.1175, 0.0398],
        [0.2455, 0.7528, 0.6102],
        [0.8631, 0.6835, 0.3924]])
y tensor([[0.7552, 0.9181, 0.6746]])
x-y tensor([[-0.0329, -0.8005, -0.6348],
        [-0.5097, -0.1653, -0.0644],
        [ 0.1079, -0.2346, -0.2823]])
x**y tensor([[0.7822, 0.1401, 0.1136],
        [0.3462, 0.7705, 0.7166],
        [0.8948, 0.7051, 0.5320]])


## Other operations

In [17]:
x = torch.rand((1,3))
sum_x = torch.sum(x,dim=0)
sum_y= torch.sum(x,dim=1)
print(x.shape)
print(sum_x)
print(sum_y)

value, indice = torch.max(x, dim=1)

print('value', value)
print('indx', indice)
print(torch.argmax(x))

torch.Size([1, 3])
tensor([0.3310, 0.7887, 0.6801])
tensor([1.7998])
value tensor([0.7887])
indx tensor([1])
tensor(1)


In [18]:
x = torch.rand((1,4))
print(torch.mean(x.float()))


tensor(0.4772)


In [19]:
x = torch.rand((1,4))
print(x)
print(torch.clamp(x,min=0.4, max=0.8))
print(torch.any(x))

tensor([[0.5329, 0.6062, 0.0108, 0.6047]])
tensor([[0.5329, 0.6062, 0.4000, 0.6047]])
tensor(True)


In [20]:
r = torch.rand((2,4))
print(r)
print(torch.einsum('ij->ji',r).shape)

tensor([[0.2734, 0.5167, 0.5722, 0.1782],
        [0.9019, 0.2451, 0.7864, 0.9180]])
torch.Size([4, 2])


## reshape matrix


In [21]:
x = torch.from_numpy(np.array([[1,2,3],[5,6,7]]))
print(x.shape)
y = x.view(3,-1)
print(y.shape)

torch.Size([2, 3])
torch.Size([3, 2])


## Autograd

In [22]:
x = torch.rand((1), requires_grad=True)
print(x)
y= x + 2
print('y ', y)
z = 2*x 
print(z)
s = 2*x + 4
print(s)
y.backward()
print('dy/dx ',y.grad)

tensor([0.4234], requires_grad=True)
y  tensor([2.4234], grad_fn=<AddBackward0>)
tensor([0.8468], grad_fn=<MulBackward0>)
tensor([4.8468], grad_fn=<AddBackward0>)
dy/dx  None


  return self._grad


## Remove grad

In [23]:
x = torch.rand(4,requires_grad=True)
y = torch.rand(4,requires_grad=True)
z = torch.rand(4,requires_grad=True)
print('x ', x)
print('y ', y)
print('z ', z)

# first method
x.requires_grad=False
print('new x ', x)

#second method
print(y.detach())

#third method
with torch.no_grad():
    print(z+2)

x  tensor([0.9829, 0.8345, 0.3054, 0.6207], requires_grad=True)
y  tensor([0.3428, 0.1822, 0.3914, 0.0169], requires_grad=True)
z  tensor([0.1914, 0.9667, 0.2959, 0.9281], requires_grad=True)
new x  tensor([0.9829, 0.8345, 0.3054, 0.6207])
tensor([0.3428, 0.1822, 0.3914, 0.0169])
tensor([2.1914, 2.9667, 2.2959, 2.9281])


In [24]:
x = torch.rand(3, requires_grad=True)
for epoch in range(3):
    model = (x*3).sum()
    model.backward()
    print(x.grad)
    x.grad.zero_()

tensor([3., 3., 3.])
tensor([3., 3., 3.])
tensor([3., 3., 3.])


## Backpropagation

In [25]:
y = torch.tensor(2)
x = torch.rand(1)
w = torch.tensor(1, dtype = torch.float, requires_grad=True)
y_hat = w*x
loss = (y-y_hat)**2
print(loss)


tensor([1.7360], grad_fn=<PowBackward0>)


## Regression in numpy


In [49]:
x = np.array([1,2,3,4], dtype=np.float32)
y= np.array([2,4,6,8], dtype=np.float32)
w= 0.0

def forward(X):
    return w * X

def loss(Y, Y_pred):
    return ((Y_pred-Y)**2).mean()

def gradient(X, Y, Y_pred):
    return np.dot(2*X, Y_pred-Y).mean()



n_iter = 20
lr = 0.01
for epoch in range(n_iter):
    pred = forward(x)
    l = loss(y,pred)
    dw = gradient(x,y,pred)
    w -= lr*dw
    if epoch %2 ==0:
        print(f'epoch {epoch+1} w {w} loss {l:3f}')

epoch 1 w 1.2 loss 30.000000
epoch 3 w 1.871999988555908 loss 0.768000
epoch 5 w 1.9795200133323667 loss 0.019661
epoch 7 w 1.9967231869697568 loss 0.000503
epoch 9 w 1.999475698471069 loss 0.000013
epoch 11 w 1.9999160599708554 loss 0.000000
epoch 13 w 1.9999865984916685 loss 0.000000
epoch 15 w 1.9999978351593015 loss 0.000000
epoch 17 w 1.9999996304512022 loss 0.000000
epoch 19 w 1.9999999165534972 loss 0.000000


## Regression in torch

In [56]:
x = torch.tensor([1,2,3,4], dtype=torch.float32)
y= torch.tensor([2,4,6,8], dtype=torch.float32)
w= torch.tensor(0.0, requires_grad=True)

def forward(X):
    return w * X

def loss(Y, Y_pred):
    return ((Y_pred-Y)**2).mean()

# def gradient(X, Y, Y_pred):
#     return np.dot(2*X, Y_pred-Y).mean()



n_iter = 1000
lr = 0.01
for epoch in range(n_iter):
    pred = forward(x)
    l = loss(y,pred)
    l.backward()
    with torch.no_grad():
        w -= lr*w.grad
    w.grad.zero_()
    if epoch %100 ==0:
        print(f'epoch {epoch+1} w {w} loss {l:3f}')
print(f"prediction after training {forward(5):.2f}")

epoch 1 w 0.29999998211860657 loss 30.000000
epoch 101 w 1.9999996423721313 loss 0.000000
epoch 201 w 1.9999996423721313 loss 0.000000
epoch 301 w 1.9999996423721313 loss 0.000000
epoch 401 w 1.9999996423721313 loss 0.000000
epoch 501 w 1.9999996423721313 loss 0.000000
epoch 601 w 1.9999996423721313 loss 0.000000
epoch 701 w 1.9999996423721313 loss 0.000000
epoch 801 w 1.9999996423721313 loss 0.000000
epoch 901 w 1.9999996423721313 loss 0.000000
prediction after training 10.00


## Replace parts with torch functions

In [68]:
import torch.nn as nn


n_iter = 10000
lr = 0.01

#x= torch.tensor([1,2,3,4], dtype=torch.float32)
#y= torch.tensor([2,4,6,8], dtype=torch.float32)
#w= torch.tensor(0.0, requires_grad=True)


x= torch.tensor([[1],[2],[3],[4]], dtype=torch.float32)
y= torch.tensor([[2],[4],[6],[8]], dtype=torch.float32)
x_test= torch.tensor([[5]], dtype=torch.float32)
n_samples, n_features  = x.shape
n_inputs, n_outputs = n_features,n_features
#w= torch.tensor(0.0, requires_grad=True)

model = nn.Linear(n_inputs, n_outputs)
#def forward(X):
#   return w * X

## replace loss with MSEloss
loss= nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr) # will be used to do update
#def loss(Y, Y_pred):
 #   return ((Y_pred-Y)**2).mean()

# def gradient(X, Y, Y_pred):
#     return np.dot(2*X, Y_pred-Y).mean()



w_prev = 99999999
for epoch in range(n_iter):
    pred = model(x)
    l = loss(y,pred)
    l.backward()

    optimizer.step()
 #   with torch.no_grad():
 #       w -= lr*w.grad

    optimizer.zero_grad()
    #w.grad.zero_()
    w,b = model.parameters()
    w_now = w[0][0].item()
    if w_now == w_prev:
        print(f'last epoch {epoch+1} w {w[0][0].item()} loss {l:3f}')
        break
    w_prev = w_now
    if epoch %1000 ==0:
        w,b = model.parameters()
        print(f'epoch {epoch+1} w {w[0][0].item()} loss {l:3f}')
print(f"prediction after training {model(x_test).item():.2f}")

epoch 1 w 0.0009669065475463867 loss 55.290382
epoch 1001 w 2.0015087127685547 loss 0.000003
epoch 2001 w 2.0000765323638916 loss 0.000000
last epoch 2040 w 2.0000674724578857 loss 0.000000
prediction after training 10.00


## Create a custom class

In [69]:
import torch.nn as nn


n_iter = 10000
lr = 0.01

#x= torch.tensor([1,2,3,4], dtype=torch.float32)
#y= torch.tensor([2,4,6,8], dtype=torch.float32)
#w= torch.tensor(0.0, requires_grad=True)


x= torch.tensor([[1],[2],[3],[4]], dtype=torch.float32)
y= torch.tensor([[2],[4],[6],[8]], dtype=torch.float32)
x_test= torch.tensor([[5]], dtype=torch.float32)
n_samples, n_features  = x.shape
n_inputs, n_outputs = n_features,n_features
#w= torch.tensor(0.0, requires_grad=True)


class LinearRegression(nn.Module):
    def __init__(self,input_size, output_size):
        super(LinearRegression,self).__init__()
        self.lin = nn.Linear(input_size, output_size)

    def forward(self,x):
        return self.lin(x)

model = LinearRegression(n_inputs, n_outputs)
#def forward(X):
#   return w * X

## replace loss with MSEloss
loss= nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr) # will be used to do update
#def loss(Y, Y_pred):
 #   return ((Y_pred-Y)**2).mean()

# def gradient(X, Y, Y_pred):
#     return np.dot(2*X, Y_pred-Y).mean()



w_prev = 99999999
for epoch in range(n_iter):
    pred = model(x)
    l = loss(y,pred)
    l.backward()

    optimizer.step()
 #   with torch.no_grad():
 #       w -= lr*w.grad

    optimizer.zero_grad()
    #w.grad.zero_()
    w,b = model.parameters()
    w_now = w[0][0].item()
    if w_now == w_prev:
        print(f'last epoch {epoch+1} w {w[0][0].item()} loss {l:3f}')
        break
    w_prev = w_now
    if epoch %1000 ==0:
        w,b = model.parameters()
        print(f'epoch {epoch+1} w {w[0][0].item()} loss {l:3f}')
print(f"prediction after training {model(x_test).item():.2f}")

epoch 1 w 1.0181353092193604 loss 10.668782
epoch 1001 w 1.9954642057418823 loss 0.000030
epoch 2001 w 1.9997735023498535 loss 0.000000
last epoch 2635 w 1.9999659061431885 loss 0.000000
prediction after training 10.00


## Putting it all together

In [72]:
import torch 
import torch.nn as nn
from sklearn import datasets
import matplotlib.pyplot as plt

## prepare the data
X_data, y_data = datasets()