In [66]:
import torch

In [67]:
torch.manual_seed(123) # specify random seed 

<torch._C.Generator at 0x1135f7450>

In [73]:
device = 'cuda' if torch.cuda.is_available() else 'cpu' # specify device
my_tensor = torch.tensor([[1,2,3],[4,5,6]], device=device, requires_grad=True, dtype=torch.float)
print(my_tensor)
print('shape -->',my_tensor.shape)
print('dtype -->',my_tensor.dtype)
print('differentiable?-->',my_tensor.requires_grad)


tensor([[1., 2., 3.],
        [4., 5., 6.]], requires_grad=True)
shape --> torch.Size([2, 3])
dtype --> torch.float32
differentiable?--> True


In [74]:
x = torch.empty(size=(3,3))
print(x)
x = torch.zeros(size=(3,3))
print(x)
x = torch.rand((3,3))
print(x)
x = torch.eye(3)
print(x.shape)
x = torch.arange(1,10,2)
print(x)
x = torch.randint(1,3,(2,2))
print(x)

tensor([[1.0524e+21, 2.1062e+23, 3.2504e+21],
        [5.4077e+22, 2.6077e-09, 8.5078e-07],
        [2.5667e-09, 2.6250e-09, 1.0733e-08]])
tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
tensor([[0.2961, 0.5166, 0.2517],
        [0.6886, 0.0740, 0.8665],
        [0.1366, 0.1025, 0.1841]])
torch.Size([3, 3])
tensor([1, 3, 5, 7, 9])
tensor([[1, 2],
        [2, 1]])


## Convert tensors to different types

In [75]:
t = torch.arange(4)
print(t.bool()) # to bool
print(t.float()) #float32
print(t.double()) # float64

tensor([False,  True,  True,  True])
tensor([0., 1., 2., 3.])
tensor([0., 1., 2., 3.], dtype=torch.float64)


## Array to tensor and vice versa

In [76]:
import numpy as np
np_array = np.zeros((3,3))
tensor= torch.from_numpy(np_array) # to tensor
numpy = tensor.numpy() #to numpy
print(tensor)
print(numpy)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]], dtype=torch.float64)
[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


## Tensor math

In [77]:
x = torch.tensor([1,2,3])
y = torch.tensor([4,5,7])
z = torch.empty(3)
torch.add(x,y,out=z)
print(x+y)
print(x/y)
print(z)

tensor([ 5,  7, 10])
tensor([0.2500, 0.4000, 0.4286])
tensor([ 5.,  7., 10.])


## inplace
underscore after operator

In [78]:
x = torch.tensor([1,2,3])
x.add_(x)
print(x)

tensor([2, 4, 6])


In [79]:
## exponent
x = torch.tensor([1,2,3])
y=x**2
print(y)

tensor([1, 4, 9])


In [80]:
#comparasion
x = torch.tensor([1,2,3])
z = x>2
print(z)

tensor([False, False,  True])


In [81]:
# matrix multiplication
x = torch.rand((2,2))
y = torch.rand((2,2))
z = torch.mm(x,y)
print(z)

tensor([[0.3717, 0.3452],
        [0.4107, 0.2547]])


In [82]:
# matrix multiplication
x = torch.ones((2,2))
z = x**2
print(x)

tensor([[1., 1.],
        [1., 1.]])


In [84]:
# elementwise multiplication (Hadaman product)
x = torch.randint(1,10,(2,2))
y = torch.randint(1,10,(2,2))
print('x', x)
print('y', y)
print('x*y', x*y)
print('MM', torch.mm(x,y))
#x[:,0]
print('dot', torch.dot(x[:,0],y[:,0]))


x tensor([[1, 1],
        [2, 1]])
y tensor([[8, 4],
        [2, 6]])
x*y tensor([[8, 4],
        [4, 6]])
MM tensor([[10, 10],
        [18, 14]])
dot tensor(12)


In [85]:
## Batch matrix multiplication
batch = 2
p = 10
q = 5
r = 6
tensor1 = torch.rand((batch,p,q))
tensor2 = torch.rand((batch,q,r))
tensor3 = torch.bmm(tensor1,tensor2)
print(tensor3)

tensor([[[0.9933, 0.4830, 0.7707, 0.7305, 1.1820, 0.9965],
         [2.1352, 1.0474, 1.2415, 1.0830, 1.8570, 1.2905],
         [2.5442, 1.2166, 1.6466, 1.5063, 2.4719, 1.8105],
         [1.2928, 0.8484, 0.7431, 0.8570, 1.1538, 0.9195],
         [2.4484, 1.0700, 1.6383, 1.4939, 1.9658, 1.6023],
         [1.0497, 0.3138, 0.7818, 0.5896, 1.1073, 0.7808],
         [1.9676, 0.6707, 1.6358, 1.4153, 2.2471, 1.8249],
         [1.4078, 0.4564, 1.2238, 1.1506, 1.7396, 1.3090],
         [1.7316, 0.6368, 1.2803, 1.0737, 2.0214, 1.3978],
         [1.2671, 0.7145, 0.7536, 0.7071, 1.0613, 0.8993]],

        [[1.7797, 1.8384, 2.2031, 1.1907, 0.6372, 1.8469],
         [1.5220, 1.6340, 2.2625, 1.1988, 0.6432, 1.8475],
         [1.8601, 1.2881, 1.8931, 1.4941, 0.4552, 1.4391],
         [1.0040, 1.1169, 1.4871, 0.9475, 0.3064, 0.8988],
         [1.9357, 1.4987, 2.1200, 1.6230, 0.4611, 1.4532],
         [1.4224, 1.0971, 1.5018, 1.1886, 0.2993, 0.9650],
         [1.5960, 1.5484, 1.9864, 1.3976, 0.5238, 1.48

## Broadcasting

In [86]:
x = torch.rand((3,3))
y = torch.rand((1,3))

print('x', x)
print('y', y)
print('x-y', x-y)
print('x**y', x**y)

x tensor([[0.1735, 0.9247, 0.6166],
        [0.3608, 0.5325, 0.6559],
        [0.3232, 0.1126, 0.5034]])
y tensor([[0.5091, 0.5101, 0.4270]])
x-y tensor([[-0.3356,  0.4147,  0.1895],
        [-0.1483,  0.0224,  0.2288],
        [-0.1859, -0.3975,  0.0763]])
x**y tensor([[0.4100, 0.9609, 0.8134],
        [0.5951, 0.7251, 0.8352],
        [0.5627, 0.3282, 0.7459]])


## Other operations

In [87]:
x = torch.randint(1,3,(2,2))

sum_x = torch.sum(x,dim=0)
sum_y= torch.sum(x,dim=1)

print(f'x --> {x}')

print('x_shape --> ',x.shape)

print(f' sum_x --> {sum_x}')

print(f' sum_y --> {sum_y}')

value,indice = torch.max(x, dim=1)

print('value', value)
print('indx', indice)
print(torch.argmax(x))

x --> tensor([[2, 1],
        [1, 1]])
x_shape -->  torch.Size([2, 2])
 sum_x --> tensor([3, 2])
 sum_y --> tensor([3, 2])
value tensor([2, 1])
indx tensor([0, 0])
tensor(0)


In [88]:
x = torch.rand((1,4))
print(torch.mean(x.float()))


tensor(0.4871)


In [89]:
x = torch.rand((1,4))
print(x)
print(torch.clamp(x,min=0.4, max=0.8))
print(torch.any(x))

tensor([[0.2288, 0.5185, 0.5489, 0.0977]])
tensor([[0.4000, 0.5185, 0.5489, 0.4000]])
tensor(True)


## einsum
Can be used to make any time of matrix operation

In [92]:
x = torch.from_numpy(np.array([[1,2],[3,4]]))
y = torch.from_numpy(np.array([[5,6],[7,8]]))


In [93]:
print('x -->', x)
print('y -->', y)

x --> tensor([[1, 2],
        [3, 4]])
y --> tensor([[5, 6],
        [7, 8]])


In [101]:
print('colapse rows-->',torch.einsum('ab->b',x))
print('colapse cols-->',torch.einsum('ab->a',x))

colapse rows--> tensor([4, 6])
colapse cols--> tensor([3, 7])


In [114]:
print('\nHadamard product')
print(torch.einsum('ab,ab->ab',x,y))

print('\nSum of Hadamard product (dot product)')
print(torch.einsum('ab,ab->',x,y))

print('\nMatrix multiplication')
print(torch.einsum('ab,bc->ac',x,y))





Hadamard product
tensor([[ 5, 12],
        [21, 32]])

Sum of Hadamard product
tensor(70)

Matrix multiplication
tensor([[19, 22],
        [43, 50]])

Matrix multiplication
tensor(134)


In [117]:
r = torch.rand((2,4))
print(r)
print(torch.einsum('ij->ji',r).shape)

tensor([[0.1364, 0.6918, 0.3545, 0.7969],
        [0.0061, 0.2528, 0.0882, 0.6997]])
torch.Size([4, 2])


## reshape matrix


In [118]:
x = torch.from_numpy(np.array([[1,2,3],[5,6,7]]))
print(x.shape)
y = x.view(3,-1)
print(y.shape)

torch.Size([2, 3])
torch.Size([3, 2])


## Autograd

In [124]:
x = torch.rand((1), requires_grad=True)
print(x)
y= x**2 + 2
print('y ', y)
z = 2*x 
print(z)
s = 2*x + 4
print(s)



tensor([0.5125], requires_grad=True)
y  tensor([2.2626], grad_fn=<AddBackward0>)
tensor([1.0249], grad_fn=<MulBackward0>)
tensor([5.0249], grad_fn=<AddBackward0>)


## Remove grad

In [125]:
x = torch.rand(4,requires_grad=True)
y = torch.rand(4,requires_grad=True)
z = torch.rand(4,requires_grad=True)
print('x ', x)
print('y ', y)
print('z ', z)

# first method
x.requires_grad=False
print('new x ', x)

#second method
print(y.detach())

#third method
with torch.no_grad():
    print(z+2)

x  tensor([0.1549, 0.6881, 0.4900, 0.0164], requires_grad=True)
y  tensor([0.7690, 0.7674, 0.4058, 0.1548], requires_grad=True)
z  tensor([0.5201, 0.8773, 0.9577, 0.1226], requires_grad=True)
new x  tensor([0.1549, 0.6881, 0.4900, 0.0164])
tensor([0.7690, 0.7674, 0.4058, 0.1548])
tensor([2.5201, 2.8773, 2.9577, 2.1226])


## Backpropagation

In [126]:
y = torch.tensor(2)
x = torch.rand(1)
w = torch.tensor(1, dtype = torch.float, requires_grad=True)
y_hat = w*x
loss = (y-y_hat)**2
print(loss)


tensor([2.9785], grad_fn=<PowBackward0>)


## Regression in numpy


In [49]:
x = np.array([1,2,3,4], dtype=np.float32)
y= np.array([2,4,6,8], dtype=np.float32)
w= 0.0

def forward(X):
    return w * X

def loss(Y, Y_pred):
    return ((Y_pred-Y)**2).mean()

def gradient(X, Y, Y_pred):
    return np.dot(2*X, Y_pred-Y).mean()



n_iter = 20
lr = 0.01
for epoch in range(n_iter):
    pred = forward(x)
    l = loss(y,pred)
    dw = gradient(x,y,pred)
    w -= lr*dw
    if epoch %2 ==0:
        print(f'epoch {epoch+1} w {w} loss {l:3f}')

epoch 1 w 1.2 loss 30.000000
epoch 3 w 1.871999988555908 loss 0.768000
epoch 5 w 1.9795200133323667 loss 0.019661
epoch 7 w 1.9967231869697568 loss 0.000503
epoch 9 w 1.999475698471069 loss 0.000013
epoch 11 w 1.9999160599708554 loss 0.000000
epoch 13 w 1.9999865984916685 loss 0.000000
epoch 15 w 1.9999978351593015 loss 0.000000
epoch 17 w 1.9999996304512022 loss 0.000000
epoch 19 w 1.9999999165534972 loss 0.000000


## Regression in torch

In [56]:
x = torch.tensor([1,2,3,4], dtype=torch.float32)
y= torch.tensor([2,4,6,8], dtype=torch.float32)
w= torch.tensor(0.0, requires_grad=True)

def forward(X):
    return w * X

def loss(Y, Y_pred):
    return ((Y_pred-Y)**2).mean()

# def gradient(X, Y, Y_pred):
#     return np.dot(2*X, Y_pred-Y).mean()



n_iter = 1000
lr = 0.01
for epoch in range(n_iter):
    pred = forward(x)
    l = loss(y,pred)
    l.backward()
    with torch.no_grad():
        w -= lr*w.grad
    w.grad.zero_()
    if epoch %100 ==0:
        print(f'epoch {epoch+1} w {w} loss {l:3f}')
print(f"prediction after training {forward(5):.2f}")

epoch 1 w 0.29999998211860657 loss 30.000000
epoch 101 w 1.9999996423721313 loss 0.000000
epoch 201 w 1.9999996423721313 loss 0.000000
epoch 301 w 1.9999996423721313 loss 0.000000
epoch 401 w 1.9999996423721313 loss 0.000000
epoch 501 w 1.9999996423721313 loss 0.000000
epoch 601 w 1.9999996423721313 loss 0.000000
epoch 701 w 1.9999996423721313 loss 0.000000
epoch 801 w 1.9999996423721313 loss 0.000000
epoch 901 w 1.9999996423721313 loss 0.000000
prediction after training 10.00


## Replace parts with torch functions

In [68]:
import torch.nn as nn


n_iter = 10000
lr = 0.01

#x= torch.tensor([1,2,3,4], dtype=torch.float32)
#y= torch.tensor([2,4,6,8], dtype=torch.float32)
#w= torch.tensor(0.0, requires_grad=True)


x= torch.tensor([[1],[2],[3],[4]], dtype=torch.float32)
y= torch.tensor([[2],[4],[6],[8]], dtype=torch.float32)
x_test= torch.tensor([[5]], dtype=torch.float32)
n_samples, n_features  = x.shape
n_inputs, n_outputs = n_features,n_features
#w= torch.tensor(0.0, requires_grad=True)

model = nn.Linear(n_inputs, n_outputs)
#def forward(X):
#   return w * X

## replace loss with MSEloss
loss= nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr) # will be used to do update
#def loss(Y, Y_pred):
 #   return ((Y_pred-Y)**2).mean()

# def gradient(X, Y, Y_pred):
#     return np.dot(2*X, Y_pred-Y).mean()



w_prev = 99999999
for epoch in range(n_iter):
    pred = model(x)
    l = loss(y,pred)
    l.backward()

    optimizer.step()
 #   with torch.no_grad():
 #       w -= lr*w.grad

    optimizer.zero_grad()
    #w.grad.zero_()
    w,b = model.parameters()
    w_now = w[0][0].item()
    if w_now == w_prev:
        print(f'last epoch {epoch+1} w {w[0][0].item()} loss {l:3f}')
        break
    w_prev = w_now
    if epoch %1000 ==0:
        w,b = model.parameters()
        print(f'epoch {epoch+1} w {w[0][0].item()} loss {l:3f}')
print(f"prediction after training {model(x_test).item():.2f}")

epoch 1 w 0.0009669065475463867 loss 55.290382
epoch 1001 w 2.0015087127685547 loss 0.000003
epoch 2001 w 2.0000765323638916 loss 0.000000
last epoch 2040 w 2.0000674724578857 loss 0.000000
prediction after training 10.00


## Create a custom class

In [69]:
import torch.nn as nn


n_iter = 10000
lr = 0.01

#x= torch.tensor([1,2,3,4], dtype=torch.float32)
#y= torch.tensor([2,4,6,8], dtype=torch.float32)
#w= torch.tensor(0.0, requires_grad=True)


x= torch.tensor([[1],[2],[3],[4]], dtype=torch.float32)
y= torch.tensor([[2],[4],[6],[8]], dtype=torch.float32)
x_test= torch.tensor([[5]], dtype=torch.float32)
n_samples, n_features  = x.shape
n_inputs, n_outputs = n_features,n_features
#w= torch.tensor(0.0, requires_grad=True)


class LinearRegression(nn.Module):
    def __init__(self,input_size, output_size):
        super(LinearRegression,self).__init__()
        self.lin = nn.Linear(input_size, output_size)

    def forward(self,x):
        return self.lin(x)

model = LinearRegression(n_inputs, n_outputs)
#def forward(X):
#   return w * X

## replace loss with MSEloss
loss= nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr) # will be used to do update
#def loss(Y, Y_pred):
 #   return ((Y_pred-Y)**2).mean()

# def gradient(X, Y, Y_pred):
#     return np.dot(2*X, Y_pred-Y).mean()



w_prev = 99999999
for epoch in range(n_iter):
    pred = model(x)
    l = loss(y,pred)
    l.backward()

    optimizer.step()
 #   with torch.no_grad():
 #       w -= lr*w.grad

    optimizer.zero_grad()
    #w.grad.zero_()
    w,b = model.parameters()
    w_now = w[0][0].item()
    if w_now == w_prev:
        print(f'last epoch {epoch+1} w {w[0][0].item()} loss {l:3f}')
        break
    w_prev = w_now
    if epoch %1000 ==0:
        w,b = model.parameters()
        print(f'epoch {epoch+1} w {w[0][0].item()} loss {l:3f}')
print(f"prediction after training {model(x_test).item():.2f}")

epoch 1 w 1.0181353092193604 loss 10.668782
epoch 1001 w 1.9954642057418823 loss 0.000030
epoch 2001 w 1.9997735023498535 loss 0.000000
last epoch 2635 w 1.9999659061431885 loss 0.000000
prediction after training 10.00


## Dataset and DataLoader
Datasets and Dataloaders are two basic building blocks used for handling data.

From the pytorch website, Datasets store samples (features) and their corresponding target values 

DataLoader makes the Dataset an iterable. Using these two is a way to prevent data leakage in an algorithm.


## Creating a a custom Dataset
The class for a custom Dataset should subclass the Dataset class. 

It should have three dunder methods: init, len and getitem.

- The init method will initialize and specify any necessary transform to be made on the data.

- The len method will return the number of samples.

- The getitem method will read the data, apply a specified transform if any and return a sample from the data at a given index.


As example, 



## Putting it all together
In torch, the template for creating a machine learning algorithm is as follows:
1. 

In [72]:
import torch 
import torch.nn as nn
from sklearn import datasets
import matplotlib.pyplot as plt

## prepare the data
X_data, y_data = datasets()