What is pytorch ?

Pytorch is a machine learning library developed by Facebook's Artficial intelligence group.
Pytorch provides tensor computation like numpy but with strong GPU acceleration as well as automatic differentiation.


#### Installs and checks

In [123]:
import torch
# How many GPUs are there?
print(torch.cuda.device_count())
# Which GPU Is The Current GPU?
print(torch.cuda.current_device())
print(torch.cuda.get_device_name(torch.cuda.current_device()))
# Is PyTorch using a GPU?
print(torch.cuda.is_available())

1
0
GeForce GTX 1080
True


#### Pytorch Intro

1. Pytorch
2. What are tensors?
3. Initialising , resizing , reshaping tensors
4. Numpy and pytorch interfacing
5. GPU support for pytorch
6. Speed comparisions numpy, pytorch , pytorch-gpu
7. Autograd concepts and application
8. Writing a basic learning loop with autograd

In [37]:
import torch
import matplotlib.pyplot as plt
import numpy as np

#### Initialising tensors

In [12]:
x=torch.ones(3,2)
print(x)
x=torch.zeros(3,2)
print(x)
x=torch.rand(3,2)
print(x)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])
tensor([[0.3323, 0.8201],
        [0.2263, 0.2107],
        [0.6811, 0.2847]])


In [13]:
x=torch.empty(3,2)
print(x)
y=torch.zeros_like(x)
print(y)

tensor([[2.1571e+15, 4.5612e-41],
        [1.3498e+09, 3.0946e-41],
        [4.4842e-44, 0.0000e+00]])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])


In [14]:
x=torch.linspace(0,1,5)
print(x)

tensor([0.0000, 0.2500, 0.5000, 0.7500, 1.0000])


In [17]:
x=torch.tensor([[1,2],
               [3,4],
               [5,6]])
print(x)

tensor([[1, 2],
        [3, 4],
        [5, 6]])


#### Slicing Tensors

In [21]:
print(x.size())
print(x[:,1]) # pick the 1st column and all rows
print(x[0,:]) # pick the zeroth row and all columns

torch.Size([3, 2])
tensor([2, 4, 6])
tensor([1, 2])


In [23]:
y=x[1,1]
print(y)
print(y.item())

tensor(4)
4


#### Reshaping tensors

In [25]:
print(x)
y=x.view(2,3)
print(y)

tensor([[1, 2],
        [3, 4],
        [5, 6]])
tensor([[1, 2, 3],
        [4, 5, 6]])


In [26]:
y=x.view(6,-1) # -1 indicates you figure out how many columns you can make.
print(y)

tensor([[1],
        [2],
        [3],
        [4],
        [5],
        [6]])


#### Simple tensor operations

In [31]:
x=torch.ones(3,2)
y=torch.ones(3,2)
z=x-y
print(z)
z=x+y                        #point wise operations
print(z)
z=x*y
print(z)

tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])
tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])
tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])


In [32]:
z=y.add(x)
print(z)
print(y)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])
tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])


In [33]:
# inplace add
z=y.add_(x)
print(z)
print(y)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])
tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])


#### Numpy and Pytorch

In [35]:
x_np=x.numpy()
print(type(x))
print(type(x_np))
print(x_np)

<class 'torch.Tensor'>
<class 'numpy.ndarray'>
[[1. 1.]
 [1. 1.]
 [1. 1.]]


In [39]:
a=np.random.randn(5)
a_pt=torch.from_numpy(a)
print(type(a))
print(type(a_pt))
print(a)
print(a_pt)

<class 'numpy.ndarray'>
<class 'torch.Tensor'>
[ 2.11032418  0.47158659  2.63139751 -0.29424993  0.60815801]
tensor([ 2.1103,  0.4716,  2.6314, -0.2942,  0.6082], dtype=torch.float64)


In [40]:
np.add(a,1,out=a)
print(a)                 #update to a also updates a_pt
print(a_pt)

[3.11032418 1.47158659 3.63139751 0.70575007 1.60815801]
tensor([3.1103, 1.4716, 3.6314, 0.7058, 1.6082], dtype=torch.float64)


In [59]:
%%time
for i in range(100):
    a=np.random.rand(100,100)
    b=np.random.rand(100,100)
    c=np.matmul(a,b)


CPU times: user 54.7 ms, sys: 0 ns, total: 54.7 ms
Wall time: 12.5 ms


In [60]:
%%time
for i in range(100):
    a_pt=torch.randn([100,100])
    b_pt=torch.randn([100,100])
    c_pt=torch.matmul(a_pt,b_pt)

CPU times: user 123 ms, sys: 0 ns, total: 123 ms
Wall time: 22.2 ms


In [66]:
%%time
for i in range(2):
    a=np.random.rand(10000,10000)
    b=np.random.rand(10000,10000)
    c=np.matmul(a,b)


CPU times: user 2min 3s, sys: 2.07 s, total: 2min 5s
Wall time: 22.9 s


In [67]:
%%time
for i in range(2):
    a_pt=torch.randn([10000,10000])
    b_pt=torch.randn([10000,10000])
    c_pt=torch.matmul(a_pt,b_pt)

CPU times: user 54.1 s, sys: 620 ms, total: 54.7 s
Wall time: 11.3 s


#### GPU

In [120]:
print(torch.cuda.device_count())
print(torch.cuda.get_device_name())

1
GeForce GTX 1080


In [121]:
cuda0=torch.device("cuda:0")

In [75]:
a=torch.randn(3,2,device=cuda0)
b=torch.randn(3,2,device=cuda0)
c=a+b
print(c)

tensor([[-0.4776, -2.0493],
        [ 0.3858,  1.8831],
        [ 1.4104,  0.7126]], device='cuda:0')


In [76]:
print(a,b)

tensor([[-0.3412, -1.9012],
        [ 0.1207,  1.9439],
        [ 2.3774, -0.8901]], device='cuda:0') tensor([[-0.1363, -0.1480],
        [ 0.2651, -0.0608],
        [-0.9670,  1.6027]], device='cuda:0')


In [80]:
%%time
for i in range(10):
    a=np.random.rand(10000,10000)
    b=np.random.rand(10000,10000)
    np.add(a,b)

CPU times: user 11 s, sys: 1.57 s, total: 12.6 s
Wall time: 12.6 s


In [81]:
%%time
for i in range(10):
    a_cp=torch.randn(10000,10000)
    b_cp=torch.randn(10000,10000)
    a_cp.add_(b_cp)

CPU times: user 17.9 s, sys: 1.43 s, total: 19.3 s
Wall time: 14.2 s


In [82]:
%%time
for i in range(10):
    a=torch.randn(10000,10000,device=cuda0)
    b=torch.randn(10000,10000,device=cuda0)
    a.add_(b)

CPU times: user 46.9 ms, sys: 16.1 ms, total: 63.1 ms
Wall time: 20.6 ms


#### Autograd

In [84]:
x=torch.ones(3,2,requires_grad=True)
print(x)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]], requires_grad=True)


In [86]:
y=x+5
y

tensor([[6., 6.],
        [6., 6.],
        [6., 6.]], grad_fn=<AddBackward0>)

In [88]:
z=y*y+1
z

tensor([[37., 37.],
        [37., 37.],
        [37., 37.]], grad_fn=<AddBackward0>)

In [89]:
t=torch.sum(z)
print(t)

tensor(222., grad_fn=<SumBackward0>)


In [90]:
t.backward()

In [92]:
x.grad

tensor([[12., 12.],
        [12., 12.],
        [12., 12.]])

$t=\sum_i z_i$ , $z_i=y_i ^2 +1$ , $y_i=x_i+5$

 $\frac{dt}{dx_i}=\frac{dt}{dz_i} =\frac{dz_i}{dy_i} * \frac{dy_i}{dx_i}=2y_i=2*6=12$

In [93]:
x=torch.ones(3,2,requires_grad=True)
y=x+5
r=1/(1+torch.exp(-y))
print(r)
s=torch.sum(r)
s.backward()
print(x.grad)


tensor([[0.9975, 0.9975],
        [0.9975, 0.9975],
        [0.9975, 0.9975]], grad_fn=<MulBackward0>)
tensor([[0.0025, 0.0025],
        [0.0025, 0.0025],
        [0.0025, 0.0025]])


$t=\sum_i z_i$ , $z_i=\frac{1}{1+e^{-y}}$ , $y_i=x_i+5$

 $\frac{dt}{dx_i}=\frac{dt}{dz_i} =\frac{dz_i}{dy_i} * \frac{dy_i}{dx_i}=r(1-r)=0.9975*0.0025=0.0025$

#### Autograd example for learning

In [95]:
x=torch.randn([20,1],requires_grad=True)
y=3*x-2
w=torch.tensor([1.],requires_grad=True)
b=torch.tensor([1.],requires_grad=True)
y_hat=w*x+b
loss=torch.sum((y_hat-y)**2)
loss.backward()
print(loss)
print(w.grad)
print(b.grad)

tensor(196.8944, grad_fn=<SumBackward0>)
tensor([-40.2264])
tensor([104.4453])


#### Loop this for learning

In [108]:
learning_rate=0.01
w=torch.tensor([1.],requires_grad=True)
b=torch.tensor([1.],requires_grad=True)
print("Initialized weights :",w.item(),b.item())
for i in range(10):
    x=torch.randn([20,1])
    y=3*x-2
    y_hat=w*x+b
    loss=torch.sum((y_hat-y)**2)
    loss.backward()
    with torch.no_grad():  ### we do this so that the equations below dont add to computational graph
        w-=learning_rate*w.grad
        b-=learning_rate*b.grad
        w.grad.zero_()
        b.grad.zero_()
    print(w.item(),b.item())

print("Final weights :",w.item(),b.item())       

Initialized weights : 1.0 1.0
2.131287097930908 -0.5133846998214722
2.7918286323547363 -1.1652002334594727
2.8715410232543945 -1.505526065826416
3.060063600540161 -1.731277585029602
2.9945945739746094 -1.8471484184265137
2.9810540676116943 -1.907751202583313
2.983646869659424 -1.9427721500396729
2.9856224060058594 -1.9649235010147095
2.9886159896850586 -1.9776057004928589
2.991253137588501 -1.9862788915634155
Final weights : 2.991253137588501 -1.9862788915634155


#### Doing it a for a larger problem

In [116]:
%%time
N=1000
learning_rate=0.001
epochs=2000
w=torch.rand([N],requires_grad=True)
b=torch.tensor([1.],requires_grad=True)
for i in range(epochs):
    x=torch.randn([N])
    y=torch.dot(3*torch.ones([N]),x)-2
    y_hat=torch.dot(w,x)+b
    loss=torch.sum((y_hat-y)**2)
    loss.backward()
    #print(loss)
    with torch.no_grad():
        w-=learning_rate*w.grad
        b-=learning_rate*b.grad
        w.grad.zero_()
        b.grad.zero_()
    print(torch.mean(w).item(),b.item())

0.5013208389282227 1.1208198070526123
0.5296053290367126 1.5138362646102905
0.5314335227012634 1.5888615846633911
0.5318478941917419 1.5366374254226685
0.5320925116539001 1.5868171453475952
0.5375974178314209 1.4232048988342285
0.5480119585990906 1.6577528715133667
0.5608153343200684 1.377814531326294
0.568581759929657 1.1656543016433716
0.569487452507019 1.2304662466049194
0.5700739622116089 1.1999480724334717
0.5707703232765198 1.1554310321807861
0.5826200842857361 0.9354685544967651
0.5845372080802917 1.0152592658996582
0.5845981240272522 1.0067459344863892
0.5851485133171082 0.9181948900222778
0.5869181156158447 1.0535471439361572
0.5885422229766846 0.9736653566360474
0.6034983396530151 0.7063541412353516
0.6306186318397522 1.0493489503860474
0.6380559802055359 0.8900023102760315
0.6508913636207581 1.1312775611877441
0.6516379117965698 1.301060438156128
0.6520256996154785 1.2054098844528198
0.6709405779838562 1.5497713088989258
0.6755732297897339 1.4155006408691406
0.67620557546615

2.5399508476257324 -0.8030853271484375
2.5395941734313965 -0.718655526638031
2.5382275581359863 -0.753660261631012
2.538604736328125 -0.5646763443946838
2.539383888244629 -0.6006175875663757
2.5394279956817627 -0.60929274559021
2.5364885330200195 -0.7767918109893799
2.534538984298706 -0.6937218904495239
2.536137342453003 -0.7314304709434509
2.535111904144287 -0.6306771039962769
2.5344839096069336 -0.6019673347473145
2.5337326526641846 -0.659623384475708
2.534963846206665 -0.5561810731887817
2.5315802097320557 -0.40584105253219604
2.5306344032287598 -0.3485601246356964
2.529803514480591 -0.3226706087589264
2.529649257659912 -0.42943432927131653
2.53049898147583 -0.4908830523490906
2.54079008102417 -0.8755321502685547
2.5406861305236816 -0.9081706404685974
2.5394747257232666 -0.8003361821174622
2.536078453063965 -0.71145099401474
2.5358338356018066 -0.7988453507423401
2.536120891571045 -0.828235387802124
2.5387885570526123 -0.6427630186080933
2.5286178588867188 -0.41239261627197266
2.545

3.059567451477051 -5.073233127593994
3.0503599643707275 -4.8793840408325195
3.0490128993988037 -4.835785865783691
3.040966749191284 -4.343697547912598
3.0392372608184814 -4.37405252456665
3.0382959842681885 -4.201631546020508
3.0386157035827637 -4.149473190307617
3.038357973098755 -4.189979553222656
3.0275397300720215 -4.347126007080078
3.031890630722046 -4.278517246246338
3.0326664447784424 -4.327003479003906
3.041374683380127 -4.13491678237915
3.0422585010528564 -4.158646583557129
3.04079270362854 -3.974705457687378
3.040956974029541 -4.114566802978516
3.0439224243164062 -4.0218095779418945
3.0430140495300293 -4.134176731109619
3.044529438018799 -4.108607292175293
3.061324119567871 -3.7811779975891113
3.054812431335449 -3.4339511394500732
3.059405565261841 -3.652580499649048
3.056755304336548 -3.5886635780334473
3.0568559169769287 -3.597266435623169
3.058947801589966 -3.6608388423919678
3.0555057525634766 -3.383107900619507
3.0545260906219482 -3.1417860984802246
3.0535483360290527 -2

In [117]:
%%time
N=1000000
learning_rate=0.001
epochs=200
w=torch.rand([N],requires_grad=True)
b=torch.tensor([1.],requires_grad=True)
for i in range(epochs):
    x=torch.randn([N])
    y=torch.dot(3*torch.ones([N]),x)-2
    y_hat=torch.dot(w,x)+b
    loss=torch.sum((y_hat-y)**2)
    loss.backward()
    #print(loss)
    with torch.no_grad():
        w-=learning_rate*w.grad
        b-=learning_rate*b.grad
        w.grad.zero_()
        b.grad.zero_()
    #print(torch.mean(w).item(),b.item())

CPU times: user 13.1 s, sys: 3.6 ms, total: 13.1 s
Wall time: 2.2 s


#### GPU Accelaration for learning

In [122]:
%%time
N=1000000
learning_rate=0.001
epochs=200
w=torch.rand([N],requires_grad=True,device=cuda0)
b=torch.tensor([1.],requires_grad=True,device=cuda0)
for i in range(epochs):
    x=torch.randn([N],device=cuda0)
    y=torch.dot(3*torch.ones([N],device=cuda0),x)-2
    y_hat=torch.dot(w,x)+b
    loss=torch.sum((y_hat-y)**2)
    loss.backward()
    #print(loss)
    with torch.no_grad():
        w-=learning_rate*w.grad
        b-=learning_rate*b.grad
        w.grad.zero_()
        b.grad.zero_()
    #print(torch.mean(w).item(),b.item())

CPU times: user 165 ms, sys: 20 ms, total: 185 ms
Wall time: 184 ms
