In [0]:
import torch
import numpy as np
import matplotlib.pyplot as plt

**Initializing Tensors**

In [0]:
x = torch.ones(3, 2)
print(x)
x = torch.zeros(3, 2)
print(x)
x = torch.rand(3, 2)
print(x)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])
tensor([[0.0917, 0.6097],
        [0.6679, 0.2763],
        [0.5771, 0.3333]])


In [0]:
x = torch.empty(3, 2) ## creating tensor but not initializing it, initial values will be values stores at the address
print(x)
y = torch.zeros_like(x)
print(y)

tensor([[2.1526e-36, 0.0000e+00],
        [4.4842e-44, 0.0000e+00],
        [       nan, 4.2320e+21]])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])


In [0]:
x = torch.linspace(0, 1, steps=5)
print(x)

tensor([0.0000, 0.2500, 0.5000, 0.7500, 1.0000])


In [0]:
x = torch.tensor([[1, 2],
                  [3, 4],
                  [5, 6]])
print(x)

tensor([[1, 2],
        [3, 4],
        [5, 6]])


**Slicing Tensor**

In [0]:
print(x.size())
print(x[:, 1])
print(x[0,:])

torch.Size([3, 2])
tensor([2, 4, 6])
tensor([1, 2])


In [0]:
y = x[1, 1]
print(y)
print(y.item()) # change tensor to numeric value

tensor(4)
4


**Reshaping Tensor**

In [0]:
print(x)
y = x.view(2, 3)
print(y)

tensor([[1, 2],
        [3, 4],
        [5, 6]])
tensor([[1, 2, 3],
        [4, 5, 6]])


In [0]:
y = x.reshape(6, -1)
print(y)

tensor([[1],
        [2],
        [3],
        [4],
        [5],
        [6]])


**Simple Tensor Operation**

In [0]:
x = torch.ones(3, 2)
y = torch.ones(3, 2)
x[2,1] = 0
z = x + y
print(z)
z = x - y
print(z)
z = x * y
print(z)

tensor([[2., 2.],
        [2., 2.],
        [2., 1.]])
tensor([[ 0.,  0.],
        [ 0.,  0.],
        [ 0., -1.]])
tensor([[1., 1.],
        [1., 1.],
        [1., 0.]])


In [0]:
z = y.add(x)
print(z)
print(y)

tensor([[2., 2.],
        [2., 2.],
        [2., 1.]])
tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])


In [0]:
z = y.add_(x)
print(z)
print(y)

tensor([[2., 2.],
        [2., 2.],
        [2., 1.]])
tensor([[2., 2.],
        [2., 2.],
        [2., 1.]])


In [0]:
a = torch.tensor([5, 6])
z = y.add(a)
print(z)
print(y)

tensor([[7., 8.],
        [7., 8.],
        [7., 7.]])
tensor([[2., 2.],
        [2., 2.],
        [2., 1.]])


**Numpy <-> Pytorch**

In [0]:
x_np = x.numpy() ## bridge between numpy tensor value and if you update 1 other one is also affected
print(x_np)
print(type(x_np), type(x))
x.add_(1)
print(x_np)
print(x) 
x += 1
print(x_np)
print(x)

[[2. 2.]
 [2. 2.]
 [2. 1.]]
<class 'numpy.ndarray'> <class 'torch.Tensor'>
[[3. 3.]
 [3. 3.]
 [3. 2.]]
tensor([[3., 3.],
        [3., 3.],
        [3., 2.]])
[[4. 4.]
 [4. 4.]
 [4. 3.]]
tensor([[4., 4.],
        [4., 4.],
        [4., 3.]])


In [0]:
a = np.random.randn(5)
a_tr = torch.from_numpy(a)
print(type(a), type(a_tr))
print(a_tr)
print(a)
a_tr.add_(1)
print(a_tr)
print(a)

<class 'numpy.ndarray'> <class 'torch.Tensor'>
tensor([-0.4314, -0.1954,  0.7062, -0.5024,  0.0379], dtype=torch.float64)
[-0.43142351 -0.19541321  0.70618475 -0.50238696  0.03787427]
tensor([0.5686, 0.8046, 1.7062, 0.4976, 1.0379], dtype=torch.float64)
[0.56857649 0.80458679 1.70618475 0.49761304 1.03787427]


In [0]:
%%time
for i in range(100):
  a = np.random.randn(100, 100)
  b = np.random.randn(100, 100)
  # c = a * b
  c = np.matmul(a, b)

CPU times: user 154 ms, sys: 114 ms, total: 268 ms
Wall time: 150 ms


In [0]:
%%time
for i in range(100):
  a = torch.randn(100, 100)
  b = torch.randn(100, 100)
  # c = a * b
  c = torch.matmul(a, b)

CPU times: user 24.9 ms, sys: 1.86 ms, total: 26.8 ms
Wall time: 80.5 ms


In [0]:
%%time
for i in range(10):
  a = np.random.randn(10000, 10000)
  b = np.random.randn(10000, 10000)
  c = a + b

CPU times: user 1min 35s, sys: 1.28 s, total: 1min 36s
Wall time: 1min 36s


In [0]:
%%time
for i in range(10):
  a = torch.randn(10000, 10000)
  b = torch.randn(10000, 10000)
  c = a + b

CPU times: user 16 s, sys: 5.92 ms, total: 16 s
Wall time: 16 s


**CUDA Support**

In [0]:
print(torch.cuda.device_count())

1


In [0]:
print(torch.cuda.device(0))
print(torch.cuda.get_device_name())

<torch.cuda.device object at 0x7f686bc939e8>
Tesla K80


In [0]:
cuda0 = torch.device('cuda:0')

In [0]:
a = torch.ones(3, 2, device=cuda0)
b = torch.ones(3, 2, device=cuda0)
c = a + b
print(c)
print(a)
print(b)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]], device='cuda:0')
tensor([[1., 1.],
        [1., 1.],
        [1., 1.]], device='cuda:0')
tensor([[1., 1.],
        [1., 1.],
        [1., 1.]], device='cuda:0')


In [0]:
%%time
for i in range(10):
  a = np.random.randn(10000, 10000)
  b = np.random.randn(10000, 10000)
  np.add(a, b)

CPU times: user 1min 26s, sys: 563 ms, total: 1min 27s
Wall time: 1min 27s


In [0]:
%%time
for i in range(10):
  a = torch.randn(10000, 10000)
  b = torch.randn(10000, 10000)
  b.add_(a)

CPU times: user 17.5 s, sys: 7.75 ms, total: 17.5 s
Wall time: 17.5 s


In [0]:
%%time
for i in range(10):
  a = torch.randn(10000, 10000, device=cuda0)
  b = torch.randn(10000, 10000, device=cuda0)
  b.add_(a)

CPU times: user 2.4 ms, sys: 3.99 ms, total: 6.39 ms
Wall time: 11.5 ms


In [0]:
%%time
for i in range(10):
  a = np.random.randn(10000, 10000)
  b = np.random.randn(10000, 10000)
  np.matmul(a, b)

CPU times: user 20min 29s, sys: 3.18 s, total: 20min 32s
Wall time: 11min 7s


In [0]:
%%time
for i in range(10):
  a = torch.randn(10000, 10000)
  b = torch.randn(10000, 10000)
  torch.matmul(a, b)

CPU times: user 4min 49s, sys: 195 ms, total: 4min 49s
Wall time: 4min 50s


In [0]:
%%time
for i in range(10):
  a = torch.randn(10000, 10000, device=cuda0)
  b = torch.randn(10000, 10000, device=cuda0)
  torch.matmul(a, b)

CPU times: user 9.28 ms, sys: 8.01 ms, total: 17.3 ms
Wall time: 17.7 ms


**Autograd**

In [0]:
x = torch.ones([3, 2], requires_grad=True)
print(x)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]], requires_grad=True)


In [0]:
y = x + 5
print(y)

tensor([[6., 6.],
        [6., 6.],
        [6., 6.]], grad_fn=<AddBackward0>)


In [0]:
z = y * y + 1
print(z)

tensor([[37., 37.],
        [37., 37.],
        [37., 37.]], grad_fn=<AddBackward0>)


In [0]:
t = torch.sum(z)
print(t)

tensor(222., grad_fn=<SumBackward0>)


In [0]:
t.backward()

In [0]:
print(x.grad)

tensor([[12., 12.],
        [12., 12.],
        [12., 12.]])


In [0]:
x = torch.ones([3, 2], requires_grad=True)
y = x + 5
r = 1 / (1 + torch.exp(-y))
print(r)
s = torch.sum(r)
s.backward()
print(x.grad)

tensor([[0.9975, 0.9975],
        [0.9975, 0.9975],
        [0.9975, 0.9975]], grad_fn=<MulBackward0>)
tensor([[0.0025, 0.0025],
        [0.0025, 0.0025],
        [0.0025, 0.0025]])


In [0]:
x = torch.ones([3, 2], requires_grad=True)
y = x + 5
r = 1/(1 + torch.exp(-y))
a = torch.ones([3, 2])
r.backward(a)
print(x.grad)

tensor([[0.0025, 0.0025],
        [0.0025, 0.0025],
        [0.0025, 0.0025]])


**Autograd example for what we have been doing**

In [0]:
x = torch.randn([20, 1], requires_grad=True)
y = 3*x - 2

In [0]:
w = torch.tensor([1.], requires_grad=True)
b = torch.tensor([1.], requires_grad=True)

y_hat = w*x + b

loss = torch.sum((y_hat - y)**2)

In [0]:
print(loss)

tensor(173.3331, grad_fn=<SumBackward0>)


In [0]:
loss.backward()

In [0]:
print(w.grad, b.grad)

tensor([-31.8708]) tensor([94.3082])


**Do it in a loop**

In [0]:
learning_rate = 0.01

w = torch.tensor([1.], requires_grad=True)
b = torch.tensor([1.], requires_grad=True)

print(w.item(), b.item())
for i in range(10):
  x = torch.randn([20, 1])
  y = 3*x - 2

  y_hat = w*x + b

  loss = torch.sum((y_hat - y)**2)
  print(loss)

  loss.backward()

  with torch.no_grad(): ## we use no grad to make sure weight update equations are not part of computation graph generated for forward prop
    w -= learning_rate * w.grad
    b -= learning_rate * b.grad

    w.grad.zero_()
    b.grad.zero_()

  print(w.item(), b.item())

1.0 1.0
tensor(264.2288, grad_fn=<SumBackward0>)
1.788204312324524 -0.23605561256408691
tensor(112.2058, grad_fn=<SumBackward0>)
2.474776268005371 -1.0366086959838867
tensor(32.0036, grad_fn=<SumBackward0>)
2.847053050994873 -1.4980437755584717
tensor(5.8267, grad_fn=<SumBackward0>)
2.932461738586426 -1.7041802406311035
tensor(1.9337, grad_fn=<SumBackward0>)
2.9753899574279785 -1.82511305809021
tensor(0.6169, grad_fn=<SumBackward0>)
2.9805140495300293 -1.894938588142395
tensor(0.2372, grad_fn=<SumBackward0>)
2.9948835372924805 -1.9374244213104248
tensor(0.0771, grad_fn=<SumBackward0>)
2.993621587753296 -1.9621846675872803
tensor(0.0285, grad_fn=<SumBackward0>)
2.9947292804718018 -1.9770946502685547
tensor(0.0099, grad_fn=<SumBackward0>)
2.994443893432617 -1.9857807159423828


**Do it for large value**

In [0]:
%%time
learning_rate = 0.01
N = 1000
epochs = 200

w = torch.rand([N], requires_grad=True)
b = torch.ones([1], requires_grad=True)

print(torch.mean(w).item(), b.item())

for i in range(epochs):
  x = torch.randn([N])
  y = torch.dot(3*torch.ones([N]), x) - 2

  y_hat = torch.dot(w, x) + b

  loss = torch.sum((y_hat - y)**2)
  # print(loss)

  loss.backward()

  with torch.no_grad():
    w -= learning_rate * w.grad
    b -= learning_rate * b.grad

    w.grad.zero_()
    b.grad.zero_()

  print(torch.mean(w).item(), b.item())

0.5238341093063354 1.0
0.5254179239273071 0.479810893535614
0.5614647269248962 1.9451277256011963
0.5345283150672913 3.800072431564331
0.5922037363052368 1.9026570320129395
0.5779885053634644 0.43874454498291016
0.6663611531257629 -3.1575396060943604
0.6707050800323486 -5.5738067626953125
0.6881402134895325 -4.782465934753418
0.5840356945991516 -0.6204061508178711
0.549179196357727 0.2868614196777344
0.632590651512146 2.80442476272583
0.8605751991271973 -7.764693737030029
1.4470272064208984 -17.75151824951172
0.6740983724594116 -1.9394607543945312
0.5829266309738159 -6.386473178863525
0.6323782205581665 -18.88169288635254
0.41367223858833313 -46.89295959472656
0.4325815439224243 -31.50178337097168
-0.013324722647666931 -9.17243766784668
0.6443884968757629 25.098642349243164
0.8476313352584839 33.65824890136719
1.4431465864181519 24.003257751464844
1.2425061464309692 12.566157341003418
1.0707595348358154 14.85265827178955
0.9679262638092041 5.286731719970703
1.4159374237060547 33.708763

In [0]:
%%time
learning_rate = 0.001
N = 1000000
epochs = 200

w = torch.rand([N], requires_grad=True)
b = torch.ones([1], requires_grad=True)

print(torch.mean(w).item(), b.item())

for i in range(epochs):
  
  x = torch.randn([N])
  y = torch.dot(3*torch.ones([N]), x) - 2
  
  y_hat = torch.dot(w, x) + b
  loss = torch.sum((y_hat - y)**2)
  
  loss.backward()
  
  with torch.no_grad():
    w -= learning_rate * w.grad
    b -= learning_rate * b.grad
    
    w.grad.zero_()
    b.grad.zero_()

  print(torch.mean(w).item(), b.item())
  

0.5005515813827515 1.0
0.5006411671638489 1.7451366186141968
0.5009893178939819 0.033278703689575195
0.5011156797409058 -0.49509280920028687
0.5021746158599854 3.9337382316589355
0.5043640732765198 0.33821773529052734
0.5094436407089233 -8.63611125946045
0.502781331539154 -50.512115478515625
0.6924042701721191 -183.3279571533203
1.3362212181091309 -637.0899047851562
2.1953375339508057 235.3001708984375
-0.22361615300178528 -1779.9315185546875
-0.31165921688079834 -2466.015380859375
-0.2942468225955963 1070.5849609375
1.093262791633606 -3399.56494140625
0.3899768888950348 509.388916015625
10.90703010559082 6531.3837890625
19.239044189453125 -6258.56640625
16.307079315185547 3015.0390625
23.90019989013672 13971.861328125
21.4930477142334 28961.21875
28.341707229614258 76811.578125
32.91560745239258 -29101.765625
37.92844772338867 -53287.3359375
392.80010986328125 468729.40625
-386.80780029296875 -1031041.375
-1325.7750244140625 -1744273.375
-2412.51123046875 -2814413.5
-3603.55078125 -21

In [0]:
%%time
learning_rate = 0.01
N = 1000
epochs = 200

w = torch.rand([N], requires_grad=True, device=cuda0)
b = torch.ones([1], requires_grad=True, device=cuda0)

print(torch.mean(w).item(), b.item())

for i in range(epochs):
  x = torch.randn([N], device=cuda0)
  y = torch.dot(3*torch.ones([N], device=cuda0), x) - 2

  y_hat = torch.dot(w, x) + b

  loss = torch.sum((y_hat - y)**2)
  # print(loss)

  loss.backward()

  with torch.no_grad():
    w -= learning_rate * w.grad
    b -= learning_rate * b.grad

    w.grad.zero_()
    b.grad.zero_()

  print(torch.mean(w).item(), b.item())

0.5132827162742615 1.0
0.5313652753829956 1.9489386081695557
0.5992146134376526 -0.17768144607543945
0.5984985828399658 -0.30758029222488403
0.598190426826477 -0.449088990688324
0.6057395935058594 0.8265195488929749
0.6344659328460693 3.3439412117004395
0.6466487646102905 3.579383373260498
0.9510200023651123 -1.2470111846923828
0.8970737457275391 -2.700594663619995
0.7667921185493469 1.505833387374878
0.7016636729240417 -2.0993030071258545
0.5110252499580383 -10.808579444885254
0.5442276000976562 -14.383378982543945
0.6028478741645813 -8.716846466064453
0.7213584184646606 0.07077312469482422
0.7276046872138977 -1.0910871028900146
0.6784216165542603 -6.633345603942871
0.6972595453262329 -5.215209484100342
0.7004274725914001 -2.855556011199951
0.5644440054893494 -5.868660926818848
0.6005802154541016 8.732505798339844
0.5059733986854553 6.295694351196289
1.0930827856063843 -17.172513961791992
0.9921697378158569 -38.546295166015625
0.5140179395675659 -28.59299087524414
0.3180336356163025 -