<a href="https://colab.research.google.com/github/siddhantjain07/DeepLearning/blob/master/PyTorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt

## Tensors

In [2]:
# Build a 3x2 tensor with each of three factories and show the result:
# all ones, all zeros, then random values.
for make_tensor in (torch.ones, torch.zeros, torch.rand):
  x = make_tensor(3, 2)
  print(x)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])
tensor([[0.3255, 0.9440],
        [0.2499, 0.1513],
        [0.6373, 0.5563]])


In [3]:
# torch.empty leaves the values uninitialised, so the numbers printed for x
# are arbitrary; zeros_like then builds an all-zero tensor shaped like x.
x = torch.empty((3, 2))
y = torch.zeros_like(x)
print(x, y, sep="\n")

tensor([[6.1820e-37, 0.0000e+00],
        [3.3631e-44, 0.0000e+00],
        [       nan, 0.0000e+00]])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])


In [4]:
# Five evenly spaced points from 0 to 1, both end points included.
x = torch.linspace(start=0, end=1, steps=5)
print(x)

tensor([0.0000, 0.2500, 0.5000, 0.7500, 1.0000])


In [5]:
# Build a 3x2 tensor from an explicit nested list, one inner list per row.
rows = [
  [1.0, 2.0],
  [3.0, 4.0],
  [5.0, 6.0],
]
x = torch.tensor(rows)
print(x)

tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])


## Slicing tensors

In [6]:
# Shape of x, then its second column, then its first row.
print(x.shape)
print(x[..., 1])
print(x[0])

torch.Size([3, 2])
tensor([2., 4., 6.])
tensor([1., 2.])


In [7]:
# Picking out a single element still gives a tensor (printed as tensor(4.));
# .item() unwraps it into a plain Python number.
y = x[1][1]
print(y)
print(y.item())

tensor(4.)
4.0


## Reshaping tensors

In [8]:
# Re-read the six elements of the 3x2 tensor, in row order, as a 2x3 tensor
# and show the original and the reshaped version one after the other.
y = x.view(2, 3)
print(x, y, sep="\n")

tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])
tensor([[1., 2., 3.],
        [4., 5., 6.]])


In [9]:
# -1 lets view() work out that dimension itself: six elements arranged in
# six rows leaves exactly one column, as the printed 6x1 result shows.
y = x.view(6, -1)
print(y)

tensor([[1.],
        [2.],
        [3.],
        [4.],
        [5.],
        [6.]])


## Simple Tensor Operations

In [10]:
# Element-wise sum, difference and product of two all-ones 3x2 tensors.
x = torch.ones([3, 2])
y = torch.ones([3, 2])
for elementwise_op in (torch.add, torch.sub, torch.mul):
  z = elementwise_op(x, y)
  print(z)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])
tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])


In [11]:
# add() is out-of-place: the sum goes into a new tensor z, and the second
# print confirms that y still holds its original values.
z = y.add(x)
print(z)
print(y)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])
tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])


In [12]:
# add_() (note the trailing underscore) works in place: y itself now holds
# the sum, so both prints show the same values.
# Caution: every re-run of this cell adds x to y once more.
z = y.add_(x)
print(z)
print(y)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])
tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])


## NumPy <> PyTorch

In [13]:
# Convert the tensor x to a NumPy array, then print both Python types and
# the array's contents.
x_np = x.numpy()
print(type(x), type(x_np))
print(x_np)

<class 'torch.Tensor'> <class 'numpy.ndarray'>
[[1. 1.]
 [1. 1.]
 [1. 1.]]


In [14]:
# Draw five random values with NumPy and wrap them as a tensor. The tensor
# keeps NumPy's float64 dtype, as the last print shows.
a = np.random.randn(5)
a_pt = torch.from_numpy(a)

print(a)
print(type(a), type(a_pt))
print(a_pt)

[-1.30468504 -0.26565203  0.18380229  0.36808435 -1.75810486]
<class 'numpy.ndarray'> <class 'torch.Tensor'>
tensor([-1.3047, -0.2657,  0.1838,  0.3681, -1.7581], dtype=torch.float64)


In [15]:
# Add 1 to the NumPy array in place. a_pt was built from a with
# torch.from_numpy, and the second print shows the tensor picking up the
# same change.
a += 1
print(a)
print(a_pt)

[-0.30468504  0.73434797  1.18380229  1.36808435 -0.75810486]
tensor([-0.3047,  0.7343,  1.1838,  1.3681, -0.7581], dtype=torch.float64)


In [24]:
%%time
for i in range(100):
  a = np.random.randn(100, 100)
  b = np.random.randn(100, 100)
  c = np.matmul(a, b)

CPU times: user 170 ms, sys: 98.3 ms, total: 268 ms
Wall time: 144 ms


In [25]:
%%time
for i in range(100):
  a = torch.rand([100, 100])
  b = torch.rand([100, 100])
  c = torch.matmul(a, b)

CPU times: user 24.5 ms, sys: 109 µs, total: 24.6 ms
Wall time: 25.7 ms


In [26]:
%%time
for i in range(10):
  a = np.random.randn(10000, 10000)
  b = np.random.randn(10000, 10000)
  c = a + b

CPU times: user 1min 35s, sys: 1.39 s, total: 1min 37s
Wall time: 1min 37s


In [27]:
%%time
for i in range(10):
  a = torch.rand([10000, 10000])
  b = torch.rand([10000, 10000])
  c = a + b

CPU times: user 14.4 s, sys: 7.93 ms, total: 14.4 s
Wall time: 14.4 s


## CUDA Support

In [2]:
# Number of CUDA devices PyTorch can see in this runtime.
print(torch.cuda.device_count())

1


In [6]:
# Query device 0 four ways: the device object itself, its name, its compute
# capability and its full property record.
for query in (
    torch.cuda.device,
    torch.cuda.get_device_name,
    torch.cuda.get_device_capability,
    torch.cuda.get_device_properties,
):
  print(query(0))

<torch.cuda.device object at 0x7fe621cf74e0>
Tesla K80
(3, 7)
_CudaDeviceProperties(name='Tesla K80', major=3, minor=7, total_memory=11441MB, multi_processor_count=13)


In [7]:
# Handle for the first CUDA device; the following cells pass it as
# device=cuda0 when creating tensors.
cuda0 = torch.device('cuda:0')

In [8]:
# Create two all-ones 3x2 tensors on the GPU and add them; the printed result
# reports device='cuda:0'.
a = torch.ones((3, 2), device=cuda0)
b = torch.ones((3, 2), device=cuda0)
c = torch.add(a, b)
print(c)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]], device='cuda:0')


In [9]:
# a still holds all ones (a + b did not modify it) and the printout shows it
# on cuda:0.
print(a)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]], device='cuda:0')


In [10]:
%%time 
for i in range(10):
  a = np.random.randn(10000, 10000)
  b = np.random.randn(10000, 10000)
  np.add(b, a)

CPU times: user 1min 26s, sys: 562 ms, total: 1min 26s
Wall time: 1min 26s


In [13]:
%%time
for i in range(10):
  a_cpu = torch.rand([10000, 10000])
  b_cpu = torch.rand([10000, 10000])
  b_cpu.add_(a_cpu)

CPU times: user 16.3 s, sys: 21.7 ms, total: 16.3 s
Wall time: 16.3 s


In [15]:
%%time
for i in range(10):
  a = torch.rand([10000, 10000], device=cuda0)
  b = torch.rand([10000, 10000], device=cuda0)
  b.add_(a)

CPU times: user 2.02 ms, sys: 18 µs, total: 2.03 ms
Wall time: 1.57 ms


In [26]:
%%time 
for i in range(10):
  a = np.random.randn(10000, 10000)
  b = np.random.randn(10000, 10000)
  np.matmul(a, b)

CPU times: user 19min 23s, sys: 5.09 s, total: 19min 28s
Wall time: 10min 38s


In [35]:
%%time
for i in range(10):
  a_cpu = torch.rand([10000, 10000])
  b_cpu = torch.rand([10000, 10000])
  torch.matmul(a_cpu, b_cpu)

CPU times: user 4min 20s, sys: 107 ms, total: 4min 20s
Wall time: 4min 20s


In [18]:
%%time 
for i in range(10):
  a = torch.rand([10000, 10000], device=cuda0)
  b = torch.rand([10000, 10000], device=cuda0)
  torch.matmul(a, b)

CPU times: user 8.33 ms, sys: 6 ms, total: 14.3 ms
Wall time: 16.6 ms


## Autograd

In [29]:
# requires_grad=True marks x as a tensor we want gradients for; the flag is
# echoed in the printed representation.
x = torch.ones(3, 2, requires_grad=True)
print(x)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]], requires_grad=True)


In [30]:
# y is computed from x, so its printout carries a grad_fn recording the
# addition that produced it.
y = x + 5
print(y)

tensor([[6., 6.],
        [6., 6.],
        [6., 6.]], grad_fn=<AddBackward0>)


In [31]:
# z = y^2 + 1, element-wise; with y = 6 everywhere each entry is 37.
z = y*y + 1
print(z)

tensor([[37., 37.],
        [37., 37.],
        [37., 37.]], grad_fn=<AddBackward0>)


In [32]:
# Collapse z into the single scalar t (6 entries x 37 = 222) that will be
# differentiated.
t = torch.sum(z)
print(t)

tensor(222., grad_fn=<SumBackward0>)


In [33]:
# Backpropagate from the scalar t; this fills in x.grad, which the next cell
# prints.
t.backward()

In [34]:
# dt/dx for every element of x (12 everywhere; derived in the text below).
print(x.grad)

tensor([[12., 12.],
        [12., 12.],
        [12., 12.]])


$t = \sum_i z_i, \quad z_i = y_i^2 + 1, \quad y_i = x_i + 5$

$\frac{\partial t}{\partial x_i} = \frac{\partial z_i}{\partial x_i} = \frac{\partial z_i}{\partial y_i} \frac{\partial y_i}{\partial x_i} = 2y_i \times 1$


At $x_i = 1$ we have $y_i = 6$, so $\frac{\partial t}{\partial x_i} = 2 \times 6 = 12$, which matches the printed `x.grad`.

In [36]:
# Hand-written logistic sigmoid of y = x + 5. The result is summed to a
# scalar, backpropagated, and the gradient with respect to x is printed.
x = torch.ones(3, 2, requires_grad=True)
y = x + 5
r = 1.0/(1.0 + (-y).exp())
print(r)
s = r.sum()
s.backward()
print(x.grad)

tensor([[0.9975, 0.9975],
        [0.9975, 0.9975],
        [0.9975, 0.9975]], grad_fn=<MulBackward0>)
tensor([[0.0025, 0.0025],
        [0.0025, 0.0025],
        [0.0025, 0.0025]])


In [42]:
# Same sigmoid, but backward() is called on the non-scalar r directly.
# A tensor of ones shaped like r is passed in as the incoming gradient, which
# yields the same x.grad as summing r first.
x = torch.ones(3, 2, requires_grad=True)
y = x + 5
r = 1/(1 + (-y).exp())
a = torch.ones_like(r)
r.backward(a)
print(x.grad)

tensor([[0.0025, 0.0025],
        [0.0025, 0.0025],
        [0.0025, 0.0025]])


## Autograd for DL

In [48]:
# Synthetic regression data: 20 inputs from torch.rand and targets on the
# line y = 3x - 2. The data is a fixed input to the problem, not something to
# optimise, so it is created without requires_grad; only the parameters w and
# b need gradients. This keeps autograd from tracking and differentiating
# through the data.
x = torch.rand([20, 1])
y = 3*x - 2

In [49]:
# Both parameters start at 1 and are tracked by autograd.
w = torch.ones(1, requires_grad=True)
b = torch.ones(1, requires_grad=True)

# Linear prediction for every input, then the summed squared error against
# the targets.
y_hat = w*x + b
loss = ((y_hat - y)**2).sum()

In [50]:
# Scalar loss; its printout carries a grad_fn because it depends on w and b.
print(loss)

tensor(90.5018, grad_fn=<SumBackward0>)


In [51]:
# Backpropagate the loss; this fills in w.grad and b.grad, printed next.
loss.backward()

In [52]:
# Gradient of the loss with respect to each parameter.
print(w.grad, b.grad)

tensor([30.8924]) tensor([80.9295])


In [58]:
# Gradient descent written out by hand, on a fresh random batch per step.

learning_rate = 0.01

# Both parameters start at 1; the data below is generated with w = 3, b = -2.
w = torch.ones(1, requires_grad=True)
b = torch.ones(1, requires_grad=True)

print(w.item(), b.item())

for step in range(10):

  # New batch of 20 points on the target line.
  x = torch.randn([20, 1])
  y = 3*x - 2

  # Forward pass and summed squared error.
  y_hat = w*x + b
  loss = ((y_hat - y)**2).sum()

  loss.backward()

  # Update each parameter with autograd switched off, then reset its gradient
  # so the next backward() starts from zero.
  with torch.no_grad():
    for param in (w, b):
      param -= learning_rate * param.grad
      param.grad.zero_()

  print(w.item(), b.item())

1.0 1.0
3.154468297958374 -0.5990597009658813
3.0293917655944824 -1.165163278579712
2.861570358276367 -1.504642128944397
2.9086711406707764 -1.6915650367736816
2.9641385078430176 -1.8223533630371094
2.991682529449463 -1.895116925239563
3.000699758529663 -1.937414288520813
2.9911563396453857 -1.9625502824783325
2.9955642223358154 -1.9778245687484741
2.998678684234619 -1.9868751764297485


In [None]:
%%time
learning_rate = 0.001
N = 1000
epochs = 2500

w = torch.rand([N], requires_grad=True)
b = torch.zeros([1], requires_grad=True)

print(torch.mean(w).item(), b.item())

for i in range(epochs):
  x = torch.randn([N])
  y = torch.dot(3*torch.ones([N]), x) - 2

  y_hat = torch.dot(w, x) + b

  loss = torch.sum((y_hat - y)**2)

  loss.backward()

  with torch.no_grad():
    w -= learning_rate * w.grad
    b -= learning_rate * b.grad

    w.grad.zero_()
    b.grad.zero_()

  print(torch.mean(w).item(), b.item())

In [73]:
%%time
learning_rate = 0.001
N = 1000000
epochs = 200

w = torch.rand([N], requires_grad=True)
b = torch.zeros([1], requires_grad=True)

#print(torch.mean(w).item(), b.item())

for i in range(epochs):
  x = torch.randn([N])
  y = torch.dot(3*torch.ones([N]), x) - 2

  y_hat = torch.dot(w, x) + b

  loss = torch.sum((y_hat - y)**2)

  loss.backward()

  with torch.no_grad():
    w -= learning_rate * w.grad
    b -= learning_rate * b.grad

    w.grad.zero_()
    b.grad.zero_()


  #print(torch.mean(w).item(), b.item())

CPU times: user 2.87 s, sys: 29 ms, total: 2.9 s
Wall time: 2.9 s


In [76]:
%%time
learning_rate = 0.001
N = 1000000
epochs = 200

w = torch.rand([N], requires_grad=True, device=cuda0)
b = torch.zeros([1], requires_grad=True, device=cuda0)

#print(torch.mean(w).item(), b.item())

for i in range(epochs):
  x = torch.randn([N], device=cuda0)
  y = torch.dot(3*torch.ones([N], device=cuda0), x) - 2

  y_hat = torch.dot(w, x) + b

  loss = torch.sum((y_hat - y)**2)

  loss.backward()

  with torch.no_grad():
    w -= learning_rate * w.grad
    b -= learning_rate * b.grad

    w.grad.zero_()
    b.grad.zero_()


  #print(torch.mean(w).item(), b.item())

CPU times: user 214 ms, sys: 47 ms, total: 261 ms
Wall time: 269 ms
