# <center>CS568:Deep Learning</center>  <center>Spring 2020</center> 

In [None]:
import torch

PyTorch tensors can be copied from the CPU to the GPU and back.

In [None]:
# set seed for repeatability
torch.manual_seed(22)
# create a pytorch tensor (on the CPU)
x = torch.randn(2, 2)
# copy x to GPU when one is present; on a CPU-only machine y is simply x,
# so the cell still runs instead of failing inside .cuda()
y = x.cuda() if torch.cuda.is_available() else x
# copy y back to CPU
z = y.cpu()

# A tensor that lives on the GPU cannot be converted to a NumPy array
# directly. The exception type raised for this has differed between PyTorch
# releases (TypeError vs. RuntimeError), so both are caught here.
try:
    print(y.numpy())
except (TypeError, RuntimeError) as e:
    print("Error: ", e)

To perform an operation on two tensors, both tensors must be on the same device.


In [None]:
# a tensor on CPU
a = torch.rand(2, 3)
# b tensor on GPU (it stays on the CPU when no GPU is present, in which case
# the multiplication below simply succeeds)
b = torch.rand(3, 2)
if torch.cuda.is_available():
    b = b.cuda()
# Multiplying tensors that live on different devices is rejected. The
# exception type has differed between PyTorch releases (TypeError vs.
# RuntimeError), so both are caught to keep the demonstration from aborting.
try:
    result = torch.mm(a, b)
    print(result)
except (TypeError, RuntimeError) as e:
    print(e)

Check whether a GPU is available, and only then move tensors to the GPU.

In [None]:
# Start from a 2x2 tensor of uniform random numbers held on the CPU.
a = torch.rand((2, 2))
# Move the tensor to the GPU and display it, but only when CUDA is usable;
# on a CPU-only machine nothing is moved and nothing is printed.
if torch.cuda.is_available():
    a = a.cuda()
    print(a)

GPU vs CPU 

In [None]:
from timeit import timeit


def matmul():
    """Multiply the module-level tensors ``a`` and ``b`` once.

    Reads the globals ``a`` and ``b`` at call time, so the same function
    times whichever device they currently live on. Returns nothing; the
    product is discarded because only the elapsed time is of interest.
    """
    torch.mm(a, b)
    # CUDA kernels are launched asynchronously: without waiting for the
    # device, the timer would only measure the launch, not the computation.
    if a.is_cuda:
        torch.cuda.synchronize()


# create two random tensors
a = torch.rand(1000, 1280)
b = torch.rand(1280, 1)
ite = 1000

# Time CPU takes for matrix multiplication
print('CPU: {} seconds'.format(timeit(matmul, number=ite)))
# Time GPU takes for matrix multiplication
if torch.cuda.is_available():
    a, b = a.cuda(), b.cuda()
    # Warm-up call so that any one-time CUDA initialisation triggered by the
    # first multiplication is not counted in the measurement.
    matmul()
    print('GPU: {} seconds'.format(timeit(matmul, number=ite)))
else:
    print('GPU: not available, skipping GPU timing')

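PyTorch tensors support automatic differentiation. If you set requires_grad=True, PyTorch records the operations applied to the tensor so that the gradient with respect to it can be computed. Only floating-point (and complex) tensors can require gradients.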

In [None]:
# an integer tensor that does not track gradients
x = torch.arange(1, 5)
print("x", x.dtype)
# Try to construct a differentiable tensor from the same integer data.
# Only floating-point (and complex) tensors can require gradients, so this
# raises; the error is printed instead of aborting the cell, and y is left
# undefined in that case.
try:
    y = torch.arange(1, 5).requires_grad_(True)
    print("y", y.dtype)
except RuntimeError as e:
    print("Error: ", e)

The tensor y depends on x, but x holds integer (discrete) values and does not require gradients. Therefore, we cannot compute the gradient of y with respect to x.

In [None]:
# integer tensor; it does not (and cannot) track gradients
x = torch.arange(1, 5)
print("x", x.dtype)
# compute square of x
y = x**2
print("x", x)
print("y", y)
# Calculate gradient (dy/dx=2x). Because x does not require gradients, y has
# no graph to backpropagate through and backward() raises; the error is
# printed so the cell can continue.
try:
    y.sum().backward()
except RuntimeError as e:
    print("Error: ", e)
# Print values (None here, since no gradient was computed)
print(x.grad)

In [None]:
# Build the float tensor [1., 2., 3., 4.] directly with gradient tracking
# enabled; wrapping an existing tensor in torch.tensor(...) makes a redundant
# copy and triggers PyTorch's copy-construct warning.
x = torch.arange(1, 5, dtype=torch.float32, requires_grad=True)
print("x", x.dtype)
# compute square of x
y = x**2
print("x", x)
print("y", y)
# Calculate gradient (dy/dx=2x)
y.sum().backward()
# Print values
print(x.grad)

#### Basic building blocks to make a neural network in Pytorch

**Define model step**
Construct a network using torch.nn module

In [None]:
# A single fully connected layer taking 3 input features to 2 outputs;
# printing the module shows how it is configured.
net = torch.nn.Linear(in_features=3, out_features=2)
print(net)

forward() function

In [None]:
# Input vector [0., 1., 2.] created directly as a float tensor that tracks
# gradients; wrapping an existing tensor in torch.tensor(...) makes a
# redundant copy and triggers PyTorch's copy-construct warning.
x = torch.arange(0, 3, dtype=torch.float32, requires_grad=True)
# Calling the module runs net.forward(x) through __call__, which also runs
# any registered hooks; this is preferred over calling forward() directly.
y = net(x)
print(y)

parameters() function

In [None]:
# Show every learnable tensor of the network, in the order the module
# yields them.
for parameter in net.parameters():
    print(parameter)

Create a model by constructing a class MyNetwork. This class will inherit from the nn.Module class of Pytorch.

In [None]:
class MyNetwork(torch.nn.Module):
    """Fully connected network: Linear -> Sigmoid -> Linear."""

    def __init__(self, input_dim, hidden_dim, output_dim):
        """Instantiate the layers and assign them as member variables.

        input_dim: input dimension
        hidden_dim: dimension of hidden layer
        output_dim: output dimension
        """
        # Must run before any sub-module is assigned so that nn.Module can
        # register the layers below.
        super().__init__()
        self.layer1 = torch.nn.Linear(input_dim, hidden_dim)
        self.layer2 = torch.nn.Sigmoid()
        self.layer3 = torch.nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Pass the input tensor x through layer1, layer2 and layer3 in order
        and return the resulting tensor.
        """
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return x


net = MyNetwork(5, 50, 10)
print(net)

In [None]:
class parent:
    """Base class whose constructor announces that it ran."""

    def __init__(self):
        print("initialize parent class")


class child1(parent):
    """Subclass that overrides __init__ and never calls the parent's version."""

    def __init__(self):
        print("initialize child1 class")


class child2(parent):
    """Subclass that runs the parent's __init__ before its own message."""

    def __init__(self):
        super().__init__()
        print("initialize child2 class")


# Instantiate both subclasses; child1 is created first, then child2.
c1, c2 = child1(), child2()

In [None]:
def neural_net(input_size, hidden_size, output_size):
    """Build a fully connected network: Linear -> ReLU -> Linear.

    input_size: number of input features
    hidden_size: width of the hidden layer
    output_size: number of outputs

    Returns the layers wrapped in a torch.nn.Sequential.
    """
    # Use the fully qualified torch.nn names: only `torch` is imported in the
    # visible part of this file, so a bare `nn` would be undefined.
    return torch.nn.Sequential(
        torch.nn.Linear(input_size, hidden_size),
        torch.nn.ReLU(),
        torch.nn.Linear(hidden_size, output_size),
    )


net = neural_net(5, 50, 10)

Now define the loss function.

In [None]:
# Random data: 10 samples with 3 features each, and 10 two-dimensional targets.
x = torch.randn((10, 3))
y = torch.randn((10, 2))

# Linear model, mean-squared-error loss and an SGD optimizer over the model's
# parameters.
net = torch.nn.Linear(in_features=3, out_features=2)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(params=net.parameters(), lr=0.01)

# Forward pass, loss evaluation, then backpropagation to fill in .grad.
output = net(x)
loss = criterion(output, y)
print(loss)
loss.backward()

# First list every parameter, then list the gradient stored on each one.
for param in net.parameters():
    print("param ", param)

for param in net.parameters():
    print("param gradients", param.grad)

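PyTorch accumulates gradients: each backward() call adds to the gradients already stored on the parameters, so they must be reset (for example with zero_grad()) between passes.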

In [None]:
# Random data: 10 samples with 3 features each, and 10 two-dimensional targets.
x = torch.randn((10, 3))
y = torch.randn((10, 2))

net = torch.nn.Linear(in_features=3, out_features=2)
criterion = torch.nn.MSELoss()

# First pass: forward, loss, backward, then show the stored gradients.
output = net(x)
loss = criterion(output, y)
print(loss)
loss.backward()
for param in net.parameters():
    print("parameters gradients", param.grad)

# Optional experiment: uncomment to repeat the pass WITHOUT resetting the
# gradients first and compare the printed values with the ones above.
# output = net(x)
# loss = criterion(output,y)
# loss.backward()
# for param in net.parameters():
#      print("parameters gradients", param.grad)

# Second pass, this time after clearing the stored gradients.
net.zero_grad()
output = net(x)
loss = criterion(output, y)
loss.backward()
for param in net.parameters():
    print("parameters gradients", param.grad)

In [None]:
# Random data: 10 samples with 3 features each, and 10 two-dimensional targets.
x = torch.randn((10, 3))
y = torch.randn((10, 2))

net = torch.nn.Linear(in_features=3, out_features=2)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(params=net.parameters(), lr=0.01)

# Clear any stored gradients, then run one forward/backward pass and show
# the gradient now held by each parameter.
net.zero_grad()
output = net(x)
loss = criterion(output, y)
loss.backward()
for param in net.parameters():
    print("parameters gradients", param.grad)

print("Parameters before gradient descent :")
for param in net.parameters():
    print(param)

# One SGD update using the gradients computed above.
optimizer.step()

print("Parameters after gradient descent :")
for param in net.parameters():
    print(param)


In [None]:
# Run ten optimisation steps on the model, data, loss and optimizer that are
# already in scope, printing the loss computed at each step.
iterations = 10
for _ in range(iterations):
    # reset gradients so each step uses only this step's backward pass
    optimizer.zero_grad()
    output = net(x)
    loss = criterion(output, y)
    loss.backward()
    optimizer.step()
    print(loss)