# <center>CS568:Deep Learning</center>  <center>Spring 2020</center> 

In [10]:
import torch

The Pytorch tensors can be copied from CPU to GPU. 

In [None]:
# set seed for repeatability
torch.manual_seed(22)
# create a pytorch tensor
x = torch.randn(2,2)
# copy x to GPU
y = x.cuda()
# copy x back to CPU
z = y.cpu()

try:
    print(y.numpy())
except RuntimeError as e:
    print("Error: ", e)

To perform any operation, both tensors should be on same device.


In [0]:
# a tensor on CPU
a = torch.rand(2,3)
# b tensor on GPU
b = torch.rand(3,2).cuda() 
try:
    result = torch.mm(a,b)  
    print(result)
except TypeError as e:
    print(e)

Check if GPU is available then move tensors to GPU.

In [13]:
a = torch.rand(2,2)
if torch.cuda.is_available():
    a = a.cuda()
    print(a)

GPU vs CPU 

In [None]:
from timeit import timeit 

def matmul():
    res = torch.mm(a, b) 

# create two random tensors 
a = torch.rand(1000, 1280)
b = torch.rand(1280, 1)
ite = 1000

# Time CPU takes for matrix multiplication
print('CPU: {} seconds'.format(timeit(matmul, number=ite)))
# Time GPU takes for matrix multiplication
a, b = a.cuda(), b.cuda()
print('GPU: {} seconds'.format(timeit(matmul, number=ite)))

Pytorch tensors provide automatic differentiation. If you set requires_grad=True pytorch will compute the gradient of this tensor. 

In [30]:
# construct a differentiable tensor
x = torch.tensor(torch.arange(1,5), requires_grad=False)
print("x", x.dtype)
y = torch.tensor(torch.arange(1,5), requires_grad=True)
print("y", y.dtype)

x torch.int64


  """Entry point for launching an IPython kernel.
  This is separate from the ipykernel package so we can avoid doing imports until


RuntimeError: ignored

The tensor y depends on x and x has discrete values. Therefore, we cannot compute gradient of tensor y. 

In [0]:
x = torch.tensor(torch.arange(1,5), requires_grad=False)
print("x", x.dtype)
# compute square of x
y = x**2
print("x",x)
print("y",y)
# Calculate gradient (dy/dx=2x)
y.sum().backward()
# Print values
print(x.grad)

In [36]:
x = torch.tensor(torch.arange(1,5).float(), requires_grad=True)
print("x", x.dtype)
# compute square of x
y = x**2
print("x",x)
print("y",y)
# Calculate gradient (dy/dx=2x)
y.sum().backward()
# Print values
print(x.grad)

x torch.float32
x tensor([1., 2., 3., 4.], requires_grad=True)
y tensor([ 1.,  4.,  9., 16.], grad_fn=<PowBackward0>)
tensor([2., 4., 6., 8.])


  """Entry point for launching an IPython kernel.


#### Basic building blocks to make a neural network in Pytorch

**Define model step**
Construct a network using torch.nn module

In [44]:
net = torch.nn.Linear(3,2)
print(net)

Linear(in_features=3, out_features=2, bias=True)


forward() function

In [50]:
x = torch.tensor(torch.arange(0, 3).float(), requires_grad=True)
y = net.forward(x) # or net(x)
print(y)

tensor([0.2988, 1.4400], grad_fn=<AddBackward0>)


  """Entry point for launching an IPython kernel.


parameters() function

In [51]:
for param in net.parameters():
  print(param)

Parameter containing:
tensor([[ 0.5329, -0.1142,  0.4923],
        [ 0.5727,  0.1568,  0.4124]], requires_grad=True)
Parameter containing:
tensor([-0.5715,  0.4585], requires_grad=True)


Create a model by constructing a class MyNetwork. This class will inherit from the nn.Module class of Pytorch.

In [54]:
class MyNetwork(torch.nn.Module):    
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        """
        In this constructor we instantiate two nn.Linear modules and assign them as
        member variables.

        input_dim: input dimension
        hidden_dim: dimension of hidden layer
        output_dim: output dimension
        """
        self.layer1 = torch.nn.Linear(input_dim, hidden_dim)
        self.layer2 = torch.nn.Sigmoid()
        self.layer3 = torch.nn.Linear(hidden_dim , output_dim)

    def forward(self, x):
        """
        In this function we accept a Variable of input data and we must 
        return a Variable of output data. 
        """
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return x

net = MyNetwork(5, 50, 10)
print(net)

MyNetwork(
  (layer1): Linear(in_features=5, out_features=50, bias=True)
  (layer2): Sigmoid()
  (layer3): Linear(in_features=50, out_features=10, bias=True)
)


In [17]:
class parent:
    ''' Creates a parent class '''
    def __init__(self):
        print("initialize parent class")

class child1(parent):
    '''Inherits from parent, but does not run parent's init method '''
    def __init__(self):
        print("initialize child1 class")

class child2(parent):
    '''Inhereits from parent, but DOES run parents's init method'''
    def __init__(self):
        super().__init__()
        print("initialize child2 class")

c1 = child1()
c2 = child2()

initialize child1 class
initialize parent class
initialize child2 class


In [0]:
def neural_net(input_size, hidden_size, output_size):
      return nn.Sequential(nn.Linear(input_size, hidden_size),
                       nn.ReLU(),
                       nn.Linear(hidden_size, output_size))
net = neural_net(5, 50, 10)

Now define loss function 

In [61]:
x = torch.randn(10, 3)
y = torch.randn(10, 2)

net = torch.nn.Linear(3, 2)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)

output = net(x)
loss = criterion(output,y)
print(loss)
loss.backward()

for param in net.parameters():
    print("param ", param)

for param in net.parameters():
    print("param gradients", param.grad)

tensor(3.0262, grad_fn=<MseLossBackward>)
param  Parameter containing:
tensor([[ 0.4056,  0.4691,  0.1522],
        [-0.4018, -0.5687, -0.2492]], requires_grad=True)
param  Parameter containing:
tensor([0.3911, 0.0126], requires_grad=True)
param gradients tensor([[ 1.8942,  1.1702,  0.7440],
        [-1.0435, -0.7646, -0.9536]])
param gradients tensor([ 1.0664, -0.5170])


Pyotrch accumulate gradients

In [63]:
x = torch.randn(10, 3)
y = torch.randn(10, 2)

net = torch.nn.Linear(3, 2)
criterion = torch.nn.MSELoss()

output = net(x)
loss = criterion(output,y)
print(loss)
loss.backward()
for param in net.parameters():
    print("parameters gradients", param.grad)

# output = net(x)
# loss = criterion(output,y)
# loss.backward()
# for param in net.parameters():
#      print("parameters gradients", param.grad)

net.zero_grad()
output = net(x)
loss = criterion(output,y)
loss.backward()
for param in net.parameters():
     print("parameters gradients", param.grad)

tensor(2.7423, grad_fn=<MseLossBackward>)
parameters gradients tensor([[-0.0534, -1.8559,  0.2198],
        [-0.3589,  0.3408, -0.5430]])
parameters gradients tensor([ 0.3171, -0.6221])
tensor([[-0.0534, -1.8559,  0.2198],
        [-0.3589,  0.3408, -0.5430]])
tensor([ 0.3171, -0.6221])


In [19]:
x = torch.randn(10, 3)
y = torch.randn(10, 2)

net = torch.nn.Linear(3, 2)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)

net.zero_grad()
output = net(x)
loss = criterion(output,y)
loss.backward()
for param in net.parameters():
     print("parameters gradients", param.grad)

print("Parameters before gradient descent :")
for param in net.parameters():
    print(param)

optimizer.step()

print("Parameters after gradient descent :")
for param in net.parameters():
    print(param)


parameters gradients tensor([[-0.0645, -0.2362,  0.6032],
        [ 0.1961,  0.0522, -0.1033]])
parameters gradients tensor([0.8882, 0.0626])
Parameters before gradient descent :
Parameter containing:
tensor([[-0.3553,  0.1486,  0.4576],
        [-0.1190, -0.0331, -0.3434]], requires_grad=True)
Parameter containing:
tensor([0.5579, 0.2272], requires_grad=True)
Parameters after gradient descent :
Parameter containing:
tensor([[-0.3547,  0.1509,  0.4516],
        [-0.1210, -0.0336, -0.3424]], requires_grad=True)
Parameter containing:
tensor([0.5490, 0.2266], requires_grad=True)


In [20]:
iterations = 10
for i in range(iterations):
    optimizer.zero_grad() 
    output = net(x)
    loss = criterion(output,y)
    loss.backward()
    optimizer.step()
    print(loss)

tensor(1.2451, grad_fn=<MseLossBackward>)
tensor(1.2330, grad_fn=<MseLossBackward>)
tensor(1.2212, grad_fn=<MseLossBackward>)
tensor(1.2099, grad_fn=<MseLossBackward>)
tensor(1.1990, grad_fn=<MseLossBackward>)
tensor(1.1885, grad_fn=<MseLossBackward>)
tensor(1.1783, grad_fn=<MseLossBackward>)
tensor(1.1684, grad_fn=<MseLossBackward>)
tensor(1.1589, grad_fn=<MseLossBackward>)
tensor(1.1497, grad_fn=<MseLossBackward>)
