# PyTorch 

PyTorch is another machine learning framework, similar in many ways to TensorFlow but with a few key differences:

 - PyTorch does not compile functions in the same way that TensorFlow's `tf.function` does
 - PyTorch generally uses less memory than TensorFlow
 - PyTorch preserves a more `numpy`-like interface
 
 More information about PyTorch can be found here: https://pytorch.org/
 
 In this short notebook, we'll cover the same topics that we covered with TensorFlow, but using PyTorch.

In [1]:
import torch


In [3]:
# TensorFlow is imported only for its Keras dataset helper, which is used here to
# load the CIFAR-10 train and test splits as (images, labels) pairs.
import tensorflow as tf
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
# Remove the `tf` name from the notebook namespace; everything below uses torch.
del tf

In [4]:
# Inspect the layout of the training images; the channel axis is last.
x_train.shape

(50000, 32, 32, 3)

In [5]:
# Take the first 10 examples as a small batch. The images are stored channels-last
# (N, H, W, C); the transpose moves the channel axis to position 1, giving
# (N, C, H, W), which is the layout the Conv2d layers below are built for.
batch_data = x_train[0:10].transpose((0,3,1,2))
batch_labels = y_train[0:10]

In [6]:
# Convert the batch to tensors with explicit dtypes: float32 images for the
# convolution layers and int64 class indices for the loss.
# `torch.as_tensor` replaces the legacy `torch.Tensor(...)` constructor, which hides
# the target dtype and sent the integer labels through a float32 round-trip.
# `.contiguous()` materialises the transposed view in standard (N, C, H, W) memory
# order, matching what the old constructor produced.
batch_data = torch.as_tensor(batch_data, dtype=torch.float32).contiguous()
batch_labels = torch.as_tensor(batch_labels, dtype=torch.long)

In [7]:
# Check the label tensor: one column of integer class indices per example.
print(batch_labels.shape)
print(batch_labels.dtype)

torch.Size([10, 1])
torch.int64


## Creating Models

PyTorch's `nn` package allows an object-oriented way to create models, just like in TensorFlow.  There is also a functional API that works similarly.

In [8]:

class ResidualBlock(torch.nn.Module):
    """Two 3x3 convolutions wrapped in an identity skip connection.

    Computes ``relu(conv2(relu(conv1(x))) + x)``. Both convolutions use a 3x3
    kernel with padding 1 and keep the channel count unchanged, so the
    convolved tensor has the same shape as the input and can be added to it.

    Args:
        channels: Number of input and output channels of both convolutions.
            Defaults to 16, the width that used to be hard-coded.
    """

    def __init__(self, channels: int = 16):
        # Call the parent class's __init__ so the layers below are registered
        # as sub-modules (and their weights show up in `parameters()`):
        super().__init__()
        self.conv1  = torch.nn.Conv2d(in_channels=channels, out_channels=channels, kernel_size=[3,3], padding=[1,1])
        self.conv2  = torch.nn.Conv2d(in_channels=channels, out_channels=channels, kernel_size=[3,3], padding=[1,1])

    def forward(self, inputs):
        """Apply the block to ``inputs`` and return a tensor of the same shape."""

        # Apply the first weights + activation:
        outputs = torch.nn.functional.relu(self.conv1(inputs))

        # Apply the second weights (no activation yet, it comes after the sum):
        outputs = self.conv2(outputs)

        # Perform the residual step:
        outputs = outputs + inputs

        # Second activation layer:
        return torch.nn.functional.relu(outputs)



In [9]:
class MyModel(torch.nn.Module):
    """Small convolutional classifier built from two residual blocks.

    Pipeline: 1x1 convolution up to 16 channels, two ``ResidualBlock`` layers,
    a 1x1 convolution down to one channel per class, then a global average
    over the spatial dimensions to get one logit per class.

    The pooling is adaptive, so the model accepts any spatial input size. The
    previous fixed ``AvgPool2d(32, 32)`` was only correct for 32x32 images:
    larger inputs left several pooled positions that the final reshape folded
    into the batch dimension.

    Args:
        in_channels: Number of channels of the input images (default 3).
        num_classes: Number of output logits per example (default 10).
    """

    def __init__(self, in_channels: int = 3, num_classes: int = 10):
        # Call the parent class's __init__ so the layers below are registered
        # as sub-modules (and their weights show up in `parameters()`):
        super().__init__()

        self.conv_init = torch.nn.Conv2d(in_channels=in_channels, out_channels=16, kernel_size=1)

        self.res1 = ResidualBlock()

        self.res2 = ResidualBlock()

        # One filter for each class:
        self.conv_final = torch.nn.Conv2d(in_channels=16, out_channels=num_classes, kernel_size=1)

        # Global average pool: reduces every feature map to a single value,
        # whatever the spatial size of the input.
        self.pool = torch.nn.AdaptiveAvgPool2d(1)

    def forward(self, inputs):
        """Return class logits of shape ``(N, num_classes)`` for a batch of images."""

        x = self.conv_init(inputs)

        x = self.res1(x)

        x = self.res2(x)

        x = self.conv_final(x)

        # The pooled tensor has singleton spatial dimensions; the reshape drops
        # them, leaving one row of logits per example.
        return self.pool(x).reshape((-1, self.conv_final.out_channels))

In [10]:
# Seed PyTorch's RNG before building the model so the randomly initialised weights,
# and therefore the loss and gradients printed below, are the same on every
# "Restart & Run All".
torch.manual_seed(0)
model = MyModel()

In [13]:
# Reminder of the label layout: shape (10, 1), i.e. one extra trailing axis.
print(batch_labels.shape)

torch.Size([10, 1])


In [14]:
# Forward pass: calling the module runs `forward` and yields one row of
# 10 class logits per example in the batch.
logits = model(batch_data)
print(logits.shape)

torch.Size([10, 10])


In [15]:
# The labels have shape (10, 1); flatten() turns them into shape (10,) so there is
# exactly one class index per row of logits.
loss = torch.nn.functional.cross_entropy(logits, batch_labels.flatten())

In [16]:
# The loss is a scalar tensor that still carries a `grad_fn`, i.e. it is attached
# to the autograd graph and can be differentiated.
print(loss)

tensor(51.1258, grad_fn=<NllLossBackward0>)


In [17]:
# Differentiate the loss with respect to the model's parameters. The result is a
# tuple of gradient tensors.
# NOTE(review): presumably one entry per parameter, in `model.parameters()` order — confirm.
gradients = torch.autograd.grad(loss, model.parameters())

In [18]:
# Prints every gradient tensor in full, so the output below is long; the first
# entry has one 1x1 filter per (output channel, input channel) pair.
print(gradients)

(tensor([[[[  0.8536]],

         [[  0.6760]],

         [[  0.3510]]],


        [[[ -1.5588]],

         [[  0.0749]],

         [[  0.0999]]],


        [[[ 10.4692]],

         [[  9.5440]],

         [[  8.5711]]],


        [[[  0.4690]],

         [[  0.4789]],

         [[  0.2915]]],


        [[[  1.9958]],

         [[  4.1428]],

         [[  5.3985]]],


        [[[  1.3789]],

         [[  1.7216]],

         [[  1.7165]]],


        [[[ 23.2377]],

         [[ 22.2916]],

         [[ 21.8782]]],


        [[[-12.3544]],

         [[ -9.5522]],

         [[ -6.9636]]],


        [[[  8.1595]],

         [[  9.9153]],

         [[  8.1544]]],


        [[[ -0.2588]],

         [[ -0.0976]],

         [[ -0.1670]]],


        [[[  3.0586]],

         [[  4.0125]],

         [[  2.4225]]],


        [[[ -0.3784]],

         [[  0.2873]],

         [[  0.7080]]],


        [[[ 13.4589]],

         [[ 13.8552]],

         [[ 12.8589]]],


        [[[ -1.0314]],

         [[ -

In [19]:
# `batch_data` was created without `requires_grad`, so autograd cannot differentiate
# with respect to it and this call raises. The error is the point of this cell, so
# it is caught and printed instead of being left to abort a "Restart & Run All".
try:
    input_grads = torch.autograd.grad(loss, batch_data)
except RuntimeError as err:
    print(f"RuntimeError: {err}")

RuntimeError: One of the differentiated Tensors does not require grad

In [24]:
# `requires_grad_()` flags `batch_data` in place as needing gradients and returns it.
# The forward pass and the loss are recomputed so that the new autograd graph
# includes the input tensor.
logits = model(batch_data.requires_grad_())
loss = torch.nn.functional.cross_entropy(logits, batch_labels.flatten())

In [25]:
# Gradient of the loss with respect to the input pixels. `grad` returns a tuple,
# so `[0]` picks out the single tensor for `batch_data`.
input_grads = torch.autograd.grad(loss, batch_data)[0]

In [26]:
# Display the input gradients (PyTorch abbreviates the large tensor with `...`).
input_grads

tensor([[[[ 2.4134e-05,  2.3808e-05,  2.0644e-05,  ...,  2.2808e-05,
            2.0544e-05,  2.0178e-05],
          [ 1.4541e-05, -3.2742e-05, -1.1840e-05,  ...,  1.8926e-05,
            1.4525e-05,  1.7029e-05],
          [ 1.6315e-05, -8.4507e-06,  1.9144e-05,  ...,  2.4422e-05,
            2.1237e-05,  1.6283e-05],
          ...,
          [ 2.4961e-05,  2.3251e-05,  1.8441e-05,  ...,  2.3605e-05,
            2.5801e-05,  1.8007e-05],
          [ 1.9031e-05,  1.7845e-05,  1.5363e-05,  ...,  1.6292e-05,
            2.1148e-05,  1.3955e-05],
          [ 2.2384e-05,  2.1780e-05,  2.1909e-05,  ...,  1.8875e-05,
            1.9938e-05,  2.1354e-05]],

         [[ 6.5610e-06,  5.2030e-06,  3.3479e-06,  ...,  7.1353e-06,
            5.9123e-06,  7.9489e-06],
          [ 5.4764e-06,  9.9231e-06,  2.2578e-05,  ...,  1.2052e-05,
            1.0294e-05,  9.0589e-06],
          [ 1.0585e-05,  9.6163e-06,  7.3156e-06,  ...,  4.9715e-06,
            5.9507e-06,  7.2503e-06],
          ...,
     