<a href="https://colab.research.google.com/github/chrishare/colab_deeplearning/blob/master/pytorch_basics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Some pytorch basics - first, a simple numpy-based regression problem
# that attempts to produce learn a,b,c,d such that a + bx + cx^2 + dx^3
# closely models sin(x) between -pi and +pi
import numpy as np
import math

# Create random input and output data
# Get a row vector of linearly spaced numbers between -pi and +pi
x = np.linspace(-math.pi, math.pi, 2000)
# Get a row vector of sin(x_i) for each i in x
y = np.sin(x)
# Get the 'size'of x - which will be (2000,) - a 1-dim row vector of size 2000
x.shape

# Randomly initialize weights - get 4 individual floats that are 
a = np.random.randn()
b = np.random.randn()
c = np.random.randn()
d = np.random.randn()

# Set learning rate to be 1 / 10^6
learning_rate = 1e-6

# For 2000 iterations (epochs)
for t in range(2000):
    # Forward pass: compute predicted y
    # y = a + b x + c x^2 + d x^3
    # y is a 2000-el row vector based on the current weights
    y_pred = a + b * x + c * x ** 2 + d * x ** 3

    # Compute and print loss by calculating the square of every difference 
    # between the predication and actual answer (label), and summing
    # So loss is a scalar of the magnitude of the loss on the whole dataset
    loss = np.square(y_pred - y).sum()
    if t % 100 == 99:
        print(t, loss)

    # Backprop to compute gradients of a, b, c, d with respect to loss
    grad_y_pred = 2.0 * (y_pred - y)
    # a = sum(2 * (y_pred - y_actual)) for each el in y
    grad_a = grad_y_pred.sum()
    # b = sum(2 * (y_pred - y_actual) * x) for each el in y
    grad_b = (grad_y_pred * x).sum()
    # c = sum(2 * (y_pred - y_actual) * x^2) for each el in y
    grad_c = (grad_y_pred * x ** 2).sum()
    # d = sum(2 * (y_pred - y_actual) * x^3) for each el in y
    grad_d = (grad_y_pred * x ** 3).sum()

    # Update weights - subtrack the gradient of the loss * learning rate
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f'Result: y = {a} + {b} x + {c} x^2 + {d} x^3')

99 18.09889106562996
199 15.105687454472273
299 13.082607103470345
399 11.713518430560395
499 10.786040027133247
599 10.157057297824208
699 9.730041053359837
799 9.439820602584472
899 9.242353029135224
999 9.10784353510595
1099 9.0161151779539
1199 8.95348972519423
1299 8.910684511709542
1399 8.881393000359349
1499 8.86132584235104
1599 8.847562388040501
1699 8.838111690051994
1799 8.831615010272614
1899 8.827144003023928
1999 8.824063654947992
Result: y = 0.0022636170507037193 + 0.8582230470700756 x + -0.0003905116079966977 x^2 + -0.09354121965192531 x^3


In [None]:
# Next, a pytorch example - though it uses CPU

import torch
import math

# Get a reference to the torch float datatype
dtype = torch.float
# Get a reference to the local CPU device - you can get a cuda GPU too
device = torch.device("cpu")
# device = torch.device("cuda:0") # Uncomment this to run on GPU

# Create random input and output data, on the CPU using torch.float
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
# Get the ground truth sin(x) for every input x
y = torch.sin(x)

# Randomly initialize weights, again, on CPU using torch.floar
a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

# The rest is as per numpy - it magically does the computation on the torch
# device and types acquired when setting up the variables/tensors
learning_rate = 1e-6
for t in range(2000):
    # Forward pass: compute predicted y
    y_pred = a + b * x + c * x ** 2 + d * x ** 3

    # Compute and print loss
    loss = (y_pred - y).pow(2).sum().item()
    if t % 100 == 99:
        print(t, loss)

    # Backprop to compute gradients of a, b, c, d with respect to loss
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x ** 2).sum()
    grad_d = (grad_y_pred * x ** 3).sum()

    # Update weights using gradient descent
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d


print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

In [23]:
# Finally, use pytorch functions to shorten the program and use out of the box
# optimisation

import torch
import math

# Create Tensors to hold input and outputs.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# Prepare the input tensor (x, x^2, x^3).
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)

# Use the nn package to define our model and loss function.
model = torch.nn.Sequential(
    # 3 input features, 1 output feature - a linear transform xA + b
    torch.nn.Linear(3, 1),
    # Flatten dims 0 and 1 into a tensor
    torch.nn.Flatten(0, 1)
)

# Define a lose function using mean squared error, and sum the output
loss_fn = torch.nn.MSELoss(reduction='sum')

# Use the optim package to define an Optimizer that will update the weights of
# the model for us. Here we will use RMSprop; the optim package contains many other
# optimization algorithms. The first argument to the RMSprop constructor tells the
# optimizer which Tensors it should update.
learning_rate = 1e-3
optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)
# For 2000 iterations over the dataset (2000 epochs)
for t in range(2000):
    # Forward pass: compute predicted y by passing x to the model.
    y_pred = model(xx)

    # Compute and print loss.
    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    # Before the backward pass, use the optimizer object to zero all of the
    # gradients for the variables it will update (which are the learnable
    # weights of the model). This is because by default, gradients are
    # accumulated in buffers( i.e, not overwritten) whenever .backward()
    # is called. Checkout docs of torch.autograd.backward for more details.
    optimizer.zero_grad()

    # Backward pass: compute gradient of the loss with respect to model
    # parameters
    loss.backward()

    # Calling the step function on an Optimizer makes an update to its
    # parameters
    optimizer.step()


linear_layer = model[0]
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

99 25854.71484375
199 12355.208984375
299 6130.99951171875
399 3682.5263671875
499 2915.4697265625
599 2559.28125
699 2241.237060546875
799 1938.8804931640625
899 1663.5596923828125
999 1417.183837890625
1099 1196.697509765625
1199 998.4860229492188
1299 820.563720703125
1399 662.1236572265625
1499 522.6375732421875
1599 401.6993713378906
1699 299.1456604003906
1799 213.55767822265625
1899 145.0023956298828
1999 92.24667358398438
Result: y = -0.0008279461762867868 + 0.5763206481933594 x + -0.0008301403722725809 x^2 + -0.05408855527639389 x^3


In [22]:
import torch, torch.nn
import math
m = torch.nn.Linear(4, 5)
print(m)
print(m.weight)
print(m.bias)
input = torch.randn(10, 4)
print(input)
output = m(input)
print(output)
print(output.size())


Linear(in_features=4, out_features=5, bias=True)
Parameter containing:
tensor([[ 0.3601, -0.3429, -0.0460, -0.3918],
        [-0.3225,  0.3899, -0.0995,  0.2800],
        [-0.2407, -0.1779,  0.4001,  0.4539],
        [ 0.1469, -0.2761, -0.0304, -0.3208],
        [ 0.4796, -0.1443,  0.3001, -0.1271]], requires_grad=True)
Parameter containing:
tensor([-0.2080,  0.0932,  0.3847, -0.1223,  0.2236], requires_grad=True)
tensor([[ 0.8187, -0.3245,  0.4071, -1.1526],
        [ 0.0976,  0.7363,  0.4647, -0.2185],
        [-0.9918, -0.2333,  0.5036,  1.8562],
        [-0.6338,  1.0718, -0.7214, -0.3042],
        [ 0.3224, -1.9709,  0.6342,  1.0381],
        [ 0.3462, -0.7425, -1.0589, -0.7979],
        [-0.3125, -0.8376,  0.9666,  0.0416],
        [-1.1291, -1.3341,  0.7975, -0.7326],
        [-0.3605,  0.5124, -0.6584,  0.7430],
        [ 0.4290,  1.5329, -0.5346,  0.2783]])
tensor([[ 0.6310, -0.6606, -0.1150,  0.4450,  0.9318],
        [-0.3611,  0.2414,  0.3170, -0.2553,  0.3314],
        [-1

In [41]:
!git clone https://github.com/cocodataset/cocoapi.git
%cd cocoapi/PythonAPI
!pwd
!ls ./cocoapi/PythonAPI
!python setup.py build_ext install

fatal: destination path 'cocoapi' already exists and is not an empty directory.
/content/cocoapi/PythonAPI
/content/cocoapi/PythonAPI
ls: cannot access './cocoapi/PythonAPI': No such file or directory
running build_ext
cythoning pycocotools/_mask.pyx to pycocotools/_mask.c
  tree = Parsing.p_module(s, pxd, full_module_name)
building 'pycocotools._mask' extension
creating build
creating build/common
creating build/temp.linux-x86_64-3.7
creating build/temp.linux-x86_64-3.7/pycocotools
x86_64-linux-gnu-gcc -pthread -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O2 -Wall -g -fdebug-prefix-map=/build/python3.7-a56wZI/python3.7-3.7.10=. -fstack-protector-strong -Wformat -Werror=format-security -g -fdebug-prefix-map=/build/python3.7-a56wZI/python3.7-3.7.10=. -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -fPIC -I/usr/local/lib/python3.7/dist-packages/numpy/core/include -I../common -I/usr/include/python3.7m -c ../common/maskApi.c -o build/temp

In [48]:
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output


def train(model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

torch.manual_seed(42)
use_cuda = False
device = torch.device("cuda" if use_cuda else "cpu")

train_kwargs = {'batch_size': 64}
test_kwargs = {'batch_size': 100 }
if use_cuda:
    cuda_kwargs = {'num_workers': 1,
                    'pin_memory': True,
                    'shuffle': True}
    train_kwargs.update(cuda_kwargs)
    test_kwargs.update(cuda_kwargs)

transform=transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
    ])
dataset1 = datasets.MNIST('../data', train=True, download=True,
                    transform=transform)
dataset2 = datasets.MNIST('../data', train=False,
                    transform=transform)
train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs)
test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)

model = Net().to(device)
optimizer = optim.Adadelta(model.parameters(), lr=0.1)

scheduler = StepLR(optimizer, step_size=1, gamma=0.7)
for epoch in range(1, 14 + 1):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)
    scheduler.step()

torch.save(model.state_dict(), "mnist_cnn.pt")




Test set: Average loss: 0.0869, Accuracy: 9740/10000 (97%)


Test set: Average loss: 0.0590, Accuracy: 9804/10000 (98%)


Test set: Average loss: 0.0509, Accuracy: 9824/10000 (98%)


Test set: Average loss: 0.0472, Accuracy: 9837/10000 (98%)


Test set: Average loss: 0.0451, Accuracy: 9845/10000 (98%)


Test set: Average loss: 0.0430, Accuracy: 9847/10000 (98%)



KeyboardInterrupt: ignored