In [97]:
# notes from https://pytorch.org/tutorials/beginner/deep_learning_60min_blitz.html



## Packages it recommends:
# Text: NLTK (text wrangling), SpaCy (nlp models inc transfer learning from transformers - looks cutting edge)
# audio: scipy, librosa (music and audio analysis)
# images: Pillow (image wrangling), OpenCV (comp vision models)




import numpy as np
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import sys
import matplotlib.pyplot as plt


# From playing with the install selector on the PyTorch site, it looks like macOS builds don't get CUDA
# (GPU acceleration), though the accompanying text on the site suggests they do, so I may well be missing something:
# https://pytorch.org/get-started/locally/







In [2]:
# torch.from_numpy shares memory with the source array, so an in-place update
# of the NumPy array shows up in the tensor as well
a = np.ones(5)
b = torch.from_numpy(a)
a += 1  # in-place add, equivalent to np.add(a, 1, out=a)
print(a)
print(b)


[2. 2. 2. 2. 2.]


tensor([2., 2., 2., 2., 2.], dtype=torch.float64)

In [6]:
# torch.tensor infers the dtype from the Python values: a list of ints gives int64
data = [
    [1, 2],
    [3, 4],
]
x_data = torch.tensor(data)
x_data.dtype

torch.int64

In [9]:
# torch.rand fills a tensor of the given shape with uniform random values,
# similar to np.random.uniform
shape = (2, 3, 3)
x = torch.rand(shape)

# one item per line: the values, the shape, the dtype, and the device where the tensor is stored
print(x, x.shape, x.dtype, x.device, sep='\n')

tensor([[[0.4218, 0.8228, 0.6627],
         [0.4727, 0.5870, 0.1737],
         [0.4094, 0.9452, 0.5087]],

        [[0.1408, 0.6108, 0.5357],
         [0.4517, 0.9523, 0.5667],
         [0.3393, 0.2441, 0.3165]]])
torch.Size([2, 3, 3])
torch.float32
cpu


In [12]:
# Move the tensor to the GPU when CUDA is available; otherwise just report that it is not
if not torch.cuda.is_available():
    print('GPU no available')
else:
    x = x.to('cuda')

GPU no available


In [22]:
# Slicing works as in NumPy: take the bottom-right 2x2 corner of the second 3x3 matrix
corner = x[1, 1:, 1:]
print(corner)

# Concatenate two copies of the slice along the chosen dimension (dim=1)
print(torch.cat((corner, corner), dim=1))


### Two ways of doing element-wise multiplication
rand_tensor = torch.rand(2, 2)
print(rand_tensor)
print(corner * rand_tensor)      # operator form
print(corner.mul(rand_tensor))   # method form


tensor([[0.9523, 0.5667],
        [0.2441, 0.3165]])
tensor([[0.9523, 0.5667, 0.9523, 0.5667],
        [0.2441, 0.3165, 0.2441, 0.3165]])
tensor([[0.7821, 0.4966],
        [0.2723, 0.5283]])
tensor([[0.7448, 0.2814],
        [0.0665, 0.1672]])
tensor([[0.7448, 0.2814],
        [0.0665, 0.1672]])


In [23]:
### matrix multiplication
# `@` and `.matmul` are two spellings of the same matrix product; both lines print the same result
print(x[1, 1:, 1:] @ rand_tensor)
print(x[1, 1:, 1:].matmul(rand_tensor))

tensor([[0.8991, 0.7724],
        [0.2771, 0.2884]])
tensor([[0.8991, 0.7724],
        [0.2771, 0.2884]])


In [24]:
print(rand_tensor)
rand_tensor.add_(2)  # methods ending in _ operate in place, so re-running this cell adds 2 again
print(rand_tensor)

tensor([[0.7821, 0.4966],
        [0.2723, 0.5283]])
tensor([[2.7821, 2.4966],
        [2.2723, 2.5283]])


In [25]:
# Load ResNet-18 with pretrained weights; the first call downloads the checkpoint into the local torch hub cache
model = torchvision.models.resnet18(pretrained=True)   # downloading pre-trained model


Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /Users/apple/.cache/torch/hub/checkpoints/resnet18-5c106cde.pth


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=46827520.0), HTML(value='')))




In [29]:
# sys.getsizeof is shallow: it counts only the Python object itself, not the parameter
# tensors the module refers to, which is why the number is so small.
# NOTE(review): summing p.numel() * p.element_size() over model.parameters() would measure the weights instead
sys.getsizeof(model)  # seems unlikely it's only 48 bytes

48

In [42]:
## applying model to random data: more here https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html
data = torch.rand(1, 3, 64, 64)    # one random "image": (batch=1, channels=3, height=64, width=64)
labels = torch.rand(1, 1000)       # random target shaped like the model's (1, 1000) output

prediction = model(data)  # forward pass

loss = (prediction - labels).sum()  # toy scalar loss: summed difference between prediction and target
print(loss)

tensor(-507.5631, grad_fn=<SumBackward0>)


In [44]:
# backward() frees the graph's saved intermediate results once it has run, so executing this cell a
# second time without redoing the forward pass raises the RuntimeError recorded in this cell's output
loss.backward() # backward() begins backpropagation


RuntimeError: Trying to backward through the graph a second time, but the saved intermediate results have already been freed. Specify retain_graph=True when calling .backward() or autograd.grad() the first time.

In [46]:
# SGD with momentum over all of the ResNet's parameters
optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9) # define optimiser

In [47]:
# updates the parameters using the gradients that loss.backward() stored on them
optim.step() # one iteration of gradient descent

In [49]:
# show the optimiser's hyperparameters for each parameter group
print(optim)

SGD (
Parameter Group 0
    dampening: 0
    lr: 0.01
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)


In [51]:
# define ConvNet from:
# https://pytorch.org/tutorials/beginner/blitz/neural_networks_tutorial.html#sphx-glr-beginner-blitz-neural-networks-tutorial-py
class Net(nn.Module):
    """Small convolutional network from the PyTorch neural-networks tutorial.

    nn.Module is the base class for all PyTorch neural nets. This one takes
    batches of single-channel 32x32 inputs and returns 10 raw scores each.
    """

    def __init__(self):
        # run nn.Module's own initialiser before assigning any layers
        super().__init__()

        # two convolution stages with 5x5 square kernels: 1 -> 6 -> 16 channels
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)

        # three affine layers (y = Wx + b): 16*5*5 -> 120 -> 84 -> 10,
        # where 5*5 is the spatial size left after the second pooling step
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Define the forward pass of the architecture using the layers set up in __init__."""
        # convolution -> ReLU -> max pooling over a 2x2 window, once per conv layer
        for conv in (self.conv1, self.conv2):
            x = F.max_pool2d(F.relu(conv(x)), kernel_size=2)

        # flatten all dimensions except the batch dimension
        x = x.flatten(start_dim=1)

        # hidden linear layers are followed by ReLU; the last layer has no activation
        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))
        return self.fc3(x)


net = Net()   # build the network; its weights start at their initial (untrained) values
print(net)    # printing a module lists the layers registered on it


Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [66]:
params = list(net.parameters()) # getting model's parameters
# params[0] is the first parameter tensor (conv1's weights); indexing [0] again selects its first
# 5x5 filter. No optimiser step has been applied to `net` yet, so these are still the initial values.
params[0][0]   # first of several arrays of learned weights

tensor([[[-0.1302, -0.1424,  0.1806,  0.1944,  0.0090],
         [ 0.1023,  0.1441, -0.0005, -0.0689,  0.1372],
         [-0.0660,  0.1985, -0.1321,  0.1991, -0.1594],
         [-0.1626, -0.1843,  0.1169, -0.0697,  0.0135],
         [-0.1068, -0.0049,  0.0890, -0.1238, -0.1596]]],
       grad_fn=<SelectBackward>)

In [56]:
# NOTE: the name `input` shadows Python's built-in input(); it is kept because later cells reuse it
input = torch.randn(1, 1, 32, 32)  # single data input: (batch=1, channels=1, 32x32)
out = net(input)              # apply model to data
print(out)                  # raw, non-activated scores for each of the 10 outputs (they don't sum to 1)

tensor([[ 0.0370, -0.1661, -0.0757, -0.1018,  0.0398, -0.1242, -0.0264,  0.0217,
          0.1132, -0.0134]], grad_fn=<AddmmBackward>)


In [64]:
net.zero_grad()                   # clear any gradients already stored on the parameters
out.backward(torch.randn(1, 10))  # out is (1, 10), not a scalar, so backward needs a gradient of the same shape (random here)

In [60]:
# display the network output computed in the forward-pass cell
out

tensor([[ 0.0370, -0.1661, -0.0757, -0.1018,  0.0398, -0.1242, -0.0264,  0.0217,
          0.1132, -0.0134]], grad_fn=<AddmmBackward>)

In [None]:
# torch.Tensor = ndarray with support for funcs like backward(). Contains the tensor's gradient info 
# nn.Module = NN superclass to inherit from 
# nn.Parameter = kind of tensor, automatically registered as a param when assigned as a Module attribute


In [67]:
# torch.nn includes a few loss functions
# (nn.MSELoss, used here, is the mean squared error between output and target)

output = net(input)
target = torch.randn(10)  # a dummy target, for example
target = target.view(1, -1)  # make it the same shape as output
criterion = nn.MSELoss()  # choose loss func

loss = criterion(output, target)  # apply loss func to a single sample (1x10 output, one value per class)
print(loss)


tensor(0.9018, grad_fn=<MseLossBackward>)


In [77]:
# grad_fn is only the last node of the computation graph (the op that produced `loss`);
# earlier nodes are reached by following its .next_functions
loss.grad_fn  # says this shows the computation graph it went through, but cant see it

<MseLossBackward at 0x14e755c70>

In [90]:
print(loss.grad_fn.next_functions[0][0])  # one step back in the graph: the node that produced the loss's first input

<AddmmBackward object at 0x14e6cbdc0>


In [94]:
net.zero_grad()     # zeroes the gradient buffers of all parameters

print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)

# backward() only computes gradients (one backpropagation pass); it does not update any weights -
# that is done by the optimiser's step(). It can run once per forward pass because the graph's saved
# intermediate results are freed afterwards, hence the RuntimeError when this cell is re-run on its own.
loss.backward()    # one backprop pass only, which is why it is fast - not the full gradient descent

print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)


conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])


RuntimeError: Trying to backward through the graph a second time, but the saved intermediate results have already been freed. Specify retain_graph=True when calling .backward() or autograd.grad() the first time.

In [95]:
# create your optimizer (plain SGD over the network's parameters)
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)

# in your training loop (shown here as a single step):
optimizer.zero_grad()   # zero the gradient buffers
output = net(input)     # fresh forward pass, so backward() below has a new graph to work on
loss = criterion(output, target)
loss.backward()         # compute the gradients of the loss w.r.t. the parameters
optimizer.step()    # Does the update



### the above is a better version than the below, which is fine if you're using plain gradient descent
### without an optimiser
#learning_rate = 0.01
#for f in net.parameters():
#    f.data.sub_(f.grad.data * learning_rate)  # subtracts from f inplace, effectively: 
                #f = f - derivative_weight *learn_rate