In [1]:
import os
import math
import numpy as np
import time

## Imports for plotting
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('svg', 'pdf') # For export
from matplotlib.colors import to_rgba
import seaborn as sns
sns.set()

## Progress bar
from tqdm.notebook import tqdm

  set_matplotlib_formats('svg', 'pdf') # For export


In [2]:
import torch
print("Using torch", torch.__version__)

Using torch 2.2.1


In [3]:
torch.manual_seed(42)

<torch._C.Generator at 0x127423f90>

In [4]:
x = torch.Tensor(2, 3, 4)
print(x)

tensor([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]])


In [5]:
# Create a tensor from a (nested) list
x = torch.Tensor([[1, 2], [3, 4]])
print(x)

tensor([[1., 2.],
        [3., 4.]])


In [6]:
# Create a tensor with random values between 0 and 1 with the shape [2, 3, 4]
x = torch.rand(2, 3, 4)
print(x)

tensor([[[0.8823, 0.9150, 0.3829, 0.9593],
         [0.3904, 0.6009, 0.2566, 0.7936],
         [0.9408, 0.1332, 0.9346, 0.5936]],

        [[0.8694, 0.5677, 0.7411, 0.4294],
         [0.8854, 0.5739, 0.2666, 0.6274],
         [0.2696, 0.4414, 0.2969, 0.8317]]])


In [7]:
shape = x.shape
print("Shape:", x.shape)

size = x.size()
print("Size:", size)

dim1, dim2, dim3 = x.size()
print("Size:", dim1, dim2, dim3)

Shape: torch.Size([2, 3, 4])
Size: torch.Size([2, 3, 4])
Size: 2 3 4


In [8]:
np_arr = np.array(x)
print(np_arr)

[[[0.88226926 0.91500396 0.38286376 0.95930564]
  [0.3904482  0.60089535 0.25657248 0.7936413 ]
  [0.94077146 0.13318592 0.9345981  0.59357965]]

 [[0.86940444 0.5677153  0.74109405 0.4294045 ]
  [0.8854429  0.57390445 0.26658005 0.62744915]
  [0.26963168 0.44136357 0.29692084 0.8316855 ]]]


In [9]:
tensor = torch.from_numpy(np_arr)
print(tensor)

tensor([[[0.8823, 0.9150, 0.3829, 0.9593],
         [0.3904, 0.6009, 0.2566, 0.7936],
         [0.9408, 0.1332, 0.9346, 0.5936]],

        [[0.8694, 0.5677, 0.7411, 0.4294],
         [0.8854, 0.5739, 0.2666, 0.6274],
         [0.2696, 0.4414, 0.2969, 0.8317]]])


In [10]:
tensor = torch.arange(10)
np_arr = tensor.numpy()

print("PyTorch tensor:", tensor)
print("Numpy array:", np_arr)

#conversion of tensor to numpy on tensor happens on CPU

PyTorch tensor: tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
Numpy array: [0 1 2 3 4 5 6 7 8 9]


In [11]:
x1 = torch.rand(2,3)
x2 = torch.rand(2,3)
y = x1+x2
print("X1", x1)
print("X2", x2)
print("Y", y)

X1 tensor([[0.1053, 0.2695, 0.3588],
        [0.1994, 0.5472, 0.0062]])
X2 tensor([[0.9516, 0.0753, 0.8860],
        [0.5832, 0.3376, 0.8090]])
Y tensor([[1.0569, 0.3448, 1.2448],
        [0.7826, 0.8848, 0.8151]])


In [12]:
#in place operation

x1 = torch.rand(2, 3)
x2 = torch.rand(2, 3)
print("X1 (before)", x1)
print("X2 (before)", x2)

x2.add_(x1)
print("X1 (after)", x1)
print("X2 (after)", x2)
x2.mul_(x1)
print(x2)

X1 (before) tensor([[0.5779, 0.9040, 0.5547],
        [0.3423, 0.6343, 0.3644]])
X2 (before) tensor([[0.7104, 0.9464, 0.7890],
        [0.2814, 0.7886, 0.5895]])
X1 (after) tensor([[0.5779, 0.9040, 0.5547],
        [0.3423, 0.6343, 0.3644]])
X2 (after) tensor([[1.2884, 1.8504, 1.3437],
        [0.6237, 1.4230, 0.9539]])
tensor([[0.7446, 1.6727, 0.7453],
        [0.2135, 0.9027, 0.3476]])


In [13]:
x = torch.arange(1,7)
print("X", x)

X tensor([1, 2, 3, 4, 5, 6])


In [14]:
x = x.view(2,3)
x.shape

torch.Size([2, 3])

In [15]:
x = x.permute(1, 0) # Swapping dimension 0 and 1
print("X", x)

X tensor([[1, 4],
        [2, 5],
        [3, 6]])


In [16]:
x = torch.arange(6)
x = x.view(2, 3)
print("X", x)

X tensor([[0, 1, 2],
        [3, 4, 5]])


In [17]:
W = torch.arange(9).view(3, 3) # We can also stack multiple operations in a single line
print("W", W)

W tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])


In [18]:
h = torch.matmul(x, W) # Verify the result by calculating it by hand too!
print("h", h)

h tensor([[15, 18, 21],
        [42, 54, 66]])


In [19]:
x = torch.arange(12).view(3, 4)
print("X", x)


X tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])


In [20]:
print(x[:,1])

tensor([1, 5, 9])


In [21]:
print(x[0,:])

tensor([0, 1, 2, 3])


In [22]:
print(x[0])

tensor([0, 1, 2, 3])


In [23]:
print(x[2,:-1])

tensor([ 8,  9, 10])


In [24]:
print(x[1:3, :])

tensor([[ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])


In [25]:
x = torch.ones((3,))
print(x.requires_grad)

False


In [26]:
x.requires_grad = True
print(x.requires_grad)

True


In [27]:
#in place grads
x.requires_grad_(True)
print(x.requires_grad)

True


In [28]:

x = torch.arange(3, dtype=torch.float32, requires_grad=True) # Only float tensors can have gradients
print("X", x)

X tensor([0., 1., 2.], requires_grad=True)


In [29]:
a = x+2
b = a**2
c = b+3
y = c.mean()
print(y)

tensor(12.6667, grad_fn=<MeanBackward0>)


In [30]:
y.backward()

In [31]:
print(x.grad)

tensor([1.3333, 2.0000, 2.6667])


In [32]:
device = torch.device("mps")
print(device)
x = torch.ones(1, device=device)
print (x)

mps
tensor([1.], device='mps:0')


In [33]:
x = torch.zeros(2, 3)
x = x.to(device)
print("X", x)

X tensor([[0., 0., 0.],
        [0., 0., 0.]], device='mps:0')


In [49]:

x = torch.randn(5000, 5000)

## CPU version
start_time = time.time()
_ = torch.matmul(x, x)
end_time = time.time()
print(f"CPU time: {(end_time - start_time):6.5f}s")
## GPU version
x = x.to(device)
_ = torch.matmul(x, x)  # First operation to 'burn in' GPU
# CUDA is asynchronous, so we need to use different timing functions
start = time.time()
_ = torch.matmul(x, x)
end = time.time()
print(f"GPU time: {(end - start):6.5f}s") # Milliseconds to seconds

CPU time: 0.16374s
GPU time: 0.00021s


In [51]:
import torch.nn as nn
import torch.nn.functional as F

In [52]:
class MyModule(nn.Module):

    def __init__(self):
        super().__init__()
        # Some init for my module

    def forward(self, x):
        # Function for performing the calculation of the module.
        pass

In [53]:
class SimpleClassifier(nn.Module):

    def __init__(self, num_inputs, num_hidden, num_outputs):
        super().__init__()
        # Initialize the modules we need to build the network
        self.linear1 = nn.Linear(num_inputs, num_hidden)
        self.act_fn = nn.Tanh()
        self.linear2 = nn.Linear(num_hidden, num_outputs)

    def forward(self, x):
        # Perform the calculation of the model to determine the prediction
        x = self.linear1(x)
        x = self.act_fn(x)
        x = self.linear2(x)
        return x

In [54]:
model = SimpleClassifier(num_inputs=2, num_hidden=4, num_outputs=1)
# Printing a module shows all its submodules
print(model)

SimpleClassifier(
  (linear1): Linear(in_features=2, out_features=4, bias=True)
  (act_fn): Tanh()
  (linear2): Linear(in_features=4, out_features=1, bias=True)
)
