In [1]:
# Step 0. Load libraries and custom functions
# Torch ----------------------------------------------------------------
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
# Commons --------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
# Call MPS as backend --------------------------------------------------
if torch.backends.mps.is_available():
    device = torch.device('mps')
    x = torch.ones(1, device=device)
    print (f'Use mps: {x}')
else:
    device = torch.device('cpu')
    print ("Use cpu")

Use mps: tensor([1.], device='mps:0')


In [2]:
# Step 1. Load data
# 1.1 Create training and test datasets
training_data = datasets.MNIST(
    root='../data/',
    train=True,
    download=True,
    transform=ToTensor()
)

test_data = datasets.MNIST(
    root='../data',
    train=False,
    download=True,
    transform=ToTensor(),
)

In [3]:
# 1.2 Create dataloaders
batch_size = 64
train_dataloader = DataLoader(dataset=training_data, batch_size=batch_size)
test_dataloader = DataLoader(dataset=test_data, batch_size=batch_size)

In [4]:
# 2. Create Neural Network architecture 
# 2.1 Create Neural Network class
class NN_Mnist(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 10)
        )
    
    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
# 2.2 Instantiate a Neural Network object    
model = NN_Mnist().to(device) # You have to move your model to the GPU using .to(device)

In [5]:
# 2.3 Define loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [6]:
# 2.4 Create train function
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        pred = model(X)
        loss = loss_fn(pred, y)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f'Loss: {loss:.8f}, [{(current/size)}]')

In [7]:
# 2.5 Create test function
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f'Accuracy: {correct*100:.3f}, average loss: {test_loss:>.8f}')

In [8]:
# 3. Execute neural network
epochs = 10
for t in range(epochs):
    print(f'Epoch: {t+1}')
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print('Done!')


Epoch: 1
Loss: 2.31038260, [0.0]
Loss: 0.33575094, [0.10666666666666667]
Loss: 0.26754546, [0.21333333333333335]
Loss: 0.29628628, [0.32]
Loss: 0.14645974, [0.4266666666666667]
Loss: 0.38989389, [0.5333333333333333]
Loss: 0.17012630, [0.64]
Loss: 0.32251811, [0.7466666666666667]
Loss: 0.26513380, [0.8533333333333334]
Loss: 0.25867477, [0.96]
Accuracy: 95.170, average loss: 0.15757724
Epoch: 2
Loss: 0.13301566, [0.0]
Loss: 0.08845728, [0.10666666666666667]
Loss: 0.12915169, [0.21333333333333335]
Loss: 0.15203749, [0.32]
Loss: 0.12301251, [0.4266666666666667]
Loss: 0.18013582, [0.5333333333333333]
Loss: 0.08382467, [0.64]
Loss: 0.37231672, [0.7466666666666667]
Loss: 0.20050558, [0.8533333333333334]
Loss: 0.17748952, [0.96]
Accuracy: 96.850, average loss: 0.10011967
Epoch: 3
Loss: 0.07846770, [0.0]
Loss: 0.14045615, [0.10666666666666667]
Loss: 0.14953044, [0.21333333333333335]
Loss: 0.11859259, [0.32]
Loss: 0.07067838, [0.4266666666666667]
Loss: 0.18378094, [0.5333333333333333]
Loss: 0.08

In [13]:
class SubModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.W1 = nn.Parameter(torch.randn(1, dtype=torch.float))
        self.W2 = nn.Parameter(torch.randn(1, dtype=torch.float))
    
class MyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(1,1, bias=False)
        self.sub = SubModel()

In [15]:
model = MyModel()

In [20]:
def count_parameters(model): 
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [23]:
print(torch.tensor([1,2,3]).size())

torch.Size([3])


In [24]:
features = torch.randn((1,3))
W1 = torch.randn(features.shape[1],3)
W2 = torch.randn(3,1)
B1 = torch.randn((1,3))
B2 = torch.randn((1,1))
print(f'{features} : {W1} : {W2} : {B1} : {B2}')

tensor([[ 0.6346,  1.7491, -0.9554]]) : tensor([[-0.3493,  0.4937,  0.2850],
        [ 1.1192,  2.6986,  0.1001],
        [-1.8762,  1.6090,  0.9161]]) : tensor([[ 0.0515],
        [ 1.1498],
        [-0.7431]]) : tensor([[-0.8660, -0.9546,  1.2260]]) : tensor([[-0.0506]])


In [26]:
features.shape

torch.Size([1, 3])

In [28]:
output_1 = torch.mm(features, W1 + 1) + B1
torch.mm(output_1, W2)

tensor([[3.1887]])

In [29]:
features =  torch.randn((2,6))
weights = torch.randn((2,6))

In [30]:
torch.mm(features, weights.reshape(6,2))

tensor([[ 1.4026,  0.5928],
        [-3.7924,  1.4471]])

In [31]:
torch.mm(features, weights.view(6,2))

tensor([[ 1.4026,  0.5928],
        [-3.7924,  1.4471]])

In [32]:
torch.mm(features,weights)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (2x6 and 2x6)