In [2]:
pip install torch torchvision numpy pillow torchviz nbconvert

Collecting torch
  Downloading torch-2.4.1-cp39-cp39-manylinux2014_aarch64.whl (89.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.7/89.7 MB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting torchvision
  Downloading torchvision-0.19.1-cp39-cp39-manylinux2014_aarch64.whl (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting numpy
  Downloading numpy-2.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (13.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.9/13.9 MB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting pillow
  Downloading pillow-10.4.0-cp39-cp39-manylinux_2_28_aarch64.whl (4.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.4/4.4 MB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting torchviz
  Downloading 

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import time
import numpy


# Per-image preprocessing: tensor conversion followed by Normalize((0.5,), (0.5,))
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)

# MNIST train and test splits, both rooted at ./data and sharing the same preprocessing
train_dataset, test_dataset = (
    datasets.MNIST(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Batches of 64 samples; only the training loader shuffles
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [4]:
# SimpleNet model
class SimpleNet(nn.Module):
    """Convolutional classifier: seven 3x3 convolutions followed by two linear layers.

    Every convolution uses padding 1 and is followed by a ReLU. A 2x2 max-pool
    with stride 2 is applied after conv1, conv2, conv4 and conv6. For the
    single-channel 28x28 inputs used in this notebook the spatial size goes
    28 -> 14 -> 7 -> 3 -> 1, so the flattened feature vector has the 512
    entries that ``fc1`` expects. ``fc2`` produces 10 output scores.
    """

    def __init__(self):
        super().__init__()
        # (in_channels, out_channels) for conv1 .. conv7, in creation order
        channel_plan = (
            (1, 64), (64, 128), (128, 128), (128, 256),
            (256, 256), (256, 512), (512, 512),
        )
        for layer_no, (in_ch, out_ch) in enumerate(channel_plan, start=1):
            # Assigned as attributes conv1 .. conv7 so each layer is registered under that name
            setattr(self, f'conv{layer_no}', nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1))
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(512, 1024)
        self.fc2 = nn.Linear(1024, 10)

    def forward(self, x):
        """Return the 10 output scores for each sample in the batch ``x``."""
        # Convolutions whose activation is down-sampled by the shared pooling layer
        pooled_layers = {1, 2, 4, 6}
        for layer_no in range(1, 8):
            conv = getattr(self, f'conv{layer_no}')
            x = torch.relu(conv(x))
            if layer_no in pooled_layers:
                x = self.pool(x)

        # Collapse everything except the batch dimension before the linear layers
        features = x.view(x.size(0), -1)
        hidden = torch.relu(self.fc1(features))
        return self.fc2(hidden)

In [5]:
# Build the network together with the loss and optimizer that train() will use
model = SimpleNet()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Training loop with progress output and model saving
def train(model, train_loader, criterion, optimizer, epochs=5, save_path='simple_net.pth'):
    """Train ``model`` on ``train_loader`` and save its ``state_dict`` afterwards.

    Args:
        model: Module to optimise; it is switched to training mode.
        train_loader: Sized iterable yielding ``(images, labels)`` tensor batches.
        criterion: Callable computing the loss from ``(outputs, labels)``.
        optimizer: Optimizer whose ``zero_grad``/``step`` drive the weight updates.
        epochs: Number of full passes over ``train_loader``.
        save_path: File the model's ``state_dict`` is written to once all
            epochs have finished.

    Returns:
        None. Progress is printed every 100 batches and once per epoch.
    """
    num_batches = len(train_loader)

    # Feed batches on the device the model's parameters live on, so a model that was
    # moved to an accelerator does not hit a device mismatch.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    model.train()  # Set model to training mode
    for epoch in range(epochs):
        start_time = time.time()
        running_loss = 0.0
        for batch_idx, (images, labels) in enumerate(train_loader):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()  # Clear previous gradients
            outputs = model(images)  # Forward pass
            loss = criterion(outputs, labels)  # Compute loss
            loss.backward()  # Backward pass
            optimizer.step()  # Update weights

            batch_loss = loss.item()
            running_loss += batch_loss

            if batch_idx % 100 == 0:  # Print progress every 100 batches
                print(f"Epoch [{epoch+1}/{epochs}], Batch [{batch_idx}/{num_batches}], Loss: {batch_loss:.4f}")

        epoch_time = time.time() - start_time
        # An empty loader has no mean loss; report NaN instead of dividing by zero
        mean_loss = running_loss / num_batches if num_batches else float('nan')
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {mean_loss:.4f}, Time: {epoch_time:.2f} seconds")

    # Save the model
    torch.save(model.state_dict(), save_path)
    print(f"Model saved to {save_path}")

In [6]:
# Test loop after loading the saved model
def test(model, test_loader, save_path='simple_net.pth'):
    # Load the saved model
    model.load_state_dict(torch.load(save_path))
    model.eval()  # Set model to evaluation mode
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    accuracy = 100 * correct / total
    print(f'Accuracy: {accuracy:.2f}%')

In [5]:
# Run training with the default epoch count and checkpoint path
train(model=model, train_loader=train_loader, criterion=criterion, optimizer=optimizer)

Epoch [1/5], Batch [0/938], Loss: 2.3011
Epoch [1/5], Batch [100/938], Loss: 0.6268
Epoch [1/5], Batch [200/938], Loss: 0.2710
Epoch [1/5], Batch [300/938], Loss: 0.0516
Epoch [1/5], Batch [400/938], Loss: 0.0427
Epoch [1/5], Batch [500/938], Loss: 0.0240
Epoch [1/5], Batch [600/938], Loss: 0.1251
Epoch [1/5], Batch [700/938], Loss: 0.0288
Epoch [1/5], Batch [800/938], Loss: 0.1199
Epoch [1/5], Batch [900/938], Loss: 0.0774
Epoch [1/5], Loss: 0.2907, Time: 582.59 seconds
Epoch [2/5], Batch [0/938], Loss: 0.0524
Epoch [2/5], Batch [100/938], Loss: 0.0464
Epoch [2/5], Batch [200/938], Loss: 0.0666
Epoch [2/5], Batch [300/938], Loss: 0.0110
Epoch [2/5], Batch [400/938], Loss: 0.0172
Epoch [2/5], Batch [500/938], Loss: 0.0463
Epoch [2/5], Batch [600/938], Loss: 0.1779
Epoch [2/5], Batch [700/938], Loss: 0.0129
Epoch [2/5], Batch [800/938], Loss: 0.0183
Epoch [2/5], Batch [900/938], Loss: 0.0649
Epoch [2/5], Loss: 0.0626, Time: 441.74 seconds
Epoch [3/5], Batch [0/938], Loss: 0.0609
Epoch [

In [7]:
# Evaluate on the test loader; test() first reloads the weights from its default checkpoint path
test(model=model, test_loader=test_loader)

  model.load_state_dict(torch.load(save_path))


Accuracy: 98.99%


In [9]:
from torchviz import make_dot
import torch
from IPython.display import Image, display

# Push one random single-channel 28x28 sample through the model so there is an output to trace
x = torch.randn(1, 1, 28, 28)
y = model(x)

# Build the graph from the output, handing over the model's named parameters
named_params = {name: param for name, param in model.named_parameters()}
dot = make_dot(y, params=named_params)

# Render the graph to model_architecture.png
dot.format = 'png'
dot.render('model_architecture')

# Show the rendered file inline in the notebook
display(Image(filename='model_architecture.png'))


'model_architecture.png'

In [13]:
# Use only a subset of the layers for a simpler visualization
class SimpleNetPruned(nn.Module):
    """Reduced network with two convolutions, used for a simpler architecture diagram.

    Both convolutions are 3x3 with padding 1 and are followed by a ReLU; a 2x2
    max-pool with stride 2 follows the second one. The flattened feature map
    feeds ``fc1`` (10 outputs, then ReLU) and ``fc2`` (10 -> 10).

    ``fc1`` is an ``nn.LazyLinear``: it is registered as a submodule at
    construction time, so it appears in ``parameters()`` and ``state_dict()``
    straight away, and its input width is inferred from the first batch that
    passes through ``forward``.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)

        # Input width depends on the spatial size of the input, so let PyTorch infer it
        # on the first forward pass instead of creating the layer by hand inside forward()
        self.fc1 = nn.LazyLinear(10)
        self.fc2 = nn.Linear(10, 10)  # Modify as needed for your final output

    def forward(self, x):
        """Return the 10 output values for each sample in the batch ``x``."""
        x = torch.relu(self.conv1(x))
        x = self.pool(torch.relu(self.conv2(x)))

        x = x.view(x.size(0), -1)  # Flatten the feature map
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Instantiate the reduced network
model_pruned = SimpleNetPruned()

# Run one random single-channel 28x28 sample through it; fc1 is only fully initialised by
# this first forward call, so it has to happen before named_parameters() is read
x = torch.randn(1, 1, 28, 28)
y = model_pruned(x)

# Build the graph from the output, handing over the model's named parameters
named_params = {name: param for name, param in model_pruned.named_parameters()}
dot = make_dot(y, params=named_params)

# Render the graph to model_architecture_pruned.png
dot.format = 'png'
dot.render('model_architecture_pruned')


'model_architecture_pruned.png'

In [14]:
from torchviz import make_dot
import torch

# Requires the SimpleNet class from an earlier cell.
# NOTE(review): this rebinds the global `model` to a freshly initialised network, replacing
# the instance that was trained earlier in the notebook.
model = SimpleNet()

# Run one random single-channel 28x28 sample through the model
x = torch.randn(1, 1, 28, 28)
y = model(x)

# Build the graph from the output, handing over the model's named parameters
named_params = {name: param for name, param in model.named_parameters()}
dot = make_dot(y, params=named_params)

# Render the graph to simplenet_architecture.png
dot.format = 'png'
dot.render('simplenet_architecture')


'simplenet_architecture.png'

In [18]:
import subprocess

def convert_notebook_to_pdf(notebook_path):
    """Convert a notebook to PDF by running ``jupyter nbconvert --to pdf``.

    Args:
        notebook_path: Path of the notebook file; passed to nbconvert unchanged.

    Returns:
        None. Success or failure is reported on standard output; a failed
        conversion does not raise.
    """
    command = ['jupyter', 'nbconvert', '--to', 'pdf', notebook_path]
    try:
        # check=True turns a non-zero exit status into CalledProcessError
        subprocess.run(command, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers the command failing to start at all (for example when `jupyter`
        # is not on PATH), so that case is reported the same way instead of raising
        print(f"Error occurred while converting notebook to PDF: {e}")
    else:
        print(f"Notebook {notebook_path} has been successfully converted to PDF.")

# Convert this notebook's file to PDF
convert_notebook_to_pdf(notebook_path='simple_net_5_2.ipynb')


[NbConvertApp] Converting notebook simple_net_5_2.ipynb to pdf
[NbConvertApp] Writing 53698 bytes to notebook.tex
[NbConvertApp] Building PDF
[NbConvertApp] Running xelatex 3 times: ['xelatex', 'notebook.tex', '-quiet']


Error occurred while converting notebook to PDF: Command '['jupyter', 'nbconvert', '--to', 'pdf', 'simple_net_5_2.ipynb']' returned non-zero exit status 1.


[NbConvertApp] CRITICAL | xelatex failed: ['xelatex', 'notebook.tex', '-quiet']
b"This is XeTeX, Version 3.141592653-2.6-0.999994 (TeX Live 2022/Debian) (preloaded format=xelatex)\n restricted \\write18 enabled.\nentering extended mode\n(./notebook.tex\nLaTeX2e <2022-11-01> patch level 1\nL3 programming layer <2023-01-16>\n(/usr/share/texlive/texmf-dist/tex/latex/base/article.cls\nDocument Class: article 2022/07/02 v1.4n Standard LaTeX document class\n(/usr/share/texlive/texmf-dist/tex/latex/base/size11.clo))\n(/usr/share/texlive/texmf-dist/tex/latex/tcolorbox/tcolorbox.sty\n(/usr/share/texlive/texmf-dist/tex/latex/pgf/basiclayer/pgf.sty\n(/usr/share/texlive/texmf-dist/tex/latex/pgf/utilities/pgfrcs.sty\n(/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfutil-common.tex)\n(/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfutil-latex.def)\n(/usr/share/texlive/texmf-dist/tex/generic/pgf/utilities/pgfrcs.code.tex\n(/usr/share/texlive/texmf-dist/tex/generic/pgf/pgf.revisi