In [14]:
import os
import torch
from torch import nn
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
from torchvision import transforms
import torchvision


In [10]:
class TheModelClass(nn.Module):
    """Small CNN: two conv + max-pool stages followed by three linear layers.

    The first linear layer takes 16 * 5 * 5 features per sample, which is what
    the conv/pool stack yields for 3-channel 32x32 inputs
    (32 -> 28 -> 14 -> 10 -> 5 along each spatial axis). The output has 10 logits
    per sample.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        # nn.ReLU is a module *class*: nn.ReLU(tensor) builds a new layer object
        # instead of activating the tensor, so the functional torch.relu is used.
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # Flatten each sample's feature maps before the fully connected layers.
        x = x.view(-1, 16 * 5 * 5)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)
        

In [11]:
# Create an instance of TheModelClass; the cells below pass its parameters to an
# optimizer and print its state_dict.
model = TheModelClass()


In [12]:
# Cross-entropy criterion plus plain SGD (lr=0.1) over all of the model's parameters.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.1)

In [13]:
# List every entry of the model's state_dict together with the size of its tensor.
print("Model state dict:")
for entry_name, entry_tensor in model.state_dict().items():
    print(entry_name, "\t", entry_tensor.size())

# List every top-level entry of the optimizer's state_dict with its value.
print("Optimizer state dict:")
for entry_name, entry_value in optimizer.state_dict().items():
    print(entry_name, "\t", entry_value)

Model state dict:
conv1.weight 	 torch.Size([6, 3, 5, 5])
conv1.bias 	 torch.Size([6])
conv2.weight 	 torch.Size([16, 6, 5, 5])
conv2.bias 	 torch.Size([16])
fc1.weight 	 torch.Size([120, 400])
fc1.bias 	 torch.Size([120])
fc2.weight 	 torch.Size([84, 120])
fc2.bias 	 torch.Size([84])
fc3.weight 	 torch.Size([10, 84])
fc3.bias 	 torch.Size([10])
Optimizer state dict:
state 	 {}
param_groups 	 [{'lr': 0.1, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'differentiable': False, 'fused': None, 'params': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]}]


# Save and Load Models in PyTorch

In [15]:
# Preprocessing applied to every image: convert to a tensor, then normalise
# with mean 0.5 and std 0.5 so pixel values end up in the range [-1, 1].
data_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

# MNIST training and test splits, downloaded into ./data when requested;
# both splits share the same preprocessing.
train_dataset, test_dataset = (
    torchvision.datasets.MNIST(root='./data', train=is_train, transform=data_transform, download=True)
    for is_train in (True, False)
)

# Batched iteration over each split: the training loader reshuffles,
# the test loader keeps the dataset order.
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data\MNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:12<00:00, 790705.68it/s] 


Extracting ./data\MNIST\raw\train-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data\MNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 66208.55it/s]


Extracting ./data\MNIST\raw\train-labels-idx1-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data\MNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:09<00:00, 179756.86it/s]


Extracting ./data\MNIST\raw\t10k-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 1311839.19it/s]


Extracting ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw



In [17]:
class SimpleCNN(nn.Module):
    """CNN with two 3x3 convolutions and two fully connected layers.

    Each convolution is followed by ReLU and a 2x2 max-pool with stride 2.
    The flatten step assumes 32 feature maps of size 7x7, which is what
    single-channel 28x28 inputs produce. The output has 10 logits per sample.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in this order so that seeded initialisation is stable.
        self.conv1_layer = nn.Conv2d(1, 16, kernel_size=3, padding=1)
        self.conv2_layer = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.fc1_layer = nn.Linear(32 * 7 * 7, 128)
        self.fc2_layer = nn.Linear(128, 10)

    def forward(self, inputs):
        """Run the conv -> ReLU -> pool stages, flatten, and return the logits."""
        stage1 = torch.max_pool2d(
            torch.relu(self.conv1_layer(inputs)), kernel_size=2, stride=2
        )
        stage2 = torch.max_pool2d(
            torch.relu(self.conv2_layer(stage1)), kernel_size=2, stride=2
        )
        flattened = stage2.view(-1, 32 * 7 * 7)
        hidden = torch.relu(self.fc1_layer(flattened))
        return self.fc2_layer(hidden)
 
 
# Build the network; the bare name on the last line makes the notebook
# display the model's layer summary as the cell output.
cnn_model = SimpleCNN()
cnn_model

SimpleCNN(
  (conv1_layer): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2_layer): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1_layer): Linear(in_features=1568, out_features=128, bias=True)
  (fc2_layer): Linear(in_features=128, out_features=10, bias=True)
)

In [18]:
# Adam (lr=0.01) over the CNN's parameters, trained against a cross-entropy loss.
optimizer = torch.optim.Adam(params=cnn_model.parameters(), lr=0.01)
loss_func = nn.CrossEntropyLoss()

In [39]:
from tqdm import tqdm

# Train the CNN for 3 epochs, printing the mean batch loss after each epoch.
for epoch in range(3):
    cnn_model.train()  # put the module in training mode
    running_loss = 0.0

    for inputs, labels in tqdm(train_loader):
        optimizer.zero_grad()              # clear gradients left from the previous batch
        outputs = cnn_model(inputs)        # forward pass
        loss = loss_func(outputs, labels)  # loss for this batch
        loss.backward()                    # backpropagate
        optimizer.step()                   # apply the parameter update
        running_loss += loss.item()

    mean_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch+1}, Loss: {mean_loss}")

100%|██████████| 938/938 [00:33<00:00, 28.26it/s]


Epoch 1, Loss: 0.05812123128955741


100%|██████████| 938/938 [00:35<00:00, 26.66it/s]


Epoch 2, Loss: 0.052178752166961


100%|██████████| 938/938 [00:39<00:00, 23.52it/s]

Epoch 3, Loss: 0.05753555627179768





In [21]:
# Measure classification accuracy of cnn_model over test_loader.
correct_predictions = 0
total_samples = 0

cnn_model.eval()  # put the module in evaluation mode
with torch.no_grad():  # scoring needs no gradient tracking
    for inputs, labels in test_loader:
        outputs = cnn_model(inputs)
        # Predicted class = index of the largest logit along dim 1.
        # argmax replaces torch.max(outputs.data, 1): the legacy `.data`
        # accessor is unnecessary inside no_grad and the max values were unused.
        predicted = outputs.argmax(dim=1)
        total_samples += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

print(f"Accuracy of test set: {100 * correct_predictions / total_samples}%")

Accuracy of test set: 98.07%


In [43]:
# Save only the learned parameters (the state_dict), as recommended by the PyTorch docs.
# torch.save does not create missing parent directories, so make sure the target exists.
os.makedirs('../models', exist_ok=True)
torch.save(cnn_model.state_dict(), '../models/cnn_model.pth')

In [44]:
# Rebuild the architecture, then restore the saved parameters into it.
loaded_model = SimpleCNN()
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered .pth file cannot execute arbitrary code while being loaded.
loaded_model.load_state_dict(torch.load("../models/cnn_model.pth", weights_only=True))
loaded_model.eval()  # evaluation mode for inference
loaded_model

SimpleCNN(
  (conv1_layer): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2_layer): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1_layer): Linear(in_features=1568, out_features=128, bias=True)
  (fc2_layer): Linear(in_features=128, out_features=10, bias=True)
)

# Saving and Loading with a Checkpoint

In [40]:
# Bundle what is needed to resume training: the last epoch index, the model
# and optimizer state_dicts, and the most recent batch loss.
checkpoint = {
    'epoch': epoch,
    'model_state_dict': cnn_model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': loss
}

# torch.save does not create missing parent directories, so make sure the target exists.
os.makedirs("../models", exist_ok=True)
torch.save(checkpoint, "../models/checkpoint.pth")

In [41]:
# Read the checkpoint back from disk so this cell exercises the saved file
# rather than the dictionary still held in memory.
# weights_only=True restricts unpickling to tensors and plain containers.
checkpoint = torch.load("../models/checkpoint.pth", weights_only=True)

cnn_model = SimpleCNN()
cnn_model.load_state_dict(checkpoint["model_state_dict"])

# The optimizer must be rebuilt over the NEW model's parameters before its
# state is restored; otherwise it would keep updating the previous model
# instance and resumed training would never change cnn_model.
optimizer = torch.optim.Adam(cnn_model.parameters(), lr=0.01)
optimizer.load_state_dict(checkpoint["optimizer_state_dict"])

epoch = checkpoint["epoch"]
loss = checkpoint["loss"]

In [42]:
# Display the restored model's layer summary as the cell output.
cnn_model

SimpleCNN(
  (conv1_layer): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2_layer): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1_layer): Linear(in_features=1568, out_features=128, bias=True)
  (fc2_layer): Linear(in_features=128, out_features=10, bias=True)
)