# Install required packages 

In [1]:
# install required packages for the project

%pip install torch torchvision torchaudio -quit --index-url https://download.pytorch.org/whl/cpu
%pip install -quit matplotlib
%pip install -quit safetensors


Usage:   
  /home/ram/Desktop/DeepLearning/env/bin/python -m pip install [options] <requirement specifier> [package-index-options] ...
  /home/ram/Desktop/DeepLearning/env/bin/python -m pip install [options] -r <requirements file> [package-index-options] ...
  /home/ram/Desktop/DeepLearning/env/bin/python -m pip install [options] [-e] <vcs project url> ...
  /home/ram/Desktop/DeepLearning/env/bin/python -m pip install [options] [-e] <local project path> ...
  /home/ram/Desktop/DeepLearning/env/bin/python -m pip install [options] <archive url/path> ...

no such option: -u
Note: you may need to restart the kernel to use updated packages.

Usage:   
  /home/ram/Desktop/DeepLearning/env/bin/python -m pip install [options] <requirement specifier> [package-index-options] ...
  /home/ram/Desktop/DeepLearning/env/bin/python -m pip install [options] -r <requirements file> [package-index-options] ...
  /home/ram/Desktop/DeepLearning/env/bin/python -m pip install [options] [-e] <vcs project url>

##   import necessary libraries

In [None]:
# import necessary libraries
import time
import sys

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, datasets
import matplotlib.pyplot as plt

## Load dataset

In [6]:
train_dataset = datasets.MNIST(
    root='.',
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
)

In [7]:
test_dataset = datasets.MNIST(
    root='.',
    train=False,
    download=True,
    transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
)

In [10]:
print(len(train_dataset), len(test_dataset))

60000 10000


## Converting data in to batches

In [11]:
batch_size = 64

train_loader = DataLoader(train_dataset,batch_size=batch_size,shuffle=True)
test_loader = DataLoader(test_dataset,batch_size=batch_size,shuffle=False)

print(len(train_loader), len(test_loader))

938 157


In [13]:
 # Train the model
def train(model, train_loader, criterion, optimizer, device):
    model.train()
    total_loss = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(train_loader)
# Evaluate the model
def evaluate(model, test_loader, criterion, device):
    model.eval()
    total_loss = 0
    correct = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            total_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            correct += (predicted == labels).sum().item()
    accuracy = correct / len(test_loader.dataset)
    return total_loss / len(test_loader), accuracy


In [None]:

class LogisticRegression(nn.Module):
    def __init__(self,in_dim,n_class):
        super().__init__()
        self.in_dim = in_dim
        self.n_class = n_class
        self.linear = nn.Linear(in_dim, n_class)

    def forward(self, x):
        out=self.linear(x)
        return out



In [15]:
model = LogisticRegression(28*28,10)
print(model)

LogisticRegression(
  (linear): Linear(in_features=784, out_features=10, bias=True)
)


In [21]:
model.state_dict()
print(model.state_dict().keys())
print(model.state_dict()['linear.weight'].shape)
print(model.state_dict()['linear.bias'].shape)
# Set device

odict_keys(['linear.weight', 'linear.bias'])
torch.Size([10, 784])
torch.Size([10])


In [23]:
print(list(model.parameters()))

[Parameter containing:
tensor([[-0.0039,  0.0246,  0.0163,  ...,  0.0170, -0.0328,  0.0249],
        [ 0.0297,  0.0210, -0.0226,  ...,  0.0204,  0.0281,  0.0152],
        [ 0.0291, -0.0096, -0.0228,  ...,  0.0060, -0.0263, -0.0275],
        ...,
        [-0.0101, -0.0120,  0.0347,  ...,  0.0341,  0.0138, -0.0268],
        [-0.0073,  0.0138, -0.0247,  ..., -0.0211, -0.0249,  0.0287],
        [-0.0243,  0.0280, -0.0356,  ...,  0.0003,  0.0271, -0.0198]],
       requires_grad=True), Parameter containing:
tensor([-0.0019,  0.0347, -0.0093,  0.0172, -0.0102, -0.0055, -0.0233,  0.0163,
        -0.0055, -0.0214], requires_grad=True)]


In [None]:
# For CuDA support, check if CUDA is available and set the device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
print(device)

cpu


In [None]:
# for MPS support (Apple Silicon Macs)
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
model = model.to(device)
print(device)

cpu


In [None]:
# Loss function and optimizer
learning_rate = 1e-2
momentum = 0.9
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)


# Building Training loop

In [31]:
# Training loop
num_epochs = 5
for epoch in range(num_epochs):
    print("*"*20)
    start_time = time.time()
    running_loss=0
    running_acc=0
    model.train()
    for i ,data in enumerate(train_loader,1):
        images, labels = data
        # Flatten the images
        images = images.view(images.size(0), -1)  # Flatten the images
        images, labels = images.to(device), labels.to(device)
        
        outputs = model(images)
        loss = criterion(outputs, labels)
        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        # running_acc += (predicted == labels).sum().item()
        running_acc += (predicted == labels).float().mean()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if i % 100 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Step [{i}/{len(train_loader)}], Loss: {loss.item():.4f}, Accuracy: {running_acc / i:.4f}")
    end_time = time.time()
    print(f"Epoch [{epoch+1}/{num_epochs}] completed in {end_time - start_time:.2f} seconds, Loss: {running_loss / len(train_loader):.4f}, Accuracy: {running_acc / len(train_loader):.4f}")


********************
Epoch [1/5], Step [100/938], Loss: 0.1266, Accuracy: 0.9261
Epoch [1/5], Step [200/938], Loss: 0.3006, Accuracy: 0.9265
Epoch [1/5], Step [300/938], Loss: 0.2388, Accuracy: 0.9244
Epoch [1/5], Step [400/938], Loss: 0.1688, Accuracy: 0.9239
Epoch [1/5], Step [500/938], Loss: 0.1805, Accuracy: 0.9243
Epoch [1/5], Step [600/938], Loss: 0.0855, Accuracy: 0.9240
Epoch [1/5], Step [700/938], Loss: 0.3547, Accuracy: 0.9234
Epoch [1/5], Step [800/938], Loss: 0.2096, Accuracy: 0.9235
Epoch [1/5], Step [900/938], Loss: 0.3291, Accuracy: 0.9231
Epoch [1/5] completed in 26.54 seconds, Loss: 0.2755, Accuracy: 0.9233
********************
Epoch [2/5], Step [100/938], Loss: 0.2530, Accuracy: 0.9223
Epoch [2/5], Step [200/938], Loss: 0.3298, Accuracy: 0.9241
Epoch [2/5], Step [300/938], Loss: 0.2981, Accuracy: 0.9221
Epoch [2/5], Step [400/938], Loss: 0.2728, Accuracy: 0.9223
Epoch [2/5], Step [500/938], Loss: 0.1497, Accuracy: 0.9225
Epoch [2/5], Step [600/938], Loss: 0.3133, Accu

In [38]:
# Training loop
num_epochs = 5
for epoch in range(num_epochs):
    print("*" * 20)
    start_time = time.time()
    running_loss = 0
    total_correct = 0
    total_samples = 0

    model.train()
    for i, data in enumerate(train_loader, 1):
        images, labels = data
        images = images.view(images.size(0), -1)  # Flatten the images
        images, labels = images.to(device), labels.to(device)

        outputs = model(images)
        # print(outputs, labels.data,outputs.shape)
        loss = criterion(outputs, labels)
        running_loss += loss.item()

        _, predicted = torch.max(outputs, 1)
        total_correct += (predicted == labels).sum().item()
        total_samples += labels.size(0)
        # print(total_correct, total_samples)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % 100 == 0:
            accuracy = total_correct / total_samples
            print(f"Epoch [{epoch+1}/{num_epochs}], Step [{i}/{len(train_loader)}], "f"Loss: {loss.item():.4f}, Accuracy: {accuracy:.4f}")

    end_time = time.time()
    # print(f"Outputs : {outputs}")
    # print(f"Outputs Data : {outputs.data}")
    print(total_correct, total_samples)
    epoch_loss = running_loss / len(train_loader)
    epoch_acc = total_correct / total_samples
    print(f"Epoch [{epoch+1}/{num_epochs}] completed in {end_time - start_time:.2f} seconds, "f"Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}")


********************
Epoch [1/5], Step [100/938], Loss: 0.1936, Accuracy: 0.9258
Epoch [1/5], Step [200/938], Loss: 0.1651, Accuracy: 0.9269
Epoch [1/5], Step [300/938], Loss: 0.1411, Accuracy: 0.9254
Epoch [1/5], Step [400/938], Loss: 0.4887, Accuracy: 0.9248
Epoch [1/5], Step [500/938], Loss: 0.1467, Accuracy: 0.9245
Epoch [1/5], Step [600/938], Loss: 0.1807, Accuracy: 0.9247
Epoch [1/5], Step [700/938], Loss: 0.1508, Accuracy: 0.9251
Epoch [1/5], Step [800/938], Loss: 0.5002, Accuracy: 0.9257
Epoch [1/5], Step [900/938], Loss: 0.2957, Accuracy: 0.9263
55575 60000
Epoch [1/5] completed in 24.74 seconds, Loss: 0.2660, Accuracy: 0.9263
********************
Epoch [2/5], Step [100/938], Loss: 0.3516, Accuracy: 0.9272
Epoch [2/5], Step [200/938], Loss: 0.3065, Accuracy: 0.9245
Epoch [2/5], Step [300/938], Loss: 0.3444, Accuracy: 0.9258
Epoch [2/5], Step [400/938], Loss: 0.2819, Accuracy: 0.9261
Epoch [2/5], Step [500/938], Loss: 0.2135, Accuracy: 0.9261
Epoch [2/5], Step [600/938], Loss: 

In [None]:
# # Test the model
# model.eval()
# eval_loss=0
# eval_acc=0
# with torch.no_grad():
#     predicted=model(x_train)

# # Make predictions
# print(predicted)

In [None]:
# Define the model
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = x.view(x.size(0), -1)  # Flatten
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [None]:
# Initialize the model, criterion, optimizer, and device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SimpleCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Training loop
num_epochs = 5
for epoch in range(num_epochs):
    train_loss = train(model, train_loader, criterion, optimizer, device)
    test_loss, test_accuracy = evaluate(model, test_loader, criterion, device)
    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')


In [None]:
# Save the model
torch.save(model.state_dict(), 'mnist_cnn.pth')
# Load the model
model = SimpleCNN().to(device)
model.load_state_dict(torch.load('mnist_cnn.pth', map_location=device))
# Evaluate the loaded model
test_loss, test_accuracy = evaluate(model, test_loader, criterion, device)
print(f'Loaded Model - Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')


In [None]:
# Visualize some predictions
def visualize_predictions(model, test_loader, device):
    model.eval()
    images, labels = next(iter(test_loader))
    images, labels = images.to(device), labels.to(device)
    outputs = model(images)
    _, predicted = torch.max(outputs.data, 1)

    fig, axes = plt.subplots(1, 10, figsize=(15, 3))
    for i in range(10):
        ax = axes[i]
        ax.imshow(images[i].cpu().squeeze(), cmap='gray')
        ax.set_title(f'Pred: {predicted[i].item()}\nTrue: {labels[i].item()}')
        ax.axis('off')
    plt.show()
visualize_predictions(model, test_loader, device)


In [None]:
# Save the model in safetensors format
import safetensors.torch
safetensors.save_model(model, 'mnist_cnn.safetensors')
# Load the model from safetensors format
model = safetensors.load_model(SimpleCNN(), 'mnist_cnn.safetensors', map_location=device)
# Evaluate the loaded model from safetensors
test_loss, test_accuracy = evaluate(model, test_loader, criterion, device)
print(f'Loaded Safetensors Model - Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')
# Clean up      
import os  
os.remove('mnist_cnn.pth')
os.remove('mnist_cnn.safetensors')
print("Cleaned up model files.")
# End of the script
if __name__ == "__main__":
    print("Script executed successfully.")
    sys.exit(0)
# End of the script
# End of the script 