In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision import datasets, models
import matplotlib.pyplot as plt


## 1. Data Normalization:
Both `transform_train` and `transform_test` include a `transforms.Normalize` step, which is used to normalize the pixel values of images. For the CIFAR-10 dataset, this typically involves subtracting the mean and dividing by the standard deviation of each channel (R, G, B).

This normalization helps in stabilizing the training process by ensuring that the input features have a consistent scale, which in turn can speed up convergence and improve model performance.

## 2. Data Augmentation (transform_train only):
The `transform_train` includes additional transformations like `RandomHorizontalFlip` and `RandomCrop`, which are examples of data augmentation techniques. Data augmentation is used only on the training set to artificially increase the diversity of the training data, thereby reducing overfitting.

- **RandomHorizontalFlip**: This randomly flips the image horizontally with a probability of 50%, helping the model learn features invariant to horizontal orientation.
- **RandomCrop**: This randomly crops a part of the image and resizes it to the original dimensions, which helps the model become robust to spatial translations and learn better features.

## 3. Converting to Tensor:
Both `transform_train` and `transform_test` include the `transforms.ToTensor()` transformation, which converts the images from PIL format to PyTorch tensors. This is necessary because PyTorch models expect inputs as tensors.


In [4]:
# Define transformations for the training and validation datasets
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

# Load the datasets
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

# Create DataLoader for both training and testing datasets
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=2)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data\cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:11<00:00, 15377369.20it/s]


Extracting ./data\cifar-10-python.tar.gz to ./data
Files already downloaded and verified


## 1. Conv2d (Convolutional Layer)
- **Purpose**: Extracts features from the input image, like edges, textures, and patterns.
- **Operation**: Applies multiple filters (kernels) to the input image, producing feature maps. Each filter focuses on different aspects of the image.
- **Output**: A set of feature maps highlighting different features detected by the filters.

## 2. MaxPool2d (Max Pooling Layer)
- **Purpose**: Reduces the spatial dimensions (height and width) of the feature maps.
- **Operation**: Takes the maximum value from small regions (e.g., 2x2) of the feature maps, which helps in downsampling and making the network more computationally efficient.
- **Output**: Smaller, more manageable feature maps that retain the most important information.

## 3. Linear (Fully Connected Layer)
- **Purpose**: Combines the features extracted by the convolutional layers to make final predictions.
- **Operation**: Takes the flattened output from the previous layers and maps it to the desired output classes. Each neuron in the layer is connected to all the outputs of the previous layer.
- **Output**: Final scores (logits) for each class in the classification task.

## 4. Forward Pass
- **Purpose**: Defines the flow of data through the network, specifying how the input transforms into the output step-by-step.
- **Operation**:
  - **Convolution + ReLU + Pooling**: Extracts and condenses features.
  - **Flattening**: Converts the 2D feature maps into a 1D vector.
  - **Fully Connected Layers**: Combines features to classify the input.
- **Output**: A vector representing the predicted class probabilities.


In [5]:
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(-1, 128 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Instantiate the model
model = SimpleCNN()


In [6]:
# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [7]:
num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    
    for i, (inputs, labels) in enumerate(train_loader):
        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        # Print statistics
        running_loss += loss.item()
        if i % 100 == 99:  # Print every 100 mini-batches
            print(f"[{epoch + 1}, {i + 1}] loss: {running_loss / 100:.3f}")
            running_loss = 0.0

print("Finished Training")


[1, 100] loss: 1.994
[1, 200] loss: 1.701
[1, 300] loss: 1.546
[1, 400] loss: 1.485
[1, 500] loss: 1.448
[1, 600] loss: 1.358
[1, 700] loss: 1.301
[2, 100] loss: 1.241
[2, 200] loss: 1.204
[2, 300] loss: 1.179
[2, 400] loss: 1.129
[2, 500] loss: 1.089
[2, 600] loss: 1.064
[2, 700] loss: 1.053
[3, 100] loss: 1.007
[3, 200] loss: 0.984
[3, 300] loss: 0.949
[3, 400] loss: 0.930
[3, 500] loss: 0.914
[3, 600] loss: 0.921
[3, 700] loss: 0.927
[4, 100] loss: 0.855
[4, 200] loss: 0.857
[4, 300] loss: 0.856
[4, 400] loss: 0.866
[4, 500] loss: 0.820
[4, 600] loss: 0.837
[4, 700] loss: 0.813
[5, 100] loss: 0.790
[5, 200] loss: 0.774
[5, 300] loss: 0.756
[5, 400] loss: 0.783
[5, 500] loss: 0.778
[5, 600] loss: 0.766
[5, 700] loss: 0.756
[6, 100] loss: 0.726
[6, 200] loss: 0.741
[6, 300] loss: 0.770
[6, 400] loss: 0.723
[6, 500] loss: 0.717
[6, 600] loss: 0.701
[6, 700] loss: 0.704
[7, 100] loss: 0.686
[7, 200] loss: 0.685
[7, 300] loss: 0.663
[7, 400] loss: 0.684
[7, 500] loss: 0.665
[7, 600] loss

In [8]:
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy of the model on the 10000 test images: {100 * correct / total:.2f}%")


Accuracy of the model on the 10000 test images: 78.76%


In [11]:
# Example: Inference on a single image
from PIL import Image

def predict(model, image_path):
    model.eval()
    transform = transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    ])
    
    image = Image.open(image_path)
    image = transform(image).unsqueeze(0)  # Add batch dimension
    
    with torch.no_grad():
        output = model(image)
        _, predicted = torch.max(output, 1)
    
    return predicted.item()

# Example usage
image_path = '1.jpg'
predicted_class = predict(model, image_path)
print(f'Predicted class: {predicted_class}')


Predicted class: 0


In [12]:
# Save the model
torch.save(model.state_dict(), 'simple_cnn.pth')

# Load the model
model = SimpleCNN()
model.load_state_dict(torch.load('simple_cnn.pth'))


  model.load_state_dict(torch.load('simple_cnn.pth'))


<All keys matched successfully>