# Image Recognition and Prediction
Recognizing handwritten 28x28 characters and digits using a trained convolutional neural network (CNN) model.

## Imports

In [1]:
from PIL import Image, ImageOps
import plotly.express as px
import pandas as pd
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.nn as nn
import torch

# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
print(device)

cuda


## Model Architecture - Class
Structure of the convolutional neural network model.

In [2]:
class CNNModel(nn.Module):
    """Two-block convolutional classifier for single-channel images.

    Each block is Conv2d(3x3, padding=1) -> 2x2 max pooling -> BatchNorm2d ->
    leaky ReLU. The first linear layer expects 7 * 7 * 6 = 294 features, i.e.
    6 feature maps of 7x7, which is what a 28x28 input becomes after the two
    poolings (28 -> 14 -> 7).

    Args:
        printtoggle: When truthy, `forward` prints the tensor shape after each stage.
        output_layers: Number of output units (one logit per class).
    """

    def __init__(self, printtoggle, output_layers):
        super().__init__()
        self.print = printtoggle

        # First Convolution Layer: (Input channel, output channels, kernel size)
        self.conv1 = nn.Conv2d(1, 6, 3, padding=1)
        self.bnorm1 = nn.BatchNorm2d(6)

        # Second Convolution Layer: (Input channel, output channels, kernel size)
        self.conv2 = nn.Conv2d(6, 6, 3, padding=1)
        self.bnorm2 = nn.BatchNorm2d(6)

        # Neural Network Input Layer: Input neurons to hidden neurons
        self.fc1 = nn.Linear(7 * 7 * 6, 50)
        # Neural Network Hidden Layer: Hidden neurons to output neurons
        self.fc2 = nn.Linear(50, output_layers)

    def _log_shape(self, stage, x):
        # Print the shape of `x` for the given stage, but only when printing is enabled
        if self.print:
            print(f'{stage}: {list(x.shape)}')

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Tensor of shape (batch, 1, H, W); H and W must reduce to 7x7
                after two 2x2 poolings (e.g. 28x28) to match `fc1`.

        Returns:
            Tensor of shape (batch, output_layers) holding raw logits
            (no softmax is applied here).
        """
        self._log_shape('Input', x)

        # First Block: Convolution -> Max Pooling -> Batch Normalization -> Leaky ReLU Activation
        x = F.max_pool2d(self.conv1(x), 2) # Kernel size = 2, Stride = 2
        x = F.leaky_relu(self.bnorm1(x))
        self._log_shape('First CPR block', x)

        # Second Block: Convolution -> Max Pooling -> Batch Normalization -> Leaky ReLU Activation
        x = F.max_pool2d(self.conv2(x), 2) # Kernel size = 2, Stride = 2
        x = F.leaky_relu(self.bnorm2(x))
        self._log_shape('Second CPR block', x)

        # Flatten every dimension except the batch one: (batch, 6, 7, 7) -> (batch, 294).
        # Unlike dividing numel() by the batch size, this needs no float arithmetic
        # and does not fail with ZeroDivisionError on an empty batch.
        x = x.flatten(start_dim=1)
        self._log_shape('Vectorized', x)

        # Neural Network Input Layer: Input neurons to hidden neurons
        x = F.leaky_relu(self.fc1(x))
        # Neural Network Hidden Layer: Hidden neurons to output neurons
        x = self.fc2(x)
        self._log_shape('Final output', x)

        return x

## Image Processor - Functions
- Preprocess the image to be classified
- Predict the class of a given image
- Plot the class probabilities for a given image

In [6]:
def plot_proba(classes: list[str], probs: list[float]):
    """Display a bar chart with one bar per class, sized by its probability.

    Args:
        classes: Class labels, shown on the x axis.
        probs: Probabilities, paired with `classes` by position.
    """
    class_column = pd.DataFrame(classes, columns=['Class'])
    proba_column = pd.DataFrame(probs, columns=['Probability'])
    # Column-wise concat pairs labels and probabilities on the default positional index
    table = pd.concat([class_column, proba_column], axis=1)
    figure = px.bar(table, x='Class', y='Probability')
    figure.show()

def preprocess_image(image_path: str) -> torch.Tensor:
    """Load an image file and turn it into a batched tensor for the model.

    Steps: open -> convert to RGB -> invert colors -> transpose (swap rows and
    columns) -> grayscale -> resize to 28x28 -> tensor -> add a batch dimension.

    NOTE(review): the inversion and transposition presumably match the polarity
    and orientation of the training data - confirm against the training notebook.

    Args:
        image_path: Path of the image file to load.

    Returns:
        Tensor of shape (1, 1, 28, 28) with pixel values scaled to [0, 1].
    """
    # Define the transformations
    transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=1), # Ensure grayscale
        transforms.Resize((28, 28)), # Resize to 28x28
        transforms.ToTensor(), # Convert to tensor and scale pixel values to [0, 1]
    ])

    # Open inside a context manager so the file handle is released even if
    # decoding or conversion raises; convert() yields an independent in-memory copy
    with Image.open(image_path) as image:
        # Invert the colors
        img_inverted = ImageOps.invert(image.convert('RGB'))
    # Transpose the image
    img_transposed = img_inverted.transpose(Image.TRANSPOSE)
    # Apply transformations
    image_tensor = transform(img_transposed)
    # Convert into a batched dimension: (1, 1, 28, 28)
    return image_tensor.unsqueeze(0)

def predict_png(model: CNNModel, image_path: str) -> tuple[int, list[float]]:
    """Classify a single image file with `model`.

    Side effect: the model is switched to evaluation mode and left that way.

    Args:
        model: Trained network; its forward pass must return one row of logits
            per input image.
        image_path: Path of the image to classify.

    Returns:
        A pair `(predicted_class, probabilities)`: the index of the most
        probable class and the softmax probabilities of every class as floats.
    """
    model.eval() # Set the model to evaluation mode

    # Send the input to the device the model actually lives on, instead of relying
    # on a global that may disagree with it (which would raise a device mismatch)
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    # Pre-process the image
    image_tensor = preprocess_image(image_path).to(target_device)

    with torch.no_grad():
        logits = model(image_tensor) # Forward pass
        probabilities = F.softmax(logits, dim=1) # Convert logits to probabilities
        predicted_class = torch.argmax(probabilities, dim=1).item() # Get the predicted class

    # Row 0 is the only image in the batch; tolist() yields plain Python floats
    return predicted_class, probabilities[0].tolist()

## Predict Image

### Initialization

In [4]:
# Initialize the model and list of classes
classes = 'a b c d e f g h i j k l m n o p q r s t u v w x y z 0 1 2 3 4 5 6 7 8 9'.split()
# One output unit per class (26 letters + 10 digits = 36)
model = CNNModel(printtoggle=False, output_layers=len(classes))  # Reinitialize the model structure
# Forward slash: '\m' is an invalid escape sequence and a backslash path only resolves on Windows.
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load('models/model1.pth', map_location=device, weights_only=True))  # Load the saved parameters
model.to(device)  # Send the model to GPU/CPU

CNNModel(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bnorm1): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(6, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bnorm2): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=294, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=36, bias=True)
)

### Prediction and Probability

In [15]:
# Classify the sample image, report the winning label, then chart the full distribution
image_path = 'images/test.png' # An image of a digit '7'
predicted_class, probabilities = predict_png(model, image_path)
predicted_label = classes[predicted_class]
print(f'Predicted Class: {predicted_label}')
print('Class Probabilities:')
plot_proba(classes=classes, probs=probabilities)

Predicted Class: 7
Class Probabilities:
