In [1]:
import torch
import cv2
import numpy as np
import matplotlib.pyplot as plt
import torchvision.models as models
import torch.nn as nn
import torch.optim as optim

from helper_functions.preprocessing import replace_black_with_median, preprocess_image
from helper_functions.encode import prepare_training_data
from helper_functions.encode import CHARACTERS
from helper_functions.encode import IMG_HEIGHT, IMG_WIDTH
from helper_functions.recognition import HybridResNet50

# Paths
# Dataset folders, written relative to the directory the notebook kernel runs in.
TRAIN_FOLDER_PATH = "../data/train/combine"  # passed to prepare_training_data() in a later cell
TEST_FOLDER_PATH = "../data/test/combine"  # NOTE(review): not referenced in the cells visible here

In [10]:
# CNN parameters
# NOTE(review): none of these three constants is consumed by the cells visible here;
# presumably they feed the DataLoader construction / training call elsewhere — TODO confirm.
BATCH_SIZE = 64  # presumably samples per mini-batch
EPOCHS = 15  # presumably the number of training epochs (train_model itself defaults to 5)
VALIDATION_SPLIT = 0.2  # presumably the fraction of training data held out for validation

In [3]:
# Sanity-check the preprocessing pipeline on a single training image.
sample_path = f"{TRAIN_FOLDER_PATH}/000s-0.png"
img = cv2.imread(sample_path)
if img is None:
    # cv2.imread reports a missing/unreadable file by returning None rather
    # than raising, so fail here with a clear message instead of deep inside
    # preprocess_image.
    raise FileNotFoundError(f"Could not read image: {sample_path}")
processed_img = preprocess_image(img)

# Render inline with matplotlib instead of cv2.imshow/cv2.waitKey(0): the
# OpenCV window blocks the kernel until a key is pressed (breaking
# "Restart & Run All") and cannot open at all on a headless server.
display_img = np.squeeze(processed_img)  # drop singleton axes, e.g. (H, W, 1) -> (H, W)
if display_img.ndim == 3 and display_img.shape[2] == 3:
    # assumes a 3-channel result is still in OpenCV's BGR order — TODO confirm
    display_img = display_img[..., ::-1]

fig, ax = plt.subplots()
ax.imshow(display_img, cmap="gray")  # cmap only applies to 2-D (grayscale) input
ax.set_title("Preprocessed sample: 000s-0.png")
ax.axis("off")
plt.show()

# Load Training Data

In [2]:
# Build the training set from the files under TRAIN_FOLDER_PATH.
# prepare_training_data returns five values; only X_img is used by the cells
# visible here (its first element is converted with torch.from_numpy later on).
# NOTE(review): presumably X_features/feature_names are hand-crafted features,
# y the labels and scaler a fitted feature scaler — confirm in helper_functions.encode.
print("Preparing training data...")
X_img, X_features, y, feature_names, scaler = prepare_training_data(TRAIN_FOLDER_PATH)

Preparing training data...


Preparing Training Data: 100%|████████████████████████████████████████████████████| 7437/7437 [00:27<00:00, 272.90it/s]


# RESNET Model


In [17]:
# NOTE(review): this definition shadows the `HybridResNet50` imported from
# helper_functions.recognition in the imports cell; whichever ran last wins.
class HybridResNet50(nn.Module):
    """ResNet-50 classifier adapted to single-channel images.

    Wraps ``torchvision.models.resnet50`` and swaps two layers:

    * ``fc`` becomes a linear layer with ``output_size`` logits;
    * ``conv1`` becomes a 1-input-channel 3x3 convolution (stride 1, padding 1).

    ``train_model`` and ``evaluate`` provide a minimal training / testing loop.
    """

    def __init__(self, input_size=(IMG_HEIGHT, IMG_WIDTH), output_size=len(CHARACTERS), pretrained=True):
        """Build the network.

        Args:
            input_size: Accepted for interface compatibility; not used when
                building the layers.
            output_size: Number of output logits (defaults to ``len(CHARACTERS)``).
            pretrained: Forwarded to ``models.resnet50`` to request pretrained
                weights for the backbone.
        """
        super().__init__()

        # NOTE(review): recent torchvision releases deprecate `pretrained=` in
        # favour of `weights=`; left unchanged because the installed version is
        # not visible from here — confirm before switching.
        self.resnet = models.resnet50(pretrained=pretrained)

        # Replace the classification head so it emits `output_size` logits.
        num_ftrs = self.resnet.fc.in_features
        self.resnet.fc = nn.Linear(num_ftrs, output_size)

        # Replace the stem convolution so the network accepts 1-channel input.
        # This layer is freshly initialised, so it does not carry over any
        # pretrained weights loaded above.
        self.resnet.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False)

    def forward(self, x):
        """Return the logits produced by the wrapped ResNet for batch ``x``."""
        return self.resnet(x)

    def train_model(self, train_loader, num_epochs=5, lr=0.001):
        """Train with Adam and cross-entropy loss, printing the mean loss per epoch.

        Args:
            train_loader: Iterable yielding ``(images, labels)`` tensor batches.
            num_epochs: Number of passes over ``train_loader``.
            lr: Adam learning rate.

        Returns:
            A list with the mean batch loss of each epoch.

        Raises:
            ValueError: If ``train_loader`` yields no batches.
        """
        self.train()  # Set to training mode

        # Batches are moved to wherever the model's parameters live, so the
        # loop also works after the model has been moved with `.to(device)`.
        device = next(self.parameters()).device

        criterion = nn.CrossEntropyLoss()  # For multi-class classification
        optimizer = optim.Adam(self.parameters(), lr=lr)

        epoch_losses = []
        for epoch in range(num_epochs):
            running_loss = 0.0
            num_batches = 0
            for images, labels in train_loader:
                images, labels = images.to(device), labels.to(device)

                optimizer.zero_grad()
                outputs = self(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                num_batches += 1

            if num_batches == 0:
                # Previously surfaced as a ZeroDivisionError in the print below.
                raise ValueError("train_loader yielded no batches; nothing to train on")

            epoch_loss = running_loss / num_batches
            epoch_losses.append(epoch_loss)
            print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")

        print("Training complete!")
        return epoch_losses

    def evaluate(self, test_loader):
        """Print and return the mean batch loss and accuracy on ``test_loader``.

        The model is switched to evaluation mode and left in that mode.

        Args:
            test_loader: Iterable yielding ``(images, labels)`` tensor batches.

        Returns:
            ``(mean_loss, accuracy_percent)``, where ``mean_loss`` is the
            average of the per-batch losses.

        Raises:
            ValueError: If ``test_loader`` yields no batches or no samples.
        """
        self.eval()  # Set to evaluation mode

        device = next(self.parameters()).device
        criterion = nn.CrossEntropyLoss()

        correct = 0
        total = 0
        running_loss = 0.0
        num_batches = 0

        with torch.no_grad():  # No need to track gradients during evaluation
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)

                outputs = self(images)
                running_loss += criterion(outputs, labels).item()
                num_batches += 1

                # Predicted class = index of the largest logit in each row.
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        if num_batches == 0 or total == 0:
            # Previously surfaced as a ZeroDivisionError in the prints below.
            raise ValueError("test_loader yielded no samples; nothing to evaluate")

        mean_loss = running_loss / num_batches
        accuracy = 100 * correct / total

        print(f"Test Loss: {mean_loss:.4f}")
        print(f"Accuracy: {accuracy:.2f}%")
        return mean_loss, accuracy

In [3]:
# Instantiate the network with its default constructor arguments.
# NOTE(review): `HybridResNet50` is both imported from helper_functions.recognition
# and defined in this notebook, so which class this creates depends on cell execution order.
model = HybridResNet50()

In [4]:
# Smoke test: run the first training image through the model as a batch of one
# (unsqueeze(0) prepends the batch dimension) and display the raw logits.
# NOTE(review): there is no model.eval() / torch.no_grad() here, so autograd tracks
# the call (see grad_fn in the output) and, unless eval mode was set elsewhere,
# normalisation layers run in training mode — confirm this is intended.
model(torch.from_numpy(X_img[0]).unsqueeze(0))

tensor([[-0.0616, -0.3025,  0.3264, -0.0740, -0.2705, -0.0918, -0.2742, -0.1718,
          0.0908, -0.1919,  0.1388,  0.1345,  0.0328, -0.1526, -0.2739, -0.4799,
         -0.2403,  0.2811, -0.1265,  0.1999,  0.2659, -0.2771, -0.1506, -0.3430,
         -0.1190, -0.1869,  0.0074, -0.0092, -0.1018, -0.0797,  0.2677,  0.1126,
          0.1842,  0.0562, -0.1203,  0.0463]], grad_fn=<AddmmBackward0>)