In [96]:
import os
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)


# Custom dataset class
class GurmukhiDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return torch.tensor(self.X[idx], dtype=torch.float32), torch.tensor(self.y[idx], dtype=torch.long)


def load_images_from_folder(base_folder):
    X_data = []
    y_data = []

    for digit in range(10):
        folder_path = os.path.join(base_folder, str(digit))
        for filename in os.listdir(folder_path):
            img = cv2.imread(os.path.join(folder_path, filename), cv2.IMREAD_GRAYSCALE)
            if img is not None:
                X_data.append(img.flatten())
                y_data.append(digit)

    return np.array(X_data), np.array(y_data)


# Load the data
X_train_data, y_train_data = load_images_from_folder("train")
X_val_data, y_val_data = load_images_from_folder("val")

# Normalize the data
X_train_data = X_train_data / 255.0
X_val_data = X_val_data / 255.0

# Create dataset and dataloader
train_dataset = GurmukhiDataset(X_train_data, y_train_data)
val_dataset = GurmukhiDataset(X_val_data, y_val_data)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)


# Define the neural network architecture

class gurmukhi_digit_classifier(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, dropout_p):
        super().__init__()
        self.hidden1 = nn.Linear(input_size, hidden_size[0])
        self.hidden2 = nn.Linear(hidden_size[0], hidden_size[1])
        self.hidden3 = nn.Linear(hidden_size[1], hidden_size[2])
        self.dropout = nn.Dropout(dropout_p)
        self.relu = nn.ReLU()
        self.output = nn.Linear(hidden_size[2], output_size)

    def forward(self, x):
        x = self.hidden1(x)
        x = self.dropout(x)
        x = self.relu(x)
        x = self.hidden2(x)
        x = self.dropout(x)
        x = self.relu(x)
        x = self.hidden3(x)
        x = self.dropout(x)
        x = self.relu(x)
        x = self.output(x)
        return x


# Initialize the model, loss, and optimizer
input_size = X_train_data.shape[1]
hidden_size = [128, 64, 32]
output_size = 10
dropout_p = 0.4  # Increased dropout probability

model = gurmukhi_digit_classifier(input_size, hidden_size, output_size, dropout_p)

# # Add weight decay to the optimizer for L2 regularization (increased value)

loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=3e-3, weight_decay=9e-4)

# Train the neural network
epochs = 100
train_losses = []
val_losses = []
for epoch in range(epochs):
    model.train()
    train_loss = 0
    for images, labels in train_loader:
        images = images
        labels = labels

        optimizer.zero_grad()
        output = model(images)
        loss = loss_fn(output, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    train_loss /= len(train_loader)
    train_losses.append(train_loss)

    model.eval()

    val_loss = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images = images
            labels = labels

            output = model(images)
            loss = loss_fn(output, labels)
            val_loss += loss.item()
        val_loss /= len(val_loader)
        val_losses.append(val_loss)

    print(f'Epoch {epoch + 1}/{epochs}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')


Epoch 1/100, Train Loss: 1.9565, Validation Loss: 1.3555
Epoch 2/100, Train Loss: 0.9383, Validation Loss: 0.6811
Epoch 3/100, Train Loss: 0.4311, Validation Loss: 0.3691
Epoch 4/100, Train Loss: 0.3274, Validation Loss: 0.3780
Epoch 5/100, Train Loss: 0.2299, Validation Loss: 0.2335
Epoch 6/100, Train Loss: 0.1917, Validation Loss: 0.2485
Epoch 7/100, Train Loss: 0.1829, Validation Loss: 0.2478
Epoch 8/100, Train Loss: 0.1259, Validation Loss: 0.2070
Epoch 9/100, Train Loss: 0.0886, Validation Loss: 0.2679
Epoch 10/100, Train Loss: 0.1499, Validation Loss: 0.2634
Epoch 11/100, Train Loss: 0.1093, Validation Loss: 0.3260
Epoch 12/100, Train Loss: 0.1434, Validation Loss: 0.2984
Epoch 13/100, Train Loss: 0.0659, Validation Loss: 0.2419
Epoch 14/100, Train Loss: 0.0863, Validation Loss: 0.2262
Epoch 15/100, Train Loss: 0.1038, Validation Loss: 0.2606
Epoch 16/100, Train Loss: 0.0581, Validation Loss: 0.3508
Epoch 17/100, Train Loss: 0.0795, Validation Loss: 0.2350
Epoch 18/100, Train Los

In [99]:
model

gurmukhi_digit_classifier(
  (hidden1): Linear(in_features=1024, out_features=256, bias=True)
  (hidden2): Linear(in_features=256, out_features=128, bias=True)
  (hidden3): Linear(in_features=128, out_features=64, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
  (relu): ReLU()
  (output): Linear(in_features=64, out_features=10, bias=True)
)

In [122]:


def preprocess_image_unseeen(image_path):
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    resized_image = cv2.resize(image, (32, 32))
    image_array = np.array(resized_image).flatten()
    image_array = image_array / 255.0
    return image_array

def predict(image_path, model):
    model.eval()
    image_array = preprocess_image_unseeen(image_path)
    input_tensor = torch.tensor(image_array, dtype=torch.float32).unsqueeze(0)
    output = model(input_tensor)
    _, predicted_digit = torch.max(output, 1)
    return predicted_digit.item()

# Example usage
image_path = './val/8/6.tiff'
predicted_digit = predict(image_path, model)
print(f"Predicted digit: {predicted_digit}")

Predicted digit: 8
