In [3]:
# prompt: get this datasethttps://www.kaggle.com/datasets/inspiring-lab/nepali-number-plate-characters-dataset/data

import requests
from io import BytesIO
from zipfile import ZipFile



# Create a .kaggle directory if it doesn't exist
!mkdir -p ~/.kaggle



# Download the dataset
!kaggle datasets download -d inspiring-lab/nepali-number-plate-characters-dataset

# Extract the dataset
with ZipFile('nepali-number-plate-characters-dataset.zip', 'r') as zip_ref:
    zip_ref.extractall('nepali_number_plate_dataset')

# Optionally remove the zip file after extraction
!rm nepali-number-plate-characters-dataset.zip

print("Dataset downloaded and extracted successfully.")

Dataset URL: https://www.kaggle.com/datasets/inspiring-lab/nepali-number-plate-characters-dataset
License(s): Attribution-NonCommercial 4.0 International (CC BY-NC 4.0)
Downloading nepali-number-plate-characters-dataset.zip to /content
 89% 28.0M/31.6M [00:03<00:00, 13.9MB/s]
100% 31.6M/31.6M [00:03<00:00, 10.5MB/s]
Dataset downloaded and extracted successfully.


In [17]:
import torch
import torch.nn as nn
import torch.optim as optim

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from torch.utils.data import Dataset, DataLoader, Subset
import torchvision.transforms as transforms
from PIL import Image
from torchvision import models

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, precision_score, recall_score

In [6]:
class CharacterDataset(Dataset):
  def __init__(self, root_dir, transform=None):
      self.root_dir = root_dir
      self.transform = transform
      self.classes = os.listdir(root_dir)
      self.image_paths = []
      self.labels = []

      for label, class_name in enumerate(self.classes):
          class_dir = os.path.join(root_dir, class_name)
          for image_name in os.listdir(class_dir):
              image_path = os.path.join(class_dir, image_name)
              self.image_paths.append(image_path)
              self.labels.append(label)

  def __len__(self):
      return len(self.image_paths)

  def __getitem__(self, idx):
      image_path = self.image_paths[idx]
      image = Image.open(image_path).convert('RGB')
      label = self.labels[idx]

      if self.transform:
          image = self.transform(image)
      return image, label

In [11]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=15),
    transforms.ToTensor(),
])



In [12]:
data_dir = "./data/character_ocr"
dataset = CharacterDataset(root_dir=data_dir, transform=transform)

In [21]:
train_indices, test_indices = train_test_split(
    list(range(len(dataset))),
    test_size = 0.2,
    stratify = [dataset[i][1] for i in range(len(dataset))],
    random_state=42
)

# Create train and test subsets
train_subset = Subset(dataset, train_indices)
test_subset = Subset(dataset, test_indices)

# Data loaders
train_loader = DataLoader(train_subset, batch_size=128, shuffle=True)
test_loader = DataLoader(test_subset, batch_size=128, shuffle=False)

In [22]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using", device)

Using cuda


In [23]:
model = models.resnet18(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(dataset.classes))
model = model.to(device)
optimizer  = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()



In [24]:
for epoch in range(5):
  model.train()
  running_loss = 0.0
  all_preds_train = []
  all_labels_train = []

  for images, labels in train_loader:
    images, labels = images.to(device), labels.to(device)
    optimizer.zero_grad()
    outputs = model(images)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    running_loss += loss.item()
    _, preds = torch.max(outputs, 1)
    all_preds_train.extend(preds.cpu().numpy())
    all_labels_train.extend(labels.cpu().numpy())

  epoch_loss = running_loss / len(train_loader)
  print(f'Epoch {epoch+1}')

  model.eval()
  val_loss=0.0
  all_preds_val = []
  all_labels_val = []
  with torch.no_grad():
    for images, labels in test_loader:
      images, labels = images.to(device), labels.to(device)
      outputs = model(images)
      loss = criterion(outputs, labels)
      val_loss += loss.item()
      _, preds = torch.max(outputs, 1)
      all_preds_val.extend(preds.cpu().numpy())
      all_labels_val.extend(labels.cpu().numpy())
  val_loss /= len(test_loader)
  precision_val = precision_score(all_labels_val, all_preds_val, average='weighted')
  recall_val = recall_score(all_labels_val, all_preds_val, average='weighted')
  f1_val = f1_score(all_labels_val, all_preds_val, average='weighted')
  print(f'Train Loss: {epoch_loss:.4f}, Val Loss: {val_loss:.4f}, Precision: {precision_val:.4f}, Recall: {recall_val:.4f}, F1 Score: {f1_val:.4f}')

  print(f'Training Loss: {epoch_loss:.4f}, Validation Loss: {val_loss:.4f}')

Epoch 1
Train Loss: 0.3035, Val Loss: 0.1626, Precision: 0.9602, Recall: 0.9555, F1 Score: 0.9555
Training Loss: 0.3035, Validation Loss: 0.1626
Epoch 2
Train Loss: 0.0818, Val Loss: 0.0923, Precision: 0.9745, Recall: 0.9725, F1 Score: 0.9728
Training Loss: 0.0818, Validation Loss: 0.0923
Epoch 3
Train Loss: 0.0629, Val Loss: 0.1225, Precision: 0.9654, Recall: 0.9629, F1 Score: 0.9632
Training Loss: 0.0629, Validation Loss: 0.1225
Epoch 4
Train Loss: 0.0509, Val Loss: 0.0721, Precision: 0.9801, Recall: 0.9798, F1 Score: 0.9798
Training Loss: 0.0509, Validation Loss: 0.0721
Epoch 5
Train Loss: 0.0464, Val Loss: 0.1327, Precision: 0.9674, Recall: 0.9633, F1 Score: 0.9629
Training Loss: 0.0464, Validation Loss: 0.1327


In [25]:

torch.save(model.state_dict(), 'ocr.pth')
from google.colab import files
files.download('ocr.pth')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>