In [None]:
# Mount Google Drive at /content/drive so files stored there (such as the
# dataset archive read in a later cell) are reachable through the local
# filesystem of the Colab VM.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
import zipfile
import os

# Where the archive lives on the mounted Drive, and where its contents go on
# the local disk of the runtime.
extract_path = "/content/tamil_dataset"
zip_path = "/content/drive/MyDrive/ColabData/tamil_character_dataset.zip"

# Unpack every member of the archive below `extract_path`; the context manager
# closes the archive even if extraction fails part-way through.
with zipfile.ZipFile(file=zip_path, mode="r") as archive:
    archive.extractall(path=extract_path)

print("✅ Dataset extracted to:", extract_path)



✅ Dataset extracted to: /content/tamil_dataset


In [3]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader


In [4]:
# Per-image preprocessing, applied in this order:
#   1. collapse the image to a single grayscale channel,
#   2. convert it to a float tensor scaled to [0, 1],
#   3. shift/scale with mean 0.5 and std 0.5, i.e. map [0, 1] to [-1, 1].
_preprocess_steps = [
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
]
transform = transforms.Compose(_preprocess_steps)


In [5]:
train_dir = "/content/tamil_dataset/train"
test_dir = "/content/tamil_dataset/test"

# One sub-folder per class; the shared `transform` is applied to every image.
train_dataset = datasets.ImageFolder(root=train_dir, transform=transform)
test_dataset = datasets.ImageFolder(root=test_dir, transform=transform)

# ImageFolder derives label indices from the sorted sub-folder names of each
# root independently. If the two splits do not contain exactly the same class
# folders, index k means different characters in train and test and every
# evaluation metric becomes silently wrong, so fail loudly here instead.
assert train_dataset.classes == test_dataset.classes, (
    "train/ and test/ contain different class folders; "
    "label indices would not line up between the two splits"
)

# Shuffle only the training data; keep the test order stable for evaluation.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


In [8]:
# The size of the model's output layer is the number of classes that
# ImageFolder discovered in the training split.
num_classes = len(train_dataset.classes)
print("Classes:", train_dataset.classes)

Classes: ['ஃ', 'அ', 'ஆ', 'இ', 'ஈ', 'உ', 'ஊ', 'எ', 'ஏ', 'ஐ', 'ஒ', 'ஓ', 'ஔ', 'க', 'கா', 'கி', 'கீ', 'கு', 'கூ', 'கெ', 'கே', 'கை', 'கொ', 'கோ', 'கௌ', 'க்', 'ங', 'ஙா', 'ஙி', 'ஙீ', 'ஙு', 'ஙூ', 'ஙெ', 'ஙே', 'ஙை', 'ஙொ', 'ஙோ', 'ஙௌ', 'ங்', 'ச', 'சா', 'சி', 'சீ', 'சு', 'சூ', 'செ', 'சே', 'சை', 'சொ', 'சோ', 'சௌ', 'ச்', 'ஞ', 'ஞா', 'ஞி', 'ஞீ', 'ஞு', 'ஞூ', 'ஞெ', 'ஞே', 'ஞை', 'ஞொ', 'ஞோ', 'ஞௌ', 'ஞ்', 'ட', 'டா', 'டி', 'டீ', 'டு', 'டூ', 'டெ', 'டே', 'டை', 'டொ', 'டோ', 'டௌ', 'ட்', 'ண', 'ணா', 'ணி', 'ணீ', 'ணு', 'ணூ', 'ணெ', 'ணே', 'ணை', 'ணொ', 'ணோ', 'ணௌ', 'ண்', 'த', 'தா', 'தி', 'தீ', 'து', 'தூ', 'தெ', 'தே', 'தை', 'தொ', 'தோ', 'தௌ', 'த்', 'ந', 'நா', 'நி', 'நீ', 'நு', 'நூ', 'நெ', 'நே', 'நை', 'நொ', 'நோ', 'நௌ', 'ந்', 'ன', 'னா', 'னி', 'னீ', 'னு', 'னூ', 'னெ', 'னே', 'னை', 'னொ', 'னோ', 'னௌ', 'ன்', 'ப', 'பா', 'பி', 'பீ', 'பு', 'பூ', 'பெ', 'பே', 'பை', 'பொ', 'போ', 'பௌ', 'ப்', 'ம', 'மா', 'மி', 'மீ', 'மு', 'மூ', 'மெ', 'மே', 'மை', 'மொ', 'மோ', 'மௌ', 'ம்', 'ய', 'யா', 'யி', 'யீ', 'யு', 'யூ', 'யெ', 'யே', 'யை', 'யொ', 'யோ', 'யௌ', 'ய்', '

CNN


In [9]:
import torch.nn as nn
import torch.nn.functional as F

class CNNClassifier(nn.Module):
    """Small convolutional classifier for single-channel square images.

    Architecture: two ``Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2)`` stages
    (32 then 64 channels), a 256-unit hidden layer and a linear output layer
    that produces one raw logit per class.

    Args:
        num_classes: Number of output logits.
        input_size: Height and width, in pixels, of the square input images.
            Defaults to 64, which yields the original ``64 * 16 * 16``
            flattened feature size, so existing callers and saved weights
            are unaffected.
    """

    def __init__(self, num_classes, input_size=64):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # The convolutions keep the spatial size (3x3 kernel, padding 1); each
        # of the two pooling steps halves it, rounding down.
        pooled_size = input_size // 4
        self.fc1 = nn.Linear(64 * pooled_size * pooled_size, 256)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Map a batch ``[B, 1, H, W]`` to raw class logits ``[B, num_classes]``."""
        x = self.pool(F.relu(self.conv1(x)))  # -> [B, 32, H/2, W/2]
        x = self.pool(F.relu(self.conv2(x)))  # -> [B, 64, H/4, W/4]
        # Flatten everything except the batch dimension. The previous
        # view(-1, 64 * 16 * 16) would fold surplus pixels of a wrongly sized
        # input into extra batch rows; this form makes fc1 raise instead.
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)


In [10]:
# Train on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNNClassifier(num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Single source of truth for the epoch count: it drives both the loop and the
# progress message, so the two can no longer drift apart.
NUM_EPOCHS = 10

# Training loop
for epoch in range(NUM_EPOCHS):
    model.train()
    total_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # .item() detaches the scalar so the autograd graph is not kept alive.
        total_loss += loss.item()

    # NOTE: the reported value is the SUM of the per-batch losses over the
    # epoch, not a per-batch or per-sample average.
    print(f"📘 Epoch [{epoch+1}/{NUM_EPOCHS}], Loss: {total_loss:.4f}")


📘 Epoch [1/10], Loss: 545.1942
📘 Epoch [2/10], Loss: 31.8725
📘 Epoch [3/10], Loss: 17.9415
📘 Epoch [4/10], Loss: 12.7143
📘 Epoch [5/10], Loss: 11.4501
📘 Epoch [6/10], Loss: 8.7284
📘 Epoch [7/10], Loss: 6.4908
📘 Epoch [8/10], Loss: 5.8999
📘 Epoch [9/10], Loss: 7.0241
📘 Epoch [10/10], Loss: 8.8369


In [None]:
from sklearn.metrics import classification_report

# Switch off training-only behaviour and collect predictions for the whole
# test split without tracking gradients.
model.eval()
all_preds, all_labels = [], []

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        outputs = model(images)
        # The predicted class is the index of the largest logit in each row.
        preds = outputs.argmax(dim=1)
        all_preds.extend(preds.cpu().tolist())
        all_labels.extend(labels.tolist())

# Pass the full list of label indices explicitly. Without `labels=`,
# classification_report infers the label set from the data and raises a
# ValueError whenever some class never occurs in the test labels or
# predictions, because the inferred set no longer matches `target_names`.
class_names = train_dataset.classes
print(classification_report(
    all_labels,
    all_preds,
    labels=list(range(len(class_names))),
    target_names=class_names,
))


In [12]:

# Persist only the learned parameters (the state_dict), not the model object.
# NOTE(review): the file name says "vit", but `model` is the CNNClassifier
# trained in this notebook; the name is kept because the inference cell loads
# the weights from this exact path.
torch.save(model.state_dict(), "vit_tamil_model.pth")


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import cv2
import numpy as np
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt
from google.colab import files
import io

# Define CNN architecture (same as your training)
class CNNClassifier(nn.Module):
    """Small convolutional classifier for single-channel square images.

    Architecture: two ``Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2)`` stages
    (32 then 64 channels), a 256-unit hidden layer and a linear output layer
    that produces one raw logit per class. Layer attribute names are kept
    as-is so a state_dict saved from the original definition still loads.

    Args:
        num_classes: Number of output logits.
        input_size: Height and width, in pixels, of the square input images.
            Defaults to 64, which yields the original ``64 * 16 * 16``
            flattened feature size.
    """

    def __init__(self, num_classes, input_size=64):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # The convolutions keep the spatial size (3x3 kernel, padding 1); each
        # of the two pooling steps halves it, rounding down.
        pooled_size = input_size // 4
        self.fc1 = nn.Linear(64 * pooled_size * pooled_size, 256)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Map a batch ``[B, 1, H, W]`` to raw class logits ``[B, num_classes]``."""
        x = self.pool(F.relu(self.conv1(x)))  # [B, 32, H/2, W/2]
        x = self.pool(F.relu(self.conv2(x)))  # [B, 64, H/4, W/4]
        # Flatten everything except the batch dimension. The previous
        # view(-1, 64 * 16 * 16) would fold surplus pixels of a wrongly sized
        # input into extra batch rows; this form makes fc1 raise instead.
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)

# 🔧 Preprocessing function for raw image
def clean_image_pil(pil_img, canvas_size=64, char_size=40):
    """Turn a raw character image into a centered white-on-black square.

    Steps: convert to grayscale, invert when the image is mostly light,
    binarize at 127, crop to the bounding box of the remaining strokes,
    resize that crop to ``char_size`` x ``char_size`` (the aspect ratio is
    not preserved) and paste it into the middle of a black
    ``canvas_size`` x ``canvas_size`` canvas.

    Args:
        pil_img: Image object exposing ``convert("L")`` (a PIL image).
        canvas_size: Side length, in pixels, of the returned square canvas.
        char_size: Side length, in pixels, the cropped character is resized
            to before being centered on the canvas.

    Returns:
        A ``uint8`` numpy array of shape ``(canvas_size, canvas_size)``.

    Raises:
        ValueError: If ``char_size`` is not in ``(0, canvas_size]``, or if no
            stroke pixels remain after binarization (blank/uniform image).
    """
    # Validate up front: a character larger than the canvas would otherwise
    # only fail at the paste step with an obscure broadcasting error.
    if not 0 < char_size <= canvas_size:
        raise ValueError(
            f"char_size must be in (0, canvas_size]; got char_size={char_size}, "
            f"canvas_size={canvas_size}"
        )

    # Convert to grayscale numpy array
    img = np.array(pil_img.convert("L"))

    # Invert if white background, so strokes end up bright on a dark background
    if np.mean(img) > 127:
        img = cv2.bitwise_not(img)

    # Binarize
    _, binary = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)

    # Crop to bounding box of the stroke pixels
    coords = cv2.findNonZero(binary)
    if coords is None:
        # Nothing survived thresholding; there is no bounding box to crop to.
        raise ValueError(
            "No character strokes found: the image is blank after binarization"
        )
    x, y, w, h = cv2.boundingRect(coords)
    cropped = binary[y:y+h, x:x+w]

    # Resize character
    resized = cv2.resize(cropped, (char_size, char_size), interpolation=cv2.INTER_AREA)

    # Place on black canvas, centered
    canvas = np.zeros((canvas_size, canvas_size), dtype=np.uint8)
    start_x = (canvas_size - char_size) // 2
    start_y = (canvas_size - char_size) // 2
    canvas[start_y:start_y+char_size, start_x:start_x+char_size] = resized

    # Ensure white on black
    if np.mean(canvas) > 127:
        canvas = cv2.bitwise_not(canvas)

    return canvas

# Load model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE: relies on `train_dataset` from the dataset cell still being defined in
# this session; the class order must match the one used during training.
label_classes = train_dataset.classes
num_classes = len(label_classes)

model = CNNClassifier(num_classes).to(device)
model.load_state_dict(torch.load("vit_tamil_model.pth", map_location=device))
model.eval()

# Upload raw image
uploaded = files.upload()
if not uploaded:
    # The upload dialog was dismissed without choosing a file; indexing the
    # empty result would otherwise raise an uninformative IndexError.
    raise ValueError("No file was uploaded; select an image to classify.")
image_path = next(iter(uploaded))  # first uploaded file name
raw_img = Image.open(io.BytesIO(uploaded[image_path]))

# Clean the image
cleaned = clean_image_pil(raw_img)

# Show cleaned image
plt.imshow(cleaned, cmap='gray')
plt.title("Preprocessed Input")
plt.axis('off')
plt.show()

# Prepare for model input: add batch and channel dimensions, scale to [0, 1],
# then apply the same (x - 0.5) / 0.5 normalization as the training transform.
img_tensor = torch.tensor(cleaned, dtype=torch.float32).unsqueeze(0).unsqueeze(0) / 255.0
img_tensor = (img_tensor - 0.5) / 0.5  # normalize
img_tensor = img_tensor.to(device)

# Predict
with torch.no_grad():
    output = model(img_tensor)
    pred_idx = output.argmax(dim=1).item()
    # Softmax probability of the winning class, expressed as a percentage.
    confidence = F.softmax(output, dim=1)[0][pred_idx].item() * 100
    pred_label = label_classes[pred_idx]

# ✅ Final Output
print(f"\n🧠 Predicted Tamil Character: **{pred_label}**")
print(f"🔍 Confidence: {confidence:.2f}%")
