In [1]:
from google.colab import files

# Upload your Kaggle API key (kaggle.json)
uploaded = files.upload()

# Move the uploaded file to the correct directory
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Install Kaggle package
!pip install kaggle

# Download the dataset using Kaggle API
!kaggle datasets download -d thomasqazwsxedc/alphabet-characters-fonts-dataset -p /content/

# Unzip the dataset
!unzip -q /content/alphabet-characters-fonts-dataset.zip -d alphabet-characters-fonts-dataset

Saving kaggle.json to kaggle.json
Downloading alphabet-characters-fonts-dataset.zip to /content
 99% 553M/559M [00:09<00:00, 98.0MB/s]
100% 559M/559M [00:09<00:00, 59.5MB/s]


In [2]:
!pip install torch
!pip install torchvision
!pip install transformers
!pip install scikit-learn
!pip install numpy



In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from torchvision.utils import make_grid
from sklearn.model_selection import train_test_split
from PIL import Image
import os
import glob
import random
import numpy as np

# Ensure reproducibility: seed every random number generator this notebook imports.
SEED = 42
random.seed(SEED)        # Python's built-in RNG (imported above, previously unseeded)
np.random.seed(SEED)     # NumPy's global RNG
torch.manual_seed(SEED)  # PyTorch's RNG (drives the data split and weight init below)

# Check if GPU is available; otherwise everything runs on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define the CNN model
class AlphabetCNN(nn.Module):
    """Small two-stage convolutional classifier for single-channel images.

    Architecture: (conv 3x3 -> ReLU -> 2x2 max-pool) twice, then two fully
    connected layers. Each pooling step halves the spatial size, so a square
    ``input_size`` image reaches the first linear layer as
    ``64 * (input_size // 4) ** 2`` features.

    Args:
        num_classes: Number of output logits.
        input_size: Side length, in pixels, of the square input images.
            Defaults to 32, which yields the original ``64 * 8 * 8`` features.

    Raises:
        ValueError: If ``input_size`` is too small to survive two poolings.
    """

    def __init__(self, num_classes, input_size=32):
        super().__init__()
        # Layers are created in the original order so that seeded weight
        # initialisation and state_dict keys stay unchanged.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)

        # padding=1 with a 3x3 kernel keeps the spatial size; each pool halves it.
        pooled_side = (input_size // 2) // 2
        if pooled_side < 1:
            raise ValueError(
                f"input_size={input_size} is too small for two 2x2 poolings"
            )
        self.flat_features = 64 * pooled_side * pooled_side

        self.fc1 = nn.Linear(self.flat_features, 64)
        self.fc2 = nn.Linear(64, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``.

        Accepts ``(batch, 1, H, W)`` input, or a single unbatched ``(1, H, W)``
        image, which is treated as a batch of one.
        """
        if x.dim() == 3:
            x = x.unsqueeze(0)
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten per sample. Unlike view(-1, features), this never moves
        # spatial data into the batch dimension, so a wrongly sized input
        # fails loudly in fc1 instead of silently changing the batch size.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Define a custom dataset class
class AlphabetDataset(Dataset):
    """Image-folder dataset with one sub-directory of ``root_dir`` per class.

    Class names are the sub-directory names in sorted order, and a sample's
    label is the index of its parent directory in that order. Only ``*.png``
    files directly inside a class directory are used.

    Args:
        root_dir: Directory containing one sub-directory per class.
        transform: Optional callable applied to each grayscale PIL image.

    Attributes:
        classes: Sorted list of class (directory) names.
        class_to_idx: Mapping from class name to integer label.
        img_paths: Image paths, grouped by class and sorted within each class.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        # Only directories are classes. A stray file in root_dir would
        # otherwise receive its own label and inflate the class count.
        self.classes = sorted(
            entry
            for entry in os.listdir(root_dir)
            if os.path.isdir(os.path.join(root_dir, entry))
        )
        self.class_to_idx = {cls: idx for idx, cls in enumerate(self.classes)}
        self.img_paths = self.get_image_paths()

    def get_image_paths(self):
        """Return every PNG path under the class directories in a stable order."""
        img_paths = []
        for cls in self.classes:
            # Escape the directory so names containing glob metacharacters
            # such as '[', '*' or '?' are matched literally.
            class_path = glob.escape(os.path.join(self.root_dir, cls))
            # glob order depends on the filesystem; sorting keeps the sample
            # order, and therefore any seeded split, the same across machines.
            img_paths.extend(sorted(glob.glob(os.path.join(class_path, '*.png'))))
        return img_paths

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``."""
        img_path = self.img_paths[idx]
        # convert() returns a fully loaded copy, so the file handle can be
        # closed straight away instead of waiting for garbage collection.
        with Image.open(img_path) as img:
            image = img.convert('L')  # Convert to grayscale
        label = self.class_to_idx[os.path.basename(os.path.dirname(img_path))]

        if self.transform:
            image = self.transform(image)

        return image, torch.tensor(label, dtype=torch.long)

# Set path to the dataset
dataset_path = "/content/alphabet-characters-fonts-dataset/Images/Images"

# Loader settings shared by the training and test loaders
BATCH_SIZE = 64
# Never request more worker processes than the machine has CPUs;
# DataLoader warns when the requested count exceeds what is available.
NUM_WORKERS = min(4, os.cpu_count() or 1)

# Define transformations for the images: resize to the 32x32 input the
# network's first linear layer is sized for, then convert to a tensor.
transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
])

# Create the dataset
alphabet_dataset = AlphabetDataset(dataset_path, transform=transform)
if len(alphabet_dataset) == 0:
    # Fail here with a clear message rather than later with an unrelated error.
    raise RuntimeError(f"No PNG images found under {dataset_path}")

# Split the data into training (80%) and testing (remaining) sets
train_size = int(0.8 * len(alphabet_dataset))
test_size = len(alphabet_dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(alphabet_dataset, [train_size, test_size])

# Build the loaders; only the training data is shuffled.
# Pinned host memory is only useful when batches are copied to a GPU.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
    pin_memory=(device.type == "cuda"),
)
test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=(device.type == "cuda"),
)

# Initialize the model, loss function, and optimizer
num_classes = len(alphabet_dataset.classes)
model = AlphabetCNN(num_classes=num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop: one full pass over train_loader per epoch
num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0  # sum of per-batch mean losses for this epoch

    for images, labels in train_loader:
        # Move the batch to the same device as the model
        images = images.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass and loss
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Report the epoch's average loss per batch
    print(f'Epoch {epoch + 1}/{num_epochs}, Training Loss: {running_loss / len(train_loader)}')

# Evaluate the model on the held-out test split
model.eval()  # switch the module to evaluation mode
correct = 0   # number of correctly classified test samples
total = 0     # number of test samples seen

# No gradients are needed for evaluation
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # The predicted class is the index of the largest logit. Read it from
        # the tensor directly rather than through the legacy .data attribute.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Report NaN for an empty test split instead of raising ZeroDivisionError
accuracy = correct / total if total else float("nan")
print(f'Test Accuracy: {accuracy}')




Epoch 1/10, Training Loss: 0.7376404950017194
Epoch 2/10, Training Loss: 0.5515833386152573
Epoch 3/10, Training Loss: 0.4910364884347268
Epoch 4/10, Training Loss: 0.4510203400508667
Epoch 5/10, Training Loss: 0.42046326930848155
Epoch 6/10, Training Loss: 0.396617473170571
Epoch 7/10, Training Loss: 0.3772628385601911
Epoch 8/10, Training Loss: 0.3600702015799637
Epoch 9/10, Training Loss: 0.345724885029156
Epoch 10/10, Training Loss: 0.33222944004793337
Test Accuracy: 0.8717175734096186


In [4]:
# Persist the trained model's parameters (its state_dict) to disk
checkpoint_path = "/content/alphabet_model.pth"
torch.save(obj=model.state_dict(), f=checkpoint_path)
print(f"Model saved at: {checkpoint_path}")

Model saved at: /content/alphabet_model.pth
