In [32]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np

import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
from torchvision import transforms

import pandas as pd



In [109]:
class Amann(nn.Module):
    """
    Amann is a convolutional neural network for classifying Amharic characters.

    The network expects single-channel 28x28 images; it does not resize its
    input. Three convolution + ReLU + 2x2 max-pooling stages keep the spatial
    size through each convolution (padding matches the kernel) and halve it
    in each pooling step (28 -> 14 -> 7 -> 3). The flattened 64*3*3 features
    are then passed through two linear layers.

    In Amharic there are 34 base characters each with 7 children. Thus the
    end layer has 34*7 = 238 outputs by default.

    A dropout layer is registered on the module, but it is currently not
    applied in ``forward``.

    Args:
        num_classes: number of output scores produced by the last linear
            layer. Defaults to 238.
    """

    def __init__(self, num_classes=238):
        super(Amann, self).__init__()

        # convolution layers (padding chosen so each keeps the spatial size)
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=9, stride=1, padding=4)

        # pooling layer, shared by all three convolution stages
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        # linear layers: 576 features -> 34 hidden units -> class scores
        self.linear = nn.Linear(64*3*3, 34)
        self.linear2 = nn.Linear(34, num_classes)

        # regularization layer (registered, but not used in forward)
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        """
        Compute raw class scores (logits) for a batch of images.

        Args:
            x: tensor of shape (batch, 1, 28, 28); a single unbatched image
                of shape (1, 28, 28) is treated as a batch of one.

        Returns:
            Tensor of shape (batch, num_classes). No softmax is applied.

        Raises:
            RuntimeError: if the spatial size of ``x`` does not reduce to
                3x3 after the three pooling steps.
        """
        # Treat a lone (C, H, W) image as a batch of one.
        if x.dim() == 3:
            x = x.unsqueeze(0)

        # convolution layers
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))

        # Flatten each sample separately. Keeping the batch dimension fixed
        # (instead of view(-1, 576)) makes a wrong-sized input fail in the
        # linear layer rather than being silently re-split into extra rows.
        x = torch.flatten(x, 1)

        # linear layers; dropout is intentionally not applied here
        x = F.relu(self.linear(x))
        x = self.linear2(x)

        return x

In [110]:
class AmharicDataset(Dataset):
    """
    Dataset of character images stored as individual files in one directory.

    The label of each sample is parsed from its file name: the text before
    the first '.' is converted with ``int()`` and shifted by one so that
    labels are 0-based.

    Args:
        data_dir: directory containing the image files.
    """

    def __init__(self, data_dir):
        self.data_dir = data_dir
        # os.listdir returns entries in arbitrary order, so sort to keep the
        # index -> file mapping stable between runs. Hidden entries and
        # sub-directories (e.g. .DS_Store, .ipynb_checkpoints) are skipped
        # because they are not images and would break label parsing.
        self.image_filenames = sorted(
            name for name in os.listdir(data_dir)
            if not name.startswith('.')
            and os.path.isfile(os.path.join(data_dir, name))
        )

    def __getitem__(self, idx):
        """
        Load one sample.

        Returns:
            (image, label): the image as a tensor produced by ``ToTensor``
            from the grayscale picture, and the 0-based integer label.

        Raises:
            ValueError: if the file name does not start with an integer.
        """
        filename = self.image_filenames[idx]
        path = os.path.join(self.data_dir, filename)

        # Convert to single-channel grayscale ('L'); the context manager
        # releases the underlying file handle as soon as pixels are copied.
        with Image.open(path) as source:
            image = source.convert('L')
        image = transforms.ToTensor()(image)

        # File names carry 1-based labels; the loss expects 0-based classes.
        label = int(filename.split('.')[0])
        return image, label - 1

    def __len__(self):
        """Number of image files found in the directory."""
        return len(self.image_filenames)
    

In [111]:
# Open the dataset and split it 90% / 10% into training and held-out test subsets.

data_dir = "../dataset/"
dataset = AmharicDataset(data_dir)
train_size = int(0.9 * len(dataset))
test_size = len(dataset) - train_size

# Seed the split so the same samples land in the test set on every run (given
# the same file order); otherwise accuracy figures from different runs are
# measured on different data and cannot be compared.
split_generator = torch.Generator().manual_seed(0)
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [train_size, test_size], generator=split_generator
)

train_loader = DataLoader(train_dataset, batch_size=80, shuffle=True)
# Evaluation does not depend on sample order, so the test loader is not shuffled.
test_loader = DataLoader(test_dataset, batch_size=80, shuffle=False)



In [112]:
# Build a fresh network and fit it with cross-entropy loss and SGD with momentum.
model = Amann()
criterion = nn.CrossEntropyLoss()
optimzer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

num_epochs = 100
for epoch in range(num_epochs):
    # Loss accumulated since the last progress line (or since the epoch began).
    running_loss = 0.0
    for step, (inputs, labels) in enumerate(train_loader, start=1):
        # Standard update: clear old gradients, forward, backward, step.
        optimzer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimzer.step()

        running_loss += loss.item()

        # Report the mean loss once every 100 batches.
        if step % 100 != 0:
            continue
        print(f"[{epoch + 1}, {step:5d}] loss: {running_loss / 100:.3f}")
        running_loss = 0.0
        
        
        


[1,   100] loss: 5.475
[1,   200] loss: 5.466
[1,   300] loss: 5.440
[2,   100] loss: 4.538
[2,   200] loss: 3.395
[2,   300] loss: 2.565
[3,   100] loss: 1.769
[3,   200] loss: 1.469
[3,   300] loss: 1.332
[4,   100] loss: 1.050
[4,   200] loss: 0.959
[4,   300] loss: 0.897
[5,   100] loss: 0.728
[5,   200] loss: 0.711
[5,   300] loss: 0.670
[6,   100] loss: 0.548
[6,   200] loss: 0.586
[6,   300] loss: 0.549
[7,   100] loss: 0.423
[7,   200] loss: 0.461
[7,   300] loss: 0.474
[8,   100] loss: 0.377
[8,   200] loss: 0.371
[8,   300] loss: 0.406
[9,   100] loss: 0.301
[9,   200] loss: 0.318
[9,   300] loss: 0.349
[10,   100] loss: 0.271
[10,   200] loss: 0.273
[10,   300] loss: 0.276
[11,   100] loss: 0.214
[11,   200] loss: 0.233
[11,   300] loss: 0.243
[12,   100] loss: 0.189
[12,   200] loss: 0.209
[12,   300] loss: 0.226
[13,   100] loss: 0.158
[13,   200] loss: 0.181
[13,   300] loss: 0.175
[14,   100] loss: 0.132
[14,   200] loss: 0.132
[14,   300] loss: 0.169
[15,   100] loss: 0

KeyboardInterrupt: 

In [113]:
# Character lookup table and font; both are only needed by the optional
# visualization that is commented out further down.
df = pd.read_csv("../supported_chars.csv")
prop = FontProperties()
prop.set_file("../Fonts/NotoSerif.ttf")


# Measure classification accuracy on the held-out test split.
model.eval()
correct = 0
total = 0

# Gradients are not needed for evaluation.
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)

        # The predicted class is the index of the largest score in each row.
        predicted = outputs.max(dim=1).indices

        total += labels.shape[0]
        correct += int(predicted.eq(labels).sum())

        # Optional: save each test image together with its prediction.
        # for i in range(inputs.size(0)):
        #     image = inputs[i].numpy()
        #     is_correct = predicted[i].item() == labels[i].item()

        #     label_str = "Correct" if is_correct else "Mistaken"
        #     character = df["Character"][labels[i].item()]
        #     predicted_char = df["Character"][predicted[i].item()]

        #     # Save test results
        #     plt.imshow(np.squeeze(image), cmap='gray')
        #     plt.title(f"{label_str}: Actual -> {character}, predicted -> {predicted_char}", fontproperties=prop)
        #     plt.savefig(f"../test_results/{label_str}_{i}_{correct}.png")

# Percentage of test samples classified correctly.
accuracy = 100 * correct / total

print(f"Accuracy on test set: {accuracy:.2f}%")



Accuracy on test set: 85.51%


In [114]:
# Write only the learned parameters (the state dict) to disk, not the module object.
torch.save(obj=model.state_dict(), f="amann.pt")

In [115]:
%store model 

  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (conv3): Conv2d(32, 64, kernel_size=(9, 9), stride=(1, 1), padding=(4, 4))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (linear): Linear(in_features=576, out_features=34, bias=True)
  (linear2): Linear(in_features=34, out_features=238, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)
                        Proper storage of interactively declared classes (or instances
                        of those classes) is not possible! Only instances
                        of classes in real modules on file system can be %store'd.

