In [1]:
import cv2 as cv
import torch

from pathlib import Path
from PIL import Image
from torch import nn
from torchvision import transforms


# Padding, in pixels, added on every side of a letter's bounding box when it is cropped.
MARGIN_SIZE = 32
# Channel count passed to the classifier as `hidden_units`.
HIDDEN_UNITS = 96
# Size every letter crop is resized to before it is handed to the model.
TRANSFORM_DATA_SIZE = (64, 64)

# Preprocessing for a single letter crop: grayscale replicated to 3 channels,
# resize to TRANSFORM_DATA_SIZE, then conversion to a tensor.
LETTERS_TRANSFORM = transforms.Compose([
	transforms.Grayscale(3),
	transforms.Resize(TRANSFORM_DATA_SIZE),
	transforms.ToTensor(),
])

In [2]:
# Class labels in model-output order: the predicted index is used directly as an
# index into this list.
# NOTE(review): 'Ё' is listed before 'А' — presumably this mirrors the class order
# used when the checkpoint was trained; confirm before reordering.
IMAGE_CLASSES = list('ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ')
IMAGE_N_CLASSES = len(IMAGE_CLASSES)

In [3]:
def read_image_and_extract_letters(filepath, margin=None, min_size=80):
	"""
	Read an image from disk and cut out one crop per detected letter.

	Pipeline: rotate 90 degrees counter-clockwise, convert to grayscale, median
	blur, inverted adaptive threshold, morphological closing, external contour
	detection. Contours are ordered by the x coordinate of their bounding box
	(left to right) and boxes narrower or shorter than `min_size` are dropped.
	Each remaining box is cropped from the binarized image with `margin` pixels
	of padding and inverted back with `bitwise_not`.

	Args:
		filepath: Path of the image to read (str or path-like).
		margin: Padding in pixels around each bounding box. Defaults to
			MARGIN_SIZE when None.
		min_size: Minimum bounding-box width and height, in pixels, for a
			contour to be kept.

	Returns:
		A list of PIL images, one per kept contour, ordered left to right.

	Raises:
		FileNotFoundError: If OpenCV cannot read an image from `filepath`.
	"""
	if margin is None:
		margin = MARGIN_SIZE

	# cv.imread signals failure by returning None instead of raising.
	image = cv.imread(str(filepath))
	if image is None:
		raise FileNotFoundError(f'Could not read image: {filepath}')

	image = cv.rotate(image, cv.ROTATE_90_COUNTERCLOCKWISE)
	image = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
	image = cv.medianBlur(image, 3)
	image = cv.adaptiveThreshold(image, 255, cv.ADAPTIVE_THRESH_GAUSSIAN_C, cv.THRESH_BINARY_INV, 31, 11)

	# Closing merges nearby strokes so that each letter yields a single external contour.
	kernel = cv.getStructuringElement(cv.MORPH_ELLIPSE, (5, 5))
	image = cv.morphologyEx(image, cv.MORPH_CLOSE, kernel, iterations=3)

	image_contours, _ = cv.findContours(image, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
	# Compute each bounding box once, then order the boxes left to right.
	bounding_boxes = sorted(
		(cv.boundingRect(image_contour) for image_contour in image_contours),
		key=lambda bounding_box: bounding_box[0],
	)

	image_letters = []
	for x, y, width, height in bounding_boxes:
		if width < min_size or height < min_size:
			continue

		# Clamp the start of the crop at 0: a negative start index would wrap
		# around to the far edge of the array and give an empty or wrong crop.
		# The end index needs no clamping because slicing stops at the array bounds.
		top = max(y - margin, 0)
		left = max(x - margin, 0)
		image_letter = image[top:y + height + margin, left:x + width + margin].copy()
		image_letter = cv.bitwise_not(image_letter)
		image_letters.append(Image.fromarray(image_letter))

	return image_letters

In [4]:
def test_image_letters_step(model, transform, image_letters, image_letters_classes, image_classes):
	model.eval()
	with torch.inference_mode():
		for image_letter_index, image_letter in enumerate(image_letters):
			image_pred = model(transform(image_letter).unsqueeze(dim=0))
			
			image_pred_class = torch.argmax(torch.softmax(image_pred, dim=1), dim=1)
			image_pred_class = image_classes[image_pred_class]
			
			image_letter_class = image_letters_classes[image_letter_index]
			print(image_letter_class, '=', image_pred_class, end=', ')
	print()

In [5]:
# TinyVGG 64x64
class TinyVGG_64x64(nn.Module):
    """
    TinyVGG-style classifier, modelled on the network shown at
    https://poloclub.github.io/cnn-explainer/

    Two convolutional blocks (each: two same-padded 3x3 conv + ReLU pairs and a
    2x2 max-pool) followed by a flatten and a single linear layer. The linear
    layer expects a 16x16 feature map, i.e. 64x64 inputs halved twice.

    Args:
        in_channels: Number of channels of the input images.
        hidden_units: Number of channels produced by every conv layer.
        out_features: Number of output classes (logits).
    """

    def __init__(self, in_channels: int, hidden_units: int, out_features: int) -> None:
        super().__init__()

        def conv_block(block_in_channels: int) -> list:
            # conv -> ReLU -> conv -> ReLU -> max-pool; only the pool changes the spatial size.
            return [
                nn.Conv2d(block_in_channels, hidden_units, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.Conv2d(hidden_units, hidden_units, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(2),
            ]

        # Block 1 consumes the input channels, block 2 the hidden channels.
        layers = conv_block(in_channels) + conv_block(hidden_units)
        # Classifier: after two poolings each feature map is 16x16.
        layers += [
            nn.Flatten(),
            nn.Linear(hidden_units * 16 * 16, out_features),
        ]

        # Kept as one flat Sequential so the submodule indices, and therefore
        # the state_dict keys (tinymodel.0 ... tinymodel.11), stay unchanged.
        self.tinymodel = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor):
        return self.tinymodel(x)

In [6]:
image_letters = read_image_and_extract_letters('IMG.JPG')
# Expected label of each extracted letter, in extraction order (left to right
# in the rotated image).
image_letters_classes = ['А', 'В', 'Г', 'К', 'М', 'Н', 'Е']
image_n_letters = len(image_letters_classes)

# Fail early if segmentation found a different number of letters than were
# labelled: the evaluation step pairs letters and labels by position.
assert len(image_letters) == image_n_letters, (
	f'Expected {image_n_letters} letters but extracted {len(image_letters)}'
)

In [7]:
# Build the network with the same hyperparameters as the saved weights so that
# every state_dict key and tensor shape matches.
model = TinyVGG_64x64(in_channels=3, hidden_units=HIDDEN_UNITS, out_features=IMAGE_N_CLASSES)
# map_location='cpu' lets a checkpoint saved from a GPU session load on a
# CPU-only machine; the model itself is never moved off the CPU in this notebook.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source (or pass weights_only=True on torch versions that support it).
model.load_state_dict(torch.load(f='TinyVGG_P20_H96_64x64.pth', map_location='cpu'))

<All keys matched successfully>

In [8]:
# Classify every extracted letter and print "expected = predicted" pairs.
test_image_letters_step(
	model=model,
	transform=LETTERS_TRANSFORM,
	image_letters=image_letters,
	image_letters_classes=image_letters_classes,
	image_classes=IMAGE_CLASSES,
)

А = А, В = В, Г = Г, К = К, М = М, Н = Н, Е = Е, 
