In [1]:
import cv2 as cv
import torch

from PIL import Image
from pathlib import Path
from torch import nn
from torchvision.transforms import v2

In [2]:
# --- Model configuration ---
# Class labels in model output order: predict_image_letters maps the argmax
# index i of the model output to MODEL_CLASSES[i].
# NOTE(review): presumably this order must match the class indexing used when
# the checkpoint was trained — confirm before editing the string.
MODEL_CLASSES = 'ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ'
MODEL_N_CHANNELS = 3 # channels fed to the model; also the channel count emitted by v2.Grayscale below
MODEL_N_CLASSES = len(MODEL_CLASSES)
# Feature maps per conv layer; passed to TinyVGG as hidden_units.
MODEL_HIDDEN_UNITS = 96
# Checkpoint loaded in main() via torch.load and applied with load_state_dict.
MODEL_FILEPATH = Path('TinyVGG_P20_H96_64x64.pth')

# --- Preprocessing configuration ---
# Default margin (in pixels) added around each letter's bounding box when cropping.
TRANSFORM_MARGIN_SIZE = 32
# Size every letter crop is resized to; must match the input size the TinyVGG
# classifier layer is built for (64x64).
TRANSFORM_DATA_SIZE = (64, 64)

# Applied to each PIL letter crop before inference: convert to grayscale with
# MODEL_N_CHANNELS output channels, resize, convert to an image tensor, then
# cast to float32 with value scaling enabled (scale=True).
MODEL_TRANSFORM = v2.Compose([
	v2.Grayscale(num_output_channels=MODEL_N_CHANNELS),
	v2.Resize(size=TRANSFORM_DATA_SIZE),
	v2.ToImage(),
	v2.ToDtype(torch.float32, scale=True)
])

# --- Demo input ---
# Image to run the demo on.
IMAGE_FILEPATH = Path('IMG.JPG')
# Expected letters for IMAGE_FILEPATH, indexed by the position of each extracted
# crop (crops are ordered by the x coordinate of their bounding box).
IMAGE_CLASSES = 'АВГКМНЕ'

In [3]:
class TinyVGG(nn.Module):
	"""
	Small VGG-style CNN classifier modelled on TinyVGG from:
	https://poloclub.github.io/cnn-explainer/

	The network is two convolutional blocks (Conv-ReLU-Conv-ReLU-MaxPool each)
	followed by a flatten + linear classifier that outputs raw logits.

	Args:
		in_channels: number of channels of the input images.
		hidden_units: number of feature maps produced by every conv layer.
		out_features: number of output classes (length of the logits vector).
		input_size: (height, width) of the input images. The default (64, 64)
			yields the historical ``hidden_units*16*16`` classifier width, so
			checkpoints saved with the previous definition still load.

	Raises:
		ValueError: if ``input_size`` is too small to survive the two 2x poolings.
	"""
	def __init__(self, in_channels: int, hidden_units: int, out_features: int, input_size: tuple = (64, 64)) -> None:
		super().__init__()

		# The 3x3 convolutions use padding=1 and therefore keep height/width;
		# each MaxPool2d(2) floor-halves them. Two pools => floor(floor(n/2)/2).
		height, width = input_size
		pooled_height = (height // 2) // 2
		pooled_width = (width // 2) // 2
		if pooled_height < 1 or pooled_width < 1:
			raise ValueError(f'input_size {input_size} is too small: need at least 4x4 pixels')

		self.tinymodel = nn.Sequential(
			# Conv2d Block 1
			nn.Conv2d(in_channels, hidden_units, kernel_size=3, padding=1),
			nn.ReLU(),
			nn.Conv2d(hidden_units, hidden_units, kernel_size=3, padding=1),
			nn.ReLU(),
			nn.MaxPool2d(2),

			# Conv2d Block 2
			nn.Conv2d(hidden_units, hidden_units, kernel_size=3, padding=1),
			nn.ReLU(),
			nn.Conv2d(hidden_units, hidden_units, kernel_size=3, padding=1),
			nn.ReLU(),
			nn.MaxPool2d(2),

			# Classifier Block
			nn.Flatten(),
			nn.Linear(hidden_units * pooled_height * pooled_width, out_features)
		)

	def forward(self, x: torch.Tensor) -> torch.Tensor:
		"""Run the whole Sequential stack on ``x`` and return the class logits."""
		return self.tinymodel(x)

In [4]:
def read_image_and_extract_letters(filepath=IMAGE_FILEPATH, margin=TRANSFORM_MARGIN_SIZE, min_letter_size=80):
	"""
	Read an image, binarize it and cut out one crop per detected letter.

	Pipeline: load -> rotate 90 degrees counter-clockwise -> grayscale ->
	median blur -> inverted adaptive threshold -> morphological closing ->
	external contours -> bounding boxes ordered by x -> padded crops.

	Args:
		filepath: path of the image to read (str or path-like).
		margin: number of pixels added on every side of a letter's bounding box.
		min_letter_size: bounding boxes narrower or shorter than this many
			pixels are skipped.

	Returns:
		List of PIL images, one per kept bounding box, ordered by the box's
		x coordinate. Each crop is taken from the binarized image and inverted
		with ``cv.bitwise_not`` before conversion.

	Raises:
		OSError: if the image cannot be read (``cv.imread`` returned ``None``).
	"""
	# str(): some OpenCV builds reject pathlib.Path objects as filename.
	image = cv.imread(str(filepath))
	if image is None:
		# imread signals failure by returning None instead of raising.
		raise OSError(f'Could not read image (missing or unsupported file): {filepath}')

	image = cv.rotate(image, cv.ROTATE_90_COUNTERCLOCKWISE)
	image = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
	image = cv.medianBlur(image, 3)
	image = cv.adaptiveThreshold(image, 255, cv.ADAPTIVE_THRESH_GAUSSIAN_C, cv.THRESH_BINARY_INV, 31, 11)

	kernel = cv.getStructuringElement(cv.MORPH_ELLIPSE, (5, 5))
	image = cv.morphologyEx(image, cv.MORPH_CLOSE, kernel, iterations=3)

	image_contours, _ = cv.findContours(image, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)

	# Compute each bounding box once, then order the boxes by their x coordinate.
	# sorted() is stable, so boxes sharing the same x keep the contour order.
	bounding_boxes = sorted(
		(cv.boundingRect(image_contour) for image_contour in image_contours),
		key=lambda bounding_box: bounding_box[0]
	)

	image_height, image_width = image.shape[:2]
	image_letters = []
	for x, y, width, height in bounding_boxes:
		if width < min_letter_size or height < min_letter_size:
			continue

		# Clamp the padded box to the image: a negative start index would wrap
		# around to the opposite edge and yield an empty or wrong crop.
		top = max(y - margin, 0)
		left = max(x - margin, 0)
		bottom = min(y + height + margin, image_height)
		right = min(x + width + margin, image_width)

		image_letter = image[top:bottom, left:right].copy()
		image_letter = cv.bitwise_not(image_letter)
		image_letters.append(Image.fromarray(image_letter))

	return image_letters

In [5]:
def predict_image_letters(model, transform, letters, image_classes, model_classes):
	"""
	Classify every letter crop and print "expected = predicted" pairs on one line.

	Args:
		model: module called on a batch of one transformed letter; its output
			is reduced with argmax over dim=1.
		transform: callable turning one letter into a tensor (a batch
			dimension is added here with ``unsqueeze``).
		letters: iterable of letter crops, in the order they should be reported.
		image_classes: expected label for each letter, indexed by position.
			When there are more letters than labels, '?' is printed as the
			expected label instead of raising IndexError.
		model_classes: sequence mapping a predicted class index to its label.

	Returns:
		None. The results are written to stdout, followed by a newline.
	"""
	model.eval()
	with torch.inference_mode():
		for letter_id, letter in enumerate(letters):
			letter_logits = model(transform(letter).unsqueeze(dim=0))

			# argmax is taken on the raw output: softmax is monotonic, so it
			# cannot change which class wins. The batch holds a single sample,
			# hence .item() yields the one predicted index as a Python int.
			letter_pred_index = int(torch.argmax(letter_logits, dim=1).item())
			letter_pred_class = model_classes[letter_pred_index]

			# Extra crops (more detections than labels) have no known label.
			letter_class = image_classes[letter_id] if letter_id < len(image_classes) else '?'
			print(letter_class, '=', letter_pred_class, end=', ')
	print()

In [6]:
def main():
	"""
	Run the demo end to end: extract letter crops from IMAGE_FILEPATH, build
	a TinyVGG, load its weights from MODEL_FILEPATH and print the predictions
	next to the expected IMAGE_CLASSES labels.
	"""
	letters = read_image_and_extract_letters(IMAGE_FILEPATH)
	model = TinyVGG(MODEL_N_CHANNELS, MODEL_HIDDEN_UNITS, MODEL_N_CLASSES)

	# map_location='cpu': the model is never moved off the CPU here, and this
	# also lets a checkpoint saved from a GPU load on a machine without one.
	# weights_only=True: the file is only used as a state dict, so restrict
	# unpickling to tensors/containers instead of arbitrary objects.
	state_dict = torch.load(f=MODEL_FILEPATH, map_location='cpu', weights_only=True)
	model.load_state_dict(state_dict)

	predict_image_letters(model, MODEL_TRANSFORM, letters, IMAGE_CLASSES, MODEL_CLASSES)

In [7]:
# Entry point: run the demo only when this code is executed directly
# (where __name__ is '__main__'), not when it is imported as a module.
if __name__ == '__main__':
	main()

А = А, В = В, Г = Г, К = К, М = М, Н = Н, Е = Е, 
