# Training

In [21]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import os


class_to_name = {}
dataset_path = r"..\..\dataset\mainDataset"

epoch_amount = 200
batch_size = 32
rotation = 5
terminate_epoch = 125  
dataset = "mainDataset/"



charDict = {}
for i, letter in enumerate( sorted( os.listdir( dataset_path + "/train" ) ) ):
    charDict[i] = letter

print(charDict)

num_classes = len(charDict)



class iztechCNN(nn.Module):
    def __init__(self, num_classes = num_classes, image_size = 32 , kernel_sizes = (3, 3, 3) ):
        super(iztechCNN,self).__init__()
        
        final_kernel = 128
        
        self.FC_input = image_size * image_size * final_kernel // (2**3)**2 

        size_1 = kernel_sizes[0]
        size_2 = kernel_sizes[1]
        size_3 = kernel_sizes[2]

        self.conv1 = nn.Conv2d(1, final_kernel//4,               kernel_size=size_1,    padding = (size_1 -1 ) // 2)
        self.conv2 = nn.Conv2d(final_kernel//4, final_kernel//2, kernel_size=size_2,    padding = (size_2 -1 ) // 2)
        self.conv3 = nn.Conv2d(final_kernel//2, final_kernel,    kernel_size=size_3,    padding = (size_3 -1 ) // 2)

        self.pool = nn.MaxPool2d(2,2)

        self.dropout = nn.Dropout(0.3)          #Will be used if overfitting is detected #Overfitting was detected!

        self.fc1 = nn.Linear(self.FC_input, final_kernel)
        self.fc2 = nn.Linear(final_kernel, num_classes)

    def forward(self,x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))

        x = x.view(-1,self.FC_input)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
       
        return self.fc2(x)


{0: '(', 1: ')', 2: '+', 3: '0o', 4: '1', 5: '2', 6: '3', 7: '4', 8: '5s', 9: '6', 10: '7', 11: '8', 12: '9g', 13: '[', 14: ']', 15: 'a', 16: 'b', 17: 'c', 18: 'd', 19: 'e', 20: 'f', 21: 'h', 22: 'horizontal_line', 23: 'n', 24: 'p', 25: 'r', 26: 'sqrt', 27: 't', 28: 'vertical_line', 29: 'x', 30: 'y'}


## Transform - Data Augmentation

The purpose of this part is to make the images in the dataset have more variety in every epoch. This part may be used to normalize the datasets as well, even simultaneosly.

In [22]:


from torchvision import datasets, transforms
from torch.utils.data import DataLoader

train_transform = transforms.Compose([
    transforms.Resize((32,32),interpolation=transforms.InterpolationMode.NEAREST),
    transforms.Grayscale(num_output_channels=1),
    transforms.RandomRotation(degrees=5),
    transforms.GaussianBlur(kernel_size=3, sigma=(0.1, 1.0)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

val_transform = transforms.Compose([
    transforms.Resize((32,32),interpolation=transforms.InterpolationMode.NEAREST),
    transforms.Grayscale(num_output_channels=1),
    transforms.GaussianBlur(kernel_size=3, sigma=(0.1, 1.0)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])


train_dataset = datasets.ImageFolder(r'../../dataset/'+dataset+"train", transform=train_transform)
val_dataset = datasets.ImageFolder(r'../../dataset/'+dataset+"test", transform=val_transform)




train_loader = DataLoader(train_dataset, batch_size=batch_size, num_workers=4,shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=4,shuffle=False)




## Checking if PyTorch recognizes my GPU

In [23]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device) 
print(torch.cuda.get_device_name())



cuda
NVIDIA GeForce RTX 3050 6GB Laptop GPU


## Training Part

In [33]:
import copy
import time


model = iztechCNN(num_classes=num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

counter = 0

biggest = 0.0

most_accurate_model = None

start_time = time.time()


val_accuracy_over_epoch = []

train_loss_over_epoch = []
val_loss_over_epoch = []
learning_rate_over_epoch = []

most_accurate_train_loss_over_batch = None


for epoch in range(epoch_amount):
    
    train_loss_over_batch = []

    model.train()
    running_loss = 0.0

    for i, (images, labels) in enumerate(train_loader):
        
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(images)

        loss = criterion(outputs, labels)

        loss.backward()

        optimizer.step()

        running_loss += loss.item()

        train_loss_over_batch.append(loss.item())

        if i % 10 == 9:
            print(f"[Epoch {epoch+1}, Batch {i+1}/{len(train_loader)}] Train Loss: {loss.item():.4f}")

    avg_train_loss = running_loss / len(train_loader)

    
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    avg_val_loss = val_loss / len(val_loader)
    accuracy = 100 * correct / total

    print(f"[Epoch {epoch+1}] Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}, Val Accuracy: {accuracy:.2f}%")

    train_loss_over_epoch.append(avg_train_loss)
    val_loss_over_epoch.append(avg_val_loss)
    learning_rate_over_epoch.append(optimizer.param_groups[0]['lr'])
    
    val_accuracy_over_epoch.append(accuracy)
    scheduler.step()

    if counter >= terminate_epoch:
        print(f"There are no further improve in the last {terminate_epoch} epochs. Training terminates itself.")
        break

    if accuracy <= biggest:
        counter += 1
        continue
    
    counter = 0
    biggest = accuracy
    
    most_accurate_model = copy.deepcopy(model.state_dict())
    most_accurate_train_loss_over_batch = copy.deepcopy(train_loss_over_batch)

    

    

print(f"Biggest Accuracy : {biggest}")
print(time.strftime('%H:%M:%S', time.gmtime( time.time() - start_time )))
        




[Epoch 1, Batch 10/2402] Train Loss: 3.3885
[Epoch 1, Batch 20/2402] Train Loss: 3.3745
[Epoch 1, Batch 30/2402] Train Loss: 3.1808
[Epoch 1, Batch 40/2402] Train Loss: 3.1352
[Epoch 1, Batch 50/2402] Train Loss: 2.5514
[Epoch 1, Batch 60/2402] Train Loss: 1.9839
[Epoch 1, Batch 70/2402] Train Loss: 2.1328
[Epoch 1, Batch 80/2402] Train Loss: 1.5721
[Epoch 1, Batch 90/2402] Train Loss: 1.7413
[Epoch 1, Batch 100/2402] Train Loss: 1.8034
[Epoch 1, Batch 110/2402] Train Loss: 0.9416
[Epoch 1, Batch 120/2402] Train Loss: 1.1730
[Epoch 1, Batch 130/2402] Train Loss: 0.8437
[Epoch 1, Batch 140/2402] Train Loss: 0.6236
[Epoch 1, Batch 150/2402] Train Loss: 0.7731
[Epoch 1, Batch 160/2402] Train Loss: 0.8302
[Epoch 1, Batch 170/2402] Train Loss: 1.6599
[Epoch 1, Batch 180/2402] Train Loss: 0.9361
[Epoch 1, Batch 190/2402] Train Loss: 0.8657
[Epoch 1, Batch 200/2402] Train Loss: 1.1701
[Epoch 1, Batch 210/2402] Train Loss: 0.7675
[Epoch 1, Batch 220/2402] Train Loss: 1.4708
[Epoch 1, Batch 230

## Saving the best model

In [34]:
import os
import matplotlib.pyplot as plt

model_number = len(os.listdir("models/"))

save_name = f'models/charcnn_{model_number}.pth'

torch.save(most_accurate_model, save_name)

print(f"Most Accurate model has been saved to {save_name}")

x_batch = range(len(train_loader))
x_epoch = range(1, len(val_accuracy_over_epoch) + 1)

fig_save = f"../../Documentation/figures/charcnn_{model_number}/"
os.makedirs(fig_save,exist_ok=True)

# Saving the classes that were used in the model
try:
    with open(f"./model_classes/charcnn_{model_number}_classes.txt","a+") as f:
        for char_class in charDict:
            f.write(f"{char_class} {charDict[char_class]}\n")
except Exception as e:
    print(f"Error on saving the classes: {e}")

    
# Val Accuracy over Epoch

plt.plot( x_epoch , val_accuracy_over_epoch )
plt.xlabel("Epoch")
plt.ylabel("Validation Accuracy")
plt.title("Validation Accuracy x Epoch")
plt.legend(["Val Accuracy"])
plt.grid(True)

plt.tight_layout()
plt.savefig(fig_save + "val_accuracy_over_epoch.png")
plt.close()


# Learning Rate over Epoch

plt.plot( x_epoch , learning_rate_over_epoch )
plt.xlabel("Epoch")
plt.ylabel("Learning Rate")
plt.title("Learning Rate x Epoch")
plt.legend(["lr"])
plt.grid(True)

plt.tight_layout()
plt.savefig(fig_save + "learning_rate_over_epoch.png")
plt.close()


# Val Loss & Train Loss over Epoch

plt.plot( x_epoch , val_loss_over_epoch )
plt.plot( x_epoch , train_loss_over_epoch )
plt.xlabel("Epoch")
plt.ylabel("Validation Loss")
plt.title("Validation Loss x Epoch")
plt.grid(True)

plt.legend(["Val Loss","Train Loss"])

plt.tight_layout()
plt.savefig(fig_save + "val_train_loss_over_epoch.png")
plt.close()



# Train Loss Over Batch in Most Accurate Model



plt.plot( x_batch , most_accurate_train_loss_over_batch )
plt.xlabel("Batch idx")
plt.ylabel("Training Loss")
plt.title("Training Loss x Batch")
plt.legend(["Loss"])
plt.grid(True)

plt.tight_layout()
plt.savefig(fig_save + "loss_over_batch.png")
plt.close()




Most Accurate model has been saved to models/charcnn_15.pth


## PC SHUTDOWN

In [35]:
os.system("shutdown /s /t 60")

0

# Testing the model

In [20]:

from PIL import Image
import os

chosen_model = "15"
classes_path = f"./model_classes/charcnn_{chosen_model}_classes.txt"


# Importing the classes from the text file
with open(classes_path, "r+") as f:
    classes = f.read()
    classes = classes.split("\n")
charDictTesting = {}
for class_whitespace in classes[:-1]:
    class_char = class_whitespace.split(" ")

    charDictTesting[int(class_char[0])] = class_char[1]


test_model = iztechCNN(num_classes=num_classes, image_size=32).to(device)
test_model.load_state_dict(torch.load(f"./models/charcnn_{chosen_model}.pth", map_location="cuda" ,weights_only=True))
test_model.eval()

path1 = f"../../dataset/mainDataset/test"

with torch.no_grad():
    for letter in os.listdir(path1):
        path2 = os.path.join(path1,letter)
        letter_test_amount = len(os.listdir(path2))
        letter_counter = 0
        for image in os.listdir(path2):
            image_path = os.path.join(path2,image)

            img = Image.open(image_path).convert("L")

            
            img = val_transform(img)
            img = img.to(device)
            
            output = test_model(img)
            _, predicted = torch.max(output, 1)

            charInt = predicted.tolist()[0]

            char = charDictTesting[charInt]

            if char == letter:
                letter_counter += 1
        print(f"Letter: {letter}   Accuracy: {(letter_counter/letter_test_amount):.3f}")



        


KeyboardInterrupt: 

In [49]:
import cv2 
import numpy as np
from PIL import Image

from torchvision import transforms

size = 32

test_model = iztechCNN(num_classes=num_classes,image_size=size)

test_model.load_state_dict(torch.load(r".\models\charcnn_15.pth",map_location = "cuda",weights_only=True))
test_model.eval()


img = cv2.imread(r".\TestImages\ToStringTests_1.png", cv2.IMREAD_GRAYSCALE)

blur = cv2.GaussianBlur(img,(3,3),1)

_, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)


for index, contour in enumerate(contours):
    x, y, w, h = cv2.boundingRect(contour)
    max_edge = max(w,h)

    drawn_contour = np.zeros_like(thresh)

    cv2.drawContours(drawn_contour, [contour],-1,255, thickness=cv2.FILLED)



    blank = np.zeros((max_edge,max_edge))

    x1,x2 = int( (max_edge - h) / 2 ), int( (max_edge + h) / 2 )
    y1,y2 = int( (max_edge - w) / 2 ), int( (max_edge + w) / 2 )

    blank[x1:x2,y1:y2] = drawn_contour[y:y+h, x:x+w]
    blank = cv2.copyMakeBorder(blank,2,2,2,2,borderType=cv2.BORDER_CONSTANT,value=0)
    
    letter = Image.fromarray(blank.astype(np.uint8))

    
    #cv2.imshow("lala",blank)

    letter = val_transform(letter)
    letter = letter.float()
    letter = letter.unsqueeze(0)
    
    with torch.no_grad():
        output = test_model(letter)
        _, predicted = torch.max(output, 1)

        charInt = predicted.tolist()[0]
        print(charInt)
        print(charDict[charInt])
    #cv2.waitKey(0)
    



10
7
2
+
6
3


# Image to String Codes

## Classes and Main Libraries

In [156]:
from typing import List
import numpy as np
import cv2


class Symbol:
    def __init__(self,char,confidence=None,center=None,size=None):
        self.char = char
        self.confidence = confidence
        self.x, self.y = center
        self.w, self.h = size
        self.used = False

    def isLine(self):
        return "line" in self.char
    
    def __repr__(self): 
        return f"Symbol('{self.char}', ({self.x}, {self.y}), {self.w}x{self.h})"
    
    def __eq__(self, other):
        if not isinstance(other, Symbol):
            return False
        return (self.char, self.x, self.y, self.w, self.h) == (other.char, other.x, other.y, other.w, other.h)

    def __hash__(self):
        # Eğer __eq__'yi override ediyorsan, __hash__'i de buna uygun tanımlaman gerekir.
        return hash((self.char, self.x, self.y, self.w, self.h))

    def __repr__(self):
        return f" ({self.char}  x: {self.x}) "                                 #f"Symbol('{self.char}', ({self.x}, {self.y}), {self.w}x{self.h})"

uppercase_bit_size = 6
lowercase_bit_size = 6  #Based on trials
numbers_bit_size = 6
lower_operator_number_bit_size = 6

symbols = []

DL_model = iztechCNN(num_classes=num_classes)
DL_model.load_state_dict(torch.load(f"./models/charcnn_{chosen_model}.pth",map_location="cuda",weights_only=True))
DL_model.eval()


iztechCNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (dropout): Dropout(p=0.3, inplace=False)
  (fc1): Linear(in_features=2048, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=31, bias=True)
)

In [158]:
def SortByX(symbols: List[Symbol]):

    return sorted(symbols, key=lambda s: s.x)

def SortDivisionLinesBySize(symbols: List[Symbol]):
    return sorted(
        [s for s in symbols if "horizontal_line" in s.char],
        key=lambda s: s.h * s.w,
        reverse=True
    )


def ProcessLine(symbols: List[Symbol]):
    symbols = SortByX(symbols)

    result = ""

    for s in symbols:
        result += f"{s.char}"

    return result

def ProcessDivision(div_symbol, above, below, tolerance = 50):
    
    print(f"Above: {above}")
    print(f"Below: {below}")
    
    above_line = Process(above)
    below_line = Process(below)
    

    result = f"({above_line}) / ({below_line})"

    return Symbol(result, confidence=100.0,center=(div_symbol.x, div_symbol.y), size=(div_symbol.w, div_symbol.h))

def Process(symbols):
    symbols_x_sorted = SortByX(symbols)
    new_symbols = []
    used = set()

    division_lines = SortDivisionLinesBySize(symbols)

    for div_symbol in division_lines:
        if div_symbol in used:
            continue
        
        tolerance = div_symbol.w / 10
        div_left = div_symbol.x - div_symbol.w // 2 - tolerance
        div_right = div_symbol.x + div_symbol.w // 2 + tolerance

        above = []
        below = []

        for s2 in symbols:
            if s2 is div_symbol or s2 in used:
                continue
            if div_left <= s2.x <= div_right:
                if s2.y < div_symbol.y:
                    above.append(s2)
                elif s2.y > div_symbol.y:
                    below.append(s2)

        if not above or not below:
            minus = Symbol("-", 0.0, (div_symbol.x, div_symbol.y), (div_symbol.w, div_symbol.h))
            new_symbols.append(minus)
            used.add(div_symbol)
            continue

        # This is a recursive function
        new_symbol = ProcessDivision(div_symbol, above, below, tolerance)

        new_symbols.append(new_symbol)
        used.update(above + below + [div_symbol])

    # sorting of remainings
    remaining_symbols = [s for s in symbols if s not in used]
    return ProcessLine(SortByX(new_symbols + remaining_symbols))

"""
def Process(symbols):

    symbols_x_sorted = SortByX(symbols)
    new_symbols = []
    i = 0
    used = set()

    while i < len(symbols_x_sorted):
        s = symbols_x_sorted[i]

        if "horizontal_line" in s.char:
            # define range of x for division
            tolerance = s.w/10
            div_left = s.x - s.w//2 - tolerance
            div_right = s.x + s.w//2 + tolerance
            
            above = []
            below = []
            # check all symbols to find ones above and below this line
            for s2 in symbols: 
                if s2 is s:
                    continue
                isItIn = div_left <= s2.x <= div_right
                if not isItIn:
                    continue
                if s2.y < s.y:
                    above.append(s2)
                elif s2.y > s.y:
                    below.append(s2)

            # if nothing above and below, it's just a minus
            if not above or not below:
                used.add(s)
                minus = Symbol("-", 0.0, (s.x,s.y), (s.w, s.h))
                new_symbols.append(minus)
                symbols_x_sorted = [s3 for s3 in symbols_x_sorted if s3 not in used]
                i += 1
                continue
            
            used.update(above + below +[s])

            new_symbol = ProcessDivision(s,above,below)
            new_symbols.append(new_symbol)

            symbols_x_sorted = [s3 for s3 in symbols_x_sorted if s3 not in used]
            i = 0

        else:       # not a horizontal line, just move on
            
            i += 1

    #for s in symbols_x_sorted:
    #    print(f"Deneme: {s}")

    return ProcessLine(new_symbols+symbols_x_sorted)

"""



'\ndef Process(symbols):\n\n    symbols_x_sorted = SortByX(symbols)\n    new_symbols = []\n    i = 0\n    used = set()\n\n    while i < len(symbols_x_sorted):\n        s = symbols_x_sorted[i]\n\n        if "horizontal_line" in s.char:\n            # define range of x for division\n            tolerance = s.w/10\n            div_left = s.x - s.w//2 - tolerance\n            div_right = s.x + s.w//2 + tolerance\n\n            above = []\n            below = []\n            # check all symbols to find ones above and below this line\n            for s2 in symbols: \n                if s2 is s:\n                    continue\n                isItIn = div_left <= s2.x <= div_right\n                if not isItIn:\n                    continue\n                if s2.y < s.y:\n                    above.append(s2)\n                elif s2.y > s.y:\n                    below.append(s2)\n\n            # if nothing above and below, it\'s just a minus\n            if not above or not below:\n         

## Tests

In [218]:
img_paths = [f"./TestImages/ToStringTests_1.png", f"./TestImages/ToStringTests_2.png"]

for img_path in img_paths:
    symbols = []

    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

    blur = cv2.GaussianBlur(img,(3,3),1)

    _, thresh = cv2.threshold(blur,0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        max_edge = max(w,h)

        drawn_contour = np.zeros_like(thresh)

        cv2.drawContours(drawn_contour, [contour],-1,255, thickness=cv2.FILLED)



        blank = np.zeros((max_edge,max_edge))

        x1,x2 = int( (max_edge - h) / 2 ), int( (max_edge + h) / 2 )
        y1,y2 = int( (max_edge - w) / 2 ), int( (max_edge + w) / 2 )

        blank[x1:x2,y1:y2] = drawn_contour[y:y+h, x:x+w]
        blank = cv2.copyMakeBorder(blank,2,2,2,2,borderType=cv2.BORDER_CONSTANT,value=0)
        
        letter = Image.fromarray(blank.astype(np.uint8))

        letter = val_transform(letter)
        letter = letter.float()
        letter = letter.unsqueeze(0)
        

        with torch.no_grad():
            output = DL_model(letter)
            _, predicted = torch.max(output,1)

        charInt = predicted.tolist()[0]
        char = charDict[charInt]

        symbols.append(Symbol(char,center = (x + w//2 , y + h//2 ), size= (w , h)))    

        
    print(Process(symbols))



Above: [ (3  x: 270) ,  (horizontal_line  x: 280) ,  (horizontal_line  x: 281) ,  (9g  x: 233) ,  (7  x: 338) ]
Below: [ (4  x: 267) ,  (horizontal_line  x: 277) ,  (t  x: 264) ,  (1  x: 228) ,  (5s  x: 317) ]
Above: [ (horizontal_line  x: 281) ,  (9g  x: 233) ,  (7  x: 338) ]
Below: [ (3  x: 270) ]
Above: [ (t  x: 264) ,  (1  x: 228) ,  (5s  x: 317) ]
Below: [ (4  x: 267) ]
e+((9g-7) / (3)) / ((1t5s) / (4))
Above: [ (1  x: 242) ,  (5s  x: 111) ,  (horizontal_line  x: 114) ,  (horizontal_line  x: 254) ,  (e  x: 169) ,  (2  x: 247) ,  (3  x: 113) ]
Below: [ (horizontal_line  x: 158) ,  (5s  x: 111) ,  (6  x: 219) ]
Above: [ (2  x: 247) ]
Below: [ (1  x: 242) ]
Above: [ (3  x: 113) ]
Below: [ (5s  x: 111) ]
((3) / (5s)e(2) / (1)) / (5s-6)
