# Training

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F



# Notebook-wide configuration. The names below are read by later cells.

# Created empty here; this cell does not populate it.
class_to_name = {}
# Windows-style relative path to the dataset root folder.
dataset_path = r"..\..\dataset\mainDataset"
# Number of target classes; used as the default output width of iztechCNN.
num_classes = 34
# Upper bound on the number of training epochs.
epoch_amount = 375
# Mini-batch size handed to the train/validation DataLoaders.
batch_size = 32
# Presumably the maximum augmentation rotation in degrees -- TODO confirm where it is consumed.
rotation = 5
# Early-stopping patience: number of epochs without a new best validation
# accuracy after which the training loop stops itself.
terminate_epoch = 125  

# Dataset sub-folder (trailing slash included) appended to the dataset root
# when the train/test ImageFolder paths are built.
dataset = "mainDataset/"




class iztechCNN(nn.Module):
    """Three-stage convolutional classifier for single-channel square images.

    Architecture: three Conv2d -> ReLU -> 2x2 MaxPool stages (32, 64 and 128
    output channels), followed by a 128-unit fully connected layer with
    dropout and a final linear layer that emits one logit per class.

    Args:
        num_classes: Number of logits produced by the final layer.
        image_size: Side length, in pixels, of the square input images.
        kernel_sizes: Kernel size of each of the three convolutions, in order.

    Raises:
        ValueError: If the input becomes smaller than one pixel before the
            fully connected layers (image too small for the chosen kernels).
    """

    def __init__(self, num_classes = num_classes, image_size = 32 , kernel_sizes = (3, 3, 3) ):
        super(iztechCNN,self).__init__()

        final_kernel = 128

        size_1 = kernel_sizes[0]
        size_2 = kernel_sizes[1]
        size_3 = kernel_sizes[2]

        # Follow the spatial size through every conv + pool stage instead of
        # assuming a clean division by 8: max-pooling floors odd sizes and an
        # even kernel with padding (k - 1) // 2 shrinks the map by one pixel.
        side = image_size
        for kernel in (size_1, size_2, size_3):
            padding = (kernel - 1) // 2
            side = (side + 2 * padding - kernel + 1) // 2
        if side < 1:
            raise ValueError(
                f"image_size={image_size} is too small for kernel_sizes={tuple(kernel_sizes)}"
            )

        # Number of features entering the first fully connected layer.
        self.FC_input = final_kernel * side * side

        self.conv1 = nn.Conv2d(1, final_kernel//4,               kernel_size=size_1,    padding = (size_1 -1 ) // 2)
        self.conv2 = nn.Conv2d(final_kernel//4, final_kernel//2, kernel_size=size_2,    padding = (size_2 -1 ) // 2)
        self.conv3 = nn.Conv2d(final_kernel//2, final_kernel,    kernel_size=size_3,    padding = (size_3 -1 ) // 2)

        # One 2x2 / stride-2 pooling module shared by all three stages.
        self.pool = nn.MaxPool2d(2,2)

        # Regularisation between the two fully connected layers; enabled
        # because overfitting was observed.
        self.dropout = nn.Dropout(0.3)

        self.fc1 = nn.Linear(self.FC_input, final_kernel)
        self.fc2 = nn.Linear(final_kernel, num_classes)

    def forward(self,x):
        """Return the raw class logits for a batch of images.

        Args:
            x: Image tensor with one channel and `image_size` x `image_size`
                pixels per sample.

        Returns:
            Tensor with `num_classes` logits per row (no softmax applied).
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))

        # Flatten every sample to FC_input features. view(-1, ...) is kept so
        # that a single unbatched (C, H, W) image still yields one row.
        x = x.view(-1,self.FC_input)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)

        return self.fc2(x)


## Transform - Data Augmentation

The purpose of this part is to give the images in the dataset more variety in every epoch. The same transform pipeline can also be used to normalize the dataset, even simultaneously.

In [2]:


from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Side length (pixels) every image is resized to before it reaches the model.
image_side = 32


def _build_transform(augment):
    """Build the preprocessing pipeline shared by training and validation.

    Args:
        augment: When True, a random rotation of up to `rotation` degrees
            (taken from the configuration cell) is inserted after the
            grayscale conversion.

    Returns:
        A `transforms.Compose` producing a normalized 1-channel tensor.
    """
    steps = [
        transforms.Resize((image_side, image_side), interpolation=transforms.InterpolationMode.NEAREST),
        transforms.Grayscale(num_output_channels=1),
    ]
    if augment:
        steps.append(transforms.RandomRotation(degrees=rotation))
    steps += [
        # NOTE(review): sigma is given as a range, so the blur strength is
        # random for validation images as well -- confirm this is intended.
        transforms.GaussianBlur(kernel_size=3, sigma=(0.1, 1.0)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5]),
    ]
    return transforms.Compose(steps)


train_transform = _build_transform(augment=True)
val_transform = _build_transform(augment=False)


# Both splits live under the same dataset folder: <root>/<dataset>train|test.
dataset_root = r'../../dataset/' + dataset

train_dataset = datasets.ImageFolder(dataset_root + "train", transform=train_transform)
val_dataset = datasets.ImageFolder(dataset_root + "test", transform=val_transform)


# Only the training data is shuffled; validation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=batch_size, num_workers=4, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=4, shuffle=False)




## Checking if PyTorch recognizes my GPU

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
if device.type == 'cuda':
    # get_device_name() raises when CUDA is unavailable, so only query it here.
    print(torch.cuda.get_device_name())
else:
    print("CUDA is not available; running on the CPU.")



cuda
NVIDIA GeForce RTX 3050 6GB Laptop GPU
['(', ')', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '[', ']', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'horizontal_line', 'n', 'o', 'p', 'r', 's', 'sqrt', 't', 'vertical_line', 'x', 'y']


## Training Part

In [None]:
import copy
import time


# Train the CNN with Adam and a step-wise decaying learning rate, validate
# after every epoch, keep a copy of the weights with the best validation
# accuracy, and stop early once `terminate_epoch` consecutive epochs bring no
# new best.

model = iztechCNN(num_classes=num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Multiply the learning rate by 0.8 every 25 epochs.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=25, gamma=0.8)

# Number of consecutive epochs without a new best validation accuracy.
counter = 0

# Best validation accuracy (percent) seen so far.
biggest = 0.0

# state_dict snapshot of the model that reached `biggest`.
most_accurate_model = None

start_time = time.time()


# Per-epoch history, consumed by the plotting cell.
val_accuracy_over_epoch = []

train_loss_over_epoch = []
val_loss_over_epoch = []
learning_rate_over_epoch = []

# Per-batch training losses of the epoch that produced the best model.
most_accurate_train_loss_over_batch = None


for epoch in range(epoch_amount):

    train_loss_over_batch = []

    model.train()
    running_loss = 0.0

    for i, (images, labels) in enumerate(train_loader):

        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(images)

        loss = criterion(outputs, labels)

        loss.backward()

        optimizer.step()

        # Read the scalar once; every .item() call forces a device sync.
        batch_loss = loss.item()

        running_loss += batch_loss

        train_loss_over_batch.append(batch_loss)

        if i % 10 == 9:
            print(f"[Epoch {epoch+1}, Batch {i+1}/{len(train_loader)}] Train Loss: {batch_loss:.4f}")

    avg_train_loss = running_loss / len(train_loader)


    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    avg_val_loss = val_loss / len(val_loader)
    accuracy = 100 * correct / total

    print(f"[Epoch {epoch+1}] Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}, Val Accuracy: {accuracy:.2f}%")

    train_loss_over_epoch.append(avg_train_loss)
    val_loss_over_epoch.append(avg_val_loss)
    # Logged before scheduler.step(), i.e. the rate actually used this epoch.
    learning_rate_over_epoch.append(optimizer.param_groups[0]['lr'])

    val_accuracy_over_epoch.append(accuracy)
    scheduler.step()

    if accuracy > biggest:
        # New best: reset the patience counter and snapshot the model.
        counter = 0
        biggest = accuracy

        most_accurate_model = copy.deepcopy(model.state_dict())
        most_accurate_train_loss_over_batch = copy.deepcopy(train_loss_over_batch)
    else:
        counter += 1
        # Evaluate patience only after this epoch has been taken into
        # account, so an improving epoch is never thrown away and no extra
        # epoch is trained once patience has run out.
        if counter >= terminate_epoch:
            print(f"There are no further improve in the last {terminate_epoch} epochs. Training terminates itself.")
            break


print(f"Biggest Accuracy : {biggest}")
print(time.strftime('%H:%M:%S', time.gmtime( time.time() - start_time )))
        




[Epoch 1, Batch 10/2612] Train Loss: 3.5023
[Epoch 1, Batch 20/2612] Train Loss: 3.4076
[Epoch 1, Batch 30/2612] Train Loss: 3.3575
[Epoch 1, Batch 40/2612] Train Loss: 2.8129
[Epoch 1, Batch 50/2612] Train Loss: 2.8565
[Epoch 1, Batch 60/2612] Train Loss: 2.0369
[Epoch 1, Batch 70/2612] Train Loss: 2.0291
[Epoch 1, Batch 80/2612] Train Loss: 1.8049
[Epoch 1, Batch 90/2612] Train Loss: 1.8187
[Epoch 1, Batch 100/2612] Train Loss: 1.7127
[Epoch 1, Batch 110/2612] Train Loss: 1.1763
[Epoch 1, Batch 120/2612] Train Loss: 1.1558
[Epoch 1, Batch 130/2612] Train Loss: 1.7058
[Epoch 1, Batch 140/2612] Train Loss: 1.3187
[Epoch 1, Batch 150/2612] Train Loss: 0.9164
[Epoch 1, Batch 160/2612] Train Loss: 1.2470
[Epoch 1, Batch 170/2612] Train Loss: 1.2021
[Epoch 1, Batch 180/2612] Train Loss: 1.2121
[Epoch 1, Batch 190/2612] Train Loss: 1.2692
[Epoch 1, Batch 200/2612] Train Loss: 1.3891
[Epoch 1, Batch 210/2612] Train Loss: 0.7085
[Epoch 1, Batch 220/2612] Train Loss: 1.1094
[Epoch 1, Batch 230

## Saving the best model

In [None]:
import os
import matplotlib.pyplot as plt

# Persist the best weights found during training and write the training
# curves as PNG files into a per-model figures folder.

def _save_line_plot(out_dir, file_name, x_values, curves, x_label, y_label, title, legend_labels):
    """Draw one or more curves over `x_values` and save the figure as a PNG.

    Args:
        out_dir: Target folder path, ending with a path separator.
        file_name: Name of the PNG file created inside `out_dir`.
        x_values: Shared x coordinates of all curves.
        curves: Iterable of y-value sequences, one per plotted line.
        x_label: Label of the x axis.
        y_label: Label of the y axis.
        title: Figure title.
        legend_labels: One legend entry per curve, in plotting order.
    """
    for curve in curves:
        plt.plot(x_values, curve)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(title)
    plt.legend(legend_labels)
    plt.grid(True)

    plt.tight_layout()
    plt.savefig(out_dir + file_name)
    # Close the figure so consecutive plots do not draw on top of each other.
    plt.close()


if most_accurate_model is None:
    # Nothing was snapshotted (training not run, or accuracy never exceeded 0).
    raise RuntimeError("No model snapshot is available; run the training cell first.")

# A fresh checkout has no models/ folder yet; listdir would fail without it.
os.makedirs("models", exist_ok=True)

model_number = len(os.listdir("models/"))
# The file count is only a starting guess: after files were deleted it may
# point at an existing checkpoint, so advance to the first unused number.
while os.path.exists(f'models/charcnn_{model_number}.pth'):
    model_number += 1

save_name = f'models/charcnn_{model_number}.pth'

torch.save(most_accurate_model, save_name)

print(f"Most Accurate model has been saved to {save_name}")

# Take the x axes from the recorded histories so lengths always match.
x_batch = range(len(most_accurate_train_loss_over_batch))
x_epoch = range(1, len(val_accuracy_over_epoch) + 1)

fig_save = f"../../Documentation/figures/charcnn_{model_number}/"
os.makedirs(fig_save,exist_ok=True)

# Val Accuracy over Epoch
_save_line_plot(
    fig_save, "val_accuracy_over_epoch.png",
    x_epoch, [val_accuracy_over_epoch],
    "Epoch", "Validation Accuracy", "Validation Accuracy x Epoch",
    ["Val Accuracy"],
)

# Learning Rate over Epoch
_save_line_plot(
    fig_save, "learning_rate_over_epoch.png",
    x_epoch, [learning_rate_over_epoch],
    "Epoch", "Learning Rate", "Learning Rate x Epoch",
    ["lr"],
)

# Val Loss & Train Loss over Epoch
_save_line_plot(
    fig_save, "val_train_loss_over_epoch.png",
    x_epoch, [val_loss_over_epoch, train_loss_over_epoch],
    "Epoch", "Validation Loss", "Validation Loss x Epoch",
    ["Val Loss", "Train Loss"],
)

# Train Loss Over Batch in Most Accurate Model
_save_line_plot(
    fig_save, "loss_over_batch.png",
    x_batch, [most_accurate_train_loss_over_batch],
    "Batch idx", "Training Loss", "Training Loss x Batch",
    ["Loss"],
)




Most Accurate model has been saved to models/charcnn_13.pth


## PC SHUTDOWN

In [92]:
# Ask Windows to power the machine off 60 seconds from now (/s = shut down,
# /t = delay in seconds). The command's exit status is the cell's output.
shutdown_command = "shutdown /s /t 60"
os.system(shutdown_command)

0

# Testing the model

In [21]:
import pandas
from PIL import Image
import os

# Index -> class-name lookup. ImageFolder assigns label indices in the order
# of its `classes` list, so this is the mapping the model was trained with.
charDict = dict(enumerate(val_dataset.classes))

test_model = iztechCNN(num_classes=num_classes, image_size=32).to(device)
# Load onto the same device the model lives on instead of assuming a GPU.
test_model.load_state_dict(torch.load(f"./models/charcnn_13.pth", map_location=device ,weights_only=True))
test_model.eval()

path1 = f"../../dataset/mainDataset/test"

# Report, for every class folder, the fraction of its images that the model
# labels with that folder's name.
with torch.no_grad():
    for letter in os.listdir(path1):
        path2 = os.path.join(path1,letter)
        if not os.path.isdir(path2):
            # Stray files next to the class folders are not classes.
            continue

        image_names = os.listdir(path2)
        letter_test_amount = len(image_names)
        if letter_test_amount == 0:
            # Avoid dividing by zero for an empty class folder.
            print(f"Letter: {letter}   Accuracy: n/a (no test images)")
            continue

        letter_counter = 0
        for image in image_names:
            image_path = os.path.join(path2,image)

            # The context manager releases the file handle right after decoding.
            with Image.open(image_path) as opened:
                img = opened.convert("L")

            # Add the batch dimension explicitly: (1, H, W) -> (1, 1, H, W).
            img = val_transform(img).unsqueeze(0).to(device)

            output = test_model(img)
            charInt = output.argmax(dim=1).item()

            char = charDict[charInt]

            if char == letter:
                letter_counter += 1
        print(f"Letter: {letter}   Accuracy: {(letter_counter/letter_test_amount):.3f}")



        


Letter: (   Accuracy: 0.991
Letter: )   Accuracy: 0.998
Letter: +   Accuracy: 0.984
Letter: 0   Accuracy: 0.791
Letter: 1   Accuracy: 0.974
Letter: 2   Accuracy: 0.982
Letter: 3   Accuracy: 0.987
Letter: 4   Accuracy: 0.930
Letter: 5   Accuracy: 0.905
Letter: 6   Accuracy: 0.960
Letter: 7   Accuracy: 0.987
Letter: 8   Accuracy: 0.969
Letter: 9   Accuracy: 0.907
Letter: a   Accuracy: 0.939
Letter: b   Accuracy: 0.935
Letter: c   Accuracy: 0.958
Letter: d   Accuracy: 0.976
Letter: e   Accuracy: 0.977
Letter: f   Accuracy: 0.933
Letter: g   Accuracy: 0.785
Letter: h   Accuracy: 0.941
Letter: horizontal_line   Accuracy: 0.989
Letter: n   Accuracy: 0.977
Letter: o   Accuracy: 0.705
Letter: p   Accuracy: 0.989
Letter: r   Accuracy: 0.964
Letter: s   Accuracy: 0.930
Letter: sqrt   Accuracy: 0.998
Letter: t   Accuracy: 0.949
Letter: vertical_line   Accuracy: 0.976
Letter: x   Accuracy: 0.977
Letter: y   Accuracy: 0.941
Letter: [   Accuracy: 0.974
Letter: ]   Accuracy: 0.987


In [None]:
import cv2 
import numpy as np
from torchvision import transforms
from PIL import Image

# Segment a photographed expression into connected components and classify
# each component with a saved model, showing every crop in an OpenCV window.

size = 32

# Index -> class-name lookup. ImageFolder assigns label indices in the order
# of its `classes` list, so this is the mapping the model was trained with.
charDict = dict(enumerate(val_dataset.classes))

test_model = iztechCNN(num_classes=num_classes,image_size=size)

# The model stays on the CPU in this cell, so load the weights there too;
# this also works on machines without a GPU.
test_model.load_state_dict(torch.load(r".\models\charcnn_8.pth",map_location = "cpu",weights_only=True))
test_model.eval()


img = cv2.imread(r".\TestImages\deneme_3.jpg", cv2.IMREAD_GRAYSCALE)
if img is None:
    # cv2.imread signals a missing or unreadable file by returning None.
    raise FileNotFoundError(r"Could not read .\TestImages\deneme_3.jpg")

blur = cv2.GaussianBlur(img,(3,3),1)

# Otsu threshold, inverted: ink becomes 255 on a 0 background.
_, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)


for index, contour in enumerate(contours):
    x, y, w, h = cv2.boundingRect(contour)
    max_edge = max(w,h)

    # Square uint8 canvas: keeps the 0..255 range that PIL, ToTensor and
    # imshow expect (a float64 canvas is neither rescaled nor shown correctly).
    blank = np.zeros((max_edge,max_edge), dtype=np.uint8)

    # Centre the bounding-box crop on the canvas (rows follow h, columns w).
    row_start = (max_edge - h) // 2
    col_start = (max_edge - w) // 2

    blank[row_start:row_start + h, col_start:col_start + w] = thresh[y:y+h, x:x+w]
    blank = cv2.copyMakeBorder(blank,2,2,2,2,borderType=cv2.BORDER_CONSTANT,value=0)
    letter = cv2.GaussianBlur(blank,(3,3),1)

    cv2.imshow("lala",letter)

    # val_transform starts with Resize, which needs a PIL image or a tensor,
    # not a NumPy array. unsqueeze adds the batch dimension.
    # NOTE(review): crops are white-on-black here -- confirm the training
    # images use the same polarity.
    letter = val_transform(Image.fromarray(letter)).unsqueeze(0)

    with torch.no_grad():
        output = test_model(letter)
        _, predicted = torch.max(output, 1)

        charInt = predicted.tolist()[0]
        print(charInt)
        print(charDict[charInt])
    cv2.waitKey(0)

# Close the preview window once every component has been shown.
cv2.destroyAllWindows()
    



21
g
21
g
21
g
21
g
21
g
16
b
20
f
20
f
21
g
21
g
21
g
21
g
20
f
