# Defining Part

# CNN Model

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F




dataset_path = r"E:\Python_Projeler\ComputerVisionProjects\FinalProject\dataset\main01"
num_classes = 34
epoch_amount = 200
batch_size = 32
rotation = 10
terminate_epoch = 75


class iztechCNN(nn.Module):
    def __init__(self, num_classes = num_classes):
        super(iztechCNN,self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3,padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3,padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)

        self.pool = nn.MaxPool2d(2,2)

        self.fc1 = nn.Linear(128 * 8 * 8, 256)
        self.fc2 = nn.Linear(256, num_classes)
    def forward(self,x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))

        x = x.view(-1, 128 * 8 * 8)
        x = F.relu(self.fc1(x))
        return self.fc2(x)


# Transform

The purpose of this part is to make the images in the dataset have more variety in every epoch. This part may be used to normalize the datasets as well, even simultaneosly.

In [None]:


from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.RandomRotation(rotation),  # simple rotation augment
    transforms.ToTensor(),
])

full_dataset  = datasets.ImageFolder(r'E:\Python_Projeler\ComputerVisionProjects\FinalProject\dataset\main01', transform=transform)

train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])


train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)


## Checking if PyTorch recognizes my GPU

In [8]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device) 
print(torch.cuda.get_device_name())

cuda
NVIDIA GeForce RTX 3050 6GB Laptop GPU


# Training

In [4]:
model = iztechCNN(num_classes=num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

counter = 0

biggest = 0.0
most_accurate_model = None

for epoch in range(epoch_amount):
    model.train()
    running_loss = 0.0

    for i, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        if i % 10 == 9:
            print(f"[Epoch {epoch+1}, Batch {i+1}] Train Loss: {loss.item():.4f}")

    avg_train_loss = running_loss / len(train_loader)

    
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    avg_val_loss = val_loss / len(val_loader)
    accuracy = 100 * correct / total

    print(f"[Epoch {epoch+1}] Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}, Val Accuracy: {accuracy:.2f}%")


    if counter == terminate_epoch:
        print(f"There are no further improve in the last 50 epochs. Training terminates itself.")
        break

    if accuracy <= biggest:
        counter += 1
        continue
    
    counter = 0
    biggest = accuracy
    most_accurate_model = model

print(f"Biggest Accuracy : {biggest}")
        


    


[Epoch 1, Batch 10] Train Loss: 3.0914
[Epoch 1, Batch 20] Train Loss: 1.2770
[Epoch 1, Batch 30] Train Loss: 0.8246
[Epoch 1, Batch 40] Train Loss: 0.4776
[Epoch 1, Batch 50] Train Loss: 0.6026
[Epoch 1, Batch 60] Train Loss: 0.3566
[Epoch 1, Batch 70] Train Loss: 0.1472
[Epoch 1, Batch 80] Train Loss: 0.3525
[Epoch 1, Batch 90] Train Loss: 0.6764
[Epoch 1, Batch 100] Train Loss: 0.1916
[Epoch 1, Batch 110] Train Loss: 0.1744
[Epoch 1, Batch 120] Train Loss: 0.2126
[Epoch 1, Batch 130] Train Loss: 0.0458
[Epoch 1, Batch 140] Train Loss: 0.0975
[Epoch 1, Batch 150] Train Loss: 0.0373
[Epoch 1, Batch 160] Train Loss: 0.4036
[Epoch 1, Batch 170] Train Loss: 0.3357
[Epoch 1, Batch 180] Train Loss: 0.1966
[Epoch 1, Batch 190] Train Loss: 0.0260
[Epoch 1, Batch 200] Train Loss: 0.0594
[Epoch 1, Batch 210] Train Loss: 0.0792
[Epoch 1, Batch 220] Train Loss: 0.1038
[Epoch 1, Batch 230] Train Loss: 0.1098
[Epoch 1, Batch 240] Train Loss: 0.2203
[Epoch 1, Batch 250] Train Loss: 0.2920
[Epoch 1,

# Saving the Model

In [11]:
import os

model_number = len(os.listdir("models/"))

save_name = f'models/charcnn_{model_number}.pth'

torch.save(most_accurate_model.state_dict(), save_name)


## Testing the model

In [None]:
import cv2 
import numpy as np


test_model = iztechCNN(num_classes=num_classes)

test_model.load_state_dict(torch.load(r"E:\Python_Projeler\ComputerVisionProjects\FinalProject\codes\ModelCodes\models\charcnn_2.pth",map_location = "cuda",weights_only=True))
test_model.eval()


transform = transforms.Compose([
    transforms.ToTensor()
])



img = cv2.imread(r"E:\Python_Projeler\ComputerVisionProjects\FinalProject\codes\ModelCodes\TestImages\deneme.png", cv2.IMREAD_GRAYSCALE)

blur = cv2.GaussianBlur(img,(3,3),1)

_, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

for index, contour in enumerate(contours):
    x, y, w, h = cv2.boundingRect(contour)
    max_edge = max(w,h)

    blank = np.zeros((max_edge,max_edge))

    x1,x2 = int( (max_edge - h) / 2 ), int( (max_edge + h) / 2 )
    y1,y2 = int( (max_edge - w) / 2 ), int( (max_edge + w) / 2 )

    blank[x1:x2,y1:y2] = thresh[y:y+h, x:x+w]
    letter = cv2.resize(blank, (64,64))
    

    cv2.imshow("lala",letter)
    
    letter = transform(letter)
    letter = letter.float()
    letter = letter.unsqueeze(0)
    
    with torch.no_grad():
        output = test_model(letter)
        _, predicted = torch.max(output, 1)
        print(f"Tahmin edilen class:  {predicted}")
        
    cv2.waitKey(0)



Tahmin edilen class:  tensor([18])
Tahmin edilen class:  tensor([10])
Tahmin edilen class:  tensor([10])
Tahmin edilen class:  tensor([10])
Tahmin edilen class:  tensor([13])
