# Training

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F



class_to_name = {}
dataset_path = r"..\..\dataset\mainDataset"
num_classes = 34
epoch_amount = 250
batch_size = 32
rotation = 5
terminate_epoch = 35



dataset = "mainDataset/"




class iztechCNN(nn.Module):
    def __init__(self, num_classes = num_classes, image_size = 32):
        super(iztechCNN,self).__init__()
        self.FC_input = image_size * image_size * 256 // (2**3)**2 
        self.conv1 = nn.Conv2d(1, 64, kernel_size=3,padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3,padding=1)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)

        self.pool = nn.MaxPool2d(2,2)

        self.dropout = nn.Dropout(0.5)          #Will be used if overfitting is detected #Overfitting was detected!

        self.fc1 = nn.Linear(self.FC_input, 256)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self,x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))

        x = x.view(-1,self.FC_input)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
       
        return self.fc2(x)


## Transform - Data Augmentation

The purpose of this part is to make the images in the dataset have more variety in every epoch. This part may be used to normalize the datasets as well, even simultaneosly.

In [None]:


from torchvision import datasets, transforms
from torch.utils.data import DataLoader

transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.RandomRotation(degrees=5),
    transforms.RandomAffine(degrees=10, translate=(0.1, 0.1)),
    transforms.RandomPerspective(distortion_scale=0.2, p=0.5),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.GaussianBlur(kernel_size=3, sigma=(0.1, 1.0)),
    transforms.ToTensor()
])


train_dataset = datasets.ImageFolder(r'../../dataset/'+dataset+"train", transform=transform)
val_dataset = datasets.ImageFolder(r'../../dataset/'+dataset+"test", transform=transform)




train_loader = DataLoader(train_dataset, batch_size=batch_size, num_workers=4,shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=4,shuffle=False)




## Checking if PyTorch recognizes my GPU

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device) 
print(torch.cuda.get_device_name())



cuda
NVIDIA GeForce RTX 3050 6GB Laptop GPU


## Training Part

In [None]:
model = iztechCNN(num_classes=num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

counter = 0

biggest = 0.0
most_accurate_model = None

for epoch in range(epoch_amount):
    model.train()
    running_loss = 0.0

    for i, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        if i % 10 == 9:
            print(f"[Epoch {epoch+1}, Batch {i+1}] Train Loss: {loss.item():.4f}")

    avg_train_loss = running_loss / len(train_loader)

    
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    avg_val_loss = val_loss / len(val_loader)
    accuracy = 100 * correct / total

    print(f"[Epoch {epoch+1}] Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}, Val Accuracy: {accuracy:.2f}%")


    if counter == terminate_epoch:
        print(f"There are no further improve in the last {terminate_epoch} epochs. Training terminates itself.")
        break

    if accuracy <= biggest:
        counter += 1
        continue
    
    counter = 0
    biggest = accuracy
    most_accurate_model = model

print(f"Biggest Accuracy : {biggest}")
        





[Epoch 1, Batch 10] Train Loss: 3.5349
[Epoch 1, Batch 20] Train Loss: 3.2883
[Epoch 1, Batch 30] Train Loss: 3.2738
[Epoch 1, Batch 40] Train Loss: 2.8537
[Epoch 1, Batch 50] Train Loss: 2.2002
[Epoch 1, Batch 60] Train Loss: 2.3015
[Epoch 1, Batch 70] Train Loss: 2.1203
[Epoch 1, Batch 80] Train Loss: 2.0501
[Epoch 1, Batch 90] Train Loss: 1.9403
[Epoch 1, Batch 100] Train Loss: 2.2215
[Epoch 1, Batch 110] Train Loss: 1.9642
[Epoch 1, Batch 120] Train Loss: 1.5790
[Epoch 1, Batch 130] Train Loss: 1.9814
[Epoch 1, Batch 140] Train Loss: 1.7601
[Epoch 1, Batch 150] Train Loss: 1.3356
[Epoch 1, Batch 160] Train Loss: 1.6967
[Epoch 1, Batch 170] Train Loss: 1.3786
[Epoch 1, Batch 180] Train Loss: 1.3565
[Epoch 1, Batch 190] Train Loss: 1.5190
[Epoch 1, Batch 200] Train Loss: 1.3185
[Epoch 1, Batch 210] Train Loss: 1.5733
[Epoch 1, Batch 220] Train Loss: 1.6751
[Epoch 1, Batch 230] Train Loss: 1.0512
[Epoch 1, Batch 240] Train Loss: 1.1116
[Epoch 1, Batch 250] Train Loss: 0.8712
[Epoch 1,

## Saving the best model

In [None]:
import os

model_number = len(os.listdir("models/"))

save_name = f'models/charcnn_{model_number}.pth'

torch.save(most_accurate_model.state_dict(), save_name)





NameError: name 'torch' is not defined

## PC SHUTDOWN

In [None]:
os.system("shutdown /s /t 60")

# Testing the model

In [11]:
import cv2 
import numpy as np
from torchvision import transforms

test_model = iztechCNN(num_classes=num_classes,image_size=48)

test_model.load_state_dict(torch.load(r".\models\charcnn_6.pth",map_location = "cuda",weights_only=True))
test_model.eval()


transform = transforms.Compose([
    transforms.ToTensor()
])



img = cv2.imread(r"E:\Python_Projeler\ComputerVisionProjects\FinalProject\codes\ModelCodes\TestImages\deneme_3.jpg", cv2.IMREAD_GRAYSCALE)

blur = cv2.GaussianBlur(img,(3,3),1)

_, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

for index, contour in enumerate(contours):
    x, y, w, h = cv2.boundingRect(contour)
    max_edge = max(w,h)

    blank = np.zeros((max_edge,max_edge))

    x1,x2 = int( (max_edge - h) / 2 ), int( (max_edge + h) / 2 )
    y1,y2 = int( (max_edge - w) / 2 ), int( (max_edge + w) / 2 )

    blank[x1:x2,y1:y2] = thresh[y:y+h, x:x+w]
    blank = cv2.copyMakeBorder(blank,2,2,2,2,borderType=cv2.BORDER_CONSTANT,value=0)
    blank = cv2.GaussianBlur(blank,(3,3),1)
    letter = cv2.resize(blank, (48,48))
    cv2.imshow("lala",letter)
        
    letter = transform(letter)
    letter = letter.float()
    letter = letter.unsqueeze(0)
    
    with torch.no_grad():
        output = test_model(letter)
        _, predicted = torch.max(output, 1)

        charInt = predicted.tolist()[0]
        print(predicted)
    cv2.waitKey(0)
    



tensor([15])
tensor([33])
tensor([26])
tensor([18])
tensor([15])
tensor([18])
tensor([7])
tensor([18])
tensor([15])
tensor([23])
tensor([15])
tensor([26])
tensor([18])
