In [None]:
import copy
import glob
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
from torch import optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

# Root folder that holds the Train1 / Train2 / TestData sub-folders. Set the
# NHF_DATA_ROOT environment variable to point somewhere else instead of editing
# the notebook; the default is the original local path.
DATA_ROOT = os.environ.get('NHF_DATA_ROOT', r'C:\5. felev\AMM\NHF')

# Folders with labelled training images.
train_dirs = [os.path.join(DATA_ROOT, 'Train1'), os.path.join(DATA_ROOT, 'Train2')]
# Folder with the images that predictions are generated for.
test_dir = os.path.join(DATA_ROOT, 'TestData')


In [None]:
# Preprocessing applied to every image: resize to 128x128, convert to a tensor,
# then normalise the single channel with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.Resize(size=(128, 128)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

class CustomDataset(Dataset):
    """Labelled image dataset read from ``SampleNNN`` class sub-folders.

    Every directory in ``data_dirs`` is searched for sub-folders named
    ``Sample000`` ... ``Sample{num_classes - 1:03d}``; each ``*.png`` file found
    in ``SampleNNN`` is registered with the integer label ``NNN``. Sub-folders
    that do not exist simply contribute no images.

    Args:
        data_dirs: Iterable of root directories to scan.
        transform: Optional callable applied to the grayscale PIL image.
        num_classes: Number of ``SampleNNN`` folders to look for (default 63).

    Attributes set here: ``data_paths`` (list of file paths), ``labels``
    (list of int labels, parallel to ``data_paths``) and ``transform``.
    """

    def __init__(self, data_dirs, transform=None, num_classes=63):
        self.data_paths = []
        self.labels = []
        self.transform = transform
        for data_dir in data_dirs:
            for label in range(num_classes):
                pattern = os.path.join(data_dir, f'Sample{label:03d}', '*.png')
                # glob returns files in arbitrary order; sorting keeps the
                # index -> image mapping (and any index-based split) stable
                # between runs and machines.
                image_paths = sorted(glob.glob(pattern))
                self.data_paths.extend(image_paths)
                self.labels.extend([label] * len(image_paths))

    def __len__(self):
        """Return the number of images found."""
        return len(self.data_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for index ``idx``.

        The image is loaded as single-channel grayscale ('L') and passed
        through ``self.transform`` when one was given.
        """
        img_path = self.data_paths[idx]
        label = self.labels[idx]
        # Image.open keeps the file open until the data is read; the context
        # manager closes the handle once convert() has produced its own copy.
        with Image.open(img_path) as img:
            image = img.convert('L')
        if self.transform:
            image = self.transform(image)
        return image, label


In [None]:
class TestDataset(Dataset):
    """Unlabelled image dataset: every ``*.png`` directly inside ``test_dir``.

    Args:
        test_dir: Directory to scan (not recursive).
        transform: Optional callable applied to the grayscale PIL image.

    Items are ``(image, img_path)`` pairs so that a prediction can be matched
    back to the file it came from.
    """

    def __init__(self, test_dir, transform=None):
        # glob returns files in arbitrary order; sorting makes the order of the
        # items (and of anything written out from them) reproducible.
        self.image_paths = sorted(glob.glob(os.path.join(test_dir, '*.png')))
        self.transform = transform

    def __len__(self):
        """Return the number of images found."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, img_path)`` for index ``idx``."""
        img_path = self.image_paths[idx]
        # Image.open keeps the file open until the data is read; the context
        # manager closes the handle once convert() has produced its own copy.
        with Image.open(img_path) as img:
            image = img.convert('L')
        if self.transform:
            image = self.transform(image)
        return image, img_path


In [None]:
class CharacterRecognitionCNN(nn.Module):
    """Small CNN: two conv + max-pool stages followed by two linear layers.

    Args:
        num_classes: Size of the output layer (default 63).
        input_size: Spatial size of the input images, either a single int for
            square images or a ``(height, width)`` pair. Defaults to 128, which
            yields the original ``32 * 32 * 32`` flattened feature size.

    ``forward`` takes a single-channel batch ``(batch, 1, height, width)`` and
    returns the raw output of the last linear layer, ``(batch, num_classes)``.
    """

    def __init__(self, num_classes=63, input_size=128):
        super().__init__()
        if isinstance(input_size, int):
            height, width = input_size, input_size
        else:
            height, width = input_size

        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, padding=1)  # fewer filters
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # The 3x3 / padding=1 convolutions keep the spatial size; each of the
        # two poolings halves it (rounding down), hence the division by 4.
        self.flat_features = 32 * (height // 4) * (width // 4)
        self.fc1 = nn.Linear(self.flat_features, 128)  # fewer neurons
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten per sample. A fixed view(-1, N) would silently regroup the
        # batch when the input size is wrong; this way fc1 raises instead.
        x = x.flatten(start_dim=1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x



In [None]:
# Seed PyTorch's random number generators before anything random happens
# (weight initialisation, DataLoader shuffling) so a fresh
# "Restart & Run All" reproduces the same run.
torch.manual_seed(42)

# Labelled training images and the loader that batches/shuffles them.
train_data = CustomDataset(train_dirs, transform=transform)
train_loader = DataLoader(train_data, batch_size=128, shuffle=True, num_workers=4)

# Unlabelled test images; order is kept so predictions line up with file names.
test_data = TestDataset(test_dir, transform=transform)
test_loader = DataLoader(test_data, batch_size=128, shuffle=False, num_workers=4)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = CharacterRecognitionCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [None]:
def train(model, train_loader, criterion, optimizer, epochs=10):
    """Train ``model`` in place and print loss/accuracy after every epoch.

    Args:
        model: Module to optimise; batches are moved to the device its
            parameters live on.
        train_loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepping the model's parameters.
        epochs: Number of passes over ``train_loader``.

    Returns:
        None. Per epoch, the mean of the batch losses and the training accuracy
        (in percent, measured on the batches as they are trained on) are
        printed. An epoch without any batch reports 0 for both.
    """
    # Follow the model's own device instead of relying on a notebook-level
    # global being defined (and being the device the model was moved to).
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        num_batches = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()

            outputs = model(images)
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

            # Index of the largest output per sample is the predicted class.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        # Guard the averages so an empty loader does not raise ZeroDivisionError.
        mean_loss = running_loss / num_batches if num_batches else 0.0
        accuracy = 100 * correct / total if total else 0.0
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {mean_loss:.4f}, Accuracy: {accuracy:.2f}%")


In [None]:
def generate_predictions(model, test_loader):
    """Predict a class for every image and format the results as CSV rows.

    Args:
        model: Trained module; it is switched to eval mode and left there.
        test_loader: Iterable yielding ``(images, img_paths)`` batches.

    Returns:
        List of strings ``"<predicted class index>;<file name>"``, one per
        image, in the order the loader produced them.
    """
    # Follow the model's own device instead of relying on a notebook-level
    # global being defined (and being the device the model was moved to).
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    predictions = []

    with torch.no_grad():
        for images, img_paths in test_loader:
            outputs = model(images.to(device))
            # One transfer per batch instead of one .item() call per image.
            predicted = outputs.argmax(dim=1).tolist()

            for img_path, pred in zip(img_paths, predicted):
                img_name = os.path.basename(img_path)
                predictions.append(f"{pred};{img_name}")

    return predictions


In [24]:
from sklearn.model_selection import KFold

def reset_weights(m):
    """Re-initialise the parameters of a ``Conv2d`` or ``Linear`` layer.

    Intended to be handed to ``Module.apply`` so that every such layer of a
    model is reset; modules of any other type are left untouched.
    """
    resettable_layers = (nn.Conv2d, nn.Linear)
    if not isinstance(m, resettable_layers):
        return
    m.reset_parameters()


def evaluate(model, loader):
    """Return the classification accuracy of ``model`` on ``loader`` in percent.

    Args:
        model: Module to evaluate; it is switched to eval mode and left there.
        loader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        ``100 * correct / total`` as a float, or ``0.0`` when the loader
        yields no samples.
    """
    # Follow the model's own device instead of relying on a notebook-level
    # global being defined (and being the device the model was moved to).
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            # Index of the largest output per sample is the predicted class.
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Nothing to score: avoid ZeroDivisionError on an empty loader.
    if total == 0:
        return 0.0
    return 100 * correct / total


In [None]:
k_folds = 4
# shuffle=True without a random_state draws different folds on every run, which
# makes the per-fold accuracies impossible to reproduce or compare; fix the seed.
kf = KFold(n_splits=k_folds, shuffle=True, random_state=42)

# Full labelled training set that the folds are drawn from.
train_data = CustomDataset(train_dirs, transform=transform)

def cross_validate(model, dataset, criterion, optimizer, epochs=10, splitter=None):
    """Run k-fold cross-validation and print per-fold and average accuracy.

    For every fold the model weights and the optimizer state are reset, the
    model is trained on the training part and scored on the held-out part.

    Args:
        model: Module to train; after the call it holds the weights of the
            last fold.
        dataset: Indexable dataset the folds are drawn from.
        criterion: Loss passed on to ``train``.
        optimizer: Optimizer bound to ``model``'s parameters. Its state at call
            time is restored at the start of every fold.
        epochs: Training epochs per fold.
        splitter: Object with ``split`` and ``get_n_splits`` (e.g. a
            scikit-learn ``KFold``). Defaults to the notebook-level ``kf``.

    Returns:
        None; results are printed.
    """
    if splitter is None:
        splitter = kf  # notebook-level KFold instance
    n_folds = splitter.get_n_splits()

    # Snapshot the optimizer so each fold starts from the same state. Resetting
    # only the weights would let adaptive optimizers (e.g. Adam's moment
    # estimates and step counts) leak from one fold into the next.
    initial_optimizer_state = copy.deepcopy(optimizer.state_dict())
    fold_accuracy = []

    fold_splits = splitter.split(range(len(dataset)))
    for fold, (train_idx, valid_idx) in enumerate(fold_splits, start=1):
        print(f'Fold {fold} / {n_folds}')

        train_subset = torch.utils.data.Subset(dataset, train_idx)
        valid_subset = torch.utils.data.Subset(dataset, valid_idx)

        train_loader = DataLoader(train_subset, batch_size=64, shuffle=True, num_workers=4)
        valid_loader = DataLoader(valid_subset, batch_size=64, shuffle=False, num_workers=4)

        # Fresh weights and fresh optimizer state for an independent fold.
        model.apply(reset_weights)
        # Load a copy so training cannot modify the snapshot through shared tensors.
        optimizer.load_state_dict(copy.deepcopy(initial_optimizer_state))

        train(model, train_loader, criterion, optimizer, epochs)

        valid_accuracy = evaluate(model, valid_loader)
        fold_accuracy.append(valid_accuracy)

        print(f'Fold {fold} pontosság: {valid_accuracy:.2f}%')

    avg_accuracy = sum(fold_accuracy) / len(fold_accuracy)
    print(f'Átlagos pontosság {n_folds}-fold keresztvalidáción: {avg_accuracy:.2f}%')


In [None]:
# Estimate generalisation accuracy with k-fold cross-validation.
cross_validate(model, train_data, criterion, optimizer, epochs=10)

# cross_validate leaves `model` with the weights of the LAST fold only, i.e. a
# model that never saw that fold's validation part. For the final predictions,
# retrain from scratch on the complete training set with a fresh optimizer.
model.apply(reset_weights)
optimizer = optim.Adam(model.parameters(), lr=0.001)
train(model, train_loader, criterion, optimizer, epochs=10)

predictions = generate_predictions(model, test_loader)

# Write one "class;file name" row per test image below a header line.
output_file = r'C:\5. felev\AMM\NHF\predictions.csv'
# Explicit encoding so file names outside the platform's default code page
# cannot make the write fail.
with open(output_file, 'w', encoding='utf-8') as f:
    f.write("class;TestImage\n")
    f.writelines(f"{line}\n" for line in predictions)

print(f"Predikciók mentve a {output_file} fájlba.")

Fold 1 / 4
Epoch [1/10], Loss: 1.0496, Accuracy: 71.67%
Epoch [2/10], Loss: 0.3952, Accuracy: 86.48%
Epoch [3/10], Loss: 0.2733, Accuracy: 89.92%
Epoch [4/10], Loss: 0.2159, Accuracy: 91.83%
Epoch [5/10], Loss: 0.1790, Accuracy: 93.16%
Epoch [6/10], Loss: 0.1502, Accuracy: 94.10%
Epoch [7/10], Loss: 0.1329, Accuracy: 94.88%
Epoch [8/10], Loss: 0.1179, Accuracy: 95.49%
Epoch [9/10], Loss: 0.1044, Accuracy: 96.20%
Epoch [10/10], Loss: 0.0958, Accuracy: 96.50%
Fold 1 pontosság: 88.96%
Fold 2 / 4
Epoch [1/10], Loss: 0.9186, Accuracy: 75.11%
Epoch [2/10], Loss: 0.3598, Accuracy: 87.37%
Epoch [3/10], Loss: 0.2577, Accuracy: 90.49%
Epoch [4/10], Loss: 0.2074, Accuracy: 92.04%
Epoch [5/10], Loss: 0.1734, Accuracy: 93.31%
Epoch [6/10], Loss: 0.1565, Accuracy: 93.94%
Epoch [7/10], Loss: 0.1408, Accuracy: 94.57%
Epoch [8/10], Loss: 0.1301, Accuracy: 94.99%
Epoch [9/10], Loss: 0.1196, Accuracy: 95.37%
Epoch [10/10], Loss: 0.1117, Accuracy: 95.69%
Fold 2 pontosság: 89.56%
Fold 3 / 4
Epoch [1/10], L