# Data loading

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from torchvision.datasets import ImageFolder
from torchvision.io import read_image
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import os
import torch.nn.functional as F
import torchvision
import math
import matplotlib.pyplot as plt

# Data preprocessing

In [None]:
# Preprocessing pipeline applied to every image when it is loaded:
#   1. collapse to a single grayscale channel (the network's first
#      convolution takes exactly one input channel),
#   2. convert the image to a tensor,
#   3. normalize that channel with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose(
    [
        transforms.Grayscale(1),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

In [None]:
# Root folders of the training and test splits, relative to the notebook's
# working directory.
train_dir, test_dir = './dataset_letters/train', './dataset_letters/test'
print("done")

In [None]:
# Build one dataset per split. ImageFolder derives each sample's integer
# label from the sub-folder it lives in; both splits share the same
# preprocessing pipeline.
# NOTE(review): the model below emits a single logit, so each split is
# presumably expected to contain exactly two class folders - verify.
train_dataset = ImageFolder(root=train_dir, transform=transform)
test_dataset = ImageFolder(root=test_dir, transform=transform)

# Training

In [None]:
# Hyperparameters
batch_size, learning_rate = 16, 0.001

# Assumed input image size in pixels. Nothing in the visible pipeline
# resizes images, so the dataset is presumably already 28x28 - verify.
image_width = image_height = 28

# Convolutional layer parameters
kernel_size, stride, padding = 3, 1, 1
channels = 32

# Spatial size after the convolution:
#   floor((input + 2 * padding - kernel) / stride) + 1
conv_output_width = (image_width + 2 * padding - kernel_size) // stride + 1
conv_output_height = (image_height + 2 * padding - kernel_size) // stride + 1

# Spatial size after 2x2 max pooling with stride 2: each side is halved,
# rounding down.
pool_output_width = conv_output_width // 2
pool_output_height = conv_output_height // 2
print(pool_output_width, pool_output_height)

In [None]:
# Create data loaders
# Training batches are reshuffled every epoch so the optimizer does not see
# the samples in the same order each time.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Accuracy does not depend on sample order, so the test split is iterated in
# a fixed order; this keeps evaluation runs reproducible and easier to debug.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
class CNN(nn.Module):
    """Small convolutional network that emits one logit per image.

    Architecture: one convolution (1 input channel -> ``channels`` feature
    maps) followed by ReLU and 2x2 max pooling, then two fully connected
    layers (flattened features -> 128 -> 1).

    The layer sizes are read from the module-level hyperparameters
    ``channels``, ``kernel_size``, ``stride``, ``padding``,
    ``pool_output_width`` and ``pool_output_height`` at construction time.
    The output is a raw logit (no sigmoid is applied here).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, channels, kernel_size=kernel_size, stride=stride, padding=padding)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(pool_output_width * pool_output_height * channels, 128)
        self.fc2 = nn.Linear(128, 1)

    def forward(self, x):
        """Return logits of shape ``(batch, 1)`` for a batch of images.

        Args:
            x: image tensor of shape ``(batch, 1, H, W)``; a single unbatched
                ``(1, H, W)`` image is also accepted and treated as a batch
                of one.

        Raises:
            RuntimeError: if the pooled feature map does not have the size
                the first fully connected layer was built for (i.e. the
                input images do not match the configured image size).
        """
        # Keep accepting a single unbatched image, as the previous
        # ``view(-1, ...)`` implementation did.
        if x.dim() == 3:
            x = x.unsqueeze(0)
        x = self.pool(F.relu(self.conv1(x)))
        # Flatten per sample. The previous ``view(-1, fixed_size)`` silently
        # moved surplus spatial elements into the batch dimension when the
        # input size was wrong; this way the mismatch surfaces as an error
        # in ``fc1`` instead of producing extra, meaningless rows.
        x = x.flatten(start_dim=1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)

In [None]:
model = CNN()
# The network outputs raw logits; BCEWithLogitsLoss applies the sigmoid
# internally, so no activation is needed on the final layer.
criterion = nn.BCEWithLogitsLoss()
# Use the shared `learning_rate` hyperparameter rather than a second,
# hard-coded copy of the value, so changing it in one place takes effect.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
def train(model, train_loader, optimizer, criterion):
    """Run one training epoch and return the accumulated loss.

    Args:
        model: network to optimize; switched to training mode here.
        train_loader: iterable yielding ``(inputs, labels)`` batches.
        optimizer: optimizer stepping the model's parameters.
        criterion: loss taking ``(logits, float_targets)``.

    Returns:
        The sum over batches of ``loss * batch_size``. This is a total, not
        a per-sample average: it is not divided by the number of samples.
    """
    train_loss = 0

    model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        # Flatten the logits to one value per sample. A bare ``squeeze()``
        # would also drop the batch dimension when the last batch holds a
        # single sample, and the loss then rejects the shape mismatch with
        # the ``(1,)`` labels.
        outputs = model(inputs).reshape(-1)
        loss = criterion(outputs, labels.float())
        loss.backward()
        optimizer.step()

        # Weight each batch's loss by its size so a short final batch does
        # not count as much as a full one.
        train_loss += loss.item() * inputs.size(0)

    return train_loss

def evaluate(model, test_loader):
    """Compute classification accuracy of ``model`` over ``test_loader``.

    The model is switched to evaluation mode and run without gradient
    tracking. A logit above 0 is counted as class 1, otherwise class 0.
    The sample and hit counts are printed before returning.

    Args:
        model: network producing one logit per sample.
        test_loader: iterable yielding ``(inputs, labels)`` batches.

    Returns:
        Fraction of correctly classified samples in ``[0, 1]``; ``0.0`` when
        the loader yields no samples.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            # One logit per sample; ``reshape(-1)`` keeps the batch
            # dimension even for a batch of one, so the comparison below is
            # always element-wise rather than relying on broadcasting.
            outputs = model(inputs).reshape(-1)
            predicted = (outputs > 0).int()  # Convert logits to binary predictions
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    # Guard against an empty loader instead of dividing by zero.
    accuracy = correct / total if total else 0.0
    print(f'total: {total}, correct: {correct}')
    return accuracy


In [None]:
num_epochs = 8
# Per-epoch history: summed training loss and test accuracy in percent.
# Only the accuracies are plotted below; the losses are kept for inspection.
losses, accuracies = [], []
for epoch in range(num_epochs):
    # One full pass over the training data, then score on the test split.
    train_loss = train(model, train_loader, optimizer, criterion)
    accuracy = evaluate(model, test_loader)
    losses += [train_loss]
    accuracies += [accuracy*100]
    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Test Accuracy: {accuracy*100:.2f}%")

# Persist only the learned parameters (the state dict), not the module object.
torch.save(model.state_dict(), 'binary_classifier_cnn_letters.pth')

# Plot test accuracy per epoch on a fixed 0-100 percent axis.
axes = plt.gca()
axes.set_ylim(bottom=0, top=100)
axes.plot(accuracies, label='accuracy')
axes.legend()
plt.show()