In [43]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import numpy as np
import os
from PIL import Image


class HandwritingDataset(Dataset):
    def __init__(self, image_dir, transform=None):
        self.image_dir = image_dir
        self.transform = transform
        self.images = self.get_image_paths_and_labels()

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image_path, label = self.images[idx]
        image = Image.open(image_path).convert("L")  # Convert to grayscale
        if self.transform:
            image = self.transform(image)
        return image, label

    def get_image_paths_and_labels(self):
        image_paths = []
        labels = []
        classes = os.listdir(self.image_dir)
        for idx, class_name in enumerate(classes):
            class_dir = os.path.join(self.image_dir, class_name)
            for filename in os.listdir(class_dir):
                if filename.endswith(".png"):  # Assuming your images are PNGs
                    image_paths.append(os.path.join(class_dir, filename))
                    labels.append(idx)
        print("Classes:", classes)
        print("Number of images:", len(image_paths))
        print("Number of labels:", len(labels))
        print("Labels:", np.unique(labels))
        print("Label counts:", np.bincount(labels))

        # self.classes = classes
        # self.labels = labels
        self.labels_map = {i: label for i, label in enumerate(classes)}
        return list(zip(image_paths, labels))


# Define transforms
transform = transforms.Compose(
    [
        transforms.Resize((28, 28)),  # Adjust size as needed
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

# Initialize dataset
dataset = HandwritingDataset("Imagens rotacionadas", transform=transform)

Classes: ['Italo', 'Ste', 'Gustavo']
Number of images: 70
Number of labels: 70
Labels: [0 1 2]
Label counts: [28 28 14]


In [136]:
# Split the dataset into a training set and a validation set
val_ratio = 0.05  # 20% of the data will be used for validation
batch_size = 32

val_size = int(len(dataset) * val_ratio)
train_size = len(dataset) - val_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

# Create data loaders for the training set and the validation set
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [137]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'Using device: {device}')

Using device: cuda


In [145]:
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 3)  # TODO: 3 classes for person A, B, C

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 64 * 7 * 7)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [124]:
class SimpleCNN(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, pool_size, fc1_out_features, fc2_out_features):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding)
        self.pool = nn.MaxPool2d(kernel_size=pool_size, stride=pool_size)
        self.conv2 = nn.Conv2d(out_channels, out_channels*2, kernel_size=kernel_size, stride=stride, padding=padding)
        self.fc1 = nn.Linear(out_channels*2 * 7 * 7, fc1_out_features)
        self.fc2 = nn.Linear(fc1_out_features, fc2_out_features)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, self.fc1.in_features)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [98]:
cuda_device = torch.cuda.current_device()
print(torch.cuda.device_count())
print(torch.cuda.get_device_name(cuda_device))
print(torch.cuda.get_device_properties(cuda_device))
print(torch.cuda.is_available())
print(torch.cuda.memory_allocated())
print(torch.cuda.memory_reserved())

1
NVIDIA GeForce RTX 3050 Ti Laptop GPU
_CudaDeviceProperties(name='NVIDIA GeForce RTX 3050 Ti Laptop GPU', major=8, minor=6, total_memory=3897MB, multi_processor_count=20)
True
18727424
119537664


In [99]:
def evaluate(model, data_loader, device, criterion):
    model.eval()
    running_loss = 0.0
    correct = 0
    with torch.no_grad():
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            _, preds = torch.max(outputs, dim=1)
            correct += (preds == labels).sum().item()
    accuracy = correct / len(data_loader.dataset)
    return running_loss / len(data_loader), accuracy

def train(train_loader: DataLoader, val_loader: DataLoader, model: nn.Module, num_epochs: int):
    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Training loop
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for images, labels in train_loader:
            # Move data to the GPU
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()  # Zero the gradients
            outputs = model(images)  # Forward pass
            loss = criterion(outputs, labels)  # Compute loss
            loss.backward()  # Backward pass
            optimizer.step()  # Update weights
            running_loss += loss.item()

        val_loss, val_acc = evaluate(model, val_loader, device, criterion)
        print(f'Epoch {epoch+1}/{num_epochs}, Val Loss: {val_loss}, Val Acc: {val_acc}')

In [148]:
model = SimpleCNN(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1, pool_size=2, fc1_out_features=128, fc2_out_features=len(dataset.labels_map)).to(device)
# model = SimpleCNN().to(device)
train(train_loader, val_loader, model, 10)

TypeError: __init__() got an unexpected keyword argument 'in_channels'

In [139]:
def predict(model, image):
    model.eval()
    with torch.no_grad():
        outputs = model(image)
        _, pred = torch.max(outputs, dim=1)
    return pred.item()

In [140]:
for file in os.listdir("validar"):
    image_path = "validar/" + file
    image = Image.open(image_path).convert('L')  # Convert to grayscale
    image = transform(image).to(device)
    prediction = predict(model, image)
    print(f"{file} --- {dataset.labels_map[prediction]}")

Gustavo1.jpeg --- Gustavo
Gustavo2.jpeg --- Gustavo
Italo2.jpeg --- Italo
Ste2.jpeg --- Ste
Italo1.jpeg --- Gustavo
Ste1.jpeg --- Ste


3