In [1]:
# Imports
import os
import random
from glob import glob
import cv2 
import matplotlib.pylab as plt
import seaborn as sns
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms

from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix

import argparse
#import torch
#import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
#from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR

In [12]:
class CustomImageDataset(Dataset):
    """Map-style dataset over parallel, indexable collections of images and labels.

    Args:
        images: Indexable collection whose items support ``.astype`` (e.g. a
            NumPy array of images).
        labels: Indexable collection of labels, aligned with ``images``.
        transform: Optional callable applied to each image after it has been
            cast to ``uint8``. Skipped when falsy.
    """

    def __init__(self, images, labels, transform=None):
        self.images, self.labels, self.transform = images, labels, transform

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for ``idx``; the image is cast to uint8 first."""
        sample = self.images[idx].astype(np.uint8)
        target = self.labels[idx]
        return (self.transform(sample) if self.transform else sample), target

# Load the data (images and their labels) from the saved NumPy archives
x_npz = np.load("data/x_images_arrays.npz")
X = x_npz["arr_0"]
y_npz = np.load("data/y_labels_arrays.npz")
Y = y_npz["arr_0"]

print(np.array(X).shape)
print(np.array(Y).shape)

# Separate the data into train, val and test sets
# 80% train, 10% val, 10% test; both splits are stratified on the labels
x_train, x_temp, y_train, y_temp = train_test_split(X, Y, test_size=0.2, random_state=1, stratify=Y)

# Split the held-out 20% in half: one half for test, the other for validation
x_test, x_val, y_test, y_val = train_test_split(x_temp, y_temp, test_size=0.5, random_state=1, stratify=y_temp)

print(np.array(x_train).shape)
print(np.array(x_val).shape)
print(np.array(x_test).shape)
# (3045, 128, 128, 3)
# (381, 128, 128, 3)
# (381, 128, 128, 3)

# Transform the data to tensor
# Define a transform: array -> PIL image -> single-channel grayscale -> tensor.
# Normalize with mean 0 / std 1 leaves the values unchanged.
transform = transforms.Compose([
    #transforms.Resize((28, 28)),
    transforms.ToPILImage(),
    transforms.Grayscale(),
    transforms.ToTensor(),
    transforms.Normalize((0,), (1,)),
])

# Apply the transform to the data
dataset_train = CustomImageDataset(x_train, y_train, transform=transform)
dataset_val = CustomImageDataset(x_val, y_val, transform=transform)
# The test dataset must wrap the held-out test split (x_test / y_test), not the
# validation split, so that test metrics are measured on data kept apart from
# validation.
dataset_test = CustomImageDataset(x_test, y_test, transform=transform)

img, label = dataset_train[0]
print(img.shape, label) # [grayscale=1, size=128, size=128] label=0 ('Nothing')
print(len(dataset_train)) # 3045 images
print(len(dataset_val)) # 381 images
print(len(dataset_test)) # 381 images

# Load into the DataLoader
batch_size = 32

# Only the training loader shuffles; evaluation loaders keep a fixed order so
# repeated evaluation runs iterate the samples identically.
train_loader = DataLoader(dataset_train, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset_val, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(dataset_test, batch_size=batch_size, shuffle=False)

(3807, 128, 128, 3)
(3807,)
(3045, 128, 128, 3)
(381, 128, 128, 3)
(381, 128, 128, 3)
torch.Size([1, 128, 128]) 0
3045
381
381


In [None]:
from collections import Counter

# Number of images per class in the training labels
class_counts_train = Counter(y_train)
# Number of images per class in the held-out test labels (y_test), so that the
# counts match the "teste" heading printed below
class_counts_test = Counter(y_test)

# Print one report per split: a heading followed by one line per class
for header, counts in (
    ("Quantidade de imagens por classe no conjunto de treino:", class_counts_train),
    ("Quantidade de imagens por classe no conjunto de teste:", class_counts_test),
):
    print(header)
    for class_id, count in counts.items():
        print(f"Classe {class_id}: {count} imagens")

Quantidade de imagens por classe no conjunto de treino:
Classe 0: 2435 imagens
Classe 2: 402 imagens
Classe 1: 208 imagens
Quantidade de imagens por classe no conjunto de treino:
Classe 0: 304 imagens
Classe 1: 26 imagens
Classe 2: 51 imagens


In [8]:
# Network Architecture
entry_channels, exit_channels = 1, 16  # conv input / output channels
kernel_size, stride = 3, 1             # conv kernel size and stride
num_inputs = 128 * 128                 # pixels per image (128 x 128)
num_hidden = 1000                      # width of the hidden fully-connected layer
num_outputs = 3  # classes

# Use GPU if available: prefer CUDA, then Apple MPS, otherwise fall back to CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

class Net(nn.Module):
    """Small CNN classifier: one convolution followed by two fully-connected layers.

    Layer sizes are read from the module-level settings ``entry_channels``,
    ``exit_channels``, ``kernel_size``, ``stride``, ``num_inputs``,
    ``num_hidden`` and ``num_outputs``.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(entry_channels, exit_channels, kernel_size, stride, padding=1)
        # Registered but not applied in forward(); fc1 is sized as
        # exit_channels * num_inputs, i.e. for an un-pooled feature map.
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(exit_channels * num_inputs, num_hidden)
        self.fc2 = nn.Linear(num_hidden, num_outputs)

    def forward(self, x):
        """Return per-class log-probabilities (log-softmax over dim 1) for ``x``."""
        # Convolution followed by its activation
        features = F.relu(self.conv1(x))
        # Pooling is deliberately skipped here (it would reduce the spatial size)
        # Flatten everything except the batch dimension
        flat = torch.flatten(features, 1)
        # Hidden fully-connected layer followed by its activation
        hidden = F.relu(self.fc1(flat))
        # Output layer, converted to log-probabilities
        return F.log_softmax(self.fc2(hidden), dim=1)
    
# Instantiate the network and move it to the selected device
model = Net().to(device)

In [9]:
# Path of the saved state dict to restore
PATH = './models/cnn1.pth'
# map_location remaps the stored tensors onto the device selected for this
# session, so a checkpoint saved on another device (e.g. CUDA) still loads on a
# CPU- or MPS-only machine.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source.
model.load_state_dict(torch.load(PATH, map_location=device))
# Switch to evaluation mode before running inference
model.eval()
print(f"Model loaded from {PATH}")

Model loaded from ./models/cnn1.pth


In [10]:
# Helper that computes the evaluation metrics
def calculate_metrics(y_true, y_pred, average='macro'):
    """Compute precision, recall and F1-score for a set of predictions.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.
        average: Averaging mode forwarded to
            ``precision_recall_fscore_support`` ('macro', 'weighted' or
            'micro'). Defaults to 'macro'.

    Returns:
        Tuple ``(precision, recall, f1)``.
    """
    # zero_division=0 scores a metric as 0 when it is undefined (for example a
    # class that is never predicted) without emitting a warning.
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_true, y_pred, average=average, zero_division=0
    )
    return precision, recall, f1

# Lists that collect the predictions and the true labels of every batch
all_preds, all_targets = [], []

# Running counters used for the accuracy
total, correct = 0, 0

# Model evaluation: evaluation mode, no gradient tracking
model.eval()
with torch.no_grad():
    for batch_images, batch_labels in test_loader:
        batch_images, batch_labels = batch_images.to(device), batch_labels.to(device)

        # Forward pass
        outputs = model(batch_images)

        # Predictions: index of the highest score along dim 1
        predicted = outputs.max(1)[1]

        # Accumulate accuracy counters
        total += batch_labels.size(0)
        correct += (predicted == batch_labels).sum().item()

        # Store predictions and labels for the F1-score and the confusion matrix
        all_preds.extend(predicted.cpu().numpy())
        all_targets.extend(batch_labels.cpu().numpy())

# Report accuracy
accuracy_pct = 100 * correct / total
print(f"Total correctly classified test set images: {correct}/{total}")
print(f"Test Set Accuracy: {accuracy_pct:.2f}%")

# Report precision, recall and F1-score
precision, recall, f1 = calculate_metrics(all_targets, all_preds)
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1:.2f}")

# Compute and display the confusion matrix
conf_matrix = confusion_matrix(all_targets, all_preds)
print("\nMatriz de Confusão:")
print(conf_matrix)

Total correctly classified test set images: 304/381
Test Set Accuracy: 79.79%
Precision: 0.27
Recall: 0.33
F1-score: 0.30

Matriz de Confusão:
[[304   0   0]
 [ 26   0   0]
 [ 51   0   0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
