In [125]:
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.models as models
import torch


In [126]:
args = {
    'epoch_num': 5,
    'lr': 1e-3,
    'weight_decay': 8e-4,
    'batch_size': 20
}

if torch.cuda.is_available():
    args['device'] = torch.device('cuda')
else:
    args['device'] = torch.device('cpu')

print(f'Using device: {args["device"]}')



Using device: cuda


In [127]:
data_transforms_vgg16 = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

train_set = datasets.ImageFolder(root='./training_set/training_set', transform=data_transforms_vgg16)

train_loader = DataLoader(train_set, batch_size=64, shuffle=True)

test_set = datasets.ImageFolder(root='./test_set/test_set', transform=data_transforms_vgg16)

test_loader = DataLoader(test_set, batch_size=64, shuffle=True)

print(f'Classes: {train_set.classes}')


Classes: ['cats', 'dogs']


In [128]:
net = models.vgg16_bn(pretrained=True)

print(net)



VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256

In [129]:
print(list(net.named_children())[-1])

net.classifier[6] = nn.Linear(4096, 2)

net.to(args['device'])

print(list(net.named_children())[-1])

('classifier', Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=4096, out_features=1000, bias=True)
))
('classifier', Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=4096, out_features=2, bias=True)
))


In [130]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])

In [133]:
def train(net, train_loader, optimizer, criterion, device):
    net.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for epoch in range(args['epoch_num']):
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        print(f'Epoch {epoch+1}/{args["epoch_num"]}, Loss: {running_loss/len(train_loader):.4f}, Accuracy: {100*correct/total:.2f}%')


def test(net, test_loader, device):
    net.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = net(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Accuracy of the network on the test images: {100*correct/total:.2f}%')


In [134]:
train(net, train_loader, optimizer, criterion, args['device'])

Epoch 1/5, Loss: 0.4065, Accuracy: 89.02%
Epoch 2/5, Loss: 0.7111, Accuracy: 87.58%
Epoch 3/5, Loss: 0.8604, Accuracy: 89.71%
Epoch 4/5, Loss: 0.9982, Accuracy: 90.86%
Epoch 5/5, Loss: 1.1191, Accuracy: 91.66%


In [136]:
test(net, test_loader, args['device'])

Accuracy of the network on the test images: 93.97%


In [145]:
from PIL import Image
from torch.nn.functional import softmax

def predict_image_with_probabilities(image_path, model, device, class_names):
    """
    Realiza a inferência em uma imagem, retornando a probabilidade de cada classe.

    Args:
        image_path (str): Caminho para a imagem.
        model (torch.nn.Module): Modelo treinado.
        device (torch.device): CPU ou GPU.
        class_names (list): Lista com os nomes das classes.

    Returns:
        dict: Um dicionário contendo:
            - Nome da classe prevista.
            - Probabilidade da classe prevista.
            - Todas as probabilidades para cada classe.
    """
    # Transformações para a imagem (iguais ao treinamento)
    transform = transforms.Compose([
        transforms.Resize((224, 224)),  # Tamanho esperado pelo VGG16
        transforms.ToTensor(),         # Converte a imagem para tensor
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalização
    ])

    # Carrega a imagem
    image = Image.open(image_path).convert('RGB')  # Converte para RGB
    image = transform(image)  # Aplica as transformações
    image = image.unsqueeze(0).to(device)  # Adiciona dimensão do batch e move para o device

    # Coloca o modelo em modo de avaliação
    model.eval()
    
    with torch.no_grad():  # Desativa cálculo de gradientes
        outputs = model(image)  # Forward pass
        probabilities = softmax(outputs, dim=1)  # Calcula as probabilidades
        top_prob, top_class = torch.max(probabilities, 1)  # Obtém a classe com maior probabilidade

    # Retorna as probabilidades e a classe prevista
    return {
        "predicted_class": class_names[top_class.item()],
        "predicted_probability": top_prob.item(),
        "all_probabilities": {class_names[i]: probabilities[0][i].item() for i in range(len(class_names))}
    }


In [155]:
inference = predict_image_with_probabilities('lince.jpg', net, args['device'], train_set.classes)

print(inference)


{'predicted_class': 'cats', 'predicted_probability': 1.0, 'all_probabilities': {'cats': 1.0, 'dogs': 4.208379250549704e-13}}
