In [6]:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import torch
from torchvision import transforms
from torchvision import datasets

In [7]:
train_transforms = transforms.Compose([transforms.RandomRotation(30),
                                       transforms.RandomResizedCrop(224),
                                       transforms.RandomHorizontalFlip(),
                                       transforms.ToTensor(),
                                       transforms.Normalize([0.485, 0.456, 0.406],
                                                            [0.229, 0.224, 0.225])])

valid_transforms = transforms.Compose([transforms.Resize(255),
                                       transforms.CenterCrop(224),
                                       transforms.ToTensor(),
                                       transforms.Normalize([0.485, 0.456, 0.406],
                                                            [0.229, 0.224, 0.225])])

train_dataset = datasets.ImageFolder('../gestos/train', transform=train_transforms)
valid_dataset = datasets.ImageFolder('../gestos/valid', transform=valid_transforms)
train_loader = torch.utils.data.DataLoader(train_dataset,num_workers=8, batch_size=32, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid_dataset,num_workers=8, batch_size=32, shuffle=False)
from torchvision import models

model = models.resnet18(pretrained=True, progress=True)

In [3]:
#Congelamos todos los parámetros
for param in model.parameters(): 
    param.requires_grad = False

# Recuperamos el número de neuronas de la última capa
neurons = model.fc.in_features 
# La reemplazamos por una nueva capa de salida
model.fc = torch.nn.Linear(neurons, 4) 

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(1):
    i=0
    for x, y in train_loader:
        if i > 10:
            break
        i = i + 1
        optimizer.zero_grad()
        yhat = model.forward(x)
        loss = criterion(yhat, y)
        loss.backward()
        optimizer.step()

    epoch_loss = 0.0
    i=0
    for x, y in valid_loader:
        if i > 10:
            break
        i = i + 1
        yhat = model.forward(x)
        loss = criterion(yhat, y)
        epoch_loss += loss.item()
    print(epoch, epoch_loss)

0 18.04896903038025


In [4]:
targets, predictions = [], []
for mbdata, label in valid_loader:
    logits = model.forward(mbdata)
    predictions.append(logits.argmax(dim=1).detach().numpy())
    targets.append(label.numpy())
predictions = np.concatenate(predictions)
targets = np.concatenate(targets)

from sklearn.metrics import confusion_matrix, classification_report

cm = confusion_matrix(targets, predictions)
display(cm)

print(classification_report(targets, predictions))

array([[ 15,  62, 343,  80],
       [ 22,  72, 306, 100],
       [ 13,  55, 349,  83],
       [ 17,  63, 337,  83]])

              precision    recall  f1-score   support

           0       0.22      0.03      0.05       500
           1       0.29      0.14      0.19       500
           2       0.26      0.70      0.38       500
           3       0.24      0.17      0.20       500

    accuracy                           0.26      2000
   macro avg       0.25      0.26      0.21      2000
weighted avg       0.25      0.26      0.21      2000

