In [12]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import os

In [13]:
class SimpleNet(nn.Module):
    """Small fully connected classifier for 28x28 images.

    A 784 -> 64 linear layer followed by ReLU feeds a 64 -> 10 linear layer
    that returns the raw class scores (no softmax is applied here).
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28 * 28, 64)  # hidden layer over the flattened pixels
        self.fc2 = nn.Linear(64, 10)  # one output score per class

    def forward(self, x):
        """Reshape ``x`` to rows of 784 values and return the 10 scores per row."""
        flat = x.view(-1, 28 * 28)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

In [14]:
def train(epochs=2, batch_size=4, lr=0.001, momentum=0.9, log_every=2000,
          data_root='./data', model_path='modelo_mnist.pth'):
    """Train a ``SimpleNet`` on the MNIST training split and save its weights.

    Called with no arguments this behaves exactly as before; every
    hyper-parameter and path is now overridable by keyword.

    Args:
        epochs: Number of full passes over the training set.
        batch_size: Mini-batch size handed to the ``DataLoader``.
        lr: SGD learning rate.
        momentum: SGD momentum.
        log_every: Print the mean loss every this many mini-batches; a falsy
            value (``0`` / ``None``) disables progress output.
        data_root: Directory passed to ``torchvision.datasets.MNIST`` as
            ``root`` (the dataset is requested with ``download=True``).
        model_path: File that receives ``net.state_dict()`` after training.
    """
    # ToTensor followed by Normalize(mean=0.5, std=0.5) on the single channel.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ])
    trainset = torchvision.datasets.MNIST(
        root=data_root, train=True, download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=batch_size, shuffle=True)

    net = SimpleNet()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum)

    for epoch in range(epochs):
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(trainloader):
            optimizer.zero_grad()
            loss = criterion(net(inputs), labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            # Report the mean loss of the last `log_every` mini-batches, then
            # restart the accumulator.
            if log_every and i % log_every == log_every - 1:
                print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / log_every:.3f}')
                running_loss = 0.0

    print('Finished Training')
    torch.save(net.state_dict(), model_path)

# Run the training now: train() requests the MNIST data (download=True) and
# writes the learned weights to modelo_mnist.pth, which later cells load.
train()

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 77007174.08it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 73415572.01it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz



100%|██████████| 1648877/1648877 [00:00<00:00, 22555693.47it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 3058851.76it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw

[1,  2000] loss: 0.672
[1,  4000] loss: 0.380
[1,  6000] loss: 0.318
[1,  8000] loss: 0.299
[1, 10000] loss: 0.274
[1, 12000] loss: 0.242
[1, 14000] loss: 0.236
[2,  2000] loss: 0.211
[2,  4000] loss: 0.190
[2,  6000] loss: 0.183
[2,  8000] loss: 0.180
[2, 10000] loss: 0.172
[2, 12000] loss: 0.168
[2, 14000] loss: 0.174
Finished Training


In [15]:
# Rebuild the network and restore the weights written by train().
model = SimpleNet()
model.load_state_dict(torch.load('modelo_mnist.pth'))
model.eval()  # switch the module to evaluation mode before predicting

# Paths of every .jpg file (extension compared case-insensitively) found
# anywhere below the 'data' directory.
images = [
    os.path.join(folder, filename)
    for folder, _subdirs, filenames in os.walk('data')
    for filename in filenames
    if os.path.splitext(filename)[1].lower() == '.jpg'
]


In [19]:
def predict(image):
    """Classify a handwritten digit with the module-level ``model``.

    Args:
        image: A PIL image, or a path (``str`` / ``os.PathLike``) to an image
            file, which is opened here and closed again before returning.

    Returns:
        int: Index of the largest score produced by ``model``.
    """
    # The notebook collects image *paths*, so accept those as well as
    # already-opened PIL images.
    if isinstance(image, (str, os.PathLike)):
        from PIL import Image  # local import: PIL is only imported in a later cell

        with Image.open(image) as opened:
            return predict(opened)

    # Convert to one grayscale channel at 28x28, then apply the same
    # ToTensor + Normalize(0.5, 0.5) steps that train() uses.
    transformations = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize((28, 28)),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])
    img_transformed = transformations(image)

    # Add a leading batch dimension so the model receives a batch of one.
    img_batch = img_transformed.unsqueeze(0)

    # Inference only: gradients are not needed.
    with torch.no_grad():
        outputs = model(img_batch)
        _, predicted = torch.max(outputs, 1)

    return predicted.item()


In [20]:
from PIL import Image

# `images` holds file paths (collected with os.walk), not PIL images, and the
# torchvision transforms inside predict() need an image object, so each file is
# opened here and closed again once it has been classified.
for image_path in images:
    with Image.open(image_path) as image:
        result = predict(image)
    print(result)

In [23]:
import torch
import torchvision
import torchvision.transforms as transforms
from PIL import Image
import os
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F

class SimpleNet(nn.Module):
    """Two-layer perceptron: 784 inputs -> 64 hidden units (ReLU) -> 10 scores."""

    def __init__(self):
        super().__init__()
        # Attribute names are the keys of the saved state dict, so they stay
        # `fc1` and `fc2`.
        self.fc1 = nn.Linear(28 * 28, 64)
        self.fc2 = nn.Linear(64, 10)

    def forward(self, x):
        """Flatten ``x`` into rows of 28*28 values and return the class scores."""
        pixels = x.view(-1, 28 * 28)
        activated = F.relu(self.fc1(pixels))
        scores = self.fc2(activated)
        return scores


# Use the trained network to read the handwritten digits stored as .jpg files
# under the data_prueba directory.
# Load the model
model = SimpleNet()
model.load_state_dict(torch.load('modelo_mnist.pth'))
model.eval()  # tell the model it is now in evaluation mode

# Every .jpg file (extension compared case-insensitively) below data_prueba.
images = [
    os.path.join(folder, filename)
    for folder, _subdirs, filenames in os.walk('data_prueba')
    for filename in filenames
    if os.path.splitext(filename)[1].lower() == '.jpg'
]

def predict(image):
    """Classify one handwritten-digit image with the module-level ``model``.

    Prints the shape of the preprocessed batch and the predicted digit, and
    also returns the digit so callers can use it without reading stdout.

    Args:
        image: PIL image to classify.

    Returns:
        int: Index of the largest score produced by ``model``.
    """
    # Convert to one grayscale channel at 28x28, then scale with
    # ToTensor + Normalize(0.5, 0.5).
    transformations = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize((28, 28)),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])
    img_transformed = transformations(image)

    # Add a batch dimension at position 0 so the model receives a batch of one.
    img_batch = img_transformed.unsqueeze(0)

    # Show the resulting tensor shape as a sanity check.
    print(img_batch.shape)

    # Inference only: gradients are not tracked.
    with torch.no_grad():
        outputs = model(img_batch)
        _, predicted = torch.max(outputs, 1)

    # `predicted` holds the index of the digit chosen by the model.
    digit = predicted.item()
    print(f'El modelo predice que el dígito es: {digit}')
    return digit

# Classify every collected file; the context manager closes each image file
# as soon as predict() has finished with it.
for image_path in images:
    with Image.open(image_path) as image:
        predict(image)

torch.Size([1, 1, 28, 28])
El modelo predice que el dígito es: 2
