### To implement some functions of this activity I have used the help of LLM

My personal github repo for this subject: https://github.com/carlos89p/Advanced-Machine-Learning

## Library import

In [6]:
import cv2
import onnx
import onnxruntime as ort
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets

import torch.nn.functional as F



## CNN Architecture

In [7]:
class FashionCNN(nn.Module):
    def __init__(self):
        super(FashionCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm2d(256)
        
        self.pool = nn.MaxPool2d(2, 2)
        
        # Cálculo automático del tamaño de entrada a fc1
        self._to_linear = None
        self._get_flatten_size()
        
        self.fc1 = nn.Linear(self._to_linear, 256)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(256, 10)  # 10 clases en FashionMNIST

    def _get_flatten_size(self):
        """Calcula el tamaño de entrada para fc1 ejecutando una pasada con datos de prueba."""
        with torch.no_grad():
            sample_input = torch.zeros(1, 1, 28, 28)  # Imagen de prueba 1x28x28
            output = self.pool(F.relu(self.bn1(self.conv1(sample_input))))
            output = self.pool(F.relu(self.bn2(self.conv2(output))))
            output = self.pool(F.relu(self.bn3(self.conv3(output))))
            output = self.pool(F.relu(self.bn4(self.conv4(output))))
            self._to_linear = output.numel()

    def forward(self, x):
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))
        x = self.pool(F.relu(self.bn4(self.conv4(x))))
        
        x = x.view(x.size(0), -1)  # Aplanar correctamente
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x


## Data processing

In [8]:
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
train_dataset = datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1000, shuffle=False)


## Model configuration

In [9]:
device = torch.device("cpu")
model = FashionCNN().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()


## Training function

In [10]:
def train():
    model.train()
    for epoch in range(20):
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
        print(f'Epoch {epoch+1}, Loss: {loss.item()}')

train()


Epoch 1, Loss: 0.3308822810649872
Epoch 2, Loss: 0.17786630988121033
Epoch 3, Loss: 0.17027172446250916
Epoch 4, Loss: 0.036168817430734634
Epoch 5, Loss: 0.31223368644714355
Epoch 6, Loss: 0.08663536608219147
Epoch 7, Loss: 0.15267667174339294
Epoch 8, Loss: 0.13604311645030975
Epoch 9, Loss: 0.019858337938785553
Epoch 10, Loss: 0.022205373272299767
Epoch 11, Loss: 0.1941538155078888
Epoch 12, Loss: 0.041313111782073975
Epoch 13, Loss: 0.04856882244348526
Epoch 14, Loss: 0.002854670397937298
Epoch 15, Loss: 0.105343297123909
Epoch 16, Loss: 0.19301873445510864
Epoch 17, Loss: 0.012712798081338406
Epoch 18, Loss: 0.0567750558257103
Epoch 19, Loss: 0.001072875689715147
Epoch 20, Loss: 0.006679262034595013


## ONNX Export

In [None]:
x = torch.randn(1, 1, 28, 28, device=device)
torch.onnx.export(model, x, "fashion_mnist_cnn_prueba.onnx", export_params=True, opset_version=11)


## OpenCV implementation

In [None]:
class_labels = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

session = ort.InferenceSession("fashion_mnist_cnn_prueba.onnx")
input_name = session.get_inputs()[0].name
output_name = session.get_outputs()[0].name

cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    resized = cv2.resize(gray, (28, 28))
    normalized = resized.astype(np.float32) / 255.0
    input_tensor = normalized.reshape(1, 1, 28, 28)
    preds = session.run([output_name], {input_name: input_tensor})[0]
    label = np.argmax(preds)
    label_name = class_labels[label]
    cv2.putText(frame, f'{label_name}', (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
    cv2.imshow('Fashion MNIST Real-time', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()


2025-02-20 15:20:01.753 python[2643:4481887] +[IMKClient subclass]: chose IMKClient_Modern
2025-02-20 15:20:01.753 python[2643:4481887] +[IMKInputSession subclass]: chose IMKInputSession_Modern


: 