### To implement some functions of this activity I have used the help of LLM

## Library import

In [1]:
import cv2
import onnx
import onnxruntime as ort
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets


## CNN Architecture

In [2]:
class FashionCNN(nn.Module):
    def __init__(self):
        super(FashionCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.fc1 = nn.Linear(1600, 128)  # Corrected input size
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = nn.functional.relu(self.conv1(x))
        x = nn.functional.max_pool2d(x, 2)
        x = nn.functional.relu(self.conv2(x))
        x = nn.functional.max_pool2d(x, 2)
        x = torch.flatten(x, 1)
        x = nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return x


## Data processing

In [3]:
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
train_dataset = datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1000, shuffle=False)


100%|██████████| 26.4M/26.4M [00:01<00:00, 22.0MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 601kB/s]
100%|██████████| 4.42M/4.42M [00:00<00:00, 9.82MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 6.72MB/s]


## Model configuration

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = FashionCNN().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()


## Training function

In [5]:
def train():
    model.train()
    for epoch in range(10):
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
        print(f'Epoch {epoch+1}, Loss: {loss.item()}')

train()


Epoch 1, Loss: 0.3991537094116211
Epoch 2, Loss: 0.2903968095779419
Epoch 3, Loss: 0.21084898710250854
Epoch 4, Loss: 0.25525325536727905
Epoch 5, Loss: 0.08852118253707886
Epoch 6, Loss: 0.22669202089309692
Epoch 7, Loss: 0.1123412549495697
Epoch 8, Loss: 0.03783678635954857
Epoch 9, Loss: 0.12392964214086533
Epoch 10, Loss: 0.07214470207691193


## ONNX Export

In [6]:
x = torch.randn(1, 1, 28, 28, device=device)
torch.onnx.export(model, x, "fashion_mnist_cnn.onnx", export_params=True, opset_version=11)


## OpenCV implementation

In [None]:
class_labels = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

session = ort.InferenceSession("fashion_mnist_cnn.onnx")
input_name = session.get_inputs()[0].name
output_name = session.get_outputs()[0].name

cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    resized = cv2.resize(gray, (28, 28))
    normalized = resized.astype(np.float32) / 255.0
    input_tensor = normalized.reshape(1, 1, 28, 28)
    preds = session.run([output_name], {input_name: input_tensor})[0]
    label = np.argmax(preds)
    label_name = class_labels[label]
    cv2.putText(frame, f'{label_name}', (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
    cv2.imshow('Fashion MNIST Real-time', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()


2025-02-19 12:54:19.947 python[97592:4175363] +[IMKClient subclass]: chose IMKClient_Modern
2025-02-19 12:54:19.947 python[97592:4175363] +[IMKInputSession subclass]: chose IMKInputSession_Modern


: 