In [1]:
import idx2numpy
import cv2
import numpy as np
from matplotlib import pyplot as plt
from tqdm.notebook import tqdm

In [2]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [3]:
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR

In [4]:
# Load the train / test image and label files with idx2numpy, in this order.
train_images, train_labels, test_images, test_labels = (
    idx2numpy.convert_from_file(idx_path)
    for idx_path in (
        'data/raw/train-images-idx3-ubyte',
        'data/raw/train-labels-idx1-ubyte',
        'data/raw/t10k-images-idx3-ubyte',
        'data/raw/t10k-labels-idx1-ubyte',
    )
)

In [5]:
# Pick the compute device: CUDA when torch reports it as available, CPU otherwise.
gpu_found = torch.cuda.is_available()
device = torch.device('cuda' if gpu_found else 'cpu')
if gpu_found:
    print("Detect GPU: {}".format(torch.cuda.get_device_name()))
else:
    print('No GPU Detected')

Detect GPU: GeForce GTX 1080


In [6]:
# Hyperparameters read by the data loaders, the training loop and the ONNX dummy input.
learning_rate = 1e-4  # step size handed to the Adam optimizer
epochs = 25           # number of full passes over the training loader
batch_size = 64       # samples per batch for both loaders (and the ONNX dummy input)

# Prepare data

In [7]:
class FashinMnistDataSet(Dataset):
    """Map-style dataset that serves image/label pairs as tensors.

    Every item is a dict with two entries:

    * ``'images'``: float tensor holding the selected image divided by 255,
      with a new leading axis inserted (an ``(H, W)`` image becomes
      ``(1, H, W)``).
    * ``'label'``: long tensor holding the label stored at the same index.

    Args:
        images: array that supports ``images[idx, :]`` indexing and division
            by a float (e.g. a NumPy array of shape ``(N, H, W)``).
        labels: sequence of labels aligned with ``images``; its length is the
            dataset length.
        transform: optional callable applied to the image tensor right before
            it is returned. ``None`` (the default) returns the tensor as is.
    """

    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples, taken from the label sequence."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the sample at ``idx`` as ``{'images': ..., 'label': ...}``."""
        image = self.images[idx, :] / 255.
        # Insert the leading axis so the tensor is (1, H, W) rather than (H, W).
        image = np.expand_dims(image, axis=0)
        image = torch.tensor(image, dtype=torch.float)

        # The transform used to be stored but silently ignored; honour it here.
        if self.transform is not None:
            image = self.transform(image)

        return {
            'images': image,
            'label': torch.tensor(self.labels[idx], dtype=torch.long),
        }

In [8]:
# Wrap the raw arrays in dataset objects; neither split is given a transform.
train_set = FashinMnistDataSet(images=train_images, labels=train_labels)
test_set = FashinMnistDataSet(images=test_images, labels=test_labels)

In [9]:
# Training batches are drawn through a RandomSampler.
train_sampler = RandomSampler(train_set)
training_loader = DataLoader(
    dataset=train_set,
    batch_size=batch_size,
    sampler=train_sampler,
)

# Test batches are drawn through a SequentialSampler.
test_sample = SequentialSampler(test_set)
test_loader = DataLoader(
    dataset=test_set,
    batch_size=batch_size,
    sampler=test_sample,
)

# Build model

In [10]:
class SimpleCNN(nn.Module):
    """Three-convolution classifier that returns per-class log-probabilities.

    Layer order in ``forward``: conv1 -> ReLU -> conv2 -> ReLU -> 2x2 max-pool
    -> dropout1 -> conv3 -> ReLU -> 2x2 max-pool -> dropout1 -> flatten ->
    fc1 -> ReLU -> dropout2 -> fc2 -> log-softmax over dim 1.

    ``fc1`` expects 3200 input features, i.e. 128 channels * 5 * 5, which is
    what a 1x28x28 input is reduced to by the three unpadded 3x3 convolutions
    and the two 2x2 poolings.
    """

    def __init__(self):
        super().__init__()
        # Unpadded 3x3 convolutions with stride 1; channels grow 1 -> 32 -> 64 -> 128.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1)

        # dropout1 is applied after each pooling step, dropout2 after fc1.
        self.dropout1 = nn.Dropout(p=0.25)
        self.dropout2 = nn.Dropout(p=0.5)

        self.fc1 = nn.Linear(in_features=3200, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=10)

    def forward(self, x):
        """Map a ``(batch, 1, H, W)`` tensor to ``(batch, 10)`` log-probabilities."""
        x = F.relu(self.conv1(x))

        x = F.relu(self.conv2(x))
        x = self.dropout1(F.max_pool2d(x, 2))

        x = F.relu(self.conv3(x))
        x = self.dropout1(F.max_pool2d(x, 2))

        # Keep the batch axis and collapse everything else for the dense head.
        x = torch.flatten(x, 1)
        x = self.dropout2(F.relu(self.fc1(x)))
        return F.log_softmax(self.fc2(x), dim=1)

In [11]:
# Build the network and move its parameters to the selected device.
model = SimpleCNN().to(device)
# Bare last expression so the notebook displays the layer summary.
model

SimpleCNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (dropout1): Dropout(p=0.25, inplace=False)
  (dropout2): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=3200, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=10, bias=True)
)

In [12]:
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# NOTE(review): this scheduler is constructed but scheduler.step() is never
# called, so the learning rate stays at `learning_rate` for the whole run.
# It is deliberately left un-stepped here: with gamma=0.7 applied every epoch
# the rate would shrink by roughly 0.7**25 (about 1e-4 of its start value)
# over 25 epochs, which would change the training results. Either remove the
# scheduler or choose a schedule on purpose before enabling it.
scheduler = StepLR(optimizer, step_size=1, gamma=0.7)
# SimpleCNN.forward already ends in log_softmax, so the matching criterion is
# NLLLoss. CrossEntropyLoss would apply log_softmax a second time; that is a
# no-op on log-probabilities, so the loss value is the same.
criterion = nn.NLLLoss()

for epoch in tqdm(range(epochs)):
    # Training: one pass over the shuffled training loader.
    model.train()
    running_loss = 0
    for data in training_loader:
        images = data['images'].to(device)
        labels = data['label'].to(device)

        optimizer.zero_grad()
        output = model(images)
        loss = criterion(output, labels)

        loss.backward()

        optimizer.step()
        running_loss += loss.item()

    # Report the mean per-batch loss for this epoch.
    print('[Epoch %d] loss: %.3f' %
                      (epoch + 1, running_loss/len(training_loader)))

    # Validation. Note this is measured on the test loader; the notebook has
    # no separate validation split.
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            # Labels stay on the CPU; predictions are moved back to compare.
            images, labels = data['images'].to(device), data['label']
            predicted = model(images).argmax(dim=1).cpu()
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Accuracy of the network on test images: %0.3f %%' % (
        100 * correct / total))

HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))

[Epoch 1] loss: 0.810
Accuracy of the network on test images: 78.840 %
[Epoch 2] loss: 0.518
Accuracy of the network on test images: 83.900 %
[Epoch 3] loss: 0.450
Accuracy of the network on test images: 85.250 %
[Epoch 4] loss: 0.406
Accuracy of the network on test images: 86.550 %
[Epoch 5] loss: 0.376
Accuracy of the network on test images: 87.320 %
[Epoch 6] loss: 0.353
Accuracy of the network on test images: 87.870 %
[Epoch 7] loss: 0.335
Accuracy of the network on test images: 88.420 %
[Epoch 8] loss: 0.321
Accuracy of the network on test images: 88.630 %
[Epoch 9] loss: 0.310
Accuracy of the network on test images: 89.270 %
[Epoch 10] loss: 0.299
Accuracy of the network on test images: 89.540 %
[Epoch 11] loss: 0.291
Accuracy of the network on test images: 89.800 %
[Epoch 12] loss: 0.283
Accuracy of the network on test images: 90.090 %
[Epoch 13] loss: 0.273
Accuracy of the network on test images: 90.210 %
[Epoch 14] loss: 0.267
Accuracy of the network on test images: 90.430 %
[

# Evaluate model

In [13]:
# Scale the whole test set by 1/255 and insert an axis at position 1 in one
# vectorised step, giving a single (N, 1, H, W) array. This holds the same
# values as the former per-image loop, but avoids building a Python list of N
# small arrays, which torch.tensor converts very slowly.
batch_inf_imgs = np.expand_dims(test_images / 255., axis=1)

In [14]:
# Turn the prepared images into one float tensor on the selected device.
batch_inf_imgs = torch.tensor(batch_inf_imgs, dtype=torch.float).to(device)

# Ground-truth labels for the report in the next cell.
y_true = test_labels

model.eval()
with torch.no_grad():
    class_scores = model(batch_inf_imgs)
    # Index of the highest score per row, brought back to the CPU as NumPy.
    y_pred = class_scores.argmax(dim=1).cpu().numpy()

In [15]:
# classification_report returns plain text, so print it to keep the table layout.
report = classification_report(y_true=y_true, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.86      0.88      0.87      1000
           1       1.00      0.98      0.99      1000
           2       0.89      0.88      0.89      1000
           3       0.91      0.93      0.92      1000
           4       0.86      0.89      0.87      1000
           5       0.98      0.98      0.98      1000
           6       0.78      0.74      0.76      1000
           7       0.95      0.97      0.96      1000
           8       0.98      0.98      0.98      1000
           9       0.98      0.96      0.97      1000

    accuracy                           0.92     10000
   macro avg       0.92      0.92      0.92     10000
weighted avg       0.92      0.92      0.92     10000



# Save model as torch

In [18]:
# Save the model's state_dict (not the module object itself) to disk.
torch.save(obj=model.state_dict(), f='simple_cnn.pth')

# Convert to ONNX

In [16]:
# Set inference mode explicitly so dropout is disabled for the reference
# forward pass and the export, whichever cell was run last.
model.eval()

# Random dummy batch matching the model's input layout; it is the example
# input passed to the ONNX export.
x = torch.randn(batch_size, 1, 28, 28, requires_grad=True).to(device)
output = model(x)

In [17]:
# Export the network to ONNX, using `x` as the example input.
torch.onnx.export(
    model,
    x,
    'fashion_mnist.onnx',
    export_params=True,
    opset_version=10,
    do_constant_folding=True,
    input_names=['input'],
    output_names=['output'],
    # Variable-length axes: axis 0 of the input and output is marked dynamic
    # and named 'batch_size'.
    dynamic_axes={
        'input': {0: 'batch_size'},
        'output': {0: 'batch_size'},
    },
)