In [None]:
import torch.nn as nn

# Neural Network Model (1 hidden layer)
class MLP(nn.Module):
    """Fully connected network with a single ReLU-activated hidden layer.

    Args:
        input_size: Number of features in each input sample.
        hidden_size: Number of units in the hidden layer.
        num_classes: Number of output scores produced per sample.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # Sub-modules are registered in the order forward() applies them.
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Return the raw output of the second linear layer for batch ``x``.

        No softmax is applied here; the caller receives un-normalised scores.
        """
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

In [None]:
import mnist_reader
import numpy as np
# Base data directory; the Fashion-MNIST files are read from its "fashion" sub-folder.
root = "./../data"
fashion_dir = root + "/fashion"

# Training split: cast x to float32 and y to int64.
train_x, train_y = mnist_reader.load_mnist(fashion_dir, 'train')
train_x, train_y = train_x.astype(np.float32), train_y.astype(np.int64)

# Test split ('t10k'): same dtype conversion as the training split.
test_x, test_y = mnist_reader.load_mnist(fashion_dir, 't10k')
test_x, test_y = test_x.astype(np.float32), test_y.astype(np.int64)

In [None]:
import numpy as np
import torch
from torch import nn
from mlp import MLP
from sklearn.model_selection import train_test_split
from skorch import NeuralNetClassifier
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from scipy.stats import uniform

import datetime
import os
import time

# Input dimensionality (784 = 28 * 28, matching the flattening used by the
# manual training loop) and number of target classes.
input_size = 784
num_classes = 10

# skorch wrapper around the MLP; hidden size, learning rate, weight decay and
# batch size are left unset here because the random search below samples them.
net = NeuralNetClassifier(
    MLP,
    criterion=nn.CrossEntropyLoss,
    max_epochs=20,
    module__input_size=input_size,
    module__num_classes=num_classes,
    device='cuda',
)

# Search space. scipy's uniform(loc, scale) samples from [loc, loc + scale];
# randint(low, high) samples integers from [low, high).
params = {
    'net__lr': uniform(loc=0, scale=0.2),
    'net__module__hidden_size': randint(100, 1000),
    'net__optimizer__weight_decay': uniform(loc=0, scale=0.2),
    'net__batch_size': randint(10, 200),
}

model = Pipeline(steps=[("net", net)])

rs = RandomizedSearchCV(model, params, refit=True, cv=3, scoring='accuracy', n_iter=100, n_jobs=-1)

# Create the output directory *before* the search so that a missing folder is
# detected immediately instead of after the long-running fit has finished.
os.makedirs("./results", exist_ok=True)

start = time.time()

rs.fit(train_x, train_y)

results_path = "./results/fashionmnist_mlp_results_{}".format(
    datetime.datetime.now().strftime("%Y-%m-%d %H-%M-%S"))

# The context manager guarantees the file is flushed and closed even if
# scoring on the test set raises.
with open(results_path, "w+") as results:
    results.write("train: {}\n{}\n".format(rs.best_score_, rs.best_params_))
    print(rs.best_score_, rs.best_params_)

    # refit=True, so rs.score evaluates the best estimator refitted on all of train_x.
    test_score = rs.score(test_x, test_y)
    results.write("test: {}\n\n".format(test_score))
    print(test_score)
    print(time.time() - start)

In [None]:
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler
from torch.autograd import Variable

# Hyper Parameters
hidden_size = 790
num_epochs = 30
batch_size = 70
learning_rate = 0.00086824634389979236
weight_decay= 0.14883558801066421
validation_split=0.1  # fraction of the training set held out for validation

train_dataset = dsets.FashionMNIST(root=root,
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True,)

test_dataset = dsets.FashionMNIST(root=root,
                           train=False,
                           transform=transforms.ToTensor())

# Creating data indices for training and validation splits:
# the first `split` indices (unshuffled) form the validation set, the rest the training set.
dataset_size = len(train_dataset)
indices = list(range(dataset_size))
split = int(np.floor(validation_split * dataset_size))
train_indices, val_indices = indices[split:], indices[:split]

# Creating PT data samplers and loaders:
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           sampler=train_sampler)

valid_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           sampler=valid_sampler)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

net = MLP(input_size, hidden_size, num_classes)
net.cuda()

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
# weight_decay is forwarded to the optimizer so this loop uses the same
# regularisation as the skorch classifiers configured with
# optimizer__weight_decay elsewhere in the notebook.
optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate, weight_decay=weight_decay)
train_losses = []
train_accuracies = []
valid_losses = []
valid_accuracies = []
# Train the Model
for epoch in range(num_epochs):
    net.train()
    epoch_loss = 0.0
    epoch_accuracy = 0
    for i, (images, labels) in enumerate(train_loader):
        # Flatten each 28x28 image into a 784-vector and move the batch to the GPU.
        images = images.view(-1, 28 * 28).cuda()
        labels = labels.cuda()

        # Forward + Backward + Optimize
        optimizer.zero_grad()  # zero the gradient buffer
        outputs = net(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # .item() yields a plain Python float, so no autograd history is
        # kept alive by the running total.
        epoch_loss += loss.item()
        _, predictions = torch.max(outputs, 1)
        epoch_accuracy += (predictions == labels).sum().item()
        if (i + 1) % 100 == 0:
            # len(train_loader) is the true number of batches, including the
            # final partial batch that floor division would miss.
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                  % (epoch + 1, num_epochs, i + 1, len(train_loader), loss.item()))
    # Mean of the per-batch losses, and accuracy in percent over all training samples.
    train_losses.append(epoch_loss / len(train_loader))
    train_accuracies.append(100 * epoch_accuracy / len(train_indices))

    # Validation pass: no parameter updates, so gradient tracking is disabled.
    net.eval()
    valid_loss = 0.0
    valid_accuracy = 0
    with torch.no_grad():
        for images, labels in valid_loader:
            images = images.view(-1, 28 * 28).cuda()
            labels = labels.cuda()

            outputs = net(images)
            valid_loss += criterion(outputs, labels).item()

            _, predictions = torch.max(outputs, 1)
            valid_accuracy += (predictions == labels).sum().item()
    valid_loss = valid_loss / len(valid_loader)
    valid_accuracy = 100 * valid_accuracy / len(val_indices)
    print('Validation Loss: %.4f' % valid_loss)
    valid_losses.append(valid_loss)
    valid_accuracies.append(valid_accuracy)

# Test the Model
net.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.view(-1, 28 * 28).cuda()
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        # labels stay on the CPU, so predictions are moved back before comparing.
        correct += (predicted.cpu() == labels).sum().item()
print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))
# Save the Model
torch.save(net.state_dict(), 'model.pkl')
# One row per epoch; columns: train loss, train accuracy, validation loss, validation accuracy.
metrics = np.array([train_losses, train_accuracies, valid_losses, valid_accuracies]).T
np.savetxt('metrics.csv', metrics, delimiter=',')

In [None]:
from sklearn.metrics import confusion_matrix

# Human-readable class names, used as tick labels when the matrix is plotted.
classes = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
n_confusion_matrices = 10
# Running average of the confusion matrices of the independently trained classifiers.
cm = np.zeros((10,10))

# Configuration shared by every classifier trained in the loop below.
net_kwargs = dict(
    criterion=nn.CrossEntropyLoss,
    max_epochs=num_epochs,
    batch_size=batch_size,
    lr=learning_rate,
    module__input_size=input_size,
    module__hidden_size=hidden_size,
    module__num_classes=num_classes,
    optimizer__weight_decay=weight_decay,
    device='cuda',
)

for run in range(n_confusion_matrices):
    # Train a fresh classifier, predict the test set, and add this run's
    # share (1 / n_confusion_matrices) of its confusion matrix to the average.
    net = NeuralNetClassifier(MLP, **net_kwargs)
    net.fit(train_x, train_y)
    predictions = net.predict(test_x)
    cm += confusion_matrix(test_y, predictions) / n_confusion_matrices

import matplotlib.pyplot as plt
import itertools
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Print the confusion matrix and draw it on the current matplotlib figure.

    Args:
        cm: Square 2-D array-like; ``cm[i, j]`` is annotated at row ``i``
            (true label) and column ``j`` (predicted label).
        classes: Sequence of class names used as tick labels on both axes.
        normalize: When True, every row is divided by its sum before printing
            and plotting, so each row shows fractions instead of counts.
            Rows whose sum is zero are left as zeros.
        title: Title of the plot.
        cmap: Matplotlib colormap used for the image.
    """
    cm = np.asarray(cm)
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # `where` skips empty rows so they stay at the zeros pre-filled in `out`
        # instead of producing NaNs from a division by zero.
        cm = np.divide(cm, row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # The 'd' format code only accepts integers; fall back to two decimals
    # for normalised or otherwise floating-point matrices.
    fmt = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2f'
    # Cells darker than half of the maximum get white text for readability.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Called last so the layout also accounts for the axis labels set above.
    plt.tight_layout()

from matplotlib import rcParams
import os

# Enable matplotlib's automatic layout adjustment for figures.
rcParams.update({'figure.autolayout': True})

plt.figure()
# cm holds averages accumulated in floating point, so a value that should be
# an exact integer can end up marginally below it (e.g. ten additions of 0.7
# give 6.999...). Round to the nearest integer instead of truncating.
plot_confusion_matrix(np.rint(cm).astype(np.int64), classes=classes)

# Make sure the output directory exists before saving the figure.
os.makedirs("./results", exist_ok=True)
plt.savefig("./results/confusion_matrix.png")
plt.show()