Import relevant libraries

In [43]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from torchvision import datasets, transforms

Load Dataset

In [45]:

# Define a transform to convert images to tensors
transform = transforms.ToTensor()

# Load the MNIST dataset
mnist_train_data = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
mnist_test_data = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Create data loaders to load the data in batches
data_loader = torch.utils.data.DataLoader(dataset=mnist_train_data, batch_size=64, shuffle=True)
data_loader_test = torch.utils.data.DataLoader(dataset=mnist_test_data, batch_size=10000, shuffle=False)

Define the Autoencoder

In [46]:
# This is the definition of the Autoencoder model using a neural network with encoder and decoder modules.
class Autoencoder2(nn.Module):
    def __init__(self):
        super(Autoencoder2, self).__init__()
        # N, 784
        self.encoder = nn.Sequential(
            nn.Linear(784, 392),
            nn.ReLU(),
            nn.Linear(392, 196),
            nn.ReLU(),
            nn.Linear(196, 98),
            nn.ReLU(),
            nn.Linear(98, 49),
            nn.ReLU(),
            nn.Linear(49, 16),
            nn.ReLU(),
            nn.Linear(16, 2)
        )

        self.decoder = nn.Sequential(
            nn.Linear(2, 16),
            nn.ReLU(),
            nn.Linear(16, 49),
            nn.ReLU(),
            nn.Linear(49, 98),
            nn.ReLU(),
            nn.Linear(98, 196),
            nn.ReLU(),
            nn.Linear(196, 392),
            nn.ReLU(),
            nn.Linear(392, 784),
            nn.Sigmoid()
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

Define the relevant model parameters

In [47]:
# These lines define the loss function (MSE loss), instantiate the Autoencoder model
# learning rate of 1e-3 and L2 regularization with a weight decay of 1e-5.
criterion = nn.MSELoss()
model = Autoencoder2()
optimizer = optim.SGD(model.parameters(), lr=0.1, weight_decay=1e-10, momentum=0.9)

train the model

In [48]:
# train the model over n epochs
num_epochs = 100
encPics = []
labels = []
for epoch in range(num_epochs):
    for (img, _) in data_loader:
        img = img.reshape(-1, 28 * 28)
        recon = model(img)
        loss = criterion(recon, img)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch:{epoch + 1}, Loss:{loss.item():.4f}')

Epoch:1, Loss:0.0605
Epoch:2, Loss:0.0683
Epoch:3, Loss:0.0701
Epoch:4, Loss:0.0665
Epoch:5, Loss:0.0652
Epoch:6, Loss:0.0640
Epoch:7, Loss:0.0646
Epoch:8, Loss:0.0615
Epoch:9, Loss:0.0640
Epoch:10, Loss:0.0602
Epoch:11, Loss:0.0604
Epoch:12, Loss:0.0594
Epoch:13, Loss:0.0596
Epoch:14, Loss:0.0617
Epoch:15, Loss:0.0578
Epoch:16, Loss:0.0588
Epoch:17, Loss:0.0559
Epoch:18, Loss:0.0571
Epoch:19, Loss:0.0618
Epoch:20, Loss:0.0583
Epoch:21, Loss:0.0549
Epoch:22, Loss:0.0507
Epoch:23, Loss:0.0540
Epoch:24, Loss:0.0560
Epoch:25, Loss:0.0545
Epoch:26, Loss:0.0431
Epoch:27, Loss:0.0477
Epoch:28, Loss:0.0440
Epoch:29, Loss:0.0494
Epoch:30, Loss:0.0459
Epoch:31, Loss:0.0492
Epoch:32, Loss:0.0444
Epoch:33, Loss:0.0499
Epoch:34, Loss:0.0484
Epoch:35, Loss:0.0499
Epoch:36, Loss:0.0458
Epoch:37, Loss:0.0470
Epoch:38, Loss:0.0404
Epoch:39, Loss:0.0408
Epoch:40, Loss:0.0430
Epoch:41, Loss:0.0437
Epoch:42, Loss:0.0391
Epoch:43, Loss:0.0404
Epoch:44, Loss:0.0383
Epoch:45, Loss:0.0330
Epoch:46, Loss:0.04

Determine the accuracy of the Random Forest classifier on the reduced data

In [49]:
for (img, l) in data_loader_test:
    item = img.reshape(-1, 28 * 28)
    enc = model.encoder(item)
    enc = enc.detach().numpy()
    l = l.detach().numpy()
    encPics.append(enc)
    labels.append(l)

rf = RandomForestClassifier(n_estimators=266, max_depth=11)
encPics = np.reshape(encPics, (-1, 2))
labels = np.reshape(labels, (-1, 1))
labels = labels.ravel()
rf.fit(encPics, labels)
y_pred_1 = rf.predict(encPics)

accscore = accuracy_score(labels, y_pred_1, normalize=True)
print(accscore)

0.8382
