Import relevant libraries

In [5]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from torchvision import datasets, transforms

Load Dataset

In [6]:
# Define a transform to convert images to tensors
transform = transforms.ToTensor()

# Load the MNIST dataset
mnist_train_data = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
mnist_test_data = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Create data loaders to load the data in batches
data_loader = torch.utils.data.DataLoader(dataset=mnist_train_data, batch_size=64, shuffle=True)
data_loader_test = torch.utils.data.DataLoader(dataset=mnist_test_data, batch_size=10000, shuffle=False)

Define the Autoencoder

In [7]:
# This is the definition of the Autoencoder model using a neural network with encoder and decoder modules.
class Autoencoder300D(nn.Module):
    def __init__(self):
        super(Autoencoder300D, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(784, 500),
            nn.ReLU(),
            nn.Linear(500, 400),
            nn.ReLU(),
            nn.Linear(400, 300)
        )

        self.decoder = nn.Sequential(
            nn.Linear(300, 400),
            nn.ReLU(),
            nn.Linear(400, 500),
            nn.ReLU(),
            nn.Linear(500, 784),
            nn.Sigmoid()
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

Define the relevant model parameters

In [8]:
# These lines define the loss function (MSE loss), instantiate the Autoencoder model
# learning rate of 1e-3 and L2 regularization with a weight decay of 1e-5.
criterion = nn.MSELoss()
model = Autoencoder300D()
optimizer = optim.SGD(model.parameters(), lr=0.1, weight_decay=1e-10, momentum=0.9)

train the model

In [9]:
# train the model over n epochs
num_epochs = 100
encPics = []
labels = []
for epoch in range(num_epochs):
    for (img, _) in data_loader:
        img = img.reshape(-1, 28 * 28)
        recon = model(img)
        loss = criterion(recon, img)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch:{epoch + 1}, Loss:{loss.item():.4f}')

Epoch:1, Loss:0.0696
Epoch:2, Loss:0.0622
Epoch:3, Loss:0.0619
Epoch:4, Loss:0.0633
Epoch:5, Loss:0.0556
Epoch:6, Loss:0.0582
Epoch:7, Loss:0.0548
Epoch:8, Loss:0.0429
Epoch:9, Loss:0.0472
Epoch:10, Loss:0.0392
Epoch:11, Loss:0.0384
Epoch:12, Loss:0.0311
Epoch:13, Loss:0.0301
Epoch:14, Loss:0.0287
Epoch:15, Loss:0.0277
Epoch:16, Loss:0.0263
Epoch:17, Loss:0.0228
Epoch:18, Loss:0.0213
Epoch:19, Loss:0.0218
Epoch:20, Loss:0.0242
Epoch:21, Loss:0.0203
Epoch:22, Loss:0.0200
Epoch:23, Loss:0.0183
Epoch:24, Loss:0.0188
Epoch:25, Loss:0.0202
Epoch:26, Loss:0.0188
Epoch:27, Loss:0.0184
Epoch:28, Loss:0.0186
Epoch:29, Loss:0.0162
Epoch:30, Loss:0.0156
Epoch:31, Loss:0.0186
Epoch:32, Loss:0.0174
Epoch:33, Loss:0.0143
Epoch:34, Loss:0.0155
Epoch:35, Loss:0.0149
Epoch:36, Loss:0.0165
Epoch:37, Loss:0.0135
Epoch:38, Loss:0.0126
Epoch:39, Loss:0.0129
Epoch:40, Loss:0.0142
Epoch:41, Loss:0.0108
Epoch:42, Loss:0.0108
Epoch:43, Loss:0.0104
Epoch:44, Loss:0.0136
Epoch:45, Loss:0.0130
Epoch:46, Loss:0.01

Determine the accuracy of the Random Forest classifier on the reduced data

In [10]:
for (img, l) in data_loader_test:
    item = img.reshape(-1, 28 * 28)
    enc = model.encoder(item)
    enc = enc.detach().numpy()
    l = l.detach().numpy()
    encPics.append(enc)
    labels.append(l)

rf = RandomForestClassifier(n_estimators=266, max_depth=11)
encPics = np.reshape(encPics, (-1, 300))
labels = np.reshape(labels, (-1, 1))
labels = labels.ravel()
rf.fit(encPics, labels)
y_pred_1 = rf.predict(encPics)

accscore = accuracy_score(labels, y_pred_1, normalize=True)
print(accscore)

0.9959
