Import relevant libraries

In [12]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from torchvision import datasets, transforms

Load Dataset

In [13]:
# Define a transform to convert images to tensors
transform = transforms.ToTensor()

# Load the MNIST dataset
mnist_train_data = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
mnist_test_data = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Create data loaders to load the data in batches
data_loader = torch.utils.data.DataLoader(dataset=mnist_train_data, batch_size=64, shuffle=True)
data_loader_test = torch.utils.data.DataLoader(dataset=mnist_test_data, batch_size=10000, shuffle=False)

Define the Autoencoder

In [14]:
# This is the definition of the Autoencoder model using a neural network with encoder and decoder modules.
class Autoencoder200D(nn.Module):
    def __init__(self):
        super(Autoencoder200D, self).__init__()
        # N, 784
        self.encoder = nn.Sequential(
            nn.Linear(784, 400),
            nn.ReLU(),
            nn.Linear(400, 300),
            nn.ReLU(),
            nn.Linear(300, 200)
        )

        self.decoder = nn.Sequential(
            nn.Linear(200, 300),
            nn.ReLU(),
            nn.Linear(300, 400),
            nn.ReLU(),
            nn.Linear(400, 784),
            nn.Sigmoid()
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

Define the relevant model parameters

In [15]:
# These lines define the loss function (MSE loss), instantiate the Autoencoder model
# learning rate of 1e-3 and L2 regularization with a weight decay of 1e-5.
criterion = nn.MSELoss()
model = Autoencoder200D()
optimizer = optim.SGD(model.parameters(), lr=0.1, weight_decay=1e-10, momentum=0.9)

train the model

In [16]:
# train the model over n epochs
num_epochs = 100
encPics = []
labels = []
for epoch in range(num_epochs):
    for (img, _) in data_loader:
        img = img.reshape(-1, 28 * 28)
        recon = model(img)
        loss = criterion(recon, img)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch:{epoch + 1}, Loss:{loss.item():.4f}')

Epoch:1, Loss:0.0737
Epoch:2, Loss:0.0639
Epoch:3, Loss:0.0582
Epoch:4, Loss:0.0567
Epoch:5, Loss:0.0609
Epoch:6, Loss:0.0511
Epoch:7, Loss:0.0472
Epoch:8, Loss:0.0442
Epoch:9, Loss:0.0397
Epoch:10, Loss:0.0338
Epoch:11, Loss:0.0327
Epoch:12, Loss:0.0289
Epoch:13, Loss:0.0278
Epoch:14, Loss:0.0289
Epoch:15, Loss:0.0271
Epoch:16, Loss:0.0240
Epoch:17, Loss:0.0240
Epoch:18, Loss:0.0240
Epoch:19, Loss:0.0206
Epoch:20, Loss:0.0237
Epoch:21, Loss:0.0206
Epoch:22, Loss:0.0223
Epoch:23, Loss:0.0223
Epoch:24, Loss:0.0232
Epoch:25, Loss:0.0211
Epoch:26, Loss:0.0213
Epoch:27, Loss:0.0193
Epoch:28, Loss:0.0164
Epoch:29, Loss:0.0172
Epoch:30, Loss:0.0160
Epoch:31, Loss:0.0181
Epoch:32, Loss:0.0152
Epoch:33, Loss:0.0166
Epoch:34, Loss:0.0153
Epoch:35, Loss:0.0182
Epoch:36, Loss:0.0167
Epoch:37, Loss:0.0154
Epoch:38, Loss:0.0151
Epoch:39, Loss:0.0151
Epoch:40, Loss:0.0137
Epoch:41, Loss:0.0133
Epoch:42, Loss:0.0147
Epoch:43, Loss:0.0117
Epoch:44, Loss:0.0118
Epoch:45, Loss:0.0114
Epoch:46, Loss:0.01

Determine the accuracy of the Random Forest classifier on the reduced data

In [17]:
for (img, l) in data_loader_test:
    item = img.reshape(-1, 28 * 28)
    enc = model.encoder(item)
    enc = enc.detach().numpy()
    l = l.detach().numpy()
    encPics.append(enc)
    labels.append(l)

rf = RandomForestClassifier(n_estimators=266, max_depth=11)
encPics = np.reshape(encPics, (-1, 200))
labels = np.reshape(labels, (-1, 1))
labels = labels.ravel()
rf.fit(encPics, labels)
y_pred_1 = rf.predict(encPics)

accscore = accuracy_score(labels, y_pred_1, normalize=True)
print(accscore)

0.9966
