Import relevant libraries

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from torchvision import datasets, transforms

Load Dataset

In [2]:
# Define a transform to convert images to tensors
transform = transforms.ToTensor()

# Load the MNIST dataset
mnist_train_data = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
mnist_test_data = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Create data loaders to load the data in batches
data_loader = torch.utils.data.DataLoader(dataset=mnist_train_data, batch_size=64, shuffle=True)
data_loader_test = torch.utils.data.DataLoader(dataset=mnist_test_data, batch_size=10000, shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 78567629.83it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 60236545.91it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz



100%|██████████| 1648877/1648877 [00:00<00:00, 27352296.45it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 10684536.61it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






Define the Autoencoder

In [3]:
# This is the definition of the Autoencoder model using a neural network with encoder and decoder modules.
class Autoencoder700D(nn.Module):
    def __init__(self):
        super(Autoencoder700D, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(784, 760),
            nn.ReLU(),
            nn.Linear(760, 730),
            nn.ReLU(),
            nn.Linear(730, 700)
        )

        self.decoder = nn.Sequential(
            nn.Linear(700, 730),
            nn.ReLU(),
            nn.Linear(730, 760),
            nn.ReLU(),
            nn.Linear(760, 784),
            nn.Sigmoid()
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

Define the relevant model parameters

In [4]:
# These lines define the loss function (MSE loss), instantiate the Autoencoder model
# learning rate of 1e-3 and L2 regularization with a weight decay of 1e-5.
criterion = nn.MSELoss()
model = Autoencoder700D()
optimizer = optim.SGD(model.parameters(), lr=0.1, weight_decay=1e-10, momentum=0.9)

train the model

In [5]:
# train the model over n epochs
num_epochs = 100
encPics = []
labels = []
for epoch in range(num_epochs):
    for (img, _) in data_loader:
        img = img.reshape(-1, 28 * 28)
        recon = model(img)
        loss = criterion(recon, img)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch:{epoch + 1}, Loss:{loss.item():.4f}')

Epoch:1, Loss:0.0645
Epoch:2, Loss:0.0605
Epoch:3, Loss:0.0646
Epoch:4, Loss:0.0627
Epoch:5, Loss:0.0585
Epoch:6, Loss:0.0494
Epoch:7, Loss:0.0431
Epoch:8, Loss:0.0371
Epoch:9, Loss:0.0377
Epoch:10, Loss:0.0297
Epoch:11, Loss:0.0276
Epoch:12, Loss:0.0310
Epoch:13, Loss:0.0250
Epoch:14, Loss:0.0286
Epoch:15, Loss:0.0300
Epoch:16, Loss:0.0217
Epoch:17, Loss:0.0245
Epoch:18, Loss:0.0227
Epoch:19, Loss:0.0182
Epoch:20, Loss:0.0215
Epoch:21, Loss:0.0235
Epoch:22, Loss:0.0166
Epoch:23, Loss:0.0165
Epoch:24, Loss:0.0191
Epoch:25, Loss:0.0169
Epoch:26, Loss:0.0179
Epoch:27, Loss:0.0134
Epoch:28, Loss:0.0135
Epoch:29, Loss:0.0138
Epoch:30, Loss:0.0163
Epoch:31, Loss:0.0137
Epoch:32, Loss:0.0111
Epoch:33, Loss:0.0142
Epoch:34, Loss:0.0147
Epoch:35, Loss:0.0141
Epoch:36, Loss:0.0124
Epoch:37, Loss:0.0110
Epoch:38, Loss:0.0152
Epoch:39, Loss:0.0130
Epoch:40, Loss:0.0095
Epoch:41, Loss:0.0107
Epoch:42, Loss:0.0090
Epoch:43, Loss:0.0104
Epoch:44, Loss:0.0115
Epoch:45, Loss:0.0112
Epoch:46, Loss:0.01

Determine the accuracy of the Random Forest classifier on the reduced data

In [6]:
for (img, l) in data_loader_test:
    item = img.reshape(-1, 28 * 28)
    enc = model.encoder(item)
    enc = enc.detach().numpy()
    l = l.detach().numpy()
    encPics.append(enc)
    labels.append(l)

rf = RandomForestClassifier(n_estimators=266, max_depth=11)
encPics = np.reshape(encPics, (-1, 700))
labels = np.reshape(labels, (-1, 1))
labels = labels.ravel()
rf.fit(encPics, labels)
y_pred_1 = rf.predict(encPics)

accscore = accuracy_score(labels, y_pred_1, normalize=True)
print(accscore)

0.9971
