In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
import numpy as np

In [None]:
from torchvision import transforms

# Preprocessing pipeline applied to every MNIST sample. ToTensor is the only
# step: it turns each image into a tensor the model can consume.
transform = transforms.Compose([transforms.ToTensor()])

In [None]:
# Training split of MNIST, stored under `kaggle/input` (download=True is
# passed so the data is fetched when needed).
train_dataset = datasets.MNIST(
    root='kaggle/input',
    train=True,
    transform=transform,
    download=True,
)

In [None]:
# Held-out test split of MNIST, same storage root and preprocessing as the
# training split.
test_dataset = datasets.MNIST(
    root='kaggle/input',
    train=False,
    transform=transform,
    download=True,
)

In [None]:
# Batched iterators over both splits. Only the training loader shuffles; the
# test loader keeps a fixed order.
# NOTE(review): batch_size is the literal 64 here. The BATCH_SIZE constant is
# defined further down the notebook and is not used by these loaders.
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=2,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
    num_workers=2,
)

In [None]:
# Fetch the first training sample. Despite the name, `image` is the whole
# dataset item; it is indexed with [0] below to reach the image tensor.
image = train_dataset[0]
# Last expression of the cell: displays the shape of the image tensor.
image[0].shape

In [None]:
class Encoder(nn.Module):
    """Encoder half of the AutoEncoder.

    Compresses a batch of single-channel 28x28 images into a
    ``latent_dim``-dimensional code using three conv/batch-norm stages
    followed by a fully connected head.

    Args:
        latent_dim: Size of the latent code produced by ``forward``.
            Defaults to 9, the value originally hard-coded here.

    Raises:
        ValueError: If ``latent_dim`` is not a positive integer.
    """

    def __init__(self, latent_dim=9):
        super().__init__()

        if not isinstance(latent_dim, int) or latent_dim < 1:
            raise ValueError(f"latent_dim must be a positive integer, got {latent_dim!r}")
        self.latent_dim = latent_dim

        # All convolutions use 3x3 kernels with padding=1, so they keep the
        # spatial size; only the pooling steps in forward() shrink it.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=4, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(4)  # operates on 28 x 28 maps

        self.conv2 = nn.Conv2d(in_channels=4, out_channels=8, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(8)  # operates on 14 x 14 maps

        self.conv3 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(16)  # operates on 7 x 7 maps

        self.pool = nn.MaxPool2d(2, 2)

        # Fully connected head: flattened 16 x 7 x 7 features -> latent code.
        self.lm_head = nn.Sequential(
            nn.Linear(16 * 7 * 7, 512),
            nn.GELU(),
            nn.Dropout(0.2),
            nn.Linear(512, 256),
            nn.GELU(),
            nn.Linear(256, 128),
            nn.GELU(),
            nn.Linear(128, 64),
            nn.GELU(),
            nn.Linear(64, latent_dim),
        )

        # NOTE: fc1 is never called in forward(). It is kept so the module's
        # parameter set / state_dict keys stay the same as before.
        self.fc1 = nn.Linear(in_features=16 * 7 * 7, out_features=latent_dim)

    def forward(self, x):
        """Encode ``x`` of shape (N, 1, 28, 28) into a tensor of shape (N, latent_dim)."""
        x = self.pool(F.relu(self.bn1(self.conv1(x))))  # 28x28 -> 14x14
        x = self.pool(F.relu(self.bn2(self.conv2(x))))  # 14x14 -> 7x7
        x = F.relu(self.bn3(self.conv3(x)))             # stays 7x7 (no pooling)

        # Flatten everything except the batch dimension before the linear head.
        x = torch.flatten(x, 1)
        x = self.lm_head(x)
        return x

In [None]:
class Decoder(nn.Module):
    """Decoder half of the AutoEncoder.

    Expands a ``latent_dim``-dimensional code back into a single-channel
    28x28 image using a fully connected head followed by two transposed
    convolutions and a final 3x3 convolution.

    Args:
        latent_dim: Size of the latent code accepted by ``forward``.
            Defaults to 9, the value originally hard-coded here.

    Raises:
        ValueError: If ``latent_dim`` is not a positive integer.
    """

    def __init__(self, latent_dim=9):
        super().__init__()

        if not isinstance(latent_dim, int) or latent_dim < 1:
            raise ValueError(f"latent_dim must be a positive integer, got {latent_dim!r}")
        self.latent_dim = latent_dim

        # Fully connected head: latent code -> flattened 16 x 7 x 7 features.
        self.lm_head = nn.Sequential(
            nn.Linear(latent_dim, 64),
            nn.GELU(),
            nn.Dropout(0.2),
            nn.Linear(64, 128),
            nn.GELU(),
            nn.Linear(128, 256),
            nn.GELU(),
            nn.Linear(256, 512),
            nn.GELU(),
            nn.Linear(512, 16 * 7 * 7)
        )

        # Block 1: 7x7 -> 14x14 (kernel 2, stride 2 doubles the spatial size)
        self.deconv1 = nn.ConvTranspose2d(in_channels=16, out_channels=8, kernel_size=2, stride=2)
        self.bn1 = nn.BatchNorm2d(8)

        # Block 2: 14x14 -> 28x28
        self.deconv2 = nn.ConvTranspose2d(in_channels=8, out_channels=4, kernel_size=2, stride=2)
        self.bn2 = nn.BatchNorm2d(4)

        # Final Layer: to get back to 1 channel (padding=1 keeps 28x28)
        self.final_conv = nn.Conv2d(in_channels=4, out_channels=1, kernel_size=3, padding=1)

    def forward(self, x):
        """Decode ``x`` of shape (N, latent_dim) into images of shape (N, 1, 28, 28).

        The output passes through a sigmoid, so every value lies in [0, 1].
        """
        x = self.lm_head(x)
        # Restore the (channels, height, width) layout expected by the deconvs.
        x = x.view(-1, 16, 7, 7)

        x = F.relu(self.bn1(self.deconv1(x)))
        x = F.relu(self.bn2(self.deconv2(x)))

        x = torch.sigmoid(self.final_conv(x))

        return x

In [None]:
class AutoEncoder(nn.Module):
    """Chains an Encoder and a Decoder into a single reconstruction model."""

    def __init__(self):
        super().__init__()
        self.encoder = Encoder()
        self.decoder = Decoder()

    def forward(self, x):
        """Encode `x` and return the decoder's reconstruction of it."""
        latent = self.encoder(x)
        return self.decoder(latent)

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Build the autoencoder and move its parameters onto the chosen device.
model = AutoEncoder()
model = model.to(device)

In [None]:
# Number of full passes over the training set made by the training loop.
NUM_EPOCHS = 15
# NOTE(review): the DataLoaders created earlier in the notebook pass a literal
# batch_size=64, so this constant does not control the batch size actually used.
BATCH_SIZE = 128
# Optimizer step size; equal to the 1e-3 that the Adam optimizer is built with.
LEARNING_RATE = 1e-3

In [None]:
# Reconstruction objective: mean absolute error between the model output and
# the input image.
criterion = nn.L1Loss()
# Take the step size from the LEARNING_RATE constant in the hyperparameter
# cell rather than repeating the literal, so there is one place to tune it.
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [None]:
for epoch in range(NUM_EPOCHS):
    # ---- Training pass: dropout active, batch-norm statistics updated ----
    model.train()
    running_train_loss = 0.0
    for images, _ in train_loader:
        # Labels are ignored: the input image is its own reconstruction target.
        images = images.to(device)

        optimizer.zero_grad()
        loss = criterion(model(images), images)
        loss.backward()
        optimizer.step()

        # The criterion returns a batch mean; weight it by the batch size so
        # the epoch figure is a true per-sample average.
        running_train_loss += images.size(0) * loss.item()
    epoch_train_loss = running_train_loss / len(train_loader.dataset)

    # ---- Evaluation pass: no gradients, no parameter updates ----
    model.eval()
    running_test_loss = 0.0
    with torch.no_grad():
        for images, _ in test_loader:
            images = images.to(device)
            loss = criterion(model(images), images)
            running_test_loss += images.size(0) * loss.item()
    epoch_test_loss = running_test_loss / len(test_loader.dataset)

    print(f"Epoch [{epoch+1}/{NUM_EPOCHS}], Train Loss: {epoch_train_loss:.6f}, Test Loss: {epoch_test_loss:.6f}")

print("\nTraining finished.")

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import random

# --- 5. VISUALIZE RESULTS ON RANDOM SAMPLES ---

# Evaluation mode for inference.
model.eval()

# Pick distinct random positions in the test set.
num_images_to_show = 10
num_samples_in_test = len(test_dataset)
random_indices = random.sample(range(num_samples_in_test), num_images_to_show)

# Each dataset item is indexed with [0] to take the image tensor; stack the
# chosen tensors into one batch on the model's device.
picked = [test_dataset[idx][0] for idx in random_indices]
random_test_images = torch.stack(picked).to(device)

# Reconstruct the batch without tracking gradients.
with torch.no_grad():
    reconstructed_images = model(random_test_images)

# matplotlib needs host-side NumPy arrays.
images_np = random_test_images.cpu().numpy()
outputs_np = reconstructed_images.cpu().numpy()

# One row per sample: original on the left, reconstruction on the right.
fig, axes = plt.subplots(nrows=num_images_to_show, ncols=2, figsize=(5, 20))
fig.suptitle('Original vs. Reconstructed (Random Samples)', fontsize=16)

for row_axes, original, reconstruction in zip(axes, images_np, outputs_np):
    panels = ((original, "Original"), (reconstruction, "Reconstructed"))
    for ax, (img, title) in zip(row_axes, panels):
        # squeeze drops the singleton channel axis so imshow gets a 2-D array.
        ax.imshow(np.squeeze(img), cmap='gray')
        ax.set_title(title)
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

# Leave room at the top of the figure for the suptitle.
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()

In [None]:
# Collect the encoder's latent code and the digit label of every test image.
model.eval()

encoded_representations = []  # one latent vector per test image
test_labels = []              # matching digit labels, in the same order

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)

        # Only the encoder half is run; the decoder is not needed here.
        latent_batch = model.encoder(images)

        # extend() adds the batch row by row, so each list ends up with one
        # entry per sample.
        encoded_representations.extend(latent_batch.cpu().numpy())
        test_labels.extend(labels.cpu().numpy())

# Stack the per-sample entries into arrays for scikit-learn / matplotlib.
encoded_representations_np = np.asarray(encoded_representations)
test_labels_np = np.asarray(test_labels)

print("Shape of encoded representations:", encoded_representations_np.shape)
print("Shape of test labels:", test_labels_np.shape)

## Apply t-SNE

### Subtask:
Use scikit-learn's `TSNE` to reduce the dimensionality of the encoded representations to 2.


**Reasoning**:
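The encoder produces a 9-dimensional code for each image, which cannot be plotted directly. scikit-learn's `TSNE` projects these codes down to 2 dimensions so they can be shown in a scatter plot.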



In [None]:
from sklearn.manifold import TSNE

import inspect

# scikit-learn 1.5 renamed TSNE's `n_iter` argument to `max_iter`, and later
# releases removed the old name. Pick whichever keyword the installed version
# accepts so this cell runs on both old and new scikit-learn.
if "max_iter" in inspect.signature(TSNE).parameters:
    tsne_iter_kwarg = "max_iter"
else:
    tsne_iter_kwarg = "n_iter"

# Fixed random_state keeps the embedding reproducible between runs.
tsne = TSNE(n_components=2, random_state=42, perplexity=30, **{tsne_iter_kwarg: 300})
tsne_results = tsne.fit_transform(encoded_representations_np)

print("Shape of t-SNE results:", tsne_results.shape)

## Visualize t-SNE results

### Subtask:
Plot the 2-dimensional t-SNE output, coloring the points by their original digit labels.


**Reasoning**:
Plot the 2-dimensional t-SNE output, coloring the points by their original digit labels, and add a title and a colorbar that maps colors to digit labels.



In [None]:
import matplotlib.pyplot as plt

# Scatter the 2-D embedding, one point per test image, colored by digit label.
fig, ax = plt.subplots(figsize=(10, 8))
scatter = ax.scatter(
    tsne_results[:, 0],
    tsne_results[:, 1],
    c=test_labels_np,
    cmap='tab10',
    s=10,
)
ax.set_title("t-SNE Visualization of Encoded MNIST Data")
# The colorbar is the key from color to digit label.
fig.colorbar(scatter, ax=ax, label="Digit Label")
plt.show()