2nd Solution

In [None]:
import numpy as np
from keras.layers import Input, Dense, Lambda, Reshape
from keras.models import Model
from keras.datasets import mnist
from keras import backend as K
import matplotlib.pyplot as plt

# Load the MNIST dataset (labels are discarded: only the images are used)
(x_train, _), (x_test, _) = mnist.load_data()

# Normalize pixel values to [0, 1]
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.

# Flatten the images into a 784-dimensional vector
img_shape = x_train.shape[1:]
# np.prod returns a NumPy integer; cast it to a plain Python int so that the
# layer size arguments built from it below (Input shape, Dense units) receive
# an ordinary int rather than a NumPy scalar.
input_dim = int(np.prod(img_shape))
x_train = x_train.reshape((-1, input_dim))
x_test = x_test.reshape((-1, input_dim))

# Define the dimensions of the latent space
latent_dim = 16

# Define the encoder network: two ReLU layers feeding two linear heads that
# parameterize the latent distribution (mean and log-variance)
inputs = Input(shape=(input_dim,))
x = Dense(256, activation='relu')(inputs)
x = Dense(128, activation='relu')(x)
z_mean = Dense(latent_dim)(x)
z_log_var = Dense(latent_dim)(x)

# Define the sampling function
def sampling(args):
    """Draw a latent sample with the reparameterization trick.

    Args:
        args: A pair ``(z_mean, z_log_var)`` of tensors with the same shape.

    Returns:
        ``z_mean + exp(z_log_var / 2) * epsilon`` where ``epsilon`` is
        standard-normal noise with the same shape as ``z_mean``.
    """
    z_mean, z_log_var = args
    # Take the noise shape from the tensor itself instead of the notebook-level
    # ``latent_dim`` global, so the function keeps working if the encoder's
    # latent size changes or the cell defining ``latent_dim`` was not run.
    epsilon = K.random_normal(shape=K.shape(z_mean), mean=0., stddev=1.)
    # exp(log_var / 2) is the standard deviation
    return z_mean + K.exp(z_log_var / 2) * epsilon

# Latent code sampled from the encoder's (z_mean, z_log_var) outputs
z = Lambda(sampling)([z_mean, z_log_var])

# Decoder network, kept as its own model so it can be driven directly
# with latent vectors after training
decoder_inputs = Input(shape=(latent_dim,))
x = Dense(128, activation='relu')(decoder_inputs)
x = Dense(256, activation='relu')(x)
outputs = Dense(input_dim, activation='sigmoid')(x)
decoder = Model(inputs=decoder_inputs, outputs=outputs)

# End-to-end VAE: encoder input -> sampled z -> decoder reconstruction
outputs = decoder(z)
vae = Model(inputs=inputs, outputs=outputs)

# VAE loss = per-sample reconstruction term + KL term, averaged over the batch
per_pixel_bce = K.binary_crossentropy(inputs, outputs)
reconstruction_loss = K.sum(per_pixel_bce, axis=-1)
kl_terms = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
kl_loss = -0.5 * K.sum(kl_terms, axis=-1)
vae_loss = K.mean(reconstruction_loss + kl_loss)
vae.add_loss(vae_loss)

# The loss was attached with add_loss, so compile() only needs the optimizer
vae.compile(optimizer='adam')

# Fit on the training images; no targets are passed because the loss is
# computed from the model's own tensors
vae.fit(
    x_train,
    epochs=30,
    batch_size=128,
    validation_data=(x_test, None),
)

# Decode 64 latent vectors drawn from a standard normal
z_sample = np.random.normal(size=(64, latent_dim))
x_decoded = decoder.predict(z_sample)

# Show the generated images on an 8 x 8 grid, in row-major order
fig, axs = plt.subplots(8, 8)
for ax, flat_img in zip(axs.ravel(), x_decoded):
    ax.imshow(flat_img.reshape(img_shape), cmap='gray')
    ax.axis('off')
plt.show()

1st Solution

In [None]:
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras import regularizers
import numpy as np
import matplotlib.pyplot as plt

# Load the MNIST dataset (labels are not used by the autoencoder)
(X_train, _), (X_test, _) = mnist.load_data()

# Normalize pixel values to be between 0 and 1
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

# Flatten the images
X_train = X_train.reshape((len(X_train), -1))
X_test = X_test.reshape((len(X_test), -1))

# Set the hyperparameters
hidden_dims = [32, 64, 128]
reg_strength = 0.0001
batch_size = 128
epochs = 20

# Train one single-hidden-layer sparse autoencoder per hidden size and
# report / visualise its reconstructions
for dim in hidden_dims:
    # Define the input layer. `shape` is passed as a tuple (not a bare int),
    # which is the form the Keras Input API documents.
    input_img = Input(shape=(X_train.shape[1],))

    # Define the encoder layers (L1 activity penalty encourages sparse codes)
    encoded = Dense(dim, activation='relu',
                    activity_regularizer=regularizers.l1(reg_strength))(input_img)

    # Define the decoder layers
    decoded = Dense(X_train.shape[1], activation='sigmoid')(encoded)

    # Define the autoencoder model
    autoencoder = Model(input_img, decoded)

    # Compile the model. The optimised loss is binary cross-entropy PLUS the
    # L1 activity penalty, so plain binary cross-entropy is also tracked as a
    # metric to obtain the reconstruction error on its own.
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy',
                        metrics=['binary_crossentropy'])

    # Train the model
    autoencoder.fit(X_train, X_train,
                    epochs=epochs,
                    batch_size=batch_size,
                    shuffle=True,
                    validation_data=(X_test, X_test))

    # Calculate reconstruction errors on the train and test datasets.
    # evaluate() returns [total_loss, binary_crossentropy]; only the second
    # value is the pure reconstruction error (no regularisation term).
    _, train_loss = autoencoder.evaluate(X_train, X_train, verbose=0)
    _, test_loss = autoencoder.evaluate(X_test, X_test, verbose=0)

    print(f"Hidden dimension: {dim}")
    print(f"Train reconstruction error: {train_loss:.4f}")
    print(f"Test reconstruction error: {test_loss:.4f}")

    # Display some reconstructions from the test dataset.
    # Only the n displayed images are predicted, not the whole test set.
    n = 10
    decoded_imgs = autoencoder.predict(X_test[:n])
    plt.figure(figsize=(20, 4))
    plt.suptitle(f"Hidden dimension: {dim} (top: original, bottom: reconstruction)")
    for i in range(n):
        # Display original images
        ax = plt.subplot(2, n, i + 1)
        ax.imshow(X_test[i].reshape(28, 28), cmap='gray')
        ax.set_xticks([])
        ax.set_yticks([])

        # Display reconstructed images
        ax = plt.subplot(2, n, i + 1 + n)
        ax.imshow(decoded_imgs[i].reshape(28, 28), cmap='gray')
        ax.set_xticks([])
        ax.set_yticks([])

    plt.show()

3rd Solution

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import matplotlib.pyplot as plt

# Define the self-attention layer
class SelfAttention(nn.Module):
    """Self-attention over the spatial positions of a 4-D feature map.

    Queries and keys are produced by 1x1 convolutions with a reduced channel
    count, values by a 1x1 convolution that keeps ``in_channels``. The
    attended features are scaled by the learnable scalar ``gamma`` and added
    back to the input. ``gamma`` starts at zero, so a freshly constructed
    layer returns its input unchanged.

    Args:
        in_channels: Number of channels of the input feature map.
    """

    def __init__(self, in_channels):
        super(SelfAttention, self).__init__()
        # in_channels // 8 is 0 for fewer than 8 channels, which would create
        # empty query/key projections; keep at least one channel.
        reduced_channels = max(in_channels // 8, 1)
        self.query_conv = nn.Conv2d(in_channels, reduced_channels, kernel_size=1)
        self.key_conv = nn.Conv2d(in_channels, reduced_channels, kernel_size=1)
        self.value_conv = nn.Conv2d(in_channels, in_channels, kernel_size=1)
        self.gamma = nn.Parameter(torch.zeros(1))

    def forward(self, x):
        """Apply attention to ``x`` of shape (batch, channels, height, width).

        Returns:
            A tensor with the same shape as ``x``.
        """
        batch_size, C, height, width = x.size()
        num_positions = height * width
        # (B, N, C') and (B, C', N) -> pairwise scores of shape (B, N, N)
        proj_query = self.query_conv(x).view(batch_size, -1, num_positions).permute(0, 2, 1)
        proj_key = self.key_conv(x).view(batch_size, -1, num_positions)
        energy = torch.bmm(proj_query, proj_key)
        # Normalise each query position's scores over all key positions
        attention = nn.functional.softmax(energy, dim=-1)
        proj_value = self.value_conv(x).view(batch_size, -1, num_positions)
        # (B, C, N) x (B, N, N) -> (B, C, N): weighted sum of values per position
        out = torch.bmm(proj_value, attention.permute(0, 2, 1))
        out = out.view(batch_size, C, height, width)
        # Residual connection, gated by gamma
        out = self.gamma*out + x
        return out

# Define the CNN model with self-attention layer(s)
class CNN(nn.Module):
    """Three-stage CNN classifier with a self-attention layer after each conv.

    Each stage is conv(3x3, padding=1) -> ReLU -> SelfAttention -> 2x2 max-pool,
    with 32, 64 and 128 channels respectively. The classifier head expects
    128 * 4 * 4 features, i.e. 3-channel 32x32 inputs (three halvings of 32
    give 4), and produces 10 logits.
    """

    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.attention1 = SelfAttention(32)
        self.attention2 = SelfAttention(64)
        self.attention3 = SelfAttention(128)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(128*4*4, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        """Return class logits of shape (batch, 10) for a batch of images."""
        x = nn.functional.relu(self.conv1(x))
        x = self.attention1(x)
        x = self.pool(x)
        x = nn.functional.relu(self.conv2(x))
        x = self.attention2(x)
        x = self.pool(x)
        x = nn.functional.relu(self.conv3(x))
        x = self.attention3(x)
        x = self.pool(x)
        # Flatten per sample. Using view(-1, 128*4*4) would silently fold extra
        # spatial features into the batch dimension for inputs that are not
        # 32x32; keeping the batch size fixed makes fc1 raise a shape error.
        x = x.view(x.size(0), -1)
        x = nn.functional.relu(self.fc1(x))
        # Functional dropout must be told the mode explicitly
        x = nn.functional.dropout(x, p=0.5, training=self.training)
        x = self.fc2(x)
        return x

# Seed PyTorch's global random number generator
torch.manual_seed(42)

# All computation in this cell is placed on the CPU
device = torch.device('cpu')

# CIFAR-10 training split, converted to tensors and served in shuffled
# mini-batches of 128
train_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=128,
    shuffle=True,
)

# Model and its Adam optimizer
model = CNN().to(device)
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

# Classification loss
criterion = nn.CrossEntropyLoss()

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary

class SelfAttention(nn.Module):
    """Spatial self-attention block with a gated residual connection.

    1x1 convolutions project the input into queries and keys (reduced channel
    count) and values (same channel count). Every spatial position attends to
    every other position; the result is multiplied by the learnable scalar
    ``gamma`` (initialised to zero) and added to the input.

    Args:
        in_channels: Number of channels of the input feature map.
    """

    def __init__(self, in_channels):
        super(SelfAttention, self).__init__()

        # Guard against in_channels < 8, where in_channels // 8 would be 0 and
        # the query/key projections would have no output channels.
        qk_channels = max(in_channels // 8, 1)

        self.query_conv = nn.Conv2d(in_channels=in_channels, out_channels=qk_channels, kernel_size=1)
        self.key_conv = nn.Conv2d(in_channels=in_channels, out_channels=qk_channels, kernel_size=1)
        self.value_conv = nn.Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=1)
        self.gamma = nn.Parameter(torch.zeros(1))

    def forward(self, x):
        """Return the attended feature map; same shape as ``x`` (B, C, H, W)."""
        batch_size, channels, height, width = x.size()

        # Queries as (B, N, C') and keys as (B, C', N), with N = H * W
        proj_query = self.query_conv(x).view(batch_size, -1, height * width).permute(0, 2, 1)
        proj_key = self.key_conv(x).view(batch_size, -1, height * width)

        # (B, N, N) similarity scores, softmax-normalised over key positions
        energy = torch.bmm(proj_query, proj_key)
        attention = F.softmax(energy, dim=-1)

        proj_value = self.value_conv(x).view(batch_size, -1, height * width)

        # Weighted sum of values for each position, restored to (B, C, H, W)
        out = torch.bmm(proj_value, attention.permute(0, 2, 1))
        out = out.view(batch_size, channels, height, width)

        # gamma gates how much attention output is mixed into the input
        out = self.gamma * out + x
        return out


class CNN(nn.Module):
    """Conv / batch-norm classifier with self-attention after the first two stages.

    Channel widths are 64, 128 and 256. The second and third stages each end
    in a 2x2 max-pool, and the linear head expects 256 * 8 * 8 features and
    emits 10 logits.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: full resolution, followed by attention
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(64)
        self.attention1 = SelfAttention(in_channels=64)

        # Stage 2: pooled once, followed by attention
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(128)
        self.attention2 = SelfAttention(in_channels=128)

        # Stage 3: pooled a second time, no attention
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(256)

        # Classifier head
        self.fc = nn.Linear(256 * 8 * 8, 10)

    def forward(self, x):
        """Map a batch of images to logits of shape (batch, 10)."""
        # Stage 1
        x = self.conv1(x)
        x = self.bn1(x)
        x = F.relu(x)
        x = self.attention1(x)

        # Stage 2: normalise, pool, then activate
        x = self.bn2(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(x)
        x = self.attention2(x)

        # Stage 3
        x = self.bn3(self.conv3(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(x)

        # Flatten everything except the batch dimension and classify
        features = x.view(x.shape[0], -1)
        return self.fc(features)

# Instantiate the model and print the summary
model = CNN()
# summary() prints the table itself, so wrapping it in print() only adds a
# second line showing its return value. The model was just built on the CPU,
# so the device is passed explicitly instead of relying on the "cuda" default.
summary(model, (3, 32, 32), device="cpu")

In [None]:
import torch.optim as optim
from torchvision.datasets import CIFAR10
from torchvision import transforms
from torch.utils.data import DataLoader

# Define the transform to normalize the data: ToTensor followed by
# (x - 0.5) / 0.5 on each of the three channels
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load the CIFAR-10 training and test sets using DataLoader
train_set = CIFAR10(root='./data', train=True, download=True, transform=transform)
test_set = CIFAR10(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_set, batch_size=128, shuffle=True, num_workers=2)
test_loader = DataLoader(test_set, batch_size=128, shuffle=False, num_workers=2)

# Instantiate the model and define the loss function and optimizer
model = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model for a specified number of epochs
num_epochs = 1

for epoch in range(num_epochs):
    # Make sure BatchNorm layers are in training mode for every epoch
    model.train()
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Report the mean loss over each window of 100 mini-batches
        running_loss += loss.item()
        if i % 100 == 99:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

print('Finished training')

# Evaluate the model on the test set
correct = 0
total = 0

# Switch to evaluation mode: without this the BatchNorm layers would normalise
# with per-batch statistics and keep updating their running averages while
# the test set is being scored.
model.eval()
with torch.no_grad():
    for data in test_loader:
        inputs, labels = data
        outputs = model(inputs)
        # Index of the largest logit is the predicted class
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy on test set: %.2f %%' % (100 * correct / total))