# Question 1: Implementation of a GAN from Scratch in PyTorch on the MNIST Dataset, with Early Stopping to Avoid Overfitting

I have used the approach given in the paper "[VGAN: Generalizing MSE GAN and WGAN-GP
for Robot Fault Diagnosis](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9760080)".
The authors define an innovative approach to early stopping in GANs: a classical ML model such as a Random Forest is trained and then used for validation and for testing the early-stopping criterion.

In [None]:
import os
import sys
import numpy as np
import torch
import torchvision
import torchvision.datasets as datasets
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.utils as vutils
import torch.optim.lr_scheduler as lr_scheduler
import matplotlib.pyplot as plt
from torch.utils.data.sampler import SubsetRandomSampler
from tqdm import tqdm
import torch.optim.lr_scheduler as lr_scheduler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [None]:
# --- Runtime / data location ---
CUDA = True          # request the GPU; combined with an availability check later
DATA_PATH = './data'  # root folder handed to the MNIST dataset
seed = 1             # value passed to the RNG seeding call

# --- Data geometry ---
IMAGE_CHANNEL = 1    # channel count of the generator output / discriminator input
X_DIM = 64           # size passed to transforms.Resize
BATCH_SIZE = 32

# --- Network widths ---
Z_DIM = 100          # channels of the latent vector fed to the generator
G_HIDDEN = 64        # base feature-map width of the generator
D_HIDDEN = 64        # base feature-map width of the discriminator

# --- Optimisation ---
EPOCH_NUM = 5
lr = 0.0002          # Adam learning rate for both networks

# --- BCE targets ---
REAL_LABEL = 1
FAKE_LABEL = 0

In [None]:
# Use the GPU only when it was requested AND one is actually available.
CUDA = CUDA and torch.cuda.is_available()
print("PyTorch version: {}".format(torch.__version__))
if CUDA:
    print("CUDA version: {}\n".format(torch.version.cuda))

# Seed every RNG the notebook draws from, not just the CUDA one:
#  * NumPy drives the train/validation shuffle (np.random.shuffle),
#  * the CPU torch generator drives weight init, samplers and CPU noise.
np.random.seed(seed)
torch.manual_seed(seed)
if CUDA:
    torch.cuda.manual_seed(seed)
device = torch.device("cuda:0" if CUDA else "cpu")
# NOTE(review): cudnn.benchmark lets cuDNN auto-tune kernels; this may make GPU
# runs non bit-reproducible even with fixed seeds - confirm that is acceptable.
cudnn.benchmark = True

In [None]:
# Data preprocessing


def _make_transform():
    """Build the resize -> tensor -> normalize pipeline used for every split."""
    steps = [
        transforms.Resize(X_DIM),
        transforms.ToTensor(),
        # mean 0.5 / std 0.5 on the single channel
        transforms.Normalize((0.5,), (0.5,)),
    ]
    return transforms.Compose(steps)


# MNIST train and test splits, downloaded on first use.
train_data = datasets.MNIST(
    root=DATA_PATH, train=True, download=True, transform=_make_transform()
)
test_data = datasets.MNIST(
    root=DATA_PATH, train=False, download=True, transform=_make_transform()
)

# Hold out the first `valid_size` fraction of a shuffled index list for
# validation; the remainder is used for training.
valid_size = 0.2
num_train = len(train_data)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(valid_size * num_train))
valid_idx = indices[:split]
train_idx = indices[split:]

train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

# Settings common to all three loaders.
_loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=0)

# Training and validation batches both come from `train_data`; the samplers
# keep the two index sets disjoint.
train_loader = torch.utils.data.DataLoader(
    train_data, sampler=train_sampler, **_loader_kwargs
)
valid_loader = torch.utils.data.DataLoader(
    train_data, sampler=valid_sampler, **_loader_kwargs
)

# Test batches are read in dataset order (no sampler).
test_loader = torch.utils.data.DataLoader(test_data, **_loader_kwargs)

In [None]:
# Preview one training batch (at most 64 images) as a tiled grid.
real_batch = next(iter(train_loader))
preview_grid = vutils.make_grid(
    real_batch[0].to(device)[:64], padding=2, normalize=True
).cpu()

plt.figure(figsize=(8,8))
plt.axis("off")
plt.title("Training Images")
# make_grid returns channels-first; imshow wants channels-last.
plt.imshow(np.transpose(preview_grid, (1,2,0)))

In [None]:
def weights_init(m):
    """Re-initialise a module in place, chosen by its class name.

    Intended for use with ``nn.Module.apply``.

    * Class name contains ``'Conv'``: weights drawn from N(0, 0.02).
    * Class name contains ``'BatchNorm'``: weights drawn from N(1, 0.02),
      bias set to zero.
    * Anything else is left untouched.
    """
    layer_kind = type(m).__name__
    if 'Conv' in layer_kind:
        m.weight.data.normal_(mean=0.0, std=0.02)
        return
    if 'BatchNorm' in layer_kind:
        m.weight.data.normal_(mean=1.0, std=0.02)
        m.bias.data.fill_(0)

In [None]:
class Generator(nn.Module):
    """DCGAN-style generator built from transposed convolutions.

    A (N, Z_DIM, 1, 1) latent tensor is upsampled by one stride-1 and four
    stride-2 transposed convolutions (1 -> 4 -> 8 -> 16 -> 32 -> 64 pixels
    per side) into an IMAGE_CHANNEL-channel image squashed by ``Tanh``.
    """

    def __init__(self):
        super().__init__()
        # Feature-map widths after the input layer and each hidden layer.
        widths = [G_HIDDEN * 8, G_HIDDEN * 4, G_HIDDEN * 2, G_HIDDEN]

        # input layer: kernel 4, stride 1, no padding
        stages = [
            nn.ConvTranspose2d(Z_DIM, widths[0], 4, 1, 0, bias=False),
            nn.BatchNorm2d(widths[0]),
            nn.ReLU(True),
        ]
        # 1st-3rd hidden layers: each halves the channels (kernel 4, stride 2, pad 1)
        for c_in, c_out in zip(widths, widths[1:]):
            stages += [
                nn.ConvTranspose2d(c_in, c_out, 4, 2, 1, bias=False),
                nn.BatchNorm2d(c_out),
                nn.ReLU(True),
            ]
        # output layer
        stages += [
            nn.ConvTranspose2d(widths[-1], IMAGE_CHANNEL, 4, 2, 1, bias=False),
            nn.Tanh(),
        ]
        self.main = nn.Sequential(*stages)

    def forward(self, input):
        """Run ``input`` through the layer stack and return the result."""
        generated = self.main(input)
        return generated

In [None]:
class Discriminator(nn.Module):
    """DCGAN-style convolutional discriminator.

    Four stride-2 convolutions shrink the input and widen the channels from
    D_HIDDEN to D_HIDDEN * 8; a final 4x4 convolution followed by ``Sigmoid``
    produces the score. For a 64x64 input this gives one value per image.
    """

    def __init__(self):
        super().__init__()
        # 1st layer (no batch norm)
        blocks = [
            nn.Conv2d(IMAGE_CHANNEL, D_HIDDEN, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
        ]
        # 2nd-4th layers: channel count doubles each time
        channels = D_HIDDEN
        for _ in range(3):
            blocks += [
                nn.Conv2d(channels, channels * 2, 4, 2, 1, bias=False),
                nn.BatchNorm2d(channels * 2),
                nn.LeakyReLU(0.2, inplace=True),
            ]
            channels *= 2
        # output layer
        blocks += [
            nn.Conv2d(channels, 1, 4, 1, 0, bias=False),
            nn.Sigmoid(),
        ]
        self.main = nn.Sequential(*blocks)

    def forward(self, input):
        """Return the sigmoid scores for ``input`` as a 1-D tensor."""
        scores = self.main(input)
        # Flatten to a column, then drop the trailing singleton dimension.
        return scores.view(-1, 1).squeeze(1)

In [None]:
# Generator: build, move to the target device, re-initialise its weights
# (Module.apply returns the module itself, so the calls can be chained).
netG = Generator().to(device).apply(weights_init)
print(netG)

# Discriminator: same procedure
netD = Discriminator().to(device).apply(weights_init)
print(netD)

In [None]:
# Binary cross-entropy between discriminator scores and real/fake targets
criterion = nn.BCELoss()

# Fixed latent batch, reused to visualize how the generator's output evolves
viz_noise = torch.randn(BATCH_SIZE, Z_DIM, 1, 1, device=device)

# Both networks use Adam with the same learning rate and betas
_adam_settings = dict(lr=lr, betas=(0.5, 0.999))
optimizerD = optim.Adam(netD.parameters(), **_adam_settings)
optimizerG = optim.Adam(netG.parameters(), **_adam_settings)

In [None]:
def display_images(imgs, nrows=4, ncols=4):
    """Show the first ``nrows * ncols`` images of a batch in a grayscale grid.

    Args:
        imgs: batch tensor whose last two dimensions are height and width and
            whose entries each hold a single-channel image.
        nrows: number of grid rows (default 4).
        ncols: number of grid columns (default 4).

    The image size is read from the tensor instead of being fixed at 64x64,
    tensors are detached and moved to the CPU before plotting, and grid cells
    beyond the end of the batch are left blank rather than raising.
    """
    fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(8,8), squeeze=False)
    height, width = imgs.shape[-2], imgs.shape[-1]
    for idx, ax in enumerate(axs.flat):
        if idx >= len(imgs):
            # Fewer images than cells: hide the unused axes.
            ax.axis("off")
            continue
        # reshape (not view) also accepts non-contiguous tensors
        img = imgs[idx].detach().cpu().reshape(height, width)
        ax.imshow(img, cmap="gray")
    plt.show()

There is no standard early-stopping criterion for GANs, because no one can reliably predict when training of a GAN should stop. So, most of the time GANs are trained for the maximum number of epochs that has been defined.

In [None]:
# Training Loop

# --- Early-stopping configuration ---
# Upper bounds on how many real images (matched by an equal number of
# generated ones) the random forest is fitted on / scored on each epoch.
RF_TRAIN_SAMPLES = 1000
RF_VALID_SAMPLES = 500
# Consecutive epochs without improvement tolerated before training stops.
PATIENCE = 2


def _collect_real(loader, limit):
    """Return up to ``limit`` real images from ``loader``, one flat row each."""
    rows = []
    seen = 0
    for images, _ in loader:
        rows.append(images.reshape(images.size(0), -1).numpy())
        seen += images.size(0)
        if seen >= limit:
            break
    return np.concatenate(rows)[:limit]


def _sample_fake(generator, count):
    """Generate ``count`` images with ``generator``, one flat row each."""
    rows = []
    with torch.no_grad():
        for start in range(0, count, BATCH_SIZE):
            n = min(BATCH_SIZE, count - start)
            z = torch.randn(n, Z_DIM, 1, 1, device=device)
            rows.append(generator(z).reshape(n, -1).cpu().numpy())
    return np.concatenate(rows)


def _rf_real_vs_fake_accuracy(generator):
    """Score ``generator`` with a random-forest real-vs-fake test.

    A random forest is fitted to tell real training images from generated
    ones, then scored on real validation images plus freshly generated ones.
    Both classes are balanced, so 0.5 is chance level: the closer the returned
    accuracy is to 0.5, the harder the generated images are to tell apart.
    """
    was_training = generator.training
    generator.eval()
    try:
        real_fit = _collect_real(train_loader, RF_TRAIN_SAMPLES)
        fake_fit = _sample_fake(generator, len(real_fit))
        real_val = _collect_real(valid_loader, RF_VALID_SAMPLES)
        fake_val = _sample_fake(generator, len(real_val))
    finally:
        # Always hand the generator back in the mode it arrived in.
        generator.train(was_training)

    rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_model.fit(
        np.concatenate([real_fit, fake_fit]),
        np.concatenate([np.full(len(real_fit), REAL_LABEL),
                        np.full(len(fake_fit), FAKE_LABEL)]),
    )
    target_valid = np.concatenate([np.full(len(real_val), REAL_LABEL),
                                   np.full(len(fake_val), FAKE_LABEL)])
    predictions_valid = rf_model.predict(np.concatenate([real_val, fake_val]))
    return accuracy_score(target_valid, predictions_valid)


# Lists to keep track of progress
img_list = []
G_losses = []
D_losses = []
iters = 0
best_accuracy = -1
best_epoch = -1
best_gap = float("inf")        # smallest |accuracy - 0.5| seen so far
best_generator_state = None    # generator weights at the best epoch
epochs_without_improvement = 0
random_forest_accuracies = []

print("Starting Training Loop...")
for epoch in range(EPOCH_NUM):
    for i, data in enumerate(tqdm(train_loader, desc=f'Epoch {epoch}/{EPOCH_NUM}')):
        # (1) Update the discriminator with real data
        netD.zero_grad()
        # Format batch
        real_cpu = data[0].to(device)
        b_size = real_cpu.size(0)
        label = torch.full((b_size,), REAL_LABEL, dtype=torch.float, device=device)
        # Forward pass real batch through D
        output = netD(real_cpu).view(-1)
        # Calculate loss on all-real batch
        errD_real = criterion(output, label)
        # Calculate gradients for D in backward pass
        errD_real.backward()
        D_x = output.mean().item()

        # (2) Update the discriminator with fake data
        # Generate batch of latent vectors
        noise = torch.randn(b_size, Z_DIM, 1, 1, device=device)
        # Generate fake image batch with G
        fake = netG(noise)
        label.fill_(FAKE_LABEL)
        # Classify all fake batch with D (detached: no gradient flows into G here)
        output = netD(fake.detach()).view(-1)
        # Calculate D's loss on the all-fake batch
        errD_fake = criterion(output, label)
        # Calculate the gradients for this batch, accumulated (summed) with previous gradients
        errD_fake.backward()
        D_G_z1 = output.mean().item()
        # Compute error of D as sum over the fake and the real batches
        errD = errD_real + errD_fake
        # Update D
        optimizerD.step()

        # (3) Update the generator with fake data
        netG.zero_grad()
        label.fill_(REAL_LABEL)  # fake labels are real for generator cost
        # Since we just updated D, perform another forward pass of all-fake batch through D
        output = netD(fake).view(-1)
        # Calculate G's loss based on this output
        errG = criterion(output, label)
        # Calculate gradients for G
        errG.backward()
        D_G_z2 = output.mean().item()
        # Update G
        optimizerG.step()

        # Output training stats
        if i % 50 == 0:
            print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
                  % (epoch, EPOCH_NUM, i, len(train_loader),
                     errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))

        # Save Losses for plotting later
        G_losses.append(errG.item())
        D_losses.append(errD.item())

        # Check how the generator is doing by saving G's output on the fixed viz_noise
        if (iters % 500 == 0) or ((epoch == EPOCH_NUM-1) and (i == len(train_loader)-1)):
            with torch.no_grad():
                snapshot = netG(viz_noise).detach().cpu()
                display_images(snapshot)
            img_list.append(vutils.make_grid(snapshot, padding=2, normalize=True))

        iters += 1

    # Early stopping, evaluated once per epoch (fitting a forest per batch
    # would dominate the run time).
    accuracy_rf = _rf_real_vs_fake_accuracy(netG)
    random_forest_accuracies.append(accuracy_rf)
    gap = abs(accuracy_rf - 0.5)
    print(f"Epoch {epoch}: random forest real-vs-fake accuracy = {accuracy_rf:.4f}")

    if gap < best_gap:
        best_gap = gap
        best_accuracy = accuracy_rf
        best_epoch = epoch
        # Clone the tensors: state_dict() returns references that later
        # optimizer steps would otherwise overwrite.
        best_generator_state = {name: tensor.detach().clone()
                                for name, tensor in netG.state_dict().items()}
        epochs_without_improvement = 0
    else:
        epochs_without_improvement += 1
        if epochs_without_improvement >= PATIENCE:
            print(f"Early stopping at epoch {epoch}: no improvement for {PATIENCE} epochs.")
            break

# Restore the generator whose samples were hardest to tell from real data,
# i.e. whose random forest accuracy was closest to chance.
if best_generator_state is not None:
    netG.load_state_dict(best_generator_state)
    best_rf_generator_index = best_epoch
    best_rf_generator_accuracy = best_accuracy

    print(f"The best generator for Random Forest is G{best_rf_generator_index + 1} with accuracy: {best_rf_generator_accuracy}")

In [None]:
# Helper that draws both loss histories on a single figure
def plot_training_curves(G_losses_list, D_losses_list, title):
    """Plot generator and discriminator losses against iteration index.

    Args:
        G_losses_list: sequence of generator loss values.
        D_losses_list: sequence of discriminator loss values.
        title: figure title.
    """
    plt.figure(figsize=(10, 5))
    curves = (
        (G_losses_list, 'Generator Loss'),
        (D_losses_list, 'Discriminator Loss'),
    )
    for series, legend_name in curves:
        plt.plot(series, label=legend_name, alpha=0.7)
    plt.title(title)
    plt.xlabel('Iterations')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

plot_training_curves(G_losses, D_losses, 'Model 1 Training Curves')

# Run the generator on the fixed visualisation noise, without tracking gradients
with torch.no_grad():
    fake_model = netG(viz_noise).detach().cpu()

# Tile the samples; move channels last for imshow
sample_grid = vutils.make_grid(fake_model, padding=2, normalize=True)
plt.figure(figsize=(15, 5))
plt.imshow(np.transpose(sample_grid, (1, 2, 0)))
plt.title('Generated Samples - Model 1')

In [None]:
# Side-by-side figure: a batch of real images next to the most recent
# generator snapshot stored in img_list.
real_batch = next(iter(train_loader))
real_grid = vutils.make_grid(
    real_batch[0].to(device)[:64], padding=5, normalize=True
).cpu()

panels = [
    ("Real Images", real_grid),
    ("Fake Images", img_list[-1]),
]

plt.figure(figsize=(15,15))
for slot, (panel_title, grid) in enumerate(panels, start=1):
    plt.subplot(1,2,slot)
    plt.axis("off")
    plt.title(panel_title)
    # grids are channels-first; imshow expects channels-last
    plt.imshow(np.transpose(grid,(1,2,0)))
plt.show()

# Question 2: Techniques for fine-tuning GANs

## General Techniques:

1. Layer Freezing: Leverage pre-trained features for domain adaptation by fixing lower discriminator layers while fine-tuning upper layers.
2. Progressive Growing: Stabilize training and enhance high-quality image generation by starting low-resolution and gradually increasing complexity.
3. Minibatch Discrimination: Reduce mode collapse by letting the discriminator compare each sample with the other samples in its minibatch, so that batches lacking diversity are easier to flag as fake.
4. Trust-Region Optimization: Ensure safe exploration of the loss landscape with constrained updates during fine-tuning.


## Specialized Techniques:

1. Spectral Diversification: Prevent mode collapse and promote output diversity by penalizing similar generator outputs.
2. Curriculum Learning: Guide the GAN towards better samples by gradually increasing training data difficulty.
3. Pre-trained Embeddings: Inject domain-specific knowledge for controlled and interpretable outputs.
4. Self-Attention Mechanisms: Improve long-range dependencies by allowing the model to focus on relevant image regions.
5. Consistency Regularization: Encourage smooth transitions in the latent space by penalizing inconsistent outputs for perturbed inputs.
6. Dynamic Learning Rates: Adapt learning rates to different model components and training stages for optimal performance.
7. Mini-Batch Standard Deviation: Encourage diversity by incorporating mini-batch standard deviation in the discriminator.
8. Label Smoothing: Prevent overconfidence in the discriminator by using slightly relaxed target labels (real and fake).
9. Adversarial Dropout: Regularize the model during training by generating dropout masks adversarially.
10. Ensemble Methods: Enhance robustness by combining outputs from multiple GANs with diverse architectures or initializations.
11. Style-Based GANs: Provide better control over generated outputs by modeling style and content separately.
12. Temporal GANs: Ensure smooth and realistic video sequences by incorporating temporal coherence constraints into the architecture.
13. Dynamic Batch Sizes: Find an optimal balance between stability and efficiency by experimenting with dynamic batch sizes.
14. GAN Inversion: Understand and control the latent space by mapping real images back to it using GAN inversion techniques.
15. Dual Discriminator GANs: Balance realism and diversity by training with dual discriminators, one for each aspect.
16. Task-Specific Loss Functions: Tailor loss functions to your specific task, such as perceptual loss for image translation or class-specific losses for conditional GANs.

# Question 3: Difference between Flask and FastAPI

## About Flask and FastAPI
Flask, a micro-framework established in 2010, is known for its simplicity and flexibility, suitable for small to medium-sized web applications. FastAPI, introduced in 2019, is a modern framework designed for building high-performance APIs with Python 3.6+. It prioritizes speed, simplicity, and adherence to the OpenAPI standard. Both frameworks have distinct strengths, with Flask offering simplicity and flexibility, while FastAPI excels in building efficient APIs with minimal code.




## Difference between Flask and FastAPI
The key differences between Flask and FastAPI are:
* Flask is a WSGI micro-framework built on Werkzeug, while FastAPI is an ASGI framework built on Starlette and Pydantic; neither is a full-stack framework.
* FastAPI is designed for building APIs, while Flask can be used for building web applications and APIs.
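* FastAPI is generally faster than Flask for I/O-bound workloads because it runs on ASGI with native `async`/`await` support; its type annotations drive validation and documentation rather than speed.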
* FastAPI has automatic data validation and documentation, while Flask requires manual validation and documentation.
* Flask has a larger community and ecosystem than FastAPI.

# Question 4: Benefits of using Docker

## What is Docker?
Docker is a containerization platform that has become increasingly popular in recent years, particularly in the world of DevOps and cloud computing. Docker provides a range of benefits for developers and operations teams, including increased efficiency, flexibility, and portability.

## Benefits of using Docker
1. **Consistency and repeatability:** Docker allows developers to package applications and dependencies into containers, ensuring that they run consistently across different environments. This makes it easy to create reproducible builds, reducing the risk of bugs and errors caused by differences in environment configurations. With Docker, developers can be confident that their applications will run the same way in development, testing, and production environments.
2. **Improved efficiency and resource utilization:** Docker allows multiple containers to run on a single host machine, allowing for more efficient use of resources. Each container has its own isolated environment, so applications can be scaled up or down quickly and easily, without impacting other applications running on the same host. This means that organizations can maximize their infrastructure utilization, reduce costs, and improve overall efficiency.
3. **Faster development and deployment:** Docker streamlines the development and deployment process by making it easy to build, test, and deploy applications in containers. Developers can quickly spin up containers to test changes and experiment with new features, without having to set up a full development environment. Once an application is ready to be deployed, it can be packaged into a container and distributed to any environment, making it easy to deploy applications consistently across different environments.
4. **Increased flexibility and portability:** Docker containers are designed to be portable, meaning they can run on any host machine that has Docker installed. This makes it easy to move applications between different environments, whether it's from a developer's laptop to a test environment, or from a test environment to a production environment. Docker also makes it easy to manage dependencies, as all the necessary components are included in the container.
5. **Improved security:** Docker containers provide a more secure environment for running applications, as each container is isolated from the host system and other containers. This reduces the risk of one application impacting another or of an attacker gaining access to the host system. Docker also makes it easy to distribute updates and patches to applications, ensuring that they are always running on the latest version and are protected against known vulnerabilities.

In summary, one of the main advantages of using Docker is that it allows developers to isolate applications from the underlying infrastructure, reducing the risk of conflicts and making it easier to manage dependencies. Docker also makes it easy to scale applications up or down, as containers can be started and stopped quickly and easily.