# Importing Libraries 


In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch import optim,nn
from torch.utils.data.dataset import Dataset
from torchvision import transforms 
from export import load_cifar10_data
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms



# Model Architecture

In [2]:
class Encoder(nn.Module):
    """Convolutional encoder that compresses an image batch into latent vectors.

    Three identical stages (3x3 conv -> batch norm -> ReLU -> 2x2 max-pool)
    widen the channels to 64, 128 and 256 while halving height and width each
    time. The result is flattened and projected to ``latent_dim`` features.

    Args:
        in_channels: Number of channels in the input images.
        latent_dim: Size of the latent vector produced per image.

    Note:
        The final linear layer is sized for a 256 x 4 x 4 feature map, i.e. an
        input of 32 x 32 pixels after the three halvings.
    """

    def __init__(self, in_channels=3, latent_dim=256):
        super().__init__()

        # Channel width entering/leaving each stage; consecutive pairs give
        # the (in, out) channels of one conv stage.
        widths = (in_channels, 64, 128, 256)
        stages = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            stages.extend([
                nn.Conv2d(c_in, c_out, kernel_size=3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2),  # halves H and W
            ])
        self.conv_op = nn.Sequential(*stages)  # 32x32 -> 16x16 -> 8x8 -> 4x4

        self.flatten = nn.Flatten()
        self.fc = nn.Linear(256 * 4 * 4, latent_dim)

    def forward(self, x):
        """Return the ``(batch, latent_dim)`` encoding of image batch ``x``."""
        features = self.conv_op(x)
        return self.fc(self.flatten(features))

In [3]:
class Decoder(nn.Module):
    """Transposed-convolution decoder that expands latent vectors into images.

    A linear layer lifts each latent vector to a 256 x 4 x 4 feature map, which
    three stride-2 transposed convolutions upsample to 32 x 32. A final
    sigmoid keeps every output value in [0, 1].

    Args:
        latent_dim: Size of each incoming latent vector.
        out_channels: Number of channels in the reconstructed image.
    """

    def __init__(self, latent_dim=256, out_channels=3):
        super().__init__()
        self.fc = nn.Linear(latent_dim, 256 * 4 * 4)

        def upsample(c_in, c_out):
            # kernel_size == stride == 2 doubles height and width exactly.
            return nn.ConvTranspose2d(c_in, c_out, kernel_size=2, stride=2)

        self.conv_op = nn.Sequential(
            upsample(256, 128),  # 4x4 -> 8x8
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            upsample(128, 64),  # 8x8 -> 16x16
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            upsample(64, out_channels),  # 16x16 -> 32x32
            nn.Sigmoid(),  # squashes outputs into [0, 1]
        )

    def forward(self, x):
        """Return the ``(batch, out_channels, 32, 32)`` reconstruction of ``x``."""
        feature_map = self.fc(x).view(-1, 256, 4, 4)
        return self.conv_op(feature_map)


In [4]:
class AutoEncoder(nn.Module):
    """Encoder/decoder pair trained to reproduce its own input.

    Args:
        in_channles: Channel count of the input images; also used as the
            channel count of the reconstruction. (The spelling is kept as-is
            because callers pass this argument by keyword.)
        latent_dim: Size of the bottleneck vector between the two halves.
    """

    def __init__(self, in_channles=3, latent_dim=256):
        super().__init__()
        self.encoder = Encoder(in_channles, latent_dim)
        self.decoder = Decoder(latent_dim, in_channles)

    def forward(self, x):
        """Encode ``x`` to a latent vector and decode it back to an image."""
        latent = self.encoder(x)
        return self.decoder(latent)

In [5]:
# Pick the compute device. `torch.cuda.is_available` must be *called*: the bare
# function object is always truthy, which made the CPU fallback unreachable and
# crashed on machines without a GPU.
if torch.cuda.is_available():
    device = "cuda"
    num_workers = torch.cuda.device_count() * 1  # number of visible CUDA devices
    print(num_workers)
    print(torch.cuda.get_device_name())
else:
    print("cuda is not availabel")
    device = "cpu"
    num_workers = 0  # no CUDA devices; keeps the name defined on both paths

1
NVIDIA GeForce RTX 3050 Laptop GPU


# Data Loading

In [6]:
# Fetch the train/test splits and the class names from the project-local loader.
train_split, test_split, class_names = load_cifar10_data()
train_images, train_labels = train_split
test_images, test_labels = test_split

# Sanity check: echo the array shapes and the label vocabulary.
print(f"Training Data: {train_images.shape}, Labels: {train_labels.shape}")
print(f"Test Data: {test_images.shape}, Labels: {test_labels.shape}")
print(f"Class Names: {class_names}")

class CIFAR10Dataset(Dataset):
    """Map-style dataset that pairs each image with its label.

    Args:
        images: Indexable collection of images; its length is the dataset size.
        labels: Indexable collection of labels, looked up with the same index
            as ``images``.
        transform: Optional callable applied to the image on every access.
    """

    def __init__(self, images, labels, transform=None):
        self.images, self.labels = images, labels
        self.transform = transform

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for ``idx``, transforming the image if configured."""
        sample, target = self.images[idx], self.labels[idx]
        if not self.transform:
            return sample, target
        return self.transform(sample), target

# Convert each image array to a float tensor scaled to [0, 1].
# No mean/std normalization is applied on purpose: the decoder ends in a
# Sigmoid (outputs in [0, 1]) and the loss compares the output with the input
# batch, so normalizing inputs to [-1, 1] would make the reconstruction target
# unreachable for every negative value.
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
])

train_dataset = CIFAR10Dataset(train_images, train_labels, transform=transform)
test_dataset = CIFAR10Dataset(test_images, test_labels, transform=transform)

batch_size = 32

# Shuffle only the training split; evaluation order stays fixed.
# NOTE(review): with num_workers > 0 the dataset is sent to worker processes;
# a class defined inside the notebook may fail to load there on spawn-based
# platforms (e.g. Windows) — confirm, and fall back to num_workers=0 if so.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

print(f"Train loader: {train_loader}")
print(f"Test loader: {test_loader}")

Training Data: (50000, 32, 32, 3), Labels: (50000,)
Test Data: (10000, 32, 32, 3), Labels: (10000,)
Class Names: ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
Train loader: <torch.utils.data.dataloader.DataLoader object at 0x000001C80D0A1090>
Test loader: <torch.utils.data.dataloader.DataLoader object at 0x000001C80CF26BD0>


In [7]:
import torch
import gc

# Run Python garbage collection first so unreachable objects are destroyed and
# let go of their memory; only then can the CUDA caching allocator hand the
# now-unused blocks back. (In the opposite order, memory held by garbage that
# has not been collected yet stays reserved.)
gc.collect()
torch.cuda.empty_cache()


42

# Initializing the model

In [8]:
# Build the autoencoder together with its reconstruction loss and optimizer.
model = AutoEncoder(in_channles=3, latent_dim=256)

# Mean squared error between the reconstruction and its target.
loss_function = nn.MSELoss()

optimizer = optim.Adam(
    model.parameters(),
    lr=1e-3,
    weight_decay=1e-8,
)

# Training Model

In [None]:
import torch
import gc

# Collect Python garbage first so unreachable objects let go of their memory,
# then release the CUDA allocator's unused cached blocks.
gc.collect()
torch.cuda.empty_cache()

epochs = 8
outputs = []  # one (epoch, input sample, reconstruction sample) tuple per epoch
losses = []   # training loss of every batch, in order

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.train()  # make training mode explicit (BatchNorm uses batch statistics)

for epoch in range(epochs):
    for batch_idx, (images, _) in enumerate(train_loader):  # labels are unused
        images = images.to(device, dtype=torch.float32)  # Send to GPU/CPU

        # The autoencoder is trained to reproduce its own input.
        output = model(images)
        loss = loss_function(output, images)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        losses.append(loss.item())

        # Print loss every 100 batches to reduce console clutter
        if batch_idx % 100 == 0:
            print(f"Epoch [{epoch+1}/{epochs}], Step [{batch_idx}/{len(train_loader)}], Loss: {loss.item():.4f}")

    # Keep 8 samples from the last batch of the epoch. Detach and move them to
    # the CPU: storing `output` directly would keep the autograd graph and the
    # GPU memory of that batch alive for as long as `outputs` exists.
    outputs.append((epoch, images[:8].detach().cpu(), output[:8].detach().cpu()))

print("Training Complete!")
