In [None]:
# Gopal Krishna
# 12/15/22
# CS7180 - Advanced Perception

In [None]:
import torch
from torch import nn

class ConvolutionalAutoencoder(nn.Module):
    """Symmetric convolutional autoencoder built from strided (transposed) convolutions.

    The encoder stacks ``num_layers`` ``nn.Conv2d`` layers (3x3 kernel, stride 2,
    padding 1). The decoder stacks the same number of ``nn.ConvTranspose2d``
    layers (3x3 kernel, stride 2, padding 1, output_padding 1) that walk the
    encoder's channel sizes back in reverse order, so the output has the same
    channel count as the input.

    Args:
        num_layers: Number of encoder layers (and, equally, of decoder layers).
        layer_sizes: Channel counts. ``layer_sizes[0]`` is the input channel
            count and ``layer_sizes[i + 1]`` is the output channel count of
            encoder layer ``i``. Only the first ``num_layers + 1`` entries are
            used; extra trailing entries are ignored.

    Raises:
        IndexError: If ``layer_sizes`` has fewer than ``num_layers + 1`` entries.
    """

    def __init__(self, num_layers, layer_sizes):
        super().__init__()

        # encoder layers: layer_sizes[i] -> layer_sizes[i + 1]
        self.encoder_layers = nn.ModuleList()
        for i in range(num_layers):
            self.encoder_layers.append(nn.Conv2d(
                in_channels=layer_sizes[i],
                out_channels=layer_sizes[i + 1],
                kernel_size=3,
                stride=2,
                padding=1
            ))

        # decoder layers: mirror of the encoder, layer_sizes[i] -> layer_sizes[i - 1].
        # Indexing is relative to num_layers (not to the end of layer_sizes) so the
        # first decoder layer always accepts exactly what the last encoder layer
        # emits, even when layer_sizes carries more entries than are used.
        self.decoder_layers = nn.ModuleList()
        for i in range(num_layers, 0, -1):
            self.decoder_layers.append(nn.ConvTranspose2d(
                in_channels=layer_sizes[i],
                out_channels=layer_sizes[i - 1],
                kernel_size=3,
                stride=2,
                padding=1,
                output_padding=1
            ))

    def forward(self, x):
        """Encode ``x`` and then decode it, returning the reconstruction.

        NOTE(review): no activation function is applied between layers, so the
        layers are simply composed one after another.
        """
        # encode
        for layer in self.encoder_layers:
            x = layer(x)

        # decode
        for layer in self.decoder_layers:
            x = layer(x)

        return x


In [None]:
import torchsummary

In [None]:
# Prefer the GPU when one is available, but fall back to the CPU instead of
# raising on a runtime without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Three stride-2 stages for 3-channel input: channels go 3 -> 32 -> 64 -> 128 and back.
autoencoder = ConvolutionalAutoencoder(3, [3, 32, 64, 128])

# Module.to() returns the module itself, so its repr is still the cell output.
autoencoder.to(device)

ConvolutionalAutoencoder(
  (encoder_layers): ModuleList(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (2): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  )
  (decoder_layers): ModuleList(
    (0): ConvTranspose2d(128, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1))
    (1): ConvTranspose2d(64, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1))
    (2): ConvTranspose2d(32, 3, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1))
  )
)

In [None]:
# Print per-layer output shapes and parameter counts for one (channels, height, width) input.
summary_input_shape = (3, 512, 512)
torchsummary.summary(autoencoder, summary_input_shape)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 256, 256]             896
            Conv2d-2         [-1, 64, 128, 128]          18,496
            Conv2d-3          [-1, 128, 64, 64]          73,856
   ConvTranspose2d-4         [-1, 64, 128, 128]          73,792
   ConvTranspose2d-5         [-1, 32, 256, 256]          18,464
   ConvTranspose2d-6          [-1, 3, 512, 512]             867
Total params: 186,371
Trainable params: 186,371
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 3.00
Forward/backward pass size (MB): 58.00
Params size (MB): 0.71
Estimated Total Size (MB): 61.71
----------------------------------------------------------------


In [None]:
# Colab helper module used to attach Google Drive to this runtime
from google.colab import drive

# Mount Google Drive under /content/gdrive
drive.mount('/content/gdrive')

# Work from the dataset folder so the relative paths used by later cells
# ('./', './train', './validation') resolve inside it.
# The backslash escapes the space in "My Drive".
%cd /content/gdrive/My\ Drive/cs7180/mel

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
/content/gdrive/My Drive/cs7180/mel


In [None]:
# List the working directory; later cells read the ./train and ./validation folders from here.
!ls

test  train  validation


In [None]:
import cv2
import os

# File extensions treated as images (matched case-insensitively).
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")
# Target size in cv2.resize order: (width, height).
TARGET_SIZE = (512, 512)

# Walk the dataset tree and resize every image IN PLACE (the original file is overwritten).
for root, dirs, files in os.walk('./'):
    # Loop through all the files in the current directory
    for file in files:
        path = os.path.join(root, file)
        print(path)

        # Check if the file is an image; lower() so ".JPG" / ".PNG" are not silently skipped
        if not file.lower().endswith(IMAGE_EXTENSIONS):
            continue

        # cv2.imread signals failure by returning None rather than raising, and
        # cv2.resize would then crash the whole walk; skip unreadable files instead.
        image = cv2.imread(path)
        if image is None:
            print(f"WARNING: could not read image, skipping: {path}")
            continue

        # Already at the target size: leave the file alone so re-running this cell
        # does not re-encode (and, for JPEG, further degrade) it.
        height, width = image.shape[:2]
        if (width, height) == TARGET_SIZE:
            continue

        # Resize and overwrite; cv2.imwrite reports failure through its return value.
        resized = cv2.resize(image, TARGET_SIZE)
        if not cv2.imwrite(path, resized):
            print(f"WARNING: could not write resized image: {path}")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
./train/engine_idling/209992-5-2-100.png
./train/engine_idling/209992-5-2-114.png
./train/engine_idling/209992-5-2-116.png
./train/engine_idling/209992-5-2-115.png
./train/engine_idling/209992-5-2-131.png
./train/engine_idling/209992-5-2-137.png
./train/engine_idling/209992-5-2-138.png
./train/engine_idling/209992-5-2-42.png
./train/engine_idling/209992-5-2-26.png
./train/engine_idling/209992-5-2-43.png
./train/engine_idling/209992-5-2-79.png
./train/engine_idling/209992-5-2-8.png
./train/engine_idling/209992-5-2-77.png
./train/engine_idling/209992-5-2-81.png
./train/engine_idling/209992-5-2-82.png
./train/engine_idling/209992-5-2-87.png
./train/engine_idling/209992-5-2-91.png
./train/engine_idling/209992-5-3-22.png
./train/engine_idling/209992-5-3-6.png
./train/engine_idling/209992-5-3-5.png
./train/engine_idling/209992-5-4-1.png
./train/engine_idling/209992-5-4-15.png
./train/engine_idling/209992-5-4-17.png
./train/engi

In [None]:
import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision.datasets import ImageFolder
from torchvision import transforms

class ImageLoader(Dataset):
    """Thin ``Dataset`` wrapper that delegates everything to an ``ImageFolder``.

    Args:
        root: Directory passed to ``ImageFolder`` as its root.
        transform: Optional transform forwarded to ``ImageFolder``.
    """

    def __init__(self, root, transform=None):
        self.root, self.transform = root, transform
        # Length queries and indexing are both served by this wrapped dataset.
        self.dataset = ImageFolder(root, transform=transform)

    def __getitem__(self, index):
        """Return the wrapped dataset's item at ``index`` unchanged."""
        item = self.dataset[index]
        return item

    def __len__(self):
        """Return the number of items in the wrapped dataset."""
        return len(self.dataset)

# Tunable data-pipeline settings.
IMAGE_SIZE = (256, 256)   # size every image is resized to before batching
BATCH_SIZE = 128
NUM_WORKERS = 2

# No mean/std normalization is applied: ToTensor alone produces float tensors
# scaled to [0, 1], and the same tensors serve as the reconstruction targets.
transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
])

train_image_loader = ImageLoader(root='./train', transform=transform)
val_image_loader = ImageLoader(root='./validation', transform=transform)

# Shuffle only the training data. Validation is iterated in a fixed order so the
# per-epoch validation numbers are computed on the same batches every time.
train_dl = DataLoader(train_image_loader, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
val_dl = DataLoader(val_image_loader, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

# Number of batches per epoch in each loader.
print(len(train_dl), len(val_dl))

62 8


In [None]:
import numpy as np

# Sanity check: pull a single batch from the validation loader and discard its labels.
image, _ = next(iter(val_dl))

# Cell output: the shape of that batch tensor.
image.shape

torch.Size([128, 3, 256, 256])

In [None]:
def loss_fn(input_image, reconstructed_image):
    """Mean squared error between two tensors.

    Args:
        input_image: Reference tensor.
        reconstructed_image: Tensor compared against ``input_image``.

    Returns:
        A scalar tensor: the mean of the element-wise squared differences.
        The result is symmetric in its two arguments.
    """
    residual = input_image - reconstructed_image
    return residual.pow(2).mean()

In [None]:
# Adam over all of the autoencoder's parameters, with a fixed learning rate.
LEARNING_RATE = 1e-3
optimizer = torch.optim.Adam(autoencoder.parameters(), lr=LEARNING_RATE)

In [None]:
# Number of passes over the training data made by the training loop below.
num_epochs = 100

In [None]:
# Per-BATCH loss histories (one entry per optimizer step / validation batch),
# accumulated across all epochs.
train_loss, validation_loss = [], []

# Send batches to whatever device the model's parameters already live on,
# instead of assuming CUDA.
device = next(autoencoder.parameters()).device

for epoch in range(num_epochs):
  print(f"EPOCH : {epoch}")

  # ---- training pass ----
  autoencoder.train()
  for image, _ in train_dl:
    image = image.to(device)
    output = autoencoder(image)
    # Argument order follows loss_fn(input_image, reconstructed_image).
    loss = loss_fn(image, output)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    train_loss.append(loss.detach().cpu().numpy())

  # ---- validation pass ----
  # No gradients are needed here; disabling autograd avoids building a graph
  # for every validation batch.
  autoencoder.eval()
  with torch.no_grad():
    for val_image, _ in val_dl:
      val_image = val_image.to(device)
      val_output = autoencoder(val_image)
      val_loss = loss_fn(val_image, val_output)

      validation_loss.append(val_loss.detach().cpu().numpy())

  # Reports the loss of the LAST training batch and LAST validation batch of this epoch.
  print(f"train loss : {train_loss[-1]}, validation loss : {validation_loss[-1]}")

EPOCH : 0
train loss : 0.013344398699700832, validation loss : 0.012917229905724525
EPOCH : 1
train loss : 0.006116555072367191, validation loss : 0.004632435739040375
EPOCH : 2
train loss : 0.003116986248642206, validation loss : 0.003099725116044283
EPOCH : 3
train loss : 0.0025938863400369883, validation loss : 0.002256580162793398
EPOCH : 4
train loss : 0.0021177944727241993, validation loss : 0.0019586444832384586
EPOCH : 5
train loss : 0.0016311383806169033, validation loss : 0.0017911528702825308
EPOCH : 6
train loss : 0.0014991944190114737, validation loss : 0.0015488554490730166
EPOCH : 7
train loss : 0.001268503605388105, validation loss : 0.0012560873292386532
EPOCH : 8
train loss : 0.0015034900279715657, validation loss : 0.0016730836359784007
EPOCH : 9
train loss : 0.0011214256519451737, validation loss : 0.0010996655328199267
EPOCH : 10
train loss : 0.001003274810500443, validation loss : 0.0010643558343872428
EPOCH : 11
train loss : 0.0012331296456977725, validation loss