In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
import os
import random

import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import torchvision.transforms.functional as F
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter

In [42]:
import torch
import torch.nn as nn

class Dehazer(nn.Module):
    def __init__(self, in_channels=3, out_channels=3):
        super(Dehazer, self).__init__()

        self.encoder = nn.Sequential(
            nn.Conv2d(in_channels, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.decoder = nn.Sequential(
            nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
            nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
            nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
            nn.Conv2d(64, out_channels, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )

#         self.decoder = nn.Sequential(
#             nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1),
#             nn.ReLU(inplace=True),
#             nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1),
#             nn.ReLU(inplace=True),
#             nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
#             nn.ReLU(inplace=True),
#             nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),
#             nn.ReLU(inplace=True),
#             nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
#             nn.ReLU(inplace=True),
#             nn.ConvTranspose2d(64, out_channels, kernel_size=4, stride=2, padding=1),
#             nn.ReLU(inplace=True),
#         )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

In [43]:
class PerceptualLoss(nn.Module):
    def __init__(self, net, gamma=0.0):
        super(PerceptualLoss, self).__init__()
        self.net = net
        self.gamma = gamma
        self.net.eval()
        
    def forward(self, output, target):
        # Extract features from the VGG16 model for output and target
        output_features = self.net(output)
        target_features = self.net(target)

        # Compute the L2 distance between the features
        perceptual_loss = nn.MSELoss()(output_features, target_features)
        
        # pixel wise loss
        pixel_loss = nn.MSELoss()(output, target)
        
#         return pixel_loss
        return perceptual_loss + self.gamma * pixel_loss

In [48]:
class DHaze(Dataset):
    def __init__(self, data_dir, transform=None):
        
        self.data_dir = data_dir
        self.transform = transform[0]
        self.target_transform = transform[1]
        self.x = []
        self.y = []
        
        with open(os.path.join(data_dir, 'train_GT.txt'), "r") as f:
            for l in f.readlines():
                path = l.strip()
                assert os.path.isfile(path), path
                self.x.append(path)

        with open(os.path.join(data_dir, 'train_tmap.txt'), "r") as f:
            for l in f.readlines():
                assert os.path.isfile(l.strip()), l.strip()
                self.y.append(l.strip())

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        image = np.array(Image.open(self.x[idx]))/255
        target = np.array(Image.open(self.y[idx]))/255

        if self.transform is not None:
            image = self.transform(image.astype("float32"))
        if self.target_transform is not None:
            target = self.target_transform(target.astype("float32"))
        
        return image, target

class DHazeTest(Dataset):
    def __init__(self, data_dir, transform):
        
        self.data_dir = data_dir
        self.transform = transform[0]
        self.target_transform = transform[1]
        self.x = []
        self.y = []

        with open(os.path.join(data_dir, 'test_GT.txt'), "r") as f:
            for l in f.readlines():
                assert os.path.isfile(l.strip()), l.strip()
                self.x.append(l.strip())

        with open(os.path.join(data_dir, 'test_tmap.txt'), "r") as f:
            for l in f.readlines():
                assert os.path.isfile(l.strip()), l.strip()
                self.y.append(l.strip())
        
    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        image = np.array(Image.open(self.x[idx]))/255
        target = np.array(Image.open(self.y[idx]))/255
        
        if self.transform is not None:
            image = self.transform(image.astype("float32"))
        
        if self.target_transform is not None:
            target = self.target_transform(target.astype("float32"))
        
        return image, target    

In [49]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.CenterCrop(400),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])          
])

target_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.CenterCrop(400),
])

train_dataset = DHaze("/content/drive/MyDrive/CSE273/dhaze/NYU_split", transform=[transform, target_transform])
test_dataset = DHazeTest("/content/drive/MyDrive/CSE273/dhaze/NYU_split", transform=[transform, target_transform])

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=4, persistent_workers=True)
test_loader = DataLoader(test_dataset, batch_size=8,  shuffle=False)

print(f"Number of batches per epoch in train set: {len(train_loader)}")
print(f"Number of batches in test set: {len(test_loader)}")

Number of batches per epoch in train set: 145
Number of batches in test set: 37


In [33]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [46]:
model = Dehazer(in_channels=3, out_channels=1)
num_params_m = sum(torch.numel(p) for p in model.parameters())
print(f"Number of parameters in the model: {num_params_m}")

# VGG for perceptual loss
vgg_model = models.vgg16(pretrained=True).features
feat_extractor = nn.Sequential(*list(vgg_model.children())[:24])  # from relu4_2 layer
feat_extractor.add_module("avgpool",nn.AdaptiveAvgPool2d((1, 1)))

# ResNet for perceptual loss
# resnet = models.resnet18(pretrained=True)
# feat_extractor = nn.Sequential(*list(resnet.children())[:7])
# feat_extractor.add_module("avgpool",nn.AdaptiveAvgPool2d((1, 1)))

num_params_f = sum(torch.numel(p) for p in feat_extractor.parameters())
print(f"Number of parameters in the feature extractor: {num_params_f}")

# Loss
GAMMA = 1.5
feat_extractor.to(device)
criterion = PerceptualLoss(feat_extractor, gamma=GAMMA)


learning_rate = 1e-3
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

Number of parameters in the model: 3472899




Number of parameters in the feature extractor: 7635264


In [52]:
list(vgg_model.children())[:24]

[Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
 Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
 Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
 Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 Conv2d(512, 512, kernel_size=(3, 3), stride=(1

In [50]:
model.to(device)

num_epochs = 5

log_dir = f"./logs/gamma{GAMMA}_epochs{num_epochs}"
writer = SummaryWriter(log_dir)


num_batches = len(train_loader)
for epoch in range(num_epochs):
    
        running_loss = 0.0
        for idx, (images, targets) in enumerate(train_loader):
            images = images.to(device)
            targets = targets.to(device)

            # Forward pass
            outputs = model(images)
            
            # Compute the loss
            loss = criterion(outputs, targets)
            
            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
                
            if idx%100 == 0  or num_batches-1 == idx:
                writer.add_scalar('Loss/train', loss.cpu().item(), epoch * len(train_loader) + idx)

        epoch_loss = running_loss / len(train_loader)
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.6f}")

writer.close()

RuntimeError: ignored