<a href="https://colab.research.google.com/github/csw5866/hello/blob/main/HW_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Q1. AlexNet with PyTorch

### Implementing AlexNet

> 1. **Dataset**
>> - We use CIFAR-100 with images resized from 32x32 to 224x224 (due to computational constraints).
>> - You need to resize the input images yourself as part of the data pipeline.
>> - Train for 10 epochs.


In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Prefer the GPU when PyTorch can see one; otherwise everything runs on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Device: {device}")

# Per-channel normalisation constants commonly used for CIFAR-100.
mean = (0.5071, 0.4867, 0.4408)
std = (0.2675, 0.2565, 0.2761)

# Training pipeline: upscale, take a random 224x224 crop, flip at random,
# convert to a tensor and normalise each channel.
transform_train = transforms.Compose([
    transforms.Resize(size=256),
    transforms.RandomCrop(size=224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

# Evaluation pipeline: deterministic resize straight to 224, no augmentation.
transform_test = transforms.Compose([
    transforms.Resize(size=224),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

# Both splits are downloaded into ./data on first use.
train_dataset = torchvision.datasets.CIFAR100(
    root='./data',
    train=True,
    transform=transform_train,
    download=True,
)
test_dataset = torchvision.datasets.CIFAR100(
    root="./data",
    train=False,
    transform=transform_test,
    download=True,
)

# Only the training loader shuffles; evaluation keeps the dataset order.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=128,
    shuffle=True,
    num_workers=1,
    pin_memory=True,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=128,
    shuffle=False,
    num_workers=1,
    pin_memory=True,
)



Device: cuda


In [12]:
# Initialize model


class Alexnet(nn.Module):
  """Single-stream AlexNet-style image classifier.

  Five convolution stages (local response normalisation after the first two,
  3x3 / stride-2 max pooling after stages 1, 2 and 5) followed by three fully
  connected layers, with dropout applied in front of the first two.

  ``fc1`` expects 256*5*5 input features, which is what the convolution stack
  produces for 224x224 inputs.

  Args:
    num_classes: Number of output logits. Defaults to 100 (CIFAR-100), which
      matches the previously hard-coded value.
  """
  def __init__(self, num_classes=100):
    super(Alexnet,self).__init__()

    # conv 1 (96 kernels in total; 48 per GPU in the original two-GPU layout)
    self.cnn1= nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4)
    self.relu1=nn.ReLU()
    self.lrn1=nn.LocalResponseNorm(size=5,alpha=1e-4,beta=0.75,k=2.0)

    # max pool 1
    self.maxpool1=nn.MaxPool2d(kernel_size=3, stride=2)

    # conv 2 (256 kernels in total; 128 per GPU in the original layout)
    self.cnn2=nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5,stride=1, padding=2)
    self.relu2=nn.ReLU()
    self.lrn2=nn.LocalResponseNorm(size=5,alpha=1e-4,beta=0.75,k=2.0)

    # max pool 2
    self.maxpool2=nn.MaxPool2d(kernel_size=3, stride=2)

    # conv 3 (384 kernels in total; 192 per GPU in the original layout)
    self.cnn3=nn.Conv2d(in_channels=256,out_channels=384, kernel_size=3, stride=1, padding=1)
    self.relu3=nn.ReLU()

    # conv 4 (384 kernels in total; 192 per GPU in the original layout)
    self.cnn4=nn.Conv2d(in_channels=384,out_channels=384, kernel_size=3, stride=1, padding=1)
    self.relu4=nn.ReLU()

    # conv 5 (256 kernels in total; 128 per GPU in the original layout)
    self.cnn5=nn.Conv2d(in_channels=384,out_channels=256, kernel_size=3, stride=1, padding=1)
    self.relu5=nn.ReLU()

    # max pool 5
    self.maxpool5=nn.MaxPool2d(kernel_size=3, stride=2)

    # FC 1
    self.fc1=nn.Linear(256*5*5,4096)
    self.relu6=nn.ReLU()

    # FC 2
    self.fc2=nn.Linear(4096,4096)
    self.relu7=nn.ReLU()

    # FC 3: one logit per class
    self.fc3=nn.Linear(4096,num_classes)

    # One dropout module reused before FC 1 and FC 2 (it holds no parameters).
    self.dropout=nn.Dropout(p=0.5)

  def forward(self,x):
    """Map a batch of images to class logits.

    Args:
      x: Image batch of shape (N, 3, 224, 224).

    Returns:
      Tensor of shape (N, num_classes) holding unnormalised logits
      (no softmax is applied).
    """
    # conv1
    out=self.cnn1(x)
    out=self.relu1(out)
    out=self.lrn1(out)

    # max pooling 1
    out=self.maxpool1(out)

    # conv2
    out=self.cnn2(out)
    out=self.relu2(out)
    out=self.lrn2(out)

    # max pooling 2
    out=self.maxpool2(out)

    # conv3
    out=self.cnn3(out)
    out=self.relu3(out)

    # conv4
    out=self.cnn4(out)
    out=self.relu4(out)

    # conv5
    out=self.cnn5(out)
    out=self.relu5(out)

    # max pooling 5
    out=self.maxpool5(out)

    # Collapse (C, H, W) into one feature axis per sample for the linear layers.
    out=out.flatten(1)

    # fc 1
    out=self.dropout(out)
    out=self.fc1(out)
    out=self.relu6(out)

    # fc 2
    out=self.dropout(out)
    out=self.fc2(out)
    out=self.relu7(out)

    # fc 3
    out=self.fc3(out)

    return out


# Train & Test your own code

# Build the network and place its parameters on the selected device.
model = Alexnet().to(device)

# Multi-class classification objective computed from raw logits.
criterion = nn.CrossEntropyLoss()

# Plain SGD with momentum and weight decay.
learning_rate = 0.01
optimizer = optim.SGD(
    params=model.parameters(),
    lr=learning_rate,
    momentum=0.9,
    weight_decay=0.0005,
)

def train(model, loader, criterion, optimizer,device):
  """Run one optimisation pass over every batch yielded by ``loader``.

  Args:
    model: Network to optimise; it is switched to training mode first.
    loader: Iterable yielding ``(inputs, targets)`` pairs.
    criterion: Callable returning a scalar loss from ``(outputs, targets)``.
    optimizer: Optimizer that owns the parameters of ``model``.
    device: Device each batch is moved to before the forward pass.

  Returns:
    None. The parameters of ``model`` are updated in place.
  """
  model.train()
  for batch in loader:
    features, labels = batch
    features = features.to(device)
    labels = labels.to(device)

    # Clear gradients left over from the previous step before accumulating new ones.
    optimizer.zero_grad()
    batch_loss = criterion(model(features), labels)
    batch_loss.backward()
    optimizer.step()


def test(model, loader, criterion, device, epoch):
  model.eval()
  total, correct, tot_loss =0,0,0
  with torch.no_grad():
    for inputs, targets in loader:
      inputs, targets = inputs.to(device), targets.to(device)
      outputs = model(inputs)
      loss = criterion(outputs,targets)
      tot_loss += loss.item()
      _, predicted = torch.max(outputs.data, 1)
      total += targets.size(0)
      correct += (predicted == targets).sum().item()

  accuracy = 100 * correct / total

  print('Epoch: {}. Loss: {}. Accuracy: {}'.format(epoch,tot_loss, accuracy))


# Number of full passes over the training set.
num_epochs = 10

# After each training pass, evaluate on the test loader and print the metrics.
for epoch in range(num_epochs):
  train(model=model, loader=train_loader, criterion=criterion, optimizer=optimizer, device=device)
  test(model=model, loader=test_loader, criterion=criterion, device=device, epoch=epoch)

Epoch: 0. Loss: 363.77926874160767. Accuracy: 1.4
Epoch: 1. Loss: 357.3419985771179. Accuracy: 2.19
Epoch: 2. Loss: 324.9289002418518. Accuracy: 5.75
Epoch: 3. Loss: 306.14215660095215. Accuracy: 10.3
Epoch: 4. Loss: 286.4503881931305. Accuracy: 13.73
Epoch: 5. Loss: 267.70901322364807. Accuracy: 18.51
Epoch: 6. Loss: 250.10759949684143. Accuracy: 23.26
Epoch: 7. Loss: 230.69673943519592. Accuracy: 27.25
Epoch: 8. Loss: 216.24664449691772. Accuracy: 30.6
Epoch: 9. Loss: 199.9049003124237. Accuracy: 34.51


# Q2. Wiener Deconvolution

### Implementing hyperparameter tuning for Wiener deconvolution

### Get Dataset

In [None]:
!pip install gdown
!gdown --fuzzy https://drive.google.com/file/d/1ATQh01anBo8ov--1ge8NWJM1Azl4HwfA/view?usp=sharing
!unzip diffusercam_dataset.zip

In [None]:
!pip install lpips
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
import torchvision
from torch import optim, nn
from torch.utils.data import Dataset, DataLoader, Subset
import lpips
from IPython.display import display, clear_output
import cv2
import numpy as np
import matplotlib.pyplot as plt

class LossFunction(nn.Module):
    """Combined reconstruction loss: MSE plus a small LPIPS (VGG) perceptual term.

    The returned value is ``1 * MSE + 0.05 * LPIPS``.
    """
    def __init__(self):
        super().__init__()
        self.criterion_mse = nn.MSELoss()
        self.criterion_lpips = lpips.LPIPS(net='vgg')

    def forward(self, output, label, epoch=0):
        """Compute the combined loss between a prediction and its target.

        Args:
            output: Predicted image batch.
            label: Target image batch with the same shape as ``output``.
            epoch: Accepted for API compatibility; not used in the computation.

        Returns:
            Scalar tensor holding the weighted sum of the MSE and LPIPS terms.
        """
        # Keep the LPIPS network on the same device as the prediction, even when
        # the loss module itself was never moved there.
        perceptual = self.criterion_lpips.to(output.device)
        # LPIPS expects inputs in [-1, 1] by default. normalize=True declares the
        # inputs to be in [0, 1] and rescales them internally.
        # NOTE(review): assumes output/label are on a [0, 1] scale, consistent with
        # the peak value of 1.0 used by PSNR in this notebook - confirm for your data.
        lpips_loss = torch.mean(perceptual(output, label, normalize=True))
        mix_loss = self.criterion_mse(output, label)
        loss = mix_loss * 1 + lpips_loss * 0.05
        return loss

class WallerDataset(Dataset):
    """Paired raw-measurement / label images stored as ``.npy`` files.

    Item ``idx`` is read from ``<path>/raw/im<idx+2>.npy`` and
    ``<path>/label/im<idx+2>.npy``. The dataset always reports 100 items.
    """
    def __init__(self, path, train=False, transform_raw=None, transform_lab=None):
        # ``train`` is accepted but is not used by this class.
        self.path, self.transform_raw, self.transform_lab = path, transform_raw, transform_lab

    def __len__(self):
        return 100

    def _load_rgb(self, subdir, idx):
        """Load one array from ``subdir`` and swap its channel order from BGR to RGB."""
        file_path = self.path + '/' + subdir + '/' + f'im{idx+2}.npy'
        return cv2.cvtColor(np.load(file_path), cv2.COLOR_BGR2RGB)

    def __getitem__(self, idx):
        """Return the ``(raw, label)`` pair for ``idx`` after the optional transforms."""
        raw = self._load_rgb('raw', idx)
        lab = self._load_rgb('label', idx)

        if self.transform_raw is not None:
            raw = self.transform_raw(raw)
        if self.transform_lab is not None:
            lab = self.transform_lab(lab)

        return raw, lab

def PSNR(img1, img2):
    """Batch-averaged peak signal-to-noise ratio in dB, using a peak value of 1.0.

    The mean squared error is computed per sample over axes 1-3, converted to
    dB, and then averaged over the batch.

    Returns:
        A 0-dim tensor with the mean PSNR, or the string ``"Same Image"`` when
        the batch-averaged MSE is exactly zero.
    """
    squared_error = (img1 - img2) ** 2
    mse = squared_error.mean(dim=(1, 2, 3), keepdim=True)
    if mse.mean() == 0:
        return "Same Image"
    psnr_per_image = 10 * torch.log10(1. / mse)
    return psnr_per_image.mean()

In [None]:
@torch.no_grad()
def gaussian_window(size, fwhm):
    """Return a 1-D Gaussian window of length ``size`` centred on its midpoint.

    The standard deviation is ``size / fwhm``, so a larger ``fwhm`` gives a
    narrower window. The result is detached from any autograd graph.
    """
    sigma = size / fwhm
    # Sample positions measured from the centre of the window.
    centered = torch.arange(size) - (size - 1) / 2
    window = torch.exp((centered / sigma).pow(2) * -0.5)
    return window.detach()

def gaus_t(x, fwhm=2):
    """Multiply a 4-D tensor by a separable 2-D Gaussian window over its last two axes.

    Args:
        x: Tensor with exactly four axes; the window spans the last two.
        fwhm: Width control forwarded to ``gaussian_window`` for both axes.

    Returns:
        ``x`` scaled element-wise by the window, broadcast over the first two axes.
    """
    _, _, size_a, size_b = x.size()

    # One 1-D window per windowed axis; their outer product is the 2-D window.
    win_a = gaussian_window(size_a, fwhm)
    win_b = gaussian_window(size_b, fwhm)
    window_2d = win_a[:, None] * win_b[None, :]

    # Two leading singleton axes -> (1, 1, size_a, size_b) so it broadcasts against x.
    window_2d = window_2d[None, None].to(x.device)
    return x * window_2d


class Wiener(nn.Module):
    """Frequency-domain Wiener deconvolution of a measurement with a known PSF.

    Args:
        psf: 4-D point-spread-function tensor; its last two axes are spatial.
        height, width: Spatial size of the measurement.
        height_p, width_p: Spatial size of the PSF.
        weights: Constant added to the filter denominator (stored as ``delta``).
    """
    def __init__(self, psf, height, width, height_p, width_p, weights=0.01):
        super(Wiener, self).__init__()
        # Size of the padded rFFT grid; stored for reference, not read by forward().
        self.height_freq = height + height_p
        self.width_freq = (width + width_p)// 2 + 1

        # NOTE: these are plain attributes, not nn.Parameter objects, so they are
        # neither updated by an optimizer nor moved by Module.to(). Wrap them in
        # nn.Parameter if they are meant to be learned.
        self.psf = psf
        self.alpha = 0.1
        self.delta = weights

    def forward(self, raw: torch.Tensor, epsilon=1e-6) -> torch.Tensor:
        """Deconvolve ``raw`` with the stored PSF.

        Args:
            raw: Measurement of shape (B, C, H, W).
            epsilon: Small constant added to the filter denominator for stability.

        Returns:
            Real tensor whose last two axes have size (H, W).
        """
        psf = self.psf
        _, _, H, W = raw.shape
        _, _, H_p, W_p = psf.shape
        # Fold all leading PSF axes into the channel axis -> (1, K, H_p, W_p).
        psf = psf.reshape(1,-1,psf.size(-2),psf.size(-1))
        psf_sum = psf.sum(dim=(-3, -2, -1), keepdim=True)          # (1, 1, 1, 1)
        psf_normalized = psf / (abs(psf_sum) * self.alpha + 1e-12)  # Prevent division by zero

        # Pad the measurement by the PSF size, repeating the border pixels.
        raw_padded = F.pad(
            raw,
            (W_p // 2, W_p - W_p // 2, H_p // 2, H_p - H_p // 2),
            mode='replicate'
        )

        # Taper the padded measurement with a Gaussian window.
        raw_padded = gaus_t(raw_padded, fwhm=2)

        # Zero-pad the PSF to the same spatial size as the padded measurement.
        psf_padded = F.pad(
            psf_normalized,
            (W // 2, W - W // 2, H // 2, H - H // 2),
            mode='constant'
        )

        fft_size = (raw_padded.size(-2), raw_padded.size(-1))
        raw_fft = torch.fft.rfft2(raw_padded, dim=(-2, -1))  # (B, C, H+H_p, (W+W_p)//2+1)
        psf_fft = torch.fft.rfft2(psf_padded, s=fft_size, dim=(-2, -1))  # (1, K, H+H_p, (W+W_p)//2+1)

        wiener_filter = psf_fft.conj() / (psf_fft.abs()**2 + epsilon + self.delta)
        out_fft = raw_fft * wiener_filter

        # Pass the spatial size explicitly: without it irfft2 assumes an even last
        # axis and returns one column too few whenever W + W_p is odd.
        out_spatial = torch.fft.irfft2(out_fft, s=fft_size, dim=(-2, -1))
        out_spatial = torch.fft.ifftshift(out_spatial, dim=(-2, -1))
        start_H = H_p // 2
        start_W = W_p // 2
        # Crop the padding away to recover the measurement's spatial size.
        out_cropped = out_spatial[..., start_H:start_H + H, start_W:start_W + W]  # (..., H, W)

        return out_cropped.real

In [None]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

batch_size = 1
channels = 1
height, width = 270, 480        # original image size
height_p, width_p = 270, 480    # PSF kernel size
alpha = 1                       # passed to Wiener as `weights` (added to the filter denominator)

# PSF: read as a single-channel image and resize to (width, height) = (480, 270).
psf = cv2.resize(cv2.imread('./diffusercam_dataset/psf.tiff', 0), (width_p, height_p))

# Measurement and ground truth for sample 65, with the channel order swapped BGR -> RGB.
raw = cv2.cvtColor(np.load('./diffusercam_dataset/raw/im65.npy'), cv2.COLOR_BGR2RGB)
img = cv2.cvtColor(np.load('./diffusercam_dataset/label/im65.npy'), cv2.COLOR_BGR2RGB)

# HWC arrays -> (1, C, H, W) tensors on the target device.
raw = torch.tensor(raw).permute(2, 0, 1)[None].to(device)
img = torch.tensor(img).permute(2, 0, 1)[None].to(device)

# PSF -> (1, 1, H, W), scaled by 1/255.
psf = torch.tensor(psf)[None, None].to(device) / 255.
# Use the mean of the top-left 15x15 corner as the background level, subtract it,
# and set anything that ends up negative to zero.
psf_bg = psf[..., :15, :15].mean()
psf = psf - psf_bg
psf = torch.where(psf < 0, torch.zeros_like(psf), psf)

model = Wiener(
    psf=psf,
    height=height,
    width=width,
    height_p=height_p,
    width_p=width_p,
    weights=alpha,
).to(device)

# Reconstruct, scale so the maximum is 1, and zero out negative values.
out = model(raw)
out = out / out.max()
out = torch.where(out < 0, torch.zeros_like(out), out)
print("Output shape:", out.shape)

# Show ground truth, raw measurement and reconstruction, in that order.
plt.imshow(img[0].permute(1, 2, 0).detach().cpu())
plt.show()
plt.imshow(raw[0].permute(1, 2, 0).detach().cpu())
plt.show()
plt.imshow(out[0].permute(1, 2, 0).detach().cpu())
plt.show()

# Keep this reconstruction around for later comparison.
out_before_learn_ = out

In [None]:
# Load dataset: ToTensor is the only preprocessing, applied to measurements and labels alike.
transformer = transforms.Compose([transforms.ToTensor()])

trainset_load = WallerDataset(
    './diffusercam_dataset',
    train=True,
    transform_raw=transformer,
    transform_lab=transformer,
)

# Shuffled mini-batches of 16 for optimisation.
trainset_loader = DataLoader(
    dataset=trainset_load,
    batch_size=16,
    shuffle=True,
    num_workers=1,
    pin_memory=False,
)

# NOTE(review): evaluation iterates over the same dataset object as training,
# one sample at a time and in a fixed order.
testset_loader = DataLoader(
    dataset=trainset_load,
    batch_size=1,
    shuffle=False,
    num_workers=1,
    pin_memory=False,
)

# Initialize model


# Train & Test your own code