In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchvision.datasets import CIFAR10


# Define the FCN-Siamese model
class FCNSiamese(nn.Module):
    def __init__(self):
        super(FCNSiamese, self).__init__()
        # TODO: Define our FCN layers
        self.fcn = nn.Conv2d(3, 3, kernel_size=3)
        
    def forward(self, x1, x2):
        # TODO: Implement the forward pass of the FCN-Siamese model
        output1 = self.fcn(x1)
        output2 = self.fcn(x2)
        return output1, output2

# Define the training loop
def train(model, train_loader, criterion, optimizer):
    model.train()
    
    for batch_idx, (lr_images, hr_images) in enumerate(train_loader):
        lr_images = lr_images.to(device)
        hr_images = hr_images.to(device)
        
        optimizer.zero_grad()
        
        # Generate super-resolved images using the model
        sr_images, _ = model(lr_images, lr_images)
        
        # Resize super-resolved and high-resolution images to the same size
        print(lr_images.size(), hr_images.size(), sr_images.size())
        sr_images = nn.functional.interpolate(sr_images, size=hr_images.size()[0:], mode='bilinear', align_corners=False)
        
        # Compute the loss between super-resolved images and ground truth high-resolution images
        loss = criterion(sr_images, hr_images)
        
        loss.backward()
        optimizer.step()
        
        if batch_idx % 100 == 0:
            print(f"Batch {batch_idx}/{len(train_loader)}, Loss: {loss.item():.4f}")

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize the FCN-Siamese model
model = FCNSiamese().to(device)

# Define loss function
criterion = nn.MSELoss()

# Define optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Define transforms and load the CIFAR-10 dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((32, 32)),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

dataset = CIFAR10(root='dataset/', train=True, download=True, transform=transform)

# TODO: Create dataset of format where for each index, we have 2 images, 
# each of same image, but different resolution and maybe rotation
train_loader = DataLoader(dataset, batch_size=64, shuffle=True, num_workers=4)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")
    train(model, train_loader, criterion, optimizer)


Files already downloaded and verified
Epoch 1/10
torch.Size([64, 3, 32, 32]) torch.Size([64]) torch.Size([64, 3, 30, 30])


ValueError: size shape must match input shape. Input is 2D, size is 1