In [5]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import Dataset
from torch.utils.data import DataLoader, WeightedRandomSampler, RandomSampler
import pandas as pd
import os
from torchvision import models
from torchvision.models import resnet50, ResNet50_Weights
from torchvision.models import Inception3, Inception_V3_Weights
from torch.nn.functional import softmax, cross_entropy, mse_loss
from torch.optim import Adam
import matplotlib.pyplot as plt
import numpy as np

# Set the precision to medium
torch.set_float32_matmul_precision("medium")

import pytorch_lightning as pl
from pytorch_lightning.callbacks import LearningRateMonitor

# from torch.utils.tensorboard import SummaryWriter

from customdataset import CustomDataset
from fullhand_model_lghtng import ResNet50
from fullhand_model_inception_lghtng import Inception

# define path to the folder and csv file
img_folder_path = "./data/fh_train"
csv_file_path = "./data/train.csv"

In [12]:
transform = transforms.Compose(
    [
        transforms.Grayscale(num_output_channels=1),
        transforms.RandomHorizontalFlip(p=0.2),
        transforms.RandomAffine(degrees=0, translate=(0.05, 0.05)),
        transforms.RandomAdjustSharpness(sharpness_factor=1.5),
        transforms.RandomRotation(degrees=(-20, 20)),
        transforms.RandomCrop(size=(500, 500)),
        transforms.Resize((299, 299)),
        transforms.ToTensor(),
        transforms.Normalize((0.14), (0.18)),
    ]
)

# create instance of custom dataset
full_dataset = CustomDataset(img_folder_path, csv_file_path, transform=transform)
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(
    full_dataset, [train_size, val_size]
)

In [13]:
# create dataloader for the dataset
batch_size = 64
train_loader = DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True
)
val_loader = DataLoader(
    val_dataset, batch_size=batch_size, shuffle=False
)

In [14]:
images, labels = next(iter(train_loader))
print(torch.min(images), torch.max(images))

tensor(-0.7778) tensor(4.7778)


In [19]:
class Autoencoder(nn.Module):
    def __init__(self):
        super().__init__()        
        # N, 1, 28, 28
        self.encoder = nn.Sequential(
            nn.Conv2d(1, 16, 3, stride=2, padding=1), # -> N, 16, 14, 14
            nn.ReLU(),
            nn.Conv2d(16, 32, 3, stride=2, padding=1), # -> N, 32, 7, 7
            nn.ReLU(),
            nn.Conv2d(32, 64, 7) # -> N, 64, 1, 1
        )
        
        # N , 64, 1, 1
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(64, 32, 7), # -> N, 32, 7, 7
            nn.ReLU(),
            nn.ConvTranspose2d(32, 16, 3, stride=2, padding=1, output_padding=1), # N, 16, 14, 14 (N,16,13,13 without output_padding)
            nn.ReLU(),
            nn.ConvTranspose2d(16, 1, 3, stride=2, padding=1, output_padding=1), # N, 1, 28, 28  (N,1,27,27)
            nn.Sigmoid()
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

In [20]:
model = Autoencoder()

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(),
                             lr=1e-3, 
                             weight_decay=1e-5)

In [21]:
# Point to training loop video
num_epochs = 1
outputs = []
for epoch in range(num_epochs):
    for (img, _) in train_loader:
        # img = img.reshape(-1, 28*28) # -> use for Autoencoder_Linear
        recon = model(img)
        loss = criterion(recon, img)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch:{epoch+1}, Loss:{loss.item():.4f}')
    outputs.append((epoch, img, recon))

  return F.mse_loss(input, target, reduction=self.reduction)


RuntimeError: The size of tensor a (300) must match the size of tensor b (299) at non-singleton dimension 3