In [1]:
# from google.colab import drive
# drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Import necessary libraries
import os
import sys

from PIL import Image
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import transforms

# Directory path used in Google Colab
# project_dir = '/content/drive/MyDrive/Colab Notebooks/HGCAL/visual-inspection'

# Directory path used in local
project_dir = '../'

current_dir = os.path.join(project_dir, 'autoencoder')
sys.path.append(current_dir)

from data_loading import *
from training import *

# Set the seed
torch.manual_seed(42)

# Ensure that all operations are deterministic on GPU (if used) for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Path to the datasets folder
DATASET_PATH = os.path.join(project_dir, 'datasets')
CHECKPOINT_PATH = os.path.join(current_dir, 'small_ae.pt')

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device
print(CHECKPOINT_PATH)

../autoencoder\small_ae.pt


In [4]:
# Read in the image
image = Image.open(os.path.join(DATASET_PATH, 'unperturbed_data', 'good_hexaboard.png'))

# Get the height and width of the image
width, height = image.size
print('Image width:', width)
print('Image height:', height)

Image width: 1100
Image height: 943


In [5]:
# Adjust the number of segments
# THIS SHOULD WORK WITH THE GUI
NUM_VERTICAL_SEGMENTS = 20
NUM_HORIZONTAL_SEGMENTS = 12

# Define the transformations
transform = transforms.Compose([
    RotationAndSegmentationTransform(
        height=height,
        width=width,
        vertical_segments=NUM_VERTICAL_SEGMENTS,
        horizontal_segments=NUM_HORIZONTAL_SEGMENTS
    ),
    transforms.RandomRotation(degrees=2),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
])

# Read in and process the iamges
train_dataset = HexaboardDataset(
    image_dir=os.path.join(DATASET_PATH, 'unperturbed_data'),
    transform=transform
)

val_dataset = HexaboardDataset(
    image_dir=os.path.join(DATASET_PATH, 'perturbed_data'),
    transform=transform
)

# Set the batch and chunk size
batch_size = 1
chunk_size = 12

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, drop_last=True)
# test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=True)

# Print some information about the data
print(f'Train dataset size: {len(train_dataset)}')
print(f'Validation dataset size: {len(val_dataset)}')
# print(f'Test dataset size: {len(test_dataset)}')
print(f'Segments Shape: {train_dataset[0].shape}')
print(f'Image shape: {train_dataset[0][0].shape}')
print(f'Image tensor type: {train_dataset[0][0].dtype}')
print(f'Batches: {len(train_loader)}')

Train dataset size: 1
Validation dataset size: 2
Segments Shape: torch.Size([720, 3, 47, 45])
Image shape: torch.Size([3, 47, 45])
Image tensor type: torch.float32
Batches: 1


In [6]:
# Get the segments' height and width
segment_height = train_dataset[0][0][0].shape[0]
segment_width = train_dataset[0][0][0].shape[1]
print('Segment height:', segment_height)
print('Segment width:', segment_width)

Segment height: 47
Segment width: 45


In [7]:
# Initialize the model
cnn_ae = ConvAutoEncoder(
    height=segment_height,
    width=segment_width,
    latent_dim=128,
    kernel_sizes=[32, 64]
)
cnn_ae.to(device)

ConvAutoEncoder(
  (encoder): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2))
    (1): ReLU(inplace=True)
    (2): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2))
    (3): ReLU(inplace=True)
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=7040, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=7040, bias=True)
  (unflatten): Unflatten(dim=1, unflattened_size=(64, 11, 10))
  (decoder): Sequential(
    (0): ConvTranspose2d(64, 32, kernel_size=(3, 3), stride=(2, 2), output_padding=(0, 1))
    (1): ReLU(inplace=True)
    (2): ConvTranspose2d(32, 3, kernel_size=(3, 3), stride=(2, 2))
  )
)

In [8]:
optimizer = optim.Adam(cnn_ae.parameters(), lr=1e-3)
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)
criterion = nn.BCEWithLogitsLoss()

# Train the model
history, cnn_ae = train_autoencoder(
    model=cnn_ae,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=train_loader,
    val_loader=val_loader,
    scheduler=scheduler,
    num_epochs=20,
    save_path=CHECKPOINT_PATH
)

Epoch 1/20:   0%|          | 0/1 [00:00<?, ?batch/s]

RuntimeError: Given groups=1, weight of size [32, 3, 3, 3], expected input[60, 4, 47, 45] to have 3 channels, but got 4 channels instead

In [None]:
# View the training progress
plot_metrics(history)

In [None]:
# Load the model's weights
cnn_ae.load_state_dict(torch.load(CHECKPOINT_PATH, map_location=device))

In [None]:
# # Evaluate the model
# evaluate_autoencoder(
#     model=cnn_ae,
#     criterion=criterion,
#     test_loader=test_loader,
#     num_images=10,
#     chunk_size=chunk_size,
#     visualize=True
# )