In [14]:
import torch
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
import torchvision.transforms as T
from tqdm import tqdm
from PIL import Image
import pandas as pd
import json
from pathlib import Path

In [15]:
import os
N_WORKERS = os.cpu_count()

In [16]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

In [4]:
from comet_ml import Experiment

N_EPOCHS = 200

# Create an instance of the Experiment class
experiment = Experiment(
    project_name="ResNet50 Piece Counter",  # Replace with your project name
    workspace="cristy17001"  # Replace with your workspace name
)

experiment.set_name("ResNet50 Count Regressor no warping")
experiment.log_parameters({
    "model": "resnet50",
    "optimizer": "AdamW",
    "lr": 1e-4,
    "weight_decay": 1e-4,
    "loss_function": "MSELoss",
    "scheduler": "ReduceLROnPlateau",
    "pretrained": True,
    "patience": 2,
    "batch_size": 64,
    "epochs": N_EPOCHS,
})

[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/cristy17001/resnet50-piece-counter/7ea98f3dacac41a48451c531b88466cf



In [17]:
def save_preprocessed_dataset(image_dir, training_inputs_dir, output_file, transform):
    from pathlib import Path
    import torch
    import json
    from PIL import Image

    image_dir = Path(image_dir)
    image_files = sorted([f for f in image_dir.iterdir() if f.is_file()])
    print(f"Preprocessing and saving {len(image_files)} samples...")
    data = []

    for idx, image_path in enumerate(image_files):
        base_name = image_path.name.split('.')[0]  # Get the base name without extension
        json_path = Path(training_inputs_dir) / f"{base_name}.json"
        if not json_path.exists():
            continue

        with open(json_path) as f:
            meta = json.load(f)
            count = meta["piece_count"]
            presence_matrix = meta["presence_matrix"]

        # Convert 8x8 presence matrix to 1D tensor of size 64
        presence_tensor = torch.tensor(presence_matrix, dtype=torch.float32).flatten()
        print(presence_tensor)

        # Load and transform the image
        img = Image.open(image_path).convert("RGB")
        img_tensor = transform(img)

        # Store the tuple (image, presence, count)
        data.append((img_tensor, presence_tensor, torch.tensor(count, dtype=torch.float32)))

        if (idx + 1) % 100 == 0:
            print(f"Processed {idx + 1}/{len(image_files)}")

    torch.save(data, output_file)
    print(f"Saved preprocessed dataset to {output_file}")

In [18]:
save_preprocessed_dataset("../Shared/training_inputs/chessred2k_YOLO/train/images", "../Shared/training_inputs/matrices", "train_data_noWarp.pt", transform=transform)
save_preprocessed_dataset("../Shared/training_inputs/chessred2k_YOLO/test/images", "../Shared/training_inputs/matrices", "test_data_noWarp.pt", transform=transform)
save_preprocessed_dataset("../Shared/training_inputs/chessred2k_YOLO/valid/images", "../Shared/training_inputs/matrices", "val_data_noWarp.pt", transform=transform)

Preprocessing and saving 1442 samples...
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 0., 1.])
tensor([1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 0., 0.,
        1., 

In [7]:
class PreloadedDataset(Dataset):
    def __init__(self, tensor_file):
        self.data = torch.load(tensor_file)  # list of (img_tensor, label)
        print(self.data[0][0].shape)
        

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

In [8]:
train_dataset = PreloadedDataset("./train_data_noWarp.pt")
test_dataset = PreloadedDataset("./test_data_noWarp.pt")
val_dataset = PreloadedDataset("./val_data_noWarp.pt")

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=0)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=0)
validation_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=0)

  self.data = torch.load(tensor_file)  # list of (img_tensor, label)


torch.Size([3, 640, 640])
torch.Size([3, 640, 640])
torch.Size([3, 640, 640])


### Training Model

In [9]:
# Define the Model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = models.resnet18(weights=True)
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 1)
model = model.to(device)



In [10]:
criterion = nn.MSELoss()
optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=2, factor=0.1, verbose=True)



In [11]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

def evaluate(model, dataloader, criterion):
    model.eval()
    val_loss = 0.0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device).float().unsqueeze(1)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            val_loss += loss.item()
            all_preds.extend(outputs.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    avg_val_loss = val_loss / len(dataloader)
    rmse = np.sqrt(mean_squared_error(all_labels, all_preds))
    mae = mean_absolute_error(all_labels, all_preds)
    r2 = r2_score(all_labels, all_preds)

    return avg_val_loss, rmse, mae, r2

In [12]:
def train_model(model, train_loader, val_loader, optimizer, criterion, scheduler, num_epochs=10, save_best=True):
    best_val_rmse = float('inf')
    best_model_wts = model.state_dict()

    history = {'train_loss': [], 'val_loss': [], 'val_rmse': [], 'val_mae': [], 'val_r2': []}

    os.makedirs("checkpoints", exist_ok=True)

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch + 1}/{num_epochs}")
        model.train()
        train_loss = 0.0

        for inputs, labels, _ in tqdm(train_loader, desc="Training"):
            inputs, labels = inputs.to(device), labels.to(device).float().unsqueeze(1)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        avg_train_loss = train_loss / len(train_loader)
        val_loss, rmse, mae, r2 = evaluate(model, val_loader, criterion)

        print(f"Train Loss: {avg_train_loss:.4f}")
        print(f"Val   Loss: {val_loss:.4f} | RMSE: {rmse:.4f} | MAE: {mae:.4f} | R²: {r2:.4f}")

        experiment.log_metrics({
            "train_loss": avg_train_loss,
            "val_loss": val_loss,
            "val_rmse": rmse,
            "val_mae": mae,
            "val_r2": r2,
            "lr": optimizer.param_groups[0]['lr']
        }, epoch=epoch)

        history['train_loss'].append(avg_train_loss)
        history['val_loss'].append(val_loss)
        history['val_rmse'].append(rmse)
        history['val_mae'].append(mae)
        history['val_r2'].append(r2)

        # Save checkpoint
        checkpoint_path = f"checkpoints/epoch_{epoch+1}.pt"
        torch.save(model.state_dict(), checkpoint_path)
        experiment.log_model(f"model_epoch_{epoch+1}", checkpoint_path)

        # Save best model
        if rmse < best_val_rmse:
            best_val_rmse = rmse
            best_model_wts = model.state_dict()
            torch.save(best_model_wts, "best_count_regressor.pt")
            experiment.log_model("best_model", "best_count_regressor.pt")

        scheduler.step(val_loss)

    model.load_state_dict(best_model_wts)
    return model, history

In [13]:
best_model, history = train_model(
    model,
    train_loader,
    validation_loader,
    optimizer,
    criterion,
    scheduler,
    num_epochs=N_EPOCHS
)


Epoch 1/200


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
Training: 100%|██████████| 46/46 [00:11<00:00,  4.08it/s]


ValueError: too many values to unpack (expected 2)

In [14]:
torch.save(best_model.state_dict(), "best_count_regressor.pt")

In [None]:
# End the experiment
experiment.end()