In [18]:
import os
import time
import pandas as pd
from typing import List, Tuple

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split

from PIL import Image

import torchvision
from torchvision import transforms
from torchvision.models import resnet18, ResNet18_Weights

In [19]:
CSV_PATH = "training_data.csv"   # path to your CSV
IMAGE_PATH_COL = "full_image_path"

BATCH_SIZE = 32
NUM_EPOCHS = 5
VAL_SPLIT = 0.2
LEARNING_RATE = 1e-4
NUM_WORKERS = 0  # keep 0 for Windows, can increase (e.g. 4) on Linux

In [None]:
class GlyphDataset(Dataset):
    """
    Simple multi-label dataset for your codex glyphs.
    - Expects a DataFrame with:
        - one column with the image path (IMAGE_PATH_COL)
        - several columns (label_columns) with 0/1 multi-hot labels.
    """

    def __init__(self, df: pd.DataFrame, label_columns: List[str], transform=None):
        self.df = df.reset_index(drop=True)
        self.label_columns = label_columns
        self.transform = transform

    def __len__(self) -> int:
        return len(self.df)

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
        row = self.df.iloc[idx]

        img_path = row[IMAGE_PATH_COL]
        # Ensure it's a string (avoid issues if there are NaNs)
        img_path = str(img_path)

        # Load image
        image = Image.open(img_path).convert("RGB")

        if self.transform is not None:
            image = self.transform(image)

        # Multi-label target as float16 tensor
        print("LABELS: ", row[self.label_columns].values)
        labels = torch.tensor(row[self.label_columns].values, dtype=torch.float16)

        return image, labels

In [21]:
def build_model(num_labels: int) -> nn.Module:
    """
    Build a ResNet18 model with a new classification head for multi-label outputs.
    """
    weights = ResNet18_Weights.IMAGENET1K_V1
    model = resnet18(weights=weights)

    # Replace final FC layer with num_labels outputs
    in_features = model.fc.in_features
    model.fc = nn.Linear(in_features, num_labels)

    return model

In [22]:
def train_one_epoch(
    model: nn.Module,
    dataloader: DataLoader,
    criterion: nn.Module,
    optimizer: torch.optim.Optimizer,
    device: torch.device,
    epoch: int
) -> float:
    model.train()
    running_loss = 0.0

    for batch_idx, (images, targets) in enumerate(dataloader):
        images = images.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True)

        optimizer.zero_grad()

        logits = model(images)  # shape: [B, num_labels]
        loss = criterion(logits, targets)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Simple console log every N batches
        if (batch_idx + 1) % 20 == 0:
            avg_loss = running_loss / (batch_idx + 1)
            print(f"[Epoch {epoch:02d} | Batch {batch_idx+1:04d}] "
                  f"Train loss: {avg_loss:.4f}")

    epoch_loss = running_loss / len(dataloader)
    return epoch_loss

In [23]:
@torch.no_grad()
def evaluate(
    model: nn.Module,
    dataloader: DataLoader,
    criterion: nn.Module,
    device: torch.device
) -> Tuple[float, float]:
    """
    Very simple evaluation:
      - returns average loss
      - and a rough multi-label accuracy:
        proportion of (label-wise) correct predictions
        with threshold 0.5 on sigmoid outputs.
    """
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, targets in dataloader:
        images = images.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True)

        logits = model(images)
        loss = criterion(logits, targets)
        running_loss += loss.item()

        # Multi-label prediction: sigmoid + threshold
        probs = torch.sigmoid(logits)
        preds = (probs >= 0.5).float()

        # label-wise accuracy (how many individual labels are correct)
        correct += (preds == targets).sum().item()
        total += targets.numel()

    avg_loss = running_loss / len(dataloader)
    label_accuracy = correct / total if total > 0 else 0.0

    return avg_loss, label_accuracy

In [24]:
# Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Optional: set matmul precision (useful on recent PyTorch)
if hasattr(torch, "set_float32_matmul_precision"):
    torch.set_float32_matmul_precision("medium")

Using device: cuda


In [25]:
# Load CSV
print(f"Loading dataset from: {CSV_PATH}")
df = pd.read_csv(CSV_PATH)

# Identify label columns:
#   In your CSV, first 4 columns are metadata, rest are labels
#   If that changes later, you can define label_columns manually.
all_columns = list(df.columns)
label_columns = all_columns[4:]  # from column index 4 to end

print(f"Found {len(label_columns)} label columns.")
# Basic sanity check
print("Example label columns:", label_columns[:10])

Loading dataset from: training_data.csv
Found 330 label columns.
Example label columns: ['?', '?_02', '?_03', '?_04', '?_05', '?_07', 'acalli', 'acatl', 'acocotli', 'ahuictli']


In [26]:
# Transforms (ImageNet-style for ResNet18)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# Create whole dataset
full_dataset = GlyphDataset(df, label_columns=label_columns, transform=transform)

# Split into train/val
num_samples = len(full_dataset)
num_val = int(VAL_SPLIT * num_samples)
num_train = num_samples - num_val

train_dataset, val_dataset = random_split(
    full_dataset,
    [num_train, num_val],
    generator=torch.Generator().manual_seed(42),
)

print(f"Dataset size: {num_samples} samples")
print(f"Train: {num_train} | Val: {num_val}")

Dataset size: 4175 samples
Train: 3340 | Val: 835


In [27]:
# DataLoaders
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
    pin_memory=True,
)

val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=True,
)

In [28]:
# Model, loss, optimizer
num_labels = len(label_columns)
model = build_model(num_labels=num_labels).to(device)

criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [29]:
# ---- Training loop ----
best_val_loss = float("inf")

for epoch in range(1, NUM_EPOCHS + 1):
    start_time = time.time()

    train_loss = train_one_epoch(
        model, train_loader, criterion, optimizer, device, epoch
    )
    val_loss, val_label_acc = evaluate(
        model, val_loader, criterion, device
    )

    elapsed = time.time() - start_time

    print(f"Epoch {epoch:02d} | "
            f"Train loss: {train_loss:.4f} | "
            f"Val loss: {val_loss:.4f} | "
            f"Val label-wise acc: {val_label_acc:.4f} | "
            f"Time: {elapsed:.1f}s")

    # Simple best-checkpoint logic (placeholder for future saving)
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(
            {
                "epoch": epoch,
                "model_state_dict": model.state_dict(),
                "optimizer_state_dict": optimizer.state_dict(),
                "val_loss": val_loss,
                "label_columns": label_columns,
            },
            "best_resnet18_codex.pt",
        )
        print("  -> New best model saved to best_resnet18_codex.pt")

print("Training finished.")


TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint64, uint32, uint16, uint8, and bool.