In [1]:
from helper import  draw_batch
import torch
from dataset import AarizDataset
from torch.utils.data import DataLoader
from CephNet import CephNet


# --- Environment report -----------------------------------------------------
# The CUDA device queries raise when no GPU is visible, so they are only issued
# when CUDA is available. The rest of this cell already supports a CPU fallback
# through `device` below.
if torch.cuda.is_available():
    print(f"Device: {torch.cuda.get_device_name(0)}")
    print(f"Compute Capability: {torch.cuda.get_device_capability(0)}")
else:
    print("Device: CPU (CUDA not available)")
print(f"PyTorch Version: {torch.__version__}")
print(f"PyTorch CUDA: {torch.version.cuda}")

# --- Configuration ----------------------------------------------------------
path = "Aariz Dataset"  # dataset root handed to AarizDataset
batch_size = 5
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Model and data ---------------------------------------------------------
model = CephNet()
model.to(device)

training_data = AarizDataset(path, "TRAIN")
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)

# --- Smoke test: push a single batch through the network --------------------
# Only tensor shapes are inspected here, so gradient tracking is disabled to
# avoid building an autograd graph that is never used.
with torch.no_grad():
    for images, landmarks in train_dataloader:
        images = images.to(device)
        print(f"Shape of Image Batch: {images.shape}")
        print(f"Shape of Landmark Batch: {landmarks.shape} {landmarks.dtype}")
        # Optional visual check, left disabled as in the original cell:
        # draw_batch(images, landmarks)
        output = model(images)
        print(f"Shape of Output batch: {output.shape}")
        break  # one batch is enough to confirm the shapes line up



Device: NVIDIA GeForce RTX 5080 Laptop GPU
Compute Capability: (12, 0)
PyTorch Version: 2.10.0+cu128
PyTorch CUDA: 12.8
Shape of Image Batch: torch.Size([5, 3, 512, 512])
Shape of Landmark Batch: torch.Size([5, 29, 2]) torch.float32
Shape of Output batch: torch.Size([5, 29, 2])


In [2]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from dataset import AarizDataset
from CephNet import CephNet,train,validate,test
from torch import optim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Configuration ----------------------------------------------------------
path = "Aariz Dataset"  # dataset root handed to AarizDataset
batch_size = 5
num_epochs = 300
checkpoint_path = "last_checkpoint.pth"

# --- Model ------------------------------------------------------------------
# No separate "best model" weights are loaded here: the checkpoint restored
# below overwrites every parameter anyway, so an earlier load would be dead work.
model = CephNet()
model.to(device)

# --- Data -------------------------------------------------------------------
training_data = AarizDataset(path, "TRAIN")
val_data = AarizDataset(path, "VALID")
test_data = AarizDataset(path, "TEST")

train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
# Evaluation loaders are not shuffled: sample order has no benefit for metrics
# and a fixed order keeps validation/test runs repeatable.
val_dataloader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

# --- Optimisation setup -----------------------------------------------------
loss_fn = nn.SmoothL1Loss(beta=1.0)
optimizer = optim.AdamW(model.parameters(), lr=1e-5, weight_decay=1e-2)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=7, factor=0.1)

# --- Resume from the last checkpoint ----------------------------------------
# map_location lets a checkpoint written on a GPU be restored on whatever
# `device` resolved to (including the CPU fallback).
checkpoint = torch.load(checkpoint_path, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
# NOTE: for a fresh (non-resumed) run, start from best_euclid = float("inf").
best_euclid = checkpoint['best_euclid']

# Continue right after the epoch stored in the checkpoint instead of a
# hand-edited constant; 151 reproduces the previously hard-coded start (152)
# for checkpoints that lack the 'epoch' key.
start_epoch = checkpoint.get('epoch', 151) + 1

# Path of the best weights saved so far. Older checkpoints did not record it,
# so fall back to the filename that used to be hard-coded here.
file_name = checkpoint.get('best_model_path', "best_model_epoch_95.pth")

for epoch in range(start_epoch, num_epochs + 1):
    print(f"\n--- Epoch {epoch}/{num_epochs} ---")

    # 1. TRAIN
    # Returns (epoch_loss, avg_euclid)
    train_loss, train_euclid = train(model, train_dataloader, loss_fn, optimizer, device, epoch)

    # 2. VALIDATE
    # Returns (epoch_loss, avg_euclid)
    val_loss, val_euclid = validate(model, val_dataloader, loss_fn, device, epoch)

    # 3. UPDATE SCHEDULER
    # ReduceLROnPlateau watches the validation loss
    scheduler.step(val_loss)

    # 4. KEEP THE BEST MODEL
    # Save a separate copy of the weights whenever the validation Euclidean
    # distance improves on the best value seen so far.
    if val_euclid < best_euclid:
        best_euclid = val_euclid
        file_name = f"best_model_epoch_{epoch}.pth"
        torch.save(model.state_dict(), file_name)
        print(f"⭐ New Best Model Saved! (Best MRE: {best_euclid:.6f})")

    # 5. CHECKPOINT
    # Everything needed to resume: model, optimizer and scheduler state, the
    # best score so far, and the file that holds the best weights.
    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict(),
        'best_euclid': best_euclid,
        'best_model_path': file_name,
    }
    torch.save(checkpoint, checkpoint_path)

# ==========================================
# FINAL TEST PHASE (After Training Finishes)
# ==========================================
print("\n" + "="*30)
print("TRAINING COMPLETE. RUNNING FINAL TEST...")

# Restore the weights stored under `file_name` (the best-model file tracked by
# the training loop). map_location keeps the load working when the file was
# written on a GPU but `device` resolved to the CPU fallback.
model.load_state_dict(torch.load(file_name, map_location=device))

# Run the test function on the held-out test set; it returns (loss, MRE).
final_loss, final_mre = test(model, test_dataloader, loss_fn, device, "FINAL")

#79











--- Epoch 152/300 ---
Epoch: 152 | Average Training Loss: 0.000002
Epoch: 152 | Average Euclidean Distance: 0.0026
Epoch: 152 | Average Val Loss: 0.000070
Epoch: 152 | Average Euclidean Distance: 0.0139

--- Epoch 153/300 ---
Epoch: 153 | Average Training Loss: 0.000002
Epoch: 153 | Average Euclidean Distance: 0.0026
Epoch: 153 | Average Val Loss: 0.000071
Epoch: 153 | Average Euclidean Distance: 0.0139

--- Epoch 154/300 ---
Epoch: 154 | Average Training Loss: 0.000002
Epoch: 154 | Average Euclidean Distance: 0.0027
Epoch: 154 | Average Val Loss: 0.000068
Epoch: 154 | Average Euclidean Distance: 0.0138

--- Epoch 155/300 ---
Epoch: 155 | Average Training Loss: 0.000002
Epoch: 155 | Average Euclidean Distance: 0.0026
Epoch: 155 | Average Val Loss: 0.000072
Epoch: 155 | Average Euclidean Distance: 0.0140

--- Epoch 156/300 ---
Epoch: 156 | Average Training Loss: 0.000002
Epoch: 156 | Average Euclidean Distance: 0.0026
Epoch: 156 | Average Val Loss: 0.000068
Epoch: 156 | Average Euclide

In [10]:
# --- Environment report -----------------------------------------------------
# Only query the CUDA device when one is available; these calls raise otherwise,
# while the rest of the cell supports a CPU fallback through `device`.
if torch.cuda.is_available():
    print(f"Device: {torch.cuda.get_device_name(0)}")
    print(f"Compute Capability: {torch.cuda.get_device_capability(0)}")
else:
    print("Device: CPU (CUDA not available)")
print(f"PyTorch Version: {torch.__version__}")
print(f"PyTorch CUDA: {torch.version.cuda}")

# --- Configuration ----------------------------------------------------------
path = "Aariz Dataset"  # dataset root handed to AarizDataset
batch_size = 5
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Model ------------------------------------------------------------------
model = CephNet()
model.to(device)
# map_location allows weights saved on a GPU to be loaded on the CPU fallback.
model.load_state_dict(torch.load("best_model_epoch_195.pth", map_location=device))
# Inference mode: layers such as dropout / batch-norm (if the model has any)
# must not behave as they do during training.
model.eval()

# --- Data -------------------------------------------------------------------
test_data = AarizDataset(path, "TEST")
# Build the loader from the TEST split. The previous version passed
# `training_data` here, so the "test" visualisation actually showed training
# images and depended on a variable leaked from another cell.
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=True)

# --- Visualise predictions on one random test batch -------------------------
# Gradients are not needed for inference, so tracking is switched off.
with torch.no_grad():
    for images, _landmarks in test_dataloader:
        images = images.to(device)
        output = model(images)
        draw_batch(images, output)
        print(f"Shape of Output batch: {output.shape}")
        break  # a single batch is enough for a visual check


Device: NVIDIA GeForce RTX 5080 Laptop GPU
Compute Capability: (12, 0)
PyTorch Version: 2.10.0+cu128
PyTorch CUDA: 12.8
Shape of Output batch: torch.Size([5, 29, 2])
