In [1]:
!pip install efficientnet_pytorch

Collecting efficientnet_pytorch
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->efficientnet_pytorch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->efficientnet_pytorch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->efficientnet_pytorch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->efficientnet_pytorch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch->efficientnet_pytorch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metada

In [25]:
import torch
import torch.nn as nn
from efficientnet_pytorch import EfficientNet
from torch.utils.data import Dataset
from PIL import Image
import pandas as pd
import os
from torchvision import transforms
from torch.utils.data import random_split, DataLoader
import torch.optim as optim
import zipfile
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# about 3 min
import zipfile

zip_path = "/content/drive/MyDrive/archive.zip"
extract_path = "/content/archive_unzipped"

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

In [17]:
class BoneAgeDataset(Dataset):
    def __init__(self, csv_path, image_dir, transform=None):
        self.df = pd.read_csv(csv_path)
        self.image_dir = image_dir
        self.transform = transform
        self.sex_map = {"True": 0, "False": 1}

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = os.path.join(self.image_dir, str(row['id']))
        img_path_ext = img_path + ".png"
        image = Image.open(img_path_ext).convert("RGB")
        if self.transform:
            image = self.transform(image)

        #print(self.sex_map[str(row["male"])])
        sex = self.sex_map[str(row["male"])]
        age = row['boneage']

        return image, torch.tensor(sex), torch.tensor(age).float(), str(row['id'])

In [51]:
# do not use ATM this class
class TestBoneAgeDataset(Dataset):
    def __init__(self, csv_path, image_dir, transform=None):
        self.df = pd.read_csv(csv_path)
        self.image_dir = image_dir
        self.transform = transform
        self.sex_map = {"TRUE": 0, "FALSE": 1}

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = os.path.join(self.image_dir, str(row['id']))
        print(f"IMMAGINE: {img_path}")
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)

        #print(self.sex_map[str(row["male"])])
        sex = self.sex_map[str(row["male"]).upper()]

        return image, torch.tensor(sex), str(row['id'])

In [7]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

In [20]:
class BoneAgeRegressor(nn.Module):
    def __init__(self):
        super().__init__()
        self.backbone = EfficientNet.from_pretrained('efficientnet-b0')
        for param in self.backbone.parameters():
            param.requires_grad = False

        self.backbone._fc = nn.Identity()
        self.fc = nn.Sequential(
            nn.Linear(1280 + 1, 128),  # 1280 = output di EfficientNet-B0
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 1)  # Output: età in mesi
        )

    def forward(self, image, sex):
        x = self.backbone(image)
        sex = sex.view(-1, 1).float()
        x = torch.cat([x, sex], dim=1)
        return self.fc(x)

In [21]:
dataset = BoneAgeDataset(csv_path="/content/archive_unzipped/boneage-training-dataset.csv",
                         image_dir="/content/archive_unzipped/boneage-training-dataset/boneage-training-dataset",
                         transform=transform)

total_size = len(dataset)
train_size = int(0.8 * total_size)
val_size = int(0.1 * total_size)
test_size = total_size - train_size - val_size

train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=32, num_workers=2, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=32, num_workers=2, pin_memory=True)

In [22]:
def train_model(model, train_loader, val_loader, criterion, optimizer,
                num_epochs=10, save_path='best_model.pth', patience=3):
    best_val_loss = float('inf')
    train_losses, val_losses = [], []
    patience_counter = 0

    print(f"inizio loop epoch")
    for epoch in range(num_epochs):
        print(f"dentro loop epoch")
        model.train()
        running_loss = 0.0
        for images, sexes, ages, _ in train_loader:
            images, sexes, ages = images.to(device), sexes.to(device), ages.to(device).view(-1, 1)
            optimizer.zero_grad()
            loss = criterion(model(images, sexes), ages)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * images.size(0)
        train_loss = running_loss / len(train_loader.dataset)
        train_losses.append(train_loss)

        print(f"inizio eval")
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for images, sexes, ages in val_loader:
                images, sexes, ages = images.to(device), sexes.to(device), ages.to(device).view(-1, 1)
                val_loss += criterion(model(images, sexes), ages).item() * images.size(0)
        val_loss = val_loss / len(val_loader.dataset)
        val_losses.append(val_loss)

        print(f"Epoch {epoch+1}: Train MAE {train_loss:.2f}, Val MAE {val_loss:.2f}")
        if val_loss < best_val_loss:
            print("Saving best model...")
            best_val_loss = val_loss
            patience_counter = 0
            torch.save(model.state_dict(), save_path)
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping")
                break

    # Plot
    plt.plot(train_losses, label="Train MAE")
    plt.plot(val_losses, label="Val MAE")
    plt.legend()
    plt.xlabel("Epoch")
    plt.ylabel("MAE")
    plt.grid()
    plt.show()

In [28]:
def predict_test(model, test_loader, output_csv='submission.csv'):
    model.eval()
    results = []
    y_true, y_pred = [], []

    with torch.no_grad():
        for images, sexes, ages, ids in test_loader:
            images, sexes = images.to(device), sexes.to(device)
            preds = model(images, sexes).cpu().numpy()
            y_pred.extend(preds)
            y_true.extend(ages)
            for id, pred in zip(ids, preds):
                results.append((id, round(float(pred), 2)))

    import csv
    with open(output_csv, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['id', 'boneage'])
        for row in results:
            writer.writerow(row)

    print(f"Predizioni salvate in: {output_csv}")

    mae = mean_absolute_error(y_true, y_pred)
    rmse = mean_squared_error(y_true, y_pred) ** 0.5
    r2 = r2_score(y_true, y_pred)

    print(f"Metriche di valutazione:")
    print(f"MAE:  {mae:.2f} mesi")
    print(f"RMSE: {rmse:.2f} mesi")
    print(f"R²:   {r2:.3f}")

In [37]:
def predict_submission(model, submission_loader, output_csv='submission.csv'):
    model.eval()
    results = []

    with torch.no_grad():
        for images, sexes, ids in submission_loader:
            images, sexes = images.to(device), sexes.to(device)
            preds = model(images, sexes).cpu().numpy()
            for id, pred in zip(ids, preds):
                results.append((id, round(float(pred), 2)))

    import csv
    with open(output_csv, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['id', 'boneage'])
        for row in results:
            writer.writerow(row)

    print(f"Predizioni salvate in: {output_csv}")

In [12]:
# Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}")

# Model
model = BoneAgeRegressor().to(device)
print(f"modello inizializzato")

# Loss e ottimizzatore
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Allenamento
train_model(model, train_loader, val_loader, criterion, optimizer)

Device: cuda


Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth" to /root/.cache/torch/hub/checkpoints/efficientnet-b0-355c32eb.pth
100%|██████████| 20.4M/20.4M [00:00<00:00, 101MB/s] 


Loaded pretrained weights for efficientnet-b0
modello inizializzato
inizio loop epoch
dentro loop epoch
inizio eval
Epoch 1: Train MAE 56.79, Val MAE 36.43
Saving best model...
dentro loop epoch
inizio eval
Epoch 2: Train MAE 35.92, Val MAE 30.79
Saving best model...
dentro loop epoch
inizio eval
Epoch 3: Train MAE 31.69, Val MAE 27.09
Saving best model...
dentro loop epoch
inizio eval
Epoch 4: Train MAE 29.57, Val MAE 24.71
Saving best model...
dentro loop epoch
inizio eval
Epoch 5: Train MAE 27.91, Val MAE 23.49
Saving best model...
dentro loop epoch
inizio eval
Epoch 6: Train MAE 27.04, Val MAE 23.09
Saving best model...
dentro loop epoch
inizio eval
Epoch 7: Train MAE 26.17, Val MAE 22.45
Saving best model...
dentro loop epoch
inizio eval
Epoch 8: Train MAE 25.55, Val MAE 21.65
Saving best model...
dentro loop epoch
inizio eval
Epoch 9: Train MAE 24.89, Val MAE 20.86
Saving best model...
dentro loop epoch
inizio eval
Epoch 10: Train MAE 24.40, Val MAE 21.82


AttributeError: module 'matplotlib' has no attribute 'plot'

In [29]:
# Carica miglior modello e predici
model.load_state_dict(torch.load("best_model.pth"))
predict_test(model, test_loader)

  results.append((id, round(float(pred), 2)))


Predizioni salvate in: submission.csv
Metriche di valutazione:
MAE:  19.47 mesi
RMSE: 24.94 mesi
R²:   0.624


In [52]:
submission_dataset = TestBoneAgeDataset(csv_path="/content/drive/MyDrive/test/test.csv",
                         image_dir="/content/drive/MyDrive/test",
                         transform=transform)
submission_loader = DataLoader(submission_dataset, batch_size=32)
model.load_state_dict(torch.load("best_model.pth"))
predict_submission(model, submission_loader)

IMMAGINE: /content/drive/MyDrive/test/1.png
IMMAGINE: /content/drive/MyDrive/test/2.png
IMMAGINE: /content/drive/MyDrive/test/3.png
IMMAGINE: /content/drive/MyDrive/test/4.png
IMMAGINE: /content/drive/MyDrive/test/5.png
IMMAGINE: /content/drive/MyDrive/test/6.png
IMMAGINE: /content/drive/MyDrive/test/7.png
IMMAGINE: /content/drive/MyDrive/test/8.png
IMMAGINE: /content/drive/MyDrive/test/9.png
IMMAGINE: /content/drive/MyDrive/test/10.png
IMMAGINE: /content/drive/MyDrive/test/11.png
IMMAGINE: /content/drive/MyDrive/test/12.png
IMMAGINE: /content/drive/MyDrive/test/13.png
IMMAGINE: /content/drive/MyDrive/test/14.png
IMMAGINE: /content/drive/MyDrive/test/15.png
IMMAGINE: /content/drive/MyDrive/test/16.png
IMMAGINE: /content/drive/MyDrive/test/17.png
IMMAGINE: /content/drive/MyDrive/test/18.png
IMMAGINE: /content/drive/MyDrive/test/19.png
IMMAGINE: /content/drive/MyDrive/test/20.png
IMMAGINE: /content/drive/MyDrive/test/21.png
IMMAGINE: /content/drive/MyDrive/test/22.png
IMMAGINE: /content/

  results.append((id, round(float(pred), 2)))
