### EfficientNetV2-S BMI regression from images (without the sex feature)

Load packages

In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from scipy.stats import pearsonr
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn as nn
import copy
from tqdm import tqdm
import timm

Load Data

In [2]:
csv_path = '/Users/yuhsuanko/Desktop/UChicago/UChicago_Q3/ML_II/Final_Project/BMI/Data/data.csv'
img_dir = '/Users/yuhsuanko/Desktop/UChicago/UChicago_Q3/ML_II/Final_Project/BMI/Data/Images'

# Read the metadata table, add a numeric `sex` column derived from `gender`
# (Male -> 0, Female -> 1), and keep only the rows flagged as training data.
df = (
    pd.read_csv(csv_path)
    .assign(sex=lambda frame: frame['gender'].map({'Male': 0, 'Female': 1}))
    .loc[lambda frame: frame['is_training'] == 1]
    .reset_index(drop=True)
)

In [3]:
# Hold out 20% of the rows for validation; the fixed seed keeps the split stable
# across re-runs.
df_train, df_val = train_test_split(df, test_size=0.2, random_state=42)

# Standardisation statistics are computed on the training split only and then
# applied to both splits, so no validation information leaks into the scaling.
mean_bmi = df_train['bmi'].mean()
std_bmi = df_train['bmi'].std()
print(f"Training BMI mean: {mean_bmi:.2f}, std: {std_bmi:.2f}")

# `.assign` builds new frames instead of writing a column into the row
# selections returned by train_test_split, which avoids pandas'
# SettingWithCopyWarning and makes this cell safe to re-run.
df_train = df_train.assign(bmi_z=(df_train['bmi'] - mean_bmi) / std_bmi)
df_val = df_val.assign(bmi_z=(df_val['bmi'] - mean_bmi) / std_bmi)

# Persist both splits; BMIDataset reads them back from these CSV files.
df_train.to_csv("train_split.csv", index=False)
df_val.to_csv("val_split.csv", index=False)

Training BMI mean: 32.53, std: 8.04


Transform and Dataset

In [4]:
# Preprocessing applied to every image: resize to 224x224, convert to a tensor,
# then normalise each channel with the mean/std values listed below.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [5]:
class BMIDataset(Dataset):
    """Image / BMI z-score dataset backed by a CSV split file.

    The CSV must contain a ``name`` column (image file name, joined onto
    ``img_dir``) and a ``bmi_z`` column (the regression target).

    ``dataset[idx]`` returns ``(image, target)`` where ``image`` is the RGB
    image after ``transform`` (or the PIL image itself when ``transform`` is
    falsy) and ``target`` is a float32 scalar tensor holding ``bmi_z``.

    Rows whose sample cannot be loaded are skipped: the next row (wrapping
    around) is returned instead. If no row at all can be loaded a
    ``RuntimeError`` is raised instead of looping forever.
    """

    # Columns __getitem__ actually reads; checked once at construction time.
    _REQUIRED_COLUMNS = ('name', 'bmi_z')

    def __init__(self, csv_path, img_dir, transform=None):
        """Read ``csv_path`` and remember where the images live.

        Raises:
            KeyError: if the CSV lacks one of the required columns.
        """
        self.data = pd.read_csv(csv_path).reset_index(drop=True)
        missing = [col for col in self._REQUIRED_COLUMNS if col not in self.data.columns]
        if missing:
            raise KeyError(f"{csv_path} is missing required column(s): {missing}")
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def _load_sample(self, row):
        """Load the image and target stored at positional row ``row``."""
        img_path = os.path.join(self.img_dir, self.data.loc[row, 'name'].strip())
        # The context manager releases the file handle; convert() returns an
        # independent copy, so the image stays usable after the file is closed.
        with Image.open(img_path) as raw:
            image = raw.convert("RGB")
        if self.transform:
            image = self.transform(image)
        bmi_z = self.data.loc[row, 'bmi_z']
        return image, torch.tensor(bmi_z, dtype=torch.float32)

    def __getitem__(self, idx):
        """Return the sample at ``idx``, falling forward past unloadable rows.

        Raises:
            IndexError: if ``idx`` is outside ``[-len, len)``; this is what
                lets plain ``for sample in dataset`` iteration terminate.
            RuntimeError: if every row fails to load (the last underlying
                error is attached as ``__cause__``).
        """
        n_rows = len(self.data)
        if idx < 0:
            idx += n_rows
        if not 0 <= idx < n_rows:
            raise IndexError(f"index {idx} out of range for dataset of size {n_rows}")

        last_error = None
        # Try each row at most once, starting at idx and wrapping around.
        for offset in range(n_rows):
            row = (idx + offset) % n_rows
            try:
                return self._load_sample(row)
            except Exception as exc:  # best-effort skip; Ctrl-C still propagates
                last_error = exc
        raise RuntimeError(
            f"No loadable sample among {n_rows} rows (img_dir={self.img_dir!r})"
        ) from last_error

In [6]:
# Datasets read the split files written earlier; both use the same preprocessing.
train_ds = BMIDataset(csv_path="train_split.csv", img_dir=img_dir, transform=transform)
val_ds = BMIDataset(csv_path="val_split.csv", img_dir=img_dir, transform=transform)

# Only the training loader shuffles; validation keeps file order.
train_loader = DataLoader(dataset=train_ds, batch_size=32, shuffle=True)
val_loader = DataLoader(dataset=val_ds, batch_size=32)

EfficientNetV2

In [7]:
import torch
import torch.nn as nn
from torchvision.models import efficientnet_v2_s, EfficientNet_V2_S_Weights

class EfficientNetBMI_NoSex(nn.Module):
    """EfficientNetV2-S backbone followed by a small MLP that outputs one value.

    The backbone is created with ``EfficientNet_V2_S_Weights.DEFAULT`` and its
    classifier is replaced by ``nn.Identity`` so it emits the pooled feature
    vector; ``fc`` maps that vector to a single regression output per image.
    """

    def __init__(self):
        super().__init__()
        net = efficientnet_v2_s(weights=EfficientNet_V2_S_Weights.DEFAULT)
        # Width of the feature vector that fed the original classification layer.
        feature_dim = net.classifier[1].in_features
        net.classifier = nn.Identity()
        self.backbone = net

        head_layers = [
            nn.BatchNorm1d(feature_dim),
            nn.Linear(feature_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 1),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return the regression output of ``fc`` applied to the backbone features."""
        return self.fc(self.backbone(x))


In [8]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = EfficientNetBMI_NoSex()
model = model.to(device)

Loss, Optimizer, Evaluate

In [10]:
# Mean-squared-error objective on the targets, optimised with AdamW.
optimizer = torch.optim.AdamW(
    params=model.parameters(),
    lr=1e-3,
    weight_decay=1e-4,
)
criterion = nn.MSELoss()

def evaluate(model, val_loader, mean_bmi, std_bmi):
    """Score ``model`` on ``val_loader`` in de-normalised BMI units.

    Args:
        model: network whose output holds one value per input image.
        val_loader: iterable of ``(images, labels_z)`` batches.
        mean_bmi, std_bmi: statistics used to undo the z-score scaling
            (``value * std_bmi + mean_bmi``) for predictions and targets.

    Returns:
        ``(mse, mae, corr)``: mean squared error, mean absolute error and the
        Pearson correlation between targets and predictions.

    Raises:
        ValueError: if ``val_loader`` yields no batches.

    Note:
        The model is switched to eval mode and left there.
    """
    model.eval()
    pred_chunks, target_chunks = [], []
    with torch.no_grad():
        for images, labels_z in val_loader:
            images = images.to(device)
            labels_z = labels_z.to(device)
            # reshape(-1) always yields a 1-D tensor. A bare squeeze() turns a
            # final batch of size 1 into a 0-d tensor, which cannot be iterated
            # when the per-batch results are collected.
            outputs = model(images).reshape(-1)
            pred_chunks.append(outputs.cpu().numpy())
            target_chunks.append(labels_z.reshape(-1).cpu().numpy())

    if not pred_chunks:
        raise ValueError("val_loader produced no batches; nothing to evaluate")

    # Undo the z-score scaling so the metrics are in BMI units.
    preds = np.concatenate(pred_chunks) * std_bmi + mean_bmi
    targets = np.concatenate(target_chunks) * std_bmi + mean_bmi

    mse = mean_squared_error(targets, preds)
    mae = mean_absolute_error(targets, preds)
    corr, _ = pearsonr(targets, preds)
    return mse, mae, corr

Training

In [13]:
# Train for a fixed number of epochs, validating after each one and keeping the
# weights with the lowest validation MAE (in BMI units).
best_mae = float('inf')
best_model_state = None
epochs = 5
train_losses, val_maes, val_corrs = [], [], []

for epoch in range(epochs):
    model.train()
    # Sum (not mean) of the per-batch MSE losses over the epoch.
    running_loss = 0.0
    for images, labels_z in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
        images = images.to(device)
        labels_z = labels_z.to(device)

        optimizer.zero_grad()
        # reshape(-1) keeps the output 1-D for every batch size; a bare
        # squeeze() would give a 0-d tensor for a batch of one and no longer
        # match the shape of labels_z.
        outputs = model(images).reshape(-1)
        loss = criterion(outputs, labels_z)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    val_mse, val_mae, val_corr = evaluate(model, val_loader, mean_bmi, std_bmi)
    train_losses.append(running_loss)
    val_maes.append(val_mae)
    val_corrs.append(val_corr)

    print(f"Epoch {epoch+1} - Loss: {running_loss:.2f}, Val MSE: {val_mse:.2f}, MAE: {val_mae:.2f}, Corr: {val_corr:.3f}")

    if val_mae < best_mae:
        best_mae = val_mae
        # Deep copy so later optimizer steps do not mutate the saved snapshot.
        best_model_state = copy.deepcopy(model.state_dict())
        torch.save(best_model_state, "best_bmi_model_no_sex.pt")
        print(f"Saved new best model at epoch {epoch+1} with MAE: {val_mae:.2f}")


Epoch 1: 100%|██████████| 85/85 [25:15<00:00, 17.82s/it]


Epoch 1 - Loss: 76.67, Val MSE: 65.55, MAE: 6.19, Corr: 0.164
Saved new best model at epoch 1 with MAE: 6.19


Epoch 2: 100%|██████████| 85/85 [24:50<00:00, 17.54s/it]


Epoch 2 - Loss: 65.43, Val MSE: 57.55, MAE: 5.51, Corr: 0.400
Saved new best model at epoch 2 with MAE: 5.51


Epoch 3: 100%|██████████| 85/85 [24:54<00:00, 17.59s/it]


Epoch 3 - Loss: 58.77, Val MSE: 179.94, MAE: 7.06, Corr: 0.178


Epoch 4: 100%|██████████| 85/85 [24:48<00:00, 17.51s/it]


Epoch 4 - Loss: 47.99, Val MSE: 46.46, MAE: 4.87, Corr: 0.650
Saved new best model at epoch 4 with MAE: 4.87


Epoch 5: 100%|██████████| 85/85 [25:14<00:00, 17.82s/it]


Epoch 5 - Loss: 34.49, Val MSE: 39.47, MAE: 4.69, Corr: 0.636
Saved new best model at epoch 5 with MAE: 4.69


Model testing

In [14]:
# Rebuild the architecture and restore the best checkpoint written by the
# training cell. The path is the same relative path used by torch.save there,
# so this always loads the file that was just produced. map_location remaps
# the stored tensors onto the current device, so a checkpoint saved on a GPU
# also loads on a CPU-only machine.
model = EfficientNetBMI_NoSex().to(device)
model.load_state_dict(
    torch.load("best_bmi_model_no_sex.pt", map_location=device)
)
model.eval()


EfficientNetBMI_NoSex(
  (backbone): EfficientNet(
    (features): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
        (2): SiLU(inplace=True)
      )
      (1): Sequential(
        (0): FusedMBConv(
          (block): Sequential(
            (0): Conv2dNormActivation(
              (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
              (1): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
              (2): SiLU(inplace=True)
            )
          )
          (stochastic_depth): StochasticDepth(p=0.0, mode=row)
        )
        (1): FusedMBConv(
          (block): Sequential(
            (0): Conv2dNormActivation(
              (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
              (1): B

Predict BMI for a single image (the model output is de-normalized with the training-set BMI mean and std)

In [18]:
from PIL import Image

def predict_bmi_from_image(image_path,transform, model, mean_bmi, std_bmi):
    """Predict the BMI for one image file.

    The image is opened as RGB, passed through ``transform``, given a leading
    batch dimension and moved to ``device``. The scalar model output is then
    mapped back to BMI units via ``output * std_bmi + mean_bmi`` and rounded
    to two decimals.
    """
    model.eval()
    rgb_image = Image.open(image_path).convert("RGB")
    batch = transform(rgb_image).unsqueeze(0).to(device)

    with torch.no_grad():
        z_score = model(batch).item()

    # Undo the z-score scaling to get back to actual BMI.
    return round(z_score * std_bmi + mean_bmi, 2)


In [20]:
# Example: predict the BMI for one image from the dataset folder. The path is
# built from `img_dir` rather than repeating the absolute directory literal,
# so the data location is defined in exactly one place.
bmi = predict_bmi_from_image(
    image_path=os.path.join(img_dir, "img_4016.bmp"),
    transform=transform,
    model=model,
    mean_bmi=mean_bmi,
    std_bmi=std_bmi
)

print(f"Predicted BMI: {bmi}")


Predicted BMI: 35.18
