In [10]:
import os
import pandas as pd
import numpy as np
from PIL import Image
from torchvision import transforms
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torchvision.models as models

In [11]:
path_to_images = "./BMI/Data/Images"
bmi_data = pd.read_csv("./BMI/Data/data.csv")

In [12]:
import os

image_folder = "./BMI/Data/Images"

# Count total rows before filtering
before = len(bmi_data)

# Keep only rows where the image file exists
bmi_data = bmi_data[bmi_data['name'].apply(lambda fname: os.path.exists(os.path.join(image_folder, fname)))].reset_index(drop=True)

# Count total rows after filtering
after = len(bmi_data)

# 🔍 Report how many were dropped
print(f"✅ Valid image entries: {after}/{before} (Dropped {before - after} missing)")

✅ Valid image entries: 3962/4206 (Dropped 244 missing)


In [13]:
bmi_data.head()

Unnamed: 0.1,Unnamed: 0,bmi,gender,is_training,name
0,0,34.207396,Male,1,img_0.bmp
1,1,26.45372,Male,1,img_1.bmp
2,2,34.967561,Female,1,img_2.bmp
3,3,22.044766,Female,1,img_3.bmp
4,6,25.845588,Female,1,img_6.bmp


In [14]:
class BMIDataset(Dataset):
    def __init__(self, dataframe, img_dir, transform=None):
        self.data = dataframe
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.data.iloc[idx]['name'])
        image = Image.open(img_path).convert("RGB")
        label = self.data.iloc[idx]['bmi']

        if self.transform:
            image = self.transform(image)

        return image, torch.tensor(label, dtype=torch.float32)

In [15]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

In [16]:
train_df = bmi_data[bmi_data['is_training'] == 1]
test_df = bmi_data[bmi_data['is_training'] == 0]

train_dataset = BMIDataset(train_df, path_to_images, transform)
test_dataset = BMIDataset(test_df, path_to_images, transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, 1)  # Replace final layer

model = model.to(device)



In [18]:
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for images, targets in train_loader:
        images, targets = images.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(images).squeeze()
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {running_loss / len(train_loader):.4f}")

Epoch 1, Loss: 841.5256
Epoch 2, Loss: 507.3380
Epoch 3, Loss: 320.8967
Epoch 4, Loss: 184.3296
Epoch 5, Loss: 94.7529
Epoch 6, Loss: 43.8090
Epoch 7, Loss: 18.3221
Epoch 8, Loss: 8.7604
Epoch 9, Loss: 5.1802
Epoch 10, Loss: 4.2157


In [19]:
torch.save(model.state_dict(), "resnet_bmi_weights.pth")

In [20]:
model.eval()
preds, labels = [], []

with torch.no_grad():
    for images, targets in test_loader:
        images = images.to(device)
        outputs = model(images).squeeze().cpu().numpy()
        preds.extend(outputs)
        labels.extend(targets.numpy())

from sklearn.metrics import mean_squared_error, r2_score
print("MSE:", mean_squared_error(labels, preds))
print("R² Score:", r2_score(labels, preds))

MSE: 53.999628013476446
R² Score: 0.3639082374155037


In [22]:
from scipy.stats import pearsonr
r_value, _ = pearsonr(labels, preds)
print("Pearson r:", r_value)

Pearson r: 0.6525977


In [21]:
import numpy as np

threshold = 1.0 # Adjust as needed
labels_np = np.array(labels)
preds_np = np.array(preds)

accuracy_within_threshold = np.mean(np.abs(labels_np - preds_np) <= threshold)
print(f"Accuracy within ±{threshold} BMI: {accuracy_within_threshold:.2%}")

Accuracy within ±1.0 BMI: 15.43%


In [24]:
class BMIDatasetGender(Dataset):
    def __init__(self, dataframe, img_dir, transform=None):
        self.data = dataframe
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.data.iloc[idx]['name'])
        image = Image.open(img_path).convert("RGB")
        bmi = self.data.iloc[idx]['bmi']
        gender = self.data.iloc[idx]['gender']  # assumes gender column exists (e.g., 'M' or 'F')

        if self.transform:
            image = self.transform(image)

        return image, torch.tensor(bmi, dtype=torch.float32), gender

In [26]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

In [27]:
train_df = bmi_data[bmi_data['is_training'] == 1]
test_df = bmi_data[bmi_data['is_training'] == 0]

train_dataset = BMIDatasetGender(train_df, path_to_images, transform)
test_dataset = BMIDatasetGender(test_df, path_to_images, transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [None]:
model.eval()
preds, labels, genders = [], [], []

with torch.no_grad():
    for images, targets, gender_batch in test_loader:
        images = images.to(device)
        outputs = model(images).squeeze().cpu().numpy()
        preds.extend(outputs)
        labels.extend(targets.numpy())
        genders.extend(gender_batch)  # gender_batch is a list of strings (e.g., 'Male', 'Female')

In [32]:
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
from scipy.stats import pearsonr

preds = np.array(preds)
labels = np.array(labels)
genders = np.array(genders)

for gender in ['Male', 'Female']:
    idx = np.where(genders == gender)[0]
    gender_preds = preds[idx]
    gender_labels = labels[idx]

    print(f"\n--- Gender: {gender} ---")
    print("MSE:", mean_squared_error(gender_labels, gender_preds))
    print("R² Score:", r2_score(gender_labels, gender_preds))
    r, _ = pearsonr(gender_labels, gender_preds)
    print("Pearson r:", round(r, 4))


--- Gender: Male ---
MSE: 48.5145263671875
R² Score: 0.39741820096969604
Pearson r: 0.6928

--- Gender: Female ---
MSE: 61.206207275390625
R² Score: 0.3206917643547058
Pearson r: 0.6013


Checking for Overfit


In [36]:
model.eval()
train_preds, train_labels, train_genders = [], [], []

with torch.no_grad():
    for images, targets, gender_batch in train_loader:
        images = images.to(device)
        outputs = model(images).squeeze().cpu().numpy()
        train_preds.extend(outputs)
        train_labels.extend(targets.numpy())
        train_genders.extend(gender_batch)  # gender_batch is a list of strings (e.g., 'Male', 'Female')

In [37]:
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
from scipy.stats import pearsonr

preds = np.array(train_preds)
labels = np.array(train_labels)
genders = np.array(train_genders)

for gender in ['Male', 'Female']:
    idx = np.where(genders == gender)[0]
    gender_preds = preds[idx]
    gender_labels = labels[idx]

    print(f"\n--- Gender: {gender} ---")
    print("MSE:", mean_squared_error(gender_labels, gender_preds))
    print("R² Score:", r2_score(gender_labels, gender_preds))
    r, _ = pearsonr(gender_labels, gender_preds)
    print("Pearson r:", round(r, 4))


--- Gender: Male ---
MSE: 2.7148563861846924
R² Score: 0.9548702836036682
Pearson r: 0.9902

--- Gender: Female ---
MSE: 2.4232895374298096
R² Score: 0.965533971786499
Pearson r: 0.9907


Redo the model training

In [42]:
import torch
from sklearn.metrics import r2_score
import numpy as np

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5)

best_val_loss = float('inf')
patience = 3
trigger_times = 0

for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0

    for images, targets, genders in train_loader:
        images, targets = images.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(images).squeeze()
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    train_loss /= len(train_loader)

    # Validation
    model.eval()
    val_loss = 0.0
    val_preds, val_labels, val_genders = [], [], []

    with torch.no_grad():
        for images, targets, genders in test_loader:
            images, targets = images.to(device), targets.to(device)
            outputs = model(images).squeeze()
            loss = criterion(outputs, targets)
            val_loss += loss.item()

            val_preds.extend(outputs.cpu().numpy())
            val_labels.extend(targets.cpu().numpy())
            val_genders.extend(genders)

    val_loss /= len(test_loader)
    val_r2 = r2_score(val_labels, val_preds)

    for gender in ['Male', 'Female']:
        idx = np.where(val_genders == gender)[0]
        if len(idx) > 1:
            r, _ = pearsonr(val_labels[idx], val_preds[idx])
            print(f"Val Pearson r ({gender}): {r:.3f}")
        else:
            print(f"Val Pearson r ({gender}): N/A (not enough samples)")

    if len(val_labels) > 1:
        r_all, _ = pearsonr(val_labels, val_preds)
        print(f"Val Pearson r (All): {r_all:.3f}")
    else:
        print("Val Pearson r (All): N/A (not enough samples)")

    print(f"Epoch {epoch+1}: Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val R²: {val_r2:.3f}")

    # Early stopping
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        trigger_times = 0
        torch.save(model.state_dict(), "best_model.pth")  # Save best model
    else:
        trigger_times += 1
        print(f"→ Early stopping counter: {trigger_times}/{patience}")
        if trigger_times >= patience:
            print("⛔ Early stopping triggered.")
            break

  idx = np.where(val_genders == gender)[0]


Val Pearson r (Male): N/A (not enough samples)
Val Pearson r (Female): N/A (not enough samples)
Val Pearson r (All): 0.299
Epoch 1: Train Loss: 785.1815 | Val Loss: 674.2072 | Val R²: -6.914


  idx = np.where(val_genders == gender)[0]


Val Pearson r (Male): N/A (not enough samples)
Val Pearson r (Female): N/A (not enough samples)
Val Pearson r (All): 0.635
Epoch 2: Train Loss: 484.1349 | Val Loss: 441.6921 | Val R²: -4.198


  idx = np.where(val_genders == gender)[0]


Val Pearson r (Male): N/A (not enough samples)
Val Pearson r (Female): N/A (not enough samples)
Val Pearson r (All): 0.607
Epoch 3: Train Loss: 301.6793 | Val Loss: 318.9215 | Val R²: -2.750


  idx = np.where(val_genders == gender)[0]


Val Pearson r (Male): N/A (not enough samples)
Val Pearson r (Female): N/A (not enough samples)
Val Pearson r (All): 0.647
Epoch 4: Train Loss: 168.0639 | Val Loss: 168.9425 | Val R²: -0.989


  idx = np.where(val_genders == gender)[0]


Val Pearson r (Male): N/A (not enough samples)
Val Pearson r (Female): N/A (not enough samples)
Val Pearson r (All): 0.625
Epoch 5: Train Loss: 80.5640 | Val Loss: 140.0996 | Val R²: -0.647


  idx = np.where(val_genders == gender)[0]


Val Pearson r (Male): N/A (not enough samples)
Val Pearson r (Female): N/A (not enough samples)
Val Pearson r (All): 0.658
Epoch 6: Train Loss: 34.0328 | Val Loss: 65.4829 | Val R²: 0.228


  idx = np.where(val_genders == gender)[0]


Val Pearson r (Male): N/A (not enough samples)
Val Pearson r (Female): N/A (not enough samples)
Val Pearson r (All): 0.640
Epoch 7: Train Loss: 13.4323 | Val Loss: 63.6234 | Val R²: 0.250


  idx = np.where(val_genders == gender)[0]


Val Pearson r (Male): N/A (not enough samples)
Val Pearson r (Female): N/A (not enough samples)
Val Pearson r (All): 0.643
Epoch 8: Train Loss: 7.4238 | Val Loss: 49.6673 | Val R²: 0.413


  idx = np.where(val_genders == gender)[0]


Val Pearson r (Male): N/A (not enough samples)
Val Pearson r (Female): N/A (not enough samples)
Val Pearson r (All): 0.639
Epoch 9: Train Loss: 5.4138 | Val Loss: 57.2754 | Val R²: 0.325
→ Early stopping counter: 1/3
Val Pearson r (Male): N/A (not enough samples)
Val Pearson r (Female): N/A (not enough samples)
Val Pearson r (All): 0.656
Epoch 10: Train Loss: 4.7388 | Val Loss: 50.7534 | Val R²: 0.400
→ Early stopping counter: 2/3


  idx = np.where(val_genders == gender)[0]


In [43]:
model.eval()
train_preds, train_labels, train_genders = [], [], []

with torch.no_grad():
    for images, targets, gender_batch in train_loader:
        images = images.to(device)
        outputs = model(images).squeeze().cpu().numpy()
        train_preds.extend(outputs)
        train_labels.extend(targets.numpy())
        train_genders.extend(gender_batch)  # gender_batch is a list of strings (e.g., 'Male', 'Female')

In [44]:
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
from scipy.stats import pearsonr

preds = np.array(train_preds)
labels = np.array(train_labels)
genders = np.array(train_genders)

for gender in ['Male', 'Female']:
    idx = np.where(genders == gender)[0]
    gender_preds = preds[idx]
    gender_labels = labels[idx]

    print(f"\n--- Gender: {gender} ---")
    print("MSE:", mean_squared_error(gender_labels, gender_preds))
    print("R² Score:", r2_score(gender_labels, gender_preds))
    r, _ = pearsonr(gender_labels, gender_preds)
    print("Pearson r:", round(r, 4))


--- Gender: Male ---
MSE: 1.3449336290359497
R² Score: 0.977642834186554
Pearson r: 0.989

--- Gender: Female ---
MSE: 1.6741993427276611
R² Score: 0.9761881232261658
Pearson r: 0.9889
