In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
%matplotlib inline

from PIL import Image
import os 
import glob
import random

# Seed every random number generator the notebook relies on, not just Python's
# `random`: PyTorch's generator drives DataLoader shuffling and the random
# torchvision transforms, and NumPy's global generator is seeded for completeness.
import torch

SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)


In [3]:
# Restart

In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Set your base path where images are stored.
image_base_path = "BMI/Data/Images"  # update this to your actual image directory

# Load the CSV file once (the cell previously read the same file twice).
df = pd.read_csv("data.csv")

# Create a new column 'file_path' with the full path for each image.
df['file_path'] = df['name'].apply(lambda x: os.path.join(image_base_path, x))
# NOTE(review): fillna(0) also turns a missing 'bmi' into a 0.0 target and a missing
# 'gender' into 0, which the map below converts back to NaN — confirm the CSV has no
# missing values, or drop such rows instead of filling them.
df = df.fillna(0)
# Keep only the rows where the file exists. The explicit copy makes the filtered
# frame independent of the frame it was sliced from before a column is added to it.
df = df[df['file_path'].apply(os.path.exists)].copy()
# Numeric encoding of gender used as a model input: Male -> 0, Female -> 1.
df['sex'] = df['gender'].map({'Male': 0, 'Female': 1})

# Disabled experiment: Box-Cox transform of the BMI target.
# from scipy.stats import boxcox

# # Assume df is your DataFrame and it contains a 'bmi' column
# # Check if BMI values are strictly positive. If not, shift them by adding a constant.
# if (df['bmi'] <= 0).any():
#     offset = 1 - df['bmi'].min()  # ensure minimum value becomes 1
#     df['bmi_adjusted'] = df['bmi'] + offset
# else:
#     df['bmi_adjusted'] = df['bmi']

# # Apply the Box-Cox transformation to the adjusted BMI column.
# df['bmi_boxcox'], fitted_lambda = boxcox(df['bmi_adjusted'])

# print("Fitted lambda for Box-Cox transformation:", fitted_lambda)

# Split into training and test sets based on the 'is_training' flag.
df_train_full = df[df['is_training'] == 1]
df_test = df[df['is_training'] == 0]
# Further split the full training set into train and validation sets (80/20 split);
# the fixed random_state keeps the split identical between runs.
df_train, df_valid = train_test_split(df_train_full, test_size=0.2, random_state=42)


In [9]:
import os



def build_img_list(df, image_base_path):
    """Turn a metadata frame into a list of ``(image_path, bmi, sex)`` tuples.

    One tuple is produced per row, in row order. The path is ``image_base_path``
    joined with the row's 'name' value; 'bmi' and 'sex' are passed through as-is.
    The frame must provide the columns 'name', 'bmi' and 'sex'.
    """
    samples = []
    for _, record in df.iterrows():
        full_path = os.path.join(image_base_path, record['name'])
        samples.append((full_path, record['bmi'], record['sex']))
    return samples

# Build one (path, bmi, sex) sample list per split; every split shares the same image root.
train_img_lst, valid_img_lst, test_img_lst = (
    build_img_list(split_df, image_base_path)
    for split_df in (df_train, df_valid, df_test)
)


In [11]:
from torchvision import transforms

# Geometry and normalisation constants shared by both pipelines.
# NOTE(review): these per-channel statistics look like the CIFAR-10 values rather
# than the ImageNet ones usually paired with torchvision's pretrained backbones —
# confirm they match what the saved checkpoints were trained with before changing them.
_RESIZE_SIZE = (256, 256)
_CROP_SIZE = (224, 224)
_CHANNEL_MEAN = [0.4914, 0.4822, 0.4465]
_CHANNEL_STD = [0.2023, 0.1994, 0.2010]

# Training pipeline: resize, then random crop and random rotation as augmentation.
train_transforms = transforms.Compose([
    transforms.Resize(_RESIZE_SIZE),
    transforms.RandomCrop(_CROP_SIZE),
    transforms.RandomRotation(degrees=20),  # rotation angle drawn from [-20, +20] degrees
    transforms.ToTensor(),
    transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
])

# Evaluation pipeline: same sizes, but a deterministic centre crop and no augmentation.
valid_transforms = transforms.Compose([
    transforms.Resize(_RESIZE_SIZE),
    transforms.CenterCrop(_CROP_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
])


In [12]:
import torch
from torch.utils.data import Dataset
from PIL import Image

class CustomImageDataset(Dataset):
    """Dataset that yields ``(image, bmi, sex)`` triples from a list of samples.

    Images are read from disk on every access rather than being cached.
    """

    def __init__(self, img_list, transform=None):
        """
        img_list: List of tuples (image_path, bmi, sex)
        transform: Optional callable applied to the RGB PIL image
        """
        self.img_list = img_list
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.img_list)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, bmi, sex)``.

        ``image`` is the RGB image after ``transform`` (the raw PIL image when no
        transform was given); ``bmi`` and ``sex`` are 0-dim float32 tensors.
        """
        img_path, bmi, sex = self.img_list[idx]
        # Open inside a context manager so the file handle is released right away;
        # convert() returns a new in-memory image that stays valid after the file closes.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")
        # Compare against None explicitly: a valid transform may be falsy
        # (for example an empty nn.Sequential) and must still be applied.
        if self.transform is not None:
            image = self.transform(image)

        # Convert bmi and sex to tensors (adjust types as needed)
        bmi = torch.tensor(bmi, dtype=torch.float32)
        sex = torch.tensor(sex, dtype=torch.float32)

        return image, bmi, sex


In [13]:
# Display the filtered metadata frame, including the derived 'file_path' and 'sex' columns.
df

Unnamed: 0.1,Unnamed: 0,bmi,gender,is_training,name,file_path,sex
0,0,34.207396,Male,1,img_0.bmp,BMI/Data/Images\img_0.bmp,0
1,1,26.453720,Male,1,img_1.bmp,BMI/Data/Images\img_1.bmp,0
2,2,34.967561,Female,1,img_2.bmp,BMI/Data/Images\img_2.bmp,1
3,3,22.044766,Female,1,img_3.bmp,BMI/Data/Images\img_3.bmp,1
6,6,25.845588,Female,1,img_6.bmp,BMI/Data/Images\img_6.bmp,1
...,...,...,...,...,...,...,...
4201,4201,34.078947,Male,0,img_4201.bmp,BMI/Data/Images\img_4201.bmp,0
4202,4202,34.564776,Female,0,img_4202.bmp,BMI/Data/Images\img_4202.bmp,1
4203,4203,27.432362,Female,0,img_4203.bmp,BMI/Data/Images\img_4203.bmp,1
4204,4204,40.492800,Male,0,img_4204.bmp,BMI/Data/Images\img_4204.bmp,0


In [14]:
# Wrap each split's sample list in a Dataset. The training split uses the augmenting
# pipeline; validation and test share the deterministic one.
train_data = CustomImageDataset(img_list=train_img_lst, transform=train_transforms)
valid_data = CustomImageDataset(img_list=valid_img_lst, transform=valid_transforms)
test_data = CustomImageDataset(img_list=test_img_lst, transform=valid_transforms)

In [15]:
batch_size = 32  # samples per mini-batch for every split

# Only the training loader shuffles. Validation and test are iterated in dataset
# order so that evaluation runs visit samples in the same order every time.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=batch_size, shuffle=False)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [21]:
import torch.nn as nn
import torchvision

In [23]:
# import torch


class ResNet50(nn.Module):
    """ResNet-50 backbone with a one-output regression head that also receives sex.

    The backbone's own classifier is replaced by an identity, so it emits its
    pooled feature vector; that vector is concatenated with the sex value and
    mapped to a single output by ``fc_reg``.
    """

    def __init__(self, load_weights=True, freeze_hidden_layers=False):
        """
        load_weights: forwarded to torchvision as ``pretrained``.
        freeze_hidden_layers: when True, disable gradients for every backbone
            parameter and leave only ``fc_reg`` trainable.
        """
        super().__init__()

        backbone = torchvision.models.resnet50(pretrained=load_weights)
        # Remember the width of the original classifier's input before discarding it.
        feature_dim = backbone.fc.in_features
        backbone.fc = nn.Identity()
        self.base_model = backbone

        # One extra input feature on top of the image features: the sex value.
        self.fc_reg = nn.Linear(feature_dim + 1, 1)

        if freeze_hidden_layers:
            self.base_model.requires_grad_(False)
            self.fc_reg.requires_grad_(True)

    def forward(self, x, sex):
        """Return the regression output of shape ``[batch_size, 1]``.

        ``sex`` may be 1-D (``[batch_size]``) or already a column; a 1-D input
        gets a trailing axis added so it can be concatenated with the features.
        """
        image_features = self.base_model(x)
        sex_column = sex.unsqueeze(1) if sex.dim() == 1 else sex
        return self.fc_reg(torch.cat((image_features, sex_column), dim=1))


In [25]:


class ResNeXt101(nn.Module):
    """ResNeXt-101 (32x8d) backbone plus a linear regression head that also takes sex.

    The backbone's final ``fc`` layer is swapped for an identity so the backbone
    returns its feature vector; ``fc_reg`` maps those features, with the sex value
    appended, to a single output.
    """

    def __init__(self, load_weights=True, freeze_hidden_layers=False):
        """
        load_weights: passed to torchvision's ``resnext101_32x8d`` as ``pretrained``.
        freeze_hidden_layers: if True, the backbone parameters stop requiring
            gradients while ``fc_reg`` stays trainable.
        """
        super().__init__()

        # Build the ResNeXt-101 backbone.
        self.base_model = torchvision.models.resnext101_32x8d(pretrained=load_weights)

        # Input width of the stock classifier, i.e. the size of the feature vector.
        n_backbone_features = self.base_model.fc.in_features

        # Drop the stock classifier so the backbone outputs features directly.
        self.base_model.fc = nn.Identity()

        # Regression head: image features plus one extra input for sex.
        self.fc_reg = nn.Linear(n_backbone_features + 1, 1)

        if freeze_hidden_layers:
            for backbone_param in self.base_model.parameters():
                backbone_param.requires_grad = False
            for head_param in self.fc_reg.parameters():
                head_param.requires_grad = True

    def forward(self, x, sex):
        """Compute the ``[batch_size, 1]`` regression output for images ``x`` and ``sex``."""
        feats = self.base_model(x)
        # A flat [batch_size] tensor needs a trailing axis before concatenation.
        if sex.dim() == 1:
            sex = sex.unsqueeze(1)
        joined = torch.cat((feats, sex), dim=1)
        return self.fc_reg(joined)


In [27]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [29]:
import torch
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, roc_auc_score

# Function to evaluate the model on a dataloader.
def evaluate_model(model, dataloader, device):
    """Run ``model`` over ``dataloader`` in eval mode and gather targets and predictions.

    Every batch must unpack into ``(images, targets, sex)``. All three tensors are
    moved to ``device`` and the model is called as ``model(images, sex)``.
    Returns ``(targets, predictions)`` as two NumPy arrays in iteration order.
    The model is left in eval mode.
    """
    model.eval()
    collected_targets, collected_preds = [], []
    with torch.no_grad():
        for batch in dataloader:
            images, targets, sex = (tensor.to(device) for tensor in batch)
            # The model emits [batch_size, 1]; drop the trailing axis to line up with targets.
            batch_preds = model(images, sex).squeeze(1)
            collected_preds.extend(batch_preds.cpu().numpy())
            collected_targets.extend(targets.cpu().numpy())
    return np.array(collected_targets), np.array(collected_preds)

# Load your model. Ensure that the ResNet50 model definition matches the one used during training.
# model = ResNet50()


# # Evaluate the model on the test set.
# y_true, y_pred = evaluate_model(model, test_loader, device)

# # Compute regression metrics.
# mse = mean_squared_error(y_true, y_pred)
# rmse = np.sqrt(mse)
# mae = mean_absolute_error(y_true, y_pred)
# mape = mean_absolute_percentage_error(y_true, y_pred)

# print("Regression Metrics:")
# print("MSE:", mse)
# print("RMSE:", rmse)
# print("MAE:", mae)
# print("MAPE:", mape)

# # Optional: Calculate AUC if you have a binary classification task.
# # For example, you can define BMI >= 25 as the positive class.
# y_true_binary = (y_true >= 25).astype(int)
# try:
#     auc = roc_auc_score(y_true_binary, y_pred)
#     print("AUC:", auc)
# except Exception as e:
#     print("AUC calculation error:", e)


In [41]:
# Build the architecture without fetching the ImageNet weights: load_state_dict
# (strict by default) replaces every parameter and buffer with the checkpoint's values.
model = ResNet50(load_weights=False)
# map_location lets a checkpoint saved on a GPU load onto whichever `device` is in use.
model.load_state_dict(torch.load('best_corr_resnet50_SGD_4.pth', map_location=device))  # update file name if needed
# model.load_state_dict(torch.load('best_lr_vggface2_Adam_0.pth'))  # update file name if needed
model.to(device)
from scipy.stats import pearsonr
# model = ResNet50()
# model.load_state_dict(torch.load('best_model_resnrt50_Adam_3.pth'))  # update file name if needed
# model.to(device)

def evaluate_model(model, dataloader, device):
    """
    Collect ground-truth and predicted BMI values for every sample in a dataloader.

    The dataloader must yield ``(X, y, sex)`` batches. The model is switched to
    eval mode, called as ``model(X, sex)`` with gradients disabled, and its
    ``[batch_size, 1]`` output is flattened to ``[batch_size]``.
    Returns a tuple ``(targets, predictions)`` of NumPy arrays.
    """
    model.eval()
    targets_out = []
    preds_out = []
    with torch.no_grad():
        for X, y, sex in dataloader:
            X = X.to(device)
            y = y.to(device)
            sex = sex.to(device)
            outputs = model(X, sex)
            # Flatten [batch_size, 1] -> [batch_size] before moving the values to NumPy.
            preds_out += list(outputs.squeeze(1).cpu().numpy())
            targets_out += list(y.cpu().numpy())
    return np.array(targets_out), np.array(preds_out)

# Assuming 'model', 'test_loader', and 'device' are already defined and set up.
# For example, load your model and move it to device:
# model = ResNet50()
# model.load_state_dict(torch.load('best_model_Q2_Adam_0.pth'))
# model.to(device)

# Run the loaded model over the test loader and collect targets and predictions.
y_true, y_pred = evaluate_model(model=model, dataloader=test_loader, device=device)

# Linear agreement between predicted and true BMI, together with its p-value.
pearson_corr, p_value = pearsonr(y_true, y_pred)

for report_label, report_value in (
    ("Pearson correlation coefficient:", pearson_corr),
    ("P-value:", p_value),
):
    print(report_label, report_value)




Pearson correlation coefficient: 0.6610541843973373
P-value: 1.2191729854467048e-95


In [32]:

from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, roc_auc_score



# Error metrics for the predictions collected above. RMSE is derived from MSE;
# scikit-learn reports MAPE as a fraction rather than a percentage.
mse = mean_squared_error(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)
mape = mean_absolute_percentage_error(y_true, y_pred)
rmse = np.sqrt(mse)

print("Regression Metrics:")
for metric_label, metric_value in (("MSE:", mse), ("RMSE:", rmse), ("MAE:", mae), ("MAPE:", mape)):
    print(metric_label, metric_value)



Regression Metrics:
MSE: 51.35579
RMSE: 7.1662955
MAE: 5.026977
MAPE: 0.14067928
