In [None]:
import pandas as pd
import cv2
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
import os
# Set before `import tensorflow` on the next line. Presumably this makes
# `tf.keras` resolve to the legacy Keras 2 implementation -- TODO confirm
# against the installed TensorFlow version.
os.environ["TF_USE_LEGACY_KERAS"] = "1"
import tensorflow as tf
# NOTE: `cv` is keras_cv_attention_models (model zoo), not OpenCV (`cv2`).
import keras_cv_attention_models as cv
# NOTE(review): the return value is neither assigned nor printed here, so this
# call has no visible effect on the rest of the notebook.
tf.test.is_gpu_available()
import matplotlib.pyplot as plt

def cfp_model():
    """Build the regression network: an LCNet250 backbone followed by a dense head.

    The head is Flatten -> Dense(128) -> Dropout(0.4) -> ReLU -> BatchNorm
    -> Dense(32) -> Dropout(0.3) -> ReLU -> BatchNorm -> Dense(1) -> linear.
    Both hidden Dense layers carry an L2 kernel penalty of 1e-4.

    Returns:
        An uncompiled ``tf.keras.Sequential`` built for 299x299x3 inputs.
    """
    layers = tf.keras.layers
    l2 = tf.keras.regularizers.l2

    model = tf.keras.Sequential()
    # Layers are instantiated in the same order they are stacked.
    stack = [
        cv.lcnet.LCNet250(num_classes=0, input_shape=(299, 299, 3)),
        layers.Flatten(),
        layers.Dense(128, kernel_regularizer=l2(1e-4)),
        layers.Dropout(0.4),
        layers.ReLU(),
        layers.BatchNormalization(),
        layers.Dense(32, kernel_regularizer=l2(1e-4)),
        layers.Dropout(0.3),
        layers.ReLU(),
        layers.BatchNormalization(),
        layers.Dense(1),
        layers.Activation("linear"),
    ]
    for layer in stack:
        model.add(layer)
    return model

from tensorflow.keras import backend as K

def pearson_corr(y_true, y_pred):
    """Pearson correlation of targets and predictions using Keras backend ops.

    Means and sums are taken over every element of each tensor (no axis is
    given). ``K.epsilon()`` is added to the denominator so that a zero-variance
    input does not divide by zero.

    Args:
        y_true: tensor of target values.
        y_pred: tensor of predicted values.

    Returns:
        A scalar tensor holding the correlation coefficient.
    """
    true_centered = y_true - K.mean(y_true)
    pred_centered = y_pred - K.mean(y_pred)

    covariance = K.sum(true_centered * pred_centered)
    scale = K.sqrt(K.sum(K.square(true_centered)) * K.sum(K.square(pred_centered)))
    return covariance / (scale + K.epsilon())

# Build the architecture, then load weights from the checkpoint file in the
# current working directory. The model is not compiled here; the cells below
# only call `model.predict`.
model = cfp_model()
model.load_weights("./best_model.h5")

In [None]:
# Folder that holds the saved .npy arrays.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
load_dir = "/home/hank52052/code/cfp/"


def _load_array(filename):
    """Load one .npy file located directly under ``load_dir``."""
    return np.load(load_dir + filename)


# Load the files in order, one image array plus its label array(s) per dataset.
SMG_data = _load_array("SMG_data.npy")
SMG_label = _load_array("SMG_label.npy")

odir_data = _load_array("odir_data.npy")
odir_label = _load_array("odir_label.npy")

# One image set with two label arrays (file names: "label_chr" and "label").
ex_data = _load_array("bioage_data.npy")
ex_label = _load_array("bioage_label_chr.npy")
ex_bio_label = _load_array("bioage_label.npy")

# Test split plus per-sample attribute arrays (sex, myopia).
test_data = _load_array("test_data.npy")
test_label = _load_array("test_label.npy")
test_data_sex = _load_array("test_data_sex.npy")
test_data_myo = _load_array("test_data_myo.npy")

train_data = _load_array("train_data.npy")
train_label = _load_array("train_label.npy")

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from scipy.stats import pearsonr
import numpy as np
import pandas as pd

# Run inference once per image set. Predictions and labels are flattened to
# 1-D so they can be compared element by element.
pred_odir = model.predict(odir_data).ravel()
true_odir = odir_label.ravel()

pred_smdg = model.predict(SMG_data).ravel()
true_smdg = SMG_label.ravel()

pred_lu_chrono = model.predict(ex_data).ravel()
true_lu_chrono = ex_label.ravel()

# The "bio" evaluation uses the same images as the "chrono" one and differs
# only in its labels, so reuse the predictions instead of paying for a second
# identical forward pass over ex_data.
pred_lu_bio = pred_lu_chrono.copy()
true_lu_bio = ex_bio_label.ravel()

def compute_metrics(y_true, y_pred):
    """Score predictions against targets with four regression metrics.

    Args:
        y_true: ground-truth values.
        y_pred: predicted values.

    Returns:
        Tuple ``(mse, mae, r2, pearson_r)``. Note that MSE comes first.
    """
    mae, mse, r2 = (
        metric(y_true, y_pred)
        for metric in (mean_absolute_error, mean_squared_error, r2_score)
    )
    # pearsonr also returns a p-value, which is not used.
    pearson_r, _p_value = pearsonr(y_true, y_pred)
    return mse, mae, r2, pearson_r

# Metric columns, in the order compute_metrics returns its values.
metric_columns = ["MSE", "MAE", "R²", "Pearson r"]

results = {"Dataset": ["Odir-5K", "SMDG", "Dr. Lu (chrono)", "Dr. Lu (bio)"]}
results.update((column, []) for column in metric_columns)

# The pairs below are listed in the same order as the "Dataset" names above.
evaluation_pairs = (
    (true_odir, pred_odir),
    (true_smdg, pred_smdg),
    (true_lu_chrono, pred_lu_chrono),
    (true_lu_bio, pred_lu_bio),
)

for y_true, y_pred in evaluation_pairs:
    scores = compute_metrics(y_true, y_pred)
    for column, score in zip(metric_columns, scores):
        results[column].append(round(score, 2))

df_results = pd.DataFrame(results)
print(df_results)


In [None]:
from sklearn.metrics import mean_absolute_error
import numpy as np
import pandas as pd

# Run inference once per image set. Predictions and labels are flattened to
# 1-D so they can be indexed with the same resampling indices.
pred_odir = model.predict(odir_data).ravel()
true_odir = odir_label.ravel()

pred_smdg = model.predict(SMG_data).ravel()
true_smdg = SMG_label.ravel()

pred_lu_chrono = model.predict(ex_data).ravel()
true_lu_chrono = ex_label.ravel()

# Same images as the "chrono" evaluation, different labels: reuse the
# predictions rather than running a second identical forward pass.
pred_lu_bio = pred_lu_chrono.copy()
true_lu_bio = ex_bio_label.ravel()

def bootstrap_mae(y_true, y_pred, n_iter=100, sample_size=100, random_state=None):
    """Estimate the spread of the MAE by resampling with replacement.

    Args:
        y_true: ground-truth values (array-like).
        y_pred: predicted values (array-like, same shape as ``y_true``).
        n_iter: number of bootstrap resamples to draw.
        sample_size: number of indices drawn (with replacement) per resample.
            This is independent of the dataset size.
        random_state: optional seed. When ``None`` (the default) the global
            ``np.random`` state is used, exactly as before; pass an integer to
            get reproducible resamples.

    Returns:
        A list of ``n_iter`` floats, one MAE per resample.

    Raises:
        ValueError: if the inputs are empty or their shapes differ.
    """
    # Accept lists / Series as well as ndarrays; fancy indexing needs arrays.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, got {y_true.shape} and {y_pred.shape}"
        )
    n = len(y_true)
    if n == 0:
        raise ValueError("cannot bootstrap an empty sample")

    # The per-sample error does not depend on the resample, so compute it once.
    abs_err = np.abs(y_true - y_pred)

    rng = np.random if random_state is None else np.random.default_rng(random_state)
    maes = []
    for _ in range(n_iter):
        idx = rng.choice(n, sample_size, replace=True)
        maes.append(float(abs_err[idx].mean()))
    return maes

# Display name -> (labels, predictions) for every evaluation set.
datasets = {
    "Odir-5K": (true_odir, pred_odir),
    "SMDG": (true_smdg, pred_smdg),
    "Dr. Lu (chrono)": (true_lu_chrono, pred_lu_chrono),
    "Dr. Lu (bio)": (true_lu_bio, pred_lu_bio)
}

# Long-format table: one row per bootstrap resample.
mae_results = {"Dataset": [], "MAE": []}

for name, (y_true, y_pred) in datasets.items():
    maes = bootstrap_mae(y_true, y_pred)
    mae_results["Dataset"] += [name] * len(maes)
    mae_results["MAE"] += maes

df_mae = pd.DataFrame(mae_results)
print(df_mae)

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from scipy.stats import pearsonr

# One-letter ODIR label code -> readable name. The key order doubles as the
# column order assigned to disease_df below.
disease_name_map = {
    'N': 'Normal',
    'D': 'Diabetic Retinopathy',
    'G': 'Glaucoma',
    'C': 'Cataract',
    'A': 'AMD',
    'H': 'Hypertension',
    'M': 'Myopia',
    'O': 'Other Abnormality'
}

# Keep columns 7..14 of the metadata sheet and rename them to the label codes.
# NOTE(review): assumes the CSV rows are in the same order as odir_data -- confirm.
disease_df = pd.read_csv("../../Dataset/odir/full_df.csv").iloc[:, 7:15]
disease_df.columns = list(disease_name_map)

pred_age = model.predict(odir_data).ravel()
true_age = odir_label.ravel()

def compute_metrics(y_true, y_pred):
    """Score predictions against targets with four regression metrics.

    Args:
        y_true: ground-truth values.
        y_pred: predicted values.

    Returns:
        Tuple ``(mse, mae, r2, pearson_r)``. Note that MSE comes first.
    """
    mae, mse, r2 = (
        metric(y_true, y_pred)
        for metric in (mean_absolute_error, mean_squared_error, r2_score)
    )
    # pearsonr also returns a p-value, which is not used.
    pearson_r, _p_value = pearsonr(y_true, y_pred)
    return mse, mae, r2, pearson_r

subgroup_results = {
    column: []
    for column in (
        "Disease Code",
        "Disease Name",
        "Group",
        "Sample Size",
        "MSE",
        "MAE",
        "R²",
        "Pearson r",
    )
}

for col in disease_df.columns:
    disease_flag = disease_df[col].values
    disease_name = disease_name_map[col]

    # Evaluate the flag-present group first, then the flag-absent group.
    for val, label in ((1, f"{col}=1"), (0, f"{col}=0")):
        idx = (disease_flag == val)
        # Skip subgroups with fewer than 10 samples.
        if np.sum(idx) < 10:
            continue
        y_true, y_pred = true_age[idx], pred_age[idx]
        mse, mae, r2, r = compute_metrics(y_true, y_pred)

        row = {
            "Disease Code": col,
            "Disease Name": disease_name,
            "Group": label,
            "Sample Size": len(y_true),
            "MSE": round(mse, 2),
            "MAE": round(mae, 2),
            "R²": round(r2, 2),
            "Pearson r": round(r, 2),
        }
        for column, value in row.items():
            subgroup_results[column].append(value)

df_disease = pd.DataFrame(subgroup_results)
print(df_disease)

df_disease.to_excel("ODIR_Disease_Subgroup_Analysis.xlsx", index=False)


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from scipy.stats import pearsonr
import numpy as np
import pandas as pd

pred_lu_chrono = model.predict(ex_data).ravel()
true_lu_chrono = ex_label.ravel()
# Same images, different labels: reuse the predictions instead of running a
# second identical forward pass over ex_data.
pred_lu_bio = pred_lu_chrono.copy()
true_lu_bio = ex_bio_label.ravel()

# Left-closed bins [20,30), [30,40), [40,50), [50,inf). Values below 20 fall
# outside every bin and are therefore not assigned to any group.
age_bins = [20, 30, 40, 50, np.inf]
age_labels = ['20-30', '30-40', '40-50', '50+']
# Samples are grouped by chronological age for both targets.
age_groups = pd.cut(true_lu_chrono, bins=age_bins, labels=age_labels, right=False)

def compute_metrics(y_true, y_pred):
    """Score predictions against targets with four regression metrics.

    Args:
        y_true: ground-truth values.
        y_pred: predicted values.

    Returns:
        Tuple ``(mse, mae, r2, pearson_r)``. Note that MSE comes first.
    """
    mae, mse, r2 = (
        metric(y_true, y_pred)
        for metric in (mean_absolute_error, mean_squared_error, r2_score)
    )
    # pearsonr also returns a p-value, which is not used.
    pearson_r, _p_value = pearsonr(y_true, y_pred)
    return mse, mae, r2, pearson_r

subgroup_results = {
    column: []
    for column in ("Age Group", "Target", "MSE", "MAE", "R²", "Pearson r")
}

# Both targets are scored within every age group, chronological first.
targets = (
    ("Chronological Age", true_lu_chrono, pred_lu_chrono),
    ("Biological Age", true_lu_bio, pred_lu_bio),
)

for label in age_labels:
    idx = (age_groups == label)
    for target_name, truth, preds in targets:
        y_true, y_pred = truth[idx], preds[idx]
        # Skip subgroups with fewer than 10 samples.
        if len(y_true) < 10:
            continue
        mse, mae, r2, r = compute_metrics(y_true, y_pred)
        row = {
            "Age Group": label,
            "Target": target_name,
            "MSE": round(mse, 2),
            "MAE": round(mae, 2),
            "R²": round(r2, 2),
            "Pearson r": round(r, 2),
        }
        for column, value in row.items():
            subgroup_results[column].append(value)

df_subgroups = pd.DataFrame(subgroup_results)
print(df_subgroups)
df_subgroups.to_excel("Age_subgroup_Lu's Data.xlsx", index=False)

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from scipy.stats import pearsonr
import numpy as np
import pandas as pd

pred_lu_chrono = model.predict(ex_data).ravel()
true_lu_chrono = ex_label.ravel()
# Same images, different labels: reuse the predictions instead of running a
# second identical forward pass over ex_data.
pred_lu_bio = pred_lu_chrono.copy()
true_lu_bio = ex_bio_label.ravel()

# Flattened so the boolean mask built from it is 1-D like the prediction and
# label vectors it indexes.
# NOTE(review): this file is read from the working directory, unlike the other
# arrays, which are read from load_dir -- confirm that is intended.
gender = np.load("./bioage_sex.npy").ravel()

def compute_metrics(y_true, y_pred):
    """Score predictions against targets with four regression metrics.

    Args:
        y_true: ground-truth values.
        y_pred: predicted values.

    Returns:
        Tuple ``(mse, mae, r2, pearson_r)``. Note that MSE comes first.
    """
    mae, mse, r2 = (
        metric(y_true, y_pred)
        for metric in (mean_absolute_error, mean_squared_error, r2_score)
    )
    # pearsonr also returns a p-value, which is not used.
    pearson_r, _p_value = pearsonr(y_true, y_pred)
    return mse, mae, r2, pearson_r

subgroup_results = {
    column: []
    for column in ("Gender", "Target", "MSE", "MAE", "R²", "Pearson r")
}

# Both targets are scored within each gender, chronological first.
targets = (
    ("Chronological Age", true_lu_chrono, pred_lu_chrono),
    ("Biological Age", true_lu_bio, pred_lu_bio),
)

for g in ('M', 'F'):
    idx = (gender == g)
    for target_name, truth, preds in targets:
        y_true, y_pred = truth[idx], preds[idx]
        # Skip subgroups with fewer than 10 samples.
        if len(y_true) < 10:
            continue
        mse, mae, r2, r = compute_metrics(y_true, y_pred)
        row = {
            "Gender": g,
            "Target": target_name,
            "MSE": round(mse, 2),
            "MAE": round(mae, 2),
            "R²": round(r2, 2),
            "Pearson r": round(r, 2),
        }
        for column, value in row.items():
            subgroup_results[column].append(value)

df_gender_subgroups = pd.DataFrame(subgroup_results)
print(df_gender_subgroups)
df_gender_subgroups.to_excel("Gender_subgroup_Lu_Data.xlsx", index=False)


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from scipy.stats import pearsonr
import numpy as np
import pandas as pd

# Left-closed bins [20,30), [30,40), [40,50), [50,inf). Values below 20 fall
# outside every bin and are therefore not assigned to any group.
age_labels = ['20-30', '30-40', '40-50', '50+']
age_bins = [20, 30, 40, 50, np.inf]

true_age = test_label.ravel()
pred_age = model.predict(test_data).ravel()

# Group every test sample by its true age.
age_groups = pd.cut(true_age, right=False, bins=age_bins, labels=age_labels)

def compute_metrics(y_true, y_pred):
    """Score predictions against targets with four regression metrics.

    Args:
        y_true: ground-truth values.
        y_pred: predicted values.

    Returns:
        Tuple ``(mse, mae, r2, pearson_r)``. Note that MSE comes first.
    """
    mae, mse, r2 = (
        metric(y_true, y_pred)
        for metric in (mean_absolute_error, mean_squared_error, r2_score)
    )
    # pearsonr also returns a p-value, which is not used.
    pearson_r, _p_value = pearsonr(y_true, y_pred)
    return mse, mae, r2, pearson_r

# Metric columns, in the order compute_metrics returns its values.
metric_columns = ("MSE", "MAE", "R²", "Pearson r")

subgroup_results = {"Age Group": []}
subgroup_results.update((column, []) for column in metric_columns)

for label in age_labels:
    idx = (age_groups == label)
    # Skip age groups with fewer than 10 samples.
    if np.sum(idx) < 10:
        continue
    scores = compute_metrics(true_age[idx], pred_age[idx])
    subgroup_results["Age Group"].append(label)
    for column, score in zip(metric_columns, scores):
        subgroup_results[column].append(round(score, 2))

df_subgroups = pd.DataFrame(subgroup_results)
print(df_subgroups)

df_subgroups.to_excel("Age_subgroup_NTUH.xlsx", index=False)

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from scipy.stats import pearsonr
import numpy as np
import pandas as pd

# Per-sample attribute flags for the test split, flattened to 1-D so masks
# built from them can index the flattened label and prediction vectors.
sex = test_data_sex.ravel()
myopia = test_data_myo.ravel()

true_age = test_label.ravel()
pred_age = model.predict(test_data).ravel()

def compute_metrics(y_true, y_pred):
    """Score predictions against targets with four regression metrics.

    Args:
        y_true: ground-truth values.
        y_pred: predicted values.

    Returns:
        Tuple ``(mse, mae, r2, pearson_r)``. Note that MSE comes first.
    """
    mae, mse, r2 = (
        metric(y_true, y_pred)
        for metric in (mean_absolute_error, mean_squared_error, r2_score)
    )
    # pearsonr also returns a p-value, which is not used.
    pearson_r, _p_value = pearsonr(y_true, y_pred)
    return mse, mae, r2, pearson_r

subgroup_results = {
    column: []
    for column in ("Group Type", "Group", "MSE", "MAE", "R²", "Pearson r")
}

# (group type, flag array, ((flag value, display name), ...)).
# Sex is processed before myopia, and flag value 1 before 0.
groupings = (
    ("Sex", sex, ((1, 'Male'), (0, 'Female'))),
    ("Myopia", myopia, ((1, 'Myopia'), (0, 'Non-Myopia'))),
)

for group_type, flags, levels in groupings:
    for val, name in levels:
        idx = (flags == val)
        # Skip subgroups with fewer than 10 samples.
        if np.sum(idx) < 10:
            continue
        mse, mae, r2, r = compute_metrics(true_age[idx], pred_age[idx])
        row = {
            "Group Type": group_type,
            "Group": name,
            "MSE": round(mse, 2),
            "MAE": round(mae, 2),
            "R²": round(r2, 2),
            "Pearson r": round(r, 2),
        }
        for column, value in row.items():
            subgroup_results[column].append(value)

df_subgroups = pd.DataFrame(subgroup_results)
print(df_subgroups)
df_subgroups.to_excel("Sex_Myopia_subgroups_NTUH.xlsx", index=False)


In [None]:
import numpy as np
import pandas as pd
import cv2
from sklearn.metrics import mean_absolute_error

# Blur level -> cv2.bilateralFilter arguments (d, sigmaColor, sigmaSpace).
# None marks the unfiltered baseline.
blur_settings = {
    "No Blur": None,
    "Light Blur": (5, 25, 25),
    "Medium Blur": (9, 75, 75),
    "Heavy Blur": (15, 150, 150)
}

results = {
    "Blur Level": [],
    "Dataset": [],
    "MAE": []
}

# The labels do not depend on the blur level, so flatten them once.
chrono_labels = ex_label.ravel()
bio_labels = ex_bio_label.ravel()

for blur_name, params in blur_settings.items():
    if params is None:
        # Nothing below modifies the array, so no copy is needed.
        blurred_data = ex_data
    else:
        d, sigmaColor, sigmaSpace = params
        # Each image is converted RGB -> BGR, filtered, then converted back.
        # NOTE(review): assumes ex_data has a dtype bilateralFilter accepts
        # (uint8 or float32) -- confirm.
        blurred_data = np.array([
            cv2.cvtColor(
                cv2.bilateralFilter(cv2.cvtColor(img, cv2.COLOR_RGB2BGR), d, sigmaColor, sigmaSpace),
                cv2.COLOR_BGR2RGB
            )
            for img in ex_data
        ])

    # One forward pass per blur level. Both targets are scored on the same
    # predictions; previously the identical predict call ran twice.
    pred_chrono = model.predict(blurred_data).ravel()
    pred_bio = pred_chrono

    mae_chrono = mean_absolute_error(chrono_labels, pred_chrono)
    mae_bio = mean_absolute_error(bio_labels, pred_bio)

    results["Blur Level"].extend([blur_name, blur_name])
    results["Dataset"].extend(["Dr. Lu (chrono)", "Dr. Lu (bio)"])
    results["MAE"].extend([mae_chrono, mae_bio])

df_blur_mae = pd.DataFrame(results)
print(df_blur_mae)
