In [None]:
import pandas as pd
import numpy as np
import cv2
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
import os

def load_images_and_labels(
    csv_path,
    image_dir="/work1043/yuhung0716/blur_images_result/Original_images_cropped/crop_images/",
    size=(299, 299),
    max_workers=16,
):
    """Load the JPEG images listed in a CSV and pair each with its age.

    The CSV is read with its first column as the index and must provide the
    columns ``IMAGE_FILENAME`` (file stem, ``.jpg`` is appended here) and
    ``AGE``. Every image is read with OpenCV, converted from BGR to RGB and
    resized to ``size``. Rows whose image cannot be read or processed are
    skipped; the number of skipped rows is printed so they do not vanish
    silently.

    Args:
        csv_path: Path of the CSV file describing the split.
        image_dir: Directory containing ``<IMAGE_FILENAME>.jpg``.
        size: ``(width, height)`` handed to ``cv2.resize``.
        max_workers: Number of threads used to read images.

    Returns:
        Tuple ``(data, label)`` of NumPy arrays holding the successfully
        loaded images and their ages, in CSV row order.
    """
    df = pd.read_csv(csv_path, index_col=0)

    def load_and_resize(filename, age):
        """Return ``(rgb_image, age)`` or ``(None, None)`` if loading fails."""
        path = os.path.join(image_dir, f"{filename}.jpg")
        try:
            img = cv2.imread(path)
            if img is None:
                # cv2.imread signals a missing/undecodable file with None.
                return None, None
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            return cv2.resize(img, size), age
        except Exception:
            # Best effort: one broken image must not abort the whole export,
            # but KeyboardInterrupt/SystemExit are no longer swallowed.
            return None, None

    data, label = [], []
    skipped = 0
    # Iterate over the column values instead of index labels so that a CSV
    # with duplicate index entries is still processed row by row.
    filenames = df["IMAGE_FILENAME"].tolist()
    ages = df["AGE"].tolist()
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        loaded = executor.map(load_and_resize, filenames, ages)
        for img, age in tqdm(loaded, total=len(df)):
            if img is None:
                skipped += 1
                continue
            data.append(img)
            label.append(age)

    if skipped:
        print(f"Skipped {skipped} of {len(df)} rows from {csv_path} (image missing or unreadable)")
    return np.array(data), np.array(label)

# Load the three splits; the module-level names are kept as they were.
train_data, train_label = load_images_and_labels(
    "/work1043/yuhung0716/Final_model/dataset_confounder_train.csv"
)
test_data, test_label = load_images_and_labels(
    "/work1043/yuhung0716/Final_model/dataset_confounder_test.csv"
)
val_data, val_label = load_images_and_labels(
    "/work1043/yuhung0716/Final_model/dataset_confounder_val.csv"
)

save_dir = "/home/hank52052/work/code/cfp/"
os.makedirs(save_dir, exist_ok=True)

splits = (
    ("train", train_data, train_label),
    ("test", test_data, test_label),
    ("val", val_data, val_label),
)

# Write ori_<split>_data.npy / ori_<split>_label.npy for every split.
for split_name, split_images, split_labels in splits:
    np.save(os.path.join(save_dir, f"ori_{split_name}_data.npy"), split_images)
    np.save(os.path.join(save_dir, f"ori_{split_name}_label.npy"), split_labels)

# Report the array shapes once everything has been saved.
for split_name, split_images, split_labels in splits:
    print(f"{split_name}_data.shape:", split_images.shape)
    print(f"{split_name}_label.shape:", split_labels.shape)

In [None]:
import pandas as pd
import numpy as np
import cv2
import os
import pydicom
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm

def load_and_process_images(csv_path, image_dir, image_col, label_col, save_data_path, save_label_path):
    """Read the images listed in a CSV, resize them and save images and labels.

    Rows with a missing value in ``image_col`` or ``label_col`` are dropped.
    Each remaining image is read with OpenCV, resized to 299x299 and has its
    channel axis reversed (BGR as returned by ``cv2.imread`` becomes RGB).
    Images that cannot be read or resized are skipped and counted.

    Args:
        csv_path: CSV file listing the images.
        image_dir: Directory the file names in ``image_col`` are relative to.
        image_col: Name of the column holding the image file name.
        label_col: Name of the column holding the label.
        save_data_path: Destination ``.npy`` path for the image array.
        save_label_path: Destination ``.npy`` path for the label array.

    Returns:
        None. The two arrays are written with ``np.save``.
    """
    df = pd.read_csv(csv_path).dropna(subset=[image_col, label_col])
    data, labels = [], []
    skipped = 0

    # Iterate over the values, not over range(len(df)) with .loc: after
    # dropna() the index has gaps, so positional counters no longer match the
    # labels and trailing rows would never be visited.
    rows = zip(df[image_col].tolist(), df[label_col].tolist())
    for filename, label in tqdm(rows, total=len(df)):
        try:
            img = cv2.imread(os.path.join(image_dir, str(filename)))
            if img is None:
                # Missing or undecodable file.
                skipped += 1
                continue
            img = cv2.resize(img, (299, 299))
        except Exception:
            # Best effort: skip this image, keep processing the rest.
            skipped += 1
            continue
        data.append(img[:, :, ::-1])
        labels.append(label)

    if skipped:
        print(f"Skipped {skipped} of {len(df)} rows from {csv_path} (image missing or unreadable)")

    data, labels = np.array(data), np.array(labels)

    # Make sure the output folders exist so the work above is not lost on save.
    for target in (save_data_path, save_label_path):
        parent = os.path.dirname(target)
        if parent:
            os.makedirs(parent, exist_ok=True)
    np.save(save_data_path, data)
    np.save(save_label_path, labels)

# Export the ODIR and SMG datasets to ./original_data/.
export_jobs = [
    (
        "../../Dataset/odir/full_df.csv",
        "../../Dataset/odir/preprocessed_images/",
        "filename",
        "Patient Age",
        "./original_data/odir_data.npy",
        "./original_data/odir_label.npy",
    ),
    (
        "../../Dataset/SMG/metadata_standardized.csv",
        "../../Dataset/SMG/full-fundus/",
        "fundus",
        "age",
        "./original_data/SMG_data.npy",
        "./original_data/SMG_label.npy",
    ),
]
for export_job in export_jobs:
    load_and_process_images(*export_job)

# Join the predicted biological age with the recorded age on the two key
# columns and keep only rows that are complete after the merge.
merge_keys = ["CHARGE_NO", "ANAMNESIS_NO"]
predict_data = pd.read_excel("./evaluation_dataset/AGE_AI_0512_v2.xlsx")
original_data = pd.read_excel("./evaluation_dataset/LAB_0512_V2.xlsx")
predict_data = predict_data.loc[:, merge_keys + ["Predicted biological age", "SEX"]]
original_data = original_data.loc[:, merge_keys + ["Age"]]
new_data = original_data.merge(predict_data, on=merge_keys, how="outer").dropna()

# Collect (patient id, path) for every .dcm file; the patient id is the part
# of the file name before the first "@".
dicom_dir = "./evaluation_dataset/DE-OPDICOM/"
all_files = []
for filename in os.listdir(dicom_dir):
    if not filename.endswith(".dcm"):
        continue
    all_files.append((filename.split("@")[0], os.path.join(dicom_dir, filename)))

# patient id -> study date -> list of (filepath, laterality, image)
patients_data = defaultdict(lambda: defaultdict(list))

def process_file(patient_file_tuple):
    """Decode one DICOM file into a 299x299 image plus its grouping metadata.

    Args:
        patient_file_tuple: ``(patient_id, filepath)`` pair.

    Returns:
        ``(patient_id, study_date, filepath, laterality, img)`` on success,
        or ``None`` when the file cannot be read, lacks the study-date
        ``(0008,0020)`` or laterality ``(0020,0060)`` element, or its pixel
        array is not shaped ``(2320, 3472, 3)``.
    """
    patient_id, filepath = patient_file_tuple
    try:
        dcm = pydicom.dcmread(filepath, stop_before_pixels=False)
        study_date = dcm.get((0x0008, 0x0020), None)
        laterality = dcm.get((0x0020, 0x0060), None)
        # Both elements are needed downstream; reject the file explicitly
        # instead of relying on an AttributeError from ``None.value``.
        if study_date is None or laterality is None:
            return None
        img = dcm.pixel_array
        if img.shape != (2320, 3472, 3):
            return None
        # Crop columns 577..2896 (2320 wide, i.e. square) and downscale.
        # The channel order is left as decoded: the original code reversed the
        # channels before and after the resize, which cancels out.
        img = cv2.resize(img[:, 577:2897], (299, 299))
        return (patient_id, study_date.value, filepath, laterality.value, img)
    except Exception:
        # Best effort: an unreadable or malformed file is skipped by the caller.
        return None

# Decode all DICOM files concurrently and bucket the successful ones by
# patient id and study date.
with ThreadPoolExecutor(max_workers=32) as executor:
    futures = [executor.submit(process_file, entry) for entry in all_files]
    progress = tqdm(as_completed(futures), total=len(futures), desc="處理DICOM檔案")
    for future in progress:
        res = future.result()
        if not res:
            continue
        patient_id, study_date_val, filepath, laterality_val, img = res
        patients_data[patient_id][study_date_val].append((filepath, laterality_val, img))

# For every patient keep only the images from the greatest study-date key,
# split by laterality ("L" / "R").
result = {}
for patient_id, date_dict in patients_data.items():
    latest_date = max(date_dict)
    latest_visit = date_dict[latest_date]
    result[patient_id] = {
        "latest_date": latest_date,
        "left_eye_imgs": [image for _, side, image in latest_visit if side == "L"],
        "right_eye_imgs": [image for _, side, image in latest_visit if side == "R"],
    }

# Match the merged table to the DICOM patients through CHARGE_NO; a row is
# used only when the patient has at least one image for each eye.
right_eye_imgs, left_eye_imgs, bio_ages, ages, sex_all = [], [], [], [], []
for _, row in new_data.iterrows():
    patient_entry = result.get(row["CHARGE_NO"])
    if patient_entry is None:
        continue
    right = patient_entry.get("right_eye_imgs", [])
    left = patient_entry.get("left_eye_imgs", [])
    if not right or not left:
        continue
    # Only the first image per eye is kept.
    right_eye_imgs.append(right[0].astype(np.uint8))
    left_eye_imgs.append(left[0].astype(np.uint8))
    bio_ages.append(float(row["Predicted biological age"]))
    ages.append(float(row["Age"]))
    sex_all.append(row["SEX"])

right_eye_imgs = np.stack(right_eye_imgs)
left_eye_imgs = np.stack(left_eye_imgs)
bio_ages = np.array(bio_ages, dtype=np.float32)
ages = np.array(ages, dtype=np.float32)
sex_all = np.array(sex_all)

for caption, array in (
    ("右眼影像 shape:", right_eye_imgs),
    ("左眼影像 shape:", left_eye_imgs),
    ("生物年齡 shape:", bio_ages),
    ("年齡 shape:", ages),
    ("性別 shape:", sex_all),
):
    print(caption, array.shape)

# Only the right-eye stack is written as the image data; left_eye_imgs is
# built and reported above but not saved.
for target, array in (
    ("./original_data/bioage_data.npy", right_eye_imgs),
    ("./original_data/bioage_label.npy", bio_ages),
    ("./original_data/bioage_label_chr.npy", ages),
    ("./original_data/bioage_sex.npy", sex_all),
):
    np.save(target, array)

In [None]:
import os
import numpy as np
import cv2
from itertools import product
from multiprocessing import Pool, cpu_count
from tqdm import tqdm

# Directory whose "*_data.npy" files are enhanced by the batch job below.
# NOTE(review): earlier cells write to "/home/hank52052/work/code/cfp/" and to
# the relative "./original_data/" — confirm this absolute path is the intended one.
input_dir = "/home/hank52052/code/cfp/original_data"

def image_sharpness_score(img):
    """Return the variance of the Laplacian of the image's grayscale version.

    The image is converted with ``cv2.COLOR_RGB2GRAY`` and the Laplacian is
    computed in 64-bit floating point before taking the variance.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    laplacian = cv2.Laplacian(grayscale, cv2.CV_64F)
    return laplacian.var()

def brightness_penalty(img, threshold=190):
    """Return the squared amount by which the mean of ``img`` exceeds ``threshold``.

    The result is ``0`` when the mean is at or below the threshold.
    """
    excess = img.mean() - threshold
    if excess > 0:
        return excess ** 2
    return 0

def apply_blur_enhance_auto(img):
    """Grid-search a blur-subtraction enhancement and return the best result.

    For every combination of box-blur kernel size, ``alpha``, ``beta`` and
    ``gamma`` the candidate ``alpha * img + beta * blur(img) + gamma`` is built
    with ``cv2.addWeighted``, clipped to ``[0, 255]`` and cast to ``uint8``.
    Each candidate is scored as
    ``image_sharpness_score(candidate) - 0.05 * brightness_penalty(candidate, 190)``.

    Args:
        img: Image array passed to ``cv2.blur`` / ``cv2.addWeighted``.

    Returns:
        The highest-scoring candidate (the earliest one wins ties), or
        ``None`` if no candidate scored above ``-inf``.
    """
    ksize_list = [
        (3, 3), (5, 5), (7, 7),
        (9, 9), (11, 11), (13, 13),
        (15, 15), (17, 17), (19, 19)
    ]
    alpha_list = [1.5, 2.0, 2.5]
    beta_list = [-0.8, -1.0, -1.2]
    gamma_list = [0, 10, 15]

    best_score = -np.inf
    best_enhanced = None

    for ksize in ksize_list:
        # The blur depends only on the kernel size, so compute it once per
        # kernel instead of once per (alpha, beta, gamma) combination.
        blurred = cv2.blur(img, ksize)
        # Nested iteration visits candidates in the same order as a single
        # product over all four lists, so tie-breaking is unchanged.
        for alpha, beta, gamma in product(alpha_list, beta_list, gamma_list):
            enhanced = cv2.addWeighted(img, alpha, blurred, beta, gamma)
            enhanced = np.clip(enhanced, 0, 255).astype(np.uint8)
            sharpness = image_sharpness_score(enhanced)
            penalty = brightness_penalty(enhanced, threshold=190)
            score = sharpness - 0.05 * penalty
            if score > best_score:
                best_score = score
                best_enhanced = enhanced

    return best_enhanced

def process_single_image(img):
    """Enhance one image; module-level entry point handed to ``Pool.imap``."""
    enhanced = apply_blur_enhance_auto(img)
    return enhanced

def process_npy_file(filepath):
    """Enhance every image of one ``*_data.npy`` array and save the result.

    The output path is ``filepath`` with its last ``_data.npy`` replaced by
    ``_data_process.npy``. If that path already exists the file is skipped.
    Images are enhanced in a process pool with one worker per CPU, and the
    results are stored as a ``uint8`` array.

    Args:
        filepath: Path of the ``.npy`` array to process; each element along
            its first axis is passed to ``process_single_image``.

    Returns:
        None.
    """
    # Replace only the last occurrence so that a directory component that
    # happens to contain "_data.npy" is left untouched.
    output_path = "_data_process.npy".join(filepath.rsplit("_data.npy", 1))

    if os.path.exists(output_path):
        print(f"[⏩] Skipping (already processed): {os.path.basename(output_path)}")
        return

    print(f"\n[🔍] Processing: {os.path.basename(filepath)}")
    data = np.load(filepath)
    with Pool(processes=cpu_count()) as pool:
        results = list(tqdm(pool.imap(process_single_image, data), total=len(data), desc=f"Enhancing {os.path.basename(filepath)}"))
    result_array = np.array(results, dtype=np.uint8)

    # Write to a temporary file and rename it into place: an interrupted save
    # must not leave a truncated output that the exists-check above would
    # treat as finished on the next run.
    tmp_path = output_path + ".tmp"
    with open(tmp_path, "wb") as handle:
        np.save(handle, result_array)
    os.replace(tmp_path, output_path)
    print(f"[✅] Saved: {output_path}")

if __name__ == "__main__":
    # Enhance every array in input_dir whose name ends with "_data.npy".
    all_files = os.listdir(input_dir)
    target_files = []
    for entry in all_files:
        if entry.endswith("_data.npy"):
            target_files.append(os.path.join(input_dir, entry))

    for file_path in target_files:
        process_npy_file(file_path)


In [None]:
# Show SMG_data[8] next to three bilateral-filter strengths.
plt.figure(figsize=(24, 6))
sample = SMG_data[8]

# Panel 1: the unfiltered image.
ax = plt.subplot(141)
ax.axis("off")
plt.imshow(sample)

# Panels 2-4: the channel axis is reversed before filtering and reversed back
# for display.
panels = (
    (142, 5, 25),
    (143, 9, 75),
    (144, 15, 150),
)
for position, diameter, sigma in panels:
    ax = plt.subplot(position)
    ax.axis("off")
    blurred = cv2.bilateralFilter(sample[:, :, ::-1], d=diameter, sigmaColor=sigma, sigmaSpace=sigma)
    plt.imshow(blurred[:, :, ::-1])

plt.savefig("blue_show.png", dpi=400, bbox_inches="tight")
plt.show()