In [None]:
# Hugging Face authentication.
# The token comes from the HF_TOKEN environment variable when it is set and
# non-empty; otherwise it is requested through a hidden prompt. Never paste a
# token into the notebook itself.
import os
import getpass

HF_TOKEN = os.environ.get('HF_TOKEN') or getpass.getpass(
    'Enter your Hugging Face token (input hidden): '
)

# Export the token under both variable names so that libraries reading either
# one from the environment can find it.
for _token_env_name in ('HF_TOKEN', 'HUGGINGFACEHUB_API_TOKEN'):
    os.environ[_token_env_name] = HF_TOKEN


In [None]:
!git clone https://github.com/rmaphoh/RETFound.git

%cd RETFound

In [None]:
# Mount Google Drive so files stored there are reachable under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# Image folder on Drive. This is the same location the feature-extraction
# cells assert on, so every cell in the notebook points at one dataset path.
data_path = "/content/drive/MyDrive/Image_data/Data/brset/images_224"

In [None]:
# Report the current working directory, then switch into the RETFound checkout
# and list its contents.
import os
print("CWD before:", os.getcwd())
# Absolute path, so the result does not depend on where earlier cells left us.
%cd /content/RETFound
!ls

In [None]:
!pip install -r requirements.txt
!pip install -q huggingface_hub

In [None]:
from huggingface_hub import login

# Authenticate with the token held in the HF_TOKEN variable; nothing is pasted
# into this cell.
login(token=HF_TOKEN)

In [None]:
# Mount Google Drive under /content/drive.
from google.colab import drive

_drive_mount_point = '/content/drive'
drive.mount(_drive_mount_point)

In [None]:
# Exploratory listing: probe several candidate Drive locations to find out
# where the images_224 folder actually lives. Each `ls` is independent of the
# others; the printed headings label which candidate is being listed.
print("Inside Image_data:")
!ls "/content/drive/MyDrive/Image_data"

print("\nInside Image_data/Data (if it exists):")
!ls "/content/drive/MyDrive/Image_data/Data"

print("\nTry brset under Image_data:")
!ls "/content/drive/MyDrive/Image_data/brset"

# Two candidate locations for the image folder itself.
print("\nTry images_224 under possible paths:")
!ls "/content/drive/MyDrive/Image_data/brset/images_224"
!ls "/content/drive/MyDrive/Image_data/Data/brset/images_224"

In [None]:
!pip install -q "huggingface_hub>=0.23.0"

import os
from huggingface_hub import login, hf_hub_download

HF_TOKEN = "your-token here"

os.environ["HF_TOKEN"] = HF_TOKEN
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_TOKEN
login(HF_TOKEN, add_to_git_credential=False)

# Hard test
path = hf_hub_download(
    repo_id="YukunZhou/RETFound_mae_natureCFP",
    filename="RETFound_mae_natureCFP.pth",
    token=HF_TOKEN,
)
print("OK:", path)

In [None]:
import os
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from PIL import Image
import models_vit as models
from huggingface_hub import hf_hub_download

# ---- reproducibility and display ----
# Seed NumPy and PyTorch with the same fixed value; print arrays in full.
SEED = 1
np.random.seed(SEED)
torch.manual_seed(SEED)
np.set_printoptions(threshold=np.inf)

# ---- paths ----
# Folder holding the images to embed; the cell stops here if it is missing.
data_path = "/content/drive/MyDrive/Image_data/Data/brset/images_224"
assert os.path.isdir(data_path), f"data_path does not exist: {data_path}"

# ---- compute device ----
# Use the GPU when one is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

def prepare_model(chkpt_dir, arch="RETFound_mae"):
    """Build a RETFound model and load pretrained weights into it.

    Args:
        chkpt_dir: Path of the checkpoint file (despite the name, it is handed
            straight to ``torch.load``).
        arch: Name of the model constructor looked up in ``models``. Only
            ``"RETFound_mae"`` is supported.

    Returns:
        The constructed model with the checkpoint's ``"model"`` weights loaded
        non-strictly (keys that do not match are skipped).

    Raises:
        ValueError: If ``arch`` is not supported. Raised before the checkpoint
            is read, so a wrong ``arch`` fails immediately.
        RuntimeError: If not a single checkpoint tensor matched the model, i.e.
            the non-strict load would otherwise have silently loaded nothing.
    """
    if arch != "RETFound_mae":
        # For dinov2-based RETFound, we'll adjust later
        raise ValueError(f"Unsupported arch: {arch}")

    # NOTE: weights_only=False unpickles arbitrary Python objects; only load
    # checkpoints that come from a trusted source.
    checkpoint = torch.load(chkpt_dir, map_location="cpu", weights_only=False)

    model = getattr(models, arch)(
        img_size=224,
        num_classes=5,
        drop_path_rate=0,
        global_pool=True,
    )

    state_dict = checkpoint["model"]
    load_result = model.load_state_dict(state_dict, strict=False)

    # strict=False hides mismatches, so check that something was really loaded
    # and report what was skipped.
    loaded_keys = set(state_dict) - set(load_result.unexpected_keys)
    if not loaded_keys:
        raise RuntimeError(
            f"No tensor from checkpoint['model'] matched the {arch} model; "
            "the checkpoint does not fit this architecture."
        )
    print(
        f"Loaded {len(loaded_keys)} checkpoint tensors "
        f"({len(load_result.missing_keys)} model keys missing, "
        f"{len(load_result.unexpected_keys)} checkpoint keys unused)"
    )
    return model


def run_one_image(img, model, arch):
    """Return what ``model.forward_features`` produces for a single image.

    Args:
        img: Array indexed as (height, width, channel); a batch axis is added
            and the layout is permuted to channels-first before the forward pass.
        model: Module exposing ``forward_features``.
        arch: Unused; kept so existing callers keep working.

    Returns:
        The ``forward_features`` output with all size-1 axes squeezed out. It is
        computed under ``torch.no_grad`` and therefore carries no autograd graph.
    """
    # Run on whatever device the model lives on; only a model without any
    # parameters falls back to the notebook-level `device`.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    x = torch.tensor(img).unsqueeze(dim=0)
    x = torch.einsum("nhwc->nchw", x)
    x = x.to(target_device, non_blocking=True)

    # Inference only: do not build an autograd graph for every image.
    with torch.no_grad():
        latent = model.forward_features(x.float())

    return torch.squeeze(latent)


def get_feature(data_path, chkpt_dir, device, arch="RETFound_mae"):
    """Extract one feature vector per image file found in ``data_path``.

    Args:
        data_path: Directory containing the images.
        chkpt_dir: Checkpoint file path forwarded to ``prepare_model``.
        device: Torch device the model is moved to.
        arch: Architecture name forwarded to ``prepare_model`` and
            ``run_one_image``.

    Returns:
        ``(name_list, feature_list)``: the file names in sorted order and, in
        the same order, one NumPy array of features per image.
    """
    model_ = prepare_model(chkpt_dir, arch)
    model_.to(device)
    model_.eval()

    # Sorted so the row order is reproducible (os.listdir order is arbitrary);
    # sub-directories are skipped because they cannot be opened as images.
    img_list = sorted(
        entry
        for entry in os.listdir(data_path)
        if os.path.isfile(os.path.join(data_path, entry))
    )
    name_list = []
    feature_list = []

    # Inference only: no autograd bookkeeping is needed for any image.
    with torch.no_grad():
        for finished_num, fname in enumerate(img_list, start=1):
            # The context manager closes the file handle once pixels are copied.
            with Image.open(os.path.join(data_path, fname)) as raw:
                img = np.array(raw.convert("RGB").resize((224, 224))) / 255.0

            # Standardise each channel with this image's own mean and std.
            for channel in range(3):
                values = img[..., channel]
                std = values.std()
                # A constant channel has std 0; divide by 1 instead of
                # producing 0/0 = NaN features.
                img[..., channel] = (values - values.mean()) / (std if std > 0 else 1.0)

            latent_feature = run_one_image(img, model_, arch)

            name_list.append(fname)
            feature_list.append(latent_feature.detach().cpu().numpy())

            if finished_num % 1000 == 0:
                print(f"{finished_num} images finished")

    return name_list, feature_list


# ---- fetch the RETFound_mae_natureCFP checkpoint from the Hugging Face Hub ----
chkpt_dir = hf_hub_download(
    filename="RETFound_mae_natureCFP.pth",
    repo_id="YukunZhou/RETFound_mae_natureCFP",
)

print("Checkpoint downloaded to:", chkpt_dir)

# ---- compute the features for every image in data_path ----
name_list, feature = get_feature(data_path, chkpt_dir, device, arch="RETFound_mae")

# ---- build one table: image name followed by feature_0, feature_1, ... ----
df_imgname = pd.DataFrame(name_list, columns=["name"])
df_feature = pd.DataFrame(feature)
column_name_list = [f"feature_{i}" for i in range(df_feature.shape[1])]

df_visualization = pd.concat([df_imgname, df_feature], axis=1)
df_visualization.columns = ["name", *column_name_list]

# ---- write the table to Drive as CSV (folder is created when missing) ----
output_dir = "/content/drive/MyDrive/brset_retfound_embeddings"
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, "Embeddings_brset_RETFound_mae_natureCFP.csv")
df_visualization.to_csv(output_path, index=False)

print("Embeddings saved to:", output_path)

In [None]:
!pip install -q huggingface_hub

from huggingface_hub import login, hf_hub_download

TOKEN = "your-token here"  # ypur token here

login(TOKEN, add_to_git_credential=False)


In [None]:
import getpass
import os
from huggingface_hub import login, hf_hub_download

# Never hardcode the token in the notebook: reuse the HF_TOKEN environment
# variable when it is set, otherwise ask for it through a hidden prompt.
TOKEN = os.environ.get("HF_TOKEN") or getpass.getpass(
    "Enter your Hugging Face token (input hidden): "
)

login(TOKEN)  # or skip this and just pass token=TOKEN below

# Download the RETFound_mae_shanghai checkpoint, authenticating explicitly
# with TOKEN rather than relying on any cached login.
chkpt_dir = hf_hub_download(
    repo_id="YukunZhou/RETFound_mae_shanghai",
    filename="RETFound_mae_shanghai.pth",
    token=TOKEN,  # force this token
)
print("Downloaded to:", chkpt_dir)

In [None]:
import os
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from PIL import Image
import models_vit as models
from huggingface_hub import hf_hub_download

# ---- reproducibility and display ----
# Seed NumPy and PyTorch with the same fixed value; print arrays in full.
SEED = 1
np.random.seed(SEED)
torch.manual_seed(SEED)
np.set_printoptions(threshold=np.inf)

# ---- paths ----
# Folder holding the images to embed (use the same folder as for the NatureCFP
# run); the cell stops here if it is missing.
data_path = "/content/drive/MyDrive/Image_data/Data/brset/images_224"
assert os.path.isdir(data_path), f"data_path does not exist: {data_path}"

# ---- compute device ----
# Use the GPU when one is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

def prepare_model(chkpt_dir, arch="RETFound_mae"):
    """Build a RETFound model and load pretrained weights into it.

    Args:
        chkpt_dir: Path of the checkpoint file (despite the name, it is handed
            straight to ``torch.load``).
        arch: Name of the model constructor looked up in ``models``. Only
            ``"RETFound_mae"`` is supported.

    Returns:
        The constructed model with the checkpoint's ``"model"`` weights loaded
        non-strictly (keys that do not match are skipped).

    Raises:
        ValueError: If ``arch`` is not supported. Raised before the checkpoint
            is read, so a wrong ``arch`` fails immediately.
        RuntimeError: If not a single checkpoint tensor matched the model, i.e.
            the non-strict load would otherwise have silently loaded nothing.
    """
    if arch != "RETFound_mae":
        raise ValueError(f"Unsupported arch: {arch}")

    # NOTE: weights_only=False unpickles arbitrary Python objects; only load
    # checkpoints that come from a trusted source.
    checkpoint = torch.load(chkpt_dir, map_location="cpu", weights_only=False)

    model = getattr(models, arch)(
        img_size=224,
        num_classes=5,
        drop_path_rate=0,
        global_pool=True,
    )

    state_dict = checkpoint["model"]
    load_result = model.load_state_dict(state_dict, strict=False)

    # strict=False hides mismatches, so check that something was really loaded
    # and report what was skipped.
    loaded_keys = set(state_dict) - set(load_result.unexpected_keys)
    if not loaded_keys:
        raise RuntimeError(
            f"No tensor from checkpoint['model'] matched the {arch} model; "
            "the checkpoint does not fit this architecture."
        )
    print(
        f"Loaded {len(loaded_keys)} checkpoint tensors "
        f"({len(load_result.missing_keys)} model keys missing, "
        f"{len(load_result.unexpected_keys)} checkpoint keys unused)"
    )
    return model


def run_one_image(img, model, arch):
    """Return what ``model.forward_features`` produces for a single image.

    Args:
        img: Array indexed as (height, width, channel); a batch axis is added
            and the layout is permuted to channels-first before the forward pass.
        model: Module exposing ``forward_features``.
        arch: Unused; kept so existing callers keep working.

    Returns:
        The ``forward_features`` output with all size-1 axes squeezed out. It is
        computed under ``torch.no_grad`` and therefore carries no autograd graph.
    """
    # Run on whatever device the model lives on; only a model without any
    # parameters falls back to the notebook-level `device`.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    x = torch.tensor(img).unsqueeze(dim=0)
    x = torch.einsum("nhwc->nchw", x)
    x = x.to(target_device, non_blocking=True)

    # Inference only: do not build an autograd graph for every image.
    with torch.no_grad():
        latent = model.forward_features(x.float())

    return torch.squeeze(latent)


def get_feature(data_path, chkpt_dir, device, arch="RETFound_mae"):
    """Extract one feature vector per image file found in ``data_path``.

    Args:
        data_path: Directory containing the images.
        chkpt_dir: Checkpoint file path forwarded to ``prepare_model``.
        device: Torch device the model is moved to.
        arch: Architecture name forwarded to ``prepare_model`` and
            ``run_one_image``.

    Returns:
        ``(name_list, feature_list)``: the file names in sorted order and, in
        the same order, one NumPy array of features per image.
    """
    model_ = prepare_model(chkpt_dir, arch)
    model_.to(device)
    model_.eval()

    # Sorted so the row order is reproducible (os.listdir order is arbitrary);
    # sub-directories are skipped because they cannot be opened as images.
    img_list = sorted(
        entry
        for entry in os.listdir(data_path)
        if os.path.isfile(os.path.join(data_path, entry))
    )
    name_list = []
    feature_list = []

    # Inference only: no autograd bookkeeping is needed for any image.
    with torch.no_grad():
        for finished_num, fname in enumerate(img_list, start=1):
            # The context manager closes the file handle once pixels are copied.
            with Image.open(os.path.join(data_path, fname)) as raw:
                img = np.array(raw.convert("RGB").resize((224, 224))) / 255.0

            # Standardise each channel with this image's own mean and std.
            for channel in range(3):
                values = img[..., channel]
                std = values.std()
                # A constant channel has std 0; divide by 1 instead of
                # producing 0/0 = NaN features.
                img[..., channel] = (values - values.mean()) / (std if std > 0 else 1.0)

            latent_feature = run_one_image(img, model_, arch)

            name_list.append(fname)
            feature_list.append(latent_feature.detach().cpu().numpy())

            if finished_num % 1000 == 0:
                print(f"{finished_num} images finished")

    return name_list, feature_list


# ---- fetch the RETFound_mae_shanghai checkpoint from the Hugging Face Hub ----
chkpt_dir = hf_hub_download(
    filename="RETFound_mae_shanghai.pth",
    repo_id="YukunZhou/RETFound_mae_shanghai",
)
print("Checkpoint downloaded to:", chkpt_dir)

# ---- compute the features for every image in data_path ----
name_list, feature = get_feature(data_path, chkpt_dir, device, arch="RETFound_mae")

# ---- build one table: image name followed by feature_0, feature_1, ... ----
df_imgname = pd.DataFrame(name_list, columns=["name"])
df_feature = pd.DataFrame(feature)
column_name_list = [f"feature_{i}" for i in range(df_feature.shape[1])]

df_visualization = pd.concat([df_imgname, df_feature], axis=1)
df_visualization.columns = ["name", *column_name_list]

# ---- write the table to Drive as CSV (folder is created when missing) ----
output_dir = "/content/drive/MyDrive/brset_retfound_embeddings"
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, "Embeddings_brset_RETFound_mae_shanghai.csv")
df_visualization.to_csv(output_path, index=False)

print("Embeddings saved to:", output_path)

In [None]:
import getpass
import os
from huggingface_hub import login, hf_hub_download

# Never hardcode the token in the notebook: reuse the HF_TOKEN environment
# variable when it is set, otherwise ask for it through a hidden prompt.
TOKEN = os.environ.get("HF_TOKEN") or getpass.getpass(
    "Enter your Hugging Face token (input hidden): "
)

login(TOKEN)  # or skip this and just pass token=TOKEN below

# Download the RETFound_dinov2_shanghai checkpoint, authenticating explicitly
# with TOKEN rather than relying on any cached login.
chkpt_dir = hf_hub_download(
    repo_id="YukunZhou/RETFound_dinov2_shanghai",
    filename="RETFound_dinov2_shanghai.pth",
    token=TOKEN,  # force this token
)
print("Downloaded to:", chkpt_dir)

In [None]:
import os
import torch
import numpy as np
import pandas as pd
from PIL import Image
import models_vit as models
from huggingface_hub import hf_hub_download

# ---- reproducibility and display ----
# Seed NumPy and PyTorch with the same fixed value; print arrays in full.
SEED = 1
np.random.seed(SEED)
torch.manual_seed(SEED)
np.set_printoptions(threshold=np.inf)

# ---- paths ----
# Folder holding the images to embed (the same one used for the earlier runs);
# the cell stops here if it is missing.
data_path = "/content/drive/MyDrive/Image_data/Data/brset/images_224"
assert os.path.isdir(data_path), f"data_path does not exist: {data_path}"

# Folder on Drive that receives the embeddings CSV; created when missing.
output_dir = "/content/drive/MyDrive/brset_retfound_embeddings"
os.makedirs(output_dir, exist_ok=True)

# ---- compute device ----
# Use the GPU when one is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

# Bare attribute holder handed to the RETFound_dinov2 constructor as its `args`.
class DummyArgs:
    """Empty object; the fields are attached after construction."""


args = DummyArgs()
for _field, _value in {"model_arch": "retfound_dinov2", "nb_classes": 5}.items():
    setattr(args, _field, _value)

def prepare_model_dinov2(chkpt_dir):
    """Build the DINOv2-based RETFound model and load the checkpoint's teacher weights.

    Args:
        chkpt_dir: Path of the checkpoint file (despite the name, it is handed
            straight to ``torch.load``).

    Returns:
        The model with the checkpoint's ``"teacher"`` weights loaded
        non-strictly (keys that do not match are skipped).

    Raises:
        RuntimeError: If not a single checkpoint tensor matched the model, i.e.
            the non-strict load would otherwise have silently loaded nothing.
    """
    # Build DINOv2-based RETFound backbone (reads the notebook-level `args`).
    model = models.RETFound_dinov2(
        args,
        num_classes=5,
    )

    # Load teacher weights (as in main_finetune.py).
    # NOTE: weights_only=False unpickles arbitrary Python objects; only load
    # checkpoints that come from a trusted source.
    checkpoint = torch.load(chkpt_dir, map_location="cpu", weights_only=False)
    checkpoint_model = checkpoint["teacher"]

    # Non-strict load (ignore mismatched head etc.), but keep the result:
    # strict=False would also hide a checkpoint in which nothing matches.
    load_result = model.load_state_dict(checkpoint_model, strict=False)

    loaded_keys = set(checkpoint_model) - set(load_result.unexpected_keys)
    if not loaded_keys:
        # NOTE(review): a total mismatch may mean the checkpoint keys carry a
        # prefix (e.g. "backbone.") that the model's keys do not have --
        # confirm against how upstream main_finetune.py prepares these keys.
        raise RuntimeError(
            "No tensor from checkpoint['teacher'] matched the RETFound_dinov2 "
            "model; the embeddings would not come from the RETFound weights."
        )
    print(
        f"Loaded {len(loaded_keys)} checkpoint tensors "
        f"({len(load_result.missing_keys)} model keys missing, "
        f"{len(load_result.unexpected_keys)} checkpoint keys unused)"
    )
    return model

def run_one_image(img, model):
    """Return one pooled feature vector for a single image.

    Args:
        img: Array indexed as (height, width, channel); a batch axis is added
            and the layout is permuted to channels-first before the forward pass.
        model: Module exposing ``forward_features``.

    Returns:
        The mean over axis 1 of the ``forward_features`` output with index 0 of
        that axis left out, squeezed of size-1 axes.
    """
    batch = torch.einsum("nhwc->nchw", torch.tensor(img).unsqueeze(dim=0))
    batch = batch.to(device, non_blocking=True)

    # Forward pass without autograd tracking.
    with torch.no_grad():
        tokens = model.forward_features(batch.float())

    # Drop index 0 of the token axis and average the rest.
    # Assumes index 0 is the CLS token and the output is (1, tokens, dim) --
    # TODO confirm against the model.
    patch_tokens = tokens[:, 1:, :]
    pooled = patch_tokens.mean(dim=1)
    return torch.squeeze(pooled)

def get_feature_dinov2(data_path, chkpt_dir, device):
    """Extract one feature vector per image file found in ``data_path``.

    Args:
        data_path: Directory containing the images.
        chkpt_dir: Checkpoint file path forwarded to ``prepare_model_dinov2``.
        device: Torch device the model is moved to.

    Returns:
        ``(name_list, feature_list)``: the file names in sorted order and, in
        the same order, one NumPy array of features per image.
    """
    model_ = prepare_model_dinov2(chkpt_dir)
    model_.to(device)
    model_.eval()

    # Sorted so the row order is reproducible (os.listdir order is arbitrary);
    # sub-directories are skipped because they cannot be opened as images.
    img_list = sorted(
        entry
        for entry in os.listdir(data_path)
        if os.path.isfile(os.path.join(data_path, entry))
    )
    name_list = []
    feature_list = []

    for finished_num, fname in enumerate(img_list, start=1):
        # The context manager closes the file handle once pixels are copied.
        with Image.open(os.path.join(data_path, fname)) as raw:
            img = np.array(raw.convert("RGB").resize((224, 224))) / 255.0

        # Standardise each channel with this image's own mean and std.
        for channel in range(3):
            values = img[..., channel]
            std = values.std()
            # A constant channel has std 0; divide by 1 instead of producing
            # 0/0 = NaN features.
            img[..., channel] = (values - values.mean()) / (std if std > 0 else 1.0)

        latent_feature = run_one_image(img, model_)
        name_list.append(fname)
        feature_list.append(latent_feature.detach().cpu().numpy())

        if finished_num % 1000 == 0:
            print(f"{finished_num} images finished")

    return name_list, feature_list

# ---- fetch the RETFound_dinov2_shanghai checkpoint from the Hugging Face Hub ----
chkpt_dir = hf_hub_download(
    filename="RETFound_dinov2_shanghai.pth",
    repo_id="YukunZhou/RETFound_dinov2_shanghai",
)
print("Checkpoint downloaded to:", chkpt_dir)

# ---- compute the features for every image in data_path ----
name_list, feature = get_feature_dinov2(data_path, chkpt_dir, device)

# ---- build one table: image name followed by feature_0, feature_1, ... ----
df_imgname = pd.DataFrame(name_list, columns=["name"])
df_feature = pd.DataFrame(feature)  # one row per image
column_name_list = [f"feature_{i}" for i in range(df_feature.shape[1])]

df_visualization = pd.concat([df_imgname, df_feature], axis=1)
df_visualization.columns = ["name", *column_name_list]

# ---- write the table as CSV into the previously created output_dir ----
output_path = os.path.join(output_dir, "Embeddings_brset_RETFound_dinov2_shanghai.csv")
df_visualization.to_csv(output_path, index=False)

print("Embeddings saved to:", output_path)