In [1]:
import torch
from tqdm import tqdm
from torchvision import transforms, models
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms
import os
import random

import numpy as np
from tqdm import tqdm
from PIL import Image
import ssl
import gc  

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Ask torch.hub which entry points the facebookresearch/dinov2 repository exposes;
# the name passed to torch.hub.load has to be one of the returned strings.
torch.hub.list('facebookresearch/dinov2')  # List all models available

Using cache found in /home/hchen19/.cache/torch/hub/facebookresearch_dinov2_main


['dinov2_vitb14',
 'dinov2_vitb14_dd',
 'dinov2_vitb14_lc',
 'dinov2_vitb14_ld',
 'dinov2_vitb14_reg',
 'dinov2_vitb14_reg_lc',
 'dinov2_vitg14',
 'dinov2_vitg14_dd',
 'dinov2_vitg14_lc',
 'dinov2_vitg14_ld',
 'dinov2_vitg14_reg',
 'dinov2_vitg14_reg_lc',
 'dinov2_vitl14',
 'dinov2_vitl14_dd',
 'dinov2_vitl14_lc',
 'dinov2_vitl14_ld',
 'dinov2_vitl14_reg',
 'dinov2_vitl14_reg_lc',
 'dinov2_vits14',
 'dinov2_vits14_dd',
 'dinov2_vits14_lc',
 'dinov2_vits14_ld',
 'dinov2_vits14_reg',
 'dinov2_vits14_reg_lc']

In [3]:

# NOTE(review): this replaces the default HTTPS context for the *whole kernel* with one that
# skips certificate verification, so every later HTTPS download (including the model weights
# fetched by torch.hub) is unauthenticated. Kept because the download may depend on it in this
# environment; prefer fixing the CA bundle and deleting this line.
ssl._create_default_https_context = ssl._create_unverified_context

# Load the pretrained DINOv2 ViT-L/14 backbone through torch.hub.
dinov2_model = torch.hub.load('facebookresearch/dinov2', 'dinov2_vitl14')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using device:", device)
dinov2_model.to(device)  # Move your model to the GPU
# The model is only used for feature extraction, so switch it to inference mode:
# modules are created in training mode by default and nothing above changes that.
dinov2_model.eval()

def get_labels(files):
    """
    Collect the unique labels encoded in the parent-directory names of a list of file paths.

    The label of a path is the name of the directory that directly contains the file, e.g.
    ``data/cat/001.tif`` yields ``cat``. Forward slashes and backslashes are both accepted
    as separators (also mixed within one path), so the result does not depend on the
    platform the paths were produced on.

    :param files: iterable of file paths (strings) of the form ``<...>/<label>/<filename>``
    :return: list of distinct labels in order of first appearance; paths without any
        directory component carry no label and are skipped
    """
    seen = set()
    labels = []
    for file_path in files:
        # Normalise separators first so Windows- and Unix-style paths are handled alike.
        normalized = file_path.replace("\\", "/")
        directory, separator, _ = normalized.rpartition("/")
        if not separator:
            # Bare file name: there is no parent directory to take a label from.
            continue
        label = directory.rsplit("/", 1)[-1]
        # The set gives O(1) membership tests; the list preserves first-seen order.
        if label not in seen:
            seen.add(label)
            labels.append(label)
    return labels


def list_files(dataset_path):
    """
    Recursively gather the paths of all ``.tif`` files below ``dataset_path``.

    Directories are visited in sorted order and the file names inside each directory are
    sorted as well, so the returned list is ordered deterministically. The extension match
    is case-insensitive. Progress is reported on stdout.

    :param dataset_path: root directory to search
    :return: list of paths, each built by joining the containing directory and the file name
    """
    print("Listing files in:", dataset_path)
    walked = sorted(os.walk(dataset_path))
    tif_paths = [
        os.path.join(folder, filename)
        for folder, _subdirs, filenames in walked
        for filename in sorted(filenames)
        if filename.lower().endswith('.tif')
    ]
    print(f"Found {len(tif_paths)} .tif files.")
    return tif_paths


class CustomImageDataset(Dataset):
    """
    Dataset over every ``.tif`` image found (recursively, via ``list_files``) under a directory.

    Each item is the pair ``(image, img_path)``: the file is opened with PIL, converted to
    RGB, passed through ``transform`` and returned together with the path it was read from,
    so downstream code can tell which file each sample came from.

    :param img_dir: root directory that is searched for ``.tif`` files
    :param transform: optional callable applied to the RGB PIL image. When omitted, the
        default pipeline is used: resize to 252, center-crop to 252x252, convert to a
        tensor and normalize per channel.
    """

    def __init__(self, img_dir, transform=None):
        self.img_dir = img_dir
        self.images = list_files(self.img_dir)
        # Sanity-check preview of the first paths that were discovered.
        print(self.images[:100])
        if transform is None:
            # 252 = 18 * 14 -- presumably chosen as a multiple of the /14 patch size of the
            # ViT-*/14 backbones this notebook loads (TODO confirm). The mean/std values are
            # the ImageNet channel statistics commonly used with torchvision models.
            transform = transforms.Compose([
                transforms.Resize(252),
                transforms.CenterCrop(252),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225]),
            ])
        self.transform = transform

    def __len__(self):
        """Return the number of image files that were found."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load image ``idx`` and return ``(transformed_image, img_path)``."""
        img_path = self.images[idx]
        # convert() returns an independent in-memory image, so the source file can be closed
        # right away; otherwise the handle of every TIFF stays open until it is
        # garbage-collected.
        with Image.open(img_path) as source:
            image = source.convert("RGB")
        image = self.transform(image)
        return image, img_path








Using cache found in /home/hchen19/.cache/torch/hub/facebookresearch_dinov2_main


Using device: cuda


In [4]:


dir_path = "/mnt/gvd0n1/Abbas/Projects/Dyer/Danielle/Fundus_batch6_April29/crop/"
dataset = CustomImageDataset(dir_path)
print(len(dataset))
# shuffle=False keeps the feature rows in the same order as dataset.images.
train_dataloader = DataLoader(dataset, batch_size=64, shuffle=False)

# Row i of final_img_features is the embedding of the file at final_img_filepaths[i].
final_img_features = []
final_img_filepaths = []

# Errors are deliberately NOT caught here: printing the exception and breaking out of the
# loop left truncated lists behind that later cells would save as if the run had completed.
# Letting the exception propagate stops the notebook with a full traceback; the batches
# processed so far are still available in the two lists above.
with torch.no_grad():  # Disable gradient computation
    for image_tensors, file_paths in tqdm(train_dataloader):
        img_t = image_tensors.to(device)
        image_features = dinov2_model(img_t)  # 384 small, 768 base, 1024 large
        # L2-normalize every embedding.
        image_features = image_features / image_features.norm(dim=-1, keepdim=True)

        # Only plain Python data is kept, so no GPU memory is held across iterations: the
        # batch tensors are released as soon as the names are rebound in the next iteration,
        # which makes per-batch del / empty_cache / gc.collect calls unnecessary.
        final_img_features.extend(image_features.cpu().tolist())
        final_img_filepaths.extend(file_paths)


Listing files in: /mnt/gvd0n1/Abbas/Projects/Dyer/Danielle/Fundus_batch6_April29/crop/
Found 21294 .tif files.
['/mnt/gvd0n1/Abbas/Projects/Dyer/Danielle/Fundus_batch6_April29/crop/DL6-1-23-3_B-T87L964D_OCT_Image_LE_Line-2023-01-10-09-16-25_fundus_298.tif', '/mnt/gvd0n1/Abbas/Projects/Dyer/Danielle/Fundus_batch6_April29/crop/DL6-1-23-3_B-T87L964D_OCT_Image_LE_Line-2023-01-10-09-16-25_fundus_542.tif', '/mnt/gvd0n1/Abbas/Projects/Dyer/Danielle/Fundus_batch6_April29/crop/DL6-1-23-3_B-T87L964D_OCT_Image_LE_Line-2023-01-10-09-16-25_fundus_786.tif', '/mnt/gvd0n1/Abbas/Projects/Dyer/Danielle/Fundus_batch6_April29/crop/DL6-1-23-3_B-T87L964D_OCT_Image_LE_Line-2023-01-10-09-16-28_fundus_298.tif', '/mnt/gvd0n1/Abbas/Projects/Dyer/Danielle/Fundus_batch6_April29/crop/DL6-1-23-3_B-T87L964D_OCT_Image_LE_Line-2023-01-10-09-16-28_fundus_542.tif', '/mnt/gvd0n1/Abbas/Projects/Dyer/Danielle/Fundus_batch6_April29/crop/DL6-1-23-3_B-T87L964D_OCT_Image_LE_Line-2023-01-10-09-16-28_fundus_786.tif', '/mnt/gvd0n1

  0%|          | 0/333 [00:00<?, ?it/s]

100%|██████████| 333/333 [34:12<00:00,  6.16s/it]


In [5]:
"1","DL6-1-23-3_B-T87L964D_OCT_Image_LE_Line-2023-01-10-09-16-25_fundus_298.tif","DR"


('1',
 'DL6-1-23-3_B-T87L964D_OCT_Image_LE_Line-2023-01-10-09-16-25_fundus_298.tif',
 'DR')

In [7]:
# Persist the extracted embeddings together with the file paths that identify each row.
feature_dir = "/mnt/storage1/Haoran/projects/retina/retina-dinov2/feature/vitl14_pretrained"
# exist_ok=True creates the directory only when needed, without a separate (racy) exists() check.
os.makedirs(feature_dir, exist_ok=True)

# Refuse to write files whose rows could not be matched back to their source images.
if len(final_img_features) != len(final_img_filepaths):
    raise ValueError(
        f"{len(final_img_features)} feature rows but {len(final_img_filepaths)} file paths; "
        "refusing to save misaligned results"
    )

np.save(os.path.join(feature_dir, "feature.npy"), final_img_features)
# Line i of this file is the image that produced row i of feature.npy.
with open(os.path.join(feature_dir, "filepaths.txt"), "w") as paths_file:
    paths_file.write("\n".join(final_img_filepaths))