In [1]:
import os
import cv2
import pickle
import numpy as np
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms
from skimage import io
from PIL import ImageFile
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd

ImageFile.LOAD_TRUNCATED_IMAGES = True

  warn(


In [18]:
CUDA_DEVICE = "6"  
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = CUDA_DEVICE

#model_dir = 'model_VPC_Zurich'
embedding_layer_name = 'avgpool'
model_name = '256_aug_model'
magnifications = [10, 20]
folds = ['fold1']
patch_size = 512

In [30]:
os.environ.pop("CUDA_VISIBLE_DEVICES", None)  
print("torch:", torch.__version__)
print("compiled_with_cuda:", torch.version.cuda)
print("cuda.is_available:", torch.cuda.is_available())
print("device_count:", torch.cuda.device_count())

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if torch.cuda.is_available():
    try:
        print("Using GPU:", torch.cuda.get_device_name(0))
    except Exception:
        print("GPU visible but get_device_name failed (continuing).")
else:
    print("⚠ Running on CPU")

torch: 2.4.1
compiled_with_cuda: 12.1
cuda.is_available: False
device_count: 1
⚠ Running on CPU


In [21]:
# Input/output
root_patch_dir = '../data/VPC/multiscale_patches_Train/'
output_embedding_dir = '../data/VPC/Embeddings/'

In [22]:
class NN(nn.Module):
    def __init__(self, num_classes=6):
        super().__init__()
        self.model = torchvision.models.resnet18(pretrained=True)
        self.model.fc = nn.Linear(512, num_classes)

    def forward(self, x_dict):
        return {'label': self.model(x_dict['img'])}


In [23]:
def dict_to_device(d, device='cuda'):
    return {k: v.to(device) for k, v in d.items()}


def directory_maker(path):
    if not os.path.exists(path):
        os.makedirs(path)


def get_activation_hook(layer_name):
    activation = {}

    def hook(model, input, output):
        activation[layer_name] = output

    return activation, hook

In [24]:
def load_model(model_name, embedding_layer):
    model = torchvision.models.resnet18(pretrained=True).cuda()
    activation = {}

    def hook(module, input, output):
        activation[embedding_layer] = output

    model.avgpool.register_forward_hook(hook)
    model.eval()
    return model, activation


In [25]:
def save_patch_embeddings(model, activation, patch_paths, output_path, embedding_layer):
    if os.path.exists(output_path):
        print(f"✅ Already exists: {output_path}")
        return

    transform = transforms.ToTensor()
    embeddings = {}

    for img_path in patch_paths:
        if not img_path.endswith('.png'):
            continue
        try:
            img = io.imread(img_path)
            if img.shape[2] == 4: img = img[:, :, :3]
        except:
            print(f"Failed to read image: {img_path}")
            continue

        img = cv2.resize(img, (256, 256), interpolation=cv2.INTER_CUBIC)
        img_tensor = transform(img).unsqueeze(0).cuda()

        with torch.no_grad():
            model(dict_to_device({'img': img_tensor}))
        patch_name = os.path.basename(img_path)[:-4]
        emb = activation[embedding_layer].squeeze(0).cpu().numpy()
        embeddings[patch_name] = emb

    directory_maker(os.path.dirname(output_path))
    with open(output_path, 'wb') as f:
        pickle.dump(embeddings, f)
    print(f"✅ Saved: {output_path}")

In [26]:
def collect_patch_paths(root_dir, magnifications, sizes):
    patch_dict = {mag: {} for mag in magnifications}
    for core in os.listdir(root_dir):
        for size in sizes:
            for mag in magnifications:
                patch_folder = os.path.join(root_dir, core, str(size), str(mag))
                if not os.path.exists(patch_folder): continue
                patch_paths = [os.path.join(patch_folder, f) for f in os.listdir(patch_folder) if f.endswith('.png')]
                patch_dict[mag][core] = patch_paths
    return patch_dict


In [None]:
def compute_similarity(pkl_path, metric='cosine', topk=None, save_csv=True):
    with open(pkl_path, 'rb') as f:
        emb_dict = pickle.load(f)

    patch_names = sorted(emb_dict.keys())
    embeddings = np.array([emb_dict[k] for k in patch_names])

    if metric == 'cosine':
        sim_matrix = cosine_similarity(embeddings)
    elif metric == 'l2':
        sim_matrix = -np.linalg.norm(embeddings[:, None] - embeddings[None, :], axis=-1)
    else:
        raise ValueError("Unsupported metric")

    similarity_list = []
    for i, name_i in enumerate(patch_names):
        sims = sim_matrix[i]
        indices = np.argsort(-sims)  # descending
        for j in indices:
            if i == j:
                continue  # skip self-similarity
            if topk and len(similarity_list) >= topk * len(patch_names):
                break
            name_j = patch_names[j]
            similarity_list.append((name_i, name_j, float(sims[j])))

    if save_csv:
        import pandas as pd
        df = pd.DataFrame(similarity_list, columns=['patch_i', 'patch_j', 'similarity'])
        out_path = pkl_path.replace('.pkl', f'_{metric}_similarity_pairs.csv')
        df.to_csv(out_path, index=False)

    return similarity_list

In [None]:
def run_embedding_pipeline():
    patch_dict = collect_patch_paths(root_patch_dir, magnifications, [patch_size])

    for fold in folds:
        for mag in magnifications:
            # Load ImageNet-pretrained ResNet18 with forward hook
            model, activation = load_model(model_name, embedding_layer_name)

            for core_name, patch_list in patch_dict[mag].items():
                out_pkl = os.path.join(output_embedding_dir, fold, core_name, str(patch_size), str(mag),
                                       f'{model_name}_{embedding_layer_name}.pkl')
                save_patch_embeddings(model, activation, patch_list, out_pkl, embedding_layer_name)

                # Optional: generate similarity 
                compute_similarity(out_pkl, metric='cosine')


In [29]:
if __name__ == "__main__":
    run_embedding_pipeline()




RuntimeError: No CUDA GPUs are available