In [1]:
import time
import os
from os import makedirs
from os.path import join, basename, splitext
import pickle
import glob
import operator
import copy
import math
from collections import Counter, defaultdict
import requests
import json
import io
import datetime
import sys
import yaml

import h5py
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd
import openslide
from matplotlib import pyplot as plt

sys.path.append("../../")
from database import HistoDatabase

# Functions

In [None]:
def unpickle_object(file_path):
    """
    Read a pickle file and return the object stored in it.
    Input:
        file_path (str): Path of the pickle file to read
    Output:
        The unpickled object

    NOTE: unpickling can execute arbitrary code; only load trusted files.
    """
    with open(file_path, mode='rb') as stream:
        payload = pickle.load(stream)
    return payload

In [None]:
def get_link(name):
    """
    Look up a slide file on the GDC files endpoint and build its portal URL.
    Input:
        name (str): Slide file name without the ".svs" extension
    Output:
        link (str): VIEW_URL followed by the id of the first returned row
    Raises:
        requests.HTTPError: If the endpoint answers with an error status
    """
    # Match the single file whose name is exactly "<name>.svs".
    filters = {
        "op": "in",
        "content":{
            "field": "file_name",
            "value": [name + ".svs"]
            }
    }

    params = {
        "filters": json.dumps(filters),
        "fields": "file_name",
        "format": "CSV",
        "size": "1"
    }

    response = requests.post(FILES_ENDPOINT, headers={"Content-Type": "application/json"}, json=params)
    # Surface HTTP failures here instead of as a confusing CSV parsing error below.
    response.raise_for_status()
    df = pd.read_csv(io.StringIO(response.content.decode('utf-8')), dtype='object')

    # The reply is expected to carry an "id" column; the first row is the match.
    return VIEW_URL + df.id[0]

In [None]:
def Uncertainty_Cal(bag, weight, is_organ=False):
    """
    Implementation of Weighted-Uncertainty-Cal in the paper.
    Input:
        bag (list): A list of dictionaries which contain the search results for one mosaic.
            Every entry needs a 'hamming_dist' key and a 'site' key (is_organ=True)
            or a 'diagnosis' key (is_organ=False)
        weight (dict): Weight of every label that can occur in bag
        is_organ (bool): Use the 'site' field as label instead of 'diagnosis'
    Output:
        ent (float): The entropy of the mosaic retrieval results
        label_count (dict): The label and the corresponding normalized weight for the mosaic
        hamming_dist (list): A list of hamming distances between the input mosaic and the results
        All three are None when bag is empty.

    NOTE: the "floor at 1" step used to assign to the loop variable only, so it
    never took effect. It is now applied, which changes the entropies compared
    with runs made before this fix.
    """
    if len(bag) < 1:
        return None, None, None

    label_key = 'site' if is_organ else 'diagnosis'
    label = [bres[label_key] for bres in bag]
    hamming_dist = [bres['hamming_dist'] for bres in bag]

    # Rank-weighted count: the result at rank r contributes weight / (r + 1).
    label_count = defaultdict(float)
    for lb_idx, lb in enumerate(label):
        label_count[lb] += (1. / (lb_idx + 1)) * weight[lb]

    # If the weighted count is less than 1, round it up to 1.
    for k, v in label_count.items():
        if v < 1.0:
            label_count[k] = 1.0

    # Normalizing the count to [0,1] for entropy calculation
    total = 0
    for v in label_count.values():
        total += v
    for k in label_count.keys():
        label_count[k] = label_count[k] / total

    ent = 0
    for v in label_count.values():
        ent += (-v * np.log2(v))
    return ent, label_count, hamming_dist

In [None]:
def Clean(len_info, bag_summary):
    """
    Implementation of Clean in the paper
    Input:
        len_info (list): The length of retrieval results for each mosaic.
            Kept for interface compatibility only: the lengths are re-derived
            from the last field of every bag_summary entry.
        bag_summary (list): A list of tuples that contain the positional index of
            the mosaic, the entropy, the hamming distance list, and the length of
            the retrieval results
    Output:
        bag_summary (list): The same format as the input, sorted by entropy and
            without low quality results (i.e., results with large hamming distance)
        top5_hamming_dist (float): The mean over all remaining mosaics of the
            average hamming distance of their top 5 retrieval results
            (nan when no mosaic is left)
    """
    LOW_FREQ_THRSH = 3
    LOW_PRECENT_THRSH = 5
    HIGH_PERCENT_THRSH = 95
    len_info = [b[-1] for b in bag_summary]

    # Only trim by result length when the lengths are diverse enough.
    if len(set(len_info)) > LOW_FREQ_THRSH:
        # The cut-offs do not depend on the bag, so compute them once.
        low_cut = np.percentile(len_info, LOW_PRECENT_THRSH)
        high_cut = np.percentile(len_info, HIGH_PERCENT_THRSH)
        bag_summary = [b for b in bag_summary if low_cut < b[-1] < high_cut]

    # Remove the mosaic if its top5 mean hamming distance is bigger than average
    top5_hamming_dist = np.mean([np.mean(b[2][0:5]) for b in bag_summary])

    bag_summary = sorted(bag_summary, key=lambda x: (x[1]))  # sort by certainty
    bag_summary = [b for b in bag_summary if np.mean(b[2][0:5]) <= top5_hamming_dist]
    return bag_summary, top5_hamming_dist

In [None]:
def Filtered_BY_Prediction(bag_summary, label_count_summary):
    """
    Implementation of Filtered_By_Prediction in the paper
    Input:
        bag_summary (list): The same as the output from Clean
        label_count_summary (dict): The dictionary storing the diagnosis occurrence
        of the retrieval result in each mosaic
    Output:
        bag_removed (dict): The index (positional) of the mosaics that should not be
        considered among the top5, each mapped to 1. Empty when bag_summary is empty.
    """
    top_bags = bag_summary[0:5]
    # Nothing to vote on: without this guard the candidate lookup raised IndexError.
    if not top_bags:
        return {}

    voting_board = defaultdict(float)
    top_labels = []  # (bag index, label with the largest count in that bag)
    for b in top_bags:
        bag_index = b[0]
        counts = label_count_summary[bag_index]
        for k, v in counts.items():
            voting_board[k] += v
        top_labels.append((bag_index, max(counts.items(), key=operator.itemgetter(1))[0]))

    # Try the candidates from the most to the least voted one and stop at the
    # first that is the top label of at least one of the bags.
    bag_removed = {}
    for final_vote, _ in sorted(voting_board.items(), key=lambda x: -x[1]):
        bag_removed = {bag_index: 1 for bag_index, max_vote in top_labels
                       if max_vote != final_vote}
        if len(bag_removed) != len(top_bags):
            break
    return bag_removed

In [None]:
def calculate_weights(site):
    """
    Compute inverse-frequency weights for the labels of one database.
    Input:
        site (str): "organ" to weight the folders directly under
        DATA_DIR/PATCHES, otherwise the name of one of those folders, in which
        case its sub-folders are weighted
    Output:
        weight (dict): Folder name -> weight. Weights are proportional to
        1 / (number of files counted for the folder) and sum to 30 for "organ"
        and to 10 otherwise.
    """
    if site == "organ":
        factor = 30
        class_root = join(DATA_DIR, "PATCHES")
        # Two more folder levels sit between the label folder and "patches".
        deeper_levels = ("*", "*")
        label_position = -5
    else:
        factor = 10
        class_root = join(DATA_DIR, "PATCHES", site)
        # One more folder level sits between the label folder and "patches".
        deeper_levels = ("*",)
        label_position = -4

    # Start every label folder at zero, then count the files found under "patches".
    total_slide = {basename(folder): 0 for folder in glob.glob(join(class_root, "*"))}
    for latent_path in glob.glob(join(class_root, "*", *deeper_levels, "patches", "*")):
        total_slide[latent_path.split("/")[label_position]] += 1

    # Using the inverse count as a weight for each label
    sum_inv = 0
    for count in total_slide.values():
        sum_inv += (1./count)

    # Rescale so that the weights sum to `factor`
    norm_fact = factor / sum_inv
    return {label: norm_fact / count for label, count in total_slide.items()}

In [None]:
def save_results(query_slides, extension, site, experiment, pre_step, succ_step, C, T, thrsh, codebook_semantic="../../checkpoints/codebook_semantic.pt"):
    """
    Query the database of `site` with every mosaic patch of every query slide
    of `experiment` and pickle the raw results.
    Input:
        query_slides (dict): Its keys are passed to db.leave_test_slides
        extension (str): Not used by this function; kept for interface compatibility
        site (str): Database folder under DATA_DIR/DATABASES; "organ" makes the
            query label the anatomic site, anything else the diagnosis
        experiment (str): Folder under TEST_DATA_DIR holding the query latents
        pre_step, succ_step, C, T, thrsh: Forwarded unchanged to db.query
        codebook_semantic (str): Forwarded to HistoDatabase
    Output:
        results (dict): slide id -> {'results': list with one db.query result per
        patch, 'label_query': label of the query slide}. The same dict is written to
        TEST_DATA_DIR/<experiment>/Results/<site>/results.pkl
    """
    save_dir = join(TEST_DATA_DIR, experiment, "Results", site)
    makedirs(save_dir, exist_ok=True)

    queries_latent_all = join(TEST_DATA_DIR, experiment, "LATENT", "*", "*", "*", "vqvae", "*")

    database_index_path = join(DATA_DIR, "DATABASES", site, "index_tree", "veb.pkl")
    index_meta_path = join(DATA_DIR, "DATABASES", site, "index_meta", "meta.pkl")
    db = HistoDatabase(database_index_path, index_meta_path, codebook_semantic)

    results = {}
    for latent_path in glob.glob(queries_latent_all):
        # .../<anatomic site>/<diagnosis>/<resolution>/vqvae/<slide id>.h5
        path_parts = latent_path.split("/")
        diagnosis = path_parts[-4]
        anatomic_site = path_parts[-5]
        slide_id = basename(latent_path).replace(".h5", "")

        # NOTE(review): replaces every "vqvae" in the path, not only the folder name.
        densefeat_path = latent_path.replace("vqvae", "densenet").replace(".h5", ".pkl")

        # NOTE(review): the argument is the same on every iteration; left inside
        # the loop because the effect of repeated calls is not visible here.
        db.leave_test_slides(list(query_slides.keys()))

        with h5py.File(latent_path, 'r') as hf:
            feat = hf['features'][:]
        with open(densefeat_path, 'rb') as handle:
            densefeat = pickle.load(handle)

        tmp_res = [db.query(patch_latent, densefeat[idx], pre_step, succ_step, C, T, thrsh)
                   for idx, patch_latent in enumerate(feat)]

        results[slide_id] = {
            'results': tmp_res,
            'label_query': anatomic_site if site == 'organ' else diagnosis,
        }

    with open(join(save_dir, "results.pkl"), 'wb') as handle:
        pickle.dump(results, handle)

    return results

In [None]:
def query(results, site, topK_mMV):
    """
    Turn the raw per-mosaic search results of every query slide into a ranked
    list of retrieved slides.
    Input:
        results (dict): slide id -> {'results': list of per-mosaic result lists, ...}
            as produced by save_results
        site (str): "organ" to use the 'site' field of a result as its label,
            anything else to use 'diagnosis'; also passed to calculate_weights
        topK_mMV (int): Maximum number of retrieved slides kept per query slide
    Output:
        query_results (list): One (slide id, ret_final) tuple per query slide that
        retrieved anything, where ret_final is a list of
        (slide name, hamming distance, label) sorted by (uncertainty, hamming distance)
    """
    is_organ = site == "organ"
    label_key = 'site' if is_organ else 'diagnosis'
    # The weights depend on `site` only; compute them once, on first use.
    weight = None

    query_results = []
    for test_slide in results.keys():
        test_slide_result = results[test_slide]['results']

        # Filter out complete failure case (i.e.,
        # all mosaics fail to retrieve a patch that meets the criteria)
        if sum(len(tt) for tt in test_slide_result) == 0:
            continue

        if weight is None:
            weight = calculate_weights(site)

        bag_summary = []
        len_info = []
        label_count_summary = {}
        for idx, bag in enumerate(test_slide_result):
            ent, label_cnt, dist = Uncertainty_Cal(bag, weight, is_organ=is_organ)
            if ent is not None:
                label_count_summary[idx] = label_cnt
                bag_summary.append((idx, ent, dist, len(bag)))
                len_info.append(len(bag))

        bag_summary, hamming_thrsh = Clean(len_info, bag_summary)
        bag_removed = Filtered_BY_Prediction(bag_summary, label_count_summary)

        # Collect every retrieved slide once, visiting the mosaics in the
        # order returned by Clean.
        ret_final = []
        visited = set()
        for b in bag_summary:
            bag_index = b[0]
            uncertainty = b[1]
            for r in test_slide_result[bag_index]:
                if r['slide_name'] in visited:
                    continue
                # Results of a mosaic with zero uncertainty are always kept;
                # the others must be within the hamming threshold.
                if uncertainty != 0 and not (r['hamming_dist'] <= hamming_thrsh):
                    continue
                ret_final.append((r['slide_name'], r['hamming_dist'], r[label_key], uncertainty, bag_index))
                visited.add(r['slide_name'])

        ranked = sorted(ret_final, key=lambda x: (x[3], x[1]))
        ret_final = [(e[0], e[1], e[2]) for e in ranked
                     if e[-1] not in bag_removed][0:topK_mMV]

        query_results.append((test_slide, ret_final))
    return query_results

In [None]:
def show_results(test_slide, extension, ret_final, metadata, experiment, site_retrieval, topK_mMV):
    """
    Plot the thumbnail of a query slide next to the thumbnails of its retrieved
    slides, save the images, and return the GDC links of the retrieved slides.
    Input:
        test_slide (str): Query slide name without extension (key of query_slides_proxies)
        extension (str): File extension of the query slide
        ret_final (list): (slide name, hamming distance, label) tuples, as built by query
        metadata (DataFrame): Indexed by file name; the "primary_site",
            "project_name" and "id" columns are read
        experiment (str): Folder of the query slide under TEST_WSI_DIR
        site_retrieval (bool): Selects the "wsi_site_retrieval" output folder
            instead of "wsi_vertical"
        topK_mMV (int): Only used to name the output folder
    Output:
        links (list): VIEW_URL + metadata id for every retrieved slide
    """
    retrieval_dir = "wsi_site_retrieval" if site_retrieval else "wsi_vertical"
    save_dir = join("../../TEST_DATA_RESULTS", experiment, "search_result_images", retrieval_dir, query_slides_proxies[test_slide], f"topK_mMV_{topK_mMV}")
    makedirs(save_dir, exist_ok=True)

    query_path = join(TEST_WSI_DIR, experiment, f"{test_slide}.{extension}")
    # The context manager closes the slide even if the thumbnail cannot be read.
    with openslide.open_slide(query_path) as query_slide:
        query_thumbnail = query_slide.get_thumbnail((300, 300))

    n_slots = len(ret_final) + 2
    fig = plt.figure(figsize=(20, 10))  # adjust as necessary
    try:
        plt.subplot(1, n_slots, 1)
        plt.imshow(query_thumbnail)
        plt.axis('off')  # to hide the x and y axis
        plt.title(f"Query\n{query_slides_proxies[test_slide]}")

        # Plot black line
        plt.subplot(1, n_slots, 2)
        plt.plot([0, 0], [0, 1], color='black', transform=plt.gca().transAxes, linewidth=2.0)
        plt.axis('off')

        links = []
        # NOTE(review): numbering starts at 2, so the first result is drawn in
        # the same slot as the black line and the last slot stays empty — confirm
        # this layout is intended.
        for i, result in enumerate(ret_final, 2):
            file_name, sim, _ = result
            file_name = file_name + ".svs"

            site = sites_dict[metadata.loc[file_name, "primary_site"]]
            diagnosis = diagnoses_dict[metadata.loc[file_name, "project_name"]]

            path = join(WSI_DIR, site, diagnosis, file_name)
            with openslide.open_slide(path) as slide:
                thumbnail = slide.get_thumbnail((300, 300))

            plt.subplot(1, n_slots, i)
            plt.imshow(thumbnail)
            plt.axis('off')  # to hide the x and y axis
            plt.title(f'Result {i-2}: {site} - {diagnosis}\n Hamming Dist: {sim :.1f}')  # add caption

            # returning GDC link to slides
            links.append(VIEW_URL + metadata.loc[file_name, "id"])

            # saving to file
            thumbnail.save(join(save_dir, f"result_{i-1}.png"))

        plt.tight_layout()
        plt.show()

        fig.savefig(join(save_dir, "all.png"), bbox_inches='tight')
        fig.savefig(join(save_dir, "all.eps"), format='eps', bbox_inches='tight')
    finally:
        # Release the figure; this function is called in loops.
        plt.close(fig)

    return links

## Patch Functions

In [None]:
def save_results_patch(query_slides, experiment, patch_label_file_path, database_index_path, index_meta_path, save_name, pre_step, succ_step, C, T, thrsh, codebook_semantic="../../checkpoints/codebook_semantic.pt", max_patches=12):
    """
    Query the patch database with the first patches listed in the label file
    and pickle the results.
    Input:
        query_slides (dict): Its keys are passed to db.leave_test_slides
        experiment (str): Folder under TEST_DATA_DIR holding the patch latents
        patch_label_file_path (str): CSV with 'Patch Names' and 'label' columns
        database_index_path, index_meta_path, codebook_semantic: Forwarded to HistoDatabase
        save_name (str): Name (without ".pkl") of the output file
        pre_step, succ_step, C, T, thrsh: Forwarded unchanged to db.query
        max_patches (int or None): Number of leading rows of the label file to
            process; None processes all rows. Defaults to 12, the previously
            hard-coded value. Never exceeds the number of rows.
    Output:
        results (dict): patch name -> {'results': result tuples sorted by their
        field 3, 'label_query': label from the CSV}. The same dict is written to
        TEST_DATA_DIR/<experiment>/_Results_Patch/<save_name>.pkl
    """
    save_dir = join(TEST_DATA_DIR, experiment, "_Results_Patch")
    makedirs(save_dir, exist_ok=True)

    patch_label_file = pd.read_csv(patch_label_file_path)
    db = HistoDatabase(database_index_path, index_meta_path, codebook_semantic, is_patch=True)
    db.leave_test_slides(list(query_slides.keys()))

    n_rows = len(patch_label_file)
    n_patches = n_rows if max_patches is None else min(max_patches, n_rows)

    results = {}
    for idx in range(n_patches):
        patch_name = patch_label_file.loc[idx, 'Patch Names']
        label = patch_label_file.loc[idx, 'label']

        latentfeat_path = join(TEST_DATA_DIR, experiment, "DATA_PATCH", f"{experiment}_latent", 'vqvae', patch_name + ".h5")
        densefeat_path = join(TEST_DATA_DIR, experiment, "DATA_PATCH", f"{experiment}_latent", 'densenet', patch_name + ".pkl")

        with h5py.File(latentfeat_path, 'r') as hf:
            feat = hf['features'][:]
        with open(densefeat_path, 'rb') as handle:
            densefeat = pickle.load(handle)

        tmp_res = db.query(feat[0], densefeat, pre_step, succ_step, C, T, thrsh)

        # Drop the query patch itself and flatten every hit into a tuple of its
        # values; downstream code reads the fields by position.
        tmp_clean = [tuple(r.values()) for r in tmp_res if r['patch_name'] != patch_name]

        # Field 3 is presumably the hamming distance (show_results_patch displays
        # it as such) — this relies on the key order of the dicts returned by db.query.
        top5 = sorted(tmp_clean, key=lambda x: x[3])
        results[patch_name] = {'results': top5, 'label_query': label}

    with open(join(save_dir, f"{save_name}.pkl"), 'wb') as handle:
        pickle.dump(results, handle)

    return results

In [None]:
def show_results_patch(test_slide, ret_final, metadata, experiment):
    """
    Plot a query patch next to its retrieved patches, save the images, and
    return the GDC links of the slides the retrieved patches come from.
    Input:
        test_slide (str): "<slide name>_<x>_<y>" plus a file extension. The slide
            name may itself contain underscores; the last two fields are the
            patch coordinates.
        ret_final (list): Result tuples as stored by save_results_patch; fields
            3 and 5 to 11 are read by position
        metadata (DataFrame): The "id" column is read, indexed by the slide name
            stored in the result
        experiment (str): Folder of the query slide under TEST_WSI_DIR and TEST_DATA_DIR
    Output:
        links (list): VIEW_URL + metadata id for every retrieved patch
    """
    test_slide = splitext(test_slide)[0]
    # Split from the right: only the last two fields are coordinates, so slide
    # names containing "_" are handled as well.
    query_slide, patch_coord1, patch_coord2 = test_slide.rsplit("_", 2)
    slide_name = query_slides_proxies[query_slide]
    patch_name = f"patch_{patch_coord1}_{patch_coord2}"
    save_dir = join("TEST_DATA_RESULTS", experiment, "search_result_images", "patch_retrieval", slide_name, patch_name)
    makedirs(save_dir, exist_ok=True)

    coord = [int(patch_coord1), int(patch_coord2)]

    query_path = join(TEST_WSI_DIR, experiment, query_slide + ".svs")
    with openslide.open_slide(query_path) as slide:
        try:
            objective_power = int(slide.properties['openslide.objective-power'])
        except KeyError:
            # Fall back to 20x when the slide does not record its objective power.
            objective_power = 20

    query_file = join(TEST_DATA_DIR, experiment, "PATCHES", experiment, experiment, f"{objective_power}x", "patches", query_slide + ".h5")

    with h5py.File(query_file, 'r') as f:
        dataset = f['coords']
        patch_level = dataset.attrs["patch_level"]
        patch_size = dataset.attrs["patch_size"]

    with openslide.open_slide(query_path) as slide:
        patch = slide.read_region((coord[0], coord[1]), patch_level, (patch_size, patch_size)).convert("RGB")

    n_slots = len(ret_final) + 2
    fig = plt.figure(figsize=(20, 10), dpi=1200)  # adjust as necessary
    try:
        plt.subplot(1, n_slots, 1)
        plt.imshow(patch)
        plt.axis('off')  # to hide the x and y axis
        plt.title(f"Query Patch\n{slide_name} - {patch_name}")

        # Plot black line
        plt.subplot(1, n_slots, 2)
        plt.plot([0, 0], [0, 1], color='black', transform=plt.gca().transAxes, linewidth=2.0)
        plt.axis('off')

        links = []
        # NOTE(review): numbering starts at 2, so the first result is drawn in
        # the same slot as the black line and the last slot stays empty — confirm
        # this layout is intended.
        for i, result in enumerate(ret_final, 2):
            ret_slide_path, ret_slide_name, sim = result[6], result[5], result[3]
            site, diagnosis = result[7], result[8]
            ret_coords, ret_level, ret_size = result[9], result[10], result[11]

            with openslide.open_slide(ret_slide_path) as ret_slide:
                ret_patch = ret_slide.read_region((ret_coords[0], ret_coords[1]), ret_level, (ret_size, ret_size)).convert("RGB")

            plt.subplot(1, n_slots, i)
            plt.imshow(ret_patch)
            plt.axis('off')  # to hide the x and y axis
            plt.title(f'Result {i-2}: {site} - {diagnosis}\n Hamming Dist: {sim :.1f}')  # add caption

            # returning GDC link to slides
            links.append(VIEW_URL + metadata.loc[ret_slide_name, "id"])

            # saving to file
            ret_patch.save(join(save_dir, f"result_{i-1}.png"))

        plt.tight_layout()
        plt.show()

        fig.savefig(join(save_dir, "all.png"),  dpi='figure', bbox_inches='tight')
        fig.savefig(join(save_dir, "all.eps"), format='eps', dpi='figure', bbox_inches='tight')
        fig.savefig(join(save_dir, "all.pdf"), format='pdf', dpi='figure', bbox_inches='tight')
    finally:
        # Release the (very large, dpi=1200) figure; this function is called in loops.
        plt.close(fig)

    return links

# Constants

In [None]:
# Metadata "project_name" value -> diagnosis abbreviation. show_results uses the
# abbreviation as the diagnosis folder name under WSI_DIR.
diagnoses_dict = {
    "Brain Lower Grade Glioma": "LGG",
    "Glioblastoma Multiforme": "GBM",
    "Breast Invasive Carcinoma": "BRCA",
    "Lung Adenocarcinoma": "LUAD",
    "Lung Squamous Cell Carcinoma": "LUSC",
    "Colon Adenocarcinoma": "COAD",
    "Liver Hepatocellular Carcinoma": "LIHC",
    "Cholangiocarcinoma": "CHOL",
}

# Metadata "primary_site" value -> short site name. show_results uses the short
# name as the site folder name under WSI_DIR.
sites_dict = {
    "Brain": "brain",
    "Breast": "breast",
    "Bronchus and lung": "lung",
    "Colon": "colon",
    "Liver and intrahepatic bile ducts": "liver",
}

# Short site name -> diagnosis abbreviations belonging to that site.
# Not referenced by the other cells visible in this notebook.
sites_diagnoses_dict = {
    "brain": ["LGG", "GBM"],
    "breast": ["BRCA"],
    "lung": ["LUAD", "LUSC"],
    "colon": ["COAD"],
    "liver": ["LIHC", "CHOL"],
}

In [None]:
# GDC portal page prefix (a file id is appended) and the GDC files API endpoint
# queried by get_link.
VIEW_URL = "https://portal.gdc.cancer.gov/files/"
FILES_ENDPOINT = "https://api.gdc.cancer.gov/files"

# Feature folders, relative to the notebook: the search database and the query data.
DATA_DIR = "../../FEATURES/DATABASE/"
TEST_DATA_DIR = "../../FEATURES/TEST_DATA/"

# Whole-slide image folders for the database slides and the query slides.
# NOTE(review): absolute, machine-specific paths — adjust when running elsewhere.
WSI_DIR = "/raid/nejm_ai/DATABASE/"
TEST_WSI_DIR = "/raid/nejm_ai/TEST_DATA/"

In [None]:
# Name of the query experiment; used as a folder name under TEST_DATA_DIR and
# TEST_WSI_DIR. NOTE: the patch retrieval cells further down reassign it.
experiment = "BRCA_HER2"
# File extension of the query slides of this experiment.
extension = "svs" # 'ndpi' for GBM_MICROSCOPE_UPENN, 'svs' everywhere

In [None]:
# Query slides of the experiment. The keys are slide file names; the sub-type
# retrieval cell iterates each value as a list of sites.
with open(f"../../FEATURES/TEST_DATA/{experiment}/query_slides.yaml", 'r') as f:
    query_slides = yaml.safe_load(f)

# Display name of every query slide, keyed by its file name without extension.
# Currently an identity mapping.
query_slides_proxies = {
    stem: stem for stem in (splitext(slide_file)[0] for slide_file in query_slides)
}

In [None]:
# Slide metadata of the database, looked up by slide file name.
metadata = pd.read_csv("../../FEATURES/DATABASE/sampled_metadata.csv").set_index('file_name')

In [None]:
# Search parameters, forwarded unchanged to HistoDatabase.query by save_results
# and save_results_patch.
# NOTE(review): their exact meaning is defined in database.py, not in this notebook.
pre_step = 375
succ_step = 375
C = 50
T = 10
thrsh = 128

# Saving the Results

In [None]:
# Only run once, then comment out.
sites = ["organ", "brain", "breast", "lung", "colon", "liver"]
for site in sites:
    save_results(query_slides, extension, site, experiment, pre_step, succ_step, C, T, thrsh)

# Patch results
patch_label_file_path = f"../../FEATURES/TEST_DATA/{experiment}/DATA_PATCH/summary.csv"
database_index_path = f"../../FEATURES/TEST_DATA/{experiment}/DATABASES_PATCH/{experiment}/index_tree/veb.pkl"
index_meta_path = f"../../FEATURES/TEST_DATA/{experiment}/DATABASES_PATCH/{experiment}/index_meta/meta.pkl"
save_name="results_READER_STUDY"

results = save_results_patch(query_slides, experiment, patch_label_file_path, database_index_path, index_meta_path, save_name, pre_step, succ_step, C, T, thrsh)

# Organ Retrieval

In [None]:
# Organ-level retrieval, keeping at most 10 retrieved slides per query slide.
topK_mMV = 10
site = "organ"

# Load the raw results written by save_results for the organ database.
results = unpickle_object(f"../../FEATURES/TEST_DATA/{experiment}/Results/{site}/results.pkl")
query_results = query(results, site, topK_mMV)

# Only plot slides that are listed as query slides of this experiment.
for test_slide, ret_final in query_results:
    if test_slide in query_slides_proxies:
        show_results(test_slide, extension, ret_final, metadata, experiment, site_retrieval=True, topK_mMV=topK_mMV)

In [None]:
# Organ-level retrieval again, keeping at most 5 retrieved slides per query slide.
# Identical to the top-10 cell except for topK_mMV (which also names the output folder).
topK_mMV = 5
site = "organ"

# Load the raw results written by save_results for the organ database.
results = unpickle_object(f"../../FEATURES/TEST_DATA/{experiment}/Results/{site}/results.pkl")
query_results = query(results, site, topK_mMV)

# Only plot slides that are listed as query slides of this experiment.
for test_slide, ret_final in query_results:
    if test_slide in query_slides_proxies:
        show_results(test_slide, extension, ret_final, metadata, experiment, site_retrieval=True, topK_mMV=topK_mMV)

# Sub-Type Retrieval

In [None]:
# Sub-type retrieval: every query slide is searched within the database of each
# site listed for it in query_slides.
topK_mMV = 5

# `slide_sites` (not `sites`) so the site list of the saving cell is not overwritten.
for slide, slide_sites in query_slides.items():
    for site in slide_sites:
        results = unpickle_object(f"../../FEATURES/TEST_DATA/{experiment}/Results/{site}/results.pkl")
        query_results = query(results, site, topK_mMV)

        for test_slide, ret_final in query_results:
            # Compare without the file extension so experiments whose query
            # slides are not ".svs" files are matched as well.
            if test_slide == splitext(slide)[0]:
                show_results(test_slide, extension, ret_final, metadata, experiment, site_retrieval=False, topK_mMV=topK_mMV)

# Patch Retrieval

In [None]:
# Load the pickled patch-level results (patch name -> {'results', 'label_query'}).
# NOTE(review): this path is not the location save_results_patch writes to
# (TEST_DATA_DIR/<experiment>/_Results_Patch/<save_name>.pkl) — confirm the file
# is copied here before running.
results = unpickle_object(f"./Results_Patch_READER_STUDY/results_READER_STUDY.pkl")

In [None]:
# Patch retrieval for the colon query patches.
test_patches = [
    "MSB-09151-01-11_23185_27168.json",
    "MSB-09151-01-11_6801_10784.json",
    "MSB-09151-01-11_21137_6688.json",
    "MSB-09151-01-11_23185_6688.json",
]
site = "colon"
experiment = "READER_STUDY"

topk_MV = 5

for test_p in test_patches:
    # Keep the first topk_MV stored hits whose field 7 equals the requested site.
    res_final = [hit for hit in results[test_p]["results"] if hit[7] == site][:topk_MV]
    links = show_results_patch(test_p, res_final, metadata, experiment)
    print(links)

In [None]:
# Patch retrieval for the lung query patches.
test_patches = [
    "MSB-09977-01-22_19206_21062.json",
    "MSB-09977-01-22_29929_53329.json",
    "MSB-09977-01-22_74872_30600.json",
    "MSB-09977-01-22_76920_32648.json",
]
site = "lung"
experiment = "READER_STUDY"

topk_MV = 5

for test_p in test_patches:
    # Keep the first topk_MV stored hits whose field 7 equals the requested site.
    res_final = [hit for hit in results[test_p]["results"] if hit[7] == site][:topk_MV]
    links = show_results_patch(test_p, res_final, metadata, experiment)
    print(links)

In [None]:
# Patch retrieval for the breast query patches.
test_patches = [
    "Her2Pos_Case_66_3072_13472.json",
    "Her2Pos_Case_66_19456_1184.json",
    "Her2Pos_Case_66_25328_12291.json",
    "Her2Pos_Case_66_27376_9219.json",
]
site = "breast"
experiment = "READER_STUDY"

topk_MV = 5

for test_p in test_patches:
    # Keep the first topk_MV stored hits whose field 7 equals the requested site.
    res_final = [hit for hit in results[test_p]["results"] if hit[7] == site][:topk_MV]
    links = show_results_patch(test_p, res_final, metadata, experiment)
    print(links)