In [None]:
import pandas as pd

In [None]:
# Location of the MIMIC-CXR train/validate/test split assignments.
file_path = '/opt/gpudata/mimic-cxr/mimic-cxr-2.0.0-split.csv.gz'

split_df = pd.read_csv(file_path)
# Keep only the validation rows. The explicit .copy() gives validate_df its
# own data, so adding a column below does not write into a slice of split_df
# (which would raise SettingWithCopyWarning and may silently not stick).
validate_df = split_df[split_df['split'] == 'validate'].copy()
# Placeholder column; create_paths() fills it with one JPG path per image.
validate_df["file_path"] = None

In [None]:
def create_paths(df):
    '''Fill the "file_path" column of df with the JPG path of each image.

    The path is built as
    /opt/gpudata/mimic-cxr/jpg/p<first two characters of subject_id>/p<subject_id>/s<study_id>/<dicom_id>.jpg

    Args:
        df: DataFrame with the columns subject_id, study_id and dicom_id.
            It is modified in place; nothing is returned.
    '''
    # Build every path in one pass and assign the whole column at once.
    # Positional assignment avoids per-row .loc writes, which are slow and
    # overwrite several rows when index labels are not unique.
    df["file_path"] = [
        "/opt/gpudata/mimic-cxr/jpg/p" + str(patient_id)[:2]
        + "/p" + str(patient_id)
        + "/s" + str(study_id)
        + "/" + str(dicom_id) + ".jpg"
        for patient_id, study_id, dicom_id in zip(
            df["subject_id"], df["study_id"], df["dicom_id"]
        )
    ]
    
# Populate the image paths in place, then show the resulting column.
create_paths(validate_df)
print(validate_df.loc[:, "file_path"])

In [None]:
# Name of the pip package to install in the next cell (presumably the
# distribution that provides the health_multimodal modules imported later —
# confirm). No version is pinned here.
pip_source = "hi-ml-multimodal"

In [None]:
# Install the package named by pip_source; %pip targets the environment of the running kernel.
%pip install {pip_source}

In [None]:
from typing import List
from typing import Tuple

import tempfile
from pathlib import Path

import torch
from IPython.display import display
from IPython.display import Markdown

from health_multimodal.common.visualization import plot_phrase_grounding_similarity_map
from health_multimodal.text import get_bert_inference
from health_multimodal.text.utils import BertEncoderType
from health_multimodal.image import get_image_inference
from health_multimodal.image.utils import ImageModelType
from health_multimodal.vlp import ImageTextInferenceEngine

In [None]:
# Build the text and image inference engines for the BIOVIL_T model variants
# (presumably this loads pretrained weights — confirm against the
# health_multimodal documentation).
text_inference = get_bert_inference(BertEncoderType.BIOVIL_T_BERT)
image_inference = get_image_inference(ImageModelType.BIOVIL_T)

In [None]:
# Combine the image and text engines into a single image-text inference engine.
image_text_inference = ImageTextInferenceEngine(
    image_inference_engine=image_inference,
    text_inference_engine=text_inference,
)
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
image_text_inference.to(device)

In [None]:
# A bounding box given as four floats (presumably x, y, width, height —
# confirm against plot_phrase_grounding_similarity_map).
TypeBox = Tuple[float, float, float, float]

def plot_phrase_grounding(image_path: Path, text_prompt: str, bboxes: List[TypeBox]) -> None:
    '''Plot the similarity map between an image and a text prompt.

    The map is requested from the module-level image_text_inference engine
    with bilinear interpolation and handed, together with the image path and
    the given boxes, to plot_phrase_grounding_similarity_map.

    Args:
        image_path: path of the image file to analyse.
        text_prompt: phrase to ground in the image.
        bboxes: boxes forwarded unchanged to the plotting helper.
    '''
    heatmap = image_text_inference.get_similarity_map_from_raw_data(
        image_path=image_path, query_text=text_prompt, interpolation="bilinear"
    )
    plot_phrase_grounding_similarity_map(
        image_path=image_path, similarity_map=heatmap, bboxes=bboxes
    )

def cosine_similarity(image_path: Path, text_prompt: str) -> float:
    '''Return the similarity score between an image and a text prompt.

    Delegates to get_similarity_score_from_raw_data on the module-level
    image_text_inference engine.

    Args:
        image_path: path of the image file to score.
        text_prompt: text to compare the image against.

    Returns:
        The score produced by the engine (annotated as float to match the
        engine's documented return type; the annotation was previously the
        incorrect `None`).
    '''
    return image_text_inference.get_similarity_score_from_raw_data(
        image_path=image_path,
        query_text=text_prompt,
    )

def plot_phrase_grounding_from_url(image_url: str, text_prompt: str, bboxes: List[TypeBox]) -> None:
    image_path = Path(tempfile.tempdir, "downloaded_chest_xray.jpg")
    !curl -s -L -o {image_path} {image_url}
    plot_phrase_grounding(image_path, text_prompt, bboxes)

In [None]:
def find_cossim(image_path_name, text_prompt):
    '''Return the cosine similarity between an image and a text prompt.

    Args:
        image_path_name: path of the image file, as a string or path-like.
        text_prompt: text to compare the image against.

    Returns:
        The score returned by cosine_similarity for this image/prompt pair.
    '''
    # The unused bounding-box list and the Markdown message describing a
    # "middle figure" were removed: the plotting call was already disabled, so
    # the message described a figure that is never drawn and was emitted once
    # per image when this function runs inside a loop.
    image_path = Path(image_path_name)
    return cosine_similarity(image_path, text_prompt)


In [None]:
# Build a DataFrame holding one cosine-similarity score per validation image.

# The second column name is kept exactly as spelled because a later cell
# selects the column by this name.
columns = ['dicom_id', 'atelelectasis_cos_sim']
text_prompt = "atelectasis seen"

# Collect all rows first and build the frame once; inserting rows one at a
# time with .loc re-allocates the frame on every iteration.
records = [
    (dicom_id, find_cossim(image_path, text_prompt))
    for dicom_id, image_path in zip(validate_df["dicom_id"], validate_df["file_path"])
]

# Reuse validate_df's index so index-aligned column assignments from
# validate_df still line up with the right rows.
cosine_similarity_df = pd.DataFrame(records, columns=columns, index=validate_df.index)


In [None]:
# Attach each image's path (aligned on the shared index) and arrange the
# columns as: identifier, path, score.
new_order = ["dicom_id", "file_path", "atelelectasis_cos_sim"]
cosine_similarity_df = cosine_similarity_df.assign(
    file_path=validate_df["file_path"]
)[new_order]
print(cosine_similarity_df)

In [None]:
# Write the similarity table to CSV without the DataFrame index column.
file_path = "/home/imadejski/ctds-search-model/cosine_sim_new.csv"
cosine_similarity_df.to_csv(path_or_buf=file_path, index=False)