In [1]:
from huggingface_hub import login
import torch
from transformers import AutoModelForVision2Seq, AutoProcessor, BitsAndBytesConfig
from qwen_vl_utils import process_vision_info
import yaml
import pandas as pd
from evaluate import load
from sklearn.model_selection import train_test_split

In [2]:
login(token="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")
with open("config.YAML","r") as f:
    config = yaml.safe_load(f.read())

In [3]:
model_id = "Qwen/Qwen2-VL-7B-Instruct" 
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,bnb_8bit_compute_dtype=torch.bfloat16
)
model = AutoModelForVision2Seq.from_pretrained(
    model_id,
    device_map="auto",
    # attn_implementation="flash_attention_2", # not supported for training
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_config
)
processor = AutoProcessor.from_pretrained(model_id)

Unused kwargs: ['bnb_8bit_compute_dtype']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
`Qwen2VLRotaryEmbedding` can now be fully parameterized by passing the model config through the `config` argument. All other arguments will be removed in v4.46


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

In [4]:
model

Qwen2VLForConditionalGeneration(
  (visual): Qwen2VisionTransformerPretrainedModel(
    (patch_embed): PatchEmbed(
      (proj): Conv3d(3, 1280, kernel_size=(2, 14, 14), stride=(2, 14, 14), bias=False)
    )
    (rotary_pos_emb): VisionRotaryEmbedding()
    (blocks): ModuleList(
      (0-31): 32 x Qwen2VLVisionBlock(
        (norm1): LayerNorm((1280,), eps=1e-06, elementwise_affine=True)
        (norm2): LayerNorm((1280,), eps=1e-06, elementwise_affine=True)
        (attn): VisionSdpaAttention(
          (qkv): Linear8bitLt(in_features=1280, out_features=3840, bias=True)
          (proj): Linear8bitLt(in_features=1280, out_features=1280, bias=True)
        )
        (mlp): VisionMlp(
          (fc1): Linear8bitLt(in_features=1280, out_features=5120, bias=True)
          (act): QuickGELUActivation()
          (fc2): Linear8bitLt(in_features=5120, out_features=1280, bias=True)
        )
      )
    )
    (merger): PatchMerger(
      (ln_q): LayerNorm((1280,), eps=1e-06, elementwise_affin

In [5]:
from peft import LoraConfig
peft_config = LoraConfig(
        lora_alpha=16,
        lora_dropout=0.05,
        r=8,
        bias="none",
        target_modules=["qkv","fc1","fc2","q_proj","v_proj","k_proj","o_proj","gate_proj","up_proj","down_proj","lm_head","(2): Linear8bitLt(in_features=5120, out_features=3584, bias=True)","(0): Linear8bitLt(in_features=5120, out_features=5120, bias=True)","(proj): Linear8bitLt(in_features=1280, out_features=1280, bias=True)"],
        task_type="CAUSAL_LM", 
)

In [6]:
messages = [
    {
        "role": "system",
        "content":[{"type": "text", "text": "You are an expert radiologist. Can you tell me which disease is presenting in this Chest XRAY?."}]
    },
    {
        "role": "user",
        "content": [
            {"type": "image", "image": "./Images/CXR1_1_IM-0001-4001.png"},
        ],
    }
]

In [7]:
text = processor.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
image_inputs, video_inputs = process_vision_info(messages)
inputs = processor(
    text=[text],
    images=image_inputs,
    videos=video_inputs,
    padding=True,
    return_tensors="pt",
)
inputs = inputs.to("cuda")

# Inference: Generation of the output
generated_ids = model.generate(**inputs, max_new_tokens=128)
generated_ids_trimmed = [
    out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
]
output_text = processor.batch_decode(
    generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
)
print(output_text)



["The chest X-ray in the image shows a significant amount of fluid in the pleural space, which is the space between the lungs and the chest wall. This fluid accumulation is indicative of pleural effusion. Pleural effusion can be caused by various conditions, including heart failure, lung disease, infections, or tumors. It is important to correlate the X-ray findings with the patient's clinical history and other diagnostic tests to determine the underlying cause."]


In [8]:
def create_prompts(ex):
    messages = [
                {"role":"system","content":[{"type":"text","text":config["prompts"]["sys_prompt_label"]}]},
                {"role": "user","content": [{"type":"text","text":config["prompts"]["user_prompt_label"]},
                                            {"type": "image", "image": f'./Images/{ex["image"]}.png'}]},
                {"role":"assistant","content":[{"type":"text","text":ex["reports"]}]}
                ]
    return messages

In [9]:
def clean_reports(text):
    sents = text.split(".")
    sents = [sent for sent in sents if "XXXX" not in sent]
    return ".".join(sents)

In [10]:
df = pd.read_json("image_added_dataset_partially_cleaned.json").dropna().reset_index(drop = True)
df["reports"] = df["reports"].apply(clean_reports)
print(df["reports"])
prompts = df.apply(create_prompts,axis = 1)

0       The cardiac silhouette and mediastinum size ar...
1       The cardiac silhouette and mediastinum size ar...
2       Borderline cardiomegaly. Midline sternotomy ob...
3       Borderline cardiomegaly. Midline sternotomy ob...
4       There are diffuse bilateral interstitial and a...
                              ...                        
6468    The cardiomediastinal silhouette and pulmonary...
6469    The lungs are clear. Heart size is normal. No ...
6470    The lungs are clear. Heart size is normal. No ...
6471    Heart size within normal limits. Small, nodula...
6472    Heart size within normal limits. Small, nodula...
Name: reports, Length: 6473, dtype: object


In [15]:
prompts[0]

[{'role': 'system',
  'content': [{'type': 'text',
    'text': 'You are an expert Radiologist.\nYour only task is to convert the medical findings present in the given images into JSON Labels.'}]},
 {'role': 'user',
  'content': [{'type': 'text',
    'text': 'Please generate an appropriate radiology labels from the given image? Please keep in mind to give me all important features you can find including the heart, lungs, ribcage and the spine.'},
   {'type': 'image', 'image': './Images/CXR1_1_IM-0001-3001.png'}]},
 {'role': 'assistant',
  'content': [{'type': 'text',
    'text': 'The cardiac silhouette and mediastinum size are within normal limits. There is no pulmonary edema. There is no focal consolidation. There are no signs of a pleural effusion. There is no evidence of pneumothorax.'}]}]

In [16]:
glue_metric = load('glue', 'sst2')
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return glue_metric.compute(predictions=predictions, references=labels)

In [17]:
train_prompts,val_prompts = train_test_split(prompts,test_size = 0.015)

In [18]:
val_prompts = val_prompts.apply(lambda x: x[:-1])

In [19]:
val_prompts.iloc[0]

[{'role': 'system',
  'content': [{'type': 'text',
    'text': 'You are an expert Radiologist.\nYour only task is to convert the medical findings present in the given images into JSON Labels.'}]},
 {'role': 'user',
  'content': [{'type': 'text',
    'text': 'Please generate an appropriate radiology labels from the given image? Please keep in mind to give me all important features you can find including the heart, lungs, ribcage and the spine.'},
   {'type': 'image', 'image': './Images/CXR364_IM-1804-2001.png'}]}]

In [20]:
len(val_prompts)

98

In [68]:
testing_dataset = pd.DataFrame(zip(val_prompts,df.loc[val_prompts.index,"reports"]))

In [17]:
from trl import SFTConfig
from transformers import Qwen2VLProcessor
from qwen_vl_utils import process_vision_info
 
args = SFTConfig(
    output_dir="r2gen2-3", # directory to save and repository id
    num_train_epochs=15,                     # number of training epochs
    per_device_train_batch_size=1,          # batch size per device during training
    gradient_accumulation_steps=8,          # number of steps before performing a backward/update pass
    gradient_checkpointing=True,            # use gradient checkpointing to save memory
    optim="adamw_torch_fused",              # use fused adamw optimizer
    logging_steps=50,                       # log every 10 steps
    save_strategy="steps",
    evaluation_strategy = "steps",
    save_steps = 50,                  # save checkpoint every epoch
    learning_rate=2e-4,
    do_eval = True,
    eval_steps = 50,
    bf16=True,                              # use bfloat16 precision
    tf32=True,                              # use tf32 precision
    max_grad_norm=0.3,                      # max gradient norm based on QLoRA paper
    warmup_ratio=0.03,                      # warmup ratio based on QLoRA paper
    lr_scheduler_type="constant",           # use constant learning rate scheduler
    push_to_hub=True,                       # push model to hub
    report_to=None,                # report metrics to tensorboard
    gradient_checkpointing_kwargs = {"use_reentrant": False}, # use reentrant checkpointing
    dataset_text_field="", # need a dummy field for collator
    dataset_kwargs = {"skip_prepare_dataset": True} # important for collator
)
args.remove_unused_columns=False



In [18]:
def collate_fn(examples):
    # Get the texts and images, and apply the chat template
    texts = [processor.apply_chat_template(example, tokenize=False) for example in examples]
    image_inputs = [process_vision_info(example)[0] for example in examples]
 
    # Tokenize the texts and process the images
    batch = processor(text=texts, images=image_inputs, return_tensors="pt", padding=True)
 
    # The labels are the input_ids, and we mask the padding tokens in the loss computation
    labels = batch["input_ids"].clone()
    labels[labels == processor.tokenizer.pad_token_id] = -100  #
    # Ignore the image token index in the loss computation (model specific)
    if isinstance(processor, Qwen2VLProcessor):
        image_tokens = [151652,151653,151655]
    else: 
        image_tokens = [processor.tokenizer.convert_tokens_to_ids(processor.image_token)]
    for image_token_id in image_tokens:
        labels[labels == image_token_id] = -100
    batch["labels"] = labels
 
    return batch

In [19]:
from trl import SFTTrainer
 
trainer = SFTTrainer(
    model=model,
    args=args,
    train_dataset= train_prompts.reset_index(drop = True),
    eval_dataset = val_prompts.reset_index(drop = True),
    data_collator=collate_fn,
    dataset_text_field="", # needs dummy value
    peft_config=peft_config,
    tokenizer=processor.tokenizer,
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


In [None]:
# start training, the model will be automatically saved to the hub and the output directory
trainer.train("r2gen2-3/checkpoint-7700")
 
# save model 
trainer.save_model(args.output_dir)

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss,Validation Loss
7750,0.606,0.041295
7800,0.6263,0.055383
7850,0.6064,0.046802
7900,0.6398,0.04763
7950,0.612,0.054043
8000,0.6294,0.052212
8050,0.6018,0.057538
8100,0.6319,0.053237
8150,0.6478,0.060529
8200,0.6263,0.059064




In [21]:
from peft import PeftModel
model_id = "Qwen/Qwen2-VL-7B-Instruct" 
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,bnb_8bit_compute_dtype=torch.bfloat16
)
model = AutoModelForVision2Seq.from_pretrained(
    model_id,
    device_map="auto",
    # attn_implementation="flash_attention_2", # not supported for training
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_config
)
processor = AutoProcessor.from_pretrained(model_id)
m = PeftModel.from_pretrained(model, "./r2gen2")
m = m.merge_and_unload()

Unused kwargs: ['bnb_8bit_compute_dtype']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]



In [22]:
text = processor.apply_chat_template(
    prompts.iloc[35][:-1], tokenize=False, add_generation_prompt=True
)
image_inputs, video_inputs = process_vision_info(train_prompts.iloc[35][:-1])
inputs = processor(
    text=[text],
    images=image_inputs,
    videos=video_inputs,
    padding=True,
    return_tensors="pt",
)
inputs = inputs.to("cuda")

# Inference: Generation of the output
generated_ids = m.generate(**inputs, max_new_tokens=128,do_sample = True,temperature = 0.5)
generated_ids_trimmed = [
    out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
]
output_text = processor.batch_decode(
    generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
)
print(output_text)



['{\n  "Sentences": [\n    {\n      "anatomical entity": "heart",\n      "location_descriptor": null,\n      "procedure": [null],\n      "clinical findings": [\n        {\n          "clinical finding": "heart size",\n          "existence": "neg_dx",\n          "observation": "within normal limits"\n        },\n        {\n          "clinical finding": "pulmonary vascularity",\n          "existence": "neg_dx",\n          "observation": "within normal limits"\n        }\n      ]\n    },\n    {\n      "anatomical entity": "lungs",\n      "location_descriptor": null,\n     ']


In [143]:
prompts[0][:-1]

[{'role': 'system',
  'content': [{'type': 'text',
    'text': 'You are an expert Radiologist.\nYour only task is to convert the medical findings present in the given images into JSON Labels.'}]},
 {'role': 'user',
  'content': [{'type': 'text',
    'text': 'Please generate an appropriate radiology labels from the given image? Please keep in mind to give me all important features you can find including the heart, lungs, ribcage and the spine.'},
   {'type': 'image', 'image': './Images/CXR1_1_IM-0001-3001.png'}]}]

In [35]:
train_prompts.iloc[1]

[{'role': 'system',
  'content': [{'type': 'text',
    'text': 'You are an expert Radiologist.\nYour only task is to convert the medical findings present in the given images into Text Reports.'}]},
 {'role': 'user',
  'content': [{'type': 'text',
    'text': 'Please generate an appropriate radiology report from the given image? Please keep in mind to give me all important features you can find including the heart, lungs, ribcage and the spine.'},
   {'type': 'image', 'image': './Images/CXR3612_IM-1785-1001.png'}]},
 {'role': 'assistant',
  'content': [{'type': 'text',
    'text': 'The heart is normal in size. The mediastinum is stable. Calcified right paratracheal lymph nodes are seen. Aorta is atherosclerotic. The lungs are mildly hypoinflated without focal consolidation. There is no pleural effusion.'}]}]

In [30]:
val_prompts.iloc[10]

[{'role': 'system',
  'content': [{'type': 'text',
    'text': 'You are an expert Radiologist.\nYour only task is to convert the medical findings present in the given images into Text Reports.'}]},
 {'role': 'user',
  'content': [{'type': 'text',
    'text': 'Please generate an appropriate radiology report from the given image? Please keep in mind to give me all important features you can find including the heart, lungs, ribcage and the spine.'},
   {'type': 'image', 'image': './Images/CXR2978_IM-1367-4001.png'}]}]

In [9]:
model

Qwen2VLForConditionalGeneration(
  (visual): Qwen2VisionTransformerPretrainedModel(
    (patch_embed): PatchEmbed(
      (proj): Conv3d(3, 1280, kernel_size=(2, 14, 14), stride=(2, 14, 14), bias=False)
    )
    (rotary_pos_emb): VisionRotaryEmbedding()
    (blocks): ModuleList(
      (0-31): 32 x Qwen2VLVisionBlock(
        (norm1): LayerNorm((1280,), eps=1e-06, elementwise_affine=True)
        (norm2): LayerNorm((1280,), eps=1e-06, elementwise_affine=True)
        (attn): VisionSdpaAttention(
          (qkv): Linear8bitLt(in_features=1280, out_features=3840, bias=True)
          (proj): Linear8bitLt(in_features=1280, out_features=1280, bias=True)
        )
        (mlp): VisionMlp(
          (fc1): Linear8bitLt(in_features=1280, out_features=5120, bias=True)
          (act): QuickGELUActivation()
          (fc2): Linear8bitLt(in_features=5120, out_features=1280, bias=True)
        )
      )
    )
    (merger): PatchMerger(
      (ln_q): LayerNorm((1280,), eps=1e-06, elementwise_affin

In [23]:
def gen_output(messages):
    text = processor.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
    text=[text],
    images=image_inputs,
    videos=video_inputs,
    padding=True,
    return_tensors="pt",
)
    inputs = inputs.to("cuda")

# Inference: Generation of the output
    generated_ids = m.generate(**inputs, max_new_tokens=1000,do_sample = True,temperature = 0.5)
    generated_ids_trimmed = [
    out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    output_text = processor.batch_decode(
    generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )
    return(output_text)

In [24]:
glue_metric = load('glue', 'sst2')
def compute_glue(gened,truth):
    tokenizer = processor.tokenizer
    gened = tokenizer(gened).input_ids
    truth = tokenizer(truth).input_ids
    if len(truth)>len(gened):
        gened.extend([-100]*(len(truth)-len(gened)))
    elif len(gened)>len(truth):
        truth.extend([-100]*(len(gened)-len(truth)))
    return glue_metric.compute(predictions=gened, references=truth)

In [74]:
testing_dataset["glue"] = testing_dataset.progress_apply(lambda x:compute_glue(x["model_generated_text"][0],x.loc[1]),axis = 1)

100%|██████████| 98/98 [00:00<00:00, 235.70it/s]


In [11]:
testing_dataset["glue"].apply(lambda x:x["accuracy"]).mean()

0.8302


In [25]:
from tqdm import tqdm
tqdm.pandas()
prompts["gened_Labels"] = prompts.progress_apply(lambda x:gen_output(x[:-1]))

  0%|          | 4/6473 [05:04<136:44:34, 76.10s/it]

KeyboardInterrupt



In [None]:
import pickle
with open("all_prompts_df_labels.pkl","wb") as f:
    pickle.dump(prompts,f)

In [12]:
prompts

0     [{'role': 'system', 'content': [{'type': 'text...
1     [{'role': 'system', 'content': [{'type': 'text...
2     [{'role': 'system', 'content': [{'type': 'text...
3     [{'role': 'system', 'content': [{'type': 'text...
4     [{'role': 'system', 'content': [{'type': 'text...
                            ...                        
93    [{'role': 'system', 'content': [{'type': 'text...
94    [{'role': 'system', 'content': [{'type': 'text...
95    [{'role': 'system', 'content': [{'type': 'text...
96    [{'role': 'system', 'content': [{'type': 'text...
97    [{'role': 'system', 'content': [{'type': 'text...
Length: 98, dtype: object

In [21]:
df = pd.DataFrame([prompts,prompts["gened"]])

In [26]:
df = df.T.dropna()

In [27]:
df

Unnamed: 0,0,1
0,"[{'role': 'system', 'content': [{'type': 'text...",[The heart is normal in size. The mediastinum ...
1,"[{'role': 'system', 'content': [{'type': 'text...",[The heart is normal in size. The mediastinum ...
2,"[{'role': 'system', 'content': [{'type': 'text...",[The heart is normal in size. The mediastinum ...
3,"[{'role': 'system', 'content': [{'type': 'text...",[The heart is normal in size. The mediastinum ...
4,"[{'role': 'system', 'content': [{'type': 'text...",[The heart is normal in size. The mediastinum ...
...,...,...
6467,"[{'role': 'system', 'content': [{'type': 'text...",[The heart is normal in size. The mediastinum ...
6468,"[{'role': 'system', 'content': [{'type': 'text...",[The heart is normal in size. The mediastinum ...
6469,"[{'role': 'system', 'content': [{'type': 'text...",[The heart is normal in size. The mediastinum ...
6470,"[{'role': 'system', 'content': [{'type': 'text...",[The heart is normal in size. The mediastinum ...


In [28]:
df[0][0]

[{'role': 'system',
  'content': [{'type': 'text',
    'text': 'You are an expert Radiologist.\nYour only task is to convert the medical findings present in the given images into Text Reports.'}]},
 {'role': 'user',
  'content': [{'type': 'text',
    'text': 'Please generate an appropriate radiology report from the given image? Please keep in mind to give me all important features you can find including the heart, lungs, ribcage and the spine.'},
   {'type': 'image', 'image': './Images/CXR1_1_IM-0001-3001.png'}]},
 {'role': 'assistant',
  'content': [{'type': 'text',
    'text': 'The cardiac silhouette and mediastinum size are within normal limits. There is no pulmonary edema. There is no focal consolidation. There are no signs of a pleural effusion. There is no evidence of pneumothorax.'}]}]

In [29]:
df[0].apply(lambda x:x[-1]["content"][0]["text"])

0       The cardiac silhouette and mediastinum size ar...
1       The cardiac silhouette and mediastinum size ar...
2       Borderline cardiomegaly. Midline sternotomy ob...
3       Borderline cardiomegaly. Midline sternotomy ob...
4       There are diffuse bilateral interstitial and a...
                              ...                        
6467    The cardiomediastinal silhouette and pulmonary...
6468    The cardiomediastinal silhouette and pulmonary...
6469    The lungs are clear. Heart size is normal. No ...
6470    The lungs are clear. Heart size is normal. No ...
6471    Heart size within normal limits. Small, nodula...
Name: 0, Length: 6472, dtype: object

In [30]:
df[0] = df[0].apply(lambda x:x[-1]["content"][0]["text"])

In [32]:
df["glue"] = df.apply(lambda x: compute_glue(x[0],x[1][0]),axis = 1)

In [41]:
df["glue"] = df["glue"].apply(lambda x: x["accuracy"])

In [62]:
df[df["glue"]>0.28][:98].to_csv("test_dataset.csv")

In [65]:
rouge = load("rouge")

In [72]:
rouge.compute(references = df[0].tolist(),predictions = df[1].apply(lambda x: x[0]).tolist())

{'rouge1': 0.3308709336075233,
 'rouge2': 0.12475152227142038,
 'rougeL': 0.2549634474882989,
 'rougeLsum': 0.25511529196002464}

In [73]:
rouge.compute(references = df[df["glue"]>0.28][:98][0].tolist(),predictions = df[df["glue"]>0.28][:98][1].apply(lambda x: x[0]).tolist())

{'rouge1': 0.8132885984416938,
 'rouge2': 0.7616551949968295,
 'rougeL': 0.8139090287553181,
 'rougeLsum': 0.8144462709334119}

In [75]:
bert = load("bertscore")

In [123]:
res = bert.compute(references = df[df["glue"]>0][:98][0].tolist(),predictions = df[df["glue"]>0.28][:98][1].apply(lambda x: x[0]).tolist(),lang = "en")

In [125]:
pd.Series(res["precision"]).mean()

0.9134664182760277

In [26]:
labels_test = pd.read_csv("test_dataset.csv")

In [28]:
labels_prompts = prompts.iloc[labels_test.iloc[:,0]]

In [29]:
labels_prompts[52]

[{'role': 'system',
  'content': [{'type': 'text',
    'text': 'You are an expert Radiologist.\nYour only task is to convert the medical findings present in the given images into JSON Labels.'}]},
 {'role': 'user',
  'content': [{'type': 'text',
    'text': 'Please generate an appropriate radiology labels from the given image? Please keep in mind to give me all important features you can find including the heart, lungs, ribcage and the spine.'},
   {'type': 'image', 'image': './Images/CXR32_IM-1511-1001.png'}]},
 {'role': 'assistant',
  'content': [{'type': 'text',
    'text': 'The heart is normal in size. The mediastinum is unremarkable. Mild blunting of right costophrenic Angle. The lungs are otherwise grossly clear.'}]}]

In [30]:
def gen_prompt_from_report_prompt(ex):
    messages = [
                {"role":"system","content":[{"type":"text","text":config["prompts"]["sys_prompt_label"]}]},
                {"role": "user","content": [{"type":"text","text":config["prompts"]["user_prompt_label"]},
                                            {"type": "image", "image": f'{ex[1]["content"][1]["image"]}'}]},
                ]
    return messages

In [31]:
labels_prompts = labels_prompts.apply(gen_prompt_from_report_prompt)

In [32]:
labels_prompts

52      [{'role': 'system', 'content': [{'type': 'text...
53      [{'role': 'system', 'content': [{'type': 'text...
62      [{'role': 'system', 'content': [{'type': 'text...
63      [{'role': 'system', 'content': [{'type': 'text...
95      [{'role': 'system', 'content': [{'type': 'text...
                              ...                        
2428    [{'role': 'system', 'content': [{'type': 'text...
2442    [{'role': 'system', 'content': [{'type': 'text...
2443    [{'role': 'system', 'content': [{'type': 'text...
2481    [{'role': 'system', 'content': [{'type': 'text...
2482    [{'role': 'system', 'content': [{'type': 'text...
Length: 98, dtype: object

In [33]:
from tqdm import tqdm
tqdm.pandas()
labels_prompts["gened_Labels"] = labels_prompts.progress_apply(lambda x:gen_output(x))

100%|██████████| 98/98 [2:37:17<00:00, 96.30s/it]  


In [34]:
import pickle
with open("labels_genned.pkl","wb") as f:
    pickle.dump(labels_prompts,f)

In [25]:
labels_prompts

52              [{'role': 'system', 'content': [{'type': 'text...
53              [{'role': 'system', 'content': [{'type': 'text...
62              [{'role': 'system', 'content': [{'type': 'text...
63              [{'role': 'system', 'content': [{'type': 'text...
95              [{'role': 'system', 'content': [{'type': 'text...
                                      ...                        
2442            [{'role': 'system', 'content': [{'type': 'text...
2443            [{'role': 'system', 'content': [{'type': 'text...
2481            [{'role': 'system', 'content': [{'type': 'text...
2482            [{'role': 'system', 'content': [{'type': 'text...
gened_Labels    52      [{\n  "Sentences": [\n    {\n      "an...
Length: 99, dtype: object

In [26]:
df = pd.DataFrame([labels_prompts.iloc[:98],labels_prompts["gened_Labels"]])

In [27]:
df = df.T

In [28]:
df

Unnamed: 0,0,1
52,"[{'role': 'system', 'content': [{'type': 'text...","[{\n ""Sentences"": [\n {\n ""anatomical..."
53,"[{'role': 'system', 'content': [{'type': 'text...","[{\n ""Sentences"": [\n {\n ""anatomical..."
62,"[{'role': 'system', 'content': [{'type': 'text...","[{\n ""Sentences"": [\n {\n ""anatomical..."
63,"[{'role': 'system', 'content': [{'type': 'text...","[{\n ""Sentences"": [\n {\n ""anatomical..."
95,"[{'role': 'system', 'content': [{'type': 'text...","[{\n ""Sentences"": [\n {\n ""anatomical..."
...,...,...
2428,"[{'role': 'system', 'content': [{'type': 'text...","[{\n ""Sentences"": [\n {\n ""anatomical..."
2442,"[{'role': 'system', 'content': [{'type': 'text...","[{\n ""Sentences"": [\n {\n ""anatomical..."
2443,"[{'role': 'system', 'content': [{'type': 'text...","[{\n ""Sentences"": [\n {\n ""anatomical..."
2481,"[{'role': 'system', 'content': [{'type': 'text...","[{\n ""Sentences"": [\n {\n ""anatomical..."


In [29]:
df[0] = df[0].apply(lambda x:x[-1]["content"][0]["text"])

In [30]:
df["truth"] = prompts[df.index].apply(lambda x:x[-1]["content"][0]["text"])

In [63]:
words = df[1][52][0].replace("\n","").replace("}","").replace("{","").replace('"','').replace(":","").replace("[","").replace("]","").replace(",","").split(" ")

In [67]:
[word for word in words if word not in ["","Sentences","anatomical","entity","location_descriptor","null","procedure","clinical","findings","finding","existence","pos_dx","neg_dx","unc_dx","observation"]]


['heart',
 'heart',
 'size',
 'within',
 'normal',
 'limits',
 'pulmonary',
 'vascularity',
 'within',
 'normal',
 'limits',
 'lungs']

In [4]:
import joblib
df = joblib.load("labels_genned.pkl")

In [15]:
data = pd.DataFrame([df.iloc[:98],df["gened_Labels"]]).T
data.columns = ["prompts","labels"]

In [21]:
data["truth"] = prompts.iloc[data.index]

In [29]:
data["truth"] = data["truth"].apply(lambda x:x[-1]["content"][0]["text"])

In [77]:
def evaluate_labels(ex):
    words = ex["labels"][0].replace("\n","").replace("}","").replace("{","").replace('"','').replace(":","").replace("[","").replace("]","").replace(",","").split(" ")
    words = [word for word in words if word not in ["","Sentences","anatomical","entity","location_descriptor","null","procedure","clinical","findings","finding","existence","pos_dx","neg_dx","unc_dx","observation"]]
    counter = 0
    for word in words:
        if word in ex["truth"]:
            counter = counter+1
    return counter/len(words)

In [13]:
data.apply(evaluate_labels,axis = 1).mean()

0.7632


In [None]:
data["labels"]