#### imports

In [1]:
import torch
import os
import csv
import zipfile

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from PIL import Image
from transformers import AutoModel, AutoTokenizer
from torchvision import transforms
from tqdm import tqdm
from IPython.display import display

#### model

In [2]:
# Seed torch's global RNG. eval_img calls model.chat with sampling=True, so the
# generated answers depend on this RNG state (and on the order of the calls).
torch.manual_seed(100)

<torch._C.Generator at 0x7fc4680558d0>

In [3]:
# Report whether a CUDA device is visible to torch; the model-loading cell
# moves the model to 'cuda' unconditionally, so this should print True.
cuda_available = torch.cuda.is_available()
print(f"CUDA available: {cuda_available}")

CUDA available: True


In [4]:
# Load the MiniCPM-V-2 checkpoint in bfloat16 and move it to the GPU in one
# expression (Module.to returns the module itself).
# NOTE(review): trust_remote_code=True executes Python code shipped with the
# checkpoint repository — consider pinning a `revision` for reproducibility.
model = AutoModel.from_pretrained(
    'openbmb/MiniCPM-V-2',
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
).to(device='cuda', dtype=torch.bfloat16)

# Tokenizer that belongs to the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    'openbmb/MiniCPM-V-2',
    trust_remote_code=True,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

#### helpers

In [32]:
def save_to_csv(context, question, image_str, output, actual, filename='output.csv'):
    """Append one evaluation record to a CSV file, creating it if necessary.

    The header row is written first whenever the target file is missing or
    empty, so a zero-byte file left behind by an interrupted run still ends
    up with column names.

    Args:
        context: Context string recorded in the 'Context' column.
        question: Question text recorded in the 'Question' column.
        image_str: Image name or path recorded in the 'Image_Path' column.
        output: Model response recorded in the 'Output' column.
        actual: Ground-truth value recorded in the 'Actual' column.
        filename: Destination CSV path. Defaults to 'output.csv'.
    """
    # An existing but empty file has no header yet, so treat it like a new one.
    needs_header = not os.path.isfile(filename) or os.path.getsize(filename) == 0

    # newline='' leaves line endings to the csv module; the explicit encoding
    # keeps non-ASCII text from failing on platforms whose default encoding
    # is not UTF-8.
    with open(filename, mode='a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        if needs_header:
            writer.writerow(['Context', 'Question', 'Image_Path', 'Output', 'Actual'])
        writer.writerow([context, question, image_str, output, actual])

In [42]:
def load_ground_truths(file_path):
    """Read the quantification CSV and index it by image name.

    Args:
        file_path: Directory containing 'test_im_ec_quantification.csv'.

    Returns:
        dict mapping each entry of the 'image name' column to the entry of
        the '# of ec' column on the same row. If a name occurs more than
        once, the last row wins.
    """
    table = pd.read_csv(os.path.join(file_path, 'test_im_ec_quantification.csv'))
    names, counts = table['image name'], table['# of ec']
    return {name: count for name, count in zip(names, counts)}

In [43]:
def eval_img(file_path, ground_truths, img_path, img_context, question, temperature, filename):
    """Ask the model one question about one image and log the answer to CSV.

    Relies on the notebook-level `model` and `tokenizer` objects created in
    the model-loading cell.

    Args:
        file_path: Directory that contains the image.
        ground_truths: Mapping from '<stem>.tif' image names to the expected
            value; names that are missing are recorded as 'Unknown'.
        img_path: Image file name, relative to file_path.
        img_context: Value passed as `context` to model.chat and written to
            the 'Context' column of the CSV.
        question: Question sent to the model as the single user message.
        temperature: Sampling temperature forwarded to model.chat.
        filename: Output CSV path without the '.csv' extension.

    Returns:
        tuple: (res, img) where res is the first value returned by
        model.chat and img is the RGB PIL image that was evaluated.
    """
    model.eval()

    # Close the file handle as soon as the pixels are read; convert() hands
    # back a separate image, so `img` stays usable after the `with` block.
    with Image.open(os.path.join(file_path, img_path)) as raw:
        img = raw.convert('RGB')

    # Ground truths are keyed by the .tif name. splitext strips only the
    # final extension, so stems that contain dots are preserved.
    img_tif = os.path.splitext(img_path)[0] + ".tif"
    ground_truth = ground_truths.get(img_tif, 'Unknown')

    msgs = [{'role': 'user', 'content': question}]

    # Reuse the tokenizer loaded once at notebook level rather than reloading
    # it for every image.
    # NOTE(review): confirm in the model's remote code that `context` is
    # actually consumed by chat() as prompt text — TODO verify.
    res, _, _ = model.chat(
        image=img,
        msgs=msgs,
        context=img_context,
        tokenizer=tokenizer,
        sampling=True,
        temperature=temperature
    )
    save_to_csv(img_context, question, img_path, res, ground_truth, filename + ".csv")

    return res, img

#### workspace

In [None]:
# Run configuration.
file_path = "../test_im_data"
file_path_l = os.path.join(file_path, "labels")
temperature = 0.7
# NOTE(review): 'examing' looks like a typo for 'examining'. It is left as is
# because this string is sent to the model and written to the results CSV;
# change it deliberately, together with a fresh output file.
context = 'You are a pathologist examing cell imagery.'
question = 'Identify the count of ecDNA. Return a numerical answer only'
filename = 'test'

ground_truth = load_ground_truths(file_path)

# os.listdir returns entries in arbitrary order. Sorting fixes the processing
# order, so the seeded sampling is paired with the same image on every run.
files = sorted(f for f in os.listdir(file_path_l) if f.endswith('.png'))

# Each eval_img call appends one row to '<filename>.csv', so re-running this
# cell adds further rows to the existing file. `results` keeps every decoded
# image in memory for later inspection.
results = []
for img_str in tqdm(files, desc="Processing Images"):
    res, img = eval_img(file_path_l, ground_truth, img_str, context, question, temperature, filename)
    results.append((img_str, res, img))

Processing Images:  39%|███▉      | 465/1189 [07:43<13:37,  1.13s/it]

In [9]:
# torch.manual_seed(100)

# tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_code=True)

# image = Image.open('../sampled/11.png').convert('RGB')

# # First round chat 
# question = "What is the landform in the picture?"
# msgs = [{'role': 'user', 'content': [image, question]}]

# answer = model.chat(
#     msgs=msgs,
#     tokenizer=tokenizer
# )
# print(answer)

# # Second round chat, pass history context of multi-turn conversation
# msgs.append({"role": "assistant", "content": [answer]})
# msgs.append({"role": "user", "content": ["What should I pay attention to when traveling here?"]})

# answer = model.chat(
#     msgs=msgs,
#     tokenizer=tokenizer
# )
# print(answer)

### todo
* n-shot learning with n = 0, 1, and e.g. 10 examples
* graph the results for the different n-shot settings
* understand the LLM component of MiniCPM-V
* analyze the effect of different context prompts
* add descriptions of ecDNA to the prompt