#### imports

In [1]:
import torch
import os
import csv
import zipfile

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from PIL import Image
from transformers import AutoModel, AutoTokenizer
from torchvision import transforms
from tqdm import tqdm
from IPython.display import display

#### model

In [2]:
# Seed PyTorch's global RNG; intended to make the sampling-based generation
# used later in the notebook repeatable between runs.
torch.manual_seed(100)

<torch._C.Generator at 0x7ff1f23098f0>

In [3]:
# Report whether PyTorch can see a CUDA device before loading the model onto it.
cuda_available = torch.cuda.is_available()
print(f"CUDA available: {cuda_available}")

CUDA available: True


In [4]:
# Hugging Face checkpoint id shared by the model and its tokenizer.
MODEL_ID = 'openbmb/MiniCPM-V-2'

# NOTE: trust_remote_code=True runs the modelling code shipped with the checkpoint.
model = AutoModel.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
)
# Move the weights to the GPU in bfloat16.
model = model.to(device='cuda', dtype=torch.bfloat16)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

#### helpers

In [5]:
def save_to_csv(context, question, temperature, image_str, output, actual, filename='output.csv'):
    """Append one evaluation record to a CSV file, adding the header row when needed.

    Args:
        context: Context string that was passed to the model.
        question: Question that was asked.
        temperature: Sampling temperature used for the generation.
        image_str: Image name/path the question was asked about.
        output: The model's answer.
        actual: The ground-truth value recorded alongside the answer.
        filename: CSV file to append to; created if it does not exist.

    The header row is written when the file is missing *or* exists but is
    empty, so an empty placeholder file still ends up with column names.
    """
    # Decide before opening: opening in append mode creates the file.
    needs_header = not os.path.isfile(filename) or os.path.getsize(filename) == 0

    # newline='' is required by the csv module; UTF-8 keeps non-ASCII model
    # output from failing on platforms whose default encoding is not UTF-8.
    with open(filename, mode='a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        if needs_header:
            writer.writerow(['Context', 'Question', 'Temperature', 'Image_Path', 'Output', 'Actual'])
        writer.writerow([context, question, temperature, image_str, output, actual])

In [6]:
def load_ground_truths(file_path, gt_filename='test_im_ec_quantification.csv'):
    """Load the ground-truth counts for a dataset directory.

    Args:
        file_path: Directory that contains the ground-truth CSV.
        gt_filename: Name of the CSV inside ``file_path``. Defaults to the
            test-split file; pass another name to load a different split.

    Returns:
        dict mapping each value of the ``'image name'`` column to the value of
        the ``'# of ec'`` column on the same row.

    Raises:
        FileNotFoundError: if the CSV does not exist.
        KeyError: if either expected column is missing.
    """
    gt_path = os.path.join(file_path, gt_filename)
    df = pd.read_csv(gt_path)
    return dict(zip(df['image name'], df['# of ec']))

#### example prompt strategies

Chain-of-Thought Prompting (CoT):
* Give worked example question–answer pairs (ideally including the intermediate reasoning steps) before asking the real question

In [None]:
img_context = "ecDNA is usually smaller than chromosomes or nuclei"

# Example question/answer pairs for the prompt strategy described above.
# NOTE(review): these examples are defined but not yet passed to the model;
# only the final question is sent in `msgs` below.
question1 = "Identify the count of ecDNA. Return a numerical answer only"
img1 = "2244.png"
answer1 = "0"

question2 = "Identify the count of ecDNA. Return a numerical answer only"
img2 = "949.png"
answer2 = "2"

question = "Identify the count of ecDNA. Return a numerical answer only"
img_path = "1730.png"

# Settings this cell needs in order to run on a fresh kernel. The data
# location and temperature mirror the workspace cell further down; the CSV
# stem is specific to this example so its rows stay separate from other runs.
data_root = "../test_im_data"
file_path = os.path.join(data_root, "labels")
ground_truths = load_ground_truths(data_root)
temperature = 0.7
filename = "cot_example"

# Reuses the `model` and `tokenizer` loaded in the model cell.
model.eval()
full_image = os.path.join(file_path, img_path)
# Close the file handle promptly; convert() returns a separate in-memory image.
with Image.open(full_image) as raw_img:
    img = raw_img.convert('RGB')

# The ground-truth lookup uses the image's name with a .tif extension.
img_tif = os.path.splitext(img_path)[0] + ".tif"
ground_truth = ground_truths.get(img_tif, 'Unknown')

msgs = [{'role': 'user', 'content': question}]

res, _, _ = model.chat(
    image=img,
    msgs=msgs,
    context=img_context,
    tokenizer=tokenizer,
    sampling=True,
    temperature=temperature
)
save_to_csv(img_context, question, temperature, img_path, res, ground_truth, filename + ".csv")

print(res)

Self-Consistency Prompting:
* Run multiple trials with the same prompt and aggregate the answers (e.g. majority vote or median of the predicted counts)

In [None]:
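# Placeholder: no separate code needed here. Run the same evaluation several
# times (see the repeated-trials loop in the workspace cell) and aggregate the answers.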

Zero-Shot and Few-Shot Prompting:
* Zero-shot prompting uses no prior examples, relying solely on the model's pre-existing knowledge, while few-shot prompting provides a small number of examples to guide the model's response.

Prompt Chaining:
* Use a series of prompts that build on each other to guide the model through a multi-step process or complex reasoning sequence.

Meta-Prompting:
* Prompts that include instructions or goals about how to generate subsequent prompts.

#### THING TO CHANGE

In [7]:
def eval_img(file_path, ground_truths, img_path, img_context, question, temperature, filename):
    """Ask the model one question about one image and log the answer to CSV.

    This is the function to adapt for a different model: it must take the
    image, context, question and temperature and return the answer as ``res``.

    Uses the module-level ``model`` and ``tokenizer`` created in the model
    cell, so the tokenizer is not reloaded for every image.

    Args:
        file_path: Directory containing the image.
        ground_truths: Mapping looked up with the image's ``.tif`` name.
        img_path: Image file name inside ``file_path``.
        img_context: Context string forwarded to ``model.chat``.
        question: Question sent as the single user message.
        temperature: Sampling temperature forwarded to ``model.chat``.
        filename: Stem of the CSV file; ``".csv"`` is appended.

    Returns:
        The first element of the tuple returned by ``model.chat``.
    """
    model.eval()

    full_image = os.path.join(file_path, img_path)
    # Close the file handle promptly; convert() returns a separate in-memory image.
    with Image.open(full_image) as raw_img:
        img = raw_img.convert('RGB')

    # splitext strips only the final extension, so names that contain extra
    # dots still map to the right ground-truth key.
    img_tif = os.path.splitext(img_path)[0] + ".tif"
    ground_truth = ground_truths.get(img_tif, 'Unknown')

    msgs = [{'role': 'user', 'content': question}]

    # Only the answer is used; the other two returned values are discarded.
    res, _, _ = model.chat(
        image=img,
        msgs=msgs,
        context=img_context,
        tokenizer=tokenizer,
        sampling=True,
        temperature=temperature
    )
    save_to_csv(img_context, question, temperature, img_path, res, ground_truth, filename + ".csv")

    return res

#### workspace

In [None]:
# Settings to change:
#   file_path - dataset directory (test vs train)
#   filename  - stem of the CSV file that eval_img appends results to
#   N_TRIALS  - how many times the whole image set is evaluated
N_TRIALS = 5
file_path = "../test_im_data"
temperature = 0.7
question = 'Identify the count of ecDNA. Return a numerical answer only'
filename = 'test'
context = 'ecDNA is usually smaller than chromosomes or nuclei'

# These do not change between trials, so compute them once.
file_path_l = os.path.join(file_path, "labels")
ground_truth = load_ground_truths(file_path)

# os.listdir returns entries in arbitrary order; sorting gives every trial
# (and every rerun with the same seed) the same image order.
files = sorted(f for f in os.listdir(file_path_l) if f.endswith('.png'))

# all_results[t] holds the (image name, answer) pairs of trial t;
# `results` is left holding the pairs of the most recent trial.
all_results = []
for i in range(N_TRIALS):
    results = []
    for img_str in tqdm(files, desc="Processing Images"):
        res = eval_img(file_path_l, ground_truth, img_str, context, question, temperature, filename)
        results.append((img_str, res))
    all_results.append(results)

Processing Images:   0%|          | 0/1189 [00:00<?, ?it/s]The `seen_tokens` attribute is deprecated and will be removed in v4.41. Use the `cache_position` model input instead.
Processing Images: 100%|██████████| 1189/1189 [20:36<00:00,  1.04s/it]
Processing Images: 100%|██████████| 1189/1189 [17:50<00:00,  1.11it/s]
Processing Images:  15%|█▍        | 177/1189 [02:57<12:15,  1.38it/s] 

### todo
* n-shot prompting with n = 0, 1, and (say) 10 examples
* graph the results of the different prompting strategies
* understand the LLM component of MiniCPM
* analyse the effect of different context strings
* try richer descriptions of ecDNA in the context