In [None]:
input_file = "/kaggle/input/txtfile/requirements.txt"  # Replace with your file name
output_file = "/kaggle/working/install_requirements.txt"

# Rewrite the requirements file so that every requirement becomes its own
# "pip install <requirement>" line in the output file.
with open(input_file, "r", encoding="utf-8") as infile, \
        open(output_file, "w", encoding="utf-8") as outfile:
    for raw_line in infile:
        requirement = raw_line.strip()
        # Skip blank lines and requirements-file comments: a "#..." line names no
        # package, so "pip install # ..." would not be a usable install command.
        if not requirement or requirement.startswith("#"):
            continue
        outfile.write(f"pip install {requirement}\n")

print(f"Modified file saved as {output_file}")


In [None]:
!pip install datasets

In [None]:
!pip install einops

In [None]:
!pip install openai

In [None]:
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 text string.

    The file is read in binary mode, base64-encoded, and the resulting bytes
    are decoded as UTF-8 so the value can be embedded in text payloads.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')

In [None]:
import torch
from torchvision import transforms
from PIL import Image

def load_image(image, input_size=448):
    """Convert a PIL image into a normalized, batched float32 tensor.

    Args:
        image: A ``PIL.Image.Image`` instance.
        input_size: Side length, in pixels, of the square the image is resized to.

    Returns:
        A ``torch.float32`` tensor with a leading batch dimension of 1, placed
        on the GPU when CUDA is available and on the CPU otherwise.

    Raises:
        ValueError: If ``image`` is not a ``PIL.Image.Image``.
    """
    if not isinstance(image, Image.Image):
        raise ValueError("Input must be a PIL.Image object.")

    # Normalize below is configured with three per-channel statistics, so every
    # non-RGB mode (RGBA, grayscale "L", palette "P", ...) must be converted,
    # not only RGBA.
    if image.mode != 'RGB':
        image = image.convert('RGB')

    # Resize, convert to a tensor, and normalize with the ImageNet statistics.
    transform = transforms.Compose([
        transforms.Resize((input_size, input_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Apply the transformations and add the batch dimension.
    tensor = transform(image).unsqueeze(0)

    # Deliberately NOT clamped to [0, 1]: after mean/std normalization valid
    # values are negative or greater than 1, and clamping them would discard
    # most of the normalized signal.

    # Fall back to the CPU instead of raising when no GPU is present.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return tensor.to(device=device, dtype=torch.float32)

In [None]:
import google.generativeai as genai
from PIL import Image
import base64
from openai import OpenAI
from transformers import PaliGemmaForConditionalGeneration, PaliGemmaProcessor
import torchvision.transforms as T
from transformers import AutoModel, AutoTokenizer
from transformers import AutoProcessor, LlavaForConditionalGeneration



# Configure the API key.
# The key is read from the GOOGLE_API_KEY environment variable so the real
# secret never has to be saved in the notebook; the original placeholder is
# kept as the fallback so behaviour is unchanged when the variable is unset.
import os

GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "KEEp API KEY")
genai.configure(api_key=GOOGLE_API_KEY)

def _call_gemini(model_name, label, text_query, image, temperature):
    """Send ``[text_query, image]`` to the Gemini model ``model_name``.

    ``label`` is only used in the error message printed when the call fails.
    Returns the text of the first part of the first candidate, or ``""`` when
    the request or the response parsing raises.
    """
    model = genai.GenerativeModel(model_name)

    try:
        response = model.generate_content(
            [text_query, image],
            generation_config=genai.types.GenerationConfig(
                candidate_count=1,
                max_output_tokens=1024,
                temperature=temperature
            )
        )
        return response.candidates[0].content.parts[0].text
    except Exception as e:
        # Best effort: a failed call is reported and yields an empty answer so
        # a long evaluation run is not aborted by one bad request.
        print(f"Error during {label} call: {e}")
        return ""


def call_gemini_1_5_pro(text_query, image, temperature=0.4):
    """Call Gemini 1.5 Pro Vision model with text and image input.

    Args:
        text_query: Prompt text sent alongside the image.
        image: Image object passed through unchanged to ``generate_content``.
        temperature: Sampling temperature for generation.

    Returns:
        The generated text, or ``""`` if the call failed.
    """
    return _call_gemini(
        'gemini-1.5-pro', "Gemini 1.5 Pro Vision", text_query, image, temperature
    )


def call_gemini_1_5_flash(text_query, image, temperature=0.4):
    """Call Gemini 1.5 Flash Vision model with text and image input.

    Args:
        text_query: Prompt text sent alongside the image.
        image: Image object passed through unchanged to ``generate_content``.
        temperature: Sampling temperature for generation.

    Returns:
        The generated text, or ``""`` if the call failed.
    """
    return _call_gemini(
        'gemini-1.5-flash', "Gemini 1.5 Flash Vision", text_query, image, temperature
    )


def call_gpt_vision(text_query, image_path, temperature=0.4):
    """Ask an OpenAI vision-capable chat model about an image.

    Args:
        text_query: Prompt text sent alongside the image.
        image_path: Either a path to an image file or an in-memory
            ``PIL.Image.Image`` (the evaluation loop passes the latter).
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The text content of the first choice in the response.
    """
    # Local imports keep this cell runnable regardless of which other cells
    # have already imported these stdlib modules.
    import io
    import os

    if isinstance(image_path, Image.Image):
        # In-memory image: serialize to JPEG bytes (JPEG has no alpha channel,
        # hence the RGB conversion) and base64-encode them directly.
        buffer = io.BytesIO()
        image_path.convert("RGB").save(buffer, format="JPEG")
        base64_image = base64.b64encode(buffer.getvalue()).decode("utf-8")
    else:
        base64_image = encode_image(image_path)

    # openai>=1.0 removed the module-level ``openai.ChatCompletion`` API; the
    # client object imported by this notebook (``OpenAI``) replaces it.
    client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

    # The image must be sent as an ``image_url`` content part; a markdown image
    # link inside a plain string is treated as ordinary text by the API.
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": f"{text_query}\nBelow is the image I want you to analyze:",
                },
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                },
            ],
        }
    ]

    # "gpt-4" is text-only; "gpt-4o" accepts image inputs.
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        max_tokens=300,
        temperature=temperature,
    )

    return response.choices[0].message.content

In [None]:
# Loaded (model, tokenizer) pairs keyed by checkpoint path, so the weights are
# loaded once per kernel session instead of once per query.
_INTERNVL2_CACHE = {}


def _get_internvl2(path):
    """Return the (model, tokenizer) pair for ``path``, loading it on first use."""
    if path not in _INTERNVL2_CACHE:
        model = AutoModel.from_pretrained(
            path,
            torch_dtype=torch.bfloat16,
            low_cpu_mem_usage=True,
            use_flash_attn=True,
            trust_remote_code=True
        ).eval().cuda()
        tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
        _INTERNVL2_CACHE[path] = (model, tokenizer)
    return _INTERNVL2_CACHE[path]


def call_internvl2_vision(text_query, image):
    """Query the InternVL2.5-1B checkpoint with a text prompt and one image.

    Args:
        text_query: Prompt text; it is placed after an ``<image>`` placeholder line.
        image: A ``PIL.Image.Image``; it is preprocessed by ``load_image``.

    Returns:
        Whatever ``model.chat`` returns for the question.
    """
    path = 'OpenGVLab/InternVL2_5-1B'
    model, tokenizer = _get_internvl2(path)

    # Preprocess the image and match the dtype/device the model was loaded with.
    pixel_values = load_image(image, input_size=448).to(torch.bfloat16).cuda()

    # Generation settings: sampled decoding, at most 512 new tokens.
    generation_config = dict(max_new_tokens=512, do_sample=True)

    question = f'<image>\n{text_query}\n'

    return model.chat(tokenizer, pixel_values, question, generation_config)

In [None]:
import os
import torch
import numpy as np
import pandas as pd
import json
from tqdm import tqdm

# Dispatch a (text, image) query to the vision backend selected by name
def call_vision(text_query, image_path, model):
    """Route a query to the vision backend named by ``model``.

    Args:
        text_query: Prompt text forwarded to the backend.
        image_path: Image argument forwarded unchanged to the backend.
        model: One of ``"gemini_1"`` (Gemini 1.5 Flash), ``"gemini_2"``
            (Gemini 1.5 Pro), ``"gpt4"`` or ``"intern_vl2"``.

    Returns:
        The backend's response.

    Raises:
        NotImplementedError: If ``model`` is not one of the supported keys.
    """
    if model == "gemini_1":
        response = call_gemini_1_5_flash(text_query, image_path)
    elif model == "gemini_2":
        response = call_gemini_1_5_pro(text_query, image_path)
    elif model == "gpt4":
        # Was assigned to a misspelled name, leaving ``response`` unbound.
        response = call_gpt_vision(text_query, image_path)
    elif model == "intern_vl2":
        response = call_internvl2_vision(text_query, image_path)
    else:
        raise NotImplementedError(f"Unsupported vision model: {model!r}")
    return response

In [None]:
# Classification prompt shared by every row. The wording and layout are those of
# the original prompt; only the stray leading double quote (left by a
# four-quote opening delimiter) has been dropped.
_CELL_TYPE_PROMPT = (
    " The given image is a type of cell image captured by microscopy technique. "
    "can you identify what kind of the following cell does the given image more representing for?\n"
    "       1)An adherent cell\n"
    "       2)A cell with debris \n"
    "       3)A cell on the well edge\n"
    "       4)Image of a cell\n"
    "        please respond with the option with value, no need of explanation.\n"
    "\n"
    "        "
)


def prepare_vision_prompt(dataset):
    """Build one prompt record per dataset row.

    Args:
        dataset: Iterable of rows, each providing ``"image"`` and ``"True_label"``.

    Returns:
        A list of dicts with keys ``"image"``, ``"true_label"`` and ``"query"``;
        ``"query"`` is the same classification prompt for every row.
    """
    return [
        {
            "image": row["image"],
            "true_label": row["True_label"],
            "query": _CELL_TYPE_PROMPT,
        }
        for row in dataset
    ]

In [None]:
# Run configuration, set directly as notebook variables rather than via argparse.
model = "intern_vl2"  # Backend key passed to call_vision
result_dir = "micronscopic2_benchmark"

# Create the results directory the first time the notebook runs.
results_dir_missing = not os.path.exists(result_dir)
if results_dir_missing:
    os.makedirs(result_dir)

In [None]:
# Load the dataset
from datasets import load_dataset

dataset = load_dataset("mario-dg/dreambooth-cell-images", split="train")

# Keep only the evaluation rows first, so the map below touches just these rows
# instead of rewriting the whole training split.
specific_indices = [164, 258, 610, 652, 709, 1064, 1109, 1514, 1536, 1581]  # Indices of rows to select
dataset = dataset.select(specific_indices)

# Expose each row's "prompt" text under the "True_label" key that
# prepare_vision_prompt reads; the image is carried over unchanged.
dataset = dataset.map(
    lambda x: {
        "image": x["image"],
        "True_label": x["prompt"],
    }
)

# Prepare prompts
prompts = prepare_vision_prompt(dataset)

# Collect one record per evaluated prompt; converted to a DataFrame at the end.
results = []

for prompt_data in tqdm(prompts, desc="Evaluating Prompts"):
    # Extract prompt details
    image = prompt_data["image"]
    true_label = prompt_data["true_label"]
    prompt = prompt_data["query"]

    # Call the vision model; an unrecognized model key yields an empty response.
    vision_response = (
        call_vision(prompt, image, model)
        if model in ['gemini_1', 'gemini_2', 'gpt4', 'intern_vl2']
        else ""
    )

    print(f"True Label: {true_label}\nVision Response: {vision_response}")

    # Append results
    results.append({
        "prompt": prompt,
        "response": {"vision": vision_response},
        "true_label": true_label
    })

# Convert results to a DataFrame
df_results = pd.DataFrame(results)

# Save results to a JSON file
result_path = f"{result_dir}/{model}_Datset2ObjClass_results.json"
df_results.to_json(result_path, orient="records", indent=4)