# In [None]:
import os
import io
import time
import json
import ollama
import pandas as pd
from PIL import Image
from pydantic import BaseModel, Field

# --- Pydantic schema and prompt used for the LLaVA request ---
# Structured-output schema for one image. Its JSON schema is passed to
# ollama.chat() as `format`, and the model's reply is validated against it in
# process_image_with_llava().
# NOTE(review): documented with comments rather than a class docstring, since a
# docstring would presumably be emitted into model_json_schema() and change the
# request payload - confirm before converting.
class ImageContent(BaseModel):
    # Free-text summary of what the image shows.
    image_description: str = Field(description="A general summary of the image content.")
    # Every piece of text the model can read in the image, one entry per word or sentence.
    visible_text: list[str] = Field(description="A list of all words or sentences that are visually present as text within the image.")

# User-message text sent alongside each image in process_image_with_llava().
# NOTE(review): the prompt only asks for a one-sentence description; nothing in
# it asks the model to transcribe text, even though ImageContent also has a
# `visible_text` field - confirm whether the prompt should mention it.
content_prompt = """
Using the image, provide a detailed description. Confine the response to one sentence with less than 25 words, highlighting the main subject's appearance and environment. Example: Input Image: [image of a brown dog] Output: A small, shaggy brown dog with a red collar is sitting on a green lawn with a blue ball nearby.
"""
# --------------------------------------------------------

def resize_image_if_needed(image_path, max_width=600, max_height=400):
    """
    Load an image, shrink it if necessary and return it JPEG-encoded.

    The image is scaled down proportionally when it is wider than max_width
    or taller than max_height; smaller images keep their size. The result is
    always re-encoded as JPEG (quality 85) to keep the payload small.

    Images whose mode the JPEG encoder cannot store (for example RGBA or
    palette images, as commonly found in PNG/GIF files) are converted to RGB
    first; any transparency is discarded by that conversion.

    Args:
        image_path: Path of the image file to load.
        max_width: Maximum width, in pixels, of the returned image.
        max_height: Maximum height, in pixels, of the returned image.

    Returns:
        The JPEG-encoded image as bytes.
    """
    # Modes Pillow's JPEG writer accepts directly; these are left untouched so
    # images that already worked are encoded exactly as before.
    jpeg_ready_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    with Image.open(image_path) as source:
        if source.mode in jpeg_ready_modes:
            img = source
        else:
            # Convert before resizing: saving e.g. RGBA/P/LA as JPEG raises
            # OSError, and palette images cannot be resampled with LANCZOS.
            img = source.convert("RGB")

        width, height = img.size

        if width > max_width or height > max_height:
            # thumbnail() resizes in place and preserves the aspect ratio
            img.thumbnail((max_width, max_height), Image.Resampling.LANCZOS)
            print(f"Resized image from {width}x{height} to {img.size[0]}x{img.size[1]}")

        # Encode to JPEG in memory (85 is a good balance of size/quality)
        img_byte_arr = io.BytesIO()
        img.save(img_byte_arr, format='JPEG', quality=85)
        return img_byte_arr.getvalue()

def process_image_with_llava(image_path):
    """
    Ask the local LLaVA model (via Ollama) to describe one image.

    The image is resized/re-encoded with resize_image_if_needed(), sent with
    content_prompt, and the reply is constrained to and validated against the
    ImageContent schema.

    Args:
        image_path: Path of the image file to describe.

    Returns:
        A tuple (image_description, visible_text). Both elements are None
        when the file does not exist, the Ollama call fails, or the reply
        cannot be loaded/validated; the failure is reported on stdout.
    """
    if not os.path.exists(image_path):
        print(f"File not found: {image_path}. Skipping.")
        return None, None

    options = {"temperature": 0.0}
    # Ollama's option is spelled "num_thread" (singular); a misspelled key is
    # not applied. os.cpu_count() may return None, in which case Ollama is
    # left to pick its own thread count.
    cpu_total = os.cpu_count()
    if cpu_total:
        options["num_thread"] = cpu_total

    try:
        # Get the resized image bytes
        image_bytes = resize_image_if_needed(image_path)

        response = ollama.chat(
            model='llava',
            messages=[{
                'role': 'user',
                'content': content_prompt,
                'images': [image_bytes],
            }],
            format=ImageContent.model_json_schema(),
            options=options,
        )

        json_output_str = response['message']['content']
        details = ImageContent.model_validate_json(json_output_str)

        return details.image_description, details.visible_text

    except ollama.ResponseError as e:
        print(f"Ollama API error for {image_path}: {e}")
        # Back off before the caller moves on to the next image, in case the
        # server is overloaded. This image is NOT retried here; it is only
        # reported as failed.
        time.sleep(10)
        return None, None
    except Exception as e:
        # Boundary handler: one bad image (decode error, invalid JSON, ...)
        # must not abort a long batch run.
        print(f"An unexpected error occurred processing {image_path}: {e}")
        return None, None

# --- Main script: describe every dataset image with LLaVA, caching results on disk ---
if __name__ == '__main__':
    # Make sure Ollama server is running (e.g., 'ollama run llava' in your terminal)

    data_dir = "../data/JailBreakV_28K/JailBreakV_28k"
    df_raw = pd.read_csv(os.path.join(data_dir, "JailBreakV_28K.csv"))

    temp_cache_file = 'temp_image_llm_results.csv'
    result_columns = ['img_path', 'image_description', 'visible_text']
    processed_results = []
    cached_paths = set()

    print("------------------ Starting image processing with caching...")
    print(f"Detected {os.cpu_count()} CPU cores available for Ollama.")

    # Load existing cache if it exists to resume processing. A zero-byte file
    # is treated as "no cache" so the header still gets written below.
    cache_has_header = (
        os.path.exists(temp_cache_file) and os.path.getsize(temp_cache_file) > 0
    )
    if cache_has_header:
        cached_df = pd.read_csv(temp_cache_file)
        # Ensure column type consistency before set operations
        cached_df['img_path'] = cached_df['img_path'].astype(str)
        # Guard against duplicate rows in an older cache; the merge below
        # needs exactly one result row per image.
        cached_df = cached_df.drop_duplicates(subset='img_path')
        processed_results = cached_df.to_dict(orient='records')
        cached_paths = set(cached_df['img_path'])
        print(f"Loaded {len(processed_results)} cached results.")

    # Determine which images still need processing. The same image can be
    # referenced by many dataset rows, so each distinct path is sent to the
    # model only once; rows without a path are skipped.
    unique_paths = df_raw['image_path'].dropna().astype(str).drop_duplicates()
    images_to_process = unique_paths[~unique_paths.isin(cached_paths)].tolist()

    # Process remaining images
    for i, image_path in enumerate(images_to_process, start=1):
        print(f"Processing {i}/{len(images_to_process)}: {image_path}...")
        # Image paths in the CSV are relative to the dataset directory
        full_image_path = os.path.join(data_dir, image_path)
        description, text_list = process_image_with_llava(full_image_path)

        if description is None:
            # Failed images are not cached, so a later run will try them again
            continue

        result_dict = {
            'img_path': image_path,
            'image_description': description,
            'visible_text': json.dumps(text_list),
        }
        processed_results.append(result_dict)
        # Append only the new row instead of rewriting the whole cache file
        # after every image.
        pd.DataFrame([result_dict], columns=result_columns).to_csv(
            temp_cache_file, mode='a', header=not cache_has_header, index=False
        )
        cache_has_header = True
        print(f"Successfully processed and cached: {image_path}")

    # Explicit columns keep the merge valid even when nothing was processed
    df_results = pd.DataFrame(processed_results, columns=result_columns)
    # The dataset calls the key 'image_path' while the results call it
    # 'img_path', so the two sides must be named separately.
    df_final = pd.merge(
        df_raw, df_results, left_on='image_path', right_on='img_path', how='left'
    )
    # 'img_path' duplicates 'image_path' after the merge
    df_final = df_final.drop(columns='img_path')
    df_final['visible_text'] = df_final['visible_text'].apply(lambda x: json.loads(x) if pd.notna(x) else [])
    print("\n--- Final Merged DataFrame ---")
    print(df_final.head())
