In [None]:
import os
from langchain_community.llms import LlamaCpp
from langchain_core.messages import HumanMessage
from langchain.chains import LLMChain
from langchain_community.chat_models import ChatLlamaAPI
from PIL import Image
import json
from datetime import datetime
from tqdm import tqdm
from transformers import LlavaForConditionalGeneration, LlavaProcessor
import torch

class ImageCaptioner:
    """Generate captions for images with a LLaVA vision-language model.

    The model and processor are loaded once in ``__init__``; captions from
    the most recent ``process_folder`` run are kept in ``self.results``
    (``{filename: caption}``).
    """

    # Lower-case file extensions that process_folder treats as images.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')

    # Chat template of the LLaVA-1.5 checkpoints. The processor needs the
    # "<image>" placeholder to know where the image features go.
    PROMPT_TEMPLATE = "USER: <image>\n{prompt} ASSISTANT:"

    def __init__(self, model_name="llava-hf/llava-1.5-7b-hf", device="cuda" if torch.cuda.is_available() else "cpu"):
        """Load the model and processor.

        Args:
            model_name: Hugging Face model id or local path of a LLaVA
                checkpoint.
            device: Device the model and inputs are moved to. The default
                is evaluated once, when the class is defined.
        """
        self.model = LlavaForConditionalGeneration.from_pretrained(model_name).to(device)
        self.processor = LlavaProcessor.from_pretrained(model_name)
        self.device = device
        self.results = {}

    @classmethod
    def _build_prompt(cls, prompt):
        """Wrap a plain instruction in the LLaVA chat template.

        A prompt that already contains ``<image>`` is assumed to be fully
        formatted by the caller and is returned unchanged.
        """
        if "<image>" in prompt:
            return prompt
        return cls.PROMPT_TEMPLATE.format(prompt=prompt)

    def caption_image(self, image_path, prompt="Describe this image in detail."):
        """Return a caption for a single image.

        Args:
            image_path: Path of the image file.
            prompt: Instruction for the model, either plain text (wrapped in
                the chat template automatically) or an already formatted
                prompt containing ``<image>``.

        Returns:
            The generated caption without the echoed prompt. Failures are
            deliberately best-effort: instead of raising, the method returns
            a string starting with ``"Error processing image: "`` so that a
            batch run is not aborted by one bad file.
        """
        try:
            # Close the file handle promptly and normalise the mode:
            # palette/RGBA/grayscale images are converted to RGB.
            with Image.open(image_path) as img:
                image = img.convert("RGB")

            inputs = self.processor(
                images=image,
                text=self._build_prompt(prompt),
                return_tensors="pt",
            ).to(self.device)

            outputs = self.model.generate(
                **inputs,
                # max_length would also count the prompt (including the image
                # tokens), which can leave no budget for the caption itself.
                max_new_tokens=200,
                # Plain beam search. A temperature has no effect unless
                # sampling is enabled, so none is passed.
                num_beams=4,
            )

            # The generated sequence starts with the prompt tokens; keep
            # only what the model produced after them.
            prompt_length = inputs["input_ids"].shape[1]
            caption = self.processor.decode(
                outputs[0][prompt_length:], skip_special_tokens=True
            )
            return caption.strip()
        except Exception as e:
            return f"Error processing image: {str(e)}"

    def process_folder(self, folder_path, output_format="both"):
        """Caption every image in a folder and save the results.

        Output files are named ``captions_<YYYYmmdd_HHMMSS>.txt`` / ``.json``
        (timestamp taken when this method starts) and are written to the
        current working directory. Both files are updated after every image
        so that an interrupted run still leaves usable output.

        Args:
            folder_path: Directory to scan (not recursive).
            output_format: ``"txt"``, ``"json"`` or ``"both"``. Any other
                value writes no file; the results are still returned.

        Returns:
            ``{filename: caption}`` for this run (also stored in
            ``self.results``).
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        # Prepare output files
        txt_file = f"captions_{timestamp}.txt"
        json_file = f"captions_{timestamp}.json"
        write_txt = output_format in ("txt", "both")
        write_json = output_format in ("json", "both")

        # Sorted for a reproducible order (os.listdir order is arbitrary);
        # directories that merely look like images are skipped.
        image_files = sorted(
            name for name in os.listdir(folder_path)
            if name.lower().endswith(self.IMAGE_EXTENSIONS)
            and os.path.isfile(os.path.join(folder_path, name))
        )

        # Start from a clean slate so the JSON of this run does not contain
        # captions left over from an earlier call on another folder.
        self.results = {}

        # Process images with progress bar
        for filename in tqdm(image_files, desc="Processing images"):
            image_path = os.path.join(folder_path, filename)
            caption = self.caption_image(image_path)
            self.results[filename] = caption

            if write_txt:
                with open(txt_file, 'a', encoding='utf-8') as f:
                    f.write(f"Image: {filename}\nCaption: {caption}\n\n")

            if write_json:
                # Rewritten in full each time: JSON cannot be appended to.
                with open(json_file, 'w', encoding='utf-8') as f:
                    json.dump(self.results, f, indent=4)

        return self.results

# Usage example
if __name__ == "__main__":
    captioner = ImageCaptioner()
    # Placeholder: point this at a real directory before running.
    folder_path = "path/to/your/images"
    results = captioner.process_folder(folder_path)

    # Print some statistics
    print(f"\nProcessed {len(results)} images")
    # process_folder stamps its output files with the time the run started.
    # Recomputing datetime.now() here would usually print a file name that
    # does not exist, so only the naming pattern and location are reported.
    # Nothing is written when no image was found.
    if results:
        print(f"Results saved to captions_<run start time>.txt/json in {os.getcwd()}")