# In [None]:
import torch
from transformers import AutoProcessor, AutoModelForImageTextToText
from PIL import Image

###############################################################################
# PART 1: MODEL INITIALIZATION
###############################################################################
def initialize_model(model_path: str):
    """
    Load the processor and the image-text-to-text model stored at `model_path`.

    The processor is loaded first, then the model. The model is loaded with
    `torch_dtype="auto"` and `device_map="auto"`, leaving dtype selection and
    device placement to the library.

    Returns:
        A `(processor, model)` tuple.
    """
    # Keyword options forwarded unchanged to the model loader.
    load_options = {"torch_dtype": "auto", "device_map": "auto"}

    loaded_processor = AutoProcessor.from_pretrained(model_path)
    loaded_model = AutoModelForImageTextToText.from_pretrained(
        model_path, **load_options
    )
    return loaded_processor, loaded_model
###############################################################################
# PART 2: DATA PREPARATION (images, text, prompt)
###############################################################################
def prepare_data(buyer_image_path: str, seller_image_path: str,
                 image_size: tuple = (524, 524)):
    """
    Loads and resizes the images, defines buyer/seller descriptions,
    constructs chat messages, and returns everything the model call needs.

    Args:
        buyer_image_path: Path to the photo supplied by the buyer.
        seller_image_path: Path to the photo supplied by the seller.
        image_size: `(width, height)` both images are resized to. The resize
            is a plain `Image.resize`, so the aspect ratio is not preserved.
            Defaults to the original hard-coded `(524, 524)`.

    Returns:
        A `(buyer_image, seller_image, messages)` tuple, where `messages` is a
        single-turn chat (role "user") containing, in order: buyer image,
        buyer text, seller image, seller text, and the analysis prompt.

    Raises:
        RuntimeError: If either image cannot be opened, converted, or resized.
            The underlying exception is attached as `__cause__`.
    """
    loaded_images = []
    try:
        for image_path in (buyer_image_path, seller_image_path):
            # Image.open keeps the underlying file handle open; the context
            # manager closes it once convert() has produced an in-memory copy.
            with Image.open(image_path) as raw_image:
                loaded_images.append(raw_image.convert("RGB").resize(image_size))
    except Exception as e:
        # Broad on purpose: callers rely on any load failure surfacing as
        # RuntimeError. Chain the cause so the original traceback is kept.
        raise RuntimeError(f"Error loading images: {e}") from e
    buyer_image, seller_image = loaded_images

    # Example buyer/seller descriptions (placeholder texts).
    buyer_description = (
        "I was excited to open the package as soon as it arrived. "
        "The build feels solid yet not overly heavy, which is great for my small nightstand. "
        "I especially like how the display is easy on the eyes—even in the middle of the night, it doesn’t light up the entire room. "
        "My favorite part is how seamlessly it charges through my wallet case. "
        "It’s just so hassle-free! "
        "I’m actually thinking about grabbing another one for the office.\n\n"
    )

    seller_description = " seller description\n\n"

    prompt = (
        "Analyze the alignment between the seller's promises and the buyer's experience by examining the seller's description, the buyer's review, and any accompanying images provided by both. "
        "Determine if the visual appearance in the seller’s image matches the product in the buyer’s image and whether the seller’s description aligns with the buyer’s feedback. "
        "Identify any irrelevant comments in the buyer's review that praise the product based on personal preferences rather than promised features. "
        "Additionally, evaluate if there are any discrepancies between what the seller promised and what the buyer experienced, and assess whether the buyer’s review reflects satisfaction, dissatisfaction, or a personal opinion about the product. "
        "Provide a clear conclusion based on your findings."
    )

    # Construct messages; the image order here (buyer, then seller) is the
    # order in which the images must later be handed to the processor.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": buyer_image},
                {"type": "text", "text": buyer_description},
                {"type": "image", "image": seller_image},
                {"type": "text", "text": seller_description},
                {"type": "text", "text": prompt},
            ],
        }
    ]

    return buyer_image, seller_image, messages

###############################################################################
# PART 3: GENERATE AND DECODE MODEL OUTPUT
###############################################################################
def generate_analysis(processor, model, buyer_image, seller_image, messages):
    """
    Uses the loaded processor and model to transform the messages into a suitable format,
    then generates and decodes the final output.

    Args:
        processor: Object providing `apply_chat_template`, a call interface
            that builds model inputs, and `batch_decode`.
        model: Object providing `generate` and a `device` attribute.
        buyer_image: First image passed to the processor.
        seller_image: Second image passed to the processor.
        messages: Chat messages rendered into the prompt text.

    Returns:
        The decoded text of the first (only) generated sequence, with the
        prompt tokens stripped.

    Raises:
        RuntimeError: On any failure while templating, preprocessing,
            generating, or decoding. The underlying exception is attached as
            `__cause__`.
    """
    try:
        text = processor.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        if text is None:
            raise ValueError("Generated text from apply_chat_template is None.")

        # Send the inputs to the device the model actually lives on instead of
        # guessing from CUDA availability; the two can differ.
        inputs = processor(
            text=[text],
            images=[buyer_image, seller_image],
            padding=True,
            return_tensors="pt",
        ).to(model.device)

        generated_ids = model.generate(
            **inputs,
            max_new_tokens=550,
            # temperature/top_k/top_p are sampling knobs; request sampling
            # explicitly so they are honoured regardless of the checkpoint's
            # default generation config.
            do_sample=True,
            temperature=0.7,
            repetition_penalty=1.2,
            top_k=50,
            top_p=0.9,
        )

        # generate() returns prompt + continuation; drop the prompt prefix of
        # each sequence so only newly generated tokens are decoded.
        generated_ids_trimmed = [
            out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]

        output_text = processor.batch_decode(
            generated_ids_trimmed,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )

        return output_text[0]
    except Exception as e:
        # Keep the original exception as the explicit cause for debugging.
        raise RuntimeError(f"Error during processing or inference: {e}") from e

###############################################################################
# MAIN EXECUTION EXAMPLE
###############################################################################
if __name__ == "__main__":
    # Script configuration. Both image paths currently point at the same
    # placeholder file.
    model_path = "qwen_model"
    buyer_image_path = "image/path.jpg"
    seller_image_path = "image/path.jpg"

    # Load the model, build the chat inputs, then run the analysis.
    processor, model = initialize_model(model_path)
    buyer_image, seller_image, messages = prepare_data(
        buyer_image_path, seller_image_path
    )
    analysis_text = generate_analysis(
        processor, model, buyer_image, seller_image, messages
    )

    print("Generated Structured Analysis:\n", analysis_text)

