In [25]:
import re
from PIL import Image
from transformers import (
    BlipProcessor,
    BlipForConditionalGeneration,
    pipeline
)


In [26]:
# Checkpoint identifiers, named once so the BLIP processor and the BLIP model
# are always loaded from the same checkpoint.
BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
ZERO_SHOT_CHECKPOINT = "facebook/bart-large-mnli"

# VISION MODEL (IMAGE -> CAPTION)
blip_processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
blip_model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)

# NLP MODEL (ZERO-SHOT FALLBACK)
# Used by the insight extraction step when no product keyword is found
# in the caption.
zero_shot_classifier = pipeline(
    "zero-shot-classification",
    model=ZERO_SHOT_CHECKPOINT
)


Device set to use cpu


In [27]:
def generate_caption(image_path: str) -> str:
    """
    Generate a raw image caption.
    Internal use only.

    Opens the image at ``image_path``, converts it to RGB, passes it through
    the module-level ``blip_processor`` / ``blip_model`` pair and returns the
    decoded text of the first generated sequence with special tokens removed.

    Args:
        image_path: Path of the image file to caption.

    Returns:
        The decoded caption string.
    """
    # The context manager releases the file handle as soon as the RGB copy
    # exists instead of leaving the cleanup to garbage collection.
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB")

    inputs = blip_processor(image, return_tensors="pt")
    output = blip_model.generate(**inputs)
    return blip_processor.decode(
        output[0],
        skip_special_tokens=True
    )


In [28]:
def extract_image_insights(caption: str) -> dict:
    """
    Derive coarse product insights from an image caption.

    The caption is lower-cased and scanned for known product keywords. When
    no keyword is present, the module-level ``zero_shot_classifier`` chooses
    among a fixed label list. A blank or missing caption skips the classifier
    and is reported as ``"other"``.

    Args:
        caption: Caption text describing the image.

    Returns:
        A dict with three keys:
        ``product_type`` - matched keyword category or classifier label,
        ``category_hint`` - always the same value as ``product_type``,
        ``condition`` - ``"new"``, ``"used but well maintained"`` or
        ``"intact and usable"``.
    """
    # Tolerate None so the blank-caption guard below covers it as well.
    caption = (caption or "").lower()

    product_keywords = {
        "book": ["book", "textbook", "notebook", "notes"],
        "calculator": ["calculator"],
        "laptop": ["laptop"],
        "mobile": ["phone", "mobile"],
        "idol": ["idol", "statue", "ganpati", "ganesha"]
    }

    # Keywords are matched as plain substrings, so plural and compound forms
    # ("books", "smartphone") also hit. The first category in dict order wins.
    product_type = next(
        (
            product
            for product, words in product_keywords.items()
            if any(word in caption for word in words)
        ),
        None,
    )

    # Zero-shot fallback
    if product_type is None:
        if caption.strip():
            labels = [
                "book", "notes", "electronics",
                "calculator", "stationery", "decor item", "other"
            ]
            result = zero_shot_classifier(caption, labels)
            # Take the first label the classifier returns.
            product_type = result["labels"][0]
        else:
            # Nothing to classify: the zero-shot pipeline rejects an empty
            # sequence, so report the catch-all label directly.
            product_type = "other"

    category_hint = product_type

    # Condition inference (whole-word matches only)
    if re.search(r"\b(new|unused)\b", caption):
        condition = "new"
    elif re.search(r"\b(used|old)\b", caption):
        condition = "used but well maintained"
    else:
        condition = "intact and usable"

    return {
        "product_type": product_type,
        "category_hint": category_hint,
        "condition": condition
    }


In [12]:
def analyze_user_description(user_text: str | None) -> dict:
    if not user_text or not user_text.strip():
        return {
            "quality": "none",
            "original_text": ""
        }

    words = user_text.strip().split()

    if len(words) < 4:
        quality = "weak"
    else:
        quality = "usable"

    return {
        "quality": quality,
        "original_text": user_text.strip()
    }


In [29]:
def generate_selling_description(insights: dict, user_analysis: dict) -> str:
    """
    Generate a buyer-oriented, sell-worthy product description
    using image insights and optional user input.

    NOTE(review): a later cell defines another function with this same name;
    when the cells run in order, that later definition replaces this one.

    Args:
        insights: Dict that may carry ``product_type`` (default ``"item"``)
            and ``condition`` (default ``"good condition"``).
        user_analysis: Dict that may carry ``quality`` and ``original_text``.

    Returns:
        Three sentences joined by single spaces: an opening sentence, a usage
        sentence chosen by product type, and a fixed value sentence. The
        opening is the user's own text unless ``quality`` is ``"none"`` or
        ``"weak"`` or the text is empty, in which case a generated sentence
        about the product's condition is used.
    """

    product_type = insights.get("product_type", "item")
    condition = insights.get("condition", "good condition")

    user_quality = user_analysis.get("quality")
    user_text = user_analysis.get("original_text", "").strip()

    # Template for the opening sentence. Adjective phrases such as "new" or
    # "intact and usable" read correctly only without "in"; noun phrases that
    # end in "condition" (like the default) keep it.
    linking_verb = "is in" if condition.endswith("condition") else "is"
    base_intro = f"This {product_type} {linking_verb} {condition}."

    usage_map = {
        "book": "It is suitable for regular academic study and exam preparation.",
        "calculator": "It is suitable for engineering and science coursework.",
        "laptop": "It can be used for academic work, coding, and daily tasks.",
        "mobile": "It is suitable for everyday communication and basic usage.",
        "idol": "It is suitable for home decor or personal prayer space.",
        "electronics": "It is suitable for regular daily use.",
        "stationery": "It is suitable for everyday academic needs."
    }

    usage_line = usage_map.get(
        product_type,
        "It is suitable for regular use."
    )

    value_line = "A practical and affordable option for college students."

    # Trailing periods are dropped so exactly one is re-added below.
    refined_user_text = user_text.rstrip(".")

    # Missing and weak user input are treated the same way: the generated
    # intro is used. The same fallback applies when no text is left, so the
    # description never starts with a stray ". ".
    if user_quality in ("none", "weak") or not refined_user_text:
        opening = base_intro
    else:
        opening = f"{refined_user_text}."

    return f"{opening} {usage_line} {value_line}"


In [30]:
def generate_selling_description(
    insights: dict,
    user_analysis: dict
) -> str:
    """
    Build a short, buyer-oriented product description.

    Args:
        insights: Dict with required keys ``product_type`` and ``condition``.
        user_analysis: Dict with required keys ``quality`` and
            ``original_text``.

    Returns:
        ``"<opening>. It is <usage>. A practical and affordable option for
        college students."`` where the opening is the user's text (trailing
        periods removed) when ``quality`` is ``"usable"``, and otherwise a
        generated sentence about the product's condition.

    Raises:
        KeyError: If any of the required keys is missing.
    """

    product = insights["product_type"]
    condition = insights["condition"]

    usage_map = {
        "book": "suitable for regular academic study and exam preparation",
        "calculator": "useful for engineering and science coursework",
        "idol": "suitable for home decor or personal prayer space",
        "electronics": "suitable for regular daily use"
    }

    # Product types without a tailored line get the generic one.
    usage = usage_map.get(
        product,
        "suitable for regular use"
    )

    if user_analysis["quality"] == "usable":
        base = user_analysis["original_text"].rstrip(".")
    else:
        # Adjective phrases such as "new" or "intact and usable" read
        # correctly only without "in" ("This book is new"); noun phrases
        # ending in "condition" (e.g. "good condition") keep it.
        linking_verb = "is in" if condition.endswith("condition") else "is"
        base = f"This {product} {linking_verb} {condition}"

    return (
        f"{base}. It is {usage}. "
        "A practical and affordable option for college students."
    )


In [31]:
def describe_product_from_image(
    image_path: str,
    user_description: str | None = None
) -> str:
    """
    Run the full pipeline: caption the image, extract insights from the
    caption, grade the optional user description, and return the generated
    selling description.
    """
    image_insights = extract_image_insights(generate_caption(image_path))
    return generate_selling_description(
        image_insights,
        analyze_user_description(user_description),
    )


In [34]:
# Demo run: describe a sample image together with a short seller-written note.
desc = describe_product_from_image(
    "/content/ganpati.jpeg",
    user_description="idol of god ganapti",
)

print(desc)


idol of god ganapti. It is suitable for home decor or personal prayer space. A practical and affordable option for college students.
