In [1]:
import os
import numpy as np
from vision_agent.tools import *
from typing import *
from pillow_heif import register_heif_opener
register_heif_opener()
import vision_agent as va
from vision_agent.tools import register_tool
from vision_agent.tools import load_image, owlv2_object_detection, florence2_ocr

ModuleNotFoundError: No module named 'vision_agent'

In [None]:
def analyze_shelf_planogram(image_path, shelf_boundaries=(0.33, 0.66), text_match_px=50):
    """Detect cleaning products in a shelf photo and summarise where they sit.

    The image is loaded, run through an object detector and an OCR model,
    each detection is assigned to a top/middle/bottom shelf by the vertical
    centre of its bounding box, nearby OCR text is attached to each product,
    and the result is rendered with ``create_summary``.

    Args:
        image_path: Location of the image, passed straight to ``load_image``.
        shelf_boundaries: ``(top_limit, middle_limit)`` thresholds on the
            vertical box centre. Centres below ``top_limit`` go to the top
            shelf, below ``middle_limit`` to the middle shelf, everything
            else to the bottom shelf. The defaults assume the detector
            returns bounding boxes normalised to the 0-1 range
            (NOTE(review): confirm against the vision_agent tool docs).
        text_match_px: Maximum horizontal and vertical distance, in pixels,
            between an OCR box centre and a product box centre for the text
            to be attached to that product.

    Returns:
        The text report produced by ``create_summary``.
    """
    image = load_image(image_path)

    # Detect products
    products = owlv2_object_detection("cleaning products, spray bottles, detergent bottles", image)

    # Read text on labels
    text_data = florence2_ocr(image)

    # Seed the categories the summary always reports so they show up as 0
    # even when nothing of that kind is detected.
    product_counts = {
        "cleaning_products": 0,
        "spray_bottles": 0,
        "detergent_bottles": 0
    }
    shelf_data = {
        "top_shelf": [],
        "middle_shelf": [],
        "bottom_shelf": []
    }

    height, width = image.shape[:2]
    top_limit, middle_limit = shelf_boundaries

    # Box centres are needed for both shelf assignment and text matching, so
    # compute them once per product instead of once per (text, product) pair.
    product_centers = []
    for product in products:
        # The detector may return a label outside the seeded categories;
        # count it under its own key rather than failing with KeyError.
        count_key = product['label'].replace(" ", "_")
        product_counts[count_key] = product_counts.get(count_key, 0) + 1

        box = product['bbox']
        x_center = (box[0] + box[2]) / 2
        y_center = (box[1] + box[3]) / 2
        product_centers.append((product, x_center, y_center))

        # Determine shelf based on y-coordinate
        if y_center < top_limit:
            shelf_data["top_shelf"].append(product)
        elif y_center < middle_limit:
            shelf_data["middle_shelf"].append(product)
        else:
            shelf_data["bottom_shelf"].append(product)

    # Attach each OCR string to every product whose centre lies within
    # text_match_px pixels of the text centre on both axes. Coordinate
    # differences are scaled by the image size to turn them into pixels.
    for text in text_data:
        text_box = text['bbox']
        text_x = (text_box[0] + text_box[2]) / 2
        text_y = (text_box[1] + text_box[3]) / 2
        for product, x_center, y_center in product_centers:
            near_horizontally = abs(text_x - x_center) * width < text_match_px
            near_vertically = abs(text_y - y_center) * height < text_match_px
            if near_horizontally and near_vertically:
                # Note: this adds a 'text' key to the detection dict itself.
                product.setdefault('text', []).append(text['label'])

    # Generate summary
    return create_summary(product_counts, shelf_data)

def create_summary(product_counts, shelf_data):
    """Render detection counts and per-shelf contents as a plain-text report.

    Args:
        product_counts: Mapping of category name to count. Must contain the
            keys 'cleaning_products', 'spray_bottles' and 'detergent_bottles';
            the reported total is the sum of all values in the mapping.
        shelf_data: Mapping of shelf name to a list of product dicts. Each
            product needs a 'label' string and may carry a 'text' list, of
            which the first three entries are shown.

    Returns:
        A multi-line string: a totals header followed by one section per
        shelf, in the mapping's iteration order.
    """
    header = (
        f"Total products detected: {sum(product_counts.values())}\n"
        f"Cleaning products: {product_counts['cleaning_products']}\n"
        f"Spray bottles: {product_counts['spray_bottles']}\n"
        f"Detergent bottles: {product_counts['detergent_bottles']}\n\n"
    )

    # Collect fragments and join once at the end.
    pieces = [header]
    for shelf_name, items in shelf_data.items():
        pieces.append(f"{shelf_name.replace('_', ' ').title()}:\n")
        for item in items:
            entry = f"  - {item['label'].title()}"
            if 'text' in item:
                # Only the first three text snippets are shown, always
                # followed by an ellipsis.
                entry += f" (Label: {' '.join(item['text'][:3])}...)"
            pieces.append(entry + "\n")
        # Blank line separating shelves.
        pieces.append("\n")

    return "".join(pieces)
