In [8]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import json
import numpy as np
import os

# Locate the reference data.
# Prefer a file in the current working directory; if it is not there, fall
# back to the copy inside the "image_suggestion_system" subfolder. The path is
# kept under its own name so it is no longer shadowed by the parsed JSON.
reference_path = "reference_data.json"
if not os.path.exists(reference_path):
    reference_path = os.path.join(
        os.getcwd(), "image_suggestion_system", "reference_data.json"
    )

# Read as UTF-8 explicitly so the result does not depend on the platform's
# default locale encoding.
with open(reference_path, "r", encoding="utf-8") as f:
    reference_data = json.load(f)

# Load model (downloads automatically first time)
model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode all reference sentences once; row i of ref_embeddings corresponds to
# reference_data[i], which the lookup code relies on.
ref_sentences = [item["sentence"] for item in reference_data]
ref_embeddings = model.encode(ref_sentences)

In [9]:
def suggest_missing_entities(input_sentence, top_k=2):
    """Suggest entities taken from the reference sentences closest to the input.

    The input sentence is embedded with the module-level ``model`` and compared
    (cosine similarity) against ``ref_embeddings``; the entity lists of the
    ``top_k`` best-matching entries of ``reference_data`` are merged.

    Args:
        input_sentence: Sentence to find suggestions for.
        top_k: Number of most similar reference entries to draw from. Values
            less than or equal to zero yield no suggestions.

    Returns:
        dict with the keys "characters", "locations", "actions" and "nouns",
        each mapping to a list of unique items. Items are ordered by the rank
        of the reference entry they first appeared in (most similar first), so
        the result is reproducible between runs.
    """
    categories = ("characters", "locations", "actions", "nouns")
    suggestions = {key: [] for key in categories}

    # Guard explicitly: with top_k == 0 a slice such as [-0:] would select
    # *every* reference entry rather than none.
    if top_k <= 0 or len(reference_data) == 0:
        return suggestions

    # Encode the input sentence and score it against every reference sentence.
    input_embedding = model.encode([input_sentence])
    similarities = cosine_similarity(input_embedding, ref_embeddings)[0]

    # Indices of the top-K most similar reference sentences, best first.
    top_indices = np.argsort(similarities)[::-1][:top_k]

    # Track what has been emitted per category so duplicates are dropped while
    # the ranking order is preserved (a plain set would scramble the order).
    seen = {key: set() for key in categories}
    for idx in top_indices:
        match = reference_data[idx]
        for key in categories:
            # A reference entry that omits a category contributes nothing.
            for item in match.get(key, []):
                if item not in seen[key]:
                    seen[key].add(item)
                    suggestions[key].append(item)

    return suggestions

In [10]:
def enrich_annotation(annotation):
    """Extend an annotation with suggested entities it does not already list.

    Suggestions are obtained from ``suggest_missing_entities`` for the
    annotation's "sentence". For each of "characters", "locations", "actions"
    and "nouns", suggested items that are not already in the annotation's list
    are appended to it.

    Note: the lists inside ``annotation`` are modified in place, and the same
    dict object that was passed in is returned.
    """
    suggested = suggest_missing_entities(annotation["sentence"])

    for category in ("characters", "locations", "actions", "nouns"):
        current = annotation[category]
        # Snapshot of what the annotation held before any additions.
        known = set(current)

        additions = []
        for candidate in suggested[category]:
            if candidate in known:
                continue
            additions.append(candidate)

        current.extend(additions)

    return annotation

In [11]:
def build_pixabay_query(annotation):
    """Assemble a space-separated search string from an annotation's entities.

    The query consists of the characters, then the actions, then the word
    "in" followed by the locations and nouns. Any group that is empty is left
    out; an annotation with no entities at all produces an empty string.
    """
    segments = []

    # Characters and actions are each joined with spaces and added as-is.
    for field in ("characters", "actions"):
        values = annotation[field]
        if values:
            segments.append(" ".join(values))

    # Locations and nouns share a single "in ..." segment, locations first.
    locations = annotation["locations"]
    if locations or annotation["nouns"]:
        scene = locations + annotation["nouns"]
        segments.append("in " + " ".join(scene))

    query = " ".join(segments)
    return query.strip()

In [12]:
if __name__ == "__main__":
    # Demo input: three short sentences, each with its four entity lists.
    test_data = [
        {"sentence": "Doubt crept in.", "characters": [], "locations": [],
         "actions": ["creep"], "nouns": []},
        {"sentence": "Rocks blocked the way.", "characters": [], "locations": [],
         "actions": ["block"], "nouns": ["rock", "way"]},
        {"sentence": "The wind howled.", "characters": [], "locations": [],
         "actions": ["howl"], "nouns": ["wind"]},
    ]

    print("🔍 Enriching annotations and generating Pixabay queries...\n")

    # Enrich each annotation, build its query, and print one report per
    # sentence followed by a 60-dash separator line.
    for item in test_data:
        enriched = enrich_annotation(item)
        query = build_pixabay_query(enriched)
        print(
            f"Sentence: {item['sentence']}",
            f"→ Characters: {enriched['characters']}",
            f"→ Locations: {enriched['locations']}",
            f"→ Actions: {enriched['actions']}",
            f"→ Nouns: {enriched['nouns']}",
            f"→ Pixabay Query: \"{query}\"",
            "-" * 60,
            sep="\n",
        )

🔍 Enriching annotations and generating Pixabay queries...

Sentence: Doubt crept in.
→ Characters: []
→ Locations: []
→ Actions: ['creep']
→ Nouns: ['path']
→ Pixabay Query: "creep in path"
------------------------------------------------------------
Sentence: Rocks blocked the way.
→ Characters: []
→ Locations: []
→ Actions: ['block']
→ Nouns: ['rock', 'way', 'path']
→ Pixabay Query: "block in rock way path"
------------------------------------------------------------
Sentence: The wind howled.
→ Characters: []
→ Locations: []
→ Actions: ['howl', 'grow']
→ Nouns: ['wind', 'trail']
→ Pixabay Query: "howl grow in wind trail"
------------------------------------------------------------
