In [1]:
import gradio as gr
from openai import OpenAI
import os
import numpy as np
import chromadb
import json
from chromadb import Client as ChromaClient
from chromadb.config import Settings
from chromadb.utils import embedding_functions
from dotenv import load_dotenv

In [2]:
load_dotenv(override=True)
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [3]:
EMBED_MODEL = "text-embedding-3-small"   # or whichever embedding model you prefer
GEN_MODEL = "gpt-4o-mini"                # LLM for final generation
CHUNK_SIZE = 800                         # approx characters per chunk (adjust if desired)
TOP_K = 6                                # number of retrieved chunks to use
CHROMA_DB_DIR = "rag_chroma_db"

In [4]:
# ---------- STEP 1: Initialize Chroma Client ----------
chroma_client = chromadb.PersistentClient(path=CHROMA_DB_DIR)

# Custom embedding function using OpenAI
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.getenv("OPENAI_API_KEY"),
    model_name=EMBED_MODEL
)

In [5]:
files_map = {
    "dialog_template": "aem_knowledge_base/dialog_template.txt",
    "fields_catalog": "aem_knowledge_base/fields_catalog.txt",
    "sling_examples": "aem_knowledge_base/sling_mappings.txt",
    "htl_snippets": "aem_knowledge_base/htl_snippets.txt",
    "js_validation": "aem_knowledge_base/multifield_js_validation.txt",
}


In [6]:
# ---------- STEP 2: Build or Load Vector Store ----------
def build_or_load_chroma():
    print("üì¶ Initializing or loading Chroma collection...")
    collection = chroma_client.get_or_create_collection(
        name="aem_rag_store",
        embedding_function=openai_ef
    )

    if collection.count() > 0:
        print("‚úÖ Existing Chroma collection loaded successfully.")
        return collection

    print("üìö Building new Chroma collection from local files...")

    for name, path in files_map.items():
        if not os.path.exists(path):
            print(f"‚ö†Ô∏è File not found: {path}")
            continue

        with open(path, "r", encoding="utf-8") as f:
            text = f.read()

        # Split into small chunks for better retrieval
        chunk_size = 800
        overlap = 100
        chunks = [text[i:i+chunk_size] for i in range(0, len(text), chunk_size - overlap)]

        for i, chunk in enumerate(chunks):
            collection.add(
                documents=[chunk],
                metadatas=[{"source": name}],
                ids=[f"{name}_{i}"]
            )

    print(f"‚úÖ Vector store built with {collection.count()} documents.")
    return collection

In [7]:
collection = build_or_load_chroma()

üì¶ Initializing or loading Chroma collection...
‚úÖ Existing Chroma collection loaded successfully.


In [8]:
try:
    with open('aem_knowledge_base/dialog_template.txt', 'r', encoding='utf-8') as file:
        dialog_template = file.read()
    with open('aem_knowledge_base/sling_mappings.txt', 'r', encoding='utf-8') as file:
        sling_mappings = file.read()
    with open('aem_knowledge_base/htl_snippets.txt', 'r', encoding='utf-8') as file:
        htl_snippets = file.read()
    
except FileNotFoundError:
    print("Error: The file was not found.")
except Exception as e:
    print(f"An error occurred: {e}")

In [15]:
def retrieve_context(query, top_k=3):
    try:
        results = collection.query(query_texts=[query], n_results=top_k)
        docs = results.get("documents", [[]])[0]
        sources = results.get("metadatas", [[]])[0]
        if not docs:
            return "No relevant context found."

        context = "\n\n".join(
            [f"üìò From {s['source']}:\n{d}" for s, d in zip(sources, docs)]
        )
        return context
    except Exception as e:
        return f"‚ùå Error retrieving context: {str(e)}"


# --- UI Field Options ---
field_types = [
    "RTE Text Field",
    "Drop down Field (Select)",
    "Tags picker",
    "Text Field",
    "Text Area",
    "Password Field",
    "Number Field",
    "Email Field",
    "Date Picker",
    "Color Field",
    "Check Box",
    "Path Field",
    "Multifield"
]

fields_data = []
extra_context = ""


# --- Gradio callbacks ---
def add_field(selected_type, field_name, field_label, current_list):
    if not field_name or not field_label:
        return current_list, "‚ö†Ô∏è Please enter both a field name and label.", fields_data

    fields_data.append({
        "type": selected_type,
        "name": field_name,
        "label": field_label
    })

    new_entry = f"üß© **{selected_type}** ‚Äî Label: `{field_label}`, Name: `{field_name}`"
    if current_list == "### üìã Fields Added\n_(No fields added yet)_":
        updated_list = f"### üìã Fields Added\n\n{new_entry}"
    else:
        updated_list = current_list + "\n\n" + new_entry

    return updated_list, f"‚úÖ Added {selected_type} field successfully.", fields_data


def reset_fields():
    global fields_data, extra_context
    fields_data = []
    extra_context = ""
    return "### üìã Fields Added\n_(No fields added yet)_", "", "", "", "", ""


def set_context_chat(prompt):
    global extra_context
    extra_context = prompt
    return f"üß† Context added successfully:\n> {prompt}"


# --- Generate AEM Code using RAG and existing Chroma collection ---
def generate_sling_model_with_rag(fields, user_context):
    if not fields:
        return ("‚ö†Ô∏è Please add at least one field before generating.", "", "", "")

    fields_description = "\n".join([
        f"- Type: {f['type']}, Name: {f['name']}, Label: {f['label']}"
        for f in fields
    ])

    query = f"Generate AEM component dialog and sling model for: {fields_description}"
    retrieved_context = retrieve_context(query)

    full_prompt = f"""
You are an experienced Adobe AEM developer. Generate high-quality AEM code using best practices and think of all the basics for component creation.

User context:
{user_context if user_context else "No extra context provided."}

Generate:
1Ô∏è‚É£ AEM Dialog HTML  
2Ô∏è‚É£ Sling Model Java class (no XML)  
3Ô∏è‚É£ HTL file using Sling Model fields  
4Ô∏è‚É£ JS validation snippet (only when logically required)

Use the following as reasoning references ‚Äî do not output them verbatim:
- Field details: {fields_description}
- Retrieved context (from RAG): {retrieved_context}
- Dialog reference: {dialog_template}
- Sling model reference: {sling_mappings}
- HTL reference: {htl_snippets}

When generating code:
- Use the dialog reference only to guide structure and tab placement.
- Derive field names and labels strictly from {fields_description}.
- Infer field types, validations, and component bindings intelligently.
- Ensure all generated code is self-consistent and syntactically valid.
- Keep all responses concise and practical for production use.
- Pelase don't miss data-sly-use in HTL as it is the main part of front-end to JCR mappings
- Don't add explicit JS Validation, if you think aem can handle with properties in dialog.

Do not reproduce or restate the reference text above ‚Äî use it only for reasoning.
Make sure the HTL only has how to access the values from the sling model, don't give me like normal HTML.
Output must be valid JSON in the exact format below:
{{
  "dialog": "<dialog HTML code here>",
  "sling_model": "<Java Sling Model code here>",
  "htl": "<HTL code here>",
  "js_validation": "<JS validation only if required, else empty string>"
}}

Return ONLY this JSON. Do not include any explanations, markdown, or comments.
"""

    print(full_prompt)
    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            temperature=0.5,
            messages=[
                {"role": "system", "content": "You are an expert Adobe AEM developer."},
                {"role": "user", "content": full_prompt},
            ],
        )
        ai_output = response.choices[0].message.content
        print(response.choices[0].message.content)
        parsed = json.loads(ai_output)

        dialog = parsed.get("dialog", "").strip()
        sling_model = parsed.get("sling_model", "").strip()
        htl = parsed.get("htl", "").strip()
        js = parsed.get("js_validation", "").strip()
        
        return dialog, sling_model, htl, js
    except Exception as e:
        return (f"‚ùå Error generating: {str(e)}", "", "", "")


# --- Gradio Interface ---
with gr.Blocks(theme="soft") as demo:
    gr.Markdown("# üß© AEM Component Builder + RAG-based Sling Model Generator")
    gr.Markdown("Uses pre-built Chroma DB for instant retrieval and AEM code generation.")

    with gr.Row():
        field_type = gr.Dropdown(label="Select Field Type", choices=field_types, value="Text Field")
        field_name = gr.Textbox(label="Field name (e.g., name)")
        field_label = gr.Textbox(label="Field label (e.g., Title)")

    with gr.Row():
        add_btn = gr.Button("‚ûï Add Field", variant="primary")
        reset_btn = gr.Button("üîÑ Reset", variant="secondary")

    field_list = gr.Markdown("### üìã Fields Added\n_(No fields added yet)_")
    status = gr.Markdown("")

    gr.Markdown("## üí¨ Add Context (Optional)")
    context_input = gr.Textbox(label="Add context or prompt", placeholder="e.g., Make title required, validate number range 0-100")
    context_status = gr.Markdown("")
    context_input.submit(set_context_chat, inputs=[context_input], outputs=[context_status])

    generate_btn = gr.Button("üöÄ Generate Component Code", variant="primary", size="lg")

    dialog_output = gr.Code(label="üß© Dialog", language="html", lines=18)
    sling_output = gr.Code(label="‚òï Sling Model")
    htl_output = gr.Code(label="üß± HTL", language="html", lines=18)
    js_output = gr.Code(label="üßÆ JS Validation", language="javascript", lines=18)

    generate_btn.click(
        fn=lambda user_ctx: generate_sling_model_with_rag(fields_data, user_ctx),
        inputs=[context_input],
        outputs=[dialog_output, sling_output, htl_output, js_output],
    )

    reset_btn.click(
        fn=reset_fields,
        inputs=[],
        outputs=[field_list, status, dialog_output, sling_output, htl_output, js_output],
    )

    add_btn.click(
        fn=add_field,
        inputs=[field_type, field_name, field_label, field_list],
        outputs=[field_list, status, gr.State(fields_data)],
    )

demo.launch(share=False)


* Running on local URL:  http://127.0.0.1:7865
* To create a public link, set `share=True` in `launch()`.





You are an experienced Adobe AEM developer. Generate high-quality AEM code using best practices and think of all the basics for component creation.

User context:
No extra context provided.

Generate:
1Ô∏è‚É£ AEM Dialog HTML  
2Ô∏è‚É£ Sling Model Java class (no XML)  
3Ô∏è‚É£ HTL file using Sling Model fields  
4Ô∏è‚É£ JS validation snippet (only when logically required)

Use the following as reasoning references ‚Äî do not output them verbatim:
- Field details: - Type: Email Field, Name: email, Label: Email
- Retrieved context (from RAG): üìò From fields_catalog:
jcr:primaryType="nt:unstructured"
    sling:resourceType="granite/ui/components/coral/foundation/form/numberfield"
    fieldDescription="Caption is used for accessibility purposes to provide a description of the table contents."
    fieldLabel="Accessibility Caption:"
    name="./caption"
    required="{Boolean}true"/>

Email Field - Resource Type - granite/ui/components/coral/foundation/form/textfield
---------------------