# In [None]:
import os
import json
import torch
import pandas as pd
from pathlib import Path
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, BitsAndBytesConfig

# --------------------------
# Config
# --------------------------
# Model identifier passed to `from_pretrained` for both the model and the processor.
MODEL_NAME = "Qwen/Qwen2.5-VL-7B-Instruct"
# Directory scanned for input images (.png/.jpg/.jpeg/.webp).
IMAGE_DIR = "inference_images"
# Directory receiving `<stem>_hierarchy.json` (parsed) or `<stem>_raw.txt` (unparseable) per image.
OUTPUT_JSON_DIR = "outputs_hierarchy_json_25"
# CSV log with one row per processed image (image, raw_output, status).
OUTPUT_CSV = "qwen25vl_component_hierarchy.csv"
# Upper bound on tokens generated per image.
MAX_NEW_TOKENS = 2500

# Instruction sent alongside each image; asks for a JSON tree rooted at 'page'.
# NOTE(review): the characters between "only" and "no text" in the last line look
# like a mis-encoded em dash — confirm the file encoding before relying on this text.
PROMPT_HIERARCHY = (
    "You are an expert UI layout analyzer. "
    "Analyze this wireframe and output all visible components in a hierarchical JSON structure.\n\n"
    "Each component should be represented as an object with:\n"
    "- 'type': the component name (e.g., header, nav, hero, button, image, card, footer)\n"
    "- 'attributes': a dictionary with attributes like color, position, size, alignment, and text content if visible\n"
    "- 'children': a list of nested components inside it\n\n"
    "The root node should represent the full page as 'page'.\n"
    "Follow the visual hierarchy (top to bottom, left to right). Output valid JSON only‚Äîno text outside the JSON."
)

# --------------------------
# Load Model
# --------------------------
def load_model_and_processor():
    """Load the vision-language model named by MODEL_NAME and its processor.

    When CUDA is available the model is loaded with a 4-bit NF4
    bitsandbytes configuration (double quantization, float16 compute) and
    ``device_map="auto"``; otherwise it is loaded unquantized with
    ``device_map="cpu"``.

    Returns:
        A ``(model, processor)`` tuple.
    """
    if torch.cuda.is_available():
        placement = "auto"
        quant_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.float16,
        )
    else:
        # No GPU: skip quantization entirely and keep everything on the CPU.
        placement = "cpu"
        quant_config = None

    load_kwargs = {
        "quantization_config": quant_config,
        "torch_dtype": "auto",
        "device_map": placement,
        "low_cpu_mem_usage": True,
    }
    vl_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(MODEL_NAME, **load_kwargs)
    vl_processor = AutoProcessor.from_pretrained(MODEL_NAME)
    return vl_model, vl_processor


# --------------------------
# Inference Function
# --------------------------
def extract_hierarchy(image_path, model, processor):
    """Ask the model for the component hierarchy of one wireframe image.

    Builds a single-turn chat message containing the image and
    PROMPT_HIERARCHY, generates greedily (``do_sample=False``) for at most
    MAX_NEW_TOKENS tokens, and decodes the result.

    Args:
        image_path: Path of the image, used both in the chat message and as
            the processor's image input.
        model: Generation model exposing ``generate`` and ``device``.
        processor: Processor exposing ``apply_chat_template``, ``__call__``
            and ``batch_decode``.

    Returns:
        The newly generated text only (the prompt tokens are removed before
        decoding), with surrounding whitespace stripped.
    """
    messages = [{
        "role": "user",
        "content": [
            {"type": "image", "image": image_path},
            {"type": "text", "text": PROMPT_HIERARCHY},
        ],
    }]

    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # NOTE(review): the image is handed over as a path string — confirm the
    # installed processor version loads paths itself.
    inputs = processor(text=[text], images=[image_path], return_tensors="pt").to(model.device)

    with torch.no_grad():
        generated_ids = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS, do_sample=False)

    # generate() returns prompt + completion; drop the prompt tokens so the
    # echoed system/user turns do not end up in the decoded output.
    completion_ids = [
        out_ids[len(in_ids):]
        for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]

    output_text = processor.batch_decode(
        completion_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )[0]
    return output_text.strip()


# --------------------------
# Main Function
# --------------------------
def main():
    """Run hierarchy extraction over every image in IMAGE_DIR.

    For each image (sorted by name, extensions .png/.jpg/.jpeg/.webp matched
    case-insensitively) the model output is parsed as JSON. Parsed output is
    written to ``<stem>_hierarchy.json``; output that fails to parse is
    written verbatim to ``<stem>_raw.txt``. Any other exception is recorded
    in the row instead of aborting the run. A CSV log with one row per image
    is written to OUTPUT_CSV at the end.
    """
    model, processor = load_model_and_processor()
    os.makedirs(OUTPUT_JSON_DIR, exist_ok=True)

    image_dir = Path(IMAGE_DIR)
    image_suffixes = (".png", ".jpg", ".jpeg", ".webp")
    image_names = [
        name
        for name in sorted(os.listdir(image_dir))
        if name.lower().endswith(image_suffixes)
    ]

    results = []
    for fname in image_names:
        stem = Path(fname).stem
        img_path = str(image_dir / fname)
        print(f"üîç Extracting hierarchy for {fname}...")

        row = {"image": fname, "raw_output": "", "status": ""}

        try:
            model_text = extract_hierarchy(img_path, model, processor)
            row["raw_output"] = model_text

            try:
                hierarchy = json.loads(model_text)
            except json.JSONDecodeError:
                # Keep the unparseable text so it can be repaired later.
                raw_target = os.path.join(OUTPUT_JSON_DIR, f"{stem}_raw.txt")
                with open(raw_target, "w", encoding="utf-8") as fh:
                    fh.write(model_text)
                row["status"] = "invalid_json"
            else:
                json_target = os.path.join(OUTPUT_JSON_DIR, f"{stem}_hierarchy.json")
                with open(json_target, "w", encoding="utf-8") as fh:
                    json.dump(hierarchy, fh, indent=2)
                row["status"] = "parsed"

        except Exception as exc:
            # Per-image boundary: log the failure and move on to the next image.
            row["raw_output"] = f"ERROR: {exc}"
            row["status"] = "error"

        results.append(row)

    pd.DataFrame(results).to_csv(OUTPUT_CSV, index=False)
    print(f"\n‚úÖ Extraction complete! Results saved to {OUTPUT_CSV}\nüìÇ JSONs in: {OUTPUT_JSON_DIR}")


# Run the extraction pipeline only when this code is executed as the main module.
if __name__ == "__main__":
    main()


# In [None]:
import os
import re
import json
import torch
import pandas as pd
from pathlib import Path

# --------------------------
# Config
# --------------------------
# Directory scanned for `*_raw.txt` model outputs to convert.
RAW_HIERARCHY_DIR = "outputs_hierarchy_json_25"     # where *_raw.txt files are
# Directory holding the source images, matched to raw files by stem.
IMAGE_DIR = "inference_images"
# Directory receiving one `<stem>__verify.html` per successfully processed raw file.
OUTPUT_HTML_DIR = "outputs_html_verify_25"
# CSV log with one row per processed raw file.
OUTPUT_CSV = "qwen25vl_verify_hierarchy_results_from_rawtxt.csv"
# Upper bound on tokens generated per HTML document.
MAX_NEW_TOKENS = 2500

# Instruction sent with the image; the hierarchy JSON is appended as a separate text part.
PROMPT_HTML_FROM_JSON = (
    "You are an expert front-end developer. "
    "Use BOTH the following hierarchical component JSON and the provided wireframe image "
    "to generate a complete, minimal, responsive HTML5 layout.\n\n"
    "Guidelines:\n"
    "- Each node's 'type' corresponds to an HTML section or element.\n"
    "- Use semantic HTML tags (header, nav, main, section, article, footer, etc.).\n"
    "- Use node 'attributes' to infer inline styles (colors, alignment, size).\n"
    "- Preserve the hierarchy: parent nodes contain their children in proper order.\n"
    "- Include a minimal <style> block in <head> but no external CSS or JavaScript.\n"
    "- Use placeholder text for headings, paragraphs, or buttons.\n"
    "- The visual layout should reflect the image as closely as possible.\n\n"
    "Output ONLY valid HTML code, starting with <!doctype html>."
)

# --------------------------
# Helper: Extract JSON from raw text
# --------------------------
def _close_open_json_containers(json_str: str) -> str:
    """Return *json_str* with any unterminated string, object or array closed.

    Scans the text once, tracking string state (including backslash escapes)
    so that braces and brackets inside string values are ignored, and keeps a
    stack of the containers still open. The missing closers are appended in
    reverse nesting order. A dangling trailing comma is removed first.
    """
    closers = {"{": "}", "[": "]"}
    open_stack = []
    in_string = False
    escaped = False

    for ch in json_str:
        if in_string:
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
        elif ch == '"':
            in_string = True
        elif ch in closers:
            open_stack.append(ch)
        elif ch in "}]" and open_stack and closers[open_stack[-1]] == ch:
            open_stack.pop()

    if in_string:
        # Text was cut off inside a string literal: terminate it.
        repaired = json_str + '"'
    else:
        # A trailing comma before a closer is not valid JSON.
        repaired = json_str.rstrip().rstrip(",")

    return repaired + "".join(closers[ch] for ch in reversed(open_stack))


def extract_json_from_text(text: str):
    """Extract and parse the JSON payload embedded in raw model output.

    The text may contain chat role markers and markdown fences. A
    ```` ```json ... ``` ```` block is preferred; otherwise the span from the
    first ``{`` to the last ``}`` is used. If that span does not parse, one
    repair attempt is made that closes containers left open by truncated
    output.

    Args:
        text: Raw text that is expected to contain a JSON document.

    Returns:
        The parsed JSON value, or ``None`` when no JSON-like span is found or
        the span cannot be parsed even after repair.
    """
    fenced = re.search(r"```json(.*?)```", text, re.DOTALL)
    if fenced:
        json_str = fenced.group(1).strip()
    else:
        # Fallback: greedy match from the first '{' to the last '}'.
        braced = re.search(r"\{[\s\S]*\}", text)
        if not braced:
            print("‚ö†Ô∏è No JSON found in file.")
            return None
        json_str = braced.group(0)

    try:
        return json.loads(json_str)
    except json.JSONDecodeError as e:
        print(f"‚ö†Ô∏è JSON parse error: {e}")

    # Truncated generations usually leave objects AND arrays open; close them
    # in nesting order rather than only appending '}'.
    try:
        return json.loads(_close_open_json_containers(json_str))
    except json.JSONDecodeError:
        return None

# --------------------------
# Core Function
# --------------------------
def generate_html_from_json_and_image(json_data, image_path, model, processor):
    """Generate HTML from a component hierarchy and its wireframe image.

    The user turn carries three parts: the image, PROMPT_HTML_FROM_JSON, and
    the hierarchy serialized with two-space indentation. Generation is greedy
    and limited to MAX_NEW_TOKENS tokens.

    Args:
        json_data: JSON-serializable hierarchy to embed in the prompt.
        image_path: Path of the image, used in the message and as the
            processor's image input.
        model: Generation model exposing ``generate`` and ``device``.
        processor: Processor exposing ``apply_chat_template``, ``__call__``
            and ``batch_decode``.

    Returns:
        The decoded completion (prompt tokens removed), stripped of
        surrounding whitespace.
    """
    hierarchy_part = f"Here is the JSON:\n\n{json.dumps(json_data, indent=2)}"
    user_parts = [
        {"type": "image", "image": image_path},
        {"type": "text", "text": PROMPT_HTML_FROM_JSON},
        {"type": "text", "text": hierarchy_part},
    ]
    conversation = [{"role": "user", "content": user_parts}]

    prompt = processor.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=True
    )
    batch = processor(
        text=[prompt], images=[image_path], padding=True, return_tensors="pt"
    )
    batch = batch.to(model.device)

    with torch.no_grad():
        sequences = model.generate(
            **batch,
            max_new_tokens=MAX_NEW_TOKENS,
            do_sample=False,
            temperature=None,
        )

    # Each returned sequence starts with its prompt; keep only what follows.
    completions = []
    for prompt_ids, full_ids in zip(batch.input_ids, sequences):
        completions.append(full_ids[len(prompt_ids):])

    decoded = processor.batch_decode(
        completions, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )
    return decoded[0].strip()

# --------------------------
# Main
# --------------------------
def _find_image_for_stem(stem):
    """Return the first existing image path in IMAGE_DIR for *stem*, or None.

    Covers the same extensions the extraction step accepts. Lower-case
    ``.png`` and ``.jpg`` are tried first, followed by the remaining
    lower-case and then upper-case variants.
    """
    extensions = (".png", ".jpg", ".jpeg", ".webp")
    for ext in extensions + tuple(e.upper() for e in extensions):
        candidate = os.path.join(IMAGE_DIR, f"{stem}{ext}")
        if os.path.exists(candidate):
            return candidate
    return None


def main(model, processor):
    """Convert every ``*_raw.txt`` hierarchy in RAW_HIERARCHY_DIR to HTML.

    For each raw file the matching image is located by stem, the JSON is
    extracted from the raw text, and the model is asked to produce HTML,
    which is written to ``<stem>__verify.html`` in OUTPUT_HTML_DIR. Raw files
    without a matching image are skipped and not logged; every other outcome
    gets one row in the CSV written to OUTPUT_CSV.

    Args:
        model: Generation model forwarded to generate_html_from_json_and_image.
        processor: Processor forwarded to generate_html_from_json_and_image.
    """
    os.makedirs(OUTPUT_HTML_DIR, exist_ok=True)
    results = []

    raw_suffix = "_raw.txt"
    raw_files = sorted(f for f in os.listdir(RAW_HIERARCHY_DIR) if f.endswith(raw_suffix))

    for fname in raw_files:
        raw_path = os.path.join(RAW_HIERARCHY_DIR, fname)
        # Strip only the trailing suffix; an image stem may itself contain "_raw".
        stem = fname[: -len(raw_suffix)]
        image_path = _find_image_for_stem(stem)

        if not image_path:
            print(f"‚ö†Ô∏è Skipping {fname} ‚Äî No matching image found.")
            continue

        print(f"üîç Processing {fname} with image {Path(image_path).name}...")

        row = {"raw_file": fname, "image_file": Path(image_path).name, "status": "", "html_file": "", "raw_output": ""}

        try:
            with open(raw_path, "r", encoding="utf-8") as f:
                raw_text = f.read()

            json_data = extract_json_from_text(raw_text)
            if json_data is None:
                row["status"] = "error: no valid JSON"
                results.append(row)
                continue

            html_output = generate_html_from_json_and_image(json_data, image_path, model, processor)
            row["raw_output"] = html_output

            out_html_path = os.path.join(OUTPUT_HTML_DIR, f"{stem}__verify.html")
            with open(out_html_path, "w", encoding="utf-8") as f:
                f.write(html_output)

            row["html_file"] = out_html_path
            row["status"] = "success"

        except Exception as e:
            # Per-file boundary: record the failure and continue with the next file.
            row["status"] = f"error: {e}"

        results.append(row)

    pd.DataFrame(results).to_csv(OUTPUT_CSV, index=False)
    print(f"\n‚úÖ Verification done! HTMLs saved in {OUTPUT_HTML_DIR}")
    print(f"üìä Log saved to {OUTPUT_CSV}")

# --------------------------
# Example usage
# --------------------------
# NOTE(review): `model` and `processor` must already exist at module scope here.
# The extraction step's main() binds them as locals only, so confirm they are
# defined elsewhere (e.g. via load_model_and_processor()) before this call runs.
main(model, processor)
