In [None]:
# @title ## 1. Setup Environment (Fixed Version)
# @markdown Mount Google Drive and clone the repository using a secure token.

from google.colab import drive
import os
import getpass
import subprocess
import sys

# Mount Google Drive
drive.mount('/content/drive')
print("✅ Google Drive mounted.")


def _redact_token(text, token):
    """Return `text` with every occurrence of `token` masked.

    git can echo the remote URL (which embeds the PAT) in its error output;
    masking keeps the secret out of saved notebook output.
    """
    if not text or not token:
        return text
    return text.replace(token, "***")


# --- GitHub Setup ---
print("Setting up GitHub repository...")
try:
    GITHUB_TOKEN = getpass.getpass('Enter your GitHub PAT: ')
    REPO_URL = f"https://{GITHUB_TOKEN}@github.com/Vinay-O/HouseBrainLLM.git"
    REPO_DIR = "/content/HouseBrainLLM"

    # Clone or update repository
    if os.path.exists(REPO_DIR):
        print("Repository already exists. Pulling latest changes...")
        os.chdir(REPO_DIR)
        result = subprocess.run(["git", "pull"], capture_output=True, text=True)
        if result.returncode != 0:
            # A failed pull is treated as non-fatal: the existing checkout is kept.
            print(f"Git pull failed: {_redact_token(result.stderr, GITHUB_TOKEN)}")
        else:
            print("✅ Repository updated successfully.")
    else:
        print("Cloning repository...")
        result = subprocess.run(["git", "clone", REPO_URL, REPO_DIR], capture_output=True, text=True)
        if result.returncode != 0:
            print(f"Git clone failed: {_redact_token(result.stderr, GITHUB_TOKEN)}")
            raise Exception("Failed to clone repository")
        else:
            print("✅ Repository cloned successfully.")

    # Add repository to Python path (only once, even when the cell is re-run)
    if REPO_DIR not in sys.path:
        sys.path.append(REPO_DIR)

except Exception as e:
    print(f"❌ Error setting up repository: {e}")
    # Continue without repository for basic functionality
    REPO_DIR = "/content"

# --- Install Dependencies ---
# Baseline packages are installed first; the repository's requirements.txt is
# layered on top when it exists. A pip failure is reported but does not stop the cell.
print("Installing dependencies...")
pip_install = [sys.executable, "-m", "pip", "install", "-q"]
try:
    subprocess.run(pip_install + ["pydantic", "requests"], check=True)

    requirements_path = os.path.join(REPO_DIR, "requirements.txt")
    if not os.path.exists(requirements_path):
        print("⚠️ requirements.txt not found. Using basic dependencies.")
    else:
        print("Installing from requirements.txt...")
        subprocess.run(pip_install + ["-r", requirements_path], check=True)
        print("✅ Requirements installed.")

except subprocess.CalledProcessError as e:
    print(f"⚠️ Some dependencies failed to install: {e}")
    print("Continuing with basic setup...")

print("✅ Environment setup complete.")


In [None]:
# @title ## 2. Configure and Start Ollama Server (Fixed Version)
# @markdown This cell will download and start the Ollama server, then pull the specified model.

import threading
import subprocess
import time
import sys
import requests

# Use a more compatible model - deepseek-r1:32b has known issues
MODEL_NAME = "llama3.1:8b"  # More stable alternative
# MODEL_NAME = "qwen2.5:7b"  # Another good option
# MODEL_NAME = "deepseek-r1:7b"  # Smaller deepseek variant if you want to try

print("Installing Ollama...")
try:
    # Step 1: fetch the install script (a curl failure raises because of check=True)
    result = subprocess.run(
        ["curl", "-fsSL", "https://ollama.com/install.sh"],
        check=True,
        text=True,
        capture_output=True,
    )

    # Step 2: feed the downloaded script to bash on stdin
    process = subprocess.run("bash", input=result.stdout, text=True, capture_output=True)
    if process.returncode != 0:
        print(f"⚠️ Ollama installation warning: {process.stderr}")
    else:
        print("✅ Ollama installed successfully.")

except Exception as e:
    print(f"❌ Ollama installation failed: {e}")

# Python client for Ollama, installed into the running interpreter
print("Installing Ollama Python package...")
pip_ollama_command = [sys.executable, "-m", "pip", "install", "-q", "ollama"]
try:
    subprocess.run(pip_ollama_command, check=True)
    print("✅ Ollama Python package installed.")
except Exception as e:
    print(f"❌ Failed to install Ollama package: {e}")

# Function to run Ollama server
def run_ollama_serve():
    """Run `ollama serve` and block until the server process exits.

    Intended as a thread target. Never raises: a launch failure or a
    non-zero exit is printed instead, so a server that dies is not
    silently ignored by the background thread.
    """
    try:
        completed = subprocess.run(
            ["ollama", "serve"], check=False, capture_output=True, text=True
        )
    except Exception as e:
        print(f"Ollama server error: {e}")
        return

    if completed.returncode != 0:
        # Show only the tail of stderr; server logs can be long.
        stderr_tail = (completed.stderr or "").strip()[-500:]
        print(f"Ollama server exited with code {completed.returncode}: {stderr_tail}")

# Launch the server on a daemon thread so it does not block the notebook
print("🚀 Starting Ollama server...")
ollama_thread = threading.Thread(target=run_ollama_serve)
ollama_thread.daemon = True
ollama_thread.start()

# Give the server a fixed head start before it is probed
print("⏳ Waiting for Ollama server to initialize...")
time.sleep(10)

# Test if server is running
def test_ollama_server():
    """Return True if `ollama list` exits with status 0 within 10 seconds.

    Returns False when the binary cannot be launched, the command times out,
    or it exits non-zero. Only process-related errors are caught, so
    KeyboardInterrupt still interrupts the cell.
    """
    try:
        result = subprocess.run(["ollama", "list"], capture_output=True, text=True, timeout=10)
    except (OSError, subprocess.SubprocessError):
        return False
    return result.returncode == 0

# Poll the server a few times: startup time varies, so a single check after a
# fixed sleep can report "not ready" for a server that is merely slow.
SERVER_READY_ATTEMPTS = 6
SERVER_READY_DELAY = 5  # seconds between attempts
for _attempt in range(SERVER_READY_ATTEMPTS):
    if test_ollama_server():
        print("✅ Ollama server is running.")
        break
    if _attempt < SERVER_READY_ATTEMPTS - 1:
        time.sleep(SERVER_READY_DELAY)
else:
    print("⚠️ Ollama server may not be fully ready yet.")

# Pull the model
print(f"📦 Pulling model: {MODEL_NAME}...")
try:
    # Use a more robust model pulling approach
    pull_process = subprocess.run(
        ["ollama", "pull", MODEL_NAME],
        capture_output=True, text=True, timeout=600  # 10 minute timeout
    )

    if pull_process.returncode == 0:
        print(f"✅ Model {MODEL_NAME} is ready.")
    else:
        print(f"❌ Failed to pull model: {pull_process.stderr}")
        # Try alternative model. The 3B Llama variant is published under the
        # llama3.2 family; llama3.1 has no 3b tag, so that pull could never succeed.
        alternative_model = "llama3.2:3b"
        print(f"Trying alternative model: {alternative_model}")
        alt_process = subprocess.run(
            ["ollama", "pull", alternative_model],
            capture_output=True, text=True, timeout=600
        )
        if alt_process.returncode == 0:
            # Later cells read MODEL_NAME, so point it at what was actually pulled.
            MODEL_NAME = alternative_model
            print(f"✅ Alternative model {MODEL_NAME} is ready.")
        else:
            raise Exception("Failed to pull any model")

except subprocess.TimeoutExpired:
    print("⚠️ Model pull timed out. The model might be very large.")
except Exception as e:
    print(f"❌ Error pulling model: {e}")

# Final verification
print("Final verification...")
subprocess.run(["ollama", "list"], check=False)


In [None]:
# @title ## 3. Run the Data Factory (Live LLM Version)

import json
import uuid
import random
import time
import re
from datetime import datetime
import sys
import os
from pydantic import BaseModel, ValidationError
from typing import List, Optional, Dict, Any
from enum import Enum


# --- Configuration ---
DATASET_PATH = "/content/drive/MyDrive/housebrain_platinum_dataset"
MASTER_PROMPT_LIST_PATH = "/content/drive/MyDrive/housebrain_prompts/platinum_prompts.txt"
NUM_PROMPTS_TO_GENERATE = 5  # @param {type:"slider", min:1, max:100, step:1}

os.makedirs(DATASET_PATH, exist_ok=True)

# --- Schema Handling ---
# Prefer the repository's schema; if it cannot be imported, define a small
# stand-in so the rest of the cell can still validate plans.
SCHEMA_LOADED = False
try:
    # FIX: Removed 'BasicDetails' as it does not exist in the current schema.py
    from src.housebrain.schema import HouseOutput, HouseInput, RoomType
    SCHEMA_LOADED = True
    print("✅ Successfully imported HouseBrain schema from repository.")
except ImportError as e:
    print(f"⚠️ Could not import official HouseBrain schema: {e}")
    print("   Creating a minimal fallback schema for operation.")

    class RoomType(str, Enum):
        """Room categories accepted by the fallback schema."""
        living_room = "living_room"
        dining_room = "dining_room"
        kitchen = "kitchen"
        bedroom = "bedroom"
        bathroom = "bathroom"
        balcony = "balcony"
        garage = "garage"
        storage = "storage"
        study = "study"
        utility = "utility"
        entrance = "entrance"
        hallway = "hallway"
        patio = "patio"
        laundry = "laundry"

    class BasicDetails(BaseModel):
        """Request details; only the prompt text is required here."""
        prompt: str

    class HouseInput(BaseModel):
        """Wrapper around the request details."""
        basicDetails: BasicDetails

    class Rectangle(BaseModel):
        """Rectangle given by an origin (x, y) and a width/height."""
        x: float
        y: float
        width: float
        height: float

    class Door(BaseModel):
        """Door with its bounds and the two room identifiers it references."""
        id: str
        type: str
        bounds: Rectangle
        room1: str
        room2: str

    class Window(BaseModel):
        """Window with its bounds."""
        id: str
        type: str
        bounds: Rectangle

    class Furniture(BaseModel):
        """Furniture item with its bounds."""
        id: str
        type: str
        bounds: Rectangle

    class Room(BaseModel):
        """Room with a type, bounds and optional doors/windows/furniture."""
        id: str
        room_type: RoomType
        bounds: Rectangle
        doors: List[Door] = []
        windows: List[Window] = []
        furniture: List[Furniture] = []

    class Level(BaseModel):
        """One floor of the house and the rooms on it."""
        id: str
        level_number: int
        rooms: List[Room]

    class HouseOutput(BaseModel):
        """Top-level plan: the originating input plus the generated levels."""
        id: str
        input: HouseInput
        total_area: float = 0.0
        levels: List[Level]

    SCHEMA_LOADED = True
    print("✅ Minimal fallback schema created.")

# --- Ollama Integration ---
# The client is optional at import time; downstream code checks `ollama` for
# truthiness before using it.
try:
    import ollama
except ImportError:
    ollama = None # Ensure ollama is defined to prevent NameError
    print("❌ Ollama package not found. Please run Cell 2 to install.")
else:
    print("✅ Ollama package imported successfully.")

# --- Helper Functions ---
def call_ollama_safe(model, prompt, retries=3, delay=5):
    """Send one user message to Ollama, retrying on any exception.

    Args:
        model: Model name passed to `ollama.chat`.
        prompt: Text sent as the single user message.
        retries: Maximum number of attempts.
        delay: Seconds to sleep between failed attempts (not after the last one).

    Returns:
        The reply's message content, or None when the client is unavailable
        or every attempt failed.
    """
    if not ollama:
        return None

    attempt_number = 0
    while attempt_number < retries:
        attempt_number += 1
        try:
            reply = ollama.chat(
                model=model,
                messages=[{'role': 'user', 'content': prompt}],
                options={'temperature': 0.7, 'top_p': 0.9},
            )
            return reply['message']['content']
        except Exception as err:
            print(f"    ❌ Attempt {attempt_number}/{retries} failed: {err}")
            if attempt_number < retries:
                time.sleep(delay)

    print("    ❌ All Ollama attempts failed.")
    return None

def extract_json_from_text(text):
    """Extract the first decodable JSON object embedded in `text`.

    Each `{` is tried in order as the start of a JSON object, and the first
    one that decodes is returned. Unlike a greedy first-`{`-to-last-`}` match,
    this tolerates braces in any prose before or after the object (for
    example model commentary following the JSON).

    Args:
        text: Raw model output; may be empty or None.

    Returns:
        The decoded dict, or None if `text` is empty or holds no valid object.
    """
    if not text:
        return None

    decoder = json.JSONDecoder()
    start = text.find('{')
    while start != -1:
        try:
            # raw_decode parses one JSON value at `start` and ignores what follows
            parsed, _end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            start = text.find('{', start + 1)
        else:
            return parsed
    return None

def _is_number(value):
    """Return True for int/float values; bool is excluded although it subclasses int."""
    return isinstance(value, (int, float)) and not isinstance(value, bool)


def _sum_room_areas(levels):
    """Sum width * height over every well-formed room; malformed entries contribute 0."""
    total = 0
    if not isinstance(levels, list):
        return total
    for level in levels:
        rooms = level.get('rooms') if isinstance(level, dict) else None
        if not isinstance(rooms, list):
            continue
        for room in rooms:
            bounds = room.get('bounds') if isinstance(room, dict) else None
            if not isinstance(bounds, dict):
                continue
            width, height = bounds.get('width'), bounds.get('height')
            if _is_number(width) and _is_number(height):
                total += width * height
    return total


def heal_plan_data(raw_data: dict, prompt_text: str):
    """Apply deterministic fixes to raw LLM output so it is more likely to validate.

    Mutates `raw_data` in place and returns it. Values the LLM already supplied
    are kept; only missing or wrongly-typed pieces are filled in. Malformed
    nested data (non-list `levels`, non-dict rooms/bounds, non-numeric sizes)
    is skipped rather than raising.

    Args:
        raw_data: Decoded JSON object produced by the LLM.
        prompt_text: Original user prompt, stored under input.basicDetails.prompt.

    Returns:
        The same `raw_data` dict, after healing.
    """
    # Make sure input.basicDetails exists as a dict, then record the prompt
    input_section = raw_data.get('input')
    if not isinstance(input_section, dict):
        input_section = raw_data['input'] = {}
    details = input_section.get('basicDetails')
    if not isinstance(details, dict):
        details = input_section['basicDetails'] = {}
    details['prompt'] = prompt_text

    levels = raw_data.get('levels')

    # Placeholder values for fields the LLM was expected to provide
    details.setdefault('totalArea', raw_data.get('total_area', 1000))
    details.setdefault('unit', 'sqft')
    # One floor is assumed when `levels` is absent or not a list
    details.setdefault('floors', len(levels) if isinstance(levels, list) else 1)
    details.setdefault('bedrooms', 3)
    details.setdefault('bathrooms', 2)
    details.setdefault('style', 'Modern')
    details.setdefault('budget', 500000)

    if not isinstance(input_section.get('plot'), dict):
        input_section['plot'] = {'shape': 'rectangular', 'length': 80, 'width': 50}

    if not isinstance(input_section.get('roomBreakdown'), list):
        input_section['roomBreakdown'] = []

    # Derive total area from the room bounds when it is missing or not numeric
    if not _is_number(raw_data.get('total_area')):
        raw_data['total_area'] = _sum_room_areas(levels)

    # Placeholder values for the remaining top-level fields
    raw_data.setdefault('construction_cost', raw_data['total_area'] * 200) # Estimate
    raw_data.setdefault('materials', {})
    raw_data.setdefault('render_paths', {})

    return raw_data

# --- Master Prompt ---
# Quoted, comma-separated list of every allowed room type value
ROOM_TYPES_STR = ", ".join(f'"{room_type.value}"' for room_type in RoomType)

# The f-string fills in the room types now; the doubled braces leave a literal
# {user_prompt} placeholder behind for a later .format(user_prompt=...) call.
MASTER_PROMPT_TEMPLATE = f"""
You are an expert architectural AI. Generate a house plan in JSON format based on the user request.
Adhere strictly to the schema provided in the documentation. Output only the raw JSON. The `room_type` must be one of {ROOM_TYPES_STR}.
The output must include 'input', 'levels', 'total_area', and 'construction_cost' fields.

User Request: {{user_prompt}}

Your JSON Output:
"""

# --- Main Execution Block ---
# Pipeline per prompt: generate -> extract JSON -> heal -> validate -> save.
if not (SCHEMA_LOADED and ollama):
    print("\nHALTING: Prerequisites not met. Please ensure Cells 1 & 2 ran successfully.")
else:
    print("\n--- Starting Data Factory (Live LLM Mode) ---")

    # Read the prompt list, skipping blank lines
    try:
        with open(MASTER_PROMPT_LIST_PATH, 'r') as f:
            all_prompts = [stripped for stripped in map(str.strip, f) if stripped]
        print(f"✅ Loaded {len(all_prompts)} prompts.")
    except FileNotFoundError:
        print("❌ Prompt file not found. Run Cell 4 to generate it.")
        all_prompts = []

    if all_prompts:
        batch_size = min(NUM_PROMPTS_TO_GENERATE, len(all_prompts))
        prompts_to_process = random.sample(all_prompts, batch_size)
        batch_total = len(prompts_to_process)
        print(f"✅ Processing a random batch of {batch_total} prompts...")

        successful_generations = 0
        for position, prompt_text in enumerate(prompts_to_process, start=1):
            print(f"\n================== PROMPT {position}/{batch_total} ==================")
            print(f"'{prompt_text[:100]}...'")

            try:
                # 1. Generate with LLM (a failed call simply skips this prompt)
                final_prompt = MASTER_PROMPT_TEMPLATE.format(user_prompt=prompt_text)
                llm_output = call_ollama_safe(MODEL_NAME, final_prompt)
                if not llm_output:
                    continue

                # 2. Extract JSON
                raw_plan = extract_json_from_text(llm_output)
                if not raw_plan:
                    print("    ❌ Failed to extract valid JSON from LLM output.")
                    continue

                # 3. Heal and validate against the schema
                healed_plan = heal_plan_data(raw_plan, prompt_text)
                validated_plan = HouseOutput.model_validate(healed_plan)

                # 4. Save under a unique file name
                plan_id = str(uuid.uuid4()).replace('-', '_')
                file_path = os.path.join(DATASET_PATH, f"plan_{plan_id}.json")
                with open(file_path, 'w') as f:
                    f.write(validated_plan.model_dump_json(indent=2))

                print(f"    ✅ SUCCESS! Saved validated plan to {os.path.basename(file_path)}")
                successful_generations += 1

            except ValidationError as e:
                print(f"    ❌ Pydantic Validation Failed:\n{e}")
            except Exception as e:
                print(f"    ❌ An unexpected error occurred: {e}")

        print("\n🎉 Data Factory completed!")
        print(f"Successfully generated {successful_generations}/{batch_total} plans.")



In [None]:
# @title ## 4. Generate Master Prompt File (Fixed)
# @markdown Generate a comprehensive list of prompts for house design

import os
from pathlib import Path
import random

# Configuration: where the prompt list is written and how many prompts to create
DRIVE_PROMPT_FILE = "/content/drive/MyDrive/housebrain_prompts/platinum_prompts.txt"
NUM_PROMPTS_TO_GENERATE = 100  # Reduced for testing

# Make sure the target folder exists before anything is written to it
os.makedirs(os.path.dirname(DRIVE_PROMPT_FILE), exist_ok=True)

# Generate varied prompts programmatically
def generate_house_prompts(num_prompts=100, seed=None):
    """Generate a list of unique house design prompts.

    The result ends with the hand-written prompts (as many as fit in
    `num_prompts`), preceded by randomly assembled prompts that fill the
    remaining slots. Order is stable and duplicates are never returned.

    Args:
        num_prompts: Number of prompts wanted. Values <= 0 yield an empty list.
        seed: Optional seed for reproducible output. When None, the module-level
            `random` generator is used.

    Returns:
        A list of unique prompt strings. It has exactly `num_prompts` entries
        unless the random generator runs out of attempts before finding enough
        distinct combinations.
    """
    if num_prompts <= 0:
        return []

    rng = random.Random(seed) if seed is not None else random

    # Base components
    sizes = ["small", "medium", "large", "compact", "spacious", "cozy"]
    styles = ["modern", "traditional", "contemporary", "minimalist", "rustic", "colonial"]
    bedroom_counts = ["1-bedroom", "2-bedroom", "3-bedroom", "4-bedroom", "studio"]
    special_features = [
        "with garage", "with balcony", "with study room", "with utility room",
        "with dining area", "with open kitchen", "with master suite", "with guest room",
        "with patio", "with storage room", "with laundry room", "with home office"
    ]
    themes = [
        "family home", "bachelor pad", "retirement home", "starter home",
        "vacation house", "urban apartment", "suburban house", "countryside home"
    ]

    # Prompt structures; {bare_feature} is the feature without its leading "with "
    templates = [
        "Design a {size} {style} {bedrooms} {theme} {feature}",
        "Create a {style} {bedrooms} house with {bare_feature}",
        "Build a {size} {theme} featuring {bedrooms} and {bare_feature}",
        "Design a {style} home with {bedrooms} {feature}",
        "Create a {size} {bedrooms} {style} house for a modern family",
    ]

    specific_prompts = [
        "Design a house with an open floor plan and large windows",
        "Create a two-story house with bedrooms upstairs",
        "Build a single-story house suitable for elderly residents",
        "Design a house with a central courtyard",
        "Create a house with separate living and dining areas",
        "Build a house with an attached garage and workshop",
        "Design a house with a large kitchen island",
        "Create a house with multiple bathrooms",
        "Build a house with a master bedroom suite",
        "Design a house with energy-efficient features"
    ]

    # Reserve slots for the hand-written prompts so truncation never drops them
    curated = specific_prompts[:num_prompts]
    wanted_random = num_prompts - len(curated)

    seen = set(curated)
    prompts = []
    # Bounded retries: the combination space is finite, so never loop forever
    attempts_left = max(100, wanted_random * 20)
    while len(prompts) < wanted_random and attempts_left > 0:
        attempts_left -= 1
        size = rng.choice(sizes)
        style = rng.choice(styles)
        bedrooms = rng.choice(bedroom_counts)
        feature = rng.choice(special_features)
        theme = rng.choice(themes)
        candidate = rng.choice(templates).format(
            size=size,
            style=style,
            bedrooms=bedrooms,
            theme=theme,
            feature=feature,
            bare_feature=feature.replace('with ', ''),
        )
        if candidate not in seen:
            seen.add(candidate)
            prompts.append(candidate)

    return prompts + curated

print("Generating house design prompts...")
generated_prompts = generate_house_prompts(NUM_PROMPTS_TO_GENERATE)

# Persist the prompts, one per line
with open(DRIVE_PROMPT_FILE, 'w') as f:
    f.writelines(f"{prompt_line}\n" for prompt_line in generated_prompts)

print(f"✅ Generated {len(generated_prompts)} prompts")
print(f"Saved to: {DRIVE_PROMPT_FILE}")

# Preview the first few prompts
print("\nFirst 5 prompts:")
for number, prompt in enumerate(generated_prompts[:5], start=1):
    print(f"{number}. {prompt}")


In [None]:
# @title ## 5. Download Generated Dataset (Fixed)
# @markdown Zip the entire generated dataset directory and download it to your local machine.

import shutil
import os
from google.colab import files
from datetime import datetime
import zipfile

# Configuration: dataset folder on Drive and a timestamped archive under /content
source_dir = "/content/drive/MyDrive/housebrain_platinum_dataset"
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
zip_filename = f"housebrain_dataset_{timestamp}.zip"
zip_filepath = f"/content/{zip_filename}"

# A missing folder and an empty folder are reported the same way
dataset_entries = os.listdir(source_dir) if os.path.exists(source_dir) else []

if not dataset_entries:
    print(f"❌ Dataset directory not found or empty: {source_dir}")
    print("Please run the data generation cell first.")
else:
    print(f"Found {len(dataset_entries)} files in dataset directory")

    # Build the archive by hand so every added file can be logged
    print(f"Creating zip file: {zip_filename}")
    with zipfile.ZipFile(zip_filepath, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for folder, _subfolders, file_names in os.walk(source_dir):
            for file_name in file_names:
                absolute_path = os.path.join(folder, file_name)
                # Store paths relative to the dataset root inside the archive
                archive_name = os.path.relpath(absolute_path, source_dir)
                zipf.write(absolute_path, archive_name)
                print(f"Added: {archive_name}")

    print(f"✅ Zip file created: {zip_filepath}")

    # Hand the archive to the browser; on failure, point at the local path
    print("Downloading dataset...")
    try:
        files.download(zip_filepath)
        print("✅ Download started successfully!")
    except Exception as e:
        print(f"❌ Download failed: {e}")
        print(f"You can manually download from: {zip_filepath}")


# HouseBrain Data Factory 3.0: The Diamond Series

This notebook is for generating the **Diamond Dataset**. The goal of this dataset is to teach our fine-tuned model how to handle **complex, conflicting, and unconventional** architectural challenges.

**Our Strategy:**
1.  **Use a specialized script** to generate a smaller, more focused list of 2,500 "Diamond-tier" prompts.
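2.  **Use our best "Journeyman" model** (fine-tuned on the Platinum dataset) as the generator to create draft plans. Until that model is available, the notebook uses a strong base model instead (default: `mixtral:instruct`).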
3.  **Save the validated outputs** to a new `housebrain_diamond_dataset` folder in your Google Drive.
4.  Use this dataset for a second round of fine-tuning to elevate our model from a "Journeyman" to a "Master Architect."

## Instructions
1.  **Set Your GitHub PAT**: Ensure your GitHub token is ready.
2.  **Run All Cells**: The notebook will set up the environment, generate the complex prompts, and begin the data generation process.


In [None]:
# @title ## 1. Setup Environment
# @markdown Mount Google Drive and clone the repository using a secure token.
from google.colab import drive
import os
import getpass
import subprocess

# Mount Google Drive
drive.mount('/content/drive')
print("✅ Google Drive mounted.")

# --- GitHub Setup ---
#@markdown Enter your GitHub Personal Access Token (PAT) with repo access.
GITHUB_TOKEN = getpass.getpass('Enter your GitHub PAT: ')
REPO_URL = f"https://{GITHUB_TOKEN}@github.com/Vinay-O/HouseBrainLLM.git"
REPO_DIR = "/content/HouseBrainLLM"

# Clone the repository, or update the existing checkout.
# Arguments are passed as a list (no shell), and the command is never echoed in
# full, because REPO_URL embeds the token.
if os.path.exists(REPO_DIR):
    print("Repository already exists. Pulling latest changes...")
    git_command = ["git", "-C", REPO_DIR, "pull"]
    git_action = "git pull"
else:
    print("Cloning repository...")
    git_command = ["git", "clone", REPO_URL, REPO_DIR]
    git_action = "git clone"

git_result = subprocess.run(git_command, capture_output=True, text=True)
if git_result.returncode != 0:
    git_stderr = git_result.stderr or ""
    if GITHUB_TOKEN:
        # git may echo the remote URL; mask the token before printing
        git_stderr = git_stderr.replace(GITHUB_TOKEN, "***")
    print(f"❌ {git_action} failed: {git_stderr.strip()}")
    # Same exception type as before, but with a command string that has no token in it
    raise subprocess.CalledProcessError(git_result.returncode, git_action)

print("✅ Repository is ready.")

# --- Install Dependencies ---
#@markdown Install necessary Python packages.
!pip install -q pydantic GitPython

print("✅ Dependencies installed.")


In [None]:
# @title ## 2. Configure and Start Ollama Server
# @markdown This cell will download and start the Ollama server, then pull the specified model.
# @markdown **Important:** For the Diamond run, we should ideally use our fine-tuned "Journeyman" model. For now, we will continue to use a powerful base model like Mixtral.

# Model tag that this cell pulls with `ollama pull` and that the factory cell
# passes to the assembly-line script via --model.
MODEL_NAME = "mixtral:instruct" # @param ["mixtral:instruct", "qwen2:7b", "llama3:8b", "mistral:7b-instruct"]

# Download the Ollama install script and run it with sh.
# This step only installs Ollama; the server is started further down in this cell.
!curl -fsSL https://ollama.com/install.sh | sh
import threading
import subprocess
import time

def run_ollama():
    """Run `ollama serve` through the shell and block until it exits.

    Meant to be used as a thread target. A non-zero exit is reported by
    printing the captured stderr; nothing is raised to the caller for it.
    """
    serve_command = "ollama serve"
    try:
        subprocess.run(serve_command, shell=True, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as server_error:
        print(f"Ollama server failed: {server_error.stderr}")

print("🚀 Starting Ollama server in the background...")
ollama_thread = threading.Thread(target=run_ollama)
ollama_thread.daemon = True
ollama_thread.start()

# Wait for the server to be ready
print("⏳ Waiting for Ollama server to initialize...")
time.sleep(10)

# Pull the model
print(f"📦 Pulling model: {MODEL_NAME}. This may take a while...")
try:
    subprocess.run(f"ollama pull {MODEL_NAME}", shell=True, check=True, capture_output=True, text=True)
    print(f"✅ Model {MODEL_NAME} is ready.")
except subprocess.CalledProcessError as e:
    print(f"Failed to pull model. Trying default tag...")
    base_model = MODEL_NAME.split(':')[0]
    subprocess.run(f"ollama pull {base_model}", shell=True, check=True)
    print(f"✅ Model {base_model} is ready.")

# Verify Ollama is running
!ollama list


In [None]:
# @title ## 3. Generate Diamond Prompts & Run the Factory
# @markdown This cell first generates 2,500 complex prompts and then runs the assembly line for each one.

import os
from datetime import datetime
import textwrap
import subprocess

# --- 1. Generate Diamond Prompts ---
# The script paths below are relative, so work from the repository root.
os.chdir(REPO_DIR)
prompt_script_path = "scripts/generate_diamond_prompts.py"
prompt_output_file = "/content/diamond_prompts.txt"
num_diamond_prompts = 2500

print(f"--- Generating {num_diamond_prompts} Diamond-tier prompts ---")
prompt_command = [
    "python3",
    prompt_script_path,
    "--num-prompts", str(num_diamond_prompts),
    "--output-file", prompt_output_file
]
prompt_result = subprocess.run(prompt_command)
# Only claim success when the script actually exited cleanly
if prompt_result.returncode == 0:
    print(f"✅ Diamond prompts saved to {prompt_output_file}")
else:
    print(f"❌ Prompt generation failed (exit code {prompt_result.returncode}).")
print("-" * 50)


# --- 2. Load Prompts ---
# One prompt per line; surrounding whitespace is stripped and blank lines dropped.
print(f"Loading prompts from {prompt_output_file}...")
try:
    with open(prompt_output_file, 'r') as f:
        prompts = list(filter(None, map(str.strip, f)))
except FileNotFoundError:
    print(f"ERROR: Prompt file not found at {prompt_output_file}.")
    prompts = []
else:
    print(f"✅ Successfully loaded {len(prompts)} prompts.")

# --- 3. Run the Data Factory ---
#@markdown Specify the output directory in your Google Drive for the Diamond dataset.
DRIVE_OUTPUT_DIR = "/content/drive/MyDrive/housebrain_diamond_dataset"

if prompts:
    assembly_line_script_path = "scripts/run_complete_assembly_line.py"
    run_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_dir = os.path.join(DRIVE_OUTPUT_DIR, f"run_{run_timestamp}")
    # Create the run folder up front so the "ready" message below is true
    os.makedirs(output_dir, exist_ok=True)

    print(f"\nOutput directory is ready at: {output_dir}")
    print(f"Found {len(prompts)} prompts to process.")
    print("="*50)

    failed_runs = []  # run names whose assembly-line process exited non-zero
    for i, prompt in enumerate(prompts):
        print(f"Processing prompt {i+1}/{len(prompts)}")
        prompt_short = textwrap.shorten(prompt, width=100, placeholder="...")
        print(f"PROMPT: {prompt_short}")
        print("="*50)

        run_name = f"prompt_{i+1:04d}"
        command = [
            "python3", assembly_line_script_path,
            "--prompt", prompt,
            "--output-dir", output_dir,
            "--run-name", run_name,
            "--model", MODEL_NAME,
            "--max-retries", "5"
        ]
        completed = subprocess.run(command)
        if completed.returncode != 0:
            # Keep going: one failed prompt should not stop the whole batch
            failed_runs.append(run_name)
            print(f"⚠️ {run_name} exited with code {completed.returncode}")
        print("\n" + "-"*50 + "\n")

    print("🎉 Diamond Data Factory run complete! Check your Google Drive for the generated files.")
    if failed_runs:
        print(f"⚠️ {len(failed_runs)}/{len(prompts)} runs exited with errors: {', '.join(failed_runs[:20])}")
else:
    print("No prompts to process. Please check your configuration.")



In [None]:
# @title ## 4. (Optional) Download Generated Diamond Dataset
# @markdown Run this cell after the data generation is complete to compress and download the entire output folder.

import shutil
import os
from google.colab import files

# Dataset folder on Drive and the archive that will be created under /content
source_dir = "/content/drive/MyDrive/housebrain_diamond_dataset"
zip_filename = "housebrain_diamond_dataset.zip"
zip_filepath = f"/content/{zip_filename}"

if not os.path.exists(source_dir):
    print(f"ERROR: The source directory '{source_dir}' was not found. Please ensure the Data Factory ran correctly.")
else:
    print(f"Compressing '{source_dir}' into '{zip_filepath}'...")
    # make_archive appends ".zip" itself, so it is given the path without the extension
    archive_base = zip_filepath[:-len('.zip')]
    shutil.make_archive(archive_base, 'zip', source_dir)
    print("✅ Compression complete.")

    # Trigger the browser download of the finished archive
    print(f"\nDownloading '{zip_filename}'...")
    files.download(zip_filepath)

