In [None]:
# --- 1. Install and Start Ollama ---
# This is the most critical step. We must install the Ollama service in the Colab environment.
# The '!' runs shell commands. The '&' at the end runs the server in the background.

print("Installing Ollama...")
!curl -fsSL https://ollama.com/install.sh | sh
print("Starting Ollama server in the background...")
!ollama serve &

# Give the server a moment to start up
import time
time.sleep(5)
print("✅ Ollama server should be running.")


In [None]:
# --- 1. Environment Setup ---
# Install the Python packages this notebook relies on into the running kernel's
# environment (%pip targets the kernel; -q keeps installer output short):
#   ollama     - Python client used by the generation cell (ollama.chat)
#   ipywidgets - provides the model-selection dropdown
#   tqdm       - progress bar for the generation loop
#   pandas     - NOTE(review): not referenced by any cell visible here; confirm it is still needed
# NOTE(review): versions are unpinned, so results may differ between runs as packages update.
%pip install -q ollama ipywidgets pandas tqdm


In [None]:
# --- 1a. Mount Google Drive ---
# The prompt list is stored on Google Drive, so Drive has to be mounted
# before any later cell can read it.
import os
from google.colab import drive

drive.mount('/content/drive')

# --- Verify File Access ---
# Location of the prompt list on Drive (My Drive > housebrain_prompts).
PROMPT_FILE_PATH = "/content/drive/MyDrive/housebrain_prompts/platinum_prompts.txt"

# Build the whole status report first, then emit it in one go.
report_lines = ["---"]
if not os.path.exists(PROMPT_FILE_PATH):
    report_lines.append("❌ ERROR: Could not find prompt file at the specified path.")
    report_lines.append("If your file is in a different location, please update the PROMPT_FILE_PATH variable.")
else:
    report_lines.append(f"✅ Successfully found prompt file at: {PROMPT_FILE_PATH}")
report_lines.append("---")
print("\n".join(report_lines))


In [None]:
# --- 2. Configuration & Model Selection ---
import os

import ipywidgets as widgets
from IPython.display import display

# --- Tier Configuration ---
# Tier for this generation run; it becomes a subfolder of the output root.
# Tiers: gold_tier, platinum_tier, diamond_tier
DATASET_TIER = "gold_tier"

# --- Generation Parameters ---
# Root directory under which all raw generated data is written.
BASE_OUTPUT_DIR = "raw_generated_data"

# Number of architectural plans to generate in this run.
NUM_PLANS_TO_GENERATE = 15000 # Goal for the Gold Tier

# Nested output path for this tier (e.g., raw_generated_data/gold_tier/).
TIER_OUTPUT_DIR = os.path.join(BASE_OUTPUT_DIR, DATASET_TIER)

# --- Model Selection ---
# Every model we want to be able to test; the first entry is the default choice.
model_options = [
    "phi4-reasoning:latest",
    "phi4-reasoning:plus",
    "phi4-reasoning:14b",
    "phi4-reasoning:14b-plus-q4_K_M",
    "phi3:instruct",
    "llama3:instruct",
    "qwen2.5:3b",
]
model_dropdown = widgets.Dropdown(
    options=model_options,
    value=model_options[0],
    description='Select Model:',
    disabled=False,
    style={'description_width': 'initial'},
)
display(model_dropdown)

# --- Create Directory Structure ---
os.makedirs(TIER_OUTPUT_DIR, exist_ok=True)

# Summarise the configuration that is now in effect.
for summary_line in (
    f"Configuration loaded. Selected model will be: {model_dropdown.value}",
    f"Dataset Tier: {DATASET_TIER}",
    f"Number of samples to generate: {NUM_PLANS_TO_GENERATE}",
    f"Generated data will be saved under: {TIER_OUTPUT_DIR}",
):
    print(summary_line)


In [None]:
# --- 3. Setup & Verification ---
import os

# -- A. Load Prompts from File --
def load_prompts_from_file(filepath):
    """Read prompts from a UTF-8 text file, treating each non-blank line as one prompt.

    Leading and trailing whitespace is trimmed from every prompt. If the file
    does not exist, an error is printed and an empty list is returned.
    """
    print(f"Loading prompts from {filepath}...")
    collected = []
    try:
        with open(filepath, 'r', encoding='utf-8') as handle:
            for raw_line in handle:
                cleaned = raw_line.strip()
                if cleaned:
                    collected.append(cleaned)
    except FileNotFoundError:
        print(f"❌ ERROR: Prompt file not found at {filepath}. Please check the path in Cell 1a.")
        return []
    print(f"Successfully loaded {len(collected)} prompts.")
    return collected

# Load the prompts using the path defined in Cell 1a
ALL_PROMPTS = load_prompts_from_file(PROMPT_FILE_PATH)

# -- B. Prepare Ollama Model --
# Get the selected model from the dropdown widget in the previous cell.
selected_model = model_dropdown.value
print(f"\\n--- Preparing Model: {selected_model} ---")

# This command will pull the model if it's not already present in the Colab instance.
print(f"\\nAttempting to download '{selected_model}' via Ollama... (This may take several minutes)")
!ollama pull {selected_model}

# Verify the model was downloaded by listing all available models
print(f"\\n--- Verifying Model Installation ---")
!ollama list

# -- C. Final Readiness Check --
print("\\n" + "="*50)
if ALL_PROMPTS:
    print("✅ Prompt file loaded successfully.")
    print(f"✅ Model '{selected_model}' is ready.")
    print("🚀 You are now ready to start the data generation process in the next cell.")
else:
    print("❌ Generation HALTED. Please fix the prompt file path in Cell 1a.")
print("="*50)


In [None]:
# --- 4. Run Data Generation ---
# This cell contains the core logic for generating the plans.
# All setup (prompt loading, model download) is handled in the previous cells.

# --- Core Generation Function ---
# (This function remains unchanged)
def generate_and_save_raw_plan(prompt, model_name, tier_dir):
    """
    Generates a house plan using the specified model and saves the raw output.

    Args:
        prompt: The design request. It is embedded in the text sent to the model
            and also hashed to build the run directory name.
        model_name: Model tag passed to `ollama.chat`; also used as a sub-directory name.
        tier_dir: Directory under which `<model_name>/<unique_id>/` is created.

    Returns:
        `(True, path_to_raw_output_json)` on success, or `(False, None)` if the
        request or the write raises. In the failure case the error text is
        written to `error.log` inside the run directory.
    """
    # Imported here because no earlier cell brings these names into scope.
    import hashlib
    import os
    import time

    import ollama

    # Directory name: short hash of the prompt plus a second-resolution timestamp.
    prompt_hash = hashlib.md5(prompt.encode()).hexdigest()[:10]
    timestamp = int(time.time())
    unique_id = f"prompt_{prompt_hash}_{timestamp}"

    run_output_dir = os.path.join(tier_dir, model_name, unique_id)
    os.makedirs(run_output_dir, exist_ok=True)

    output_filename = os.path.join(run_output_dir, "raw_output.json")

    try:
        # The request text must contain the caller's prompt; previously the prompt
        # was only hashed, so every call sent the same text to the model.
        structured_prompt = f"""
        Please act as an expert architect specializing in Indian residential and commercial design...
        (Full prompt text is the same as before)

        User request: {prompt}
        """

        response = ollama.chat(
            model=model_name,
            messages=[{'role': 'user', 'content': structured_prompt}],
            format='json'
        )
        raw_output = response['message']['content']
        # Explicit UTF-8 so non-ASCII model output is written the same on every platform.
        with open(output_filename, 'w', encoding='utf-8') as f:
            f.write(raw_output)
        return True, output_filename

    except Exception as e:
        # Best-effort: record the failure next to where the output would have been
        # and let the caller carry on with the next prompt.
        error_message = f"An error occurred: {str(e)}"
        with open(os.path.join(run_output_dir, "error.log"), 'w', encoding='utf-8') as f:
            f.write(error_message)
        return False, None

# --- Main Generation Loop ---
# `random` and `tqdm` are imported here because no earlier cell imports them;
# without these lines the loop fails with NameError.
import random

from tqdm.auto import tqdm

# Check if prompts were loaded successfully in the PREVIOUS cell before starting
if 'ALL_PROMPTS' in locals() and ALL_PROMPTS:
    # "\n" is a real newline; the earlier "\\n" printed a literal backslash-n.
    print("\nStarting data generation process for the Gold Tier...")

    successful_generations = 0

    # Each iteration samples one prompt at random (with replacement), so the same
    # prompt can be used more than once across the run.
    for _ in tqdm(range(NUM_PLANS_TO_GENERATE), desc=f"Generating with {selected_model}"):
        current_prompt = random.choice(ALL_PROMPTS)

        success, filepath = generate_and_save_raw_plan(
            prompt=current_prompt,
            model_name=selected_model,
            tier_dir=TIER_OUTPUT_DIR
        )

        if success:
            successful_generations += 1

    print("\n" + "="*50)
    print("Gold Tier Data Generation Complete!")
    print(f"Successfully generated {successful_generations} / {NUM_PLANS_TO_GENERATE} raw plan files.")
    print(f"All outputs are saved in the '{TIER_OUTPUT_DIR}/{selected_model}' directory.")
    print("="*50)
else:
    print("\n🛑 Generation HALTED. Please run the 'Setup & Verification' cell (Cell 3) successfully first.")


In [None]:
# --- 5. Package and Download Results ---
import os
import shutil
import time

# google.colab exists only inside Colab. Importing it unconditionally would raise
# ImportError elsewhere before the fallback message below could ever be shown.
try:
    from google.colab import files
except ImportError:
    files = None

# This cell packages the output from the last generation run into a single zip file
# for easy download.

# Identify the output directory from the last run based on the tier
# Note: This relies on 'BASE_OUTPUT_DIR' and 'DATASET_TIER' from Cell 2 and on
# 'selected_model' from Cell 3.
output_directory_path = os.path.join(BASE_OUTPUT_DIR, DATASET_TIER, selected_model)
zip_filename = f"{DATASET_TIER}_{selected_model}_raw_data_{int(time.time())}"
# Colab's working area is /content; outside Colab fall back to the current directory.
archive_dir = "/content" if os.path.isdir("/content") else os.getcwd()
zip_filepath = os.path.join(archive_dir, zip_filename)

print(f"Locating generated data in: {output_directory_path}...")

if os.path.isdir(output_directory_path):
    print(f"Found data. Compressing into '{zip_filename}.zip'...")

    # Create the zip archive (make_archive appends the '.zip' extension itself)
    shutil.make_archive(zip_filepath, 'zip', output_directory_path)

    # "\n" is a real newline; the earlier "\\n" printed a literal backslash-n.
    print("\n" + "="*50)
    print("✅ Success! Your data has been compressed.")
    print(f"Your zip file is ready: {zip_filename}.zip")
    print("="*50)

    # Optional: Trigger automatic download in Google Colab
    if files is not None:
        print("\nTriggering file download... (This may take a while for large datasets)")
        files.download(f"{zip_filepath}.zip")
    else:
        print("\nNOTE: Automatic download is only available in Google Colab.")
        print(f"You can find your archive at: {zip_filepath}.zip")

else:
    print(f"❌ Error: Could not find the output directory '{output_directory_path}'.")
    print("Please make sure you have run the generation cell (Cell 4) first.")
