# Mock Run on T4 GPU: End-to-End Test

This notebook is designed for a fast, low-cost mock run of the entire "Human-in-the-Loop" workflow on a T4 GPU.

**Workflow:**
1.  **Generate a small batch of raw drafts.**
2.  **Manually refine the drafts locally** using `scripts/refine_drafts.py`.
3.  **Run a brief fine-tuning job** on the perfected data to ensure the pipeline works from start to finish.


In [None]:
# @title Step 1: Set Up the Environment
import os
# IMPORTANT: PASTE YOUR GITHUB PERSONAL ACCESS TOKEN HERE
GITHUB_TOKEN = ""
os.environ['GITHUB_TOKEN'] = GITHUB_TOKEN

# Clone the repository using your token
!git clone https://$GITHUB_TOKEN@github.com/Vinay-O/HouseBrainLLM.git housebrain_v1_1
%cd housebrain_v1_1

# Install necessary libraries
!pip install --upgrade transformers peft trl accelerate datasets bitsandbytes sentencepiece jsonschema pydantic


In [None]:
# @title Step 2: Authenticate with Hugging Face
from huggingface_hub import login
# login() is called without arguments:
# you will be prompted to enter your Hugging Face token.
login()


In [None]:
# @title Step 3: Generate a Small Batch of Raw Drafts (T4 Optimized)

# Install Ollama if not present
!if ! command -v ollama &> /dev/null; then curl -fsSL https://ollama.com/install.sh | sh; fi

import subprocess
import time
import requests
import glob
from IPython.display import clear_output
import os

# Start Ollama server in the background
with open("ollama_server.log", "w") as log_file:
    ollama_process = subprocess.Popen(["ollama", "serve"], stdout=log_file, stderr=subprocess.STDOUT)

print("🚀 Starting Ollama server...")
time.sleep(5)

# Health check loop
print("... Waiting for Ollama server to become available...")
server_ready = False
for _ in range(36):
    try:
        response = requests.get("http://localhost:11434")
        if response.status_code == 200:
            print("✅ Ollama server is up and running!")
            server_ready = True
            break
    except requests.exceptions.ConnectionError:
        time.sleep(5)

if server_ready:
    print("\\n⏳ Downloading deepseek-coder model...")
    !ollama pull deepseek-coder:6.7b-instruct

    print("\\n⏳ Starting Raw Draft generation (4 parallel workers)...")
    processes = []
    num_workers = 4  # Reduced for T4
    num_examples = 20 # Small batch for a quick test
    output_dir = "data/training/silver_standard_raw"

    if os.path.exists(output_dir):
        get_ipython().system(f'rm -rf {output_dir}')
    os.makedirs(output_dir)

    for i in range(num_workers):
        command = f"python scripts/generate_raw_drafts.py --num-examples {num_examples} --num-workers {num_workers} --worker-id {i}"
        log_file = open(f"worker_{i}.log", "w")
        proc = subprocess.Popen(command, shell=True, stdout=log_file, stderr=subprocess.STDOUT)
        processes.append((proc, log_file))

    total_examples_to_generate = num_examples
    
    while any(p.poll() is None for p, _ in processes):
        clear_output(wait=True)
        generated_files = glob.glob(f"{output_dir}/*.json")
        progress_percentage = (len(generated_files) / total_examples_to_generate) * 100
        progress_bar = f"[{'#' * int(progress_percentage / 4)}{'.' * (25 - int(progress_percentage / 4))}]"
        
        print("--- Generating Raw Drafts (Mock Run) ---")
        print(f"Progress: {progress_bar} {len(generated_files)}/{total_examples_to_generate} raw drafts generated ({progress_percentage:.2f}%)\\n")
        get_ipython().system('tail -n 3 worker_*.log')
        
        time.sleep(15)
    
    clear_output(wait=True)
    generated_files = glob.glob(f"{output_dir}/*.json")
    print(f"--- Final Count ---")
    print(f"✅ Generated a total of {len(generated_files)} raw drafts.")
    
    for proc, log_file in processes:
        proc.wait()
        log_file.close()

    print("\\n\\n✅ Raw draft generation complete.")
    print("NEXT STEP: Download the 'data/training/silver_standard_raw' directory and use 'scripts/refine_drafts.py' locally.")

else:
    print("🔴 Ollama server failed to start.")
    get_ipython().system('cat ollama_server.log')


---
### **⛔ STOP: Manual Refinement Required ⛔**

1.  **Download the `data/training/silver_standard_raw` directory** from this Colab instance to your local machine.
2.  On your local machine, run the command: `python scripts/refine_drafts.py`
3.  Follow the interactive prompts to review, edit, and validate each draft, creating the final `data/training/silver_standard` dataset.
4.  **Commit and push the perfected `data/training/silver_standard` directory** to the same GitHub repository that was cloned in Step 1, so that the `git pull` in Step 4 can fetch it.
5.  Once the perfected data is on GitHub, you may proceed with the cells below.

---


In [None]:
# @title Step 4: Prepare Refined Datasets for Fine-Tuning
# This step assumes you have completed the manual refinement process and the
# perfected data is now in the `data/training/silver_standard` directory on GitHub.

# We need to pull the latest changes from the repo to get the refined data
!git pull

!python scripts/prepare_data_for_finetuning.py \
    --input-dir data/training/gold_standard \
    --output-dir data/training/gold_standard_finetune_ready

!python scripts/prepare_data_for_finetuning.py \
    --input-dir data/training/silver_standard \
    --output-dir data/training/silver_standard_finetune_ready


In [None]:
# @title Step 5: Run Fine-Tuning (T4 Optimized Mock Run)
!python scripts/run_finetuning.py \
    --model_id "deepseek-ai/deepseek-coder-6.7b-instruct" \
    --dataset_path "data/training/gold_standard_finetune_ready" "data/training/silver_standard_finetune_ready" \
    --output_dir "models/housebrain-v1.0-mock-t4" \
    --epochs 3 \
    --batch_size 1 \
    --learning_rate 2e-4 \
    --use_4bit


In [None]:
# @title Step 6: (Optional) Download the Trained Model Adapter
!zip -r housebrain-v1.0-mock-t4-adapter.zip models/housebrain-v1.0-mock-t4

from google.colab import files
files.download('housebrain-v1.0-mock-t4-adapter.zip')


# HouseBrain Mock Run on T4 GPU

This notebook is designed for a quick, low-cost "mock run" to verify the entire data generation and fine-tuning pipeline on a standard T4 GPU.

**Changes from the main A100 notebook:**
- Generates only **5** "Silver Standard" examples.
- Uses a **batch size of 1** to fit in T4 memory.
- Trains for only **3 epochs** for a faster test run.


In [None]:
# @title Step 1: Set Up the Environment
# -----------------
# IMPORTANT: PASTE YOUR GITHUB TOKEN HERE
# -----------------
import os
GITHUB_TOKEN = "" # PASTE YOUR GITHUB TOKEN HERE
os.environ['GITHUB_TOKEN'] = GITHUB_TOKEN

# Clone the repository using your token for private access
!git clone https://$GITHUB_TOKEN@github.com/Vinay-O/HouseBrainLLM.git housebrain_v1_1
%cd housebrain_v1_1

# Install the necessary libraries
!pip install --upgrade transformers peft trl accelerate datasets bitsandbytes sentencepiece jsonschema pydantic


In [None]:
# @title Step 2: Authenticate with Hugging Face
from huggingface_hub import login
# login() is called without arguments; presumably it prompts interactively
# for a Hugging Face access token — confirm against the huggingface_hub docs.
login()


In [None]:
# @title Step 3: Generate "Silver Standard" Dataset in Parallel

# Install Ollama in the Colab environment if it's not already present
!if ! command -v ollama &> /dev/null; then curl -fsSL https://ollama.com/install.sh | sh; fi

import os
import subprocess
import time
import requests

# Start the Ollama server as a background process
with open("ollama_server.log", "w") as log_file:
    ollama_process = subprocess.Popen(["ollama", "serve"], stdout=log_file, stderr=subprocess.STDOUT)

print("🚀 Starting Ollama server in the background...")
time.sleep(5)

# Health Check Loop
max_wait_time = 180
start_time = time.time()
server_ready = False
print("... Waiting for Ollama server to become available...")
while time.time() - start_time < max_wait_time:
    try:
        response = requests.get("http://localhost:11434")
        if response.status_code == 200:
            server_ready = True
            print("✅ Ollama server is up and running!")
            break
    except requests.exceptions.ConnectionError:
        time.sleep(5)
else:
    print("❌ Timed out waiting for Ollama server to start.")

# Model Download, Verification, and Parallel Data Generation
if server_ready:
    print("\\n⏳ Downloading the deepseek-coder model...")
    !ollama pull deepseek-coder:6.7b-instruct
    print("✅ Model download complete.")

    print("\\n📋 Verifying installed models...")
    !ollama list
    print("------------------------------------\\n")

    print("⏳ Starting the Silver Standard data generation process in parallel (4 workers)...")
    # Launch 4 worker processes in the background
    processes = []
    for i in range(4):
        command = f"python scripts/generate_silver_standard_data.py --num-examples 100 --num-workers 4 --worker-id {i}"
        # Redirect output to worker-specific log files
        log_file = open(f"worker_{i}.log", "w")
        proc = subprocess.Popen(command, shell=True, stdout=log_file, stderr=subprocess.STDOUT)
        processes.append((proc, log_file))

    # Wait for all processes to complete
    for proc, log_file in processes:
        proc.wait()
        log_file.close()

    print("\\n✅ All data generation workers have finished.")
    # Optional: print the logs from each worker
    # for i in range(4):
    #   print(f"--- Worker {i} Log ---")
    #   !cat worker_{i}.log
else:
    print("🔴 Ollama server failed to start. Cannot proceed.")
    print("📜 Server logs:")
    !cat ollama_server.log


In [None]:
# @title Step 4: Prepare All Datasets for Fine-Tuning

# Prepare the Gold Standard dataset
!python scripts/prepare_data_for_finetuning.py \
    --input-dir data/training/gold_standard \
    --output-dir data/training/gold_standard_finetune_ready

# Prepare the newly generated Silver Standard dataset
!python scripts/prepare_data_for_finetuning.py \
    --input-dir data/training/silver_standard \
    --output-dir data/training/silver_standard_finetune_ready


In [None]:
# @title Step 5: Run the Fine-Tuning Script (T4 Optimized)

!python scripts/run_finetuning.py \
    --model_id deepseek-ai/deepseek-coder:6.7b-instruct \
    --dataset_path data/training/gold_standard_finetune_ready data/training/silver_standard_finetune_ready \
    --output_dir models/housebrain-mock-t4 \
    --epochs 3 \
    --batch_size 1 \
    --learning_rate 0.0002 \
    --use_4bit


In [None]:
# @title Step 6: (Optional) Download the Trained Mock Model

!zip -r housebrain-mock-t4-adapter.zip models/housebrain-mock-t4

from google.colab import files
files.download('housebrain-mock-t4-adapter.zip')
