# HouseBrain: Senior Architect (v4 - Llama-3) Fine-tuning

This notebook is dedicated to fine-tuning the `meta-llama/Meta-Llama-3-8B-Instruct` model to act as our **Senior Architect v4**. 

**Objective:** To create a model that can identify and correct architectural, geometric, and functional flaws in a given JSON house plan. This is part of a two-track experiment to compare against the Qwen model.

**Dataset:** This training run uses `finetune_dataset_senior_architect_v4.jsonl`, which contains over 6,000 examples of `(flawed_plan -> corrected_plan)` pairs.


## 1. Setup and Authentication


In [None]:
# Purpose of this cell: prepare the Colab runtime (Drive mount, Python
# dependencies, Hugging Face credentials) before any model or data is touched.

# Mount Google Drive to access files (mounted at /content/drive).
# NOTE(review): none of the paths used later in this notebook point into
# /content/drive — confirm whether the mount is still required.
from google.colab import drive
drive.mount('/content/drive')

# Install necessary libraries (`-q` keeps pip's output quiet).
# The leading `!` is an IPython shell escape, so this line only runs inside a notebook.
!pip install -q "transformers[torch]" peft bitsandbytes datasets accelerate

# Login to Hugging Face Hub. You will need a token with write access
# (the training cell sets `push_to_hub=True`).
# Also, you MUST request access to the Llama-3 model on Hugging Face.
from huggingface_hub import notebook_login
notebook_login()


## 2. Clone Repository & Load Dataset


In [None]:
import os

# Purpose of this cell: fetch the project repository, switch the working
# directory into it, and load the v4 fine-tuning dataset from the checkout.

# Clone the repository to access the dataset
repo_path = '/content/HouseBrainLLM'
if not os.path.exists(repo_path):
    # No destination is given, so git clones into the current working directory.
    # assumes the notebook's working directory is /content so the checkout lands at `repo_path` — TODO confirm
    !git clone https://github.com/Vinay-O/HouseBrainLLM.git
else:
    print('Repository already cloned.')

# From here on the process runs inside the checkout, so every relative path
# used afterwards (including in later cells) resolves under `repo_path`.
os.chdir(repo_path)
# Make sure we are on the main branch and have the latest files
!git checkout main
!git pull

from datasets import load_dataset

# Load our v4 dataset
# A single JSON Lines file is read and requested as the 'train' split.
dataset_path = '/content/HouseBrainLLM/finetune_dataset_senior_architect_v4.jsonl'
full_dataset = load_dataset('json', data_files=dataset_path, split='train')

# Sanity check: report the row count and show the first raw record.
print("\n✅ Dataset loaded successfully!")
print(f"   Number of samples: {len(full_dataset)}")
print("\nSample entry:")
print(full_dataset[0])


## 3. Model Preparation and Tokenization


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Hub identifier of the base checkpoint that will be fine-tuned.
base_model_name = "meta-llama/Meta-Llama-3-8B-Instruct"

# Configure quantization to load the model in 4-bit, which saves memory
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    # "nf4" selects the 4-bit quantization data type used by bitsandbytes.
    bnb_4bit_quant_type="nf4",
    # Compute in bfloat16; the training cell also sets `bf16=True`.
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Load the tokenizer
# NOTE(review): `trust_remote_code=True` allows Python code shipped in the Hub
# repo to run at load time; presumably not needed for a Llama-3 checkpoint —
# confirm and drop it here and in the model load below if so.
tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
# Llama-3 does not have a padding token, so we set it to the end-of-sequence token
tokenizer.pad_token = tokenizer.eos_token

# Load the base model with our quantization config
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=quantization_config,
    # "auto" leaves device placement of the weights to the library.
    device_map="auto",
    trust_remote_code=True
)

# Row-level transform used with `Dataset.map`: chat formatting + tokenization
def formatting_and_tokenizing_func(example):
    """Turn one dataset row into chat-formatted token ids.

    The row's `messages[1]` and `messages[2]` entries supply the user turn
    (flawed plan) and the assistant turn (corrected plan). Whatever sits at
    `messages[0]` is not read; a fixed system prompt is used in its place
    (presumably replacing the dataset's own system message — verify against
    the dataset schema).

    Relies on the module-level `tokenizer`.

    Args:
        example: A dataset row exposing a `messages` sequence whose items
            have a `content` field.

    Returns:
        A dict with a single key, `input_ids`, holding the tokenized
        conversation with its leading dimension squeezed away (assumes
        `apply_chat_template` returns a (1, seq_len) tensor — TODO confirm).
    """
    turns = example['messages']

    chat = [
        # Strict instruction so the model learns to answer with JSON only
        {
            "role": "system",
            "content": "You are an expert AI architect. Your only task is to review a flawed house plan and output the corrected version as a single, raw JSON object. Do not add any conversational text or markdown.",
        },
        # Flawed plan supplied by the user
        {"role": "user", "content": turns[1]['content']},
        # Corrected plan the model should produce
        {"role": "assistant", "content": turns[2]['content']},
    ]

    # The full conversation, answer included, is rendered with the model's
    # chat template, so no generation prompt is appended.
    encoded = tokenizer.apply_chat_template(
        chat,
        tokenize=True,
        add_generation_prompt=False,
        return_tensors="pt",
    )

    return {'input_ids': encoded.squeeze(0)}

# Tokenize every row; the raw columns are dropped so that only the fields
# returned by the formatting function remain in the result.
raw_columns = list(full_dataset.features)
tokenized_dataset = full_dataset.map(
    formatting_and_tokenizing_func,
    remove_columns=raw_columns,
)

# Report completion, then show the first tokenized row as a visual check
for status_line in ("\n✅ Dataset formatted and tokenized.", "\nSample tokenized entry:"):
    print(status_line)
print(tokenized_dataset[0])


## 4. PEFT/LoRA Configuration


In [None]:
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model

# Get the quantized base model ready for training: turn on gradient
# checkpointing first, then run PEFT's k-bit preparation step.
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

# Layers that receive LoRA adapters in Llama-3: the attention projections
# followed by the MLP projections.
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
mlp_projections = ["gate_proj", "up_proj", "down_proj"]

# LoRA hyperparameters, collected in one place
lora_settings = {
    "r": 8,
    "lora_alpha": 16,
    "target_modules": attention_projections + mlp_projections,
    "lora_dropout": 0.05,
    "bias": "none",
    "task_type": "CAUSAL_LM",
}
lora_config = LoraConfig(**lora_settings)

# Wrap the prepared model with the adapters described by `lora_config`
peft_model = get_peft_model(model, lora_config)

# Confirm creation and show how many parameters are trainable
print("\n✅ PEFT model created successfully.")
peft_model.print_trainable_parameters()


## 5. Training


In [None]:
from transformers import DataCollatorForLanguageModeling, Trainer, TrainingArguments

# Purpose of this cell: configure and launch the LoRA fine-tuning run on the
# tokenized dataset, pushing the resulting adapters to the Hugging Face Hub.

# Define the arguments for the training process
training_args = TrainingArguments(
    output_dir="housebrain_senior_architect_v4_llama3_adapters",
    per_device_train_batch_size=4, # Adjust based on GPU memory (A100 can handle 4 or 8)
    gradient_accumulation_steps=4, # 4 x 4 = 16 sequences per optimizer step per device
    learning_rate=2e-4,
    logging_steps=25, # Log metrics every 25 steps
    num_train_epochs=1, # A single epoch is often sufficient for a large dataset
    save_strategy="epoch",
    bf16=True, # Use bfloat16 for faster training on compatible GPUs (like A100)
    push_to_hub=True, # Push the final adapters to Hugging Face Hub
    report_to="wandb" # Optional: for logging to Weights & Biases
)

# The tokenized rows contain only variable-length `input_ids`: there are no
# `labels` and nothing is padded. Without a collator that handles both, a batch
# of 4 ragged rows cannot be stacked and the model has no target to compute a
# loss from. With `mlm=False` this collator pads each batch with the
# tokenizer's pad token, builds the attention mask, and copies `input_ids`
# into `labels` for causal-LM training (padding positions are set to -100 so
# they are ignored by the loss).
# NOTE(review): `pad_token` was set to `eos_token` when the tokenizer was
# loaded, so any EOS tokens inside a sequence are masked out of the labels as
# well — confirm this is acceptable or use a dedicated pad token.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Create the Trainer instance
trainer = Trainer(
    model=peft_model,
    train_dataset=tokenized_dataset,
    args=training_args,
    data_collator=data_collator,
)

print("🚀 Starting Senior Architect (v4 - Llama-3) training...")
# Start the training process
trainer.train()


## 6. (Optional) Save and Archive Adapters

Run this cell if you want to archive the adapters locally in Colab for later download.


In [None]:
import shutil

# Folder holding the training output (same value as `output_dir` in the
# training cell)
adapter_dir = "housebrain_senior_architect_v4_llama3_adapters"

# Bundle the folder's contents into `<adapter_dir>.zip`, written next to the
# folder itself
shutil.make_archive(base_name=adapter_dir, format='zip', root_dir=adapter_dir)

# Tell the user where the archive was written
print(f"\n✅ Adapters archived to {adapter_dir}.zip")
