# Hancock Fine-Tune — CyberViser
**LoRA fine-tune of Mistral 7B Instruct on the Hancock pentest+SOC dataset**

Runtime: `Runtime → Change runtime type → T4 GPU` (free tier is fine)

Steps:
1. Install dependencies
2. Upload `hancock_v2.jsonl`
3. Load Mistral 7B with 4-bit quantization
4. Apply LoRA adapter
5. Train with early stopping
6. Save & download adapter

In [None]:
# ── Cell 1: Check GPU ─────────────────────────────────────────────────────────
# Runs `nvidia-smi` and prints its report. If the tool is missing or exits with
# a non-zero status, prints a hint to switch the runtime instead.
import subprocess

try:
    result = subprocess.run(['nvidia-smi'], capture_output=True, text=True)
except FileNotFoundError:
    # Without an `nvidia-smi` executable on PATH, subprocess.run raises rather
    # than returning a non-zero exit code; treat that the same as "no GPU".
    result = None

gpu_found = result is not None and result.returncode == 0
gpu_report = result.stdout if gpu_found else 'No GPU found — change runtime to T4'
print(gpu_report)

In [None]:
# ── Cell 2: Install dependencies ──────────────────────────────────────────────
# This takes ~3-4 minutes on first run
# Lines starting with `!` are notebook shell escapes, so this cell only runs
# inside a notebook (not as a plain Python script).
# Unsloth is installed first, directly from its GitHub repository, with the
# `colab-new` extra.
!pip install -q "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# Remaining training libraries; `-q` keeps pip's output quiet.
# NOTE(review): no versions are pinned here, so each fresh runtime installs
# whatever releases are current at that time.
!pip install -q trl transformers accelerate datasets peft bitsandbytes
# Printed unconditionally — the exit status of the pip commands is not checked.
print('✅ Dependencies installed')

In [None]:
# ── Cell 3: Upload dataset ────────────────────────────────────────────────────
# Upload hancock_v2.jsonl from your machine, then parse it as JSON Lines
# (one JSON object per non-blank line) into `samples`.
import json
import pathlib

from google.colab import files

print('Upload hancock_v2.jsonl from: /home/kalibodo/cyberviser/data/hancock_v2.jsonl')
uploaded = files.upload()

dataset_path = pathlib.Path('hancock_v2.jsonl')
if not dataset_path.is_file():
    # Fail with an actionable message instead of a bare read error when the
    # upload was cancelled or the file arrived under a different name.
    raise FileNotFoundError(
        f'{dataset_path} not found after upload (received: {sorted(uploaded)}). '
        'Re-run this cell and upload a file named exactly hancock_v2.jsonl.'
    )

# Iterate the file line by line rather than using str.splitlines(): splitlines()
# also breaks on characters such as U+2028/U+2029, which may legally appear
# inside JSON strings and would cut a record in half. UTF-8 is stated
# explicitly so parsing does not depend on the runtime's locale.
with dataset_path.open(encoding='utf-8') as fh:
    samples = [json.loads(line) for line in fh if line.strip()]

if not samples:
    # Guard the samples[0] access below and every later cell that needs data.
    raise ValueError(f'{dataset_path} contains no JSON records')

print(f'✅ Loaded {len(samples):,} samples')
print('Sample keys:', list(samples[0].keys()))

In [None]:
# ── Cell 4: Config ────────────────────────────────────────────────────────────
# Settings read by the cells below; edit them here and re-run the later cells.

# Base model and maximum sequence length
MODEL_NAME = 'mistralai/Mistral-7B-Instruct-v0.3'
MAX_SEQ_LENGTH = 4096

# LoRA adapter
LORA_R = 16
LORA_ALPHA = 32

# Training schedule
MAX_STEPS = 300  # ~1hr on T4 — increase to 500 for better results
BATCH_SIZE = 2
GRAD_ACCUM = 4  # effective batch = 8
LEARNING_RATE = 2e-4
ES_PATIENCE = 3  # early stopping patience

# Directory used for training output and the saved adapter
OUTPUT_DIR = 'hancock-adapter'

# Echo the key settings so the run log records them.
print(f'Model: {MODEL_NAME}')
print(f'LoRA rank: {LORA_R} | Max steps: {MAX_STEPS} | Effective batch: {BATCH_SIZE * GRAD_ACCUM}')

In [None]:
# ── Cell 5: Load model with 4-bit quantization ────────────────────────────────
# Loads the base model and its tokenizer through Unsloth, then reports the GPU
# memory currently allocated by PyTorch.
from unsloth import FastLanguageModel
import torch

_load_kwargs = dict(
    model_name=MODEL_NAME,
    max_seq_length=MAX_SEQ_LENGTH,
    dtype=None,  # auto-detect
    load_in_4bit=True,
)

print(f'Loading {MODEL_NAME} with 4-bit quantization...')
model, tokenizer = FastLanguageModel.from_pretrained(**_load_kwargs)
print('✅ Model loaded')
print(f'GPU memory used: {torch.cuda.memory_allocated() / 1e9:.2f} GB')

In [None]:
# ── Cell 6: Apply LoRA adapter ────────────────────────────────────────────────
# Wraps the loaded model with a LoRA adapter on the listed projection modules
# and prints how many parameters remain trainable.
_lora_targets = [
    'q_proj', 'k_proj', 'v_proj', 'o_proj',
    'gate_proj', 'up_proj', 'down_proj',
]
_lora_kwargs = dict(
    r=LORA_R,
    target_modules=_lora_targets,
    lora_alpha=LORA_ALPHA,
    lora_dropout=0.05,
    bias='none',
    use_gradient_checkpointing='unsloth',
    random_state=42,
)

model = FastLanguageModel.get_peft_model(model, **_lora_kwargs)
print(f'✅ LoRA applied — trainable params: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}')

In [None]:
# ── Cell 7: Prepare dataset ───────────────────────────────────────────────────
from datasets import Dataset

def apply_chat_template(sample):
    """Render one sample's ``messages`` entry into a single training string.

    The tokenizer's chat template is applied without tokenizing and without a
    trailing generation prompt.

    Returns:
        A dict with one key, ``'text'``, holding the rendered string.
    """
    rendered = tokenizer.apply_chat_template(
        sample['messages'],
        tokenize=False,
        add_generation_prompt=False,
    )
    return dict(text=rendered)

# Build a Dataset from the parsed samples and swap each record's 'messages'
# column for the rendered 'text' column.
dataset = Dataset.from_list(samples).map(
    apply_chat_template,
    remove_columns=['messages'],
)

# Filter out samples that are too long
def length_filter(sample):
    """Return True when the sample's text encodes to at most MAX_SEQ_LENGTH tokens."""
    token_ids = tokenizer.encode(sample['text'])
    n_tokens = len(token_ids)
    return n_tokens <= MAX_SEQ_LENGTH

# Remember the unfiltered size so the report can show how many samples were dropped.
before = len(dataset)
dataset = dataset.filter(length_filter)
print(f'Samples: {before:,} → {len(dataset):,} (after length filter)')

# Hold out 5% of the samples for evaluation; the fixed seed keeps the split reproducible.
split = dataset.train_test_split(test_size=0.05, seed=42)
train_dataset, eval_dataset = split['train'], split['test']
print(f'Train: {len(train_dataset):,} | Eval: {len(eval_dataset):,}')

In [None]:
# ── Cell 8: Train ─────────────────────────────────────────────────────────────
# Configures the trainer (step-based evaluation/checkpointing plus early
# stopping on eval_loss) and runs supervised fine-tuning.
import inspect
import pathlib

from trl import SFTTrainer
from transformers import TrainingArguments, EarlyStoppingCallback

pathlib.Path(OUTPUT_DIR).mkdir(exist_ok=True)

# transformers renamed `evaluation_strategy` to `eval_strategy`. Cell 2 installs
# unpinned versions, so choose whichever keyword the installed TrainingArguments
# accepts instead of hard-coding one that may raise TypeError.
_ta_params = inspect.signature(TrainingArguments.__init__).parameters
_eval_strategy_key = 'eval_strategy' if 'eval_strategy' in _ta_params else 'evaluation_strategy'

# Use bf16 when the GPU supports it, otherwise fall back to fp16.
_use_bf16 = torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    output_dir                  = OUTPUT_DIR,
    num_train_epochs            = 1,          # a positive max_steps takes precedence over this
    max_steps                   = MAX_STEPS,
    per_device_train_batch_size = BATCH_SIZE,
    gradient_accumulation_steps = GRAD_ACCUM,
    warmup_steps                = 20,
    learning_rate               = LEARNING_RATE,
    fp16                        = not _use_bf16,
    bf16                        = _use_bf16,
    logging_steps               = 10,
    eval_steps                  = 50,
    save_strategy               = 'steps',
    save_steps                  = 50,         # same cadence as eval_steps so the best checkpoint can be reloaded
    load_best_model_at_end      = True,
    metric_for_best_model       = 'eval_loss',
    greater_is_better           = False,      # lower eval_loss is better
    optim                       = 'adamw_8bit',
    weight_decay                = 0.01,
    lr_scheduler_type           = 'cosine',
    report_to                   = 'none',
    run_name                    = 'hancock-v2',
    **{_eval_strategy_key: 'steps'},
)

# NOTE(review): recent TRL releases move `dataset_text_field` / `max_seq_length`
# into SFTConfig and rename `tokenizer`; confirm these keywords against the
# installed TRL/Unsloth versions.
trainer = SFTTrainer(
    model              = model,
    tokenizer          = tokenizer,
    train_dataset      = train_dataset,
    eval_dataset       = eval_dataset,
    dataset_text_field = 'text',
    max_seq_length     = MAX_SEQ_LENGTH,
    args               = training_args,
    callbacks          = [EarlyStoppingCallback(early_stopping_patience=ES_PATIENCE)],
)

print(f'Starting training — {MAX_STEPS} steps (early stopping patience: {ES_PATIENCE} evals)...')
print(f'Effective batch size: {BATCH_SIZE * GRAD_ACCUM}')
trainer_stats = trainer.train()
print(f'\n✅ Training complete in {trainer_stats.metrics["train_runtime"]:.0f}s')

In [None]:
# ── Cell 9: Evaluate final loss ───────────────────────────────────────────────
# Runs one evaluation pass on the eval split and reports loss and perplexity.
import math

metrics = trainer.evaluate()
print(f"Final eval loss : {metrics['eval_loss']:.4f}")
# The eval loss is a mean cross-entropy measured with the natural logarithm
# (nats), so perplexity is e ** loss. Using 2 ** loss would only be correct for
# a loss measured in bits and under-reports the perplexity here.
print(f"Perplexity      : {math.exp(metrics['eval_loss']):.2f}")

In [None]:
# ── Cell 10: Test inference ───────────────────────────────────────────────────
# Switches the model to Unsloth's inference mode and streams one sampled answer
# to a fixed test prompt as a quick sanity check of the fine-tuned adapter.
FastLanguageModel.for_inference(model)

SYSTEM = """You are Hancock, an elite penetration tester and offensive security specialist built by CyberViser.
You operate STRICTLY within authorized scope. You always confirm authorization before suggesting active techniques."""

test_prompt = "How do I perform Kerberoasting on an authorized Active Directory environment?"

messages = [
    {"role": "system",    "content": SYSTEM},
    {"role": "user",      "content": test_prompt},
]

# add_generation_prompt=True ends the prompt where the assistant's reply begins.
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize              = True,
    add_generation_prompt = True,
    return_tensors        = 'pt',
).to('cuda')

from transformers import TextStreamer
# skip_prompt=True prints only newly generated tokens, not the echoed prompt.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

print(f'\n[Hancock] {test_prompt}\n')
_ = model.generate(
    input_ids  = inputs,
    streamer   = streamer,
    max_new_tokens = 512,
    # temperature / top_p only take effect when sampling is enabled; without
    # do_sample=True, generate() decodes greedily and ignores both settings.
    do_sample      = True,
    temperature    = 0.7,
    top_p          = 0.95,
)

In [None]:
# ── Cell 11: Save adapter ─────────────────────────────────────────────────────
# Writes the model and the tokenizer into OUTPUT_DIR, then lists what the
# directory now contains.
import os

for _artifact in (model, tokenizer):
    _artifact.save_pretrained(OUTPUT_DIR)
print(f'✅ Adapter saved to {OUTPUT_DIR}/')

files_saved = os.listdir(OUTPUT_DIR)
print('Files:', files_saved)

In [None]:
# ── Cell 12: Download adapter as zip ─────────────────────────────────────────
# Packs the contents of OUTPUT_DIR into hancock-adapter.zip and hands the
# archive to Colab's browser download helper.
import shutil
from google.colab import files

zip_path = shutil.make_archive(
    base_name='hancock-adapter',
    format='zip',
    root_dir=OUTPUT_DIR,
)
print(f'Zipped: {zip_path}')

files.download('hancock-adapter.zip')
print('✅ Download started — save to /home/kalibodo/cyberviser/hancock-adapter/')

In [None]:
# ── Cell 13 (Optional): Push to Hugging Face Hub ─────────────────────────────
# Uncomment to push adapter to HuggingFace (private repo)
# The whole cell is commented out, so running it as-is does nothing.
# NOTE(review): a token pasted into HF_TOKEN is stored in the notebook file —
# clear it before sharing or committing the notebook.

# HF_TOKEN = 'hf_...'   # your HuggingFace write token
# REPO_ID  = 'your-username/hancock-pentest-adapter'
#
# from huggingface_hub import login
# login(token=HF_TOKEN)
#
# model.push_to_hub(REPO_ID, private=True)
# tokenizer.push_to_hub(REPO_ID, private=True)
# print(f'✅ Pushed to https://huggingface.co/{REPO_ID}')

## After downloading the adapter

Extract the zip to `/home/kalibodo/cyberviser/hancock-adapter/`

Then load it locally. The example below uses Unsloth; the adapter can also be used with Ollama or transformers:

```python
from unsloth import FastLanguageModel

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name     = 'mistralai/Mistral-7B-Instruct-v0.3',
    max_seq_length = 4096,
    load_in_4bit   = True,
)
model.load_adapter('hancock-adapter/')
FastLanguageModel.for_inference(model)
```

Or run the agent (uses NVIDIA NIM — no GPU needed locally):
```bash
cd /home/kalibodo/cyberviser
python hancock_agent.py
```