[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/cyberviser/Hancock/blob/main/Hancock_Colab_Finetune_v3.ipynb)

# 🔐 Hancock Fine-Tuning — CyberViser
**Mistral 7B → Cybersecurity specialist via LoRA**

| Step | Time | Notes |
|------|------|-------|
| Install deps | ~3 min | Unsloth + TRL |
| Load 4-bit model | ~2 min | 7B params, 4GB VRAM |
| Train 3 epochs | ~45 min | v3 dataset on T4 (CISA KEV + Atomic + GHSA) |
| Export GGUF Q4 | ~5–10 min | Includes a llama.cpp build; output is ready for Ollama |

> Select **Runtime → Change runtime type → T4 GPU** before running any cell!

In [None]:
# @title 1️⃣  Install Dependencies
# Pin versions for compatibility
!pip install -q "unsloth[colab-new]" \
    "trl>=0.8.6,<0.10" \
    "transformers>=4.40,<4.46" \
    "datasets>=2.18" \
    "peft" "huggingface_hub" "accelerate"
import importlib, pkg_resources
for pkg in ['unsloth','trl','transformers','datasets','peft']:
    v = pkg_resources.get_distribution(pkg).version
    print(f'  {pkg}: {v}')
print('✅ Deps installed')

In [None]:
# @title 2️⃣  Clone Hancock Repo
import os
!git clone https://github.com/cyberviser/Hancock.git /content/Hancock
os.chdir('/content/Hancock')
print('✅ Repo cloned')

In [None]:
# @title 3️⃣  Check GPU
import torch

# Validate the runtime before touching any device: the device queries below
# fail with a less helpful CUDA error when no GPU is attached, so the guard
# must run first for its message to be seen.
assert torch.cuda.is_available(), 'Enable GPU runtime first!'

gpu = torch.cuda.get_device_name(0)
# total_memory is reported in bytes; divide by 1e9 to display decimal GB.
vram = torch.cuda.get_device_properties(0).total_memory / 1e9
print(f'GPU: {gpu} | VRAM: {vram:.1f} GB')

In [None]:
# @title 4️⃣  Load Training Data
import json
from pathlib import Path

# Try v3 first (CISA KEV + Atomic + GHSA), fall back to v2
dataset_path = Path('data/hancock_v3.jsonl')
if not dataset_path.exists():
    dataset_path = Path('data/hancock_v2.jsonl')
if not dataset_path.exists():
    print('Generating v3 dataset (takes ~5 min)...')
    !python hancock_pipeline.py --phase 3
    dataset_path = Path('data/hancock_v3.jsonl')
    if not dataset_path.exists():
        dataset_path = Path('data/hancock_v2.jsonl')

lines = dataset_path.read_text().strip().splitlines()
data  = [json.loads(l) for l in lines]
print(f'✅ Loaded {len(data):,} training samples from {dataset_path.name}')
print('Sample:', json.dumps(data[0]['messages'][1], indent=2)[:200])

In [None]:
# @title 5️⃣  Load Mistral 7B (4-bit)
from unsloth import FastLanguageModel

# Settings for loading the base checkpoint with 4-bit weights.
load_kwargs = dict(
    model_name='mistralai/Mistral-7B-Instruct-v0.3',
    max_seq_length=2048,
    dtype=None,
    load_in_4bit=True,
)
model, tokenizer = FastLanguageModel.from_pretrained(**load_kwargs)

# LoRA adapter settings: rank/alpha of 32, 5% dropout, no bias terms, applied
# to the listed projection modules.
lora_kwargs = dict(
    r=32,
    target_modules=[
        'q_proj', 'k_proj', 'v_proj', 'o_proj',
        'gate_proj', 'up_proj', 'down_proj',
    ],
    lora_alpha=32,
    lora_dropout=0.05,
    bias='none',
    use_gradient_checkpointing='unsloth',
    random_state=42,
)
model = FastLanguageModel.get_peft_model(model, **lora_kwargs)

# Report how many parameters will actually be updated during training.
trainable_params = model.num_parameters(only_trainable=True)
print(f'✅ Trainable params: {trainable_params:,}')

In [None]:
# @title 6️⃣  Format Dataset
from datasets import Dataset

# Render every conversation to a single training string with the tokenizer's
# chat template (plain text, no trailing generation prompt).
texts = []
for sample in data:
    rendered = tokenizer.apply_chat_template(
        sample['messages'],
        tokenize=False,
        add_generation_prompt=False,
    )
    texts.append(rendered)

# Hold out 5% of the samples for evaluation; the seed fixes the split.
full_ds = Dataset.from_dict({'text': texts})
ds = full_ds.train_test_split(test_size=0.05, seed=42)

n_train = len(ds['train'])
n_eval = len(ds['test'])
print(f'Train: {n_train:,} | Eval: {n_eval:,}')
print('\nSample formatted text:')
print(texts[0][:400])

In [None]:
# @title 7️⃣  Train (~45 min on T4)
from trl import SFTTrainer
from transformers import TrainingArguments

# Use bf16 when the GPU supports it, otherwise fall back to fp16.
# Probe once so both flags are guaranteed to be mutually exclusive.
use_bf16 = torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    per_device_train_batch_size  = 2,
    gradient_accumulation_steps  = 4,   # effective batch size of 8 per device
    warmup_ratio                 = 0.05,
    num_train_epochs             = 3,
    learning_rate                = 2e-4,
    fp16                         = not use_bf16,
    bf16                         = use_bf16,
    logging_steps                = 20,
    eval_strategy                = 'steps',  # named `evaluation_strategy` before transformers 4.41
    eval_steps                   = 100,
    save_strategy                = 'steps',
    save_steps                   = 200,
    save_total_limit             = 2,       # keep only the two newest checkpoints
    output_dir                   = '/content/hancock_checkpoints',
    report_to                    = 'none',
    optim                        = 'adamw_8bit',
    weight_decay                 = 0.01,
    lr_scheduler_type            = 'cosine',
    seed                         = 42,
)

trainer = SFTTrainer(
    model               = model,
    tokenizer           = tokenizer,
    train_dataset       = ds['train'],
    eval_dataset        = ds['test'],
    dataset_text_field  = 'text',
    max_seq_length      = 2048,
    packing             = True,
    args                = training_args,
)

# Show initial memory usage
gpu_mem = torch.cuda.get_device_properties(0).total_memory / 1e9
used    = torch.cuda.memory_allocated(0) / 1e9
print(f'VRAM: {used:.1f}/{gpu_mem:.1f} GB used before training')

result = trainer.train()
print(f'\n✅ Done! Final loss: {result.training_loss:.4f}')
# Throughput comes from the metrics dict of the training result, not from the
# loss; fall back to NaN if the key is absent so the summary still prints.
samples_per_sec = result.metrics.get('train_samples_per_second', float('nan'))
print(f'Steps: {result.global_step} | Samples/sec: {samples_per_sec:.2f}')

In [None]:
# @title 8️⃣  Save LoRA + GGUF Q4
import os

lora_dir = '/content/hancock_lora'
gguf_dir = '/content/hancock_gguf'

# Step 1: persist the LoRA adapters together with the tokenizer files.
model.save_pretrained(lora_dir)
tokenizer.save_pretrained(lora_dir)
print('✅ LoRA adapters saved → /content/hancock_lora')

# Sum the sizes of the top-level entries of the adapter directory.
total_bytes = 0
for entry in os.listdir(lora_dir):
    total_bytes += os.path.getsize(os.path.join(lora_dir, entry))
print(f'   Size: {total_bytes / 1e6:.1f} MB')

# Step 2: best-effort GGUF Q4_K_M export (needs ~10 min plus a llama.cpp
# build). Any failure is reported but does not abort the notebook, since the
# adapters above are already on disk.
try:
    model.save_pretrained_gguf(
        gguf_dir,
        tokenizer,
        quantization_method='q4_k_m',
    )
    print('✅ GGUF Q4_K_M saved → /content/hancock_gguf')
except Exception as e:
    print(f'⚠️  GGUF export failed (non-fatal): {e}')
    print('   LoRA adapters are saved — use them directly or convert later.')
    print('   Convert offline: python -m llama_cpp.convert ...')
In [None]:
# @title 9️⃣  Test the Fine-Tuned Model
from unsloth import FastLanguageModel

# Put the model into Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)

system_turn = {
    'role': 'system',
    'content': 'You are Hancock, an elite cybersecurity AI by CyberViser.',
}
user_turn = {
    'role': 'user',
    'content': 'Explain CVE-2021-44228 Log4Shell and how to detect it in Splunk.',
}
messages = [system_turn, user_turn]

# Tokenise the chat with a trailing generation prompt and move it to the GPU.
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors='pt',
)
inputs = inputs.to('cuda')

# Sampling settings for the smoke test.
generation_kwargs = dict(
    max_new_tokens=512,
    use_cache=True,
    temperature=0.7,
    do_sample=True,
)
with torch.no_grad():
    outputs = model.generate(input_ids=inputs, **generation_kwargs)

# Drop the prompt tokens so only the newly generated continuation is decoded.
prompt_len = inputs.shape[1]
new_tokens = outputs[0][prompt_len:]
response = tokenizer.decode(new_tokens, skip_special_tokens=True)

print('Hancock says:')
print('=' * 60)
print(response)

In [None]:
# @title 🔟  Push to HuggingFace Hub (optional)
HF_TOKEN = ''  # @param {type:'string'}

# An empty token means the upload is skipped; otherwise both the model and
# the tokenizer are pushed to the same Hub repository.
if not HF_TOKEN:
    print('Skipped — add your HF_TOKEN to push')
else:
    hub_repo = 'cyberviser/hancock-mistral-7b'
    for artifact in (model, tokenizer):
        artifact.push_to_hub(hub_repo, token=HF_TOKEN)
    print(f'✅ Pushed to https://huggingface.co/{hub_repo}')

In [None]:
# @title 1️⃣1️⃣  Download GGUF to local (for Ollama)
from google.colab import files
import os

gguf_dir = '/content/hancock_gguf'

# The GGUF export is best-effort, so the output directory may not exist.
# Treat a missing directory the same as an empty one instead of crashing.
if os.path.isdir(gguf_dir):
    gguf_files = sorted(f for f in os.listdir(gguf_dir) if f.endswith('.gguf'))
else:
    gguf_files = []

if not gguf_files:
    # Tell the user why nothing is downloading rather than finishing silently.
    print(f'No .gguf files found in {gguf_dir} — run the GGUF export cell first '
          '(the LoRA adapters in /content/hancock_lora are unaffected).')

for f in gguf_files:
    print(f'Downloading {f}...')
    files.download(os.path.join(gguf_dir, f))