 ## Setup and Dependencies

In [3]:
# Cell 1: Install Dependencies
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# Install core libraries without dependency conflicts
!pip install --no-deps "xformers" trl peft accelerate bitsandbytes
# Critical fix: Reinstall compatible protobuf version
!pip install protobuf==3.20.3 --force-reinstall --no-deps
# Install llama-cpp-python for necessary build environment tools
!pip install llama-cpp-python --upgrade --no-cache-dir

# Import Drive for persistence (must be run first in a new session)
from google.colab import drive
drive.mount('/content/drive')

Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-1m_tjk9q/unsloth_4c1c52aa34d34b0fbefc43bd52cb6ae6
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-1m_tjk9q/unsloth_4c1c52aa34d34b0fbefc43bd52cb6ae6
  Resolved https://github.com/unslothai/unsloth.git to commit 8c155a2c07fc6521c869e44fb5f43b93fc0b81ff
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting unsloth_zoo>=2025.10.1 (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Downloading unsloth_zoo-2025.10.1-py3-none-any.whl.metadata (31 kB)
Collecting tyro (from unsloth@ git+https://github.com/unslothai/unsloth.gi

Collecting xformers
  Downloading xformers-0.0.32.post2-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (1.1 kB)
Downloading xformers-0.0.32.post2-cp39-abi3-manylinux_2_28_x86_64.whl (117.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m117.2/117.2 MB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: xformers
Successfully installed xformers-0.0.32.post2
Collecting protobuf==3.20.3
  Downloading protobuf-3.20.3-py2.py3-none-any.whl.metadata (720 bytes)
Downloading protobuf-3.20.3-py2.py3-none-any.whl (162 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m162.1/162.1 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: protobuf
  Attempting uninstall: protobuf
    Found existing installation: protobuf 5.29.5
    Uninstalling protobuf-5.29.5:
      Successfully uninstalled protobuf-5.29.5
Successfully installed protobuf-3.20.3


Collecting llama-cpp-python
  Downloading llama_cpp_python-0.3.16.tar.gz (50.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.7/50.7 MB[0m [31m252.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting diskcache>=5.6.1 (from llama-cpp-python)
  Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)
Downloading diskcache-5.6.3-py3-none-any.whl (45 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 kB[0m [31m198.8 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: llama-cpp-python
  Building wheel for llama-cpp-python (pyproject.toml) ... [?25l[?25hdone
  Created wheel for llama-cpp-python: filename=llama_cpp_python-0.3.16-cp312-cp312-linux_x86_64.whl size=4503283 sha256=102243677260aa62

## Load and Format Data

In [1]:
# Cell 2: Load and Format Data
from datasets import load_dataset
import torch

# Load your prepared dataset from the dataset.jsonl file
dataset = load_dataset("json", data_files="ltl_training_dataset.jsonl", split="train")

# Define the comprehensive system prompt for the LTL task
def format_instruction(example):
    instruction = example['instruction']
    output = example['output']

    system_context = """You are a specialized translator that converts natural language drone commands into Linear Temporal Logic (LTL) formulas.

Available LTL operators:
- F(φ) = eventually φ will be true
- G(φ) = φ is always true
- X(φ) = φ is true in next step
- φ & ψ = both φ and ψ are true
- φ | ψ = either φ or ψ is true
- !φ = φ is not true
- φ U ψ = φ until ψ

Available predicates and actions:
- at(location): drone at specific location
- near(location, radius): drone within radius of location
- above(altitude): drone above altitude threshold
- below(altitude): drone below altitude threshold
- move_to(location): navigate to location
- hover(duration): maintain position for seconds
- scan(area): perform sensor sweep
- emergency_return(): return to start position
- land(): controlled landing
- clear_of(obstacle): maintain distance from obstacle
- in_bounds(): stay within flight zone
- battery_level(threshold): battery above threshold
- moving(): drone is in motion
- stationary(): drone is not moving

Available waypoints: landing_pad, waypoint_a, waypoint_b, waypoint_c, area_1, obstacle_1, obstacle_2

Respond only with the LTL formula."""

    return {
        "text": f"[INST] {system_context}\n\nTranslate this drone command to LTL: {instruction} [/INST] {output}"
    }

# Apply the formatting and split the dataset
dataset = dataset.map(format_instruction)
dataset = dataset.train_test_split(test_size=0.2, seed=42)

train_dataset = dataset["train"]
eval_dataset = dataset["test"]

print(f"Training samples: {len(train_dataset)}")
print(f"Validation samples: {len(eval_dataset)}")

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/550 [00:00<?, ? examples/s]

Training samples: 440
Validation samples: 110


##  Load Model, Tokenizer, and LoRA

In [4]:
# Cell 3: Load Model & LoRA Setup
from unsloth import FastLanguageModel

model_name = "unsloth/mistral-7b-instruct-v0.2-bnb-4bit"
max_seq_length = 512

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=None,
    load_in_4bit=True,
)

# Optimized LoRA configuration
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj"
    ],
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
)
print("Model loaded and LoRA adapters attached.")

 Unsloth: Will patch your computer to enable 2x faster free finetuning.
 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.10.1: Fast Mistral patching. Transformers: 4.56.2.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/4.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/155 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Unsloth 2025.10.1 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


Model loaded and LoRA adapters attached.


## Setup and Run SFTTrainer (Training)

In [5]:
# Cell 4: Run Training
from trl import SFTTrainer
from transformers import TrainingArguments, EarlyStoppingCallback

# Set necessary padding tokens and side
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = tokenizer.eos_token_id
tokenizer.padding_side = "left"

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    dataset_text_field="text",
    max_seq_length=512,
    packing=False,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)],
    args=TrainingArguments(
        per_device_train_batch_size=2,
        per_device_eval_batch_size=2,
        gradient_accumulation_steps=8,
        learning_rate=2e-5,
        num_train_epochs=8,
        warmup_steps=10,
        eval_strategy="steps",
        eval_steps=20,
        save_strategy="steps",
        save_steps=20,
        logging_steps=1,
        output_dir="outputs",
        optim="adamw_8bit",
        seed=3407,
        fp16=not torch.cuda.is_bf16_supported(),
        bf16=torch.cuda.is_bf16_supported(),
        save_total_limit=1,
        load_best_model_at_end=True,
        metric_for_best_model="eval_loss",
        greater_is_better=False,
        dataloader_drop_last=True,
        remove_unused_columns=True,
        group_by_length=True,
    ),
)

print("Starting LTL translation training...")
trainer_stats = trainer.train()
print("Training complete! Best model weights loaded.")

Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/440 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/110 [00:00<?, ? examples/s]

Starting LTL translation training...


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 440 | Num Epochs = 8 | Total steps = 224
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 8 x 1) = 16
 "-____-"     Trainable parameters = 41,943,040 of 7,283,675,136 (0.58% trained)
  | |_| | '_ \/ _` / _` |  _/ -_)


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


wandb: Paste an API key from your profile and hit enter:

 ··········


wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33misraelavendanojr[0m ([33misraelavendanojr-western-washington-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: Detected [huggingface_hub.inference, openai] in use.
[34m[1mwandb[0m: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
[34m[1mwandb[0m: For more information, check out the docs at: https://weave-docs.wandb.ai/


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss,Validation Loss
20,0.7188,0.662371
40,0.1118,0.126502
60,0.0792,0.086073
80,0.0554,0.077292
100,0.0719,0.072884
120,0.074,0.069335
140,0.0529,0.067239
160,0.0678,0.065554
180,0.0504,0.064601
200,0.0546,0.06375


Unsloth: Not an error, but MistralForCausalLM does not accept `num_items_in_batch`.
Using gradient accumulation will be very slightly less accurate.
Read more on gradient accumulation issues here: https://unsloth.ai/blog/gradient


Training complete! Best model weights loaded.


## Training Persistence and Merging

In [6]:
# Cell 5: Save Model to Google Drive & Merge Weights

import os

# --- Configuration ---
LORA_OUTPUT_DIR = "/content/drive/MyDrive/LTL_FineTuned_Model/ltl_adapter_weights"
FULL_MERGED_DIR = "/content/ltl_translation_gguf"

# 1. Save LoRA Weights to Google Drive (Small, FAST Backup)
os.makedirs(LORA_OUTPUT_DIR, exist_ok=True)
model.save_pretrained(LORA_OUTPUT_DIR)
tokenizer.save_pretrained(LORA_OUTPUT_DIR)
print(f"LoRA adapter weights saved to Google Drive at: {LORA_OUTPUT_DIR}")

# 2. Save the Full Merged 16-bit Model Locally (Input for GGUF)
# This creates the pytorch_model.bin file needed by the converter.
os.makedirs(FULL_MERGED_DIR, exist_ok=True)
model.save_pretrained_merged(FULL_MERGED_DIR, tokenizer, save_method = "merged_16bit")
print(f"Full merged model (pytorch_model.bin) saved locally to: {FULL_MERGED_DIR}")

LoRA adapter weights saved to Google Drive at: /content/drive/MyDrive/LTL_FineTuned_Model/ltl_adapter_weights


config.json:   0%|          | 0.00/722 [00:00<?, ?B/s]

Found HuggingFace hub cache directory: /root/.cache/huggingface/hub


Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Checking cache directory for required files...
Cache check failed: model-00001-of-00003.safetensors not found in local cache.
Not all required files found in cache. Will proceed with downloading.


Unsloth: Preparing safetensor model files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

Unsloth: Preparing safetensor model files:  33%|███▎      | 1/3 [04:34<09:08, 274.49s/it]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Unsloth: Preparing safetensor model files:  67%|██████▋   | 2/3 [07:17<03:28, 208.96s/it]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Unsloth: Preparing safetensor model files: 100%|██████████| 3/3 [09:49<00:00, 196.60s/it]
Unsloth: Merging weights into 16bit: 100%|██████████| 3/3 [06:29<00:00, 129.73s/it]


Unsloth: Merge process complete. Saved to `/content/ltl_translation_gguf`
Full merged model (pytorch_model.bin) saved locally to: /content/ltl_translation_gguf


## Recovery Cell (Run only if session restarts)

In [None]:
# Cell 6: RECOVERY CELL (Run ONLY if you lost the runtime after training)

from unsloth import FastLanguageModel
from google.colab import drive

# Re-mount Drive (needed after restart)
drive.mount('/content/drive')

# --- Configuration ---
LORA_OUTPUT_DIR = "/content/drive/MyDrive/LTL_FineTuned_Model/ltl_adapter_weights"
model_name = "unsloth/mistral-7b-instruct-v0.2-bnb-4bit"
max_seq_length = 512
FULL_MERGED_DIR = "/content/ltl_translation_gguf"

# 1. Load the Base Model and LoRA adapters from Drive
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=None,
    load_in_4bit=True,
    # Load the saved adapter weights
    adapter_path=LORA_OUTPUT_DIR,
)

# 2. Merge the model locally for GGUF conversion input
model.save_pretrained_merged(FULL_MERGED_DIR, tokenizer, save_method = "merged_16bit")

print("Model recovered from Google Drive and merged locally. Ready for GGUF conversion.")

## Guaranteed GGUF Export (Manual Shell Method)

In [22]:
# Cell 7: CORRECTED - Convert Merged Model to GGUF (THE WORKING WORKFLOW)

import os

# --- Configuration ---
MERGED_MODEL_DIR = "/content/ltl_translation_gguf"  # This has your merged 16-bit model
QUANTIZATION_METHOD = "Q4_K_M"
FINAL_GGUF_FILE = f"{MERGED_MODEL_DIR}/unsloth_{QUANTIZATION_METHOD}.gguf"
HF_TO_GGUF_SCRIPT = "/content/llama.cpp/convert_hf_to_gguf.py"

print("=" * 80)
print("CORRECTED GGUF CONVERSION WORKFLOW")
print("=" * 80)

# Step 1: Clean start - remove llama.cpp and clone fresh
print("\n--- Step 1: Setting up llama.cpp ---")
!rm -rf llama.cpp
!git clone --recursive https://github.com/ggerganov/llama.cpp
!cd llama.cpp && cmake -B build && cmake --build build --config Release
!pip install -r llama.cpp/requirements.txt --quiet
print(" llama.cpp compiled successfully")

# Step 2: Convert merged HF model to GGUF F16 format
print("\n--- Step 2: Converting merged model to GGUF F16 ---")
F16_GGUF_FILE = f"{MERGED_MODEL_DIR}/unsloth_F16.gguf"

!python3 {HF_TO_GGUF_SCRIPT} \
    {MERGED_MODEL_DIR} \
    --outtype f16 \
    --outfile {F16_GGUF_FILE}

if not os.path.exists(F16_GGUF_FILE):
    print(" ERROR: F16 GGUF conversion failed!")
    raise FileNotFoundError(f"Could not create {F16_GGUF_FILE}")
else:
    size_gb = os.path.getsize(F16_GGUF_FILE) / (1024**3)
    print(f" F16 GGUF created successfully: {size_gb:.2f} GB")

# Step 3: Quantize F16 GGUF to Q4_K_M using llama-quantize
print(f"\n--- Step 3: Quantizing to {QUANTIZATION_METHOD} ---")

!cd llama.cpp/build/bin && ./llama-quantize \
    {F16_GGUF_FILE} \
    {FINAL_GGUF_FILE} \
    {QUANTIZATION_METHOD}

# Step 4: Verify final output
print("\n" + "=" * 80)
if os.path.exists(FINAL_GGUF_FILE):
    final_size_gb = os.path.getsize(FINAL_GGUF_FILE) / (1024**3)
    print(" SUCCESS! Your LTL model is ready for llama.cpp!")
    print(f" Final Model: {FINAL_GGUF_FILE}")
    print(f" Size: {final_size_gb:.2f} GB")
    print(f" Format: {QUANTIZATION_METHOD}")
    print("\n Ready to download! Run the next cell.")
else:
    print(" QUANTIZATION FAILED")
    print(f"Expected file not found: {FINAL_GGUF_FILE}")
print("=" * 80)

CORRECTED GGUF CONVERSION WORKFLOW

--- Step 1: Setting up llama.cpp ---
Cloning into 'llama.cpp'...
remote: Enumerating objects: 64335, done.[K
remote: Counting objects: 100% (3/3), done.[K
remote: Compressing objects: 100% (2/2), done.[K
remote: Total 64335 (delta 1), reused 1 (delta 1), pack-reused 64332 (from 1)[K
Receiving objects: 100% (64335/64335), 169.32 MiB | 21.09 MiB/s, done.
Resolving deltas: 100% (46794/46794), done.
-- The C compiler identification is GNU 11.4.0
-- The CXX compiler identification is GNU 11.4.0
-- Detecting C compiler ABI info
-- Detecting C compiler ABI info - done
-- Check for working C compiler: /usr/bin/cc - skipped
-- Detecting C compile features
-- Detecting C compile features - done
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Check for working CXX compiler: /usr/bin/c++ - skipped
-- Detecting CXX compile features
-- Detecting CXX compile features - done
[0mCMAKE_BUILD_TYPE=Release[0m
-- Found Git: /usr/bin/

## Download Final Model

In [None]:
# Cell 8: Download Final GGUF Model (CORRECTED)

from google.colab import files
import os

# FIXED: Correct filename with underscore
MODEL_FILE = "/content/ltl_translation_gguf/unsloth_Q4_K_M.gguf"

print("=" * 80)
print("DOWNLOADING YOUR LTL TRANSLATION MODEL")
print("=" * 80)

if os.path.exists(MODEL_FILE):
    file_size_gb = os.path.getsize(MODEL_FILE) / (1024**3)
    print(f"\nModel found!")
    print(f"File: {MODEL_FILE}")
    print(f"Size: {file_size_gb:.2f} GB")
    print(f"\nStarting download... (This will take a few minutes)")
    print("The file will appear in your browser's download folder.\n")

    files.download(MODEL_FILE)

    print("=" * 80)
    print("DOWNLOAD COMPLETE!")
    print("=" * 80)
    print("\nNext Steps:")
    print("1. The file is now in your Downloads folder")
    print("2. Test it with: llama.cpp/llama-cli -m unsloth_Q4_K_M.gguf -p 'Your prompt'")
    print("3. For LTL translation, use the same system prompt from training")
    print("=" * 80)
else:
    print(f"\nERROR: File not found at expected location")
    print(f"Expected: {MODEL_FILE}")
    print("\nChecking directory contents...")

    dir_path = "/content/ltl_translation_gguf"
    if os.path.exists(dir_path):
        print(f"\nFiles in {dir_path}:")
        for file in os.listdir(dir_path):
            if file.endswith('.gguf'):
                full_path = os.path.join(dir_path, file)
                size_gb = os.path.getsize(full_path) / (1024**3)
                print(f"  • {file} ({size_gb:.2f} GB)")
    else:
        print(f"Directory {dir_path} does not exist!")

    print("\nIf you see a .gguf file above, update MODEL_FILE variable to match it.")

DOWNLOADING YOUR LTL TRANSLATION MODEL

 Model found!
 File: /content/ltl_translation_gguf/unsloth_Q4_K_M.gguf
 Size: 4.07 GB

 Starting download... (This will take a few minutes)
 The file will appear in your browser's download folder.



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

 DOWNLOAD COMPLETE!

 Next Steps:
1. The file is now in your Downloads folder
2. Test it with: llama.cpp/llama-cli -m unsloth_Q4_K_M.gguf -p 'Your prompt'
3. For LTL translation, use the same system prompt from training
