## Install Dependencies

In [None]:
!pip install transformers peft accelerate sentencepiece huggingface_hub
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
!pip install bitsandbytes==0.48.2

## Mount Google Drive

In [None]:
# Mount the user's Google Drive into the Colab filesystem at /content/drive.
# Later cells read the trained adapter from a path under this mount point.
from google.colab import drive
drive.mount('/content/drive')

## Configuration

In [None]:
import torch
import json
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline
)
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
from trl import SFTTrainer

# Model Configuration
MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"  # base checkpoint passed to from_pretrained
OUTPUT_DIR = "./minecraft-phi3-bot"  # checkpoint / adapter directory (relative to the working dir)

# LoRA Parameters (forwarded to LoraConfig as r / lora_alpha / lora_dropout)
LORA_R = 32
LORA_ALPHA = 64
LORA_DROPOUT = 0.05

# Training Parameters (forwarded to TrainingArguments and the tokenizer)
EPOCHS = 3
BATCH_SIZE = 2  # per-device batch size for both training and evaluation
GRADIENT_ACCUMULATION_STEPS = 8  # with BATCH_SIZE = 2 this gives 16 sequences per optimizer step per device
LEARNING_RATE = 2e-4
MAX_SEQ_LENGTH = 2048  # truncation length, in tokens, applied when tokenizing
WARMUP_STEPS = 50

## Load and Prepare Dataset

In [None]:
# Read the cleaned JSONL corpus from the working directory as a single "train" split.
dataset = load_dataset(
    "json",
    data_files="minecraft_dataset_cleaned.jsonl",
    split="train",
)

## Format Data for Phi-3

In [None]:
def format_phi3_prompt(example):
    """
    Render one raw dataset record as a single Phi-3 chat-formatted string.

    Each turn is written as a role tag on its own line followed by the turn
    text and an ``<|end|>`` terminator (system, then user, then assistant).

    Three record shapes are handled, in priority order:

    1. ``prompt`` + ``response`` (both not None): a normal chat exchange.
       " [Using Memory]" is appended to the user turn when ``memoryRecall``
       is truthy.
    2. ``plan`` (not None): a list of action dicts, emitted as indented JSON.
       The user turn is synthesised from the first action's ``module`` and
       ``action``.
    3. Anything else: a generic fallback that embeds ``str(example)``.

    The decision is made on the *values*, not on key presence. When records of
    different shapes are loaded into one table, every row carries every column
    and missing fields are ``None``; testing only ``'prompt' in example`` would
    send every plan record down the skip path and drop it.

    Args:
        example: Mapping for one record (must support ``.get`` and ``in``).

    Returns:
        ``{"text": <formatted string>}``. The string is empty when the record
        has ``prompt``/``response`` keys but one of them is None and there is
        no plan to fall back on, so a later length filter can discard it.
    """
    prompt = example.get('prompt')
    response = example.get('response')
    plan = example.get('plan')

    # prompt + response
    if prompt is not None and response is not None:
        prompt = prompt.strip()
        response = response.strip()

        # Add memory context if present
        memory_note = " [Using Memory]" if example.get('memoryRecall') else ""

        # Phi-3 chat format
        text = f"""<|system|>
You are an expert Minecraft assistant with memory capabilities. You help players with mining, crafting, building, combat, farming, and navigation. You can recall past events and locations.<|end|>
<|user|>
{prompt}{memory_note}<|end|>
<|assistant|>
{response}<|end|>"""

    # plan (multi-step actions)
    elif plan is not None:
        plan_json = json.dumps(plan, indent=2)

        # Extract task description from the first step. `or` (rather than a
        # .get default) also covers keys that are present but None.
        first_action = plan[0] if plan else {}
        module = first_action.get('module') or 'action'
        action = first_action.get('action') or 'execute'

        text = f"""<|system|>
You are an expert Minecraft assistant. Create detailed action plans.<|end|>
<|user|>
Execute {module} {action}<|end|>
<|assistant|>
{plan_json}<|end|>"""

    # Chat-shaped record with a missing side and no plan: nothing usable.
    elif 'prompt' in example and 'response' in example:
        return {"text": ""}

    # FALLBACK
    else:
        text = f"<|system|>You are a Minecraft assistant.<|end|><|user|>{str(example)}<|end|><|assistant|><|end|>"

    return {"text": text}

# Render every record into a single "text" column, drop rows whose text is 10
# characters or shorter (this removes the empty strings produced for unusable
# records), then hold out 10% for evaluation with a fixed seed.
dataset = (
    dataset
    .map(format_phi3_prompt, remove_columns=dataset.column_names)
    .filter(lambda row: len(row['text']) > 10)
    .train_test_split(test_size=0.1, seed=42)
)
train_dataset, eval_dataset = dataset['train'], dataset['test']

## Load Phi-3 Model

In [None]:
# Quantization recipe: 4-bit NF4 weights, double quantization, bfloat16 compute.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# Fast tokenizer that appends EOS to every encoded sequence.
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    use_fast=True,
    add_eos_token=True,
    trust_remote_code=True,
)

# Reuse <unk> as the padding token and pad on the right-hand side.
tokenizer.pad_token = tokenizer.unk_token
tokenizer.pad_token_id = tokenizer.unk_token_id
tokenizer.padding_side = 'right'

# Quantized base model, placed automatically across the available devices.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    attn_implementation="eager",
    torch_dtype=torch.bfloat16,
    device_map="auto",
    quantization_config=bnb_config,
    trust_remote_code=True,
)

# Config tweaks applied before training: KV cache off, tensor-parallel degree 1.
model.config.use_cache = False
model.config.pretraining_tp = 1

In [None]:
def tokenize_function(examples):
    """Tokenize the ``text`` column of a batch, truncated to MAX_SEQ_LENGTH and left unpadded."""
    return tokenizer(
        examples['text'],
        max_length=MAX_SEQ_LENGTH,
        truncation=True,
        padding=False,
    )

# Replace the raw "text" column of each split with the tokenizer's outputs.
train_dataset = train_dataset.map(
    tokenize_function,
    desc="Tokenizing training data",
    remove_columns=train_dataset.column_names,
    batched=True,
)

eval_dataset = eval_dataset.map(
    tokenize_function,
    desc="Tokenizing validation data",
    remove_columns=eval_dataset.column_names,
    batched=True,
)

## Configure LoRA

In [None]:
# Ready the quantized base model for training (with gradient checkpointing).
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)

# Configuring LoRA
# Phi-3 fuses its projections: attention uses a single `qkv_proj` (plus
# `o_proj`) and the MLP a single `gate_up_proj` (plus `down_proj`). The
# Llama-style names q_proj/k_proj/v_proj/gate_proj/up_proj do not exist in this
# architecture, so listing them would leave adapters on o_proj and down_proj only.
peft_config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["qkv_proj", "o_proj", "gate_up_proj", "down_proj"],
)

# Apply LoRA
model = get_peft_model(model, peft_config)

# Report how many parameters are trainable so a mis-targeted config is visible.
model.print_trainable_parameters()

# Enable gradient checkpointing for memory efficiency
model.gradient_checkpointing_enable()

## Training Configuration

In [None]:
# The base model is loaded with bfloat16 weights and bfloat16 4-bit compute, so
# train in bf16 whenever the GPU supports it natively (compute capability >= 8,
# i.e. Ampere or newer). Older GPUs keep the fp16 path.
use_bf16 = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=EPOCHS,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    gradient_checkpointing=True,
    optim="paged_adamw_32bit",
    learning_rate=LEARNING_RATE,
    lr_scheduler_type="cosine",
    warmup_steps=WARMUP_STEPS,
    logging_steps=10,
    # Evaluate every 50 steps and checkpoint every 100; save_steps stays a
    # multiple of eval_steps, which load_best_model_at_end requires.
    eval_strategy="steps",
    eval_steps=50,
    save_strategy="steps",
    save_steps=100,
    save_total_limit=2,
    bf16=use_bf16,
    fp16=not use_bf16,
    max_grad_norm=0.3,
    group_by_length=True,
    report_to="none",
    load_best_model_at_end=True,
    ddp_find_unused_parameters=False,
    remove_unused_columns=False,
)

## Initialize Trainer

In [None]:
from transformers import Trainer, DataCollatorForLanguageModeling

# Causal-LM collator (mlm=False) that pads each batch to a multiple of 8 tokens.
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    pad_to_multiple_of=8,
    mlm=False,
)

# Plain Trainer over the pre-tokenized splits.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)

## Start Training

In [None]:
# Run the fine-tuning loop with the settings held in `training_args`.
trainer.train()

## Save Model

In [None]:
# The inference-test and GGUF-conversion cells read the adapter from Google
# Drive, so write it there as well as to the local OUTPUT_DIR. A copy that lives
# only on the Colab VM is lost when the runtime is recycled.
DRIVE_ADAPTER_DIR = "/content/drive/MyDrive/CraftAI/minecraft-phi3-bot"

for save_dir in (OUTPUT_DIR, DRIVE_ADAPTER_DIR):
    trainer.model.save_pretrained(save_dir)
    tokenizer.save_pretrained(save_dir)

### Test the Model

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import gc

MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
ADAPTER_PATH = "/content/drive/MyDrive/CraftAI/minecraft-phi3-bot"

# Clear memory
torch.cuda.empty_cache()
gc.collect()

# Loading Model: try automatic device placement first, fall back to CPU when
# that raises a RuntimeError (CUDA out-of-memory is a RuntimeError subclass).
try:
    base_model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
    )
except RuntimeError as e:
    # Say why the fallback happened instead of switching devices silently.
    print(f"Loading with device_map='auto' failed ({e}); falling back to CPU.")
    torch.cuda.empty_cache()
    gc.collect()
    base_model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float16,
        device_map="cpu",
        trust_remote_code=True,
    )

# Attach the fine-tuned LoRA adapter and switch to inference mode.
model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
model.eval()

# Loading tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True
)

def generate_response(prompt, max_tokens=150):
    """
    Sample a completion for an already chat-formatted prompt.

    Args:
        prompt: Full prompt text, including the Phi-3 role tags and ending with
            the assistant tag so the model continues the assistant turn.
        max_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        The decoded continuation only (prompt tokens are sliced off), with
        special tokens removed.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # The training text closes every turn with "<|end|>", which is a different
    # token from the tokenizer's EOS. Passing only tokenizer.eos_token_id would
    # override the model's default stop list and let generation run past the
    # end of the assistant turn, so stop on either token.
    stop_ids = []
    if tokenizer.eos_token_id is not None:
        stop_ids.append(tokenizer.eos_token_id)
    end_id = tokenizer.convert_tokens_to_ids("<|end|>")
    # An unknown token maps to the unk id (or None); only add a real id.
    if isinstance(end_id, int) and end_id != tokenizer.unk_token_id and end_id not in stop_ids:
        stop_ids.append(end_id)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=0.7,
            top_p=0.95,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=stop_ids or None,
            # NOTE(review): kept from the original; presumably a workaround for
            # a cache incompatibility. Confirm before enabling the KV cache.
            use_cache=False,
        )

    # Decode only the newly generated tokens (everything after the prompt).
    prompt_len = inputs['input_ids'].shape[1]
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    return response

import traceback

# Use the same system prompt that the prompt/response training examples were
# formatted with, so the inference input matches what the adapter was tuned on.
SYSTEM_PROMPT = (
    "You are an expert Minecraft assistant with memory capabilities. "
    "You help players with mining, crafting, building, combat, farming, and navigation. "
    "You can recall past events and locations."
)

# Test prompts
test_cases = [
    "Check if we have enough sticks. We ran out yesterday.",
    "Go back to the iron cave we found yesterday and mine more.",
    "Defend the farm again from those mobs.",
    "Mine 5 iron ores and return to me.",
    "Craft a diamond pickaxe.",
]


for i, user_message in enumerate(test_cases, 1):
    print(f"Test {i}/{len(test_cases)}")
    print(f"User: {user_message}")

    # Format as Phi-3 prompt; it ends with the assistant tag and a newline so
    # the model continues the assistant turn.
    prompt = f"""<|system|>
{SYSTEM_PROMPT}<|end|>
<|user|>
{user_message}<|end|>
<|assistant|>
"""

    try:
        # Generate response
        bot_response = generate_response(prompt, max_tokens=100)

        # Clean up response: keep only the text before a literal end-of-turn tag
        if "<|end|>" in bot_response:
            bot_response = bot_response.split("<|end|>")[0].strip()

        # Remove any remaining special tokens
        bot_response = bot_response.replace("<|assistant|>", "").strip()

        print(f"Bot: {bot_response}")

    except Exception as e:
        # Report the failure for this prompt and continue with the next one.
        print(f"Error: {str(e)}")
        traceback.print_exc()

# Drop both model references, then release cached CUDA blocks and run the GC.
del model, base_model
torch.cuda.empty_cache()
gc.collect()

### Converting to GGUF

In [None]:
import os
import sys
import shutil
import gc
from pathlib import Path

# Paths
ADAPTER_DRIVE = "/content/drive/MyDrive/CraftAI/minecraft-phi3-bot/"
TEMP = "/tmp"
ADAPTER_TEMP = f"{TEMP}/adapters"
MERGED_TEMP = f"{TEMP}/merged"
OUTPUT_GGUF = f"{TEMP}/minecraft-bot-q4.gguf"


# Clean temp: start from an empty staging directory on every run
if os.path.exists(ADAPTER_TEMP):
    shutil.rmtree(ADAPTER_TEMP)
os.makedirs(ADAPTER_TEMP, exist_ok=True)

# Files copied from the Drive adapter folder into the staging directory.
required_files = [
    "adapter_config.json",
    "adapter_model.safetensors",
    "README.md"
]

for filename in required_files:
    src = os.path.join(ADAPTER_DRIVE, filename)
    dst = os.path.join(ADAPTER_TEMP, filename)

    if not os.path.exists(src):
        print(f"  {filename} (not found)")
        continue

    # One line per copied file (name and size), each on its own line.
    size_mb = os.path.getsize(src) / (1024*1024)
    print(f"   • {filename} ({size_mb:.1f}MB)")
    shutil.copy2(src, dst)

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch

BASE_MODEL = "microsoft/Phi-3-mini-4k-instruct"

# Reclaim memory before loading another full copy of the model.
torch.cuda.empty_cache()
gc.collect()

# Load the half-precision base model on CPU.
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    low_cpu_mem_usage=True,
    device_map="cpu",
    torch_dtype=torch.float16,
    trust_remote_code=True,
)

# Attach the staged adapter and fold its weights into the base model.
model = PeftModel.from_pretrained(model, ADAPTER_TEMP).merge_and_unload()

# Write the merged checkpoint as safetensors shards, with the tokenizer beside it.
os.makedirs(MERGED_TEMP, exist_ok=True)
model.save_pretrained(MERGED_TEMP, max_shard_size="5GB", safe_serialization=True)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
tokenizer.save_pretrained(MERGED_TEMP)

# Release the merged model.
del model
torch.cuda.empty_cache()
gc.collect()

In [None]:
import subprocess
!pip install -q huggingface_hub gguf

LLAMA_TEMP = f"{TEMP}/llama.cpp"
!git clone --quiet --depth 1 https://github.com/ggerganov/llama.cpp.git {LLAMA_TEMP}
!pip install -q -r {LLAMA_TEMP}/requirements.txt

# Convert to FP16
fp16_temp = f"{TEMP}/minecraft-fp16.gguf"

try:
    !python {LLAMA_TEMP}/convert_hf_to_gguf.py {MERGED_TEMP} \
        --outfile {fp16_temp} \
        --outtype f16

except Exception as e:
    print(f"Conversion error: {e}")
    sys.exit(1)

In [None]:
# Quantize to Q4_K_M
# Work inside the llama.cpp checkout so the cmake commands below use it as the
# source directory.
%cd {LLAMA_TEMP}

# Install the compiler toolchain and cmake needed for the build.
!sudo apt-get update -y
!sudo apt-get install -y build-essential cmake

# Build llama.cpp
# Configure into ./build, then compile in Release mode using all available cores.
!cmake -B build
!cmake --build build --config Release -j $(nproc)

# Path of the quantization tool inside the build tree.
QUANT_BIN = f"{LLAMA_TEMP}/build/bin/llama-quantize"

# Read the FP16 GGUF and write the Q4_K_M GGUF to OUTPUT_GGUF.
!{QUANT_BIN} {fp16_temp} {OUTPUT_GGUF} Q4_K_M

# Return to the default Colab working directory.
%cd /content

In [None]:
!pip install -q pydrive2

from google.colab import files
from pydrive2.auth import GoogleAuth
from pydrive2.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

auth.authenticate_user()

gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

file = drive.CreateFile({'title': 'minecraft-bot-q4.gguf'})
file.SetContentFile("OUTPUT_GGUF")
file.Upload()

print("Uploaded successfully!")
