In [None]:
# Install the packages this notebook relies on into the Colab runtime
# (llama-cpp-python for GGUF inference, flask/flask-cors for the HTTP API, pyngrok for the tunnel).
# NOTE(review): versions are unpinned, so a fresh runtime may pull newer, incompatible releases — consider pinning.
!pip install -q --ignore-installed llama-cpp-python flask flask-cors requests pyngrok python-dotenv

In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive so files kept there (e.g. the GGUF model) are reachable from this runtime.
drive.mount('/content/drive')

In [None]:
import os
import json
import logging
import torch
from flask import Flask, request, jsonify
from flask_cors import CORS
from llama_cpp import Llama
from pathlib import Path
import re

In [None]:
# Configure root logging at INFO level and create this notebook's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Flask application that hosts the HTTP endpoints defined below; CORS(app) applies flask-cors to it.
app = Flask(__name__)
CORS(app)

# Module-level state filled in by initialize_model().
llm = None  # the loaded Llama instance; None means "no model loaded" (checked by /health and /parse)
model_info = {}  # metadata about the loaded model, served by /info and /health

def initialize_model(model_path, n_gpu_layers=35, n_ctx=4096, n_threads=4):
    """Load the GGUF model into the module-level ``llm`` and record its metadata.

    Args:
        model_path: Filesystem path of the GGUF model file.
        n_gpu_layers: Number of layers passed to ``Llama`` for GPU offload.
        n_ctx: Context window size in tokens; the default leaves room for the
            long system prompt plus multi-step answers.
        n_threads: Number of CPU threads passed to ``Llama``.

    Returns:
        True when the model was loaded. False when loading failed; the error is
        logged with its traceback and any previously loaded model is kept.
    """
    global llm, model_info

    model_path = str(model_path)

    logger.info("Initializing Phi-3 GGUF Model")
    logger.info(f"Model: {model_path}")

    # Query the GPU once and reuse the values for the log line and model_info.
    if torch.cuda.is_available():
        gpu_name = torch.cuda.get_device_name(0)
        gpu_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)
        logger.info(f"GPU: {gpu_name} ({gpu_memory:.1f}GB)")
    else:
        gpu_name, gpu_memory = 'CPU', 0

    try:
        # Sampling settings such as temperature/top_p are per-request arguments
        # of the completion call; the constructor does not apply them, so they
        # are deliberately not passed here.
        loaded_model = Llama(
            model_path=model_path,
            n_ctx=n_ctx,
            n_threads=n_threads,
            n_gpu_layers=n_gpu_layers,
            verbose=False,
        )
    except Exception:
        # Report failure through the return value, which is what the caller checks.
        logger.exception(f"Failed to load model from {model_path}")
        return False

    # Publish the new model only after it has loaded successfully.
    llm = loaded_model
    model_info = {
        'loaded': True,
        'model_path': model_path,
        'gpu_name': gpu_name,
        'gpu_memory': gpu_memory,
    }

    logger.info("✅ Model loaded successfully!")
    return True

# Matches chat-template control markers of the form <|name|> (e.g. <|end|>, <|system|>).
_CHAT_MARKER_RE = re.compile(r'<\|.*?\|>')


def _strip_chat_markers(value) -> str:
    """Remove every <|...|> marker from value, repeating until none can be re-formed."""
    text = str(value)
    previous = None
    # A single pass can leave a new marker behind (e.g. "<<|a|>|end|>"), so loop until stable.
    while previous != text:
        previous = text
        text = _CHAT_MARKER_RE.sub('', text)
    return text


def format_prompt(user_message: str, context: dict = None, requesting_player: str = None) -> str:
    """Build the Phi-3 chat prompt (system + user turn) for one player message.

    Player-supplied text (the message, the player name and the context values)
    is stripped of ``<|...|>`` chat-template markers before being placed in the
    template, so a chat message cannot close the user turn or open a fake
    system turn.

    Args:
        user_message: What the player typed.
        context: Optional bot status dict; the keys ``health``, ``food`` and
            ``position`` are read. Anything that is not a dict is ignored.
        requesting_player: Name of the player talking to the bot; generic
            placeholders are used when it is missing or empty.

    Returns:
        The full prompt string, ending with the opening of the assistant turn.
    """
    user_message = _strip_chat_markers(user_message)
    # An empty result falls through to the same placeholders as a missing name.
    requesting_player = _strip_chat_markers(requesting_player) if requesting_player else None

    system_prompt = f"""You are an intelligent Minecraft bot assistant. You can understand natural language, break down complex requests into multiple steps, and reason about what needs to be done.

CRITICAL INSTRUCTIONS:
1. "me" or "I" ALWAYS refers to the player who is talking to you: {requesting_player or "the player"}
2. You can execute MULTIPLE actions in sequence - return them as an array
3. Think step-by-step: if player asks for something you don't have, first get it, then give it
4. Be conversational for greetings/questions, but also provide helpful context

AVAILABLE ACTIONS:
1. mine: {{"action":"mine","blockType":"string","count":number}}
2. fight: {{"action":"fight","mobType":"string","radius":number}}
3. harvest: {{"action":"harvest","cropType":"string","radius":number}}
4. craft: {{"action":"craft","itemName":"string","count":number}}
5. navigate: {{"action":"navigate","x":number,"y":number,"z":number}}
6. follow: {{"action":"follow","playerName":"string","distance":number}}
7. give: {{"action":"give","playerName":"string","itemName":"string","count":number}}
8. afk: {{"action":"afk","duration":number}}
9. status: {{"action":"status"}}
10. stop: {{"action":"stop"}}
11. respond: {{"action":"respond","message":"string"}} - For conversations/greetings

RESPONSE FORMATS:

For simple greetings/questions:
{{"action":"respond","message":"I'm doing great! Ready to help you mine, build, or fight!"}}

For single action:
{{"action":"mine","blockType":"diamond_ore","count":5}}

For multiple actions (IMPORTANT - use steps array):
{{
  "steps": [
    {{"action":"mine","blockType":"oak_log","count":2}},
    {{"action":"give","playerName":"{requesting_player or 'player'}","itemName":"oak_log","count":2}}
  ]
}}

For complex requests requiring reasoning:
{{
  "steps": [
    {{"action":"mine","blockType":"diamond_ore","count":3}},
    {{"action":"craft","itemName":"diamond_pickaxe","count":1}},
    {{"action":"give","playerName":"{requesting_player or 'player'}","itemName":"diamond_pickaxe","count":1}}
  ]
}}

EXAMPLES:

User: "how are you?"
Response: {{"action":"respond","message":"I'm doing great! Ready to help you with mining, crafting, or fighting!"}}

User: "mine 2 logs and give them to me"
Response: {{
  "steps": [
    {{"action":"mine","blockType":"oak_log","count":2}},
    {{"action":"give","playerName":"{requesting_player or 'player'}","itemName":"oak_log","count":2}}
  ]
}}

User: "I need a diamond pickaxe"
Response: {{
  "steps": [
    {{"action":"mine","blockType":"diamond_ore","count":3}},
    {{"action":"craft","itemName":"diamond_pickaxe","count":1}},
    {{"action":"give","playerName":"{requesting_player or 'player'}","itemName":"diamond_pickaxe","count":1}}
  ]
}}

User: "get me some wood"
Response: {{
  "steps": [
    {{"action":"mine","blockType":"oak_log","count":5}},
    {{"action":"give","playerName":"{requesting_player or 'player'}","itemName":"oak_log","count":5}}
  ]
}}

User: "craft sticks and give to Steve"
Response: {{
  "steps": [
    {{"action":"craft","itemName":"stick","count":4}},
    {{"action":"give","playerName":"Steve","itemName":"stick","count":4}}
  ]
}}

Remember:
- "me", "I", "my" = {requesting_player or "the player who is talking"}
- Think about dependencies (need materials before crafting)
- Break complex requests into logical steps
- Respond conversationally to greetings
- ALWAYS return valid JSON only, no explanations"""

    context_str = ""
    if context and isinstance(context, dict):
        # Context values come from the client too, so they get the same marker stripping.
        context_str = _strip_chat_markers(
            f"\nBot Status: Health={context.get('health', 20)}/20, Food={context.get('food', 20)}/20, Position={context.get('position', 'unknown')}"
        )

    prompt = f"""<|system|>
{system_prompt}<|end|>
<|user|>
Player {requesting_player or 'Unknown'} says: {user_message}{context_str}<|end|>
<|assistant|>
"""
    return prompt

def extract_json(text: str):
    """Extract the first JSON object from a model response.

    Markdown code fences are removed, then decoding starts at the first ``{``
    and stops at the end of that object. Anything the model wrote after it
    (extra prose, or further "User:/Response:" examples with their own braces)
    is ignored instead of making the whole response unparseable.

    Args:
        text: Raw text produced by the model.

    Returns:
        The decoded object (single-action or ``steps`` format), or None when no
        JSON object could be decoded.
    """
    try:
        # Remove markdown code blocks if present
        text = re.sub(r'```json\s*|\s*```', '', text)

        start_idx = text.find('{')
        # No opening brace, or no closing brace after it: nothing to decode.
        if start_idx < 0 or text.rfind('}') < start_idx:
            return None

        # raw_decode parses one JSON document and tolerates trailing text.
        parsed, _end = json.JSONDecoder().raw_decode(text[start_idx:])
        return parsed
    except Exception as e:
        logger.warning(f"JSON extraction error: {e}")
        return None

# Values the model may emit when it means "the player who is talking".
_SELF_REFERENCES = frozenset({'me', 'i', 'myself'})
# Fields that never name a player; leaving them alone keeps a reply such as
# {"action":"respond","message":"Me"} from being rewritten into the player's name.
_NON_PLAYER_FIELDS = frozenset({'action', 'message'})


def resolve_player_references(command, requesting_player):
    """Replace 'me' / 'I' / 'myself' values in a command with the player's name.

    The match is case-insensitive and ignores surrounding whitespace. Only
    top-level string values are examined, and the ``action`` and ``message``
    fields are never rewritten.

    Args:
        command: One parsed action dict. Any other type is returned untouched.
        requesting_player: Name substituted for self-references.

    Returns:
        The same ``command`` object, updated in place.
    """
    if not isinstance(command, dict):
        return command

    # Only values of existing keys are reassigned, so mutating while iterating is safe.
    for key, value in command.items():
        if key in _NON_PLAYER_FIELDS or not isinstance(value, str):
            continue
        if value.strip().lower() in _SELF_REFERENCES:
            command[key] = requesting_player
    return command

@app.route('/health', methods=['GET'])
def health():
    """Liveness endpoint: reports whether a model is loaded and whether CUDA is available."""
    payload = dict(
        status='online',
        model_loaded=llm is not None,
        gpu_available=torch.cuda.is_available(),
        model_info=model_info,
    )
    return jsonify(payload)

def _parse_error(message, status, **extra):
    """Build the JSON error response shared by every failure path of /parse."""
    payload = {'error': message, 'action': None, 'params': {}, 'steps': []}
    payload.update(extra)
    return jsonify(payload), status


@app.route('/parse', methods=['POST'])
def parse_command():
    """Turn a player's chat message into a bot command using the loaded model.

    Request body (JSON object):
        text: the player's message (required).
        context: optional bot status dict forwarded to the prompt.
        playerName: name of the speaking player (defaults to 'Player').

    Responses:
        200 with ``action``/``params`` for a single action, or ``steps`` and
        ``is_multistep: True`` for a multi-step plan.
        400 for a malformed request or when the model output cannot be turned
        into a command.
        503 when no model is loaded.
        500 for unexpected errors.
    """
    if llm is None:
        return _parse_error('Model not loaded', 503)

    # silent=True yields None instead of raising on a missing or invalid JSON body,
    # so a bad request gets the same JSON error shape as every other failure.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return _parse_error('Request body must be a JSON object', 400)

    text = data.get('text', '')
    context = data.get('context', {})
    if not isinstance(context, dict):
        context = {}
    # A null or empty playerName falls back to the same default as a missing one.
    requesting_player = data.get('playerName') or 'Player'

    if not text or not str(text).strip():
        return _parse_error('No text', 400)

    logger.info(f"📥 Parsing from {requesting_player}: {text}")

    try:
        prompt = format_prompt(text, context, requesting_player)

        # Generate response. Sampling settings are passed here because they
        # are per-call arguments of the completion API.
        output = llm(
            prompt,
            max_tokens=500,  # More tokens for multi-step
            temperature=0.7,
            top_p=0.95,
            stop=["<|end|>"],
            echo=False,
        )

        response = output["choices"][0]["text"].strip()
        logger.info(f"🤖 Raw response: {response[:200]}")

        # Extract JSON
        parsed = extract_json(response)

        if not parsed or not isinstance(parsed, dict):
            return _parse_error('Could not parse response', 400, raw_response=response[:200])

        # Handle multi-step commands
        if 'steps' in parsed:
            raw_steps = parsed['steps']

            # Reject the whole plan rather than forwarding (or silently dropping)
            # malformed steps: executing only part of a plan would be surprising.
            plan_is_valid = (
                isinstance(raw_steps, list)
                and len(raw_steps) > 0
                and all(isinstance(step, dict) and step.get('action') for step in raw_steps)
            )
            if not plan_is_valid:
                return _parse_error('Malformed steps in response', 400, raw_response=response[:200])

            logger.info(f"Multi-step command with {len(raw_steps)} steps")

            # Resolve player references in all steps
            steps = [resolve_player_references(step, requesting_player) for step in raw_steps]

            return jsonify({
                'action': None,
                'params': {},
                'steps': steps,
                'error': None,
                'is_multistep': True
            })

        # Handle single action
        action = parsed.get('action')

        if not action:
            return _parse_error('No action in response', 400)

        # Resolve player references
        resolved = resolve_player_references(parsed, requesting_player)

        # Everything except the action name is forwarded as parameters.
        params = {k: v for k, v in resolved.items() if k != 'action'}

        logger.info(f"Single action: {action}")

        return jsonify({
            'action': action,
            'params': params,
            'steps': [],
            'error': None,
            'is_multistep': False
        })

    except Exception as e:
        # logger.exception keeps the traceback, which a bare message loses.
        logger.exception(f"Error: {e}")
        return _parse_error(str(e), 500)

@app.route('/info', methods=['GET'])
def info():
    """Serve the metadata currently stored in the module-level ``model_info`` as JSON."""
    current_info = model_info
    return jsonify(current_info)

@app.route('/stats', methods=['GET'])
def stats():
    """Report GPU name and memory figures (in GiB) as JSON.

    Without CUDA the response is just ``{'gpu_available': False}``.

    NOTE(review): the allocated/cached figures come from torch's own counters;
    memory used by the llama.cpp model is presumably not included — confirm.
    """
    if torch.cuda.is_available():
        bytes_per_gib = 1024**3
        report = {
            'gpu_available': True,
            'gpu_name': torch.cuda.get_device_name(0),
            'gpu_memory_total': torch.cuda.get_device_properties(0).total_memory / bytes_per_gib,
            'gpu_memory_allocated': torch.cuda.memory_allocated() / bytes_per_gib,
            'gpu_memory_cached': torch.cuda.memory_reserved() / bytes_per_gib,
        }
    else:
        report = {'gpu_available': False}

    return jsonify(report)

In [None]:
from pyngrok import ngrok
import nest_asyncio
# Patch asyncio so event loops can be nested.
# NOTE(review): presumably needed because the notebook kernel already runs a loop — confirm it is required here.
# NOTE(review): nest_asyncio is not in the pip install cell; presumably preinstalled on Colab — confirm.
nest_asyncio.apply()

def run_server_with_ngrok(model_path, ngrok_token, port=5000, n_gpu_layers=35):
    """Load the model, open an ngrok tunnel and serve the Flask app until stopped.

    Args:
        model_path: Path of the GGUF model file; the function returns early if
            it does not exist.
        ngrok_token: ngrok auth token; when empty, no token is set.
        port: Local port used for both the Flask server and the tunnel.
        n_gpu_layers: Forwarded to initialize_model().

    Returns:
        None. Blocks while the server runs; the tunnel is closed on exit.
    """

    print("\n" + "="*70)
    print("ADVANCED Phi-3 Server - Multi-Step Reasoning")
    print("="*70 + "\n")

    # Check model
    if not Path(model_path).exists():
        print(f"Model not found: {model_path}")
        print("\n Upload options:")
        print("   1. Upload via Files tab")
        print("   2. Mount Drive: /content/drive/MyDrive/phi3_model.gguf")
        return

    # Load model. A loader that raises and a loader that returns False are
    # both treated as a failed load.
    print("Loading model...")
    try:
        model_ready = initialize_model(model_path, n_gpu_layers=n_gpu_layers)
    except Exception as exc:
        print(f"Failed to load model: {exc}")
        return
    if not model_ready:
        print("Failed to load model")
        return

    # Setup ngrok
    print("\n Setting up ngrok tunnel...")
    if ngrok_token:
        ngrok.set_auth_token(ngrok_token)

    # Start ngrok tunnel. Current pyngrok returns an NgrokTunnel object whose
    # str() is a description rather than the URL, so read .public_url; fall
    # back to the value itself for older versions that returned a plain string.
    tunnel = ngrok.connect(port)
    public_url = getattr(tunnel, 'public_url', tunnel)

    print("\n" + "="*70)
    print(" TUNNEL READY!")
    print(f" Public URL: {public_url}")
    print("="*70)
    print("\n COPY THIS URL TO YOUR LOCAL .env FILE:")
    print(f"    COLAB_SERVER_URL={public_url}")
    print("\n" + "="*70)
    print("\n Features enabled:")
    print("   ✓ Natural conversation (greetings, questions)")
    print("   ✓ Multi-step command processing")
    print("   ✓ 'me' reference resolution")
    print("   ✓ Smart crafting (auto-gather materials)")
    print("   ✓ Complex request reasoning")
    print("\n" + "="*70 + "\n")

    # Start Flask
    print(" Starting Flask server...")

    from werkzeug.serving import run_simple
    try:
        run_simple('0.0.0.0', port, app, use_reloader=False, use_debugger=False)
    finally:
        # Close the tunnel so re-running the cell does not leave stale tunnels open.
        try:
            ngrok.disconnect(public_url)
        except Exception as exc:
            print(f"Could not close ngrok tunnel: {exc}")

In [None]:
# 1. Set your model path
MODEL_PATH = 'path to your model'  # If uploaded to Colab

# 2. Set your ngrok auth token
# Prefer the NGROK_AUTHTOKEN environment variable so a real token is never saved
# in the notebook; the placeholder below is used only when the variable is unset.
NGROK_TOKEN = os.environ.get('NGROK_AUTHTOKEN', 'your-ngrok-token')  # Get from https://dashboard.ngrok.com

run_server_with_ngrok(MODEL_PATH, NGROK_TOKEN)