In [None]:
# Cell 1: Install Dependencies
!pip install -q transformers peft accelerate torch
!pip install -q flask flask-cors pyngrok
print("‚úÖ Dependencies installed")

In [None]:
# Cell 2: Upload LoRA Checkpoint
# Option A: From Google Drive (recommended)
from google.colab import drive
drive.mount('/content/drive')

# Copy your checkpoint-224 folder from Drive to Colab
# Adjust path to where you uploaded checkpoint-224 in your Drive
!cp -r "/content/drive/MyDrive/Diet-Plan-AI/checkpoint-224" /content/

# Verify checkpoint exists
import os
if os.path.exists('/content/checkpoint-224'):
    print("‚úÖ Checkpoint-224 found!")
    !ls -lh /content/checkpoint-224
else:
    print("‚ùå Checkpoint not found. Please upload it to Google Drive first.")

In [None]:
# Cell 3: Setup Ngrok
# Get your token from https://dashboard.ngrok.com/get-started/your-authtoken
from getpass import getpass

from pyngrok import ngrok

NGROK_PLACEHOLDER = "YOUR_NGROK_TOKEN_HERE"
# Leave the placeholder in place to be prompted for the token (preferred: the
# secret is then never saved inside the notebook), or paste the token here.
NGROK_TOKEN = "YOUR_NGROK_TOKEN_HERE"

if not NGROK_TOKEN or NGROK_TOKEN == NGROK_PLACEHOLDER:
    NGROK_TOKEN = getpass("Ngrok authtoken: ").strip()

# Fail loudly instead of reporting success with a missing token.
if not NGROK_TOKEN:
    raise ValueError("An ngrok authtoken is required to expose the server")

# Stores the token in ngrok's config file so that the server process started
# later (a separate `python` process) can open an authenticated tunnel.
ngrok.set_auth_token(NGROK_TOKEN)
print("‚úÖ Ngrok configured")

In [None]:
# Cell 4: Create Flask Server
%%writefile colab_server.py

from flask import Flask, request, jsonify
from flask_cors import CORS
from pyngrok import ngrok
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = Flask(__name__)
CORS(app)

model = None
tokenizer = None

def load_model(base_model="microsoft/phi-2", adapter_path="/content/checkpoint-224"):
    """Load the base causal LM plus the LoRA adapter into the module globals.

    Args:
        base_model: Hub id or path of the base model and tokenizer.
        adapter_path: Directory holding the fine-tuned PEFT/LoRA checkpoint.

    Returns:
        True when both tokenizer and adapted model were loaded; False on any
        failure. On failure the globals ``model`` and ``tokenizer`` are left
        untouched, so a partially loaded model (e.g. the base model without
        its adapter) is never served.
    """
    global model, tokenizer
    logger.info("üîÑ Loading fine-tuned Phi-2 model...")

    try:
        loaded_tokenizer = AutoTokenizer.from_pretrained(
            base_model,
            trust_remote_code=True
        )
        # generate() is given pad_token_id, so make sure one is defined.
        if loaded_tokenizer.pad_token is None:
            loaded_tokenizer.pad_token = loaded_tokenizer.eos_token

        base = AutoModelForCausalLM.from_pretrained(
            base_model,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True
        )

        # Wrap the base model with the fine-tuned adapter, inference only.
        adapted = PeftModel.from_pretrained(
            base,
            adapter_path,
            is_trainable=False
        )
        adapted.eval()

        device = next(adapted.parameters()).device
    except Exception as e:
        # logger.exception keeps the traceback, which the bare message lost.
        logger.exception(f"‚ùå Failed to load model: {e}")
        return False

    # Publish only after every step succeeded.
    model, tokenizer = adapted, loaded_tokenizer
    logger.info(f"‚úÖ Model loaded successfully on {device}!")
    return True

@app.route('/health', methods=['GET'])
def health():
    """Report whether the model is loaded and which device it lives on.

    Returns:
        A JSON response with ``status``, ``model_loaded`` and ``device``
        (``"not loaded"`` while no model is available).
    """
    # Use an explicit None check for both fields, matching /generate, rather
    # than relying on the truthiness of the model object.
    model_loaded = model is not None
    device = str(next(model.parameters()).device) if model_loaded else "not loaded"
    return jsonify({
        "status": "healthy",
        "model_loaded": model_loaded,
        "device": device
    })

def _parse_generation_request(payload):
    """Validate a /generate payload and return (prompt, max_tokens, temperature, top_p).

    Raises:
        ValueError: with a client-facing message when the payload is not a JSON
            object, the prompt is empty, or a sampling setting is invalid.
    """
    if not isinstance(payload, dict):
        raise ValueError("Request body must be a JSON object")

    prompt = payload.get('prompt', '')
    if not isinstance(prompt, str) or not prompt.strip():
        raise ValueError("'prompt' must be a non-empty string")

    try:
        max_tokens = int(payload.get('max_tokens', 800))
        temperature = float(payload.get('temperature', 0.7))
        top_p = float(payload.get('top_p', 0.9))
    except (TypeError, ValueError, OverflowError):
        raise ValueError(
            "'max_tokens', 'temperature' and 'top_p' must be numbers"
        ) from None

    if max_tokens < 1:
        raise ValueError("'max_tokens' must be at least 1")
    # Written as "not >" so that NaN is rejected as well.
    if not temperature > 0:
        raise ValueError("'temperature' must be greater than 0")
    if not 0 <= top_p <= 1:
        raise ValueError("'top_p' must be between 0 and 1")

    return prompt, max_tokens, temperature, top_p


@app.route('/generate', methods=['POST'])
def generate():
    """Generate text for the prompt in the JSON request body.

    Expected JSON fields: ``prompt`` (required), and optional ``max_tokens``
    (default 800), ``temperature`` (default 0.7) and ``top_p`` (default 0.9).

    Returns:
        200 with ``status``, ``response`` and ``response_length`` on success;
        400 with ``error`` when the request is malformed;
        500 with ``error`` when the model is not loaded or generation fails.
    """
    if model is None:
        return jsonify({"error": "Model not loaded"}), 500

    # Bad input is the caller's fault: answer 400 instead of letting it
    # surface as a 500 from deep inside tokenization/generation.
    try:
        prompt, max_tokens, temperature, top_p = _parse_generation_request(
            request.get_json(silent=True)
        )
    except ValueError as e:
        return jsonify({"error": str(e)}), 400

    try:
        logger.info(f"üìù Generating (max_tokens={max_tokens})...")

        inputs = tokenizer(prompt, return_tensors="pt", max_length=2048, truncation=True)
        # Move the input tensors to wherever the model's weights were placed.
        device = next(model.parameters()).device
        inputs = {k: v.to(device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id
            )

        response = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # The decoded text includes the prompt; keep only what follows the
        # last response marker when the prompt template uses one.
        if "### Response:" in response:
            response = response.split("### Response:")[-1].strip()

        logger.info(f"‚úÖ Generated {len(response)} characters")

        return jsonify({
            "status": "success",
            "response": response,
            "response_length": len(response)
        })

    except Exception as e:
        # logger.exception records the traceback alongside the message.
        logger.exception(f"‚ùå Error: {e}")
        return jsonify({"error": str(e)}), 500

if __name__ == '__main__':
    # Script entry point: load the model, open an ngrok tunnel to the Flask
    # port, print the public URL for the local client, then serve requests.
    PORT = 5000

    if load_model():
        tunnel = ngrok.connect(PORT)
        # Current pyngrok returns an NgrokTunnel object whose string form is
        # not a bare URL; read .public_url so the printed value can be pasted
        # as-is. Older releases returned the URL string directly.
        public_url = getattr(tunnel, "public_url", tunnel)
        print("\n" + "="*60)
        print("üåê PUBLIC URL (COPY THIS):")
        print(f"   {public_url}")
        print("="*60)
        print("\nüìù Update your local ml_recommender.py:")
        print('   USE_COLAB = True')
        print(f'   COLAB_API_URL = "{public_url}"')
        print("\nüöÄ Server starting...\n")
        app.run(host='0.0.0.0', port=PORT)
    else:
        print("‚ùå Failed to load model")

In [None]:
# Cell 5: Start Server (KEEP THIS RUNNING)
# Runs colab_server.py, the script written by the Cell 4 %%writefile cell.
# The script loads the model, prints the public ngrok URL, and then serves
# requests, so this cell keeps running for as long as the server is up.
# Don't stop it!
!python colab_server.py