# 🚀 IBM Granite 3.2 Model Server for EchoVerse

This notebook loads an IBM Granite model (`ibm-granite/granite-3b-code-instruct`) and serves it as an API server for your EchoVerse project.

## Instructions:
1. Run all cells in order
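2. Copy the ngrok public URL printed by the "Setup ngrok tunnel" cell (the second-to-last cell; the last cell keeps running and only prints status lines)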
3. Update your Flask app with this URL
4. Keep this notebook running while using EchoVerse

In [None]:
# Install required packages
!pip install transformers torch accelerate bitsandbytes flask pyngrok requests
!pip install huggingface_hub --upgrade

In [None]:
# Import libraries
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from flask import Flask, request, jsonify
from pyngrok import ngrok
import threading
import time
import logging

# Configure root logging at INFO level and create this notebook's logger
logging.basicConfig(level=logging.INFO)
# Module-level logger, used by the functions defined in later cells
logger = logging.getLogger(__name__)

# Status message confirming the import cell ran to completion
print("✅ Libraries imported successfully!")

In [None]:
# Load the IBM Granite model and its tokenizer via from_pretrained.
# NOTE(review): the notebook title and the success message below say "Granite 3.2",
# but MODEL_NAME is granite-3b-code-instruct — confirm which checkpoint is intended.
MODEL_NAME = "ibm-granite/granite-3b-code-instruct"  # Model ID shared by the tokenizer and the model

print(f"🔄 Loading model: {MODEL_NAME}")

# Quantization settings: 4-bit NF4 weights with double quantization,
# float16 as the compute dtype
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)

# Load tokenizer, configured to pad on the left
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, padding_side="left")
# Fall back to the EOS token for padding when the tokenizer defines no pad token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Load the causal LM with the quantization config above;
# device_map="auto" leaves device placement to the library
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=quantization_config,
    device_map="auto",
    torch_dtype=torch.float16
)

# Report where the model ended up and how large it is
print("✅ IBM Granite 3.2 Model loaded successfully!")
print(f"📊 Model device: {model.device}")
print(f"🧠 Model parameters: {model.num_parameters():,}")

In [None]:
# Prompt templates keyed by tone name. Each template carries a single
# ``{text}`` placeholder for the passage to rewrite and ends with a cue
# line after which the model is expected to continue.
TONE_PROMPTS = dict(
    neutral=(
        "Rewrite the following text in a clear, neutral tone:"
        "\n\n{text}\n\nRewritten text:"
    ),
    suspenseful=(
        "Transform this text into a suspenseful, mysterious narrative:"
        "\n\n{text}\n\nSuspenseful version:"
    ),
    dramatic=(
        "Rewrite this text with dramatic flair and emotional intensity:"
        "\n\n{text}\n\nDramatic version:"
    ),
    inspiring=(
        "Transform this text into an inspiring, motivational narrative:"
        "\n\n{text}\n\nInspiring version:"
    ),
    educational=(
        "Rewrite this text in an educational, informative style:"
        "\n\n{text}\n\nEducational version:"
    ),
    conversational=(
        "Transform this text into a friendly, conversational style:"
        "\n\n{text}\n\nConversational version:"
    ),
    formal=(
        "Rewrite this text in a formal, professional tone:"
        "\n\n{text}\n\nFormal version:"
    ),
    calming=(
        "Transform this text into a peaceful, calming narrative:"
        "\n\n{text}\n\nCalming version:"
    ),
)

# Upper bound, in tokens, on the prompt handed to the model.
_MAX_PROMPT_TOKENS = 512


def _fit_text_to_prompt(text, template, max_tokens=_MAX_PROMPT_TOKENS):
    """Trim ``text`` so that ``template`` filled with it fits in ``max_tokens``.

    The tokenizer truncates on the right by default, so truncating the filled
    prompt would cut off the template's closing cue (e.g. "Rewritten text:").
    Trimming the user text instead keeps the instruction and the cue intact.
    Text that already fits is returned unchanged.
    """
    # Tokens consumed by the template itself (including any special tokens)
    overhead = len(tokenizer(template.format(text=""))["input_ids"])
    budget = max(max_tokens - overhead, 1)

    # Ask for one token more than the budget: getting it back proves the
    # text is too long, without tokenizing an arbitrarily large input.
    text_ids = tokenizer(
        text,
        add_special_tokens=False,
        truncation=True,
        max_length=budget + 1,
    )["input_ids"]
    if len(text_ids) <= budget:
        return text
    return tokenizer.decode(text_ids[:budget], skip_special_tokens=True)


def transform_text(text, tone="neutral"):
    """Rewrite ``text`` in the requested tone using the loaded Granite model.

    Args:
        text: The passage to rewrite.
        tone: A key of ``TONE_PROMPTS``; unknown tones fall back to "neutral".

    Returns:
        The first sentence of the generated text (everything before the first
        "."), stripped. The original ``text`` is returned unchanged when
        generation fails, when the model produces nothing, or when that first
        sentence is 10 characters or shorter.
    """
    try:
        # Get prompt template and fill it with text that fits the token budget
        template = TONE_PROMPTS.get(tone, TONE_PROMPTS["neutral"])
        prompt = template.format(text=_fit_text_to_prompt(text, template))

        # Tokenize input; truncation stays on as a safety net in case the
        # budget estimate is off by a few tokens
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=_MAX_PROMPT_TOKENS,
            padding=True
        ).to(model.device)

        # Generate response (sampling, no gradient tracking)
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=200,
                temperature=0.7,
                do_sample=True,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id
            )

        # Decode only the newly generated tokens (skip the echoed prompt)
        prompt_length = inputs['input_ids'].shape[1]
        generated_text = tokenizer.decode(
            outputs[0][prompt_length:],
            skip_special_tokens=True
        ).strip()

        # Keep the first sentence only.
        # NOTE(review): this discards everything after the first "." (and the
        # period itself), including on abbreviations or decimals — confirm
        # that single-sentence output is really what callers want.
        first_sentence = generated_text.split('.')[0].strip()
        return first_sentence if len(first_sentence) > 10 else text

    except Exception as e:
        # Best-effort service: log with traceback and hand back the input
        logger.exception("Error in text transformation: %s", e)
        return text

print("✅ Text transformation function ready!")

In [None]:
# Create the Flask application object; the route handlers below register on it
app = Flask(__name__)

@app.route('/health', methods=['GET'])
def health_check():
    """Report that the server is up, with the model ID and device in use."""
    payload = dict(
        status='healthy',
        model=MODEL_NAME,
        device=str(model.device),
        message='IBM Granite Model Server is running!',
    )
    return jsonify(payload)

@app.route('/transform', methods=['POST'])
def api_transform_text():
    """Transform the posted text into the requested tone.

    Expects a JSON object with a required string ``text`` and an optional
    string ``tone`` (default "neutral").

    Returns:
        200 with the original text, transformed text, tone and model ID;
        400 when the body is not a JSON object, ``text`` is missing, empty or
        not a string, or ``tone`` is not a string;
        500 when the transformation itself fails unexpectedly.
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, so bad requests are answered with 400 rather than a 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    text = data.get('text', '')
    tone = data.get('tone', 'neutral')

    if not isinstance(text, str) or not text.strip():
        return jsonify({'error': 'Text is required'}), 400
    if not isinstance(tone, str):
        return jsonify({'error': 'Tone must be a string'}), 400

    try:
        # Transform text
        transformed_text = transform_text(text, tone)

        return jsonify({
            'status': 'success',
            'original_text': text,
            'transformed_text': transformed_text,
            'tone': tone,
            'model': MODEL_NAME
        })

    except Exception as e:
        # Last-resort boundary: log with traceback and report a server error
        logger.exception("API Error: %s", e)
        return jsonify({'error': str(e)}), 500

@app.route('/available-tones', methods=['GET'])
def get_available_tones():
    """Return the supported tone names and how many there are."""
    tone_names = list(TONE_PROMPTS)
    response_body = {
        'tones': tone_names,
        'count': len(tone_names),
    }
    return jsonify(response_body)

print("✅ Flask API server created!")

In [None]:
# Setup ngrok tunnel
# Get your free ngrok token from: https://dashboard.ngrok.com/get-started/your-authtoken
NGROK_TOKEN = "YOUR_NGROK_TOKEN_HERE"  # Replace with your actual token
_NGROK_TOKEN_PLACEHOLDER = "YOUR_NGROK_TOKEN_HERE"

# Apply the token as soon as it has been filled in, so editing NGROK_TOKEN
# is the only step needed; otherwise fall back to an unauthenticated tunnel.
if NGROK_TOKEN and NGROK_TOKEN != _NGROK_TOKEN_PLACEHOLDER:
    ngrok.set_auth_token(NGROK_TOKEN)
    print("🔐 ngrok auth token configured")
else:
    print("⚠️  Running without ngrok authentication (limited time)")
    print("💡 Get free token from: https://dashboard.ngrok.com/get-started/your-authtoken")

# Start ngrok tunnel to the local Flask port
tunnel = ngrok.connect(5000)
# Newer pyngrok returns an NgrokTunnel object whose string form is not a
# bare URL; older versions returned the URL string directly.
public_url = getattr(tunnel, "public_url", tunnel)
print("\n🌐 IBM Granite Model Server is now accessible at:")
print(f"📡 Public URL: {public_url}")
print("\n📋 Copy this URL and update your Flask app configuration!")
print("\n🔗 API Endpoints:")
print(f"   Health Check: {public_url}/health")
print(f"   Transform Text: {public_url}/transform")
print(f"   Available Tones: {public_url}/available-tones")

In [None]:
# Entry point for the Flask server thread
def run_server():
    """Serve the Flask app on all interfaces, port 5000, with debug off."""
    app.run(debug=False, port=5000, host='0.0.0.0')

# Run the server on a daemon thread so the notebook cell is not blocked by it
server_thread = threading.Thread(target=run_server, daemon=True)
server_thread.start()

# Startup banner with follow-up instructions for the user
divider = "=" * 60
banner_lines = (
    "🚀 IBM Granite Model Server is now running!",
    "\n" + divider,
    "🎯 IMPORTANT: Copy the ngrok URL above and update your Flask app!",
    "📝 Update GRANITE_API_URL in your Flask configuration",
    "⏰ Keep this notebook running while using EchoVerse",
    divider,
)
for banner_line in banner_lines:
    print(banner_line)

# Block this cell indefinitely, printing a heartbeat once a minute,
# until the user interrupts the kernel
try:
    while True:
        time.sleep(60)
        print(f"⏰ Server running... {time.strftime('%H:%M:%S')}")
except KeyboardInterrupt:
    print("\n🛑 Server stopped by user")