In [15]:
import subprocess
import sys

subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "pip"])
subprocess.check_call([sys.executable, "-m", "pip", "install", "git+https://github.com/huggingface/transformers.git@main"])
subprocess.check_call([sys.executable, "-m", "pip", "install", "git+https://github.com/huggingface/accelerate"])

print("Latest transformers installed from GitHub")


Latest transformers installed from GitHub


In [16]:
import os
import gc
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

os.environ["TOKENIZERS_PARALLELISM"] = "false"

MODEL_ID = "LiquidAI/LFM2.5-1.2B-Instruct"

print("Loading LFM2.5 model...")
print(f"Model ID: {MODEL_ID}")

gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    print("Device: cuda")

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    torch_dtype=torch.bfloat16
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

if tokenizer.pad_token_id is None and tokenizer.eos_token_id is not None:
    tokenizer.pad_token = tokenizer.eos_token

model.eval()

print("Model loaded")
print(f"Model device: {next(model.parameters()).device}")

# Smoke test with manual prompt format
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Say hello"}
]

# Build prompt manually
prompt_text = f"System: {messages[0]['content']}\nUser: {messages[1]['content']}\nAssistant:"

inputs = tokenizer(prompt_text, return_tensors="pt")
input_ids = inputs["input_ids"].to(model.device)

with torch.inference_mode():
    output = model.generate(
        input_ids,
        max_new_tokens=50,
        do_sample=True,
        temperature=0.7,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id
    )

decoded = tokenizer.decode(output[0], skip_special_tokens=True)
print(f"\nTest output: {decoded}")
print("\nLFM2.5 working!")

# Set global variables for next cells
tok = tokenizer
DEVICE = str(next(model.parameters()).device)
DTYPE = torch.bfloat16


Loading LFM2.5 model...
Model ID: LiquidAI/LFM2.5-1.2B-Instruct
Device: cuda


Loading weights:   0%|          | 0/148 [00:00<?, ?it/s]



Model loaded
Model device: cuda:0

Test output: System: You are a helpful assistant.
User: Say hello
Assistant: Hello! How can I assist you today?

LFM2.5 working!


In [17]:
def detect_intent(user_text: str) -> str:
    """Detect user intent from text"""
    text_lower = (user_text or "").lower()

    # Greeting intents
    greeting_words = [
        "hello", "hi", "hey", "helo", "hlw",
        "namaste", "namaskar",
        "नमस्ते", "नमस्कार", "हलो", "हेलो",
        "how are you", "हाव आर यू", "हाउ आर यू",
        "कैसे हैं", "कैसे हो"
    ]
    # Only greeting if JUST greeting, not if asking question
    if any(word in text_lower for word in greeting_words) and not any(q in text_lower for q in ["where", "वेर", "कहाँ", "कहां", "order", "ऑर्डर"]):
        return "greeting"

    # Order tracking (NEW - check BEFORE refund)
    elif any(word in text_lower for word in ["where", "वेर", "कहाँ", "कहां", "order", "ऑर्डर", "आर्डर", "track", "status", "delivery"]):
        return "ordertrack"

    # Farewell intents
    elif any(word in text_lower for word in ["bye", "goodbye", "tata", "alvida", "thank", "thanks", "dhanyavaad", "धन्यवाद", "बाय", "थैंक"]):
        return "farewell"

    # Refund intents
    elif any(word in text_lower for word in ["refund", "return", "complaint", "रिफंड", "वापस", "रिटर्न", "शिकायत", "wapas", "shikayat"]):
        return "refundreturn"

    # Login/technical intents
    elif any(word in text_lower for word in ["login", "password", "crash", "error", "bug", "technical", "लॉगिन", "पासवर्ड"]):
        return "logincrash"

    # Sales intents
    elif any(word in text_lower for word in ["buy", "purchase", "price", "cost", "discount", "offer", "kitna", "khareed", "खरीद", "कीमत"]):
        return "sales"

    # Booking intents
    elif any(word in text_lower for word in ["book", "appointment", "schedule", "reserve", "booking", "बुकिंग", "अपॉइंटमेंट"]):
        return "booking"

    # General help
    elif any(word in text_lower for word in ["help", "मदद", "madad"]):
        return "general"

    # General/unknown
    else:
        return "general"

print("Intent detection with order tracking support")


Intent detection with order tracking support


In [18]:
# Templates for Hindi and Hinglish responses
TEMPLATES = {
    "hindi": {
        "greeting": "नमस्ते। मैं आपकी कैसे मदद कर सकता हूं?",
        "farewell": "धन्यवाद। आपका दिन शुभ हो।",
        "refund/return": "मैं आपकी समस्या समझ रहा हूं। कृपया मुझे अपना ऑर्डर नंबर बताएं।",
        "login/crash": "मैं इस तकनीकी समस्या में आपकी मदद करूंगा।",
        "sales": "हमारे पास बेहतरीन ऑफर्स हैं। आप किस प्रोडक्ट में रुचि रखते हैं?",
        "booking": "मैं आपके लिए अपॉइंटमेंट बुक कर सकता हूं। कौन सी तारीख सही रहेगी?",
        "general": "जी हां, मैं आपकी मदद करूंगा। कृपया अपनी समस्या बताएं।",
        "unknown": "क्षमा करें, मैं समझ नहीं पाया। क्या आप फिर से बता सकते हैं?"
    },
    "hinglish": {
        "greeting": "Hello! Main aapki kaise help kar sakta hoon?",
        "farewell": "Thank you! Aapka din accha rahe.",
        "refund/return": "Main aapki problem samajh raha hoon. Please order number batayein.",
        "login/crash": "Main is technical issue me aapki help karunga.",
        "sales": "Hamare paas bahut acche offers hain. Aap kis product me interested hain?",
        "booking": "Main appointment book kar sakta hoon. Konsi date theek rahegi?",
        "general": "Ji haan, main aapki help karunga. Apni problem batayein.",
        "unknown": "Sorry, main samjha nahi. Dobara bata sakte hain?"
    }
}

print("Templates configured")


Templates configured


In [19]:
def generate_hindi_reply(user_text: str, intent: str, persona_key: str) -> str:
    """Generate response in Hindi"""
    device = str(next(model.parameters()).device)

    # Create Hindi-aware system prompt
    sys_prompt = f"""You are a helpful Hindi-speaking customer service agent.
Rules:
- Respond ONLY in Hindi (Devanagari script)
- Keep responses short (1-2 sentences maximum)
- Be polite and professional
- If it's a greeting, greet back
- If it's a complaint, apologize and help
- If it's a question, answer briefly

Customer said: {user_text}

Respond in Hindi:"""

    messages = [
        {"role": "system", "content": "You are a helpful customer service agent who speaks Hindi."},
        {"role": "user", "content": sys_prompt},
    ]

    inputs = build_inputs_for_chat(tok, messages, device)
    input_len = inputs["input_ids"].shape[-1]

    with torch.inference_mode():
        out_ids = model.generate(
            **inputs,
            max_new_tokens=50,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            top_k=50,
            repetition_penalty=1.3,
            no_repeat_ngram_size=3,
            pad_token_id=tok.pad_token_id,
            eos_token_id=tok.eos_token_id,
        )

    new_ids = out_ids[0][input_len:]
    text = tok.decode(new_ids, skip_special_tokens=True).strip()

    # If LLM fails to generate Hindi, use template
    if not text or not has_devanagari(text):
        return TEMPLATES["hindi"].get(intent, TEMPLATES["hindi"]["general"])

    return limit_3_sentences(text)


def generate_hinglish_reply(user_text: str, intent: str, persona_key: str) -> str:
    """Generate response in Hinglish (Hindi + English mix)"""
    device = str(next(model.parameters()).device)

    sys_prompt = f"""You are a helpful customer service agent who speaks Hinglish (mix of Hindi and English).
Rules:
- Respond in Hinglish (Roman script with Hindi words mixed with English)
- Keep responses short (1-2 sentences)
- Be friendly and professional
- Example: "Ji haan, main aapki help karunga. Aap apna order number bataiye."

Customer said: {user_text}

Respond in Hinglish:"""

    messages = [
        {"role": "system", "content": "You speak Hinglish (Hindi-English mix)."},
        {"role": "user", "content": sys_prompt},
    ]

    inputs = build_inputs_for_chat(tok, messages, device)
    input_len = inputs["input_ids"].shape[-1]

    with torch.inference_mode():
        out_ids = model.generate(
            **inputs,
            max_new_tokens=50,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            top_k=50,
            repetition_penalty=1.3,
            no_repeat_ngram_size=3,
            pad_token_id=tok.pad_token_id,
            eos_token_id=tok.eos_token_id,
        )

    new_ids = out_ids[0][input_len:]
    text = tok.decode(new_ids, skip_special_tokens=True).strip()

    # If LLM fails, use template
    if not text:
        return TEMPLATES["hinglish"].get(intent, TEMPLATES["hinglish"]["general"])

    return limit_3_sentences(text)

print("Hindi and Hinglish generation functions configured")


Hindi and Hinglish generation functions configured


In [20]:
import re

# Improved Personas following the standard template
PERSONAS = {
    "1. Customer Care (General)": {
        "role": "Customer Care Agent",
        "objective": "Handle customer inquiries, resolve issues, and provide support",
        "tone": "polite, calm, empathetic",
        "rules": [
            "If customer is angry, respond calmly and acknowledge their frustration",
            "If issue is unclear, ask one clear question at a time",
            "If policy restricted, say 'I'll help you with the best available option'",
            "Never argue with customer"
        ]
    },
    "2. Sales Lead Qualification": {
        "role": "Sales Representative",
        "objective": "Identify customer interest and qualify leads",
        "tone": "confident, persuasive but not pushy",
        "rules": [
            "No false promises",
            "No pricing unless allowed",
            "Avoid aggressive language",
            "Escalate to human if lead is hot"
        ]
    },
    "3. Appointment Booking": {
        "role": "Booking Assistant",
        "objective": "Schedule appointments and confirm bookings",
        "tone": "structured, clear, time-aware",
        "rules": [
            "Collect date and time",
            "Confirm availability",
            "Repeat details for confirmation"
        ]
    },
    "4. Complaint Handling": {
        "role": "Complaint Resolution Agent",
        "objective": "Handle refunds, escalations, and dissatisfaction",
        "tone": "highly empathetic, patient",
        "rules": [
            "Acknowledge frustration first",
            "Apologize where applicable",
            "Explain next steps clearly",
            "Never argue with customer"
        ]
    },
    "5. General Information Assistant": {
        "role": "Information Assistant",
        "objective": "Answer FAQs and provide business information",
        "tone": "neutral, informative",
        "rules": [
            "Answer factual questions only",
            "Redirect to support if needed"
        ]
    }
}

# Language detection utilities
DEVANAGARI_RE = re.compile(r'[\u0900-\u097F]')
LATIN_RE = re.compile(r'[A-Za-z]')

def has_devanagari(s: str) -> bool:
    return bool(DEVANAGARI_RE.search(s or ""))

def has_latin(s: str) -> bool:
    return bool(LATIN_RE.search(s or ""))

def clean_tts(s: str) -> str:
    return re.sub(r"[\"']", "", s or "").strip()

def limit_3_sentences(text: str) -> str:
    t = clean_tts(text)
    if not t:
        return t
    parts = re.split(r'[.?!।]+', t)
    parts = [p.strip() for p in parts if p.strip()]
    return ". ".join(parts[:3]).strip()

ROMAN_HI_HINTS = {
    "aap","ap","tum","tu","mera","meri","mere","kya","kyu","kyun","kaise","kab","kahan",
    "nahi","nahin","haan","han","hai","ho","hu","hun","thoda","jaldi","abhi",
    "karo","kar","karna","karne","kardo","krdo","batao","batado","bataye","dijiye","do","de"
}

def detect_mode(user_text: str) -> str:
    """Detect language mode"""
    text = (user_text or "").strip()
    if not text:
        return "english"

    has_dev = has_devanagari(text)
    has_lat = has_latin(text)

    # If has both Devanagari and Latin, it's Hinglish
    if has_dev and has_lat:
        return "hinglish"

    # If ONLY Devanagari
    if has_dev and not has_lat:
        # Expanded list of English words written in Devanagari
        hinglish_devanagari = [
            "हाय", "हाव", "हाउ", "आर", "यू", "हलो", "हेलो",
            "बाय", "थैंक", "प्लीज", "ओके",
            "वेर", "इज", "माई", "योर",  # where, is, my, your
            "ऑर्डर", "आर्डर"  # order
        ]

        # Count Hinglish words
        words = text.split()
        hinglish_count = sum(1 for word in words if any(hw in word for hw in hinglish_devanagari))

        # If more than 30% are Hinglish words → Hinglish
        if len(words) > 0 and hinglish_count / len(words) > 0.3:
            return "hinglish"

        return "hindi"

    # If has Latin, check for Hindi hints
    tokens = re.findall(r'[A-Za-z]+', text.lower())
    hint_count = sum(1 for t in tokens if t in ROMAN_HI_HINTS)
    if hint_count >= 1:
        return "hinglish"

    return "english"

print("Improved language detection - better Hinglish recognition")





Improved language detection - better Hinglish recognition


In [21]:
def build_english_system_prompt(persona_key: str) -> str:
    """Build multilingual system prompt"""
    persona = PERSONAS[persona_key]
    role = persona["role"]
    objective = persona["objective"]
    tone = persona["tone"]
    rules = persona["rules"]

    rules_text = "\n".join([f"- {rule}" for rule in rules])

    prompt = f"""You are an AI voice assistant acting as {role}.

Your purpose:
- {objective}

IMPORTANT LANGUAGE RULES:
- If user speaks HINDI (Devanagari script), respond ONLY in HINDI
- If user speaks HINGLISH (Roman Hindi), respond in HINGLISH
- If user speaks ENGLISH, respond in ENGLISH
- Match the user's language exactly

Communication style:
- Tone: {tone}
- Sentence length: short, clear, conversational (1-2 sentences maximum)

Rules:
{rules_text}
- Do not hallucinate information
- Never mention AI, model, or system
- Keep responses under 3 sentences

Response format:
- Voice-friendly (no bullet points)
- No emojis"""

    return prompt


def build_inputs_for_chat(tokenizer, messages, device: str):
    """Build tokenized inputs for chat"""
    # Manual prompt format for Liquid AI
    lines = []
    for m in messages:
        role = (m.get("role") or "user").strip().capitalize()
        content = (m.get("content") or "").strip()
        lines.append(f"{role}: {content}")
    lines.append("Assistant:")

    prompt_text = "\n".join(lines)
    enc = tokenizer(prompt_text, return_tensors="pt")
    return {k: v.to(device) for k, v in enc.items()}


def generate_english_reply(user_text: str, persona_key: str) -> str:
    """Generate multilingual response using Liquid model"""
    device = str(next(model.parameters()).device)
    sys_prompt = build_english_system_prompt(persona_key)

    messages = [
        {"role": "system", "content": sys_prompt},
        {"role": "user", "content": (user_text or "").strip()},
    ]

    inputs = build_inputs_for_chat(tok, messages, device)
    input_len = inputs["input_ids"].shape[-1]

    with torch.inference_mode():
        out_ids = model.generate(
            **inputs,
            max_new_tokens=80,        # Increased for Hindi (needs more tokens)
            do_sample=True,
            temperature=0.8,
            top_p=0.9,
            top_k=50,
            repetition_penalty=1.3,
            no_repeat_ngram_size=3,
            pad_token_id=tok.pad_token_id,
            eos_token_id=tok.eos_token_id,
        )

    new_ids = out_ids[0][input_len:]
    text = tok.decode(new_ids, skip_special_tokens=True).strip()

    # Clean up response but keep multilingual content
    text = limit_3_sentences(text)

    if not text:
        text = "I understand. Could you please provide more details?"

    return text


print(" Multilingual generation configured - supports English, Hindi, Hinglish")


 Multilingual generation configured - supports English, Hindi, Hinglish


In [22]:
def liquidbrain_reply(user_text: str, persona_key: str = "1. Customer Care (General)"):
    """
    Main function to generate reply
    Returns: (reply_text, detected_mode, detected_intent)
    """
    detected_mode = detect_mode(user_text)
    intent = detect_intent(user_text)

    print(f"DEBUG: Mode={detected_mode}, Intent={intent}, Input={user_text}")

    # Response templates by language
    RESPONSE_TEMPLATES = {
        "hindi": {
            "greeting": "नमस्ते। मैं आपकी कैसे मदद कर सकता हूं?",
            "farewell": "धन्यवाद। आपका दिन शुभ रहे।",
            "ordertrack": "मैं आपके ऑर्डर की जानकारी देखता हूं। कृपया ऑर्डर नंबर बताएं।",
            "refundreturn": "मैं आपके रिफंड में मदद करूंगा। कृपया ऑर्डर नंबर बताएं।",
            "logincrash": "मैं इस तकनीकी समस्या में आपकी मदद करूंगा। कृपया विवरण बताएं।",
            "sales": "हमारे पास बहुत अच्छे ऑफर हैं। आप किस उत्पाद में रुचि रखते हैं?",
            "booking": "मैं आपकी अपॉइंटमेंट बुक कर सकता हूं। कौन सी तारीख ठीक रहेगी?",
            "general": "मैं आपकी मदद करूंगा। कृपया अपनी समस्या बताएं।",
        },
        "hinglish": {
            "greeting": "Hello! Main aapki kaise help kar sakta hoon?",
            "farewell": "Thank you! Aapka din accha rahe.",
            "ordertrack": "Main aapke order ki details check karta hoon. Order number bataiye please.",
            "refundreturn": "Main aapki refund me madad karunga. Order number bataiye please.",
            "logincrash": "Main is technical issue me aapki help karunga. Problem detail me bataiye.",
            "sales": "Hamare paas bahut acche offers hain. Aap kis product me interested hain?",
            "booking": "Main appointment book kar sakta hoon. Konsi date theek rahegi?",
            "general": "Ji haan, main aapki help karunga. Apni problem bataiye.",
        },
        "english": {
            "greeting": "Hello! How can I help you today?",
            "farewell": "Thank you for contacting us. Have a great day!",
            "ordertrack": "I'll check your order details. Please provide your order number.",
            "refundreturn": "I'll help you with the refund. Please share your order number.",
            "logincrash": "I'll assist you with this technical issue. Please describe the problem in detail.",
            "sales": "We have great offers available. Which product are you interested in?",
            "booking": "I can book an appointment for you. Which date works best?",
            "general": "I'm here to help. Please tell me what you need assistance with.",
        }
    }

    # Get response based on detected mode
    if detected_mode in ["hindi", "hinglish"]:
        # Use templates for Hindi/Hinglish (faster, more consistent)
        responses = RESPONSE_TEMPLATES.get(detected_mode, RESPONSE_TEMPLATES["english"])
        final_text = responses.get(intent, responses["general"])
    else:
        # Use templates for English too (faster than LLM)
        responses = RESPONSE_TEMPLATES["english"]
        final_text = responses.get(intent, responses["general"])

    final_text = clean_tts(final_text)
    return final_text, detected_mode, intent


print(" Liquid Brain reply function - Optimized with English templates")


 Liquid Brain reply function - Optimized with English templates


In [23]:
def convert_numbers_to_words(text, language="hindi"):
    """Convert numbers to words in text based on language"""
    import re

    # Digit to word mappings
    digit_maps = {
        "hindi": {
            "0": "शून्य", "1": "एक", "2": "दो", "3": "तीन", "4": "चार",
            "5": "पांच", "6": "छह", "7": "सात", "8": "आठ", "9": "नौ"
        },
        "english": {
            "0": "zero", "1": "one", "2": "two", "3": "three", "4": "four",
            "5": "five", "6": "six", "7": "seven", "8": "eight", "9": "nine"
        },
        "hinglish": {
            "0": "zero", "1": "ek", "2": "do", "3": "teen", "4": "char",
            "5": "paanch", "6": "chhe", "7": "saat", "8": "aath", "9": "nau"
        }
    }

    digit_map = digit_maps.get(language, digit_maps["english"])

    def replace_number(match):
        number = match.group(0)
        # Convert each digit separately
        words = " ".join([digit_map.get(d, d) for d in number])
        return words

    # Replace all number sequences with words
    text = re.sub(r'\d+', replace_number, text)

    return text

print("Number to words converter configured")


Number to words converter configured


In [24]:
!pip install -q fastapi uvicorn nest-asyncio pyngrok python-multipart


In [25]:
from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse
from pydantic import BaseModel
import nest_asyncio
import uvicorn
from pyngrok import ngrok
from threading import Thread
import getpass

nest_asyncio.apply()


In [26]:
# Voice mapping matching TTS capabilities
VOICE_MAP = {
    # Hindi speakers (Rohit removed)
    "hindi_rani": "Rani speaks in a formal, polished tone with precise pronunciation and professional composure suitable for corporate calls",
    "hindi_divya": "Divya speaks in a formal, polished tone with precise pronunciation and professional composure suitable for corporate calls",
    "hindi_aman": "Aman speaks in a formal, polished tone with precise pronunciation and professional composure suitable for corporate calls",

    # English speakers
    "english_mary": "Mary speaks in a formal, polished tone with precise pronunciation and professional composure suitable for corporate calls",
    "english_thoma": "Thoma speaks in a formal, polished tone with precise pronunciation and professional composure suitable for corporate calls",
    "english_swapna": "Swapna speaks in a formal, polished tone with precise pronunciation and professional composure suitable for corporate calls",
    "english_dinesh": "Dinesh speaks in a formal, polished tone with precise pronunciation and professional composure suitable for corporate calls",
    "english_meera": "Meera speaks in a formal, polished tone with precise pronunciation and professional composure suitable for corporate calls",
    "english_jatin": "Jatin speaks in a formal, polished tone with precise pronunciation and professional composure suitable for corporate calls",
}

def select_voice_description(detected_mode, intent):
    """Select appropriate voice based on language"""
    import random

    if detected_mode == "hindi" or detected_mode == "hinglish":
        # Use Hindi speakers
        hindi_voices = [
            VOICE_MAP["hindi_rani"],
            VOICE_MAP["hindi_divya"],
            VOICE_MAP["hindi_aman"]
        ]
        return random.choice(hindi_voices)
    else:
        # Use English speakers for English
        english_voices = [
            VOICE_MAP["english_mary"],
            VOICE_MAP["english_thoma"],
            VOICE_MAP["english_swapna"],
            VOICE_MAP["english_dinesh"],
            VOICE_MAP["english_meera"],
            VOICE_MAP["english_jatin"]
        ]
        return random.choice(english_voices)

print("Voice mapping ")


Voice mapping - Rohit removed, only Rani, Divya, Aman for Hindi/Hinglish


In [27]:
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

app = FastAPI(
    title="Liquid AI LLM API",
    description="Text from STT → Reply + Voice Description for TTS",
    version="2.0"
)

class TTSInput(BaseModel):
    text: str

@app.get("/")
def root():
    return {
        "status": "online",
        "service": "Liquid AI LLM",
        "model": MODEL_ID
    }

@app.post("/generate")
def generate_response(input: TTSInput):
    """STT text → LLM reply with TTS description"""
    try:
        # Generate reply using LLM
        reply_text, detected_mode, detected_intent = liquidbrain_reply(input.text)

        # Convert numbers to words for TTS compatibility
        reply_text = convert_numbers_to_words(reply_text, detected_mode)

        # Select voice description using the configured function
        description = select_voice_description(detected_mode, detected_intent)

        print(f"Mode: {detected_mode}, Intent: {detected_intent}")
        print(f"Voice: {description[:50]}...")

        return {
            "text": reply_text,
            "description": description
        }

    except Exception as e:
        import traceback
        print(traceback.format_exc())
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/health")
def health():
    return {"status": "healthy", "model": MODEL_ID, "device": str(DEVICE)}

print("FastAPI app created with voice selection ")


FastAPI app created with voice selection - Rohit removed


In [29]:
import os
import getpass
import time
from threading import Thread
import uvicorn
from pyngrok import ngrok


# Close existing ngrok tunnels
try:
    ngrok.kill()
except:
    pass

# Start server on port 8003
def run_server():
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8003,
        log_level="info"
    )

server_thread = Thread(target=run_server, daemon=True)
server_thread.start()

print("Starting server...")
time.sleep(3)

# Setup ngrok tunnel
token = getpass.getpass("Enter your ngrok auth token: ")
ngrok.set_auth_token(token)
public_url = ngrok.connect(8003, bind_tls=True)

print("Liquid AI LLM API is now live")
print(f"Public URL: {public_url}")
print(f"API Documentation: {public_url}/docs")
print(f"Health Check: {public_url}/health")
print("\nEndpoint for orchestration:")
print(f"POST {public_url}/generate")
print('Request body: {"text": "user text from STT"}')
print("\nResponse: {\"text\": \"reply\", \"description\": \"voice description\"}")



Starting server...


INFO:     Started server process [28135]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
ERROR:    [Errno 98] error while attempting to bind on address ('0.0.0.0', 8003): address already in use
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.


Enter your ngrok auth token: ··········
Liquid AI LLM API is now live
Public URL: NgrokTunnel: "https://unhealable-jonell-intermittingly.ngrok-free.dev" -> "http://localhost:8003"
API Documentation: NgrokTunnel: "https://unhealable-jonell-intermittingly.ngrok-free.dev" -> "http://localhost:8003"/docs
Health Check: NgrokTunnel: "https://unhealable-jonell-intermittingly.ngrok-free.dev" -> "http://localhost:8003"/health

Endpoint for orchestration:
POST NgrokTunnel: "https://unhealable-jonell-intermittingly.ngrok-free.dev" -> "http://localhost:8003"/generate
Request body: {"text": "user text from STT"}

Response: {"text": "reply", "description": "voice description"}
