# LocalLab Server Test Notebook

This notebook helps test the LocalLab server package on Google Colab.

In [None]:
# First, let's make sure we have a clean environment
!pip uninstall -y locallab
!pip cache purge

In [None]:
# Install required dependencies first
!pip install --upgrade transformers accelerate
!pip install torch pyngrok fastapi uvicorn huggingface_hub

In [None]:
# Install the package from TestPyPI
!pip install --no-cache-dir --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ locallab==0.1.8

In [None]:
# Set environment variables
#
# This cell collects the ngrok token and the model choice interactively, then
# exports every LOCALLAB_* setting through os.environ. It is safe to re-run:
# each run fully overwrites (or clears) the variables it manages.
import logging
import os
from getpass import getpass

# Configure logging. force=True replaces any handlers already attached to the
# root logger; without it basicConfig() is a no-op when the notebook host has
# pre-configured logging, and the INFO summary at the end of this cell would
# not be displayed.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True
)

# Set your ngrok token. getpass() is used instead of input() so the secret is
# not echoed into the cell output that gets saved with the notebook.
NGROK_TOKEN = getpass("Enter your ngrok token: ").strip()
if not NGROK_TOKEN:
    raise ValueError("Ngrok token is required for running the server")
os.environ["NGROK_AUTH_TOKEN"] = NGROK_TOKEN

# Choose model configuration
print("\nAvailable default models:")
print("1. microsoft/phi-2 (Default, 2.7B parameters)")
print("2. TinyLlama/TinyLlama-1.1B-Chat-v1.0 (1.1B parameters)")
print("3. stabilityai/stable-code-3b (3B parameters)")
print("4. Custom model from Hugging Face")

# An empty answer selects option 1.
choice = input("\nChoose model (1-4, default is 1): ").strip() or "1"

if choice == "4":
    custom_model = input("Enter Hugging Face model ID (e.g., meta-llama/Llama-3.2-3B-Instruct): ").strip()
    if not custom_model:
        raise ValueError("Custom model ID is required when choosing option 4")
    os.environ["LOCALLAB_CUSTOM_MODEL"] = custom_model
    # The default-model name is the last path segment of the Hugging Face ID.
    os.environ["LOCALLAB_DEFAULT_MODEL"] = custom_model.split("/")[-1]
else:
    models = {
        "1": "phi-2",
        "2": "tinyllama-1.1b",
        "3": "stable-code-3b"
    }
    if choice not in models:
        # Keep the original fallback to phi-2, but tell the user it happened.
        logging.warning("Unrecognized choice %r; falling back to phi-2", choice)
    # Clear a custom-model selection left over from an earlier run of this
    # cell so it cannot linger next to the built-in choice made now.
    os.environ.pop("LOCALLAB_CUSTOM_MODEL", None)
    os.environ["LOCALLAB_DEFAULT_MODEL"] = models.get(choice, "phi-2")

# Configure performance settings
os.environ["LOCALLAB_ENABLE_FLASH_ATTENTION"] = "true"
os.environ["LOCALLAB_ENABLE_ATTENTION_SLICING"] = "true"
os.environ["LOCALLAB_ENABLE_CPU_OFFLOADING"] = "true"
os.environ["LOCALLAB_ENABLE_BETTERTRANSFORMER"] = "true"
os.environ["LOCALLAB_ENABLE_QUANTIZATION"] = "true"
os.environ["LOCALLAB_QUANTIZATION_TYPE"] = "int8"
os.environ["LOCALLAB_MIN_FREE_MEMORY"] = "2000"  # NOTE(review): unit not visible here (presumably MB) -- confirm

# Configure server settings
os.environ["LOCALLAB_MAX_CONCURRENT_REQUESTS"] = "10"
os.environ["LOCALLAB_ENABLE_DYNAMIC_BATCHING"] = "true"
os.environ["LOCALLAB_BATCH_TIMEOUT"] = "100"  # NOTE(review): unit not visible here -- confirm
os.environ["LOCALLAB_ENABLE_CACHE"] = "true"
os.environ["LOCALLAB_CACHE_TTL"] = "3600"  # NOTE(review): presumably seconds -- confirm

# Summarize the key settings that were just exported.
logging.info("Using model: %s", os.environ.get('LOCALLAB_DEFAULT_MODEL'))
logging.info("Flash Attention: %s", os.environ.get('LOCALLAB_ENABLE_FLASH_ATTENTION'))
logging.info("Quantization: %s", os.environ.get('LOCALLAB_QUANTIZATION_TYPE'))

In [None]:
# Import and start the server
try:
    from locallab import start_server, MODEL_REGISTRY, can_run_model
    print("Successfully imported locallab")
    
    # Check if model can run
    model_id = os.environ.get("LOCALLAB_DEFAULT_MODEL", "phi-2")
    if not can_run_model(model_id):
        print(f"Warning: {model_id} may not run optimally on current resources")
        print("Available models:")
        for model in MODEL_REGISTRY:
            if can_run_model(model):
                print(f"- {model}: {MODEL_REGISTRY[model]['description']}")
        use_model = input("Choose a different model or press Enter to continue anyway: ").strip()
        if use_model:
            os.environ["LOCALLAB_DEFAULT_MODEL"] = use_model
except ImportError as e:
    print(f"Import error: {e}")
    print("\nTrying to find the module:")
    !find /usr/local/lib/python3.* -name "locallab*"
    raise

In [None]:
# Start the server
#
# Flow: try the configured model first. A ValueError is treated as a
# configuration problem and reported as a message. Any other Exception
# triggers one retry with phi-2 and int8 quantization; if that retry also
# fails, troubleshooting hints are printed and the retry's error is re-raised.
try:
    start_server(use_ngrok=True)
except ValueError as e:
    # Configuration problems are printed as guidance rather than re-raised.
    if "NGROK_AUTH_TOKEN" in str(e):
        print("Error: Please set your ngrok token in the cell above")
    else:
        print(f"Configuration error: {e}")
        print("\nPlease check your environment variables and try again")
except Exception as e:
    print(f"Error starting server: {e}")
    print("\nTrying to fall back to default model...")
    try:
        # Drop any custom-model selection exported by the configuration cell
        # so the retry is not steered back to the model that just failed.
        # NOTE(review): assumes locallab lets LOCALLAB_CUSTOM_MODEL take
        # precedence over LOCALLAB_DEFAULT_MODEL -- confirm against the package.
        os.environ.pop("LOCALLAB_CUSTOM_MODEL", None)
        os.environ["LOCALLAB_DEFAULT_MODEL"] = "phi-2"
        os.environ["LOCALLAB_ENABLE_QUANTIZATION"] = "true"
        os.environ["LOCALLAB_QUANTIZATION_TYPE"] = "int8"
        start_server(use_ngrok=True)
    except Exception as e2:
        print(f"Fallback also failed: {e2}")
        print("\nPlease try the following:")
        print("1. Restart the runtime")
        print("2. Check your internet connection")
        print("3. Verify your ngrok token")
        print("4. Try a smaller model")
        # Bare raise re-raises the fallback failure (e2) with its traceback.
        raise