# Text-to-Image AI Service in Google Colab

This notebook sets up and runs a text-to-image generation service using Stable Diffusion with TensorFlow and KerasCV.

## Requirements
- Enable GPU runtime: Runtime → Change runtime type → Hardware accelerator → GPU
- High-RAM runtime recommended for better performance

## Features
- 🎨 Stable Diffusion image generation
- 🚀 FastAPI REST API
- 🌐 Public URL via ngrok
- 📱 Interactive web interface (FastAPI's auto-generated API docs at `/docs`)

## Step 1: Check GPU and Set Up Environment

In [None]:
# Check GPU availability
!nvidia-smi

# Check Python version
import sys
print(f"Python version: {sys.version}")

# Set up environment variables
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
os.environ['COLAB_GPU'] = '1'

print("✅ Environment check complete")

## Step 2: Install Dependencies

In [None]:
# Install system dependencies
# Refresh the apt package index first; -qq keeps the output quiet.
!apt-get update -qq
# Install the listed shared libraries without prompting (-y) and quietly (-qq).
# NOTE(review): presumably these are native libraries needed by the imaging
# stack — confirm they are all still required on the current Colab image.
!apt-get install -y -qq libgl1-mesa-glx libglib2.0-0 libsm6 libxext6 libxrender-dev libgomp1

print("✅ System dependencies installed")

In [None]:
# Install Python dependencies
!pip install -q tensorflow>=2.15.0
!pip install -q keras-cv>=0.6.0
!pip install -q fastapi>=0.104.0
!pip install -q uvicorn[standard]>=0.24.0
!pip install -q python-multipart>=0.0.6
!pip install -q pillow>=10.0.0
!pip install -q pydantic>=2.5.0
!pip install -q pydantic-settings>=2.1.0
!pip install -q python-dotenv>=1.0.0
!pip install -q structlog>=23.2.0
!pip install -q nest-asyncio
!pip install -q pyngrok

print("✅ Python dependencies installed")

## Step 3: Set Up Authentication for ngrok (Optional but Recommended)

In [None]:
# Set up ngrok authentication (optional but recommended for stable URLs)
# Get your auth token from: https://dashboard.ngrok.com/get-started/your-authtoken

import os

from pyngrok import ngrok, conf

# Prefer supplying the token through the NGROK_AUTH_TOKEN environment variable
# so the secret never has to be pasted into (and saved with) the notebook.
# Alternatively, uncomment and add your ngrok auth token:
# ngrok.set_auth_token("YOUR_NGROK_AUTH_TOKEN_HERE")
ngrok_auth_token = os.environ.get("NGROK_AUTH_TOKEN", "").strip()

if ngrok_auth_token:
    ngrok.set_auth_token(ngrok_auth_token)
    print("✅ ngrok configured with auth token from NGROK_AUTH_TOKEN")
else:
    # No token supplied: behave exactly as before and just remind the user.
    print("✅ ngrok configured (add auth token for better reliability)")

## Step 4: Create and Run the Text-to-Image Service

In [None]:
import os
import time
import asyncio
import nest_asyncio
from typing import Optional, List
import tensorflow as tf
import keras_cv
import numpy as np
from PIL import Image
import io
import base64
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
import uvicorn
from pyngrok import ngrok
import threading

# Enable nested asyncio for Colab
nest_asyncio.apply()

print("✅ Imports complete")

In [None]:
# Configure TensorFlow for Colab
def setup_tensorflow():
    """Prepare TensorFlow for the Colab runtime and report GPU presence.

    When at least one GPU is visible, memory growth is switched on for each
    device and the global Keras policy is set to ``mixed_float16``. A
    ``RuntimeError`` raised during that configuration is printed, not raised.

    Returns:
        bool: True when TensorFlow lists at least one physical GPU.
    """
    detected = tf.config.experimental.list_physical_devices("GPU")
    has_gpu = len(detected) > 0

    # Nothing to configure on a CPU-only runtime.
    if not has_gpu:
        print("⚠️ No GPU detected, using CPU (will be slow)")
        return has_gpu

    try:
        for device in detected:
            tf.config.experimental.set_memory_growth(device, True)
        tf.keras.mixed_precision.set_global_policy("mixed_float16")
        print(f"✅ Configured {len(detected)} GPU(s) with mixed precision")
    except RuntimeError as err:
        print(f"⚠️ GPU setup error: {err}")

    return has_gpu

gpu_available = setup_tensorflow()

In [None]:
# Define API models
# Request body for image generation: the prompt is mandatory, the remaining
# fields have defaults, and every bound below is enforced by pydantic.
class ImageRequest(BaseModel):
    prompt: str = Field(
        ...,
        min_length=1,
        max_length=500,
        description="Text description of the image",
    )
    num_steps: int = Field(
        default=25,
        ge=10,
        le=50,
        description="Number of diffusion steps (lower = faster)",
    )
    guidance_scale: float = Field(
        default=7.5,
        ge=1.0,
        le=15.0,
        description="How closely to follow the prompt",
    )
    seed: Optional[int] = Field(
        default=None,
        ge=0,
        description="Random seed for reproducible results",
    )

# Response payload returned by the /generate endpoint.
class ImageResponse(BaseModel):
    # PNG-encoded image bytes, base64-encoded into text.
    image_base64: str
    # The prompt the image was generated from, echoed back to the caller.
    prompt: str
    # Wall-clock seconds the request spent generating and encoding the image.
    generation_time: float
    # The generation settings used: num_steps, guidance_scale and seed.
    parameters: dict

print("✅ API models defined")

In [None]:
# Create the Stable Diffusion model class
class ColabStableDiffusion:
    """Simplified Stable Diffusion model for Colab.

    Wraps a KerasCV ``StableDiffusion`` pipeline configured for 512x512
    output. The pipeline object is created when the wrapper is constructed.
    """

    def __init__(self):
        self.model = None
        self.load_model()

    def load_model(self):
        """Load Stable Diffusion model.

        Raises:
            Exception: Whatever the KerasCV constructor raised; the error is
                printed and then re-raised unchanged.
        """
        print("🔄 Loading Stable Diffusion model (this may take a few minutes)...")

        try:
            self.model = keras_cv.models.StableDiffusion(
                img_width=512,
                img_height=512,
                jit_compile=False,  # Disable for Colab compatibility
            )
            print("✅ Stable Diffusion model loaded successfully!")
        except Exception as e:
            print(f"❌ Model loading failed: {e}")
            raise

    def generate_image(self, prompt: str, num_steps: int = 25,
                      guidance_scale: float = 7.5, seed: Optional[int] = None):
        """Generate image from text prompt.

        Args:
            prompt: Text description of the desired image.
            num_steps: Number of diffusion steps to run.
            guidance_scale: Classifier-free guidance strength; forwarded to
                KerasCV as ``unconditional_guidance_scale``.
            seed: Optional seed for the diffusion noise. ``None`` leaves the
                noise unseeded.

        Returns:
            PIL.Image.Image: The first (and only) image of the batch.

        Raises:
            RuntimeError: If the model has not been loaded.
            Exception: Any error from the pipeline is printed and re-raised.
        """
        if self.model is None:
            raise RuntimeError("Model not loaded")

        try:
            print(f"🎨 Generating image for: {prompt[:50]}...")
            start_time = time.time()

            # KerasCV names the guidance argument `unconditional_guidance_scale`
            # and accepts the seed directly, so the seed is scoped to this call
            # instead of mutating the process-wide TF/NumPy RNG state from a
            # worker thread.
            generated_images = self.model.text_to_image(
                prompt=prompt,
                batch_size=1,
                num_steps=num_steps,
                unconditional_guidance_scale=guidance_scale,
                seed=seed,
            )

            # Convert to PIL Image. KerasCV already returns uint8 pixels in
            # [0, 255]; rescaling those again would wash the image out to
            # white, so only float output is mapped from [-1, 1].
            img_array = np.asarray(generated_images[0])
            if img_array.dtype != np.uint8:
                img_array = (img_array + 1.0) * 127.5
                img_array = np.clip(img_array, 0, 255).astype(np.uint8)
            pil_image = Image.fromarray(img_array)

            generation_time = time.time() - start_time
            print(f"✅ Image generated in {generation_time:.1f} seconds")

            return pil_image

        except Exception as e:
            print(f"❌ Generation failed: {e}")
            raise

# Initialize the model (this will take a few minutes)
print("Initializing Stable Diffusion model...")
sd_model = ColabStableDiffusion()

In [None]:
# Create FastAPI application
# Application object that the route decorators in this notebook register
# their handlers on; title, description and version are passed to FastAPI
# as the service's metadata.
app = FastAPI(
    title="Text-to-Image AI (Colab)",
    description="Stable Diffusion text-to-image generation running in Google Colab",
    version="1.0.0"
)

@app.post("/generate", response_model=ImageResponse)
async def generate_image_endpoint(request: ImageRequest):
    """Generate image from text prompt."""
    # Params: `request` is the validated ImageRequest body.
    # Returns: ImageResponse with the base64 PNG, the prompt, the elapsed
    #          seconds and the generation parameters.
    # Raises: HTTPException(500) wrapping any failure during generation or
    #         encoding.
    start_time = time.time()

    try:
        # Inference is blocking, so hand it to the default thread-pool
        # executor and keep the event loop free. get_running_loop() is the
        # supported way to obtain the loop from inside a coroutine.
        loop = asyncio.get_running_loop()
        pil_image = await loop.run_in_executor(
            None,
            sd_model.generate_image,
            request.prompt,
            request.num_steps,
            request.guidance_scale,
            request.seed,
        )

        # Encode the image as PNG, then base64 so it can travel inside JSON.
        with io.BytesIO() as buffer:
            pil_image.save(buffer, format="PNG")
            img_base64 = base64.b64encode(buffer.getvalue()).decode()

        generation_time = time.time() - start_time

        return ImageResponse(
            image_base64=img_base64,
            prompt=request.prompt,
            generation_time=generation_time,
            parameters={
                "num_steps": request.num_steps,
                "guidance_scale": request.guidance_scale,
                "seed": request.seed
            }
        )

    except Exception as e:
        # Top-level boundary: report the failure to the client and keep the
        # original exception attached as the cause for server-side debugging.
        raise HTTPException(status_code=500, detail=f"Generation failed: {e}") from e

@app.get("/health")
async def health_check():
    """Health check endpoint."""
    # The model counts as loaded once the wrapper holds a pipeline object.
    model_ready = sd_model.model is not None

    report = {
        "status": "healthy",
        "model_loaded": model_ready,
        "gpu_available": gpu_available,
        "environment": "Google Colab",
    }
    return report

@app.get("/")
async def root():
    """Root endpoint with usage instructions."""
    # Start with the banner, then add the endpoint paths and the GPU flag.
    overview = {"message": "Text-to-Image AI Service running in Google Colab"}
    overview["docs_url"] = "/docs"
    overview["health_url"] = "/health"
    overview["generate_url"] = "/generate"
    overview["gpu_available"] = gpu_available
    return overview

print("✅ FastAPI application created")

## Step 5: Start the Server with Public URL

In [None]:
# Start the server with ngrok tunnel
def start_server():
    """Serve the FastAPI app with uvicorn on every interface, port 8000."""
    server_options = {"host": "0.0.0.0", "port": 8000, "log_level": "info"}
    uvicorn.run(app, **server_options)

# Start ngrok tunnel
tunnel = ngrok.connect(8000)
# Recent pyngrok releases return an NgrokTunnel object whose `public_url`
# attribute holds the address; formatting the object itself would produce its
# repr rather than a usable URL. getattr keeps a plain-string return working.
public_url = getattr(tunnel, "public_url", tunnel)

print("\n" + "="*60)
print("🚀 TEXT-TO-IMAGE AI SERVICE IS STARTING!")
print("="*60)
print(f"🌐 Public URL: {public_url}")
print(f"📖 API Documentation: {public_url}/docs")
print(f"❤️ Health Check: {public_url}/health")
print(f"🎨 Generate Images: {public_url}/generate")
print("="*60)
print("\n📱 How to use:")
print("1. Click the API Documentation link above")
print("2. Try the /generate endpoint with a text prompt")
print("3. Or use curl/Python requests to call the API")
print("\n⚠️ Note: Keep this cell running to maintain the service")
print("="*60)

# Start the server in a separate thread; daemon=True lets the interpreter
# exit without waiting for the server thread.
server_thread = threading.Thread(target=start_server)
server_thread.daemon = True
server_thread.start()

print("\n✅ Server started! The service is now accessible via the public URL above.")

## Step 6: Test the API (Optional)

In [None]:
# Test the API directly from the notebook
import requests
import json
from IPython.display import Image as IPImage
from io import BytesIO

def test_generation(prompt, num_steps=25, timeout=600):
    """Test image generation through the public API.

    Args:
        prompt: Text prompt to send to the /generate endpoint.
        num_steps: Number of diffusion steps to request.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        An IPython ``Image`` built from the decoded PNG bytes, or ``None``
        when the request fails or the server answers with a non-200 status.
    """
    print(f"Testing generation with prompt: {prompt}")

    # `public_url` may be a plain string or a pyngrok tunnel object exposing
    # the address as `.public_url`; normalise to a string without a trailing
    # slash either way.
    base_url = str(getattr(public_url, "public_url", public_url)).rstrip("/")

    # Make API request. Without a timeout a stalled tunnel would hang the
    # cell indefinitely.
    try:
        response = requests.post(
            f"{base_url}/generate",
            json={
                "prompt": prompt,
                "num_steps": num_steps,
                "guidance_scale": 7.5
            },
            timeout=timeout,
        )
    except requests.RequestException as exc:
        print(f"❌ Request failed: {exc}")
        return None

    if response.status_code != 200:
        print(f"❌ Error: {response.status_code} - {response.text}")
        return None

    result = response.json()
    print(f"✅ Generation completed in {result['generation_time']:.1f} seconds")

    # Decode and display image
    img_data = base64.b64decode(result['image_base64'])
    return IPImage(img_data)

# Test with a simple prompt
test_image = test_generation("a cute cat sitting in a garden")
if test_image:
    display(test_image)

## Step 7: Keep the Service Running

In [None]:
# Keep the service running
# `public_url` may be a plain string or a pyngrok tunnel object exposing the
# address as `.public_url`; resolve the URL text once, because
# ngrok.disconnect() expects the URL string.
service_url = getattr(public_url, "public_url", public_url)

print("🔄 Keeping the service running...")
print(f"🌐 Your service is available at: {service_url}")
print("\n💡 Tips:")
print("- Keep this cell running to maintain the service")
print("- Use the /docs endpoint for interactive API testing")
print("- Lower num_steps (10-25) for faster generation")
print("- Higher guidance_scale (7.5-15) for more prompt adherence")
print("\n⚠️ Note: Colab sessions timeout after ~12 hours of inactivity")

try:
    # Keep the notebook alive, printing a heartbeat once a minute
    while True:
        time.sleep(60)
        print(f"⏰ Service running... {time.strftime('%H:%M:%S')}")
except KeyboardInterrupt:
    # Interrupting the cell is the intended way to stop: close the tunnel.
    print("\n🛑 Service stopped by user")
    ngrok.disconnect(service_url)
    print("✅ Cleanup complete")

## Example API Usage

Once your service is running, you can use it from any application:

### Python Example:
```python
import requests
import base64
from PIL import Image
from io import BytesIO

# Replace with your ngrok URL
url = "YOUR_NGROK_URL/generate"

response = requests.post(url, json={
    "prompt": "a beautiful sunset over mountains",
    "num_steps": 25,
    "guidance_scale": 7.5
})

if response.status_code == 200:
    result = response.json()
    img_data = base64.b64decode(result['image_base64'])
    image = Image.open(BytesIO(img_data))
    image.show()
```

### cURL Example:
```bash
curl -X POST "YOUR_NGROK_URL/generate" \
  -H "Content-Type: application/json" \
  -d '{
    "prompt": "a futuristic city at night",
    "num_steps": 30,
    "guidance_scale": 8.0
  }'
```