# Ray Serve - scale deployed models

### Ray Serve
Ray Serve is a component of Ray that makes it easy to spread the serving of an API across several machines. Let's jump into the code.

In [None]:
%%writefile simple_api.py

from fastapi import FastAPI
from typing import Dict

app = FastAPI()

@app.get("/status")
def status() -> Dict[str, str]:
    """Liveness probe: report that the service is reachable."""
    payload = {"status": "ok"}
    return payload


@app.get("/compute")
def fibonacci(n: int):
    """Return every Fibonacci number that does not exceed ``n``.

    The sequence starts at 0 and keeps both leading 1s, e.g. ``n=10`` gives
    ``[0, 1, 1, 2, 3, 5, 8]``.

    Args:
        n: Inclusive upper bound on the values returned.

    Returns:
        The Fibonacci numbers ``<= n`` in ascending order. The list is empty
        for negative ``n`` and ``[0]`` for ``n == 0``.
    """
    if n < 0:
        return []
    # Seed with 0 so that n == 0 yields [0]: the bound is inclusive and 0 is
    # itself a Fibonacci number.
    sequence = [0]
    previous, current = 0, 1
    while current <= n:
        sequence.append(current)
        previous, current = current, previous + current
    return sequence

# fastapi run simple_api.py
# http://localhost:8000/compute?n=10

Normally you run the code above as:

```bash
fastapi run simple_api.py
```

This will run the API on a single machine. 

However, as your startup grows, how do you make sure you can continue to serve clients?

### Let's try to scale this across several machines

If the machines are not on the same network, use Tailscale to put them on the same VPN.

#### Install Ray

In [None]:
!pip install ray[all]

#### Deploy FastAPI on a cluster (via Ray)

In [None]:
%%writefile simple_api_ray.py

from fastapi import FastAPI
from typing import Dict
from ray import serve
import ray

#ray.init(address="192.168.12.239:10001") 
#ray.init(ignore_reinit_error=True)

app = FastAPI()

@app.get("/status")
def status() -> Dict[str, str]:
    """Liveness probe: report that the service is reachable."""
    payload = {"status": "ok"}
    return payload


@app.get("/compute")
def fibonacci(n: int):
    """Return every Fibonacci number that does not exceed ``n``.

    The sequence starts at 0 and keeps both leading 1s, e.g. ``n=10`` gives
    ``[0, 1, 1, 2, 3, 5, 8]``.

    Args:
        n: Inclusive upper bound on the values returned.

    Returns:
        The Fibonacci numbers ``<= n`` in ascending order. The list is empty
        for negative ``n`` and ``[0]`` for ``n == 0``.
    """
    if n < 0:
        return []
    # Seed with 0 so that n == 0 yields [0]: the bound is inclusive and 0 is
    # itself a Fibonacci number.
    sequence = [0]
    previous, current = 0, 1
    while current <= n:
        sequence.append(current)
        previous, current = current, previous + current
    return sequence

@serve.deployment
@serve.ingress(app)
class FastAPIWrapper:
    """Ray Serve deployment that fronts the module-level FastAPI ``app``.

    The body is intentionally empty: the routes are already registered on
    ``app`` as plain functions, so this class only exists to hand that app
    to Ray Serve through ``serve.ingress``.
    """

# Deploy the wrapped FastAPI app and mount it at the root path.
serve.run(
    FastAPIWrapper.bind(),
    route_prefix="/",
)

# python simple_api_ray.py
# http://localhost:8000/compute?n=10

In [None]:
%%writefile simple_api_ray2.py

import requests
from fastapi import FastAPI
from ray import serve

# 1: Define a FastAPI app and wrap it in a deployment with a route handler.
app = FastAPI()


@serve.deployment
@serve.ingress(app)
class FastAPIDeployment:
    """Ray Serve deployment whose HTTP routes are methods of the class."""

    # FastAPI parses the incoming HTTP request and supplies ``name`` for us.
    @app.get("/hello")
    def say_hello(self, name: str) -> str:
        """Build the greeting returned by ``GET /hello``."""
        greeting = f"Hello {name}!"
        return greeting


# 2: Deploy the deployment.
serve.run(
    FastAPIDeployment.bind(),
    route_prefix="/",
)

# 3: Query the deployment and print the result.
# print(requests.get("http://localhost:8000/hello", params={"name": "Theodore"}).json())
# "Hello Theodore!"