# Demo: API Interaction with Titan Takeoff Server


Hardware: Each model runs on 2 GPUs of a 4xL4 machine.

1. Llama 8B (Text Generation)
2. InternVL (Image Reasoning Model)

In [None]:
import requests
import json

# List the reader groups reported by the server's management endpoint.
url = "http://rag-demo:3001/reader_groups"

# A plain GET needs no custom headers or body. The previous call passed
# `headers` / `payload` names that were never defined, which raised NameError
# before any request was sent.
response = requests.get(url, timeout=30)
response.raise_for_status()  # surface HTTP errors instead of parsing an error body

parsed_data = json.loads(response.text)

for reader, details in parsed_data.items():
    # Entries whose key contains "reader" are skipped; all others are printed.
    if "reader" in reader:
        continue
    print(f"Reader: {reader}")
    for detail in details:
        print(f"  Device: {detail['device']}")
        print(f"  Model Name: {detail['model_name']}")
        print(f"  Model Type: {detail['model_type']}")
        print(f"  Ready: {detail['ready']}")
        print("-" * 40)  # Separator for readability


In [None]:
from IPython.display import Image, display

# Render reader.png inline, sized 400x500.
reader_image = Image(filename="reader.png", width=400, height=500)
display(reader_image)

## Model Example: Llama 8B (Text Generation Model)

Input: Text \
Output: Text

### Interacting with Takeoff Client Generate Stream Endpoint

In [None]:
# Reference only: a generate_stream signature quoted inside a string literal,
# so it is evaluated as a string expression and defines nothing.
'''def generate_stream(text: Union[str, List[str]],
                    sampling_temperature: float = None,
                    sampling_topp: float = None,
                    sampling_topk: int = None,
                    repetition_penalty: float = None,
                    no_repeat_ngram_size: int = None,
                    max_new_tokens: int = None,
                    min_new_tokens: int = None,
                    regex_string: str = None,
                    json_schema: dict = None,
                    prompt_max_tokens: int = None,
                    consumer_group: str = "primary",
                    image_path: Optional[Path] = None) -> Iterator[Event]

                    '''

from takeoff_client import TakeoffClient

# Stream a text completion from the "llama-8b" consumer group and print the
# event payloads back-to-back as they arrive.
client = TakeoffClient(base_url="http://rag-demo")

generator = client.generate_stream(
    "What is Generative AI?",
    consumer_group="llama-8b",
    sampling_temperature=0.1,
    max_new_tokens=250,
)
for chunk in generator:
    print(chunk.data, end="")

### OpenAI-Compatible Endpoints for Seamless Integration with Other Frameworks

In [None]:
from openai import OpenAI

# Point the OpenAI SDK at the server's OpenAI-style endpoint.
client = OpenAI(
    base_url="http://rag-demo:3003/v1",
    api_key="no api needed",
)

chat_completion = client.chat.completions.create(
    model="llama-8b",  # should be consumer group in takeoff
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is Generative AI?"},
    ],
    stream=False,
)
# Chat completions return the reply under `message.content`; `.text` exists
# only on legacy (non-chat) completion choices and fails here.
print(chat_completion.choices[0].message.content)

In [None]:
from openai import OpenAI

# Point the OpenAI SDK at the server's OpenAI-style endpoint.
client = OpenAI(
    base_url="http://rag-demo:3003/v1",
    api_key="no api needed",
)

chat_completion = client.chat.completions.create(
    model="llama-8b",  # should be consumer group in takeoff
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is Digital Pathology?"},
    ],
    stream=False,
    max_tokens=250,
)
# Chat completions return the reply under `message.content`; `.text` exists
# only on legacy (non-chat) completion choices and fails here.
print(chat_completion.choices[0].message.content)

## Model Example #2 : internvl-4b (Image Reasoning Model)

Input: Image, Text (Prompt) \
Output: Text

Example #1:

In [None]:
from takeoff_client import TakeoffClient
from pathlib import Path

# Send image1.jpg together with a prompt to the "internvl-4b" consumer group
# and print the streamed event payloads back-to-back.
file_path = Path("image1.jpg")

client = TakeoffClient(base_url="http://rag-demo")

generator = client.generate_stream(
    " <image> Describe what you see in the image",
    image_path=file_path,
    consumer_group="internvl-4b",
    sampling_temperature=0.1,
    max_new_tokens=250,
)
for chunk in generator:
    print(chunk.data, end="")

In [None]:
from IPython.display import Image, display

# Render image1.jpg inline, sized 200x300.
first_image = Image(filename="image1.jpg", width=200, height=300)
display(first_image)

Example #2:

In [None]:
from IPython.display import Image, display

# Render image2.jpg inline, sized 200x100.
second_image = Image(filename="image2.jpg", width=200, height=100)
display(second_image)

In [None]:
from takeoff_client import TakeoffClient
from pathlib import Path

# Send image2.jpg together with a prompt to the "internvl-4b" consumer group
# and print the streamed event payloads back-to-back.
file_path = Path("image2.jpg")

client = TakeoffClient(base_url="http://rag-demo")

generator = client.generate_stream(
    " <image> Describe the injury you see in the image",
    image_path=file_path,
    consumer_group="internvl-4b",
    sampling_temperature=0.1,
    max_new_tokens=300,
)
for chunk in generator:
    print(chunk.data, end="")

In [None]:
from IPython.display import Image, display

# Render image3.jpg inline, sized 200x300.
third_image = Image(filename="image3.jpg", width=200, height=300)
display(third_image)

In [None]:
from takeoff_client import TakeoffClient
from pathlib import Path

# Send image3.jpg together with a prompt to the "internvl-4b" consumer group
# and print the streamed event payloads back-to-back.
file_path = Path("image3.jpg")

client = TakeoffClient(base_url="http://rag-demo")

generator = client.generate_stream(
    " <image> Describe anything you see in the image",
    image_path=file_path,
    consumer_group="internvl-4b",
    sampling_temperature=0.1,
    max_new_tokens=250,
)
for chunk in generator:
    print(chunk.data, end="")

## Metric API via Prometheus

In [None]:
from prometheus_api_client import PrometheusConnect

prom = PrometheusConnect(url="http://rag-demo:9090", disable_ssl=True)

# Fetch the metric names once and reuse the result for both the count and the
# cell output, rather than querying Prometheus twice.
all_metric_names = prom.all_metrics()
print(f"Total Metric Count: {len(all_metric_names)}")
all_metric_names

In [None]:
from prometheus_api_client import PrometheusConnect, MetricRangeDataFrame
# `plt` must alias the pyplot submodule; `import matplotlib as plt` bound the
# top-level package, which has no `figure` / `plot` / `show`.
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

prom = PrometheusConnect(url="http://rag-demo:9090", disable_ssl=True)

# Query window: the 24 hours leading up to now.
end_time = datetime.now()
start_time = end_time - timedelta(days=1)

metric_data = prom.get_metric_range_data(
    metric_name="http_requests_total",
    start_time=start_time,
    end_time=end_time,
)

# Wrap the raw range-query result in a DataFrame for filtering and plotting.
df = MetricRangeDataFrame(metric_data)

In [None]:
# Show the DataFrame built from the http_requests_total range query.
df

In [None]:
import matplotlib.pyplot as plt

# Step 1: Keep only rows whose 'path' label mentions the endpoint of interest.
# na=False treats rows without a 'path' label as non-matching; without it the
# mask contains NaN and boolean indexing raises.
generate_paths = df[df['path'].str.contains('/generate', case=False, na=False)]
image_generate_paths = df[df['path'].str.contains('/image_generate', case=False, na=False)]

# Step 2: Sum the sample values per timestamp across all matching series.
# NOTE(review): relies on 'timestamp' being a column or index level of df - confirm.
generate_grouped = generate_paths.groupby('timestamp')['value'].sum()
image_generate_grouped = image_generate_paths.groupby('timestamp')['value'].sum()

# Step 3: Plot both series on one figure.
plt.figure(figsize=(10, 6))

# Plot the generate data
plt.plot(generate_grouped.index, generate_grouped.values, label='Generate', color='blue')

# Plot the image_generate data
plt.plot(image_generate_grouped.index, image_generate_grouped.values, label='Image Generate', color='green')

# Step 4: Add labels, title, and legend
plt.title("Generate vs Image Generate Requests Over Time")
plt.xlabel("Time")
plt.ylabel("Request Count")
plt.legend()
plt.grid(True)

# Show the plot
plt.show()