# Ray Serve Facebook BART Large CNN Model for Text Summarization

This notebook shows how to use the [Ray Serve](../../../charts/machine-learning/serving/rayserve/) Helm chart to serve the [facebook/bart-large-cnn](https://huggingface.co/facebook/bart-large-cnn/blob/main/config.json) model for text summarization.

## Setup and Imports

In [None]:
! pip install kubernetes
! pip install boto3

In [None]:
import os
import subprocess
import time
from kubernetes import client, config

# Load Kubernetes client configuration from the local kube-config
config.load_kube_config()
# Core API client: used in this notebook to list namespaced events and services
v1 = client.CoreV1Api()
# Custom-objects API client: used in this notebook to list RayService resources
custom_api = client.CustomObjectsApi()

def get_rayservice_events(rayservice_name: str = None,
                          namespace: str = "kubeflow-user-example-com") -> list:
    """Return the RayService controller events recorded in a namespace.

    Args:
        rayservice_name: Name of the RayService whose events are wanted.
            ``None`` disables the name filter, so events for every
            RayService in the namespace are returned.
        namespace: Namespace to list events from.

    Returns:
        Event objects whose involved object has kind ``RayService`` and whose
        source component is ``rayservice-controller``. An empty list is
        returned when nothing matches or when the API call fails (the error
        is printed, not raised).
    """
    # Only the API call can raise ApiException, so keep the try body minimal.
    try:
        events = v1.list_namespaced_event(namespace=namespace)
    except client.ApiException as e:
        print(f"Error fetching events: {e}")
        return []

    rayservice_events = []
    for event in events.items:
        involved = event.involved_object
        # `source` may be unset on an event; treat that as "not from the controller".
        component = event.source.component if event.source else None
        if involved.kind != "RayService" or component != "rayservice-controller":
            continue

        # Apply the name filter only when a name was actually requested.
        if rayservice_name is not None and involved.name != rayservice_name:
            continue

        rayservice_events.append(event)
    return rayservice_events
    
def is_running_event(event) -> bool:
    """Tell whether an event reports a running and healthy RayService.

    Args:
        event: Event object exposing ``type``, ``reason`` and ``message``.

    Returns:
        True when the event type is ``Normal``, the reason is ``Running`` and
        the message contains "running and healthy" (case-insensitive).
        An event without a message is never considered a running event.
    """
    # `message` can be unset on an event; fall back to "" instead of failing on None.
    message = event.message or ""
    return (
        event.type == "Normal" and
        event.reason == "Running" and
        "running and healthy" in message.lower()
    )

def detect_running_events(rayservice_name: str = None, 
                          namespace: str="kubeflow-user-example-com") -> list:
    """Return the RayService events that report a running and healthy service.

    Fetches the events via ``get_rayservice_events`` for the given name and
    namespace and keeps those accepted by ``is_running_event``.
    """
    candidates = get_rayservice_events(rayservice_name=rayservice_name, namespace=namespace)
    return [candidate for candidate in candidates if is_running_event(candidate)]

def wait_for_rayservice_ready(release_name, namespace='kubeflow-user-example-com', timeout=1800,
                              poll_interval=60):
    """Wait for the RayService of a Helm release to be running and healthy.

    The RayService is located by its ``app.kubernetes.io/instance`` label.
    It counts as ready once its ``serviceStatus`` is ``running``
    (case-insensitive) and a "running and healthy" event has been recorded
    for it in the same namespace.

    Args:
        release_name: Helm release name to match against the
            ``app.kubernetes.io/instance`` label.
        namespace: Namespace holding the RayService and its events.
        timeout: Maximum number of seconds to keep polling.
        poll_interval: Seconds to sleep between polls.

    Returns:
        True once the RayService is running and healthy, False on timeout.
    """
    print(f"Waiting for RayService '{release_name}' to be ready...")
    deadline = time.time() + timeout

    while time.time() < deadline:
        try:
            # Check RayService status
            rayservices = custom_api.list_namespaced_custom_object(
                group="ray.io",
                version="v1",
                namespace=namespace,
                plural="rayservices"
            )

            matching_rayservice = next(
                (rs for rs in rayservices['items']
                 if rs.get('metadata', {}).get('labels', {}).get('app.kubernetes.io/instance') == release_name),
                None,
            )

            if not matching_rayservice:
                print(f"No RayService found for release: {release_name}, waiting...")
            else:
                rayservice_name = matching_rayservice['metadata']['name']
                status = matching_rayservice.get('status', {})
                service_status = status.get('serviceStatus', 'Unknown')

                print(f"RayService {rayservice_name}: {service_status}")

                if service_status.lower() == 'running':
                    # Look for the event in the namespace being polled, not the
                    # helper's default namespace.
                    if detect_running_events(rayservice_name=rayservice_name,
                                             namespace=namespace):
                        print(f"RayService {rayservice_name} in namespace {namespace} is Running and Healthy!")
                        return True
                    print("Waiting for RayService event: Running and Healthy")

        except Exception as e:
            # Best effort: report the problem and retry on the next poll.
            print(f"Error checking RayService: {e}")

        time.sleep(poll_interval)

    print("Timeout waiting for RayService to be Running and Healthy")
    return False

# Switch into the repository checkout under the user's home directory and
# report the resulting working directory
repo_dir = os.path.expanduser('~/amazon-eks-machine-learning-with-terraform-and-kubeflow')
os.chdir(repo_dir)
print(f"Working directory: {os.getcwd()}")

## Step 1: Build and Push Docker Container

**Note:** This step builds and pushes a custom Docker image for Ray Serve. The AWS region is detected automatically from your AWS configuration.

In [None]:
import sys
import boto3

# Create a Boto3 session
session = boto3.session.Session()

# Access the region_name attribute to get the current region
current_region = session.region_name
if not current_region:
    # Fail early with a clear message instead of an opaque TypeError from Popen
    # when no region is configured.
    raise RuntimeError(
        "No AWS region is configured; set AWS_DEFAULT_REGION or run "
        "'aws configure' before building the container."
    )

cmd = ['./containers/ray-pytorch/build_tools/build_and_push.sh', current_region]

# Start the subprocess with line-buffered, merged stdout/stderr; the context
# manager closes the pipe and waits for the process on exit.
with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                      text=True, bufsize=1) as process:
    # Stream output line by line
    for line in process.stdout:
        # end='' prevents double newlines; flush=True forces immediate output
        print(line, end='', flush=True)

    # Wait for the process to complete and get the return code
    return_code = process.wait()

if return_code != 0:
    print(f"\nProcess exited with return code: {return_code}")
else:
    print("\nProcess completed successfully")

## Step 2: Launch Ray Service

In [None]:
# Install the Ray Serve Helm chart for this example into the target namespace
cmd = [
    'helm', 'install', '--debug', 'rayserve-facebook-bart-large-cnn',
    'charts/machine-learning/serving/rayserve/',
    '-f', 'examples/inference/rayserve/facebook-bart-large-cnn/rayservice.yaml',
    '-n', 'kubeflow-user-example-com'
]

result = subprocess.run(cmd, capture_output=True, text=True)
print(result.stdout)
if result.stderr:
    print("STDERR:", result.stderr)

# Output on stderr does not by itself mean failure, so report the exit status
# explicitly to make a failed install visible before waiting on the service.
if result.returncode != 0:
    print(f"helm install failed with return code: {result.returncode}")

In [None]:
# Wait for RayService to be ready.
# Polls until the RayService of this Helm release is reported as running and
# healthy, or the function's default timeout (1800 seconds) elapses.
# Evaluates to True on success and False on timeout.
wait_for_rayservice_ready('rayserve-facebook-bart-large-cnn')

## Step 3: Check Service Status

In [None]:
def find_matching_helm_services(release_name, namespace='kubeflow-user-example-com'):
    """Return the Services in ``namespace`` that belong to the release's RayService.

    A Service matches when it has labels and its ``ray.io/service`` label
    equals ``rayservice-<release_name>``.
    """
    expected_label = f"rayservice-{release_name}"
    service_list = v1.list_namespaced_service(namespace=namespace)

    return [
        svc for svc in service_list.items
        if svc.metadata.labels
        and svc.metadata.labels.get('ray.io/service') == expected_label
    ]

# Check service status: list the Services whose 'ray.io/service' label matches
# this release and print them (an empty list means none were found)
services = find_matching_helm_services('rayserve-facebook-bart-large-cnn')
print(services)

## Step 4: Stop Service

When you're done with the service, run this cell to clean up resources.

In [None]:
# Uninstall the Helm release created for this example from its namespace
cmd = ['helm', 'uninstall', 'rayserve-facebook-bart-large-cnn', '-n', 'kubeflow-user-example-com']
result = subprocess.run(cmd, capture_output=True, text=True)
print(result.stdout)
if result.stderr:
    print("STDERR:", result.stderr)

# Report the exit status explicitly so a failed uninstall (resources left
# running) is not mistaken for a successful clean-up.
if result.returncode != 0:
    print(f"helm uninstall failed with return code: {result.returncode}")