# Testing Ray Serve NLP Deployments

This notebook demonstrates how to deploy and test the NLP deployments (sentiment analysis, text classification, entity recognition, and a streaming analyzer) using Ray Serve.

In [1]:
%load_ext autoreload
%autoreload 2

import ray
from ray import serve
import time
import asyncio

# Import the deployment classes
from example_app.serve.deployments.sentiment import SentimentAnalyzer
from example_app.serve.deployments.classification import TextClassifier
from example_app.serve.deployments.entities import EntityRecognizer
from example_app.serve.deployments.streaming_analyzer import StreamingAnalyzer

# Import config
from example_app.config import RAY_ADDRESS

## Initialize Ray and Ray Serve

First, we need to initialize Ray and start Ray Serve.

In [None]:
# Connect to Ray at RAY_ADDRESS unless this kernel is already connected
if ray.is_initialized():
    print("Ray already initialized")
else:
    ray.init(address=RAY_ADDRESS, namespace="nlp_pipeline")
    print("Ray initialized")

# Bring up Ray Serve
serve.start()
print("Ray Serve started")

## Deploy the Models

Now we'll deploy the three NLP models and the streaming analyzer, each as its own Serve application.

In [None]:
# (application / deployment name, deployment class, HTTP route prefix)
deployment_specs = [
    ("sentiment_analyzer", SentimentAnalyzer, "/sentiment"),
    ("text_classifier", TextClassifier, "/text_classifier"),
    ("entity_recognizer", EntityRecognizer, "/entity_recognizer"),
    ("streaming_analyzer", StreamingAnalyzer, "/streaming_analyzer"),
]

# One RunTarget per deployment; the application and the deployment share a name
run_targets = [
    serve.RunTarget(
        name=app_name,
        target=deployment_cls.options(name=app_name).bind(),
        route_prefix=route_prefix,
    )
    for app_name, deployment_cls, route_prefix in deployment_specs
]
serve.run_many(run_targets)

# Give the deployments a short grace period to finish initializing
time.sleep(3)

## Get Deployment Handles

Next, we'll get handles to each deployment to make remote calls.

In [None]:
from ray.serve import get_deployment_handle

# Unary (request/response) handles; each app name equals its deployment name
sentiment_analyzer, text_classifier, entity_recognizer = (
    get_deployment_handle(deployment_name, app_name=deployment_name)
    for deployment_name in ("sentiment_analyzer", "text_classifier", "entity_recognizer")
)

# This handle is configured for streaming responses (consumed with `async for`)
streaming_analyzer = get_deployment_handle(
    "streaming_analyzer", app_name="streaming_analyzer"
).options(stream=True)

print("Got handles to all deployments")

## Test SentimentAnalyzer

Let's test the sentiment analyzer with some example sentences.

In [None]:
async def test_sentiment():
    sample_texts = [
        "I really enjoyed this movie, it was fantastic!",
        "The service was terrible and the food was cold.",
        "This product is okay, not great but not bad either."
    ]
    
    print("\n=== Testing Sentiment Analysis ===\n")
    for text in sample_texts:
        result = await sentiment_analyzer.analyze.remote(text)
        print(f"Text: {text}")
        print(f"Sentiment: {result['sentiment']} (Score: {result['score']:.4f})")
        print(f"Processing time: {result['processing_time']:.4f} seconds\n")

await test_sentiment()

## Test TextClassifier

Now let's test the zero-shot text classifier with some examples.

In [None]:
async def test_classification():
    sample_texts = [
        "The iPhone 13 Pro Max has an amazing camera system and excellent battery life.",
        "Scientists have discovered a new species of deep-sea fish that can glow in the dark.",
        "The company reported a 15% increase in quarterly revenue, exceeding analyst expectations."
    ]
    
    sample_labels = [
        ["technology", "sports", "politics", "entertainment"],
        ["science", "arts", "business", "health"],
        ["finance", "technology", "politics", "environment"]
    ]
    
    print("\n=== Testing Text Classification ===\n")
    for i, text in enumerate(sample_texts):
        labels = sample_labels[i]
        result = await text_classifier.classify.remote(text, labels)
        
        print(f"Text: {text}")
        print(f"Labels: {labels}")
        print("Results:")
        for label_info in result["labels"]:
            print(f"  - {label_info['label']}: {label_info['score']:.4f}")
        print(f"Processing time: {result['processing_time']:.4f} seconds\n")

await test_classification()

## Test EntityRecognizer

Next, let's test the named entity recognition model.

In [None]:
async def test_entity_recognition():
    sample_texts = [
        "Apple CEO Tim Cook announced the new iPhone at their headquarters in Cupertino, California yesterday.",
        "The European Union and the United States are working on new regulations for artificial intelligence.",
        "Microsoft's CEO Satya Nadella will be speaking at the conference in New York on Friday."
    ]
    
    print("\n=== Testing Entity Recognition ===\n")
    for text in sample_texts:
        result = await entity_recognizer.recognize_entities.remote(text)
        
        print(f"Text: {text}")
        print("Entities:")
        
        # Group entities by type
        entities_by_type = {}
        for entity in result["entities"]:
            entity_type = entity["type"]
            if entity_type not in entities_by_type:
                entities_by_type[entity_type] = []
            entities_by_type[entity_type].append(entity)
        
        # Print entities by type
        for entity_type, entities in entities_by_type.items():
            print(f"  {entity_type}:")
            for entity in entities:
                print(f"    - {entity['text']} (Score: {entity['score']:.4f})")
                
        print(f"Processing time: {result['processing_time']:.4f} seconds\n")

await test_entity_recognition()


## Test StreamingAnalyzer

Now let's test the streaming analyzer that processes text incrementally.

In [None]:

async def test_streaming_analyzer():
    sample_text = """The Apple conference in San Francisco was attended by Tim Cook, who announced new products. 
    Microsoft and Google were also represented at the event. 
    The new iPhone impressed many analysts with its innovative features. 
    The event took place at the Moscone Center in downtown San Francisco. 
    Investors reacted positively to the announcements, with Apple stock rising 3% after the conference."""
    
    print("\n=== Testing Streaming Analysis ===\n")
    print("Full text to analyze:\n")
    print(sample_text)
    print("\nStreaming results (processing chunk by chunk):\n")
    
    # Process text in streaming mode
    async for chunk_result in streaming_analyzer.stream_analysis.remote(sample_text):
        print(f"\nChunk {chunk_result['chunk_id'] + 1}/{chunk_result['total_chunks']}:")
        print(f"Text: {chunk_result['chunk_text']}")
        
        if 'sentiment' in chunk_result:
            print(f"Sentiment: {chunk_result['sentiment']} (Score: {chunk_result['sentiment_score']:.4f})")
        
        if 'entities' in chunk_result and len(chunk_result['entities']) > 0:
            print("Entities:")
            for entity in chunk_result['entities']:
                print(f"  - {entity['word']} ({entity['entity']}) - Score: {entity['score']:.4f}")
        
        print(f"Processing time: {chunk_result['processing_time']:.4f} seconds")
        print(f"Progress: {chunk_result['progress'] * 100:.1f}%")

await test_streaming_analyzer()


## Test Document Analysis

Let's also test the non-streaming document analysis that processes text incrementally but returns a single result.

In [None]:
async def test_document_analysis():
    sample_text = """The Apple conference in San Francisco was attended by Tim Cook, who announced new products. 
    Microsoft and Google were also represented at the event. 
    The new iPhone impressed many analysts with its innovative features. 
    The event took place at the Moscone Center in downtown San Francisco. 
    Investors reacted positively to the announcements, with Apple stock rising 3% after the conference."""
    
    print("\n=== Testing Document Analysis ===\n")
    print("Full text to analyze:\n")
    print(sample_text)
    
    # Process the entire document
    result = await streaming_analyzer.analyze_document.remote(sample_text)
    
    print("\nDocument analysis results:\n")
    print(f"Text length: {result['text_length']} characters")
    print(f"Chunk count: {result['chunk_count']} chunks")
    
    if 'sentiment' in result:
        print(f"\nOverall sentiment: {result['sentiment']['overall']}")
        print(f"Average sentiment score: {result['sentiment']['average_score']:.4f}")
        print(f"Sentiment by chunk: {result['sentiment']['sentiment_by_chunk']}")
    
    if 'entities' in result:
        print(f"\nTotal entities: {result['entities']['count']}")
        print("Entities by type:")
        for entity_type, count in result['entities']['by_type'].items():
            print(f"  - {entity_type}: {count}")
        
        print("\nAll entities:")
        for entity in result['entities']['all_entities'][:10]:  # Show first 10 entities only
            print(f"  - {entity['word']} ({entity['entity']}) - Score: {entity['score']:.4f}")
        
        if len(result['entities']['all_entities']) > 10:
            print(f"  ... and {len(result['entities']['all_entities']) - 10} more entities")

await test_document_analysis() 

## Performance Benchmarking

Let's run a simple benchmark that sends concurrent requests to measure the throughput of the sentiment, classification, and entity recognition deployments.

In [None]:
async def benchmark_models(num_requests=10):
    print("\n=== Performance Benchmarking ===\n")
    
    # Test data
    sentiment_text = "This product exceeded my expectations in every way possible!"
    classification_text = "The president announced new economic policies yesterday."
    classification_labels = ["politics", "economics", "sports", "technology"]
    entity_text = "Google and Microsoft are competing in the cloud services market in Europe."
    
    # Benchmark SentimentAnalyzer
    print(f"Benchmarking SentimentAnalyzer with {num_requests} requests...")
    start_time = time.time()
    
    tasks = [sentiment_analyzer.analyze.remote(sentiment_text) for _ in range(num_requests)]
    results = await asyncio.gather(*tasks)
    
    total_time = time.time() - start_time
    print(f"Total time: {total_time:.4f} seconds")
    print(f"Average time per request: {total_time/num_requests:.4f} seconds")
    print(f"Throughput: {num_requests/total_time:.2f} requests/second\n")
    
    # Benchmark TextClassifier
    print(f"Benchmarking TextClassifier with {num_requests} requests...")
    start_time = time.time()
    
    tasks = [text_classifier.classify.remote(classification_text, classification_labels) for _ in range(num_requests)]
    results = await asyncio.gather(*tasks)
    
    total_time = time.time() - start_time
    print(f"Total time: {total_time:.4f} seconds")
    print(f"Average time per request: {total_time/num_requests:.4f} seconds")
    print(f"Throughput: {num_requests/total_time:.2f} requests/second\n")
    
    # Benchmark EntityRecognizer
    print(f"Benchmarking EntityRecognizer with {num_requests} requests...")
    start_time = time.time()
    
    tasks = [entity_recognizer.recognize_entities.remote(entity_text) for _ in range(num_requests)]
    results = await asyncio.gather(*tasks)
    
    total_time = time.time() - start_time
    print(f"Total time: {total_time:.4f} seconds")
    print(f"Average time per request: {total_time/num_requests:.4f} seconds")
    print(f"Throughput: {num_requests/total_time:.2f} requests/second\n")

# Run benchmark with 10 requests per model
await benchmark_models(10)

## Cleanup

Optionally, we can shut down Ray Serve when we're done testing.

In [None]:
# Shutdown is optional, so it is left commented out.
# Uncomment the two lines below to shut down Ray Serve.
# serve.shutdown()
# print("Ray Serve shut down")

print("All tests completed!")