# NLP Pipeline Quick Start Guide

This notebook demonstrates how to use the NLP Pipeline for sentiment analysis and entity extraction.

In [None]:
# Import required modules
import asyncio
import sys

# Add the parent directory to the import path before importing from `src`.
sys.path.append('..')

from src.pipeline import NLPPipeline
from src.models import SentimentAnalyzer, EntityExtractor
from src.preprocessing import TextCleaner
from src.postprocessing import Visualizer
from src.utils.config import Config

## 1. Basic Sentiment Analysis

In [None]:
# Create one analyzer up front and reuse it for every sample below.
sentiment_analyzer = SentimentAnalyzer()

# Sample inputs: one enthusiastic, one negative, one lukewarm.
texts = [
    "I absolutely love this product! It's amazing and works perfectly.",
    "The service was terrible. I'm very disappointed.",
    "It's okay, nothing special but does the job.",
]

# Horizontal rule printed after each report; built once, outside the loop.
separator = "-" * 80

# The value returned by predict() is indexed by 'sentiment' and 'confidence';
# emit a three-line report (text, verdict, rule) for each sample.
for text in texts:
    result = sentiment_analyzer.predict(text)
    print(
        f"Text: {text}",
        f"Sentiment: {result['sentiment']} (confidence: {result['confidence']:.2f})",
        separator,
        sep="\n",
    )

## 2. Entity Extraction

In [None]:
# Initialize the entity extractor.
entity_extractor = EntityExtractor()

# Example text mentioning an organisation, a person, a place, a date and an amount.
text = """Apple Inc. announced today that Tim Cook will be visiting 
the new office in San Francisco next Monday. The company plans 
to invest $1 billion in AI research."""

# Extract entities; each one is read below via its 'text' and 'label' keys,
# plus an optional 'description'.
entities = entity_extractor.extract_entities(text)

print("Extracted Entities:")
for entity in entities:
    line = f"- {entity['text']} ({entity['label']})"
    # Append the description only when one is present, so entries without it
    # do not end in a dangling ": " (and a None value is not printed as "None").
    description = entity.get('description', '')
    if description:
        line += f": {description}"
    print(line)

## 3. Using the Full Pipeline

In [None]:
# Initialize the pipeline
pipeline = NLPPipeline()

# Process documents
documents = [
    {
        "document_id": "doc001",
        "text": "Microsoft's Azure cloud platform is fantastic for enterprise solutions."
    },
    {
        "document_id": "doc002",
        "text": "The customer support from Amazon was unhelpful and slow."
    },
    {
        "document_id": "doc003",
        "text": "Google's new AI model performs reasonably well in most tasks."
    }
]

# Process batch asynchronously
results = await pipeline.process_batch(documents)

# Display results
for result in results:
    print(f"\nDocument: {result.document_id}")
    print(f"Text: {result.text}")
    print(f"Sentiment: {result.sentiment} (confidence: {result.sentiment_confidence:.2f})")
    print(f"Entities: {[e['text'] + ' (' + e['label'] + ')' for e in result.entities]}")
    print(f"Processing time: {result.processing_time:.3f}s")

## 4. Batch Processing and Visualization

In [None]:
# Process a larger batch
large_batch = [
    "This product exceeded my expectations! Highly recommend.",
    "Worst purchase ever. Complete waste of money.",
    "It's decent for the price.",
    "Absolutely brilliant! Best investment I've made.",
    "Not impressed. There are better alternatives.",
    "Average product with average results.",
    "Outstanding quality and excellent customer service!",
    "Disappointed with the quality. Would not buy again."
]

# Process batch
results = await pipeline.process_batch(large_batch)

# Prepare data for visualization
viz_data = [
    {
        'sentiment': r.sentiment,
        'confidence': r.sentiment_confidence,
        'text': r.text[:50] + '...' if len(r.text) > 50 else r.text
    }
    for r in results
]

# Create visualizer
visualizer = Visualizer()

# Plot sentiment distribution
fig = visualizer.plot_sentiment_distribution(viz_data)
fig.show()

## 5. Performance Metrics

In [None]:
# Get pipeline statistics
stats = pipeline.get_stats()

print("Pipeline Performance Metrics:")
print(f"- Documents processed: {stats['documents_processed']}")
print(f"- Average processing time: {stats['average_processing_time']:.3f}s")
print(f"- Throughput: {stats['throughput']:.1f} docs/second")

# Aggregated results
agg = stats['aggregated_results']
print("\nAggregated Results:")
print(f"- Total documents: {agg['total_documents']}")
print(f"- Sentiment distribution: {agg['sentiment_distribution']}")
print(f"- Total entities: {agg['total_entities']}")

# Shutdown pipeline
await pipeline.shutdown()

## 6. Text Preprocessing Examples

In [None]:
# Text cleaner constructed with no arguments.
cleaner = TextCleaner()

# Deliberately noisy sample: upper-case emphasis, repeated punctuation, emoji,
# a URL, an e-mail address and hashtags.
messy_text = """
Check out this AMAZING product!!! 😍😍😍 
Visit https://example.com for more info... 
Contact us at: support@example.com #BestProduct #AI #NLP
"""

# Run the sample through the cleaner.
clean_text = cleaner.clean(messy_text)

# Show the input and the cleaned output one after the other.
print(
    "Original text:",
    messy_text,
    "\nCleaned text:",
    clean_text,
    sep="\n",
)

## 7. Advanced Configuration

In [None]:
# Create custom configuration
from src.utils.config import Config

# Initialize with custom settings
custom_config = Config()
custom_config.model.batch_size = 64
custom_config.model.max_sequence_length = 256
custom_config.processing.num_workers = 8

# Save configuration
custom_config.save_to_file('custom_config.yaml')

# Initialize pipeline with custom config
custom_pipeline = NLPPipeline('custom_config.yaml')

print("Custom pipeline configuration loaded successfully!")