# Lab 2.4.2 Solutions: Pipeline Showcase

This notebook contains solutions to the exercises in the Pipeline Showcase notebook.

In [None]:
# Setup
import gc
import warnings

import torch
from transformers import pipeline
# Suppress all warnings so they do not clutter the notebook output.
warnings.filterwarnings('ignore')

# Device index handed to every pipeline in this notebook:
# 0 when CUDA is available, -1 otherwise (reported as GPU / CPU below).
if torch.cuda.is_available():
    device = 0
else:
    device = -1

print("Using device: " + ("GPU" if device == 0 else "CPU"))

## Exercise Solution: Customer Support Bot

Build a simple customer support analyzer that:
1. Detects if the customer is angry (sentiment)
2. Extracts product names mentioned (NER)
3. Generates a helpful response (generation)

In [None]:
def analyze_support_ticket(ticket_text: str) -> dict:
    """
    Analyze a customer support ticket with three pipelines run in sequence.

    Each pipeline (sentiment, NER, text generation) is created, used once and
    released before the next one is created, so the models are not all held
    in memory at the same time. The trade-off is that every call builds all
    three pipelines again.

    Args:
        ticket_text: The customer's message

    Returns:
        Dictionary with the keys:
            'ticket': the original message
            'sentiment': {'label', 'score'} from the sentiment pipeline
            'entities': list of {'text', 'type', 'confidence'} dicts
            'suggested_response': the 'generated_text' of the first
                generation result
    """
    results = {
        'ticket': ticket_text,
        'sentiment': None,
        'entities': [],
        'suggested_response': None
    }

    # 1. Sentiment Analysis
    sentiment_pipe = pipeline(
        "sentiment-analysis",
        model="distilbert-base-uncased-finetuned-sst-2-english",
        device=device
    )
    # truncation=True: a ticket longer than the model's maximum input length
    # is cut to fit instead of raising during inference.
    sentiment_result = sentiment_pipe(ticket_text, truncation=True)[0]
    results['sentiment'] = {
        'label': sentiment_result['label'],
        'score': sentiment_result['score']
    }

    # Cleanup: drop the pipeline, collect anything kept alive only by
    # reference cycles, then release cached GPU memory.
    del sentiment_pipe
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # 2. Named Entity Recognition
    ner_pipe = pipeline(
        "ner",
        model="dslim/bert-base-NER",
        aggregation_strategy="simple",
        device=device
    )
    entities = ner_pipe(ticket_text)
    results['entities'] = [
        {'text': e['word'], 'type': e['entity_group'], 'confidence': e['score']}
        for e in entities
    ]

    # Cleanup
    del ner_pipe
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # 3. Generate Response based on sentiment
    # The opening sentence is fixed per sentiment; the model only continues it.
    if results['sentiment']['label'] == 'NEGATIVE':
        prompt = "Dear valued customer, we sincerely apologize for the inconvenience. We understand your frustration and"
    else:
        prompt = "Dear valued customer, thank you for reaching out! We're delighted to hear from you and"

    gen_pipe = pipeline(
        "text-generation",
        model="distilgpt2",
        device=device
    )
    # do_sample=True makes the continuation non-deterministic between calls.
    response = gen_pipe(
        prompt,
        max_new_tokens=50,
        do_sample=True,
        temperature=0.7,
        pad_token_id=gen_pipe.tokenizer.eos_token_id
    )
    results['suggested_response'] = response[0]['generated_text']

    # Cleanup
    del gen_pipe
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    return results

In [None]:
# Test with sample tickets: run the analyzer on each one and print a report.
sample_tickets = [
    "I've been waiting 3 weeks for my MacBook to arrive! This is unacceptable!",
    "Just wanted to say your customer service team was amazing. Thanks!",
    "The iPhone screen is cracked and it's only been a week since I bought it."
]

print("CUSTOMER SUPPORT TICKET ANALYSIS")
print("=" * 70)

for i, ticket in enumerate(sample_tickets, 1):
    print(f"\n--- Ticket {i} ---")
    result = analyze_support_ticket(ticket)

    print(f"Ticket: {result['ticket']}")
    print(f"\nSentiment: {result['sentiment']['label']} ({result['sentiment']['score']:.2%})")

    print("\nEntities found:")
    if result['entities']:
        for e in result['entities']:
            print(f"  - {e['text']} ({e['type']}, {e['confidence']:.2%})")
    else:
        print("  No entities detected")

    print("\nSuggested Response:")
    # Show at most 200 characters, and only add the ellipsis when the
    # response was actually cut off.
    response_text = result['suggested_response']
    preview = f"{response_text[:200]}..." if len(response_text) > 200 else response_text
    print(f"  {preview}")
    print("-" * 70)

## Challenge Solution: Content Moderation System

Create a content moderation pipeline that:
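1. Detects toxic language (approximated here by negative sentiment; a dedicated toxicity model such as `unitary/toxic-bert` should be used in production)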
2. Extracts mentioned entities
3. Summarizes the content for human review
4. Generates a moderation decision

In [None]:
def _decide_moderation(is_negative: bool, confidence: float, person_names: list) -> tuple:
    """Turn the sentiment signal and any person names into (decision, reason)."""
    if is_negative and confidence > 0.9:
        decision = 'FLAG_FOR_REVIEW'
        reason = 'High confidence negative sentiment detected'
    elif is_negative and confidence > 0.7:
        decision = 'REVIEW_RECOMMENDED'
        reason = 'Moderate negative sentiment detected'
    else:
        decision = 'APPROVE'
        reason = 'Content appears acceptable'

    # People named in negative content always escalate to a flag.
    if person_names and is_negative:
        if decision == 'APPROVE':
            # Low-confidence negative: keeping "Content appears acceptable"
            # would contradict the FLAG_FOR_REVIEW decision set below.
            reason = 'Negative sentiment detected'
        decision = 'FLAG_FOR_REVIEW'
        reason += f' - Names mentioned: {", ".join(person_names)}'

    return decision, reason


def moderate_content(text: str) -> dict:
    """
    Analyze content for moderation.

    Runs sentiment analysis (used as a stand-in for toxicity), entity
    extraction and, for texts longer than 30 whitespace-separated words,
    summarization. Each pipeline is created, used once and released before
    the next one is created. A rule-based decision is then derived from the
    sentiment result and any person entities.

    Args:
        text: Content to moderate

    Returns:
        Dictionary with the keys:
            'original_text': the input text
            'toxicity': {'is_negative', 'confidence'}
            'entities': list of {'text', 'type'} dicts
            'summary': generated summary, or the text itself when it is short
            'decision': 'FLAG_FOR_REVIEW', 'REVIEW_RECOMMENDED' or 'APPROVE'
            'reason': human-readable explanation of the decision
    """
    results = {
        'original_text': text,
        'toxicity': None,
        'entities': [],
        'summary': None,
        'decision': None,
        'reason': None
    }

    # 1. Toxicity Detection (using sentiment as proxy - in production use toxicity model)
    # Note: For real moderation, use models like 'unitary/toxic-bert'
    sentiment_pipe = pipeline(
        "sentiment-analysis",
        model="distilbert-base-uncased-finetuned-sst-2-english",
        device=device
    )
    # truncation=True: long content is cut to the model's maximum input
    # length instead of raising during inference.
    sentiment = sentiment_pipe(text, truncation=True)[0]
    # In production, use actual toxicity scores
    results['toxicity'] = {
        'is_negative': sentiment['label'] == 'NEGATIVE',
        'confidence': sentiment['score']
    }
    del sentiment_pipe
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # 2. Entity Extraction
    # The model is named explicitly (same one as analyze_support_ticket uses)
    # rather than relying on the library's default for the "ner" task.
    ner_pipe = pipeline(
        "ner",
        model="dslim/bert-base-NER",
        aggregation_strategy="simple",
        device=device
    )
    entities = ner_pipe(text)
    results['entities'] = [
        {'text': e['word'], 'type': e['entity_group']}
        for e in entities
    ]
    del ner_pipe
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # 3. Summarization (for longer texts)
    if len(text.split()) > 30:
        summarizer = pipeline(
            "summarization",
            model="sshleifer/distilbart-cnn-12-6",
            device=device
        )
        # truncation=True for the same reason as the sentiment call above.
        summary = summarizer(text, max_length=50, min_length=10, truncation=True)[0]
        results['summary'] = summary['summary_text']
        del summarizer
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    else:
        # Short texts are shown to the reviewer unchanged.
        results['summary'] = text

    # 4. Decision Logic
    # Only entities typed 'PER' count as people for the escalation rule.
    person_names = [e['text'] for e in results['entities'] if e['type'] == 'PER']
    results['decision'], results['reason'] = _decide_moderation(
        results['toxicity']['is_negative'],
        results['toxicity']['confidence'],
        person_names
    )

    return results

In [None]:
# Run the moderation function on a few sample texts and print each verdict.
test_content = [
    "This product is amazing! Best purchase I've ever made.",
    "John Smith is the worst manager. He should be fired immediately.",
    "The weather is nice today and I enjoyed my walk in Central Park."
]

print("CONTENT MODERATION RESULTS")
print("=" * 70)

for content in test_content:
    result = moderate_content(content)

    # Preview at most 60 characters; the ellipsis appears only when cut.
    if len(content) > 60:
        print(f"\nContent: {content[:60]}...")
    else:
        print(f"\nContent: {content}")

    # One print call, one line per field, closed by a separator rule.
    print(
        f"Decision: {result['decision']}",
        f"Reason: {result['reason']}",
        f"Entities: {[e['text'] for e in result['entities']]}",
        "-" * 50,
        sep="\n",
    )

## Summary

In this solution notebook, we demonstrated:

1. **Customer Support Bot** that:
   - Analyzes sentiment to detect customer mood
   - Extracts product/company mentions using NER
   - Generates appropriate responses based on sentiment

2. **Content Moderation System** that:
   - Detects potentially problematic content
   - Identifies mentioned entities
   - Provides moderation decisions with reasoning

Key learnings:
- Chain multiple pipelines for complex tasks
- Clean up GPU memory between pipelines
- Use appropriate models for each subtask
- Combine ML outputs with business logic for decisions

In [None]:
# Cleanup: collect unreachable Python objects first, then release any
# GPU memory still held in the CUDA cache.
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()
print("Cleanup complete!")