# Lab 2.5.2 Solution: Multi-Pipeline Customer Feedback Analyzer

This notebook provides the complete solution for the "Try It Yourself" exercise in Lab 2.5.2.

**Task**: Create a function that analyzes customer feedback using multiple pipelines:
1. Sentiment analysis
2. Named Entity Recognition
3. Summarization (only when the feedback exceeds a word-count threshold)

---

In [None]:
import torch
from transformers import pipeline
from typing import Dict, Any, List
import json

# HuggingFace pipelines take a device index: 0 is the first CUDA GPU, -1 is the CPU.
if torch.cuda.is_available():
    DEVICE = 0
else:
    DEVICE = -1
print("Using device: " + ("GPU" if DEVICE >= 0 else "CPU"))

## Solution: Complete Customer Feedback Analyzer

In [None]:
class CustomerFeedbackAnalyzer:
    """
    Comprehensive customer feedback analyzer using multiple HuggingFace pipelines.

    Features:
    - Sentiment analysis
    - Named entity extraction
    - Text summarization (for long feedback)

    The three pipelines are loaded once in ``__init__`` and reused by every
    ``analyze`` call. Call ``cleanup`` when finished to release them.
    """

    def __init__(self, device: int = -1):
        """
        Initialize all pipelines.

        Args:
            device: -1 for CPU, 0+ for GPU
        """
        self.device = device

        print("Loading pipelines...")

        # Sentiment analysis
        self.sentiment = pipeline(
            "sentiment-analysis",
            model="distilbert-base-uncased-finetuned-sst-2-english",
            device=device
        )
        print("  [OK] Sentiment analyzer loaded")

        # Named Entity Recognition ("simple" aggregation merges word pieces
        # into whole entities and exposes them under "entity_group")
        self.ner = pipeline(
            "ner",
            model="dslim/bert-base-NER",
            aggregation_strategy="simple",
            device=device
        )
        print("  [OK] NER loaded")

        # Summarization (bfloat16 weights on GPU, float32 on CPU)
        self.summarizer = pipeline(
            "summarization",
            model="facebook/bart-large-cnn",
            device=device,
            torch_dtype=torch.bfloat16 if device >= 0 else torch.float32
        )
        print("  [OK] Summarizer loaded")

        print("\nAll pipelines ready!")

    def analyze(self, text: str, summarize_threshold: int = 100) -> Dict[str, Any]:
        """
        Analyze customer feedback.

        Args:
            text: Customer feedback text
            summarize_threshold: Word count threshold for summarization

        Returns:
            Dictionary with the keys "original_text", "word_count",
            "sentiment" (label, score, is_positive), "entities" (list of
            text/type/confidence dicts) and "summary". All scores are plain
            Python floats so the result can be passed to ``json.dumps``.
        """
        word_count = len(text.split())
        result = {
            "original_text": text,
            "word_count": word_count,
            "sentiment": None,
            "entities": [],
            "summary": None
        }

        # 1. Sentiment Analysis
        # truncation=True keeps over-length feedback from exceeding the
        # classifier's maximum input size.
        sentiment_result = self.sentiment(text, truncation=True)[0]
        result["sentiment"] = {
            "label": sentiment_result["label"],
            "score": round(float(sentiment_result["score"]), 4),
            "is_positive": sentiment_result["label"] == "POSITIVE"
        }

        # 2. Named Entity Recognition
        # NOTE(review): the NER input is passed through untruncated; very long
        # feedback may exceed the model's input limit - confirm if needed.
        entities = self.ner(text)
        result["entities"] = [
            {
                "text": ent["word"],
                "type": ent["entity_group"],
                # Aggregated NER scores are NumPy floats; round() would keep
                # that type, which json.dumps rejects, so cast to float first.
                "confidence": round(float(ent["score"]), 4)
            }
            for ent in entities
        ]

        # 3. Summarization (if text is long enough)
        if word_count > summarize_threshold:
            summary = self.summarizer(
                text,
                max_length=60,
                min_length=20,
                do_sample=False,
                truncation=True  # clip input that is longer than the model accepts
            )[0]["summary_text"]
            result["summary"] = summary
        else:
            result["summary"] = "(Text too short for summarization)"

        return result

    def format_report(self, analysis: Dict[str, Any]) -> str:
        """
        Format analysis results as a readable report.

        Args:
            analysis: Dictionary in the shape returned by ``analyze``

        Returns:
            Multi-line report string (feedback preview, sentiment, entities,
            summary).
        """
        report = []
        report.append("=" * 60)
        report.append("CUSTOMER FEEDBACK ANALYSIS REPORT")
        report.append("=" * 60)

        # Original text (preview limited to the first 200 characters)
        report.append(f"\nFEEDBACK ({analysis['word_count']} words):")
        report.append("-" * 40)
        text_preview = analysis['original_text'][:200]
        if len(analysis['original_text']) > 200:
            text_preview += "..."
        report.append(text_preview)

        # Sentiment
        report.append("\nSENTIMENT:")
        report.append("-" * 40)
        sentiment = analysis['sentiment']
        # ASCII markers (same style as the "[OK]" load messages) so positive
        # and negative results are visually distinguishable.
        marker = "[+]" if sentiment['is_positive'] else "[-]"
        report.append(f"  {marker} {sentiment['label']} (confidence: {sentiment['score']:.1%})")

        # Entities
        report.append("\nENTITIES FOUND:")
        report.append("-" * 40)
        if analysis['entities']:
            for ent in analysis['entities']:
                report.append(f"  - {ent['type']:12}: '{ent['text']}' ({ent['confidence']:.1%})")
        else:
            report.append("  (No entities detected)")

        # Summary
        report.append("\nSUMMARY:")
        report.append("-" * 40)
        report.append(f"  {analysis['summary']}")

        report.append("\n" + "=" * 60)

        return "\n".join(report)

    def cleanup(self):
        """
        Free GPU memory.

        Drops the three pipeline attributes, runs the garbage collector and
        empties the CUDA cache when CUDA is available. Safe to call more than
        once; the analyzer cannot analyze text afterwards.
        """
        import gc

        # pop() instead of del so a repeated call does not raise AttributeError.
        for attr in ("sentiment", "ner", "summarizer"):
            self.__dict__.pop(attr, None)
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

## Testing the Analyzer

In [None]:
# Initialize analyzer: loads the sentiment, NER and summarization pipelines once
# on the device chosen in DEVICE so the cells below can reuse them.
analyzer = CustomerFeedbackAnalyzer(device=DEVICE)

In [None]:
# Sample review mixing praise and complaints, with product, place and person names
sample_feedback = """
I bought the iPhone 15 Pro from Apple Store in San Francisco last week. 
The sales representative John was incredibly helpful and knowledgeable. 
However, I'm disappointed with the battery life - it barely lasts a full day 
with normal usage. The camera quality is excellent though, especially for 
low-light photography. I've contacted Apple Support and they suggested a 
software update might help. Overall, it's a mixed experience but I'm hopeful 
the issues will be resolved. Would recommend waiting for the next update 
before purchasing.
"""

# Run the analysis, then render and show the formatted report
analysis = analyzer.analyze(sample_feedback)
report_text = analyzer.format_report(analysis)
print(report_text)

In [None]:
# Test with more samples: three short reviews (one negative, two positive),
# each mentioning organizations, people or places for the NER step.
test_feedbacks = [
    "Terrible experience with Amazon delivery. My package arrived damaged and customer service was unhelpful. Never ordering again!",
    "Microsoft Teams has been fantastic for our remote work. The integration with Office 365 is seamless. Our team in Seattle loves it!",
    "Met with Dr. Johnson at Mayo Clinic yesterday. The care was exceptional and the diagnosis was thorough. Highly recommend!"
]

print("\nAnalyzing multiple feedbacks...")
print("=" * 60)

for i, feedback in enumerate(test_feedbacks, 1):
    print(f"\n--- Feedback {i} ---")
    analysis = analyzer.analyze(feedback)

    sentiment = analysis['sentiment']
    # ASCII markers so positive and negative results are visually distinct
    # (both branches were previously the same empty string).
    marker = "[+]" if sentiment['is_positive'] else "[-]"

    print(f"Sentiment: {marker} {sentiment['label']} ({sentiment['score']:.1%})")
    print(f"Entities: {[e['text'] for e in analysis['entities']]}")

In [None]:
# Emit the analysis as JSON, the form an API endpoint would return
print("\nJSON Output (for API integration):")
print("-" * 60)
analysis = analyzer.analyze(sample_feedback)
# Work on a shallow copy and drop the raw feedback text to keep the payload compact
json_output = dict(analysis)
json_output.pop('original_text', None)
print(json.dumps(json_output, indent=2))

In [None]:
# Cleanup: drop the analyzer's pipelines and release cached GPU memory.
# The analyzer cannot run further analyses after this call.
analyzer.cleanup()
print("\nAnalyzer cleanup complete!")

---

## Alternative: Simple Function Version

If you prefer a single function over a class, the version below performs the same analysis but reloads every pipeline on each call:

In [None]:
def analyze_customer_feedback(text: str) -> Dict[str, Any]:
    """
    Simple function to analyze customer feedback.

    Note: This creates pipelines each time - not efficient for batch processing.
    Use the class version for production.

    Args:
        text: Customer feedback text

    Returns:
        Dictionary with the keys "original_text", "sentiment" (label and
        score), "entities" (list of text/type dicts) and "summary" (None
        unless the text is longer than 100 words).
    """
    import gc  # local import: only needed for the cleanup step below

    # Create pipelines (no model is named, so the library defaults are used)
    sentiment_pipe = pipeline("sentiment-analysis", device=DEVICE)
    ner_pipe = pipeline("ner", aggregation_strategy="simple", device=DEVICE)
    summarizer = None

    result = {
        "original_text": text,
        "sentiment": None,
        "entities": [],
        "summary": None
    }

    try:
        # Sentiment (truncation=True keeps over-length input within the
        # classifier's maximum input size)
        sent = sentiment_pipe(text, truncation=True)[0]
        result["sentiment"] = {
            "label": sent["label"],
            "score": sent["score"]
        }

        # Entities
        entities = ner_pipe(text)
        result["entities"] = [
            {"text": e["word"], "type": e["entity_group"]}
            for e in entities
        ]

        # Summarization only if long
        if len(text.split()) > 100:
            summarizer = pipeline("summarization", device=DEVICE)
            result["summary"] = summarizer(
                text, max_length=60, truncation=True
            )[0]["summary_text"]
    finally:
        # Cleanup runs even when inference raises, so the pipelines and the
        # CUDA cache are released on every exit path.
        del sentiment_pipe, ner_pipe, summarizer
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    return result

# Test
# result = analyze_customer_feedback("Great product from Apple!")
# print(result)