# Natural Language Understanding: Semantic Parsing & Advanced Sentiment Analysis

This notebook explores advanced NLU techniques including semantic parsing, structured data extraction, and multi-dimensional sentiment analysis using state-of-the-art transformer models.

In [None]:
# Install required packages
!pip install transformers torch pandas matplotlib seaborn plotly wordcloud
!pip install sentence-transformers

In [None]:
# Import required libraries
import warnings
warnings.filterwarnings('ignore')

from transformers import pipeline, AutoTokenizer, AutoModel
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
from collections import Counter
import re
import json

# Set style for better visualizations.
# 'seaborn-v0_8' is only registered on matplotlib >= 3.6; older releases expose the
# same style as 'seaborn'. Use whichever name this installation provides and fall
# back to matplotlib's default style instead of failing the whole imports cell.
_seaborn_style = next(
    (name for name in ('seaborn-v0_8', 'seaborn') if name in plt.style.available),
    'default',
)
plt.style.use(_seaborn_style)
sns.set_palette("husl")

print("✅ All libraries imported successfully!")

## Semantic Parsing

Semantic parsing converts natural language into structured representations that machines can understand and execute. This includes generating SQL queries, API calls, logical forms, and structured data extraction.

### 1. Text-to-SQL Generation

Converting natural language queries into SQL statements.

In [None]:
# Text-to-SQL pipeline: a text2text-generation pipeline around the 't5-small' checkpoint.
# NOTE(review): 't5-small' does not look like a SQL-specific checkpoint, so the generated
# text may not be valid SQL — confirm, or swap in a model fine-tuned for text-to-SQL.
sql_generator = pipeline(task='text2text-generation', model='t5-small')

# Plain-English requests that the demo below translates into SQL
nl_queries = [
    "Show me all customers from New York",
    "Find products with price greater than 100 dollars",
    "Get the total sales for each month in 2023",
    "List employees who joined after January 2022",
    "Count the number of orders per customer",
]

# Section banner: title line followed by a 60-character rule
print("🗃️  Text-to-SQL Generation", "=" * 60, sep="\n")

def generate_sql(nl_query, context="Database with tables: customers, products, orders, employees"):
    """Translate a natural-language request into SQL text.

    The request and the schema hint in ``context`` are combined into one prompt,
    which is passed to ``sql_generator`` asking for a single sequence capped at
    ``max_length=100``.

    Args:
        nl_query: The request in plain language.
        context: Free-text description of the database placed after the request.

    Returns:
        The ``'generated_text'`` of the first (and only) generated sequence.
    """
    outputs = sql_generator(
        "Translate to SQL: {}. Context: {}".format(nl_query, context),
        max_length=100,
        num_return_sequences=1,
    )
    return outputs[0]['generated_text']

# Translate every sample request and print it next to the generated SQL
for idx, question in enumerate(nl_queries, start=1):
    generated = generate_sql(question)
    print(
        f"{idx}. Natural Language: {question}",
        f"   💾 Generated SQL: {generated}",
        "",
        sep="\n",
    )

# Custom query example
print("🔧 Custom Query:")
custom_query = "Show me the average salary of employees in the engineering department"
custom_sql = generate_sql(custom_query)
print(f"📝 Query: {custom_query}", f"💾 SQL: {custom_sql}", sep="\n")

### 2. Structured Data Extraction

Extracting structured information from unstructured text.

In [None]:
# Structured-extraction pipeline: a text2text-generation pipeline around 't5-small'
structure_generator = pipeline(task='text2text-generation', model='t5-small')

# Free-text samples (a person, a product, a flight) to turn into structured output
unstructured_texts = [
    "John Smith, age 35, lives at 123 Main Street, New York, NY. He works as a Software Engineer at Tech Corp and earns $120,000 annually.",
    "The iPhone 14 Pro costs $999 and was released on September 16, 2022. It features a 6.1-inch display and comes in four colors: Space Black, Silver, Gold, and Deep Purple.",
    "Flight AA123 departs from JFK Airport at 3:45 PM on July 15th, 2024, arriving at LAX at 7:20 PM. The flight duration is 5 hours and 35 minutes.",
]

# Section banner: title line followed by a 60-character rule
print("📊 Structured Data Extraction", "=" * 60, sep="\n")

def extract_structured_data(text, schema="JSON"):
    """Ask ``structure_generator`` to rewrite ``text`` in the requested format.

    Args:
        text: Unstructured input text.
        schema: Name of the target format, inserted verbatim into the prompt
            (for example ``"JSON"``, ``"key-value pairs"`` or ``"table format"``).

    Returns:
        The ``'generated_text'`` of the single sequence generated with
        ``max_length=150``.
    """
    generations = structure_generator(
        "Extract structured data as {}: {}".format(schema, text),
        max_length=150,
        num_return_sequences=1,
    )
    return generations[0]['generated_text']

# Each entry pairs the label printed in the report with the schema name sent to the model
output_formats = (
    ("📊 Structured JSON", "JSON"),
    ("🔑 Key-Value", "key-value pairs"),
)

for i, text in enumerate(unstructured_texts, 1):
    print(f"{i}. Original Text:")
    print(f"   📝 {text}")

    # Run the extraction once per output format, printing each result as it arrives
    for label, schema in output_formats:
        extracted = extract_structured_data(text, schema)
        print(f"   {label}: {extracted}")
    print()

# Custom extraction
print("🎯 Custom Extraction Example:")
custom_text = "The quarterly sales report shows that Region A sold 1,500 units generating $450,000 revenue, while Region B sold 2,200 units with $660,000 revenue during Q3 2023."
custom_extraction = extract_structured_data(custom_text, "table format")
print(f"📝 Text: {custom_text}", f"📋 Table: {custom_extraction}", sep="\n")

## Advanced Sentiment Analysis

Going beyond basic positive/negative sentiment to explore multi-dimensional sentiment analysis, aspect-based sentiment, and emotion classification.

### 1. Multi-Dimensional Sentiment Analysis

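Analyzing sentiment across multiple dimensions: overall polarity (positive / neutral / negative), a coarse emotion category, and a fine-grained emotion label that the code below reports as "intensity".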

In [None]:
# Three classifiers, one per "dimension" reported below
basic_sentiment = pipeline(
    task='sentiment-analysis',
    model='cardiffnlp/twitter-roberta-base-sentiment-latest',
)
emotion_classifier = pipeline(
    task='text-classification',
    model='j-hartmann/emotion-english-distilroberta-base',
)
intensity_classifier = pipeline(
    task='text-classification',
    model='SamLowe/roberta-base-go_emotions',
)

# Sample texts with varying sentiment complexity
complex_texts = [
    # Mixed sentiment
    "The movie had amazing visuals, but the plot was completely disappointing.",
    # Mixed emotions
    "I'm absolutely thrilled about the new job opportunity, though I'm nervous about the challenges.",
    # Contrasting aspects
    "The restaurant service was terrible, but the food was surprisingly excellent.",
    # Neutral with nuance
    "This product is okay, nothing special but does what it's supposed to do.",
    # Complex emotional state
    "I'm devastated that the concert was cancelled, but I understand the safety concerns.",
]

# Section banner: title line followed by a 70-character rule
print("🎭 Multi-Dimensional Sentiment Analysis", "=" * 70, sep="\n")

def analyze_complex_sentiment(text):
    """Classify ``text`` with the three sentiment-related pipelines.

    Runs ``basic_sentiment``, ``emotion_classifier`` and ``intensity_classifier``
    in that order and keeps the first result of each.

    Returns:
        A dict with the keys ``'basic_sentiment'``, ``'emotion'`` and
        ``'intensity'``; each value is the first item returned by the
        corresponding pipeline (indexed with ``'label'`` and ``'score'`` by callers).
    """
    classifiers = (
        ('basic_sentiment', basic_sentiment),
        ('emotion', emotion_classifier),
        ('intensity', intensity_classifier),
    )
    return {key: classify(text)[0] for key, classify in classifiers}

# Run the three classifiers exactly once per text. The same result feeds both the
# per-text report and the summary table, so no text is pushed through the models twice.
sentiments = []
emotions = []
intensities = []

for i, text in enumerate(complex_texts, 1):
    results = analyze_complex_sentiment(text)

    print(f"{i}. Text: {text}")
    print(f"   🎯 Basic Sentiment: {results['basic_sentiment']['label']} (confidence: {results['basic_sentiment']['score']:.4f})")
    print(f"   😊 Emotion: {results['emotion']['label']} (confidence: {results['emotion']['score']:.4f})")
    print(f"   🌡️  Intensity: {results['intensity']['label']} (confidence: {results['intensity']['score']:.4f})")
    print()

    # Collect the labels for the summary table below
    sentiments.append(results['basic_sentiment']['label'])
    emotions.append(results['emotion']['label'])
    intensities.append(results['intensity']['label'])

# Create a summary dataframe: one row per text, one column per classifier label
df = pd.DataFrame({
    'Text': [f"Text {i+1}" for i in range(len(complex_texts))],
    'Sentiment': sentiments,
    'Emotion': emotions,
    'Intensity': intensities
})

print("📊 Sentiment Analysis Summary:")
print(df.to_string(index=False))

### 2. Aspect-Based Sentiment Analysis

Analyzing sentiment toward specific aspects or features mentioned in the text.

In [None]:
# Aspect-based sentiment analysis using custom prompting
def extract_aspects_and_sentiment(text):
    """Map each aspect found in ``text`` to a sentiment result.

    ``structure_generator`` is prompted to list the aspects of the review; its
    output is split on commas and blank pieces are dropped. ``basic_sentiment``
    is then run once per aspect on ``"Regarding <aspect>: <text>"``.

    Returns:
        A dict keyed by aspect string; each value is the first result returned
        by ``basic_sentiment`` for that aspect. If the same aspect appears more
        than once, the result of the last occurrence is kept.
    """
    generated = structure_generator(
        f"Extract the main aspects or features mentioned in this review: {text}",
        max_length=50,
    )[0]['generated_text']

    aspect_sentiments = {}
    for candidate in generated.split(','):
        aspect = candidate.strip()
        if not aspect:
            # Skip empty pieces such as those produced by a trailing comma
            continue
        aspect_sentiments[aspect] = basic_sentiment(f"Regarding {aspect}: {text}")[0]

    return aspect_sentiments

# Sample reviews with multiple aspects
reviews = [
    "The hotel room was spacious and clean, but the service was slow and the food was mediocre.",
    "I love the camera quality on this phone, but the battery life is terrible and it's too expensive.",
    "The movie had brilliant acting and cinematography, though the storyline was confusing and too long.",
    "The restaurant has amazing atmosphere and delicious desserts, but the main courses were overpriced."
]

print("🎯 Aspect-Based Sentiment Analysis")
print("=" * 60)

# Lookups keyed by the lower-cased label. The sentiment model used here reports
# lower-case labels ('positive' / 'neutral' / 'negative'), so comparing against
# 'POSITIVE' / 'NEGATIVE' never matched; lower-casing first works for either casing.
_aspect_label_emoji = {'positive': "👍", 'negative': "👎"}
_aspect_label_sign = {'positive': 1, 'negative': -1}

all_aspect_results = []

for i, review in enumerate(reviews, 1):
    print(f"{i}. Review: {review}")

    aspect_sentiments = extract_aspects_and_sentiment(review)
    all_aspect_results.append(aspect_sentiments)

    print("   📋 Aspect Sentiments:")
    for aspect, sentiment in aspect_sentiments.items():
        # Any label other than positive/negative (e.g. neutral) gets the neutral emoji
        emoji = _aspect_label_emoji.get(sentiment['label'].lower(), "😐")
        print(f"     {emoji} {aspect}: {sentiment['label']} (confidence: {sentiment['score']:.4f})")
    print()

# Create aspect sentiment heatmap data
aspect_df_data = []
for i, aspects in enumerate(all_aspect_results):
    for aspect, sentiment in aspects.items():
        # Signed score: +confidence for positive, -confidence for negative and
        # 0 for anything else, so neutral results no longer count as negative.
        score = _aspect_label_sign.get(sentiment['label'].lower(), 0) * sentiment['score']
        aspect_df_data.append({
            'Review': f'Review {i+1}',
            'Aspect': aspect,
            'Sentiment_Score': score,
            'Label': sentiment['label']
        })

aspect_df = pd.DataFrame(aspect_df_data)
print("📊 Aspect Sentiment Summary:")
if not aspect_df.empty:
    # Number of aspects per review, broken down by sentiment label
    print(aspect_df.groupby(['Review', 'Label']).size().unstack(fill_value=0))

## Interactive Visualizations

Creating an interactive dashboard and static charts to explore the sentiment analysis results.

In [None]:
# Create comprehensive interactive visualizations
def create_sentiment_dashboard(texts=None):
    """Build and show a 2x2 interactive Plotly dashboard of sentiment results.

    Panels:
        1. Pie chart of sentiment labels.
        2. Bar chart of emotion labels.
        3. Sentiment confidence per text.
        4. Heatmap of the sentiment x emotion cross-tabulation.

    Args:
        texts: Optional iterable of strings to analyse. Defaults to the
            notebook-level ``complex_texts`` when omitted.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    if texts is None:
        texts = complex_texts

    # Run the classifiers once per text and derive every series from that result
    analyses = [analyze_complex_sentiment(text) for text in texts]
    all_sentiments = [analysis['basic_sentiment']['label'] for analysis in analyses]
    all_emotions = [analysis['emotion']['label'] for analysis in analyses]
    all_confidences = [analysis['basic_sentiment']['score'] for analysis in analyses]
    text_labels = [f"Text {position}" for position in range(1, len(analyses) + 1)]

    # Create subplots
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=('Sentiment Distribution', 'Emotion Distribution',
                       'Confidence Scores', 'Sentiment vs Emotion'),
        specs=[[{'type': 'pie'}, {'type': 'bar'}],
               [{'type': 'scatter'}, {'type': 'heatmap'}]]
    )

    # 1. Sentiment Distribution (Pie Chart)
    sentiment_counts = Counter(all_sentiments)
    fig.add_trace(
        go.Pie(labels=list(sentiment_counts.keys()),
               values=list(sentiment_counts.values()),
               name="Sentiment"),
        row=1, col=1
    )

    # 2. Emotion Distribution (Bar Chart)
    emotion_counts = Counter(all_emotions)
    fig.add_trace(
        go.Bar(x=list(emotion_counts.keys()),
               y=list(emotion_counts.values()),
               name="Emotions",
               marker_color='lightblue'),
        row=1, col=2
    )

    # 3. Confidence Scores (Scatter Plot)
    fig.add_trace(
        go.Scatter(x=text_labels,
                   y=all_confidences,
                   mode='markers+lines',
                   name="Confidence",
                   marker=dict(size=10, color='orange')),
        row=2, col=1
    )

    # 4. Sentiment vs Emotion heatmap.
    # Summing the cross-tab over sentiments only reproduces the emotion counts of
    # panel 2, so plot the full table: one cell per (sentiment, emotion) pair.
    sentiment_emotion_df = pd.DataFrame({
        'Sentiment': all_sentiments,
        'Emotion': all_emotions
    })
    cross_tab = pd.crosstab(sentiment_emotion_df['Sentiment'], sentiment_emotion_df['Emotion'])

    fig.add_trace(
        go.Heatmap(z=cross_tab.values,
                   x=list(cross_tab.columns),
                   y=list(cross_tab.index),
                   colorscale='Greens',
                   # Shrink the colour bar and align it with the bottom row
                   colorbar=dict(len=0.4, y=0.2, title=dict(text='Texts')),
                   name="Sentiment-Emotion Cross"),
        row=2, col=2
    )

    fig.update_layout(
        title_text="🎭 Comprehensive Sentiment Analysis Dashboard",
        title_x=0.5,
        height=800,
        showlegend=False
    )

    fig.show()

# Create the dashboard
create_sentiment_dashboard()

# Additional static visualizations
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 12))

# Run each classifier once per text and reuse the results for every chart and for
# the printed summary, instead of repeating the same inference several times.
static_sentiment_results = [basic_sentiment(text)[0] for text in complex_texts]
static_emotion_results = [emotion_classifier(text)[0] for text in complex_texts]

# 1. Sentiment Timeline
# Signed score: +confidence for positive, -confidence for negative, 0 otherwise.
# Labels are lower-cased first because the sentiment model reports lower-case
# labels, so a comparison against 'POSITIVE' never matched and every score was negated.
_plot_label_sign = {'positive': 1, 'negative': -1}
sentiments_for_plot = [
    _plot_label_sign.get(result['label'].lower(), 0) * result['score']
    for result in static_sentiment_results
]

ax1.plot(range(1, len(sentiments_for_plot) + 1), sentiments_for_plot,
         marker='o', linewidth=2, markersize=8)
ax1.axhline(y=0, color='gray', linestyle='--', alpha=0.7)
ax1.set_title('📈 Sentiment Score Timeline', fontsize=14, fontweight='bold')
ax1.set_xlabel('Text Number')
ax1.set_ylabel('Sentiment Score')
ax1.grid(True, alpha=0.3)

# 2. Emotion confidence per text (score of the top emotion label)
emotion_data = [result['score'] for result in static_emotion_results]

ax2.bar(range(1, len(emotion_data) + 1), emotion_data, color='skyblue', alpha=0.7)
ax2.set_title('😊 Emotion Intensity Distribution', fontsize=14, fontweight='bold')
ax2.set_xlabel('Text Number')
ax2.set_ylabel('Emotion Confidence')

# 3. Confidence Distribution
confidence_data = [result['score'] for result in static_sentiment_results]

ax3.hist(confidence_data, bins=5, color='lightcoral', alpha=0.7, edgecolor='black')
ax3.set_title('📊 Confidence Score Distribution', fontsize=14, fontweight='bold')
ax3.set_xlabel('Confidence Score')
ax3.set_ylabel('Frequency')

# 4. Average aspect sentiment (if aspect data exists)
if not aspect_df.empty:
    # First five distinct aspects in order of appearance (not ranked by any score)
    aspects = aspect_df['Aspect'].unique()[:5]
    aspect_scores = []
    for aspect in aspects:
        avg_score = aspect_df[aspect_df['Aspect'] == aspect]['Sentiment_Score'].mean()
        aspect_scores.append(avg_score)

    ax4.bar(range(len(aspects)), aspect_scores, color='lightgreen', alpha=0.7)
    ax4.set_title('🎯 Average Aspect Sentiments', fontsize=14, fontweight='bold')
    ax4.set_xticks(range(len(aspects)))
    ax4.set_xticklabels(aspects, rotation=45, ha='right')
    ax4.set_ylabel('Average Sentiment Score')
else:
    ax4.text(0.5, 0.5, 'No Aspect Data Available',
             transform=ax4.transAxes, ha='center', va='center',
             fontsize=12, style='italic')
    ax4.set_title('🎯 Aspect Analysis', fontsize=14, fontweight='bold')

plt.tight_layout()
plt.show()

print("📈 Visualization Summary:")
print(f"• Analyzed {len(complex_texts)} complex texts")
print(f"• Average sentiment confidence: {np.mean(confidence_data):.4f}")
print(f"• Most common emotion: {Counter(result['label'] for result in static_emotion_results).most_common(1)[0][0]}")
print(f"• Sentiment distribution: {dict(Counter(result['label'] for result in static_sentiment_results))}")

## Comprehensive NLU Pipeline

Combining semantic parsing and advanced sentiment analysis into a unified pipeline.

In [None]:
def _contains_keyword(text, keywords):
    """Return True when any of ``keywords`` occurs in ``text`` as a whole word (case-insensitive)."""
    tokens = set(re.findall(r"[a-z0-9']+", text.lower()))
    return any(keyword in tokens for keyword in keywords)


def _count_sentences(text):
    """Count the non-empty segments of ``text`` ended by '.', '!' or '?'.

    A terminator only counts when followed by whitespace or the end of the text,
    so decimals ("$4.50") and e-mail addresses are not split.
    """
    return sum(1 for segment in re.split(r'[.!?]+(?:\s+|$)', text) if segment.strip())


def comprehensive_nlu_pipeline(text, task_type="auto"):
    """
    Complete NLU pipeline combining semantic parsing and sentiment analysis

    Args:
        text (str): Input text to analyze
        task_type (str): Type of analysis - "sentiment", "semantic", "both" or
            "auto". With "auto" the type is chosen from keywords in ``text``
            (matched as whole words). Any other value skips both analyses, so
            only the text statistics are produced.

    Returns:
        dict: Always contains 'stats' (word, character and sentence counts).
        Depending on the task type it also contains 'sql' and 'structured_data'
        (semantic parsing) and/or 'sentiment' and 'aspects' (sentiment analysis).
    """
    print("🔍 Comprehensive NLU Pipeline Analysis")
    print("=" * 70)
    print(f"📝 Input: {text}")
    print(f"🎯 Task Type: {task_type}")
    print()

    results = {}

    # Determine task type automatically if needed
    if task_type == "auto":
        # Simple heuristics to determine primary task. Keywords are matched as whole
        # words so that e.g. 'table' does not fire on "comfortable" or 'bad' on "badge".
        if _contains_keyword(text, ['sql', 'query', 'database', 'table', 'select']):
            task_type = "semantic"
        elif _contains_keyword(text, ['feel', 'think', 'love', 'hate', 'good', 'bad']):
            task_type = "sentiment"
        else:
            task_type = "both"

    # Semantic Parsing
    if task_type in ["semantic", "both"]:
        print("🗃️  Semantic Parsing Results:")

        # Try SQL generation only when the text contains a query-style verb
        if _contains_keyword(text, ['show', 'find', 'get', 'list', 'count']):
            sql_result = generate_sql(text)
            results['sql'] = sql_result
            print(f"   💾 SQL: {sql_result}")

        # Try structured extraction
        structured_result = extract_structured_data(text)
        results['structured_data'] = structured_result
        print(f"   📊 Structured: {structured_result}")
        print()

    # Sentiment Analysis
    if task_type in ["sentiment", "both"]:
        print("🎭 Sentiment Analysis Results:")

        # Multi-dimensional sentiment
        sentiment_results = analyze_complex_sentiment(text)
        results['sentiment'] = sentiment_results

        print(f"   😊 Basic: {sentiment_results['basic_sentiment']['label']} ({sentiment_results['basic_sentiment']['score']:.4f})")
        print(f"   💫 Emotion: {sentiment_results['emotion']['label']} ({sentiment_results['emotion']['score']:.4f})")
        print(f"   🌡️  Intensity: {sentiment_results['intensity']['label']} ({sentiment_results['intensity']['score']:.4f})")

        # Aspect-based sentiment if applicable
        if len(text.split()) > 10:  # Only for longer texts
            aspect_results = extract_aspects_and_sentiment(text)
            results['aspects'] = aspect_results
            print("   🎯 Aspects:")
            for aspect, sentiment in aspect_results.items():
                # Compare case-insensitively: the sentiment model reports lower-case
                # labels, so an upper-case comparison always fell through to 😐.
                label = sentiment['label'].lower()
                emoji = "👍" if label == 'positive' else "👎" if label == 'negative' else "😐"
                print(f"     {emoji} {aspect}: {sentiment['label']} ({sentiment['score']:.4f})")
        print()

    # Text Statistics
    words = text.split()
    sentences = _count_sentences(text)
    print("📊 Text Statistics:")
    print(f"   • Words: {len(words)}")
    print(f"   • Characters: {len(text)}")
    print(f"   • Sentences: {sentences}")
    # max(..., 1) avoids dividing by zero for text without any sentence content
    print(f"   • Avg words per sentence: {len(words)/max(sentences, 1):.1f}")

    results['stats'] = {
        'words': len(words),
        'characters': len(text),
        'sentences': sentences
    }

    print("\n" + "="*70 + "\n")

    return results

# Inputs for the comprehensive pipeline, grouped by the kind of analysis they target
semantic_examples = [
    "Show me all customers who made purchases greater than $500 in the last month",
    "Extract the key information: John Doe, 30 years old, Software Engineer at Google, lives in California",
]
sentiment_examples = [
    "I absolutely love this new smartphone! The camera is amazing but the battery life could be better.",
    "The hotel stay was disappointing - dirty rooms and terrible service, though the location was convenient.",
]
mixed_examples = [
    "Find all employees with positive performance reviews and high customer satisfaction ratings",
    "I'm excited about the new job opportunity at Microsoft, but worried about relocating to Seattle.",
]
test_cases = semantic_examples + sentiment_examples + mixed_examples

# Run every example through the pipeline, letting it pick the task type itself
for case_number, test_case in enumerate(test_cases, start=1):
    print(f"🧪 Test Case {case_number}")
    result = comprehensive_nlu_pipeline(test_case, task_type="auto")

## Interactive Experimentation

Try your own examples with the comprehensive NLU pipeline! Modify the variables below to test different scenarios.

In [None]:
# 🧪 Interactive Experimentation Area
# Edit the two variables below, then re-run this cell.

# Text to analyse
user_input = "I need to find all customers from New York who gave us 5-star ratings, but I'm concerned about the recent complaints regarding delivery times."

# One of "semantic", "sentiment", "both", or "auto"
analysis_type = "auto"

print("🧪 Your Custom NLU Analysis", "="*50, sep="\n")

# Run the comprehensive analysis
custom_result = comprehensive_nlu_pipeline(user_input, task_type=analysis_type)

# Performance metrics
print("⚡ Performance Insights:")
if 'sentiment' in custom_result:
    sentiment_confidence = custom_result['sentiment']['basic_sentiment']['score']
    print(f"   • Sentiment confidence: {sentiment_confidence:.4f}")

    # Tiers are checked from highest to lowest; the first floor exceeded wins
    confidence_tiers = (
        (0.8, "   • High confidence sentiment prediction ✅"),
        (0.6, "   • Moderate confidence sentiment prediction ⚠️"),
    )
    print(next(
        (message for floor, message in confidence_tiers if sentiment_confidence > floor),
        "   • Low confidence sentiment prediction ❌",
    ))

if 'stats' in custom_result:
    text_stats = custom_result['stats']
    # Words per sentence; max(..., 1) guards against a sentence count of zero
    complexity_score = text_stats['words'] / max(text_stats['sentences'], 1)
    print(f"   • Text complexity (words/sentence): {complexity_score:.1f}")

    complexity_tiers = (
        (20, "   • High complexity text 🔥"),
        (10, "   • Medium complexity text 📊"),
    )
    print(next(
        (message for floor, message in complexity_tiers if complexity_score > floor),
        "   • Simple text structure 📝",
    ))

# Ideas for further experiments, printed one bullet per line
print("\n💡 Experiment Suggestions:")
for suggestion in (
    "   • Try different task types: 'semantic', 'sentiment', 'both'",
    "   • Test with longer texts for aspect-based sentiment analysis",
    "   • Use domain-specific language (SQL, reviews, technical docs)",
    "   • Mix positive and negative sentiments in the same text",
    "   • Include structured data like names, dates, and numbers",
):
    print(suggestion)

# Ready-made inputs that can be pasted into user_input
print("\n📋 Quick Test Examples (copy and paste into user_input):")
quick_examples = [
    "Generate SQL to show top 10 best-selling products with customer reviews above 4 stars",
    "The new restaurant has amazing pasta and great atmosphere, but the service is incredibly slow and prices are too high",
    "Extract contact info: Dr. Sarah Johnson, cardiologist at Mayo Clinic, phone: 555-123-4567, email: s.johnson@mayo.edu",
    "I'm thrilled about the promotion but nervous about the increased responsibilities and longer hours",
    "Find all employees hired after 2020 with performance ratings excellent or outstanding",
]

for position, example in enumerate(quick_examples, start=1):
    print(f"   {position}. {example}")

# Recap of what was analysed in this run
analysed_components = ', '.join(custom_result.keys())
print("\n🎯 Current Analysis Summary:")
print(f"   • Input length: {len(user_input)} characters")
print(f"   • Analysis type: {analysis_type}")
print(f"   • Components analyzed: {analysed_components}")

# Bar chart of the three classifier confidences for the current input
if 'sentiment' in custom_result:
    sentiment_data = custom_result['sentiment']

    # Result keys in the order in which the bars are drawn
    component_keys = ('basic_sentiment', 'emotion', 'intensity')
    components = ['Basic Sentiment', 'Emotion', 'Intensity']
    scores = [sentiment_data[key]['score'] for key in component_keys]
    labels = [sentiment_data[key]['label'] for key in component_keys]

    fig, ax = plt.subplots(1, 1, figsize=(10, 6))
    bars = ax.bar(components, scores, color=['lightblue', 'lightgreen', 'lightcoral'], alpha=0.7)

    # Write the predicted label and its score just above each bar
    for bar, label in zip(bars, labels):
        height = bar.get_height()
        bar_centre = bar.get_x() + bar.get_width()/2.
        ax.text(bar_centre, height + 0.01,
                f'{label}\n({height:.3f})', ha='center', va='bottom', fontweight='bold')

    ax.set_title('🎭 Sentiment Analysis Components for Your Input', fontsize=14, fontweight='bold')
    ax.set_ylabel('Confidence Score')
    ax.set_ylim(0, 1.1)  # head-room for the text above bars close to 1.0
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

print("\n✨ Ready for your next experiment! Modify 'user_input' and 'analysis_type' above and re-run this cell.")