"""
This notebook analyzes chat conversations to identify patterns of user dissatisfaction
and system responses, focusing on conversations where users express increasing frustration
or negative sentiment. The analysis combines sentiment analysis using the Cardiff RoBERTa model
with response similarity detection to identify related topics/tasks, while also tracking system 
apologies and temporal trends across different months. The visualizations highlight monthly 
patterns of problematic conversations, apology rates, and the progression of negative sentiment 
in user interactions.
"""

In [1]:
import os
import pandas as pd
import numpy as np
from transformers import pipeline
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
import matplotlib.pyplot as plt
import seaborn as sns
import re
from datetime import datetime
from pathlib import Path
from tqdm import tqdm 
import warnings

warnings.filterwarnings('ignore')

2025-02-14 01:50:22.774108: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-02-14 01:50:22.776936: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-02-14 01:50:22.822985: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Set cache directory for Hugging Face

In [2]:
# Keep downloaded Hugging Face models in a user-writable folder and make sure it exists.
# NOTE(review): `transformers` is already imported in the first cell; confirm that the
# library still honours TRANSFORMERS_CACHE when it is set after import.
hf_cache_dir = os.path.expanduser('~/Downloads/huggingface_cache')
os.makedirs(hf_cache_dir, exist_ok=True)
os.environ['TRANSFORMERS_CACHE'] = hf_cache_dir

# Initialize sentiment classifier using Cardiff RoBERTa

In [3]:
# Build the module-level sentiment pipeline (Cardiff RoBERTa). The same checkpoint
# supplies both the model weights and the tokenizer; inputs are truncated to 512 tokens.
sentiment_model_id = "cardiffnlp/twitter-roberta-base-sentiment-latest"
sentiment_pipeline_options = {
    "model": sentiment_model_id,
    "tokenizer": sentiment_model_id,
    "truncation": True,
    "max_length": 512,
}
sentiment_classifier = pipeline("text-classification", **sentiment_pipeline_options)

def analyze_sentiment(text: str) -> dict:
    """Classify one message with the module-level ``sentiment_classifier``.

    Args:
        text: Message to classify. Falsy values and whitespace-only text are
            treated as neutral without calling the model.

    Returns:
        A dict with the classifier's 'label' and 'score' for the top prediction.
        Any failure is printed and reported as ``{'label': 'neutral', 'score': 1.0}``.
    """
    try:
        # Only the first 512 characters are sent to the classifier.
        snippet = "" if not text else str(text)[:512]
        if snippet.strip() == "":
            return {'label': 'neutral', 'score': 1.0}
        top_prediction = sentiment_classifier(snippet)[0]
        label, score = top_prediction['label'], top_prediction['score']
        return {'label': label, 'score': score}
    except Exception as e:
        # Best effort: a failed classification must not abort a long batch run.
        print(f"Error analyzing sentiment: {e}")
        return {'label': 'neutral', 'score': 1.0}

def calculate_response_similarity(responses: list) -> float:
    """Return the mean TF-IDF cosine similarity of each response with the next one.

    Args:
        responses: Responses in conversation order. Falsy and whitespace-only
            entries are dropped before comparison.

    Returns:
        The average similarity over consecutive pairs, or 0.0 when fewer than two
        usable responses remain or when vectorisation fails (the error is printed).
    """
    cleaned = []
    for item in responses:
        if not item:
            continue
        stripped = str(item).strip()
        if stripped != "":
            cleaned.append(stripped)

    if len(cleaned) < 2:
        return 0.0

    vectorizer = TfidfVectorizer()
    try:
        pairwise = cosine_similarity(vectorizer.fit_transform(cleaned))
        # Only the entries just above the diagonal compare neighbouring responses.
        neighbour_scores = [pairwise[idx, idx + 1] for idx in range(len(pairwise) - 1)]
        return np.mean(neighbour_scores)
    except Exception as e:
        print(f"Error calculating response similarity: {e}")
        return 0.0

class ConversationEmotionAnalyzer:
    """Find conversations where the user turns negative while system replies stay similar.

    The analyzer works on a DataFrame with one row per prompt/response pair and
    flags a conversation when (a) consecutive system responses are highly similar
    and (b) user sentiment becomes negative or is negative more than once.
    """

    def __init__(self, df: pd.DataFrame):
        """Store the data and set up the emotion model and apology patterns.

        Args:
            df: Conversation rows. ``analyze_conversations`` reads the columns
                'conversation_id', 'prompt_timestamp', 'user_prompt' and
                'sys_response'.
        """
        self.df = df

        # EmoRoBERTa is only used by ``analyze_emotion``; ``analyze_conversations``
        # relies on the module-level ``analyze_sentiment`` helper instead.
        self.emotion_classifier = pipeline(
            "text-classification",
            model="arpanghoshal/EmoRoBERTa",
            model_kwargs={"from_tf": True},
            truncation=True,
            max_length=512
        )

        # Case-insensitive keywords treated as evidence of a system apology.
        self.apology_patterns = [
            r'(?i)sorry', r'(?i)apologize', r'(?i)regret',
            r'(?i)apologies', r'(?i)my mistake', r'(?i)incorrect'
        ]

    def analyze_emotion(self, text: str) -> dict:
        """Classify the emotion of ``text`` with EmoRoBERTa.

        Returns:
            ``{'emotion': label, 'confidence': score}`` for the top prediction, or
            ``{'emotion': 'unknown', 'confidence': 0.0}`` if classification fails
            (the error is printed).
        """
        try:
            result = self.emotion_classifier(str(text))[0]
            return {
                'emotion': result['label'],
                'confidence': result['score']
            }
        except Exception as e:
            print(f"Error in emotion analysis: {e}")
            return {'emotion': 'unknown', 'confidence': 0.0}

    def calculate_similarity(self, responses: list) -> float:
        """Return the mean similarity between consecutive responses."""
        return calculate_response_similarity(responses)

    def analyze_conversations(self, similarity_threshold: float = 0.7):
        """Analyze conversations for user dissatisfaction patterns.

        Uses 'user_prompt' for user messages, 'sys_response' for system
        responses and 'prompt_timestamp' for ordering and timestamps.

        A conversation is reported when all of the following hold:
          - it has at least two user messages and at least two system responses;
          - the average similarity of consecutive responses is at least
            ``similarity_threshold``;
          - a non-negative user message is directly followed by a negative one,
            or more than one user message is negative.

        Args:
            similarity_threshold: Minimum average response similarity.

        Returns:
            A list of dicts with the keys 'conversation_id', 'timestamp',
            'messages', 'responses', 'sentiments', 'similarity_score',
            'contains_apology' and 'negative_count'.
        """
        print("Starting conversation analysis...")
        # Sort so that each group is iterated in chronological order.
        conversations = self.df.sort_values(['conversation_id', 'prompt_timestamp'])
        total_convs = conversations['conversation_id'].nunique()
        print(f"Total conversations to analyze: {total_convs}")

        problem_conversations = []

        for conv_id, conv_group in tqdm(conversations.groupby('conversation_id'),
                                        total=total_convs,
                                        desc="Analyzing conversations"):
            # NOTE(review): the two columns are filtered independently, so positions
            # in 'messages' and 'responses' are not guaranteed to correspond when
            # either column has missing values.
            user_messages = conv_group['user_prompt'].dropna()
            response_texts = conv_group['sys_response'].dropna().tolist()

            if len(user_messages) < 2 or len(response_texts) < 2:
                continue

            # Run the cheap TF-IDF filter before the transformer-based sentiment
            # model: every condition must hold for a conversation to be reported,
            # so the order does not change the result, only the amount of inference.
            avg_similarity = self.calculate_similarity(response_texts)
            is_repetitive = avg_similarity >= similarity_threshold
            if not is_repetitive:
                continue

            sentiments = [analyze_sentiment(msg) for msg in user_messages]
            labels = [s['label'].lower() for s in sentiments]
            negative_count = labels.count('negative')

            # A switch from any non-negative message to a negative one.
            has_increasing_negative = any(
                before != 'negative' and after == 'negative'
                for before, after in zip(labels, labels[1:])
            )
            if not (has_increasing_negative or negative_count > 1):
                continue

            # Check for system apologies using common apology keywords.
            contains_apology = any(
                any(re.search(pattern, str(resp)) for pattern in self.apology_patterns)
                for resp in response_texts
            )

            problem_conversations.append({
                'conversation_id': conv_id,
                # Timestamp of the conversation's first row after sorting.
                'timestamp': conv_group['prompt_timestamp'].iloc[0],
                'messages': user_messages.tolist(),
                'responses': response_texts,
                'sentiments': sentiments,
                'similarity_score': avg_similarity,
                'contains_apology': contains_apology,
                'negative_count': negative_count
            })

        print(f"\nFound {len(problem_conversations)} problematic conversations")
        return problem_conversations

def load_and_preprocess_csv(file_path: str) -> pd.DataFrame:
    """Load a CSV file into a DataFrame and print a preview of its first rows.

    Assumes the CSV has columns: conversation_id, user_prompt, sys_response,
    prompt_timestamp, etc. No column validation is performed here.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        The loaded DataFrame.

    Raises:
        ValueError: If the file cannot be read or parsed. The underlying
            exception is attached as ``__cause__``.
    """
    try:
        df = pd.read_csv(file_path)
    except Exception as e:
        # Chain the original error so the real cause stays visible in tracebacks.
        raise ValueError(f"Error loading CSV file: {e}") from e

    print("CSV file loaded successfully.")
    print("\nFirst 10 rows of the data:")
    print(df.head(10))
    return df

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Xformers is not installed correctly. If you want to use memorry_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


# Run Analysis

In [4]:
csv_file = "/home/nipr-gpt/data/clean_src/niprgpt_msg_dyads_clean.csv"

print("Loading data from CSV...")
try:
    combined_df = load_and_preprocess_csv(csv_file)

    # Parse timestamps as timezone-aware UTC values; unparseable entries become NaT.
    parsed_timestamps = pd.to_datetime(
        combined_df['prompt_timestamp'], errors='coerce', utc=True
    )
    combined_df['prompt_timestamp'] = parsed_timestamps
    # 'YYYY-MM' bucket used for the monthly breakdowns.
    combined_df['month'] = parsed_timestamps.dt.strftime('%Y-%m')
    # Keep each conversation's rows together and in chronological order.
    combined_df = combined_df.sort_values(['conversation_id', 'prompt_timestamp'])

    print("\nData loading complete.")
    conversation_total = combined_df['conversation_id'].nunique()
    print(f"Total conversations: {conversation_total}")
    earliest_prompt = combined_df['prompt_timestamp'].min()
    latest_prompt = combined_df['prompt_timestamp'].max()
    print(f"Date range: {earliest_prompt} to {latest_prompt}")

except Exception as e:
    # Report the problem, then stop the cell: nothing below can run without data.
    print(f"Error in data loading: {e}")
    raise

print("\nAnalyzing conversations for user dissatisfaction...")
analyzer = ConversationEmotionAnalyzer(combined_df)
conversations = analyzer.analyze_conversations()

Loading data from CSV...
CSV file loaded successfully.

First 10 rows of the data:
   conversation_id                             prompt_id  \
0                1  f2923bd2-55ed-40bf-a08a-d7d840310c83   
1                1  0e21dd95-ff1a-4c2a-9a19-d5bb638d4c8e   
2                1  e4d628bb-925b-4193-a721-2f9e297d8c3e   
3                1  1de9702f-445b-4ed2-be23-72d286b72e12   
4                1  fe913ed9-0348-4403-8d2d-d10298595e46   
5                3  896b89c3-f16e-4444-8f74-5f5986610264   
6                3  7c20568d-dad3-4df1-8c89-c46d7e8849e5   
7                3  fb7d704e-1e5a-4dac-ad7f-9b2e3734b0c5   
8                3  4580c376-28b9-44dc-bc93-69bd9883ab96   
9                3  18a57669-8258-43d4-b60a-1bb37517f32c   

                                         user_prompt  \
0  What goes in to a terms of reference for a mee...   
1  1. Purpose: familiarize both labs with researc...   
2  Help write this better: Purpose: familiarize b...   
3  Write these better:\nKnowledg

2025-02-14 01:50:59.664634: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-02-14 01:50:59.666760: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-02-14 01:50:59.668807: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

Starting conversation analysis...
Total conversations to analyze: 169404


Analyzing conversations: 100%|██████████| 169404/169404 [6:18:06<00:00,  7.47it/s]   


Found 1479 problematic conversations





# Monthly Analysis

In [5]:
# Group the flagged conversations by calendar month ('YYYY-MM').
monthly_stats = {}
for conv in tqdm(conversations, desc="Calculating monthly statistics"):
    month = pd.to_datetime(conv['timestamp']).strftime('%Y-%m')
    bucket = monthly_stats.setdefault(month, {
        'total': 0,
        'with_apologies': 0,
        'avg_negative_count': 0,
        'conversations': []
    })
    bucket['total'] += 1
    if conv['contains_apology']:
        bucket['with_apologies'] += 1
    # Accumulated as a sum here; converted to an average in the report loop below.
    bucket['avg_negative_count'] += conv['negative_count']
    bucket['conversations'].append(conv)

print("\nMonthly Analysis:")
for month in sorted(monthly_stats):
    stats = monthly_stats[month]
    divisor = stats['total'] if stats['total'] > 0 else 1
    stats['avg_negative_count'] = stats['avg_negative_count'] / divisor
    if stats['total'] > 0:
        apology_percentage = stats['with_apologies'] / stats['total'] * 100
    else:
        apology_percentage = 0
    print(f"\n{month}:")
    print(f"Total problematic conversations: {stats['total']}")
    print(f"Conversations with apologies: {stats['with_apologies']} ({apology_percentage:.1f}%)")
    print(f"Average negative messages per conversation: {stats['avg_negative_count']:.2f}")

Calculating monthly statistics: 100%|██████████| 1479/1479 [00:00<00:00, 176925.89it/s]


Monthly Analysis:

2024-03:
Total problematic conversations: 1
Conversations with apologies: 0 (0.0%)
Average negative messages per conversation: 1.00

2024-05:
Total problematic conversations: 66
Conversations with apologies: 21 (31.8%)
Average negative messages per conversation: 1.53

2024-06:
Total problematic conversations: 272
Conversations with apologies: 147 (54.0%)
Average negative messages per conversation: 1.73

2024-07:
Total problematic conversations: 360
Conversations with apologies: 174 (48.3%)
Average negative messages per conversation: 1.64

2024-08:
Total problematic conversations: 663
Conversations with apologies: 304 (45.9%)
Average negative messages per conversation: 1.61

2024-09:
Total problematic conversations: 117
Conversations with apologies: 60 (51.3%)
Average negative messages per conversation: 1.54





# Save all analysis results

In [7]:
import pickle
import json
import os
from pathlib import Path
import numpy as np
import pandas as pd

# Persist the analysis: a full pickle for later reuse plus a small JSON summary.
save_dir = "frustrated_analysis_results"
os.makedirs(save_dir, exist_ok=True)


def _date_range():
    """Return first and last prompt dates of the loaded data as 'YYYY-MM-DD' strings."""
    return {
        'start': combined_df['prompt_timestamp'].min().strftime('%Y-%m-%d'),
        'end': combined_df['prompt_timestamp'].max().strftime('%Y-%m-%d')
    }


def _monthly_summary(include_details=False):
    """Build the per-month summary; optionally attach each month's conversation list."""
    summary = {}
    for month_key, month_stats in monthly_stats.items():
        entry = {
            'total': month_stats['total'],
            'with_apologies': month_stats['with_apologies'],
            'apology_percentage': (
                (month_stats['with_apologies'] / month_stats['total'] * 100)
                if month_stats['total'] > 0 else 0
            ),
            'avg_negative_count': month_stats['avg_negative_count'],
        }
        if include_details:
            entry['conversation_details'] = month_stats['conversations']
        summary[month_key] = entry
    return summary


def _sentiment_summary():
    """Count negative and total user messages among each month's flagged conversations."""
    summary = {}
    for month_key, month_stats in monthly_stats.items():
        negative_messages = 0
        total_messages = 0
        for conv_record in month_stats['conversations']:
            total_messages += len(conv_record['sentiments'])
            negative_messages += sum(
                1 for sent in conv_record['sentiments']
                if sent['label'].lower() == 'negative'
            )
        summary[month_key] = {
            'negative_messages': negative_messages,
            'total_messages': total_messages
        }
    return summary


# Complete results, including the raw conversations and the source DataFrame.
analysis_results = {
    'conversations': conversations,
    'monthly_stats': monthly_stats,
    'combined_df': combined_df.to_dict(),
    'statistics': {
        'total_conversations': len(conversations),
        'monthly_summary': _monthly_summary(include_details=True)
    },
    'metadata': {
        'total_conversations': len(conversations),
        'date_range': _date_range(),
        'total_original_conversations': combined_df['conversation_id'].nunique()
    }
}

with open(f"{save_dir}/full_analysis_results.pkl", "wb") as f:
    pickle.dump(analysis_results, f)

# Lightweight, human-readable summary (no conversation text).
summary_stats = {
    'total_conversations': len(conversations),
    'date_range': _date_range(),
    'monthly_summary': _monthly_summary(),
    'sentiment_summary': _sentiment_summary()
}

with open(f"{save_dir}/summary_stats.json", 'w') as f:
    json.dump(summary_stats, f, indent=2)

print(f"\nAll analysis results saved to {save_dir}/")


All analysis results saved to frustrated_analysis_results/


# Enhanced Visualizations

In [None]:
plt.style.use('default')
fig = plt.figure(figsize=(20, 15))

# Gather every monthly series first; the four panels below only draw them.
months = sorted(monthly_stats.keys())
totals = [monthly_stats[m]['total'] for m in months]
apology_counts = [monthly_stats[m]['with_apologies'] for m in months]
apology_rates = [
    (with_apology / total * 100) if total > 0 else 0
    for with_apology, total in zip(apology_counts, totals)
]
avg_negative = [monthly_stats[m]['avg_negative_count'] for m in months]

# 1. Monthly Trend of Problematic Conversations
plt.subplot(2, 2, 1)
plt.plot(months, totals, marker='o')
plt.title('Monthly Trend of Problematic Conversations')
plt.xticks(rotation=45)
plt.grid(True)

# 2. Apology Rate by Month
plt.subplot(2, 2, 2)
plt.bar(months, apology_rates)
plt.title('Apology Rate by Month (%)')
plt.xticks(rotation=45)
plt.grid(True)

# 3. Average Negative Messages per Conversation
plt.subplot(2, 2, 3)
plt.plot(months, avg_negative, marker='o', color='red')
plt.title('Average Negative Messages per Conversation')
plt.xticks(rotation=45)
plt.grid(True)

# 4. Combined Monthly Metrics: side-by-side bars for totals and apology counts
plt.subplot(2, 2, 4)
x = np.arange(len(months))
width = 0.35
plt.bar(x - width/2, totals, width, label='Total Problematic')
plt.bar(x + width/2, apology_counts, width, label='With Apologies')
plt.title('Monthly Comparison')
plt.xticks(x, months, rotation=45)
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.show()

# Domain and Task Analysis

In [None]:
def analyze_conversation_domains(conversations):
    """Analyze the types of tasks/domains where issues occur most frequently.

    Each conversation is assigned to the domain whose keywords occur most often
    (as substrings) in its lower-cased user messages. Conversations that match no
    keyword at all are counted under 'general' rather than under whichever domain
    happens to be listed first.

    Args:
        conversations: Iterable of dicts with the keys 'messages',
            'negative_count' and 'contains_apology'.

    Returns:
        A dict mapping each domain name to
        ``{'count', 'avg_negative', 'apology_rate'}``, where 'apology_rate' is a
        percentage. Domains with no conversations keep zeros.
    """
    domains = {
        'coding': ['code', 'programming', 'function', 'error', 'script', 'debug'],
        'writing': ['write', 'essay', 'text', 'document', 'edit', 'revise'],
        'analysis': ['analyze', 'data', 'calculate', 'report', 'statistics'],
        'explanation': ['explain', 'describe', 'define', 'clarify', 'understand'],
        'technical': ['system', 'configure', 'setup', 'install', 'technical'],
        'general': ['help', 'question', 'how to', 'what is']
    }

    domain_stats = {domain: {'count': 0, 'avg_negative': 0, 'apology_rate': 0}
                    for domain in domains}

    for conv in conversations:
        # str() guards against non-string messages (e.g. purely numeric prompts).
        full_text = ' '.join(str(message) for message in conv['messages']).lower()
        domain_scores = {domain: sum(1 for keyword in keywords if keyword in full_text)
                         for domain, keywords in domains.items()}
        dominant_domain, best_score = max(domain_scores.items(), key=lambda item: item[1])
        if best_score == 0:
            # No keyword matched: use the catch-all bucket instead of the first key.
            dominant_domain = 'general'

        bucket = domain_stats[dominant_domain]
        bucket['count'] += 1
        # Both fields hold running sums here and are normalised after the loop.
        bucket['avg_negative'] += conv['negative_count']
        bucket['apology_rate'] += 1 if conv['contains_apology'] else 0

    for stats in domain_stats.values():
        if stats['count'] > 0:
            stats['avg_negative'] /= stats['count']
            stats['apology_rate'] = (stats['apology_rate'] / stats['count']) * 100

    return domain_stats

print("\nAnalyzing conversation domains...")
domain_analysis = analyze_conversation_domains(conversations)

# One series per metric, all in the same domain order.
domains_list = list(domain_analysis.keys())
counts = [stats['count'] for stats in domain_analysis.values()]
avg_negative = [stats['avg_negative'] for stats in domain_analysis.values()]
apology_rates = [stats['apology_rate'] for stats in domain_analysis.values()]

plt.figure(figsize=(15, 10))

# Panel 1: how many flagged conversations fall into each domain.
plt.subplot(2, 2, 1)
plt.bar(domains_list, counts)
plt.title('Distribution of Issues Across Domains')
plt.xticks(rotation=45)

# Panel 2: average number of negative user messages per domain.
plt.subplot(2, 2, 2)
plt.bar(domains_list, avg_negative, color='red')
plt.title('Average Negative Messages by Domain')
plt.xticks(rotation=45)

# Panel 3: share of conversations containing a system apology.
plt.subplot(2, 2, 3)
plt.bar(domains_list, apology_rates, color='green')
plt.title('Apology Rate by Domain (%)')
plt.xticks(rotation=45)

plt.tight_layout()
plt.show()

print("\nDetailed Domain Analysis:")
for domain, stats in domain_analysis.items():
    heading = domain.upper()
    print(f"\n{heading}:")
    print(f"Total conversations: {stats['count']}")
    print(f"Average negative messages: {stats['avg_negative']:.2f}")
    print(f"Apology rate: {stats['apology_rate']:.1f}%")

# Response Pattern Analysis

In [None]:
def analyze_response_patterns(conversations):
    """Summarise how system response length changes within each conversation.

    Args:
        conversations: Iterable of dicts with the keys 'responses' (non-empty
            sequence) and 'contains_apology'.

    Returns:
        One dict per conversation with the character length of the first and last
        response, their difference (last minus first) and the apology flag.
    """
    def summarize(conv):
        sizes = [len(str(reply)) for reply in conv['responses']]
        first, last = sizes[0], sizes[-1]
        return {
            'initial_length': first,
            'final_length': last,
            'length_change': last - first,
            'contains_apology': conv['contains_apology']
        }

    return [summarize(conv) for conv in conversations]

patterns = analyze_response_patterns(conversations)

# Split the length changes by whether the conversation contained an apology.
length_changes = [p['length_change'] for p in patterns]
apology_changes = [p['length_change'] for p in patterns if p['contains_apology']]
no_apology_changes = [p['length_change'] for p in patterns if not p['contains_apology']]

plt.figure(figsize=(15, 5))

# Left: overall distribution of (last - first) response length.
plt.subplot(1, 2, 1)
plt.hist(length_changes, bins=30)
plt.title('Distribution of Response Length Changes')
plt.xlabel('Change in Response Length')

# Right: the same quantity compared between the two apology groups.
plt.subplot(1, 2, 2)
plt.boxplot(
    [apology_changes, no_apology_changes],
    labels=['With Apology', 'Without Apology']
)
plt.title('Response Length Changes vs Apologies')
plt.ylabel('Change in Response Length')

plt.tight_layout()
plt.show()

# Frustration Trigger Analysis

In [None]:
def analyze_frustration_triggers(conversations):
    """Analyze common triggers that lead to user frustration.

    Every user message after the first one in each conversation is matched
    against a set of keyword patterns. For each trigger the function counts the
    matches, keeps up to three example contexts, plots the share of each trigger
    and prints a detailed report.

    Args:
        conversations: Iterable of dicts with the keys 'messages', 'responses'
            and 'sentiments' ('sentiments' must have one entry per message).

    Returns:
        A tuple ``(trigger_stats, trigger_percentages)`` where ``trigger_stats``
        maps trigger name to ``{'count', 'examples'}`` and
        ``trigger_percentages`` maps trigger name to its share (in percent) of
        all trigger matches.
    """
    print("\nAnalyzing frustration triggers...")
    trigger_patterns = {
        'incorrect_response': r'(?i)(wrong|incorrect|not right|not what I|error|mistake)',
        'repetition': r'(?i)(repeat|again|already said|told you|same thing)',
        # Accept "didnt", "didn't" and the typographic apostrophe variant.
        'misunderstanding': r"(?i)(not understanding|didn['’]?t understand|misunderstand|confused)",
        'incomplete': r'(?i)(incomplete|not finished|partial|missing|left out)',
        'too_vague': r'(?i)(vague|unclear|be specific|more detail|clearer)',
        'off_topic': r'(?i)(off topic|irrelevant|not related|different topic)'
    }
    # Compile once instead of handing the pattern strings to re.search per message.
    compiled_patterns = {
        trigger: re.compile(pattern) for trigger, pattern in trigger_patterns.items()
    }

    trigger_stats = {trigger: {'count': 0, 'examples': []} for trigger in trigger_patterns}

    for conv in tqdm(conversations, desc="Analyzing triggers"):
        responses = conv['responses']
        # Start from the second message: the first one cannot react to a response.
        for i, msg in enumerate(conv['messages'][1:], 1):
            text = str(msg)
            for trigger, regex in compiled_patterns.items():
                if not regex.search(text):
                    continue
                entry = trigger_stats[trigger]
                entry['count'] += 1
                if len(entry['examples']) < 3:
                    entry['examples'].append({
                        'user_message': msg,
                        # Response at the preceding position, if there is one.
                        'previous_response': responses[i-1] if i-1 < len(responses) else None,
                        'sentiment_score': conv['sentiments'][i]['score']
                    })

    total_triggers = sum(stats['count'] for stats in trigger_stats.values())
    trigger_percentages = {
        trigger: (stats['count'] / total_triggers * 100 if total_triggers > 0 else 0)
        for trigger, stats in trigger_stats.items()
    }

    # Bar chart of trigger shares, most frequent first.
    plt.figure(figsize=(12, 6))
    triggers = list(trigger_percentages.keys())
    percentages = list(trigger_percentages.values())
    sorted_indices = np.argsort(percentages)[::-1]
    triggers = [triggers[i] for i in sorted_indices]
    percentages = [percentages[i] for i in sorted_indices]
    plt.bar(triggers, percentages)
    plt.title('Distribution of Frustration Triggers')
    plt.xlabel('Trigger Type')
    plt.ylabel('Percentage of Occurrences')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()

    print("\nDetailed Trigger Analysis:")
    for trigger in triggers:
        print(f"\n{trigger.upper()} ({trigger_stats[trigger]['count']} occurrences, {trigger_percentages[trigger]:.1f}%)")
        if trigger_stats[trigger]['examples']:
            print("Example context:")
            for i, example in enumerate(trigger_stats[trigger]['examples'], 1):
                print(f"\nExample {i}:")
                # str() keeps the preview from failing on non-string values.
                print(f"User message: {str(example['user_message'])[:200]}...")
                if example['previous_response']:
                    print(f"Previous system response: {str(example['previous_response'])[:200]}...")
                print(f"Sentiment score: {example['sentiment_score']:.2f}")

    return trigger_stats, trigger_percentages

trigger_stats, trigger_percentages = analyze_frustration_triggers(conversations)

plt.figure(figsize=(10, 6))
# Mean score of the stored examples per trigger. Only the examples kept in
# trigger_stats are averaged, not every match.
# NOTE(review): 'sentiment_score' is the classifier score of the predicted label;
# confirm that a lower value really means "more negative" before relying on the
# ranking below.
trigger_avg_sentiment = {}
for trigger, stats in trigger_stats.items():
    if stats['examples']:
        avg_sentiment = np.mean([ex['sentiment_score'] for ex in stats['examples']])
        trigger_avg_sentiment[trigger] = avg_sentiment

sorted_triggers = sorted(trigger_avg_sentiment.items(), key=lambda x: x[1])
triggers = [t[0] for t in sorted_triggers]
sentiments = [t[1] for t in sorted_triggers]

plt.barh(triggers, sentiments)
plt.title('Average Sentiment Score by Trigger Type')
plt.xlabel('Average Sentiment Score')
plt.tight_layout()
plt.show()

print("\nKey Insights:")
print(f"Most common trigger: {max(trigger_percentages.items(), key=lambda x: x[1])[0]}")
# min() raises ValueError on an empty mapping, which happens when no trigger
# matched any message; skip the line in that case.
if trigger_avg_sentiment:
    print(f"Trigger with most negative sentiment: {min(trigger_avg_sentiment.items(), key=lambda x: x[1])[0]}")

# Response Pattern Analysis for Negative Emotions

In [None]:
def analyze_negative_response_patterns(conversations, strong_negative_threshold: float = 0.5):
    """Analyze system responses that are followed by a negative user message.

    For every system response whose following user message was labelled
    negative, the response is checked against a set of textual and length-based
    patterns. Per pattern the function counts matches, averages the score of the
    following message, stores up to three strong examples, draws two bar charts
    and prints a report.

    Args:
        conversations: Iterable of dicts with the keys 'responses', 'messages'
            and 'sentiments' (each sentiment has 'label' and 'score').
        strong_negative_threshold: Minimum score magnitude of the following
            negative message for the pair to be stored as an example. The
            magnitude is used because the label already says the message is
            negative; a signed cut-off such as ``score < -0.5`` can never match
            a score in the range [0, 1], so no example would ever be collected.

    Returns:
        A dict mapping pattern name to
        ``{'count', 'avg_next_sentiment', 'examples'}``.
    """
    print("\nAnalyzing system responses that trigger negative emotions...")
    # String values are regexes; callables are predicates on the raw response.
    response_patterns = {
        'generic_response': r'(?i)(I understand|I can help|please provide|I\'ll assist)',
        'uncertainty': r'(?i)(might|maybe|perhaps|not sure|possibly)',
        'deflection': r'(?i)(cannot|unable to|I don\'t|I can\'t)',
        'complexity': r'(?i)(complex|complicated|difficult|advanced)',
        'technical_terms': r'(?i)(technical|functionality|implementation|algorithm)',
        'long_response': lambda x: len(str(x)) > 500,
        'short_response': lambda x: len(str(x)) < 50
    }

    pattern_stats = {pattern: {'count': 0, 'avg_next_sentiment': 0.0, 'examples': []}
                     for pattern in response_patterns}

    for conv in tqdm(conversations, desc="Analyzing response patterns"):
        # The last response has no following user message, so it is skipped.
        for i in range(len(conv['responses'])-1):
            if i+1 >= len(conv['sentiments']):
                continue
            next_sentiment = conv['sentiments'][i+1]
            if next_sentiment['label'].lower() != 'negative':
                continue

            current_response = conv['responses'][i]
            is_strong = abs(next_sentiment['score']) > strong_negative_threshold

            for pattern_name, pattern in response_patterns.items():
                if callable(pattern):
                    matches = pattern(current_response)
                else:
                    matches = bool(re.search(pattern, str(current_response)))
                if not matches:
                    continue

                entry = pattern_stats[pattern_name]
                entry['count'] += 1
                # Running sum; turned into a mean after the loop.
                entry['avg_next_sentiment'] += next_sentiment['score']
                if is_strong and len(entry['examples']) < 3:
                    entry['examples'].append({
                        'response': current_response,
                        'next_user_message': conv['messages'][i+1] if i+1 < len(conv['messages']) else None,
                        'sentiment_score': next_sentiment['score']
                    })

    for stats in pattern_stats.values():
        if stats['count'] > 0:
            stats['avg_next_sentiment'] /= stats['count']

    # Top chart: match counts (most frequent first). Bottom chart: mean following score.
    plt.figure(figsize=(15, 10))
    plt.subplot(2, 1, 1)
    patterns_list = list(pattern_stats.keys())
    counts = [stats['count'] for stats in pattern_stats.values()]
    sorted_indices = np.argsort(counts)[::-1]
    patterns_list = [patterns_list[i] for i in sorted_indices]
    counts = [counts[i] for i in sorted_indices]
    plt.bar(patterns_list, counts)
    plt.title('Frequency of Response Patterns Leading to Negative Emotions')
    plt.xticks(rotation=45, ha='right')
    plt.ylabel('Number of Occurrences')
    plt.subplot(2, 1, 2)
    avg_sentiments = [pattern_stats[p]['avg_next_sentiment'] for p in patterns_list]
    plt.bar(patterns_list, avg_sentiments, color='red')
    plt.title('Average Following Sentiment Score by Response Pattern')
    plt.xticks(rotation=45, ha='right')
    plt.ylabel('Average Sentiment Score')
    plt.tight_layout()
    plt.show()

    print("\nDetailed Response Pattern Analysis:")
    for pattern in patterns_list:
        stats = pattern_stats[pattern]
        print(f"\n{pattern.upper()} ({stats['count']} occurrences)")
        print(f"Average next sentiment score: {stats['avg_next_sentiment']:.3f}")
        if stats['examples']:
            print("\nExample responses that triggered strong negative reactions:")
            for i, example in enumerate(stats['examples'], 1):
                print(f"\nExample {i}:")
                print(f"System response: {str(example['response'])[:200]}...")
                if example['next_user_message']:
                    print(f"User reaction: {str(example['next_user_message'])[:200]}...")
                print(f"Resulting sentiment score: {example['sentiment_score']:.3f}")

    return pattern_stats

response_pattern_stats = analyze_negative_response_patterns(conversations)

print("\nAnalyzing response length correlation with negative emotions...")
# Pair each response (except the last) with the score of the user message that
# sits one position later in the conversation.
# NOTE(review): 'score' is stored as returned by the sentiment classifier; confirm
# whether it is signed before interpreting the y=0 reference line or the correlation.
lengths = []
sentiments_list = []
for conv in conversations:
    replies = conv['responses']
    user_sentiments = conv['sentiments']
    for reply, following in zip(replies[:-1], user_sentiments[1:]):
        lengths.append(len(str(reply)))
        sentiments_list.append(following['score'])

plt.figure(figsize=(10, 6))
plt.scatter(lengths, sentiments_list, alpha=0.5)
plt.title('Response Length vs Next Sentiment Score')
plt.xlabel('Response Length (characters)')
plt.ylabel('Next Sentiment Score')
plt.axhline(y=0, color='r', linestyle='--', alpha=0.3)
plt.show()

correlation_matrix = np.corrcoef(lengths, sentiments_list)
correlation = correlation_matrix[0,1]
print(f"\nCorrelation between response length and next sentiment: {correlation:.3f}")

print("\nKey Insights:")
most_common_trigger = max(trigger_percentages.items(), key=lambda x: x[1])[0]
print(f"Most common trigger: {most_common_trigger}")
# trigger_avg_sentiment comes from the frustration-trigger section and may be empty.
if trigger_avg_sentiment:
    most_negative_trigger = min(trigger_avg_sentiment.items(), key=lambda x: x[1])[0]
    print(f"Trigger with most negative sentiment: {most_negative_trigger}")