# In [None]:
"""
# Twitter Sentiment Analysis - Evaluation & Visualization Notebook

## 1. Import Libraries
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from sklearn.metrics import (classification_report, confusion_matrix, 
                           accuracy_score, precision_recall_curve, roc_curve)
from sklearn.preprocessing import label_binarize
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Import custom visualization classes
import sys
sys.path.append('../src')
from visualization import SentimentVisualizer
from model_visualization import ModelPerformanceVisualizer

"""
## 2. Load Data and Models
"""

# Load data
# Full cleaned dataset, used later for the data-level visualizations.
df = pd.read_csv('../data/processed/cleaned_twitter_data.csv')

# Parse the held-out split once and take both columns from the same frame,
# instead of reading and parsing the CSV a second time for the labels.
test_df = pd.read_csv('../data/processed/test_data.csv')
X_test = test_df['text']        # model inputs
y_test = test_df['sentiment']   # ground-truth labels

# Load trained models
# ``models`` maps a display name to the object returned by joblib.load().
# Models that fail to load are skipped so the rest of the notebook can still
# run on whatever is available.
# NOTE: joblib files are pickle-based; only load files from a trusted source.
models = {}
model_files = {
    'Naive_Bayes': '../models/trained_models/Naive_Bayes_model.pkl',
    'Logistic_Regression': '../models/trained_models/Logistic_Regression_model.pkl',
    'Random_Forest': '../models/trained_models/Random_Forest_model.pkl',
    'SVM': '../models/trained_models/SVM_model.pkl',
}

for model_name, file_path in model_files.items():
    try:
        loaded_model = joblib.load(file_path)
    except Exception as exc:
        # Best-effort loading: report the actual cause (missing file,
        # incompatible pickle, ...) instead of hiding it, and let
        # KeyboardInterrupt / SystemExit propagate.
        print(f"Could not load {model_name}: {exc!r}")
    else:
        models[model_name] = loaded_model
        print(f"Loaded {model_name}")

"""
## 3. Model Predictions and Metrics
"""

# The sibling project modules are imported by flat name after '../src' is
# appended to sys.path at the top of this file. Try the package-style import
# first (works when the project root is importable) and fall back to the flat
# module name so this import follows the same convention.
# NOTE(review): assumes ``src.evaluation`` and ``../src/evaluation.py`` are the
# same module — confirm against the repository layout.
try:
    from src.evaluation import ModelEvaluator
except ModuleNotFoundError as exc:
    # Only fall back when ``src`` itself cannot be resolved; a missing
    # dependency inside the module must still surface unchanged.
    if exc.name not in ('src', 'src.evaluation'):
        raise
    from evaluation import ModelEvaluator

evaluator = ModelEvaluator()

# Generate predictions and calculate metrics.
# ``predictions`` keeps the raw outputs per model for the error analysis below.
predictions = {}
for model_name, model in models.items():
    y_pred = model.predict(X_test)
    # Not every estimator exposes class probabilities; pass None when absent.
    y_prob = model.predict_proba(X_test) if hasattr(model, 'predict_proba') else None

    # calculate_metrics returns a mapping with a 'metrics' entry holding at
    # least 'accuracy' and 'f1_macro' (both are read just below).
    result = evaluator.calculate_metrics(y_test, y_pred, y_prob, model_name)
    predictions[model_name] = {'y_pred': y_pred, 'y_prob': y_prob}

    print(f"\n{model_name} Results:")
    print(f"Accuracy: {result['metrics']['accuracy']:.4f}")
    print(f"F1-Score: {result['metrics']['f1_macro']:.4f}")

"""
## 4. Comprehensive Model Comparison
"""

# Build the comparison table for the evaluated models.
# ``comparison_df`` is indexed by model name; later sections read its
# 'accuracy' and 'f1_macro' columns.
comparison_df = evaluator.generate_comparison_report()

# Emit the heading and the table in one call; the output is the same as two
# consecutive print() calls.
print("\nModel Comparison Summary:", comparison_df, sep="\n")

# Render the comparison chart.
evaluator.plot_model_comparison()

"""
## 5. Confusion Matrix Visualization
"""

model_viz = ModelPerformanceVisualizer()

# Wrap each loaded model as {'pipeline': model}, the shape handed to the
# visualizer, then draw the confusion matrices for the held-out split.
confusion_inputs = {
    fitted_name: {'pipeline': fitted_model}
    for fitted_name, fitted_model in models.items()
}
model_viz.plot_confusion_matrix_comparison(confusion_inputs, X_test, y_test)

"""
## 6. Precision-Recall Comparison
"""

# Build a fresh {'name': {'pipeline': model}} mapping for this call and plot
# the precision/recall comparison on the held-out split.
precision_recall_inputs = {
    fitted_name: {'pipeline': fitted_model}
    for fitted_name, fitted_model in models.items()
}
model_viz.plot_precision_recall_comparison(precision_recall_inputs, X_test, y_test)

"""
## 7. Data Visualization
"""

# Data-level visualizations over the full cleaned dataset.
data_viz = SentimentVisualizer(df)

# Run the renderers in order: the dashboard first, then the word-cloud grid.
for _render_step in ("create_comprehensive_dashboard", "create_wordclouds_grid"):
    getattr(data_viz, _render_step)()

"""
## 8. Feature Importance Analysis
"""

# Get the best model for feature importance analysis.
# NOTE(review): the first row of the comparison report is treated as the best
# model — confirm that generate_comparison_report() sorts best-first.
best_model_name = comparison_df.index[0]
best_model = models[best_model_name]
best_clf = best_model.named_steps['clf']

# Only estimators that expose ``coef_`` are analysed; others are skipped.
if hasattr(best_clf, 'coef_'):
    print(f"\nFeature Importance Analysis for {best_model_name}")

    # Get feature names and coefficients
    feature_names = best_model.named_steps['tfidf'].get_feature_names_out()
    coefficients = best_clf.coef_
    # Some estimators return a sparse matrix; densify it so argsort and
    # fancy indexing below work on plain arrays.
    if hasattr(coefficients, 'toarray'):
        coefficients = coefficients.toarray()

    # Display name / colour per sentiment label (-1, 0, 1).
    sentiment_names = ['Negative', 'Neutral', 'Positive']
    label_to_name = dict(zip([-1, 0, 1], sentiment_names))
    label_to_color = dict(zip([-1, 0, 1], ['red', 'blue', 'green']))

    # Work out which label each coefficient row describes instead of assuming
    # exactly three rows ordered as [-1, 0, 1].
    class_labels = list(getattr(best_clf, 'classes_', [-1, 0, 1]))
    n_rows = coefficients.shape[0]
    if n_rows == 1 and len(class_labels) == 2:
        # Binary linear models store a single row, for the second class.
        row_labels = [class_labels[1]]
    elif n_rows == len(class_labels):
        # NOTE(review): for one-vs-one estimators the rows are class pairs,
        # not classes — confirm which estimator type is used for 'SVM'.
        row_labels = class_labels
    else:
        row_labels = [f'row {row}' for row in range(n_rows)]

    # squeeze=False keeps ``axes`` two-dimensional even for a single panel.
    fig, axes = plt.subplots(1, n_rows, figsize=(20, 6), squeeze=False)

    for ax, label, class_coef in zip(axes[0], row_labels, coefficients):
        # Ten largest coefficients for this row, in ascending order so the
        # strongest feature ends up at the top of the horizontal bar chart.
        top_indices = np.argsort(class_coef)[-10:]
        top_features = feature_names[top_indices]
        top_scores = class_coef[top_indices]

        positions = range(len(top_features))
        ax.barh(positions, top_scores, color=label_to_color.get(label, 'gray'))
        ax.set_yticks(positions)
        ax.set_yticklabels(top_features)
        ax.set_title(f'Top Features - {label_to_name.get(label, str(label))}')
        ax.set_xlabel('Coefficient Value')

    plt.tight_layout()
    plt.show()

"""
## 9. Error Analysis
"""

# Error analysis for the best model: collect every test row whose predicted
# label differs from the ground truth.
best_model_predictions = predictions[best_model_name]['y_pred']

# Boolean mask of the rows the model got wrong; reused for all three selections.
error_mask = y_test != best_model_predictions
misclassified = X_test[error_mask]
misclassified_actual = y_test[error_mask]
misclassified_pred = best_model_predictions[error_mask]

misclassified_df = pd.DataFrame(
    dict(
        text=misclassified,
        actual=misclassified_actual,
        predicted=misclassified_pred,
    )
)

error_count = len(misclassified_df)
print(f"\nMisclassification Analysis for {best_model_name}:")
print(f"Total misclassified: {error_count}")
print(f"Misclassification rate: {error_count / len(X_test):.4f}")

# Show the first few errors for manual inspection.
print("\nSample misclassifications:")
print(misclassified_df.head(10))

"""
## 10. Interactive Visualizations
"""

# Create interactive dashboard (best effort: a failure here must not stop the
# rest of the notebook).
# NOTE(review): ``InteractiveVisualizations`` is not imported anywhere in the
# visible part of this file; if it is not defined elsewhere this section is
# always skipped with a NameError — add the missing import.
try:
    interactive_viz = InteractiveVisualizations(df)
    interactive_viz.create_interactive_sentiment_dashboard()
    interactive_viz.create_interactive_word_cloud()
except Exception as exc:
    # plotly is already imported at the top of the file, so blaming plotly
    # here would be misleading; report the real cause instead.
    print(f"Interactive visualizations skipped: {exc!r}")

"""
## 11. Save Evaluation Results
"""

# Save detailed evaluation results
import json
import os


def _json_default(value):
    """Convert NumPy scalars to built-in Python types for ``json.dump``.

    Raises:
        TypeError: for any other unsupported type, matching json's own
            behaviour so unexpected objects are not silently stringified.
    """
    if isinstance(value, np.generic):
        return value.item()
    raise TypeError(
        f"Object of type {type(value).__name__} is not JSON serializable"
    )


evaluation_results = {
    'model_comparison': comparison_df.to_dict(),
    'misclassification_analysis': {
        'total_misclassified': len(misclassified_df),
        'misclassification_rate': len(misclassified_df)/len(X_test),
        'sample_misclassifications': misclassified_df.head(20).to_dict('records')
    },
    'best_model': {
        'name': best_model_name,
        'accuracy': comparison_df.loc[best_model_name, 'accuracy'],
        'f1_score': comparison_df.loc[best_model_name, 'f1_macro']
    }
}

output_path = '../models/model_evaluation/detailed_evaluation.json'
# Create the output directory on first run so open() does not fail.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
with open(output_path, 'w') as f:
    # ``default`` handles NumPy integer/float scalars that may appear in the
    # sampled records or the metric values.
    json.dump(evaluation_results, f, indent=2, default=_json_default)

print("\nEvaluation completed successfully!")
print(f"Best model: {best_model_name}")
print(f"Best accuracy: {comparison_df.loc[best_model_name, 'accuracy']:.4f}")