In [None]:
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import os
import sys

# Import the pipeline class to reuse its feature-engineering logic.
# `__file__` is undefined in interactive sessions (e.g. a notebook cell), so
# fall back to the current working directory in that case.
try:
    _SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
except NameError:
    _SCRIPT_DIR = os.getcwd()

# Put '<script dir>/src' on the module search path so the bare-module fallback
# below can resolve even when the 'src' package itself is not importable.
sys.path.append(os.path.join(_SCRIPT_DIR, 'src'))
try:
    # Normal case: the directory that contains 'src' is already on sys.path.
    from src.csat_pipelining import CSATPredictor
except ImportError:
    # Fallback: load the module directly from the 'src' directory added above.
    # (Previously this branch repeated the import that had just failed.)
    from csat_pipelining import CSATPredictor

def _search_roots():
    """Return the directories searched for 'data' and 'models', highest priority first."""
    roots = [os.getcwd()]  # current CLI location
    try:
        script_dir = os.path.dirname(os.path.abspath(__file__))
    except NameError:
        # __file__ is undefined in interactive sessions (e.g. a notebook cell);
        # only the working directory can be searched there.
        return roots
    roots.append(script_dir)                    # script location
    roots.append(os.path.dirname(script_dir))   # parent of script
    return roots


def _first_existing(roots, *parts):
    """Return the first ``os.path.join(root, *parts)`` that exists, or None."""
    for root in roots:
        candidate = os.path.join(root, *parts)
        if os.path.exists(candidate):
            return candidate
    return None


def evaluate_model():
    """Evaluate the saved CSAT model on a held-out split and save a confusion matrix.

    Steps:
      1. Locate ``data/eCommerce_Customer_support_data.csv`` and
         ``models/csat_model.pkl`` under the current working directory, the
         script directory, or the script's parent directory. The first
         location that contains the file wins.
      2. Load the CSV with pandas and the model with joblib.
      3. Run ``CSATPredictor.feature_engineering`` on a copy of the raw frame
         and drop rows whose 'CSAT Score' is missing.
      4. Split 80/20 with ``random_state=42`` and predict on the test part.
      5. Print accuracy and the classification report, then write
         ``plots/confusion_matrix.png`` (relative to the working directory).

    Returns:
        None. If the data file or model file cannot be found, or no rows
        remain after dropping missing targets, an error is printed and the
        function returns early.
    """
    print("------------------------------------------------")
    print("      DeepCSAT Model Evaluation")
    print("------------------------------------------------")

    # 1. Robust path finding: works whether run from root, src, or notebooks.
    filename = "eCommerce_Customer_support_data.csv"
    model_name = "csat_model.pkl"
    plot_dir = "plots"

    roots = _search_roots()
    data_path = _first_existing(roots, "data", filename)
    model_path = _first_existing(roots, "models", model_name)

    # 2. Load data & model.
    # Status glyphs are written as escape sequences so the source file stays
    # ASCII-safe and cannot be re-garbled by an editor using the wrong codec.
    if data_path is None:
        default_data_path = os.path.join("data", filename)
        print("\u274c Error: Data file not found.")
        print(f"   Searched in: {default_data_path}")
        print("   Please ensure 'eCommerce_Customer_support_data.csv' is in a 'data' folder.")
        return

    if model_path is None:
        default_model_path = os.path.join("models", model_name)
        print("\u274c Error: Model file not found.")
        print(f"   Searched in: {default_model_path}")
        print("   Please run 'python main.py' to train the model first.")
        return

    print(f"Loading data from: {data_path}")
    df_raw = pd.read_csv(data_path)

    print(f"Loading model from: {model_path}")
    # joblib.load unpickles the file; only load model files you trust.
    model = joblib.load(model_path)

    # 3. Preprocessing (feature engineering).
    # The predictor is instantiated only to reach its feature_engineering method.
    temp_predictor = CSATPredictor(data_path)
    print("Applying feature engineering...")
    df_processed = temp_predictor.feature_engineering(df_raw.copy())

    # Drop rows where the target is missing.
    df_processed = df_processed.dropna(subset=['CSAT Score'])
    if df_processed.empty:
        print("\u274c Error: No rows with a 'CSAT Score' value to evaluate.")
        return

    # NOTE(review): X still contains the 'CSAT Score' column. This is harmless
    # only if the saved model selects its own feature columns; confirm against
    # the training code before relying on these metrics.
    X = df_processed
    y = df_processed['CSAT Score']

    # 4. Split data.
    # The split is only comparable to training if training used the same
    # test_size/random_state on the same rows; verify against the training code.
    print("Splitting data (Test Size: 20%)...")
    _, X_test, _, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # 5. Generate predictions.
    print("Predicting on test set...")
    y_pred = model.predict(X_test)

    # 6. Metrics.
    acc = accuracy_score(y_test, y_pred)
    print(f"\n\U0001F3C6 Model Accuracy: {acc:.4f}")

    print("\n\U0001F4DD Classification Report:")
    print(classification_report(y_test, y_pred))

    # 7. Confusion matrix.
    print("Generating Confusion Matrix...")
    os.makedirs(plot_dir, exist_ok=True)

    # Use the real class values as tick labels; without them the heatmap axes
    # show positional indices (0..k-1) instead of the CSAT scores.
    labels = np.union1d(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred, labels=labels)

    save_path = os.path.join(plot_dir, "confusion_matrix.png")
    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        sns.heatmap(
            cm,
            annot=True,
            fmt='d',
            cmap='Blues',
            cbar=False,
            xticklabels=labels,
            yticklabels=labels,
            ax=ax,
        )
        ax.set_xlabel('Predicted CSAT Score')
        ax.set_ylabel('Actual CSAT Score')
        ax.set_title('Confusion Matrix: Actual vs Predicted')
        fig.savefig(save_path)
    finally:
        # Always release the figure, even if drawing or saving fails.
        plt.close(fig)
    print(f"\u2714 Confusion Matrix saved to '{save_path}'")

# Run the evaluation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    evaluate_model()