# FreshHarvest Model Evaluation

This notebook provides a comprehensive evaluation of trained FreshHarvest models.

## Evaluation Overview
- Model loading and validation
- Performance metrics calculation
- Confusion matrix analysis
- ROC curve analysis
- Per-class performance evaluation
- Error analysis and insights

In [None]:
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Add src to path
sys.path.append('../src')

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Sklearn imports
from sklearn.metrics import (
    classification_report, confusion_matrix, roc_curve, auc,
    accuracy_score, precision_score, recall_score, f1_score
)
from sklearn.preprocessing import label_binarize

# Import custom modules
from cvProject_FreshHarvest.utils.common import read_yaml, setup_logging

# Setup
# Initialise logging through the project helper imported from
# cvProject_FreshHarvest.utils.common (its exact behaviour is defined there).
setup_logging()
# Select the matplotlib style sheet first, then set seaborn's "husl" palette.
# NOTE: the 'seaborn-v0_8' style name is only available in Matplotlib >= 3.6.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Banner plus environment details, useful when comparing runs across machines.
print("FreshHarvest Model Evaluation Notebook")
print("=" * 45)
print(f"TensorFlow version: {tf.__version__}")
# Prints the list of GPU devices TensorFlow can see; an empty list means none.
print(f"GPU available: {tf.config.list_physical_devices('GPU')}")

## 1. Configuration and Model Loading

In [None]:
# Load configuration
config = read_yaml('../config/config.yaml')
print("Configuration loaded:")
print(f"- Image size: {config['data']['image_size']}")
print(f"- Number of classes: {config['data']['num_classes']}")

# Define class names. The position in this list is the class index used for
# metric labels and plot ticks in the rest of the notebook.
CLASS_NAMES = [
    'F_Banana', 'F_Lemon', 'F_Lulo', 'F_Mango', 'F_Orange', 'F_Strawberry', 'F_Tamarillo', 'F_Tomato',
    'S_Banana', 'S_Lemon', 'S_Lulo', 'S_Mango', 'S_Orange', 'S_Strawberry', 'S_Tamarillo', 'S_Tomato'
]

print(f"\nClass names: {CLASS_NAMES}")

# Surface a config/notebook disagreement early instead of letting it show up
# later as a confusing shape error.
if config['data']['num_classes'] != len(CLASS_NAMES):
    print(f"⚠️ config num_classes ({config['data']['num_classes']}) does not match "
          f"len(CLASS_NAMES) ({len(CLASS_NAMES)})")

# Model paths to try, in priority order; the first one that loads wins.
model_paths = [
    '../models/trained/best_model.h5',
    '../models/best_hypertuned_model.h5',
    '../models/checkpoints/best_model_20250618_100126.h5'
]

# Load trained model
model = None
for path in model_paths:
    if not os.path.exists(path):
        continue
    try:
        model = keras.models.load_model(path)
    except Exception as e:
        # A corrupt or incompatible file should not stop us trying the next candidate.
        print(f"❌ Failed to load {path}: {e}")
    else:
        print(f"\n✅ Model loaded successfully from: {path}")
        print(f"Model input shape: {model.input_shape}")
        print(f"Model output shape: {model.output_shape}")
        print(f"Total parameters: {model.count_params():,}")
        break

if model is None:
    print("\n⚠️ No trained model found. Please train a model first.")
    print("Creating dummy model for demonstration...")

    # Build the stand-in from the same settings the test generator uses:
    # the generator resizes images to config['data']['image_size'], so a fixed
    # 224x224 input would fail at predict time for any other configured size.
    dummy_input_shape = (*config['data']['image_size'], 3)

    # Create a simple (untrained) model for demonstration
    model = keras.Sequential([
        keras.layers.Input(shape=dummy_input_shape),
        keras.layers.GlobalAveragePooling2D(),
        keras.layers.Dense(len(CLASS_NAMES), activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

## 2. Test Data Preparation

In [None]:
# Create test data generator
def create_test_generator(test_dir='../data/processed/test', batch_size=1):
    """Create an unshuffled, rescale-only generator over the test images.

    Args:
        test_dir: Directory passed to ``flow_from_directory``. Defaults to the
            processed test split used by this notebook.
        batch_size: Images per batch. Defaults to 1 for detailed per-sample
            analysis; a larger value speeds up prediction.

    Returns:
        The iterator returned by ``ImageDataGenerator.flow_from_directory``.
        Images are rescaled by 1/255 and resized to ``config['data']['image_size']``
        (read from the notebook-level ``config``); labels are one-hot
        (``class_mode='categorical'``).
    """
    test_datagen = ImageDataGenerator(rescale=1./255)

    return test_datagen.flow_from_directory(
        test_dir,
        target_size=tuple(config['data']['image_size']),
        batch_size=batch_size,
        class_mode='categorical',
        # Keep file order fixed so generator.classes lines up with the
        # prediction rows produced by model.predict.
        shuffle=False,
    )

# Create test generator and produce predictions.
# Outputs of this cell (used by every later cell):
#   predictions - per-sample class scores, one row per test sample
#   y_true      - integer class index per sample
#   y_pred      - argmax of predictions per sample
try:
    test_gen = create_test_generator()
    print(f"✅ Test generator created successfully")
    print(f"Test samples: {test_gen.samples}")
    print(f"Test classes: {list(test_gen.class_indices.keys())}")

    # The metrics and plots below label index i with CLASS_NAMES[i]; warn if the
    # generator assigned indices in a different order, since every per-class
    # number would then be attributed to the wrong class.
    generator_class_order = sorted(test_gen.class_indices, key=test_gen.class_indices.get)
    if generator_class_order != CLASS_NAMES:
        print("⚠️ Generator class order differs from CLASS_NAMES; per-class results may be mislabeled.")

    # Generate predictions
    print(f"\n🔮 Generating predictions...")
    test_gen.reset()
    predictions = model.predict(test_gen, verbose=1)

    # Get true labels and predicted labels
    y_true = test_gen.classes
    y_pred = np.argmax(predictions, axis=1)

    print(f"✅ Predictions generated")
    print(f"Predictions shape: {predictions.shape}")
    print(f"True labels shape: {y_true.shape}")
    print(f"Predicted labels shape: {y_pred.shape}")

except Exception as e:
    # Deliberate best-effort fallback so the rest of the notebook can still be
    # demonstrated without a test set.
    print(f"❌ Error with test data: {e}")
    print("Creating dummy test data for demonstration...")

    # Create dummy test data. Seeded so repeated runs give the same numbers,
    # and sized from CLASS_NAMES rather than a hard-coded class count.
    n_samples = 100
    n_classes = len(CLASS_NAMES)
    rng = np.random.default_rng(42)
    predictions = rng.random((n_samples, n_classes))
    predictions = predictions / predictions.sum(axis=1, keepdims=True)  # Normalize to probabilities
    y_true = rng.integers(0, n_classes, n_samples)
    y_pred = np.argmax(predictions, axis=1)

    print(f"Using dummy data: {n_samples} samples")
    print("⚠️ All metrics below are computed on random data and are NOT meaningful.")

## 3. Performance Metrics Calculation

In [None]:
# Calculate comprehensive performance metrics.
# zero_division=0 scores a class with no predicted (or no true) samples as 0
# instead of emitting a warning.
accuracy = accuracy_score(y_true, y_pred)
precision_macro = precision_score(y_true, y_pred, average='macro', zero_division=0)
recall_macro = recall_score(y_true, y_pred, average='macro', zero_division=0)
f1_macro = f1_score(y_true, y_pred, average='macro', zero_division=0)

precision_weighted = precision_score(y_true, y_pred, average='weighted', zero_division=0)
recall_weighted = recall_score(y_true, y_pred, average='weighted', zero_division=0)
f1_weighted = f1_score(y_true, y_pred, average='weighted', zero_division=0)

print("\n" + "="*60)
print("OVERALL PERFORMANCE METRICS")
print("="*60)
print(f"Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)")
print(f"\nMacro Averages:")
print(f"  Precision: {precision_macro:.4f}")
print(f"  Recall: {recall_macro:.4f}")
print(f"  F1-Score: {f1_macro:.4f}")
print(f"\nWeighted Averages:")
print(f"  Precision: {precision_weighted:.4f}")
print(f"  Recall: {recall_weighted:.4f}")
print(f"  F1-Score: {f1_weighted:.4f}")

# Detailed classification report
print(f"\n📋 Detailed Classification Report:")
print("-" * 60)
# Pass the full label set explicitly: without it, classification_report infers
# the labels from the data and raises ValueError when that count differs from
# len(CLASS_NAMES) (e.g. a class missing from both y_true and y_pred).
# Note the report's own averages therefore span every class in CLASS_NAMES.
report = classification_report(
    y_true,
    y_pred,
    labels=list(range(len(CLASS_NAMES))),
    target_names=CLASS_NAMES,
    zero_division=0,
)
print(report)

## 4. Confusion Matrix Analysis

In [None]:
# Create and visualize confusion matrix.
# Fix the label set so the matrix always has one row/column per entry of
# CLASS_NAMES; otherwise a class absent from the data shrinks the matrix and
# the tick labels no longer line up with the cells.
cm = confusion_matrix(y_true, y_pred, labels=list(range(len(CLASS_NAMES))))


def plot_confusion_heatmap(matrix, title, fmt):
    """Draw one annotated confusion-matrix heatmap labelled with CLASS_NAMES.

    Args:
        matrix: Square array with rows = true class, columns = predicted class.
        title: Figure title.
        fmt: Annotation format string passed to ``sns.heatmap`` (e.g. 'd', '.2f').
    """
    plt.figure(figsize=(12, 10))
    sns.heatmap(matrix, annot=True, fmt=fmt, cmap='Blues',
                xticklabels=CLASS_NAMES, yticklabels=CLASS_NAMES)
    plt.title(title)
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.xticks(rotation=45, ha='right')
    plt.yticks(rotation=0)
    plt.tight_layout()
    plt.show()


# Plot confusion matrix
plot_confusion_heatmap(cm, 'Confusion Matrix', 'd')

# Normalized confusion matrix (each row divided by its number of true samples).
# A class with no true samples has an all-zero row; divide it by 1 instead of 0
# so the row stays 0 rather than becoming NaN.
row_totals = cm.sum(axis=1)[:, np.newaxis]
safe_row_totals = row_totals.copy()
safe_row_totals[safe_row_totals == 0] = 1
cm_norm = cm.astype('float') / safe_row_totals
plot_confusion_heatmap(cm_norm, 'Normalized Confusion Matrix', '.2f')

# Confusion matrix analysis
total_samples = cm.sum()
correct_predictions = np.trace(cm)  # diagonal = true class equals predicted class
incorrect_predictions = total_samples - correct_predictions
# Guard the ratio so an empty matrix reports 0 instead of dividing by zero.
error_rate = incorrect_predictions / total_samples if total_samples else 0.0

print(f"\n📊 Confusion Matrix Analysis:")
print(f"- Total samples: {total_samples}")
print(f"- Correct predictions: {correct_predictions}")
print(f"- Incorrect predictions: {incorrect_predictions}")
print(f"- Error rate: {error_rate:.4f}")