# FreshHarvest Complete Pipeline

This notebook demonstrates the complete end-to-end pipeline for the FreshHarvest fruit freshness classification system.

## Pipeline Overview
- Data loading and preprocessing
- Model training and validation
- Model evaluation and testing
- Inference and prediction
- Performance monitoring
- Complete workflow demonstration

In [None]:
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Add src to path
sys.path.append('../src')

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Import custom modules
from cvProject_FreshHarvest.utils.common import read_yaml, setup_logging
from cvProject_FreshHarvest.models.cnn_models import FreshHarvestCNN
from cvProject_FreshHarvest.data.data_loader import DataLoader

# Setup: configure project logging and the plotting defaults for the notebook.
setup_logging()

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*'; older
# releases only ship the unprefixed name. Fall back instead of failing the
# very first cell on an older environment.
try:
    plt.style.use('seaborn-v0_8')
except OSError:
    plt.style.use('seaborn')
sns.set_palette("husl")

# Environment banner: records the TensorFlow build, visible GPUs and run time
# so the notebook output can be traced back to the environment that produced it.
print("FreshHarvest Complete Pipeline Notebook")
print("=" * 50)
print(f"TensorFlow version: {tf.__version__}")
print(f"GPU available: {tf.config.list_physical_devices('GPU')}")
print(f"Timestamp: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

## 1. Configuration and Setup

In [None]:
# Load the project configuration and echo the settings this notebook relies on.
config = read_yaml('../config/config.yaml')
print("📋 Configuration loaded:")

# (label, config section, key) for each value shown in the summary.
summary_fields = (
    ('Image size', 'data', 'image_size'),
    ('Number of classes', 'data', 'num_classes'),
    ('Batch size', 'training', 'batch_size'),
    ('Learning rate', 'training', 'learning_rate'),
    ('Epochs', 'training', 'epochs'),
)
for field_label, section, key in summary_fields:
    print(f"- {field_label}: {config[section][key]}")

# Class labels: the 'F_' prefix marks fresh fruit, 'S_' marks spoiled fruit
# (see the two summary lines printed below).
CLASS_NAMES = [
    'F_Banana', 'F_Lemon', 'F_Lulo', 'F_Mango',
    'F_Orange', 'F_Strawberry', 'F_Tamarillo', 'F_Tomato',
    'S_Banana', 'S_Lemon', 'S_Lulo', 'S_Mango',
    'S_Orange', 'S_Strawberry', 'S_Tamarillo', 'S_Tomato',
]

fresh_classes = [name for name in CLASS_NAMES if name.startswith('F_')]
spoiled_classes = [name for name in CLASS_NAMES if name.startswith('S_')]

print(f"\n🍎 Classes: {len(CLASS_NAMES)} total")
print(f"Fresh fruits: {fresh_classes}")
print(f"Spoiled fruits: {spoiled_classes}")

## 2. Data Pipeline Setup

In [None]:
# Build the train/validation/test generators.
#
# Preferred path: the project's DataLoader, driven by the YAML config.
# Fallback path: plain Keras ImageDataGenerators over ../data/processed/*,
# used when the DataLoader cannot be constructed or fails to build generators.
# After this cell, train_gen / val_gen / test_gen are each either a generator
# or None.
try:
    data_loader = DataLoader('../config/config.yaml')
    print("✅ DataLoader initialized successfully")

    # Create data generators
    train_gen, val_gen, test_gen = data_loader.create_generators()

    print("\n📊 Data Statistics:")
    print(f"- Training samples: {train_gen.samples if train_gen else 'N/A'}")
    print(f"- Validation samples: {val_gen.samples if val_gen else 'N/A'}")
    print(f"- Test samples: {test_gen.samples if test_gen else 'N/A'}")

    if train_gen:
        print(f"- Classes found: {list(train_gen.class_indices.keys())}")

except Exception as e:
    # Deliberate best-effort: any DataLoader failure drops to the manual
    # generators below so the rest of the notebook can still run.
    print(f"❌ Error with DataLoader: {e}")
    print("Creating manual data generators...")

    # Augmentation is applied to the training split only.
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True,
        zoom_range=0.2,
        brightness_range=[0.8, 1.2]
    )

    # Validation and test images are only rescaled.
    val_datagen = ImageDataGenerator(rescale=1./255)

    data_paths = ['../data/processed/train', '../data/processed/val', '../data/processed/test']

    # One explicit (split, datagen, shuffle) entry per path, rather than
    # guessing the split from substrings of the path.
    # - test: shuffle=False keeps the sample order fixed, so predictions can
    #   be compared against the generator's `classes` / `filenames`.
    # - val: left shuffled on purpose. The training demo reads only a few
    #   validation batches, and an unshuffled directory iterator would serve
    #   those from the first class folders only.
    split_settings = [
        ('train', train_datagen, True),
        ('val', val_datagen, True),
        ('test', val_datagen, False),
    ]

    manual_gens = {'train': None, 'val': None, 'test': None}
    for path, (split, datagen, shuffle) in zip(data_paths, split_settings):
        if not os.path.exists(path):
            continue
        try:
            manual_gens[split] = datagen.flow_from_directory(
                path,
                target_size=(224, 224),
                batch_size=32,
                class_mode='categorical',
                shuffle=shuffle
            )
        except Exception as gen_error:
            # A broken split must not prevent the other splits from loading.
            print(f"Could not create generator for {path}: {gen_error}")

    train_gen = manual_gens['train']
    val_gen = manual_gens['val']
    test_gen = manual_gens['test']

    print(f"Manual generators created: train={train_gen is not None}, val={val_gen is not None}, test={test_gen is not None}")

## 3. Model Architecture

In [None]:
# Build the classification model.
#
# Preferred path: the project's FreshHarvestCNN builder (lightweight variant).
# Fallback path: a small hand-written Sequential CNN, used when the builder
# cannot be constructed or fails to create the model.
try:
    cnn_builder = FreshHarvestCNN('../config/config.yaml')
    print("✅ CNN Builder initialized successfully")

    # Create lightweight model for demonstration
    model = cnn_builder.create_lightweight_cnn()
    print("✅ Lightweight CNN model created")

except Exception as e:
    # Deliberate best-effort: keep the notebook runnable without the builder.
    print(f"❌ Error with CNN Builder: {e}")
    print("Creating simple model manually...")

    # Three conv stages -> global average pooling -> 16-way softmax
    # (16 matches the number of entries in CLASS_NAMES).
    model = keras.Sequential([
        keras.layers.Input(shape=(224, 224, 3)),
        keras.layers.Conv2D(32, 3, activation='relu', padding='same'),
        keras.layers.MaxPooling2D(2),
        keras.layers.Conv2D(64, 3, activation='relu', padding='same'),
        keras.layers.MaxPooling2D(2),
        keras.layers.Conv2D(128, 3, activation='relu', padding='same'),
        keras.layers.GlobalAveragePooling2D(),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(16, activation='softmax')
    ])

# Compile model
# NOTE(review): this (re)compiles whatever model the builder returned; if the
# builder already compiled it, these settings take over. Confirm intended.
#
# Precision/Recall are passed as metric objects rather than the string
# aliases 'precision'/'recall': the aliases are not resolvable on every Keras
# version, while the objects are. The explicit names keep the history keys
# ('precision', 'recall', 'val_precision', 'val_recall') unchanged.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=[
        'accuracy',
        keras.metrics.Precision(name='precision'),
        keras.metrics.Recall(name='recall'),
    ]
)

print("\n🏗️ Model Architecture:")
print(f"- Total parameters: {model.count_params():,}")
print(f"- Trainable parameters: {sum(tf.keras.backend.count_params(w) for w in model.trainable_weights):,}")
print(f"- Input shape: {model.input_shape}")
print(f"- Output shape: {model.output_shape}")

# Display model summary
model.summary()

## 4. Training Demonstration

In [None]:
# Training demonstration (kept deliberately short for demo purposes)
print("🚀 Starting training demonstration...")

# Stop once validation loss has not improved for 3 epochs and roll the model
# back to its best weights.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
    verbose=1,
)

# Halve the learning rate after 2 epochs without validation-loss improvement,
# never going below 1e-7.
lr_on_plateau = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=2,
    min_lr=1e-7,
    verbose=1,
)

callbacks = [early_stopping, lr_on_plateau]

# Demo-sized training budget
DEMO_EPOCHS = 3        # number of epochs
STEPS_PER_EPOCH = 10   # upper bound on training batches per epoch
VALIDATION_STEPS = 5   # upper bound on validation batches per epoch

# Run the short training demo when both generators are available; otherwise
# fabricate a History-like object so the plotting code below still has
# something to draw.
if train_gen and val_gen:
    print(f"Training with real data for {DEMO_EPOCHS} epochs...")

    try:
        # Cap the work at the demo limits. `samples // batch_size` is 0 when a
        # split holds fewer samples than one batch, and zero steps is not a
        # usable setting for fit(), so always run at least one step.
        train_steps = max(1, min(STEPS_PER_EPOCH, train_gen.samples // train_gen.batch_size))
        val_steps = max(1, min(VALIDATION_STEPS, val_gen.samples // val_gen.batch_size))

        history = model.fit(
            train_gen,
            steps_per_epoch=train_steps,
            epochs=DEMO_EPOCHS,
            validation_data=val_gen,
            validation_steps=val_steps,
            callbacks=callbacks,
            verbose=1
        )

        print("✅ Training completed successfully!")

    except Exception as e:
        # Best-effort demo: report the failure and leave history as None so
        # the plotting code is skipped instead of crashing the notebook.
        print(f"❌ Training error: {e}")
        history = None

else:
    print("⚠️ No data generators available. Creating dummy training history...")

    # Placeholder object exposing a `.history` dict, like the object returned
    # by model.fit(). The numbers are illustrative only - they are NOT
    # measured results.
    history = type('History', (), {
        'history': {
            'loss': [1.5, 1.2, 1.0],
            'accuracy': [0.4, 0.6, 0.7],
            'val_loss': [1.6, 1.3, 1.1],
            'val_accuracy': [0.35, 0.55, 0.65]
        }
    })()

    print("✅ Dummy training history created for demonstration")

# Plot training history: loss on the left, accuracy on the right, followed by
# a short text summary of the last epoch.
if history and hasattr(history, 'history'):
    curves = history.history
    fig, axes = plt.subplots(1, 2, figsize=(12, 4))

    # One row per subplot:
    # (train key, validation key, train label, validation label, title, y-axis label)
    panels = (
        ('loss', 'val_loss', 'Training Loss', 'Validation Loss', 'Model Loss', 'Loss'),
        ('accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy', 'Model Accuracy', 'Accuracy'),
    )

    for ax, (train_key, val_key, train_label, val_label, title, y_label) in zip(axes, panels):
        ax.plot(curves[train_key], label=train_label)
        # The validation curve is optional; draw it only when it was recorded.
        if val_key in curves:
            ax.plot(curves[val_key], label=val_label)
        ax.set_title(title)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(y_label)
        ax.legend()
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Last-epoch summary
    print(f"\n📈 Final Training Results:")
    print(f"- Final training accuracy: {curves['accuracy'][-1]:.4f}")
    if 'val_accuracy' in curves:
        print(f"- Final validation accuracy: {curves['val_accuracy'][-1]:.4f}")