# 🌿 LeafSense - Plant Disease Detection
## ResNet50 Transfer Learning on Kaggle

**Goal**: Train a plant disease classifier (38 classes, ≥95% accuracy, ≤90 min)

**Kaggle Setup** (REQUIRED):
1. **GPU**: Settings → Accelerator → **GPU T4 x2**
2. **Dataset**: Add Data → Search **"New Plant Diseases Dataset"** → `vipoooool/new-plant-diseases-dataset`
3. **Internet**: ON (for packages)

**Outputs**: `LeafSense_ResNet50.h5`, `LeafSense_Model.tflite`, `class_indices.json`

## 1️⃣ Setup & Configuration

In [None]:
# Core imports
import os, random, json
import numpy as np
import tensorflow as tf
from pathlib import Path
from tensorflow import keras
from tensorflow.keras import layers, Model
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from sklearn.utils.class_weight import compute_class_weight

# Seed Python's `random`, NumPy and TensorFlow so runs are repeatable.
# NOTE(review): PYTHONHASHSEED is read at interpreter start-up, so setting it
# here is expected to affect only child processes, not this kernel — confirm.
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# GPU configuration
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Allocate GPU memory on demand instead of reserving it all up front.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        # Memory growth can only be changed before the GPUs are initialized.
        # Re-running this cell in a live kernel would otherwise abort here
        # and skip the mixed-precision setup below.
        print(f"Memory growth left unchanged (GPU already initialized): {err}")
    # Enable mixed precision for faster training
    tf.keras.mixed_precision.set_global_policy('mixed_float16')
    print(f"‚úÖ GPU: {len(gpus)} device(s) | Mixed Precision: ON")
else:
    print("‚ö†Ô∏è  No GPU! Enable GPU in Settings ‚Üí Accelerator ‚Üí GPU T4")

print(f"TensorFlow: {tf.__version__} | Keras: {tf.keras.__version__}")

## 2️⃣ Dataset & Hyperparameters

In [None]:
# Hyperparameters shared by the data, model and training cells.
CONFIG = {
    'IMG_SIZE': (224, 224),
    'BATCH_SIZE': 32,
    'PHASE1_EPOCHS': 10,  # Frozen base
    'PHASE2_EPOCHS': 10,  # Fine-tuning
    'INITIAL_LR': 1e-3,
    'FINETUNE_LR': 1e-5,
    'DROPOUT': 0.3,
    'DENSE_UNITS': 512,
}

# Auto-detect environment (Kaggle vs Local) and resolve BASE_PATH / OUTPUT_DIR.
if Path('/kaggle/input').exists():
    # Running on Kaggle - check which dataset is added
    kaggle_base = Path('/kaggle/input')
    OUTPUT_DIR = Path('/kaggle/working')

    # Try vipoooool dataset first (has train/valid split)
    if (kaggle_base / 'new-plant-diseases-dataset').exists():
        BASE_PATH = kaggle_base / 'new-plant-diseases-dataset/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)'
        print("üåê Environment: Kaggle (vipoooool dataset)")
    # Try abdallahalidev dataset (single color folder)
    elif (kaggle_base / 'plantvillage-dataset').exists():
        BASE_PATH = kaggle_base / 'plantvillage-dataset'
        print("üåê Environment: Kaggle (abdallahalidev dataset - no train/valid split)")
    else:
        raise FileNotFoundError(
            "‚ùå No PlantVillage dataset found!\n"
            "Add either:\n"
            "  - vipoooool/new-plant-diseases-dataset OR\n"
            "  - abdallahalidev/plantvillage-dataset"
        )
else:
    # Running locally - download dataset using kagglehub
    print("üíª Environment: Local")
    import kagglehub

    OUTPUT_DIR = Path('./output')
    OUTPUT_DIR.mkdir(exist_ok=True)

    # Download dataset (cached after first download)
    print("üì• Downloading PlantVillage dataset...")
    dataset_path = kagglehub.dataset_download("abdallahalidev/plantvillage-dataset")
    print(f"‚úÖ Dataset downloaded to: {dataset_path}")

    BASE_PATH = Path(dataset_path)

# Auto-detect folder structure
TRAIN_DIR = BASE_PATH / 'train'
VAL_DIR = BASE_PATH / 'valid'

# If train/valid don't exist, fall back to a single 'color' folder
# (abdallahalidev dataset) and flag that it must be split downstream.
if not TRAIN_DIR.exists():
    COLOR_DIR = BASE_PATH / 'color'
    if not COLOR_DIR.exists() and BASE_PATH.exists():
        # Some copies wrap the image folders in one extra directory level.
        # NOTE(review): believed to be 'plantvillage dataset/' for the
        # abdallahalidev dataset — confirm against the actual download.
        nested_color_dirs = sorted(
            p for p in BASE_PATH.glob('*/color') if p.is_dir()
        )
        if nested_color_dirs:
            COLOR_DIR = nested_color_dirs[0]
    if COLOR_DIR.exists():
        print("‚ö†Ô∏è  No train/valid split found. Using 'color' folder.")
        print("   Will create 80/20 split from color folder...")
        # Same folder for both; USE_SPLIT tells the generator cell to split it.
        TRAIN_DIR = COLOR_DIR
        VAL_DIR = COLOR_DIR  # Will use validation_split parameter
        USE_SPLIT = True
    else:
        raise FileNotFoundError(
            f"‚ùå Dataset structure not recognized!\n"
            f"Expected: train/ and valid/ folders OR color/ folder\n"
            f"Found in {BASE_PATH}: {list(BASE_PATH.iterdir()) if BASE_PATH.exists() else 'Path does not exist'}"
        )
else:
    USE_SPLIT = False

print(f"‚úÖ Dataset: {BASE_PATH}")
print(f"‚úÖ Train: {TRAIN_DIR}")
print(f"‚úÖ Validation: {VAL_DIR}")
print(f"‚úÖ Output: {OUTPUT_DIR}")
print(f"üìä Config: {CONFIG['BATCH_SIZE']} batch | {CONFIG['PHASE1_EPOCHS']+CONFIG['PHASE2_EPOCHS']} epochs")

## 3️⃣ Data Generators

In [None]:
# Build the training/validation generators, persist the class mapping and
# compute class weights.
#
# When the dataset ships as a single folder (USE_SPLIT is True, so TRAIN_DIR
# and VAL_DIR are the same directory) the generators must carve out disjoint
# subsets; otherwise validation would run on the training images.
val_fraction = CONFIG.get('VAL_SPLIT', 0.2) if USE_SPLIT else 0.0
train_subset = 'training' if USE_SPLIT else None
val_subset = 'validation' if USE_SPLIT else None

# NOTE(review): ResNet50's ImageNet weights are documented to expect
# `tf.keras.applications.resnet50.preprocess_input` rather than 1/255 scaling.
# The inference examples at the end of this notebook also divide by 255, so
# change both together if the preprocessing is ever switched.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.3,
    height_shift_range=0.3,
    shear_range=0.2,
    zoom_range=0.3,
    horizontal_flip=True,
    brightness_range=[0.8, 1.2],
    fill_mode='nearest',
    validation_split=val_fraction,
)

# Validation images are only rescaled, never augmented. The same
# validation_split is required here so `subset` picks the complementary files.
val_datagen = ImageDataGenerator(rescale=1./255, validation_split=val_fraction)

# Create generators
train_gen = train_datagen.flow_from_directory(
    TRAIN_DIR, target_size=CONFIG['IMG_SIZE'], batch_size=CONFIG['BATCH_SIZE'],
    class_mode='categorical', shuffle=True, seed=SEED, subset=train_subset
)

val_gen = val_datagen.flow_from_directory(
    VAL_DIR, target_size=CONFIG['IMG_SIZE'], batch_size=CONFIG['BATCH_SIZE'],
    class_mode='categorical', shuffle=False, subset=val_subset
)

NUM_CLASSES = len(train_gen.class_indices)

# Save class indices (class name -> integer label) for use at inference time
with open(OUTPUT_DIR / 'class_indices.json', 'w') as f:
    json.dump(train_gen.class_indices, f, indent=2)

# Compute class weights for imbalance, keyed by position in the sorted list of
# labels present in the training set
class_weights = dict(enumerate(compute_class_weight(
    'balanced', classes=np.unique(train_gen.classes), y=train_gen.classes
)))

print(f"‚úÖ Data loaded: {NUM_CLASSES} classes | {train_gen.samples:,} train | {val_gen.samples:,} val")

## 4️⃣ Build ResNet50 Model

In [None]:
# Build the classifier: ImageNet-pretrained ResNet50 backbone + custom head.
# The input shape comes from CONFIG so it always matches the generators'
# target_size instead of a second hard-coded copy.
input_shape = (*CONFIG['IMG_SIZE'], 3)

base_model = ResNet50(include_top=False, weights='imagenet', input_shape=input_shape)
base_model.trainable = False  # Freeze initially

# Custom classification head
inputs = keras.Input(shape=input_shape)
# training=False makes the backbone run in inference mode inside this model.
x = base_model(inputs, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.BatchNormalization()(x)
x = layers.Dense(CONFIG['DENSE_UNITS'], activation='relu')(x)
x = layers.Dropout(CONFIG['DROPOUT'])(x)
x = layers.BatchNormalization()(x)
# The output layer is pinned to float32; the setup cell enables the
# mixed_float16 policy when a GPU is present.
outputs = layers.Dense(NUM_CLASSES, activation='softmax', dtype='float32')(x)

model = Model(inputs, outputs, name='LeafSense_ResNet50')

print(f"‚úÖ Model: {model.count_params():,} params | Base frozen: {not base_model.trainable}")

## 5️⃣ Phase 1: Train with Frozen Base (10 epochs)

In [None]:
# Phase 1: train only the classification head while the backbone is frozen.

# Callbacks. The checkpoint tracks val_accuracy, while early stopping and the
# learning-rate schedule both watch val_loss.
checkpoint_cb = ModelCheckpoint(
    OUTPUT_DIR / 'best_model.h5',
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1,
)
early_stop_cb = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
    verbose=1,
)
reduce_lr_cb = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=1e-7,
    verbose=1,
)
callbacks = [checkpoint_cb, early_stop_cb, reduce_lr_cb]

# Compile with the higher, head-only learning rate
phase1_optimizer = Adam(CONFIG['INITIAL_LR'])
model.compile(
    optimizer=phase1_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

print(f"? Phase 1: Training with frozen base ({CONFIG['PHASE1_EPOCHS']} epochs)...")

# Run the training loop
history1 = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=CONFIG['PHASE1_EPOCHS'],
    class_weight=class_weights,
    callbacks=callbacks,
    verbose=1,
)

print(f"‚úÖ Phase 1 complete: Val Acc = {max(history1.history['val_accuracy']):.4f}")

## 6️⃣ Phase 2: Fine-Tune (Unfreeze last 20 layers, 10 epochs)

In [None]:
# Phase 2: fine-tune the tail of the backbone together with the head.

# Mark the whole backbone trainable, then re-freeze everything except its
# final 20 layers.
base_model.trainable = True
layers_to_keep_frozen = base_model.layers[:-20]
for frozen_layer in layers_to_keep_frozen:
    frozen_layer.trainable = False

# Recompile so the new trainable flags take effect, with a much lower
# learning rate than Phase 1.
finetune_optimizer = Adam(CONFIG['FINETUNE_LR'])
model.compile(
    optimizer=finetune_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

print(f"üöÄ Phase 2: Fine-tuning ({CONFIG['PHASE2_EPOCHS']} epochs, unfrozen=20 layers)...")

# Run the training loop, reusing the callback list built for Phase 1
history2 = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=CONFIG['PHASE2_EPOCHS'],
    class_weight=class_weights,
    callbacks=callbacks,
    verbose=1,
)

print(f"‚úÖ Phase 2 complete: Val Acc = {max(history2.history['val_accuracy']):.4f}")

## 7️⃣ Evaluate Model

In [None]:
# Score the final model on the validation generator; evaluate() returns the
# loss followed by the compiled metrics (here: accuracy).
final_scores = model.evaluate(val_gen, verbose=0)
val_loss, val_acc = final_scores
print(f"üìä Final Validation Accuracy: {val_acc*100:.2f}%")
print(f"üìä Final Validation Loss: {val_loss:.4f}")

## 8️⃣ Save Models (Keras .h5 + TensorFlow Lite)

In [None]:
# Export the trained model in two formats and report the file sizes in MiB.

# 1) Keras HDF5 file
h5_path = OUTPUT_DIR / 'LeafSense_ResNet50.h5'
model.save(str(h5_path))
h5_size_mb = h5_path.stat().st_size / 2**20

print(f"‚úÖ Keras Model saved: {h5_path.name} ({h5_size_mb:.2f} MB)")

# 2) TensorFlow Lite flatbuffer, converted from the in-memory Keras model
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

tflite_path = OUTPUT_DIR / 'LeafSense_Model.tflite'
tflite_path.write_bytes(tflite_model)

tflite_size_mb = tflite_path.stat().st_size / 2**20
print(f"‚úÖ TFLite Model saved: {tflite_path.name} ({tflite_size_mb:.2f} MB)")
print(f"\nüì• Download from Kaggle Output tab ‚Üí Both models + class_indices.json")

## 9️⃣ How to Use Models Locally

### Load Keras Model (.h5)
```python
import tensorflow as tf
import numpy as np
from PIL import Image
import json

# Restore the trained Keras model from disk
model = tf.keras.models.load_model('LeafSense_ResNet50.h5')

# Read the saved name -> index mapping and invert it to index -> name
with open('class_indices.json', 'r') as f:
    class_indices = json.load(f)
idx_to_class = dict(zip(class_indices.values(), class_indices.keys()))

# Predict
def predict_disease(image_path):
    """Classify a single leaf image.

    The image is loaded as RGB, resized to 224x224 and scaled to [0, 1]
    before being passed to the model as a batch of one.

    Returns:
        A ``(disease_name, confidence)`` tuple for the highest-scoring class.
    """
    leaf = Image.open(image_path).convert('RGB').resize((224, 224))
    batch = (np.array(leaf) / 255.0)[np.newaxis, ...]

    scores = model.predict(batch)[0]
    top_idx = np.argmax(scores)

    return idx_to_class[top_idx], scores[top_idx]

# Example
disease, conf = predict_disease('leaf_image.jpg')
print(f"Disease: {disease} ({conf*100:.2f}%)")
```

### Load TFLite Model (.tflite) - Faster!
```python
import tensorflow as tf

# Load the TFLite model and allocate its tensors once, up front
interpreter = tf.lite.Interpreter(model_path='LeafSense_Model.tflite')
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Predict
def predict_tflite(img_array):
    """Run one inference with the TFLite interpreter and return its output.

    Args:
        img_array: NumPy array holding the preprocessed input batch.
            Presumably shaped (1, 224, 224, 3) and scaled to [0, 1] like the
            Keras example — confirm against ``input_details[0]['shape']``.

    Returns:
        The array read from the interpreter's first output tensor.
    """
    input_info = input_details[0]
    # set_tensor() rejects dtype mismatches, and `np.array(img) / 255.0`
    # yields float64, so cast to whatever dtype the model input declares.
    img_array = img_array.astype(input_info['dtype'], copy=False)
    interpreter.set_tensor(input_info['index'], img_array)
    interpreter.invoke()
    return interpreter.get_tensor(output_details[0]['index'])
```

**✅ Both models work on ANY machine with TensorFlow installed!**