In [None]:
import kagglehub

# Fetch the newest published version of the dataset and keep the value that
# kagglehub returns; later cells use it to locate the image folders.
path = kagglehub.dataset_download(
    "ludehsar/apple-disease-dataset",
)

print("Path to dataset files:", path)

In [None]:
import os

# Build the dataset root from the `path` returned by kagglehub instead of
# hard-coding the cache location, so a different cache directory or a newer
# dataset version does not break the notebook. The image folders sit in the
# 'datasets' subdirectory of the download.
base_path = os.path.join(path, 'datasets')

# Map the subdirectories
train_dir = os.path.join(base_path, 'train')
test_dir = os.path.join(base_path, 'test')

print("Training directory:", train_dir)
print("Testing directory:", test_dir)

# Report what was found, and say so explicitly when a folder is missing
# rather than staying silent until a later cell fails.
if os.path.isdir(train_dir):
    # Sorted so the listing is stable across runs and file systems.
    print("Classes found:", sorted(os.listdir(train_dir)))
else:
    print("WARNING: training directory not found:", train_dir)

if not os.path.isdir(test_dir):
    print("WARNING: testing directory not found:", test_dir)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# 1. Define Augmentation
# NOTE: deliberately no `rescale=1./255`. The classifier built later in this
# notebook is a Keras EfficientNetB3, which contains its own input Rescaling
# layer and expects raw pixel values in the [0, 255] range. Rescaling in the
# generator as well would shrink the inputs a second time.
train_datagen = ImageDataGenerator(
    rotation_range=20,
    zoom_range=0.15,
    horizontal_flip=True,
    fill_mode='nearest'
)

# 2. Setup Train Generator
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(300, 300),
    batch_size=16, # Smaller batch size helps CPU memory
    class_mode='categorical',
    color_mode='rgb'
)

# 3. Setup Test Generator
# No augmentation and no rescaling: evaluation images are passed through as-is.
test_datagen = ImageDataGenerator()
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(300, 300),
    batch_size=16,
    class_mode='categorical',
    color_mode='rgb',
    shuffle=False  # keep file order fixed so predictions line up with labels
)

In [None]:
import os

# 1. Select the Keras backend BEFORE TensorFlow/Keras are imported in this
#    cell, so the setting is in place when the import happens.
# NOTE(review): an earlier cell already imports from tensorflow.keras, so in a
# top-to-bottom run the backend has presumably been chosen by then; this line
# only matters when this cell is the first to import TensorFlow in the kernel.
os.environ['KERAS_BACKEND'] = 'tensorflow'

import tensorflow as tf
from tensorflow.keras import layers, models, applications

# 2. Clear session
tf.keras.backend.clear_session()

def build_fixed_b3(num_classes=4, input_shape=(300, 300, 3), dropout_rate=0.3):
    """Build an EfficientNetB3 classifier with manually loaded no-top weights.

    The backbone is created with ``weights=None`` on an explicit input tensor
    and the pretrained no-top weight file is then downloaded and loaded by
    hand; this sidesteps the shape-mismatch crash seen when the weights are
    loaded automatically. A pooling / dropout / softmax head is stacked on top.

    Args:
        num_classes: Number of output classes for the softmax layer.
            Defaults to 4 (the apple disease classes).
        input_shape: ``(height, width, channels)`` of the input images.
            Defaults to ``(300, 300, 3)``.
        dropout_rate: Dropout rate applied before the final Dense layer.
            Defaults to 0.3.

    Returns:
        An uncompiled Keras ``Model`` mapping the image input to class
        probabilities.

    Raises:
        ValueError: If ``num_classes`` is smaller than 1.
    """
    if num_classes < 1:
        raise ValueError(f"num_classes must be >= 1, got {num_classes}")

    # Define the input layer strictly so the architecture is locked to the
    # requested shape (3 channels by default).
    img_input = layers.Input(shape=input_shape)

    # Load the model with weights=None first!
    # This prevents the immediate shape mismatch crash.
    # NOTE(review): keras.applications may add extra input scaling only when
    # weights='imagenet' is requested; confirm for the installed Keras version
    # that building with weights=None yields the same input normalization.
    base_model = applications.EfficientNetB3(
        include_top=False,
        weights=None,
        input_tensor=img_input
    )

    # Manually download (cached under the 'models' subdir) and load the
    # weights into this structure.
    weights_path = tf.keras.utils.get_file(
        'efficientnetb3_notop.h5',
        'https://storage.googleapis.com/keras-applications/efficientnetb3_notop.h5',
        cache_subdir='models'
    )

    # This manual load will usually bypass the automated check that is failing
    base_model.load_weights(weights_path)

    # Classification head: pool the feature map, regularize, then softmax.
    x = layers.GlobalAveragePooling2D()(base_model.output)
    x = layers.Dropout(dropout_rate)(x)
    output = layers.Dense(num_classes, activation='softmax')(x)

    return models.Model(img_input, output)

# Smoke test: build and compile the network once, then show its summary.
# Any failure is reported as a message instead of being re-raised.
try:
    model = build_fixed_b3()
    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    print("SUCCESS: Model architecture locked to 3 channels and weights loaded.")
    model.summary()
except Exception as build_error:
    print(f"STILL FAILING: {build_error}")

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
# Training callbacks, collected into the `callbacks` list at the bottom.

# Halt training once validation loss has gone 5 epochs without improving,
# and roll the model back to the best weights seen.
early_stop = EarlyStopping(
    monitor='val_loss',
    restore_best_weights=True,
    patience=5,
)

# Multiply the learning rate by 0.2 after 3 epochs without a better
# validation loss, never going below 1e-6.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    patience=3,
    factor=0.2,
    min_lr=1e-6,
)

# Write the model to Drive each time validation accuracy reaches a new maximum.
checkpoint_callback = ModelCheckpoint(
    filepath='/content/drive/MyDrive/Apple_Disease_Project/best_model_weights.keras',
    mode='max',
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1,
)

callbacks = [early_stop, checkpoint_callback, reduce_lr]

In [None]:
# One batch size shared by both generators
BATCH_SIZE = 16

# 1. Training batches: drawn from train_dir through train_datagen, reshuffled
train_generator = train_datagen.flow_from_directory(
    directory=train_dir,
    class_mode='categorical',
    batch_size=BATCH_SIZE,
    target_size=(300, 300),
    shuffle=True,
)

# 2. Validation/test batches: fixed order, drawn from test_dir
# IMPORTANT: 'test_dir' must have the same subfolder structure as 'train_dir'
test_generator = test_datagen.flow_from_directory(
    directory=test_dir,
    class_mode='categorical',
    batch_size=BATCH_SIZE,
    target_size=(300, 300),
    shuffle=False,
)

# 3. Print both label mappings so they can be compared by eye
print("Train Classes:", train_generator.class_indices)
print("Validation Classes:", test_generator.class_indices)

In [None]:
# Start from a freshly built model so nothing carries over from earlier runs
tf.keras.backend.clear_session()
model = build_fixed_b3()
model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
)

# Train for up to 20 epochs (the callbacks may stop earlier). The number of
# steps per epoch is not passed; Keras derives it from the generator.
history = model.fit(
    x=train_generator,
    validation_data=test_generator,
    epochs=20,
    callbacks=callbacks,
)

In [None]:
# Google Drive has to be mounted at /content/drive before this cell runs, e.g.:
#   from google.colab import drive
#   drive.mount('/content/drive')

# Export the trained model as a single .h5 file
save_path_h5 = '/content/drive/MyDrive/Apple_Disease_Project/apple_disease_model.h5'
model.save(filepath=save_path_h5)

print("Model successfully saved in Legacy H5 format at: " + save_path_h5)