In [1]:
import os
import pandas as pd
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models
from tensorflow.keras.applications.efficientnet import EfficientNetB0, preprocess_input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.mixed_precision import set_global_policy

set_global_policy('mixed_float16')
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)
        
data_dir = 'D:/Major Project/Dataset/Masked_Dataset'
image_paths = []
labels = []
classes = sorted(os.listdir(data_dir))

for class_name in classes:
    class_dir = os.path.join(data_dir, class_name)
    for img_name in os.listdir(class_dir):
        if img_name.lower().endswith(('.jpg', '.jpeg', '.png')):
            img_path = os.path.join(class_dir, img_name)
            image_paths.append(img_path)
            labels.append(class_name)

df = pd.DataFrame({'image_path': image_paths, 'label': labels})
train_df, temp_df = train_test_split(df, test_size=0.2, stratify=df['label'], random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, stratify=temp_df['label'], random_state=42)

print(f"Total samples: {len(df)}")
print(f"Training samples: {len(train_df)}")
print(f"Validation samples: {len(val_df)}")
print(f"Test samples: {len(test_df)}")
print(f"Number of classes: {len(classes)}")

batch_size = 64
target_size = (224, 224)
num_classes = len(classes)
num_workers = 4

train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    brightness_range=[0.8, 1.2],
    fill_mode='nearest'
)
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col='image_path',
    y_col='label',
    target_size=target_size,
    batch_size=batch_size,
    class_mode='categorical',
    workers=num_workers,
    use_multiprocessing=True
)
val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_df,
    x_col='image_path',
    y_col='label',
    target_size=target_size,
    batch_size=batch_size,
    class_mode='categorical',
    workers=num_workers,
    use_multiprocessing=True
)
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    x_col='image_path',
    y_col='label',
    target_size=target_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False,
    workers=num_workers,
    use_multiprocessing=True
)

def generator_to_tfdata(generator):
    dataset = tf.data.Dataset.from_generator(
        lambda: generator,
        output_types=(tf.float32, tf.float32),
        output_shapes=([None, 224, 224, 3], [None, num_classes])
    )
    return dataset.prefetch(tf.data.AUTOTUNE)

train_dataset = generator_to_tfdata(train_generator)
val_dataset = generator_to_tfdata(val_generator)
test_dataset = generator_to_tfdata(test_generator)

steps_per_epoch = len(train_df) // batch_size
validation_steps = len(val_df) // batch_size
print(f"Steps per epoch: {steps_per_epoch}")
print(f"Validation steps: {validation_steps}")

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 3050 Laptop GPU, compute capability 8.6
Total samples: 73000
Training samples: 58400
Validation samples: 7300
Test samples: 7300
Number of classes: 73
Found 58400 validated image filenames belonging to 73 classes.
Found 7300 validated image filenames belonging to 73 classes.
Found 7300 validated image filenames belonging to 73 classes.
Steps per epoch: 912
Validation steps: 114


In [4]:
strategy = tf.distribute.MultiWorkerMirroredStrategy()
with strategy.scope():
    def build_model(num_classes):
        base_model = EfficientNetB0(input_shape=(224, 224, 3), include_top=False, weights='imagenet')
        for layer in base_model.layers:
            layer.trainable = False
        x = base_model.output
        x = layers.GlobalAveragePooling2D()(x)
        x = layers.Dense(128, activation='relu')(x)
        predictions = layers.Dense(num_classes, activation='softmax', dtype='float32')(x)
        model = models.Model(inputs=base_model.input, outputs=predictions)
        return model, base_model

    model, base_model = build_model(num_classes)
    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

print("Initial Model (Base Layers Frozen):")
print(f"Total parameters: {model.count_params()}")
print(f"Trainable parameters: {sum([w.shape.num_elements() for w in model.trainable_weights])}")
print(f"Non-trainable parameters: {model.count_params() - sum([w.shape.num_elements() for w in model.trainable_weights])}")

early_stopping_initial = EarlyStopping(monitor='val_accuracy', patience=5, mode='max', restore_best_weights=True)
model_checkpoint_initial = ModelCheckpoint(
    'D:/Major Project/efficientnet/best_initial_efficientnetb0_weights.h5',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=True
)

history_initial = model.fit(
    train_dataset,
    steps_per_epoch=steps_per_epoch,
    epochs=50,
    validation_data=val_dataset,
    validation_steps=validation_steps,
    callbacks=[early_stopping_initial, model_checkpoint_initial],
    workers=num_workers,
    use_multiprocessing=True,
    verbose=1
)
history_df = pd.DataFrame(history_initial.history)
history_df.to_csv('D:/Major Project/efficientnet/training_history_initial_efficientnetb0.csv', index=False)
model.save_weights('D:/Major Project/efficientnet/initial_weights_efficientnetb0.h5')
print("Saved initial weights to 'initial_weights_efficientnetb0.h5'")


INFO:tensorflow:Single-worker MultiWorkerMirroredStrategy with local_devices = ('/device:GPU:0',), communication = CommunicationImplementation.AUTO
Initial Model (Base Layers Frozen):
Total parameters: 4222956
Trainable parameters: 173385
Non-trainable parameters: 4049571
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Saved initial weights to 'initial_weights_efficientnetb0.h5'


In [9]:
# Fine Tuning
def build_model(num_classes):
    base_model =  EfficientNetB0(input_shape=(224, 224, 3), include_top=False, weights=None)
    for layer in base_model.layers:
        layer.trainable = False
    x = base_model.output
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(128, activation='relu')(x)
    predictions = layers.Dense(num_classes, activation='softmax', dtype='float32')(x)
    model = models.Model(inputs=base_model.input, outputs=predictions)
    return model, base_model

model, base_model = build_model(num_classes)
model.load_weights('D:/Major Project/efficientnet/initial_weights_efficientnetb0.h5')

for layer in base_model.layers[-17:]:
    layer.trainable = True

model.compile(optimizer=Adam(learning_rate=1e-5),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

print("Fine-Tuned Model (Last 17 Layers Unfrozen):")
print(f"Total parameters: {model.count_params()}")
print(f"Trainable parameters: {sum([w.shape.num_elements() for w in model.trainable_weights])}")
print(f"Non-trainable parameters: {model.count_params() - sum([w.shape.num_elements() for w in model.trainable_weights])}")

early_stopping_fine = EarlyStopping(monitor='val_accuracy', patience=3, mode='max', restore_best_weights=True)
model_checkpoint_fine = ModelCheckpoint(
    'D:/Major Project/efficientnet/best_fine_tuned_vgg16.keras',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True
)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6)

history_fine = model.fit(
    train_dataset,
    steps_per_epoch=steps_per_epoch,
    epochs=30,
    validation_data=val_dataset,
    validation_steps=validation_steps,
    callbacks=[early_stopping_fine, model_checkpoint_fine, reduce_lr],
    workers=num_workers,
    use_multiprocessing=True,
    verbose=1
)

history_fine_df = pd.DataFrame(history_fine.history)
history_fine_df.to_csv('D:/Major Project/efficientnet/training_history_fine_vgg16.csv', index=False)

Fine-Tuned Model (Last 17 Layers Unfrozen):
Total parameters: 4222956
Trainable parameters: 1302777
Non-trainable parameters: 2920179
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [10]:
model.save_weights('D:/Major Project/efficientnet/all_weights.keras')
model.save('D:/Major Project/efficientnet/final_model.keras')

In [11]:
# Evaluate model on test set
test_steps = len(test_df) // batch_size
test_loss, test_accuracy = model.evaluate(test_generator, steps=test_steps)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

Test Loss: 0.1039, Test Accuracy: 0.9637


In [22]:
import random
import seaborn as sns
test_generator.reset()
test_loss, test_accuracy = model.evaluate(test_generator, steps=test_steps, verbose=1)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

y_pred = model.predict(test_generator, steps=test_steps, verbose=1)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = test_generator.classes[:len(y_pred_classes)]
if len(y_true) != len(y_pred_classes):
    print("Warning: Mismatch in lengths. Truncating y_true to match y_pred_classes.")
    y_true = y_true[:len(y_pred_classes)]
y_true = np.array(y_true)
y_pred_classes = np.array(y_pred_classes)
class_labels = ['Apple__Apple_scab', 'Apple_Black_rot', 'Apple_Cedar_apple_rust', 'Apple_healthy', 'BittergourdDowny_Mildew', 'BittergourdHealthy', 'BittergourdJassid', 
    'BittergourdLeafSpot', 'BittergourdNitrogen_Deficiency', 'BittergourdNitrogen_and_Magnesium_Deficiency', 'BittergourdNitrogen_and_Potassium_Deficiency', 'BittergourdPotassium_Deficiency', 
     'BittergourdPotassium_and_Magnesium_Deficiency', 'Blueberry_healthy', 'Cherry(including_sour)__Powdery_mildew', 'Cherry(including_sour)__healthy', 
     'Corn(maize)__Cercospora_leaf_spot Gray_leaf_spot', 'Corn(maize)__Common_rust', 'Corn_(maize)__Northern_Leaf_Blight', 'Corn(maize)__healthy', 'EggplantAphids', 
     'EggplantCercosporaLeafSpot', 'EggplantFleaBeetles', 'EggplantHealthy', 'EggplantLeafWilt', 'EggplantPhytophthoraBlight', 'EggplantPowderyMildew', 'EggplantTobaccoMosaicVirus',
     'Grape_Black_rot', 'Grape_Esca(Black_Measles)', 'Grape__Leaf_blight(Isariopsis_Leaf_Spot)', 'Grape__healthy', 'LettuceBacterial', 'LettuceFungal', 'LettuceHealthy', 
     'Orange_Haunglongbing(Citrus_greening)', 'Papaya_Anthracanose_Diease', 'Papaya_Black_Spot_Diease', 'Papaya_Healthy', 'Papaya_Powdery_Mildery_Diease', 'Papaya_Ring_spot_Diease',
     'Papaya_phytophthora_Disease', 'Peach__Bacterial_spot', 'Peach_healthy', 'Pepper,_bell_Bacterial_spot', 'Pepper,_bell_healthy', 'PigeonpeaHealthy', 'PigeonpeaLeafSpot', 
     'PigeonpeaLeafwebber', 'PigeonpeaSterilicmosaic', 'Potato_Early_blight', 'Potato_Late_blight', 'Potato_healthy', 'Raspberry_healthy', 'Soybean_healthy', 'Squash_Powdery_mildew',
     'Strawberry_Leaf_scorch', 'Strawberry_healthy', 'SweetPumpkinDownyMildewDisease', 'SweetPumpkinHealthy', 'SweetPumpkinLeafCurlDisease', 'SweetPumpkinMosaicDisease', 
     'SweetPumpkinRedBeetle', 'Tomato_Bacterial_spot', 'Tomato_Early_blight', 'Tomato_Late_blight', 'Tomato_Leaf_Mold', 'Tomato_Septoria_leaf_spot',
     'Tomato_Spider_mites Two-spotted_spider_mite', 'Tomato_Target_Spot', 'Tomato_Tomato_Yellow_Leaf_Curl_Virus', 'Tomato_Tomato_mosaic_virus', 'Tomato__healthy']
num_classes_to_show = 20
if len(class_labels) < num_classes_to_show:
    num_classes_to_show = len(class_labels)

random_classes = random.sample(class_labels, num_classes_to_show)
random_class_indices = [class_labels.index(cls) for cls in random_classes]
mask_true = np.isin(y_true, random_class_indices)

filtered_y_true_temp = y_true[mask_true]
filtered_y_pred_temp = y_pred_classes[mask_true]
mask_pred = np.isin(filtered_y_pred_temp, random_class_indices)

filtered_y_true = filtered_y_true_temp[mask_pred]
filtered_y_pred_classes = filtered_y_pred_temp[mask_pred]

label_mapping = {idx: i for i, idx in enumerate(random_class_indices)}
filtered_y_true_mapped = np.array([label_mapping[label] for label in filtered_y_true])
filtered_y_pred_mapped = np.array([label_mapping[label] for label in filtered_y_pred_classes])

cm = confusion_matrix(filtered_y_true_mapped, filtered_y_pred_mapped)

plt.figure(figsize=(10, 10)) 
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=random_classes, yticklabels=random_classes,
            cbar=False, linewidths=0.3, square=True)
plt.xticks(rotation=90, fontsize=6)
plt.yticks(fontsize=6)
plt.xlabel('Predicted Labels', fontsize=8)
plt.ylabel('True Labels', fontsize=8)
plt.title('Confusion Matrix', fontsize=12)
plt.tight_layout()

plt.savefig('D:/Major Project/efficientnet/confusion_matrix_random_subset.png', dpi=150, bbox_inches='tight')
plt.close()
print("Confusion matrix with random subset of classes saved to 'D:/Major Project/efficientnet/confusion_matrix_random_subset.png'")

Test Loss: 0.1039, Test Accuracy: 0.9637
Confusion matrix with random subset of classes saved to 'D:/Major Project/efficientnet/confusion_matrix_random_subset.png'


In [24]:
class_labels = list(test_generator.class_indices.keys())
report = classification_report(y_true, y_pred_classes, target_names=class_labels, output_dict=True)
report_df = pd.DataFrame(report).transpose()
report_df.to_csv('D:/Major Project/efficientnet/classification_report.csv')
print("Classification report saved to 'D:/Major Project/efficientnet/classification_report.csv'")
print("\nClassification Report:")
print(report_df)

predictions_df = pd.DataFrame({
    'True_Label': [class_labels[i] for i in y_true],
    'Predicted_Label': [class_labels[i] for i in y_pred_classes],
    'Confidence': np.max(y_pred, axis=1)
})
predictions_df.to_csv('D:/Major Project/efficientnet/test_predictions.csv', index=False)
print("Test predictions saved to 'D:/Major Project/efficientnet/test_predictions.csv'")

Classification report saved to 'D:/Major Project/efficientnet/classification_report.csv'

Classification Report:
                              precision    recall  f1-score      support
Apple___Apple_scab             0.980000  0.980000  0.980000   100.000000
Apple___Black_rot              1.000000  0.990000  0.994975   100.000000
Apple___Cedar_apple_rust       1.000000  1.000000  1.000000   100.000000
Apple___healthy                0.980000  0.989899  0.984925    99.000000
Bittergourd__Downy_Mildew      0.979381  0.950000  0.964467   100.000000
...                                 ...       ...       ...          ...
Tomato___Tomato_mosaic_virus   0.989691  0.960000  0.974619   100.000000
Tomato___healthy               0.931373  0.950000  0.940594   100.000000
accuracy                       0.963679  0.963679  0.963679     0.963679
macro avg                      0.965489  0.963688  0.963625  7296.000000
weighted avg                   0.965516  0.963679  0.963636  7296.000000

[76 rows x

In [26]:
try:
    history_initial_df = pd.read_csv('D:/Major Project/efficientnet/training_history_initial_efficientnetb0.csv')
    history_fine_df = pd.read_csv('D:/Major Project/efficientnet/training_history_fine_efficientnetb0.csv')
    print("Loaded training history from 'training_history_initial_efficientnetb0.csv' and 'training_history_fine_efficientnetb0.csv'")
except Exception as e:
    print(f"Error loading history CSVs: {e}")
    print("Please ensure 'training_history_initial_efficientnetb0.csv' and 'training_history_fine_efficientnetb0.csv' exist in 'D:/Major Project/efficientnet/'")
    exit(1)

required_columns = ['accuracy', 'val_accuracy', 'loss', 'val_loss']
if not all(col in history_initial_df.columns for col in required_columns) or \
   not all(col in history_fine_df.columns for col in required_columns):
    print("Error: CSV files must contain columns: 'accuracy', 'val_accuracy', 'loss', 'val_loss'")
    exit(1)

history_initial = {
    'accuracy': history_initial_df['accuracy'].tolist(),
    'val_accuracy': history_initial_df['val_accuracy'].tolist(),
    'loss': history_initial_df['loss'].tolist(),
    'val_loss': history_initial_df['val_loss'].tolist()
}
history_fine = {
    'accuracy': history_fine_df['accuracy'].tolist(),
    'val_accuracy': history_fine_df['val_accuracy'].tolist(),
    'loss': history_fine_df['loss'].tolist(),
    'val_loss': history_fine_df['val_loss'].tolist()
}
print(f"Initial training epochs: {len(history_initial['accuracy'])}")
print(f"Fine-tuning epochs: {len(history_fine['accuracy'])}")
print(f"Total epochs: {len(history_initial['accuracy']) + len(history_fine['accuracy'])}")

plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history_initial['accuracy'] + history_fine['accuracy'], label='Train Accuracy')
plt.plot(history_initial['val_accuracy'] + history_fine['val_accuracy'], label='Validation Accuracy')
plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history_initial['loss'] + history_fine['loss'], label='Train Loss')
plt.plot(history_initial['val_loss'] + history_fine['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.tight_layout()
plt.savefig('D:/Major Project/efficientnet/training_history.png')
plt.close()
print("Training history plot saved to 'D:/Major Project/efficientnet/training_history.png'")

Error loading history CSVs: [Errno 2] No such file or directory: 'D:/Major Project/efficientnet/training_history_fine_efficientnetb0.csv'
Please ensure 'training_history_initial_efficientnetb0.csv' and 'training_history_fine_efficientnetb0.csv' exist in 'D:/Major Project/efficientnet/'
Initial training epochs: 29
Fine-tuning epochs: 30
Total epochs: 59
Training history plot saved to 'D:/Major Project/efficientnet/training_history.png'
