In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Activation, Dropout, Flatten, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.utils.class_weight import compute_class_weight

# Set up directories
#
# The dataset root can be overridden with the PADDY_DATA_DIR environment
# variable so the notebook can run on machines other than the author's; when
# the variable is not set, the original local path is used unchanged.
base_dir = os.environ.get(
    'PADDY_DATA_DIR',
    'C:\\Users\\Sadia Mahmud\\Documents\\Jupiter_p3\\paddy-disease-classification')
train_dir = os.path.join(base_dir, 'train_images')  # one sub-folder per class
test_dir = os.path.join(base_dir, 'test_images')  # unlabeled images to predict on

# Set up image data generators with a validation split
#
# Both generators only rescale pixel values to [0, 1]. They are configured
# with the same `validation_split` so that `subset='training'` and
# `subset='validation'` select complementary slices of `train_dir`.
# `validate_datagen` needs the split as well: without it, asking for
# `subset='validation'` is not possible on that generator.
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
validate_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Options shared by the training and validation iterators.
flow_options = dict(
    target_size=(480, 640),  # (height, width) every image is resized to
    batch_size=32,
    class_mode='categorical')  # one-hot labels, matching the softmax output

train_generator = train_datagen.flow_from_directory(
    train_dir,
    subset='training',
    **flow_options)

validate_generator = validate_datagen.flow_from_directory(
    train_dir,
    subset='validation',
    **flow_options)

# Calculate class weights for imbalanced data
#
# `compute_class_weight` returns one weight per entry of `classes`, in the
# same order. Each weight is keyed by the class id it was computed for
# (instead of by its position), so the mapping stays correct even if some
# class id does not occur in the training subset.
present_classes = np.unique(train_generator.classes)
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=present_classes,
    y=train_generator.classes)
# Plain Python int/float keys and values for the `class_weight` argument of fit().
class_weights_dict = {
    int(class_id): float(weight)
    for class_id, weight in zip(present_classes, class_weights)
}

# Define a simple CNN model
#
# Three Conv -> ReLU -> MaxPool stages followed by a dense classifier head.
# The size of the output layer is taken from the training generator instead
# of being hard-coded, so it always matches the number of class folders.
num_classes = len(train_generator.class_indices)

# NOTE(review): with 480x640 inputs and Keras' default 'valid' padding the
# Flatten output is 58 * 78 * 80 = 361,920 features, so Dense(160) alone holds
# roughly 57.9M weights. Consider a smaller input size or global pooling if
# training time / memory is a problem.
model = Sequential([
    # Stage 1
    Conv2D(40, (3, 3), input_shape=(480, 640, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 2
    Conv2D(40, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 3
    Conv2D(80, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Classifier head
    Flatten(),
    Dense(160),
    Activation('relu'),
    Dropout(0.5),
    Dense(num_classes),  # one unit per class found in train_dir
    Activation('softmax'),
])

# Configure training: Adam optimizer, categorical cross-entropy loss for the
# one-hot labels produced by the generators, and accuracy as the reported metric.
optimizer = Adam(learning_rate=0.001)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Early stopping callback
#
# Training stops once `val_loss` has not improved for 7 consecutive epochs.
# `restore_best_weights=True` rolls the model back to the weights of the best
# epoch when that happens; without it the model that gets saved afterwards
# would carry the weights from the last (worse) epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=7,
    restore_best_weights=True)

# Train the model with class weights
# The returned History object is kept so the loss/accuracy curves can be
# inspected after training.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=100,  # upper bound; early stopping normally ends training sooner
    validation_data=validate_generator,
    validation_steps=len(validate_generator),
    class_weight=class_weights_dict,
    callbacks=[early_stopping])

# Persist the trained network as an HDF5 file next to the dataset folders
model_save_path = os.path.join(base_dir, 'plant_disease_model.h5')
model.save(filepath=model_save_path)

# Create a test data generator
test_datagen = ImageDataGenerator(rescale=1./255)

# `flow_from_directory` only collects images that live inside sub-folders of
# the directory it is given. If `test_dir` holds the images directly (no
# sub-folders), point the generator at its parent directory and restrict it to
# the test folder via `classes`; otherwise it would find zero images.
# In that flat case `test_generator.filenames` is expected to be prefixed with
# the test folder name.
if any(entry.is_dir() for entry in os.scandir(test_dir)):
    test_root, test_folders = test_dir, None
else:
    test_root, test_folder_name = os.path.split(os.path.normpath(test_dir))
    test_folders = [test_folder_name]

test_generator = test_datagen.flow_from_directory(
    test_root,
    classes=test_folders,  # None keeps the default "every sub-folder" behavior
    target_size=(480, 640),
    batch_size=32,
    class_mode=None,  # Since the test images are not labeled
    shuffle=False)  # keep predictions aligned with test_generator.filenames


# Run inference over every test batch, then pick the highest-scoring class
# index for each image
num_test_batches = len(test_generator)
predictions = model.predict(test_generator, steps=num_test_batches)
predicted_classes = predictions.argmax(axis=1)

# Translate predicted class indices back into class names by inverting the
# name -> index mapping exposed by the training generator
labels = {
    class_index: class_name
    for class_name, class_index in train_generator.class_indices.items()
}
predicted_labels = [labels[class_index] for class_index in predicted_classes]

# Pair each test file with its predicted label (the test generator is not
# shuffled, so the two sequences are in the same order)
filenames = test_generator.filenames
results = pd.DataFrame({
    "Filename": filenames,
    "Predictions": predicted_labels,
})

# Write the table to a CSV file in the dataset folder, without the index column
results_csv_path = os.path.join(base_dir, 'test_predictions.csv')
results.to_csv(results_csv_path, index=False)

Found 8330 images belonging to 10 classes.
Found 2077 images belonging to 10 classes.


Epoch 1/100


  8/261 [..............................] - ETA: 3:55:41 - loss: 9.7968 - accuracy: 0.1055 