<a href="https://colab.research.google.com/github/achmadraja/cassavaleafdiseaseclassificaion/blob/master/casssavaleafdiseaseclassification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.models import Sequential

# Load the training data
train_df = pd.read_csv('train.csv')

# Set the path to the train and test images folder
train_path = 'train_images/'
test_path = 'test_images/'

# Split the dataset into training and validation sets
train_data, valid_data = train_test_split(train_df, test_size=0.2, random_state=42)

# Set the batch size and image size
batch_size = 32
img_size = 224

In [14]:
# Create an image data generator with data augmentation for the training set
train_datagen = ImageDataGenerator(rescale=1./255,
                                   rotation_range=20,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True)

# Create an image data generator for the validation and test sets
val_datagen = ImageDataGenerator(rescale=1./255)

# Generate the training, validation, and test sets
train_generator = train_datagen.flow_from_dataframe(train_data, 
                                                    directory=train_path, 
                                                    x_col='image_id', 
                                                    y_col='label', 
                                                    target_size=(img_size, img_size), 
                                                    batch_size=batch_size, 
                                                    class_mode='raw')

valid_generator = val_datagen.flow_from_dataframe(valid_data, 
                                                  directory=train_path, 
                                                  x_col='image_id', 
                                                  y_col='label', 
                                                  target_size=(img_size, img_size), 
                                                  batch_size=batch_size, 
                                                  class_mode='raw')

test_generator = val_datagen.flow_from_directory(test_path,
                                                  target_size=(img_size, img_size),
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  class_mode=None)

Found 850 validated image filenames.
Found 205 validated image filenames.
Found 0 images belonging to 0 classes.




In [15]:
# Define the model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(img_size, img_size, 3)),
    MaxPooling2D((2, 2)),
    Dropout(0.2),
    
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.2),
    
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.2),
    
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(5, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [16]:
# Set the number of epochs
epochs = 20

# Train the model
history = model.fit(train_generator,
                    epochs=epochs,
                    validation_data=valid_generator)

Epoch 1/20


ValueError: ignored

In [None]:
# Evaluate the model with the validation set
val_loss, val_acc = model.evaluate(valid_generator)
print("Validation Loss:", val_loss)
print("Validation Accuracy:", val_acc)

# Plot the training and validation accuracy and loss
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

plt.figure(figsize=(8, 8))
plt.subplot(2, 1, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.ylabel('Accuracy')
plt.ylim([min(plt.ylim()),1])
plt.title('Training and Validation Accuracy')

plt.subplot(2, 1, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.ylabel('Cross Entropy')
plt.ylim([0,max(plt.ylim())])
plt.title('Training and Validation Loss')
plt.xlabel('epoch')
plt.show()

In [None]:
# Make predictions on the testing set
preds = model.predict(test_generator)
predictions = np.argmax(preds, axis=1)

# Create a submission file
submission = pd.DataFrame({'image_id': test_generator.filenames, 'label': predictions})
submission['image_id'] = submission['image_id'].apply(lambda x: x.split('/')[-1])
submission.to_csv('submission.csv', index=False)