In [18]:
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import tensorflow.keras as keras
import json
import os
from zipfile import ZipFile

In [19]:
# Query the visible GPUs once and reuse the result for every report line below
gpu_devices = tf.config.list_physical_devices('GPU')
print(gpu_devices)
print("Num GPUs Available: ", len(gpu_devices))

# Report whether this TensorFlow build has CUDA support, then repeat the device list
print(tf.test.is_built_with_cuda())
print(gpu_devices)

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
Num GPUs Available:  1
True
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [26]:
!pip install kaggle



In [27]:
# Read the Kaggle API credentials from kaggle.json in the working directory.
# The context manager closes the file handle as soon as parsing is done.
with open("kaggle.json") as credentials_file:
    kaggle_credentials = json.load(credentials_file)

In [28]:
# Kaggle API credentials: publish the username and key as environment variables
os.environ.update(
    KAGGLE_USERNAME=kaggle_credentials["username"],
    KAGGLE_KEY=kaggle_credentials["key"],
)

In [30]:
# Download the dataset archive from Kaggle with the kaggle command-line tool
!kaggle datasets download -d vipoooool/new-plant-diseases-dataset

Dataset URL: https://www.kaggle.com/datasets/vipoooool/new-plant-diseases-dataset
License(s): copyright-authors
new-plant-diseases-dataset.zip: Skipping, found more recently modified local copy (use --force to force download)


In [31]:
# Unzip the dataset only when the folders used by the loaders below are missing,
# so re-running the notebook does not repeat the long extraction.
# NOTE: if an earlier extraction was interrupted, delete the partially
# extracted folder before re-running so the archive is unpacked again.
dataset_dirs = (
    'New Plant Diseases Dataset(Augmented)/train',
    'New Plant Diseases Dataset(Augmented)/valid',
)
if not all(os.path.isdir(path) for path in dataset_dirs):
    with ZipFile("new-plant-diseases-dataset.zip", 'r') as zip_ref:
        zip_ref.extractall()

KeyboardInterrupt: 

<h4>Data Preprocessing</h4>
<h5>Training set preprocessing</h5>

In [35]:
# Loader settings for the training images: labels are inferred from the
# sub-folder names and returned in categorical form, images are read as RGB,
# resized to 256x256 and served in shuffled batches of 32.
training_loader_options = dict(
    labels="inferred",
    label_mode="categorical",
    class_names=None,
    color_mode="rgb",
    batch_size=32,
    image_size=(256, 256),
    shuffle=True,
    seed=None,
    validation_split=None,
    subset=None,
    interpolation="bilinear",
    follow_links=False,
    crop_to_aspect_ratio=False,
)

training_set = keras.utils.image_dataset_from_directory(
    'New Plant Diseases Dataset(Augmented)/train',
    **training_loader_options,
)

Found 70295 files belonging to 38 classes.


<h4>Validation set preprocessing</h4>

In [None]:
# Inspect a single batch: print the image tensor and the label tensor together
# with their shapes. take(1) limits the iteration to the first batch.
for x, y in training_set.take(1):
    print(x, x.shape)
    print(y, y.shape)

In [36]:
# Loader settings for the validation images; they mirror the training loader
# (categorical labels inferred from folder names, 256x256 RGB, batches of 32).
validation_loader_options = dict(
    labels="inferred",
    label_mode="categorical",
    class_names=None,
    color_mode="rgb",
    batch_size=32,
    image_size=(256, 256),
    shuffle=True,
    seed=None,
    validation_split=None,
    subset=None,
    interpolation="bilinear",
    follow_links=False,
    crop_to_aspect_ratio=False,
)

validation_set = keras.utils.image_dataset_from_directory(
    'New Plant Diseases Dataset(Augmented)/valid',
    **validation_loader_options,
)

Found 17572 files belonging to 38 classes.


<h4>Building the convolutional neural network</h4>

In [None]:
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Dropout
from tensorflow.keras.models import Sequential

In [None]:
# Definition of the model: five convolutional stages (two 3x3 convolutions and
# a 2x2 max-pool each) followed by a dense classification head.
model = Sequential([
    # Stage 1 - 32 filters; the first layer also declares the 256x256 RGB input
    Conv2D(32, kernel_size=3, padding='same', activation='relu', input_shape=[256, 256, 3]),
    Conv2D(32, kernel_size=(3, 3), activation='relu'),
    MaxPool2D(pool_size=2, strides=2),

    # Stage 2 - 64 filters
    Conv2D(64, kernel_size=3, padding='same', activation='relu'),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPool2D(pool_size=2, strides=2),

    # Stage 3 - 128 filters
    Conv2D(128, kernel_size=3, padding='same', activation='relu'),
    Conv2D(128, kernel_size=(3, 3), activation='relu'),
    MaxPool2D(pool_size=2, strides=2),

    # Stage 4 - 256 filters
    Conv2D(256, kernel_size=3, padding='same', activation='relu'),
    Conv2D(256, kernel_size=(3, 3), activation='relu'),
    MaxPool2D(pool_size=2, strides=2),

    # Stage 5 - 512 filters; unlike stages 1-4, neither convolution uses padding='same'
    Conv2D(512, kernel_size=3, activation='relu'),
    Conv2D(512, kernel_size=(3, 3), activation='relu'),
    MaxPool2D(pool_size=2, strides=2),

    Dropout(0.25),

    # Flatten to a one-dimensional array for the dense layers
    Flatten(),
    Dense(1500, activation='relu'),

    Dropout(0.4),

    # Output layer: one softmax unit per class (38 classes)
    Dense(38, activation='softmax'),
])

<h4>Compile model</h4>

In [None]:
# Configure training: categorical cross-entropy loss, Adam with a learning rate
# of 0.0001, and accuracy as the reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer summary of the model defined above
model.summary()

<h4>Model training</h4>

In [None]:
# Train for 5 epochs, validating on validation_set after each epoch; the
# returned History object is kept for saving and plotting later on.
training_history = model.fit(
    x=training_set,
    epochs=5,
    validation_data=validation_set,
)

<h4>Model evaluation</h4>

In [None]:
# Evaluate the trained model on the training data; the model was compiled with
# a single 'accuracy' metric, so the result unpacks into loss and accuracy.
train_loss, train_accuracy = model.evaluate(training_set)

In [None]:
# Show the loss and accuracy measured on the training data
print(train_loss, train_accuracy)

In [None]:
# Evaluate the trained model on the validation data (loss and accuracy)
validation_loss, validation_accuracy = model.evaluate(validation_set)

In [None]:
# Show the loss and accuracy measured on the validation data
print(validation_loss, validation_accuracy)

<h4>Save model</h4>

In [None]:
from keras import Model

# Make sure the target folder exists before saving, in case the saver does not
# create it; exist_ok makes this a no-op when the folder is already there.
os.makedirs("trained_model", exist_ok=True)

# The ".keras" suffix already selects the native Keras format. The explicit
# save_format argument is deprecated in Keras 3, so it is omitted here.
model.save(filepath="trained_model/trained_disease_detection.keras", overwrite=True)

In [None]:
# Show the per-epoch metrics recorded by fit(); this dictionary is written to
# JSON and plotted in the cells below.
training_history.history

<h5>Recording history in JSON</h5>

In [None]:
import json
# Serialise the recorded training metrics and store them in training_history.json
history_json = json.dumps(training_history.history)
with open("training_history.json", "w") as history_file:
    history_file.write(history_json)

<h4>Visualizations and Metrics</h4>

In [None]:
# Plot training and validation accuracy for every recorded epoch.
# The x-axis is derived from the length of the recorded history instead of a
# hard-coded range, so the plot stays correct when the epoch count passed to
# fit() changes.
train_accuracy_per_epoch = training_history.history['accuracy']
val_accuracy_per_epoch = training_history.history['val_accuracy']
epochs = list(range(1, len(train_accuracy_per_epoch) + 1))

plt.plot(epochs, train_accuracy_per_epoch, color='green', label="training accuracy")
plt.plot(epochs, val_accuracy_per_epoch, color='red', label="validation accuracy")
plt.xlabel("Number of epochs")
plt.ylabel("Accuracy")
plt.title("Accuracy per Epoch")
plt.legend()
plt.show()

In [None]:
# Class labels of the validation set; they are passed as target_names to the
# classification report further down.
class_name = validation_set.class_names
class_name

In [None]:
# Re-load the validation images WITHOUT shuffling, so that the order of the
# predictions matches the order of the labels collected from this dataset.
# The path now points at the same folder as validation_set: no cell in this
# notebook creates 'Plant_Diseases_Dataset', and class_name (taken from
# validation_set) must line up with the classes of this dataset.
# NOTE(review): this "test" set is the validation split, not held-out data.
test_set = keras.utils.image_dataset_from_directory(
    'New Plant Diseases Dataset(Augmented)/valid',
    labels="inferred",
    label_mode="categorical",
    class_names=None,
    color_mode="rgb",
    batch_size=32,
    image_size=(256, 256),
    shuffle=False,
    seed=None,
    validation_split=None,
    subset=None,
    interpolation="bilinear",
    follow_links=False,
    crop_to_aspect_ratio=False
)

In [None]:
# Run the model over the whole test set; the softmax output layer yields one
# score per class for every image.
y_pred = model.predict(test_set)
y_pred

In [None]:
# Index of the highest-scoring class for each prediction row
predicted_cats = tf.argmax(y_pred, axis=1)

In [None]:
# Gather the label batches in iteration order and join them into one tensor.
# test_set is built with shuffle=False, so this order matches the rows of y_pred.
label_batches = [labels for _, labels in test_set]
true_cats = tf.concat(label_batches, axis=0)
true_cats

In [None]:
# Convert the categorical label rows into integer class indices
Y_true = tf.argmax(true_cats, axis=1)
Y_true

In [None]:
from sklearn.metrics import classification_report, confusion_matrix


In [None]:
# Per-class report comparing true and predicted class indices, labelled with
# the class names taken from validation_set.
print(classification_report(Y_true, predicted_cats, target_names=class_name))

In [None]:
# Confusion matrix of true class indices (first argument) against predicted
# class indices (second argument); plotted as a heatmap below.
cm = confusion_matrix(Y_true, predicted_cats)
cm

In [None]:
from IPython.core.pylabtools import figsize

# Draw the confusion matrix as an annotated heatmap on a 40x40 inch figure.
# The size is passed through the figsize keyword: IPython's figsize() helper
# returns None and changes the default size of every later figure instead of
# sizing only this one.
plt.figure(figsize=(40, 40))
sns.heatmap(cm, annot=True, annot_kws={'size':10})
plt.xlabel("predicted class", fontsize=20)
plt.ylabel("true class", fontsize=20)
plt.title("Plant Disease Confusion Matrix", fontsize=25)
plt.show()