### Step 1. Importing Required Libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import tensorflow as tf
import glob
import os
import pandas as pd
from tqdm import tqdm
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

In [None]:
# Check if TensorFlow can access a GPU.
# Memory growth is enabled on every visible GPU (not just the first one) so
# TensorFlow allocates device memory on demand instead of reserving it all.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    print(f"GPUs available: {gpus}")
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        # Memory growth has to be configured before the GPUs are initialized,
        # so re-running this cell in a live kernel ends up here.
        print(f"Could not enable GPU memory growth: {err}")
    print("Using GPU for TensorFlow operations.")
else:
    print("No GPU found. TensorFlow will use CPU.")

### Step 2. Prepare Raw Datasets

In [None]:
# Root folder of the raw PlantVillage images. The cells below list its entries
# and treat each one as a class folder. The path is machine-specific: adjust it
# before running the notebook anywhere else.
data_path = r'C:\Users\bot\Desktop\googleClassroom\PlantVillage'

In [None]:
# Every sub-folder of the dataset root is one class. Sorting gives an order
# that does not depend on the file system, and the isdir filter keeps stray
# files (thumbnails, archives, ...) out of the class list.
categories = sorted(
    entry for entry in os.listdir(data_path)
    if os.path.isdir(os.path.join(data_path, entry))
)
print("Classes:", categories)

### Step 3. Splitting Training, Validation, Testing Data From RAW Data Directory

In [None]:
# Load every raw image into memory, resized to img_size x img_size, and record
# the integer index of the class folder it came from.
data_dir = data_path
img_size = 128

# Sorted, directories only: label indices are stable across machines and a
# stray file in the dataset root cannot break the folder walk below.
categories = sorted(
    entry for entry in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, entry))
)
print("Classes:", categories)

data = []
labels = []
skipped = []  # paths that could not be decoded or resized

# tqdm wraps the list itself (not the enumerate object) so it knows the total
for i, category in enumerate(tqdm(categories)):
    folder_path = os.path.join(data_dir, category)
    for img in os.listdir(folder_path):
        img_path = os.path.join(folder_path, img)
        img_arr = cv2.imread(img_path)
        if img_arr is None:
            # cv2.imread signals an unreadable file by returning None
            skipped.append(img_path)
            continue
        try:
            img_arr = cv2.resize(img_arr, (img_size, img_size))
        except cv2.error:
            skipped.append(img_path)
            continue
        data.append(img_arr)
        labels.append(i)

print("Total images:", len(data))
if skipped:
    # Report what was left out instead of dropping it silently
    print("Skipped unreadable images:", len(skipped))

In [None]:
import os
import shutil
import random
from tqdm import tqdm


# Copy the raw images into ./train, ./valid and ./test, split 60/20/20 within
# each class. The seed plus the sorted listings below make the assignment
# reproducible, so re-running the cell copies every file to the same split.
random.seed(42)

src_dir = data_path
target_base = '.'
splits = {'train': 0.60, 'valid': 0.20, 'test': 0.20}

# Get class names from subfolders (sorted so the shuffle sequence is stable)
classes = sorted(d for d in os.listdir(src_dir) if os.path.isdir(os.path.join(src_dir, d)))

for split in splits:
    for cls in classes:
        os.makedirs(os.path.join(target_base, split, cls), exist_ok=True)

# Prepare to store split counts
split_indices_per_class = {}
total_images = 0

print("Original and split counts per class:")
for cls in classes:
    img_dir = os.path.join(src_dir, cls)
    # Sort before shuffling: os.listdir returns entries in arbitrary order, so
    # without a fixed starting order the seed alone would not pin the split.
    images = sorted(f for f in os.listdir(img_dir) if os.path.isfile(os.path.join(img_dir, f)))
    random.shuffle(images)
    n_total = len(images)
    n_train = int(n_total * splits['train'])
    n_valid = int(n_total * splits['valid'])
    # The test split takes whatever is left after integer truncation
    n_test = n_total - n_train - n_valid

    split_indices = {
        'train': images[:n_train],
        'valid': images[n_train:n_train + n_valid],
        'test': images[n_train + n_valid:]
    }
    split_indices_per_class[cls] = split_indices
    total_images += n_total

    print(f"Class '{cls}': total={n_total}, train={len(split_indices['train'])}, valid={len(split_indices['valid'])}, test={len(split_indices['test'])}")

print(f"\nTotal images to copy: {total_images}\n")

# Copy with a single progress bar
with tqdm(total=total_images, desc="Overall Progress", unit="img") as pbar:
    for cls, split_indices in split_indices_per_class.items():
        for split, files in split_indices.items():
            for f in files:
                src_path = os.path.join(src_dir, cls, f)
                dst_path = os.path.join(target_base, split, cls, f)
                shutil.copy2(src_path, dst_path)
                pbar.update(1)

### Step 4. Reviewing Data

In [None]:
# Display some sample images from the raw data for visual inspection
# Only real class folders are candidates; a stray file cannot be listed below.
class_dirs = [name for name in categories if os.path.isdir(os.path.join(data_path, name))]
sample_class = random.choice(class_dirs)
class_files = os.listdir(os.path.join(data_path, sample_class))
# random.sample raises ValueError when asked for more items than exist, so
# cap the request at the number of files in the folder.
sample_images = random.sample(class_files, min(9, len(class_files)))

fig, axs = plt.subplots(3, 3, figsize=(9, 7))
fig.suptitle(f"Sample images from class: {sample_class}", fontsize=16)

# Hide all axes up front so grid cells without a readable image stay blank
for ax in axs.flat:
    ax.axis('off')

for i, img_name in enumerate(sample_images):
    img_path = os.path.join(data_path, sample_class, img_name)
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None for files it cannot decode
        print(f"{img_name}: could not be read, skipped")
        continue
    # OpenCV loads BGR; matplotlib expects RGB
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    h, w, c = img.shape
    ax = axs[i // 3, i % 3]
    ax.imshow(img)
    ax.set_title(f"{w}x{h}x{c}", fontsize=10)

    # Optional: print detailed info in console
    print(f"{img_name}: shape = {img.shape}")

plt.tight_layout()
plt.show()

### Step 5. Preparing Datasets to build CNN Models

In [None]:
# Training data: read from the 'train' folder in shuffled batches of 32 RGB
# images at 128x128, with categorical labels inferred from the sub-folders.
training_set = tf.keras.utils.image_dataset_from_directory(
    directory='train',
    # --- labels ---
    labels="inferred",
    label_mode="categorical",
    class_names=None,
    # --- image decoding / resizing ---
    color_mode="rgb",
    image_size=(128, 128),
    interpolation="bilinear",
    crop_to_aspect_ratio=False,
    # --- batching / ordering ---
    batch_size=32,
    shuffle=True,
    seed=None,
    # --- no on-the-fly split: the folders are already split on disk ---
    validation_split=None,
    subset=None,
    follow_links=False,
)

In [None]:
# Validation data: read from the 'valid' folder with the same image and label
# settings as the training data (batches of 32, 128x128 RGB, categorical).
validation_set = tf.keras.utils.image_dataset_from_directory(
    directory='valid',
    # --- labels ---
    labels="inferred",
    label_mode="categorical",
    class_names=None,
    # --- image decoding / resizing ---
    color_mode="rgb",
    image_size=(128, 128),
    interpolation="bilinear",
    crop_to_aspect_ratio=False,
    # --- batching / ordering ---
    batch_size=32,
    shuffle=True,
    seed=None,
    # --- no on-the-fly split: the folders are already split on disk ---
    validation_split=None,
    subset=None,
    follow_links=False,
)

In [None]:
# Empty sequential container; the layers are appended in the next cell.
cnn = tf.keras.Sequential()

In [None]:
# The output layer gets one unit per class folder found under 'train', so the
# network keeps matching the data if classes are added or removed.
num_classes = len(training_set.class_names)

# Feature extractor: four conv + max-pool stages. 'same' padding keeps the
# spatial size through each convolution and every pool halves it.
cnn.add(tf.keras.layers.Conv2D(filters=32, kernel_size=3, padding='same', activation='relu', input_shape=[128, 128, 3]))
cnn.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))

for n_filters in (64, 128, 256):
    cnn.add(tf.keras.layers.Conv2D(filters=n_filters, kernel_size=3, padding='same', activation='relu'))
    cnn.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))

# Classifier head: dropout, flatten, one wide dense layer, dropout, softmax
cnn.add(tf.keras.layers.Dropout(0.20))
cnn.add(tf.keras.layers.Flatten())

cnn.add(tf.keras.layers.Dense(units=1500, activation='relu'))
cnn.add(tf.keras.layers.Dropout(0.4))

cnn.add(tf.keras.layers.Dense(units=num_classes, activation='softmax'))

cnn.summary()

In [None]:
# Adam with a small learning rate. The legacy implementation is preferred when
# the installed TensorFlow/Keras still provides it; releases that dropped the
# `legacy` namespace fall back to the standard Adam optimizer.
try:
    optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=0.0001)
except (AttributeError, ImportError):
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

cnn.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 10 epochs on the training batches, passing the validation data so
# its metrics are recorded in the returned history as well.
training_history = cnn.fit(
    training_set,
    epochs=10,
    validation_data=validation_set,
)

### Step 6. Evaluation on Training & Validation Datasets

In [None]:
# Accuracy of the fitted model on the training data
train_loss, train_acc = cnn.evaluate(x=training_set)
print('Training accuracy:', train_acc)

# Accuracy of the fitted model on the validation data
val_loss, val_acc = cnn.evaluate(x=validation_set)
print('Validation accuracy:', val_acc)

In [None]:
# Persist the fitted network to disk in the working directory
cnn.save(filepath='trained_plant_disease_model.keras')

In [None]:
# Plot training vs. validation accuracy per epoch.
# The x-axis is derived from the recorded history, so the plot stays correct
# if the number of training epochs changes or training stops early.
history = training_history.history
epochs = list(range(1, len(history['accuracy']) + 1))
plt.plot(epochs, history['accuracy'], color='red', label='Training Accuracy')
plt.plot(epochs, history['val_accuracy'], color='blue', label='Validation Accuracy')
plt.xlabel('No. of Epochs')
plt.ylabel('Accuracy')
plt.title('Visualization of Accuracy Result')
plt.legend()
plt.show()

In [None]:
# Test data: read from the 'test' folder one image per batch and without
# shuffling, so repeated passes over the dataset yield the same order.
test_set = tf.keras.utils.image_dataset_from_directory(
    directory='test',
    # --- labels ---
    labels="inferred",
    label_mode="categorical",
    class_names=None,
    # --- image decoding / resizing ---
    color_mode="rgb",
    image_size=(128, 128),
    interpolation="bilinear",
    crop_to_aspect_ratio=False,
    # --- batching / ordering ---
    batch_size=1,
    shuffle=False,
    seed=None,
    # --- no on-the-fly split: the folders are already split on disk ---
    validation_split=None,
    subset=None,
    follow_links=False,
)

In [None]:
from sklearn.metrics import confusion_matrix,classification_report

# Class names in label-index order, taken from the test dataset itself
# (previously `class_name` was never defined and the report raised NameError).
class_name = test_set.class_names
class_indices = list(range(len(class_name)))

# Predicted class = index of the highest softmax score per image
y_pred = cnn.predict(test_set)
predicted_categories = tf.argmax(y_pred, axis=1)

# True class = index of the 1 in each one-hot label. test_set is not shuffled,
# so this second pass sees the images in the same order as predict() did.
true_categories = tf.concat([y for _, y in test_set], axis=0)
Y_true = tf.argmax(true_categories, axis=1)

# Passing `labels` explicitly keeps the matrix/report shape tied to the full
# class list even if some class has no test samples or is never predicted.
cm = confusion_matrix(Y_true, predicted_categories, labels=class_indices)
# Precision Recall Fscore
print(classification_report(Y_true, predicted_categories, labels=class_indices, target_names=class_name))

In [None]:
# Confusion matrix heatmap: rows are actual classes, columns are predictions.
# fmt='d' prints the cell counts as plain integers; the default format would
# render counts of 100 or more in scientific notation.
tick_labels = test_set.class_names
plt.figure(figsize=(40, 40))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    annot_kws={"size": 10},
    xticklabels=tick_labels,
    yticklabels=tick_labels,
)
plt.xlabel('Predicted Class',fontsize = 20)
plt.ylabel('Actual Class',fontsize = 20)
plt.title('Plant Disease Prediction Confusion Matrix',fontsize = 25)
plt.show()