<a href="https://colab.research.google.com/github/eyreynational/Final-Model-YOLOv8-CNN-ResNet-and-VGG16-/blob/main/CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive; the dataset paths defined below live under this mount point.
# google.colab is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import TensorBoard
import os
# Tensorflow Libraries
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import Callback, EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
# System libraries
from pathlib import Path
import os.path
import random
from collections import Counter

# Visualization Libraries
import matplotlib.cm as cm
import cv2
import seaborn as sns
from cycler import cycler
import textwrap

sns.set_style('darkgrid')

# Metrics
from sklearn.metrics import classification_report, confusion_matrix
import itertools

In [None]:
# Define paths
# Dataset roots on the mounted Drive. Both are later passed to flow_from_directory and
# load_images_from_dir, which treat each immediate sub-folder as one class.
# NOTE(review): 'trian' looks like a typo for 'train' — confirm against the real Drive folder name before changing it.
train_dir = '/content/drive/MyDrive/Master/Dataset/trian'  # Update with your path
test_dir = '/content/drive/MyDrive/Master/Dataset/test'

In [None]:
import os

# Resolve the training directory to an absolute path (a path string, not a shape;
# the variable name is kept for compatibility with the rest of the notebook).
train_dir_shape = os.path.abspath(train_dir)
print(train_dir_shape)

# Total size in bytes of all files under the training directory.
# os.path.getsize() on a directory only reports the size of the directory entry
# itself, not of its contents, so the tree is walked and file sizes are summed.
train_dir_size = sum(
    os.path.getsize(os.path.join(root, file_name))
    for root, _dirs, file_names in os.walk(train_dir)
    for file_name in file_names
    # Skip entries that vanish or are broken links so getsize() cannot raise.
    if os.path.isfile(os.path.join(root, file_name))
)
print(train_dir_size)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os

# Define a function to count images in each category
def count_images_per_category(directory, extensions=None):
    """Count the files stored in each category sub-folder of ``directory``.

    Args:
        directory: Path whose immediate sub-directories are the categories.
        extensions: Optional iterable of file extensions such as
            ``('.jpg', '.png')`` (matched case-insensitively). When given,
            only files ending in one of them are counted; when ``None``
            every regular file is counted.

    Returns:
        dict mapping category name -> number of files. Keys are inserted in
        sorted order so results for different directories line up by category.
    """
    allowed = None if extensions is None else tuple(ext.lower() for ext in extensions)
    category_counts = {}
    for category in sorted(os.listdir(directory)):
        category_path = os.path.join(directory, category)
        if not os.path.isdir(category_path):
            continue  # stray files next to the category folders are not categories
        category_counts[category] = sum(
            1
            for name in os.listdir(category_path)
            # Nested folders inside a category are not images, so only files count.
            if os.path.isfile(os.path.join(category_path, name))
            and (allowed is None or name.lower().endswith(allowed))
        )
    return category_counts

# Count images in training and testing sets
train_counts = count_images_per_category(train_dir)
test_counts = count_images_per_category(test_dir)

# Create a bar chart
# Build one shared, sorted category list and look both counts up by name.
# Taking .values() from each dict independently pairs bars by directory listing
# order, which is not guaranteed to match between the two folders, and fails
# with a shape mismatch if one split is missing a category.
categories = sorted(set(train_counts) | set(test_counts))  # Get category names
train_values = [train_counts.get(category, 0) for category in categories]  # Training counts, aligned to categories
test_values = [test_counts.get(category, 0) for category in categories]  # Testing counts, aligned to categories

x = np.arange(len(categories))  # the label locations
width = 0.35  # the width of the bars

# Train and test bars sit side by side, centred on each category tick
fig, ax = plt.subplots()
rects1 = ax.bar(x - width/2, train_values, width, label='Train')
rects2 = ax.bar(x + width/2, test_values, width, label='Test')

# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('Number of Images')
ax.set_title('Dataset Distribution by Category')
ax.set_xticks(x)
ax.set_xticklabels(categories, rotation=45, ha='right')  # Rotate x-axis labels if needed
ax.legend()

# Add value labels to the bars
def autolabel(rects, axes=None):
    """Write each bar's height as a text label just above the bar.

    Args:
        rects: Iterable of bar patches exposing ``get_height``, ``get_x`` and
            ``get_width`` (e.g. the container returned by ``Axes.bar``).
        axes: Axes-like object whose ``annotate`` method is called. Defaults
            to the notebook-level ``ax`` so existing ``autolabel(rects)``
            calls keep working.
    """
    # Resolve the global lazily so the function is usable without it when axes is given.
    target = ax if axes is None else axes
    for rect in rects:
        height = rect.get_height()
        target.annotate('{}'.format(height),
                        # Anchor at the horizontal centre of the bar's top edge
                        xy=(rect.get_x() + rect.get_width() / 2, height),
                        xytext=(0, 3),  # 3 points vertical offset
                        textcoords="offset points",
                        ha='center', va='bottom')

# Annotate the train and test bar groups with their counts
autolabel(rects1)
autolabel(rects2)

# Tighten the layout, then render the figure
fig.tight_layout()
plt.show()

In [None]:
# Size passed to cv2.resize in load_images_from_dir for the PCA step; the Keras generators below use the same 256x256 target_size.
img_size = (256, 256)

In [None]:
# Function to load images and their labels from a directory
def load_images_from_dir(directory, img_size):
    """Read every decodable image below ``directory`` into memory.

    Each immediate sub-directory is treated as one category and its name is
    used as the label of the images it contains. Files that ``cv2.imread``
    cannot decode (it returns ``None`` for them) are skipped.

    Args:
        directory: Root folder holding one sub-folder per category.
        img_size: Size tuple handed to ``cv2.resize`` for every image.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays with one entry per loaded
        image; labels are the category folder names.
    """
    loaded = []
    tags = []
    for entry in os.listdir(directory):
        folder = os.path.join(directory, entry)
        # Plain files sitting next to the category folders are ignored.
        if not os.path.isdir(folder):
            continue
        for file_name in os.listdir(folder):
            # cv2 decodes to BGR channel order.
            pixels = cv2.imread(os.path.join(folder, file_name))
            if pixels is None:
                continue
            loaded.append(cv2.resize(pixels, img_size))
            tags.append(entry)
    return np.array(loaded), np.array(tags)

In [None]:
# Load training images and labels
# X_train stacks the resized images; y_train holds the matching category folder names.
X_train, y_train = load_images_from_dir(train_dir, img_size)
print("Training data shape:", X_train.shape)

In [None]:
# Collapse every training image into one row vector so each pixel value becomes a feature
n_samples = len(X_train)
X_flat = np.reshape(X_train, (n_samples, -1))
print("Flattened image shape:", X_flat.shape)

In [None]:
# Standardize the pixel values (mean=0, std=1)
# Every column of X_flat (one flattened pixel position) is scaled independently.
# The scaler is fitted on the training images only and reused to transform the test images later.
# NOTE(review): astype(np.float64) makes a full floating-point copy of X_flat — confirm it fits in RAM for this dataset.
from sklearn.preprocessing import StandardScaler # Import StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_flat.astype(np.float64))

In [None]:
# Apply PCA to retain 95% of the variance
# A fractional n_components asks scikit-learn to keep as many components as are needed
# to reach that share of explained variance; this is used together with svd_solver='full'.
from sklearn.decomposition import PCA  # Import PCA here
pca = PCA(n_components=0.95, svd_solver='full')
X_pca = pca.fit_transform(X_scaled)
print("PCA reduced shape:", X_pca.shape)

In [None]:
# Push the test images through the same pipeline as the training images:
# load + resize, flatten, then apply the scaler and PCA that were fitted on the training data
# (transform only, no re-fitting, so no test information leaks into the projection).
X_test, y_test = load_images_from_dir(test_dir, img_size)
X_test_flat = X_test.reshape(X_test.shape[0], -1)
X_test_scaled = scaler.transform(X_test_flat.astype(np.float64))
X_test_pca = pca.transform(X_test_scaled)


In [None]:
# Plot the cumulative explained variance ratio to see how many components were selected
cumulative_variance = np.cumsum(pca.explained_variance_ratio_)
# Start the x axis at 1: the first cumulative value is the variance explained by
# one component, so plotting against the default 0-based index would shift the
# whole curve by one against the "Number of Principal Components" label.
component_counts = np.arange(1, len(cumulative_variance) + 1)

plt.figure(figsize=(8, 5))
plt.plot(component_counts, cumulative_variance, marker='o')
plt.xlabel('Number of Principal Components')
plt.ylabel('Cumulative Explained Variance')
plt.title('PCA Explained Variance')
plt.grid(True)
plt.show()

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Training images: rescale pixel values by 1/255 and apply random augmentation
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    horizontal_flip=True,
    rotation_range=20,
    shear_range=0.2,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    fill_mode='nearest',
)

# Test images: rescale only, no augmentation
test_datagen = ImageDataGenerator(rescale=1. / 255)

# Training batches: one-hot ('categorical') labels taken from the sub-folder names
train_generator = train_datagen.flow_from_directory(
    directory=train_dir,
    class_mode='categorical',
    batch_size=32,
    target_size=(256, 256),
)

# Test batches: shuffle is disabled so the batch order matches test_generator.classes
test_generator = test_datagen.flow_from_directory(
    directory=test_dir,
    class_mode='categorical',
    batch_size=32,
    target_size=(256, 256),
    shuffle=False,
)

# Number of samples per class index in each split
print("Class distribution in training set:", Counter(train_generator.classes))
print("Class distribution in test set:", Counter(test_generator.classes))

In [None]:
# Show the class-name -> integer-index mapping of each generator.
# The two mappings should be identical so that predicted indices and true labels refer to the same classes.
print(train_generator.class_indices)
print(test_generator.class_indices)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# ... (your previous code for ImageDataGenerator and generators) ...

# Function to display images with labels
def show_images_with_labels(generator, num_images=5):
    """Plot the first few images of the next batch in a single row.

    Pulls one batch from ``generator`` (advancing it), then shows up to
    ``num_images`` images side by side. Each title is the index of the largest
    entry in that image's label vector.
    """
    batch_images, batch_labels = next(generator)
    # Never ask for more panels than the batch actually holds.
    shown = min(num_images, batch_images.shape[0])

    plt.figure(figsize=(15, 5))
    for position in range(1, shown + 1):
        plt.subplot(1, shown, position)
        plt.imshow(batch_images[position - 1])
        class_index = np.argmax(batch_labels[position - 1])
        plt.title(f"Label: {class_index}")
        plt.axis('off')
    plt.show()

# Example usage
# Consumes one batch from train_generator; the images shown are the augmented, rescaled versions.
show_images_with_labels(train_generator)  # Display images from the training set

In [None]:
# Data Augmentation for the Training Set
# NOTE: this rebinds train_datagen from the earlier cell. The only difference is
# rotation_range=40 here (the earlier definition used 20); this is the configuration used for training below.
train_datagen = ImageDataGenerator(
    rescale=1./255,                # Normalization
    rotation_range=40,             # Random rotations
    width_shift_range=0.2,         # Horizontal shifts
    height_shift_range=0.2,        # Vertical shifts
    shear_range=0.2,               # Shear transformations
    zoom_range=0.2,                # Random zoom
    horizontal_flip=True,          # Flip horizontally
    fill_mode='nearest'            # Fill mode for shifts
)

In [None]:
# For the Test Set, only rescale
test_datagen = ImageDataGenerator(rescale=1./255)

# Load the training set
# Rebinds train_generator so it draws from the current train_datagen;
# the generator created in the earlier cell is discarded.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(256, 256),
    batch_size=32,
    class_mode='categorical'  # For multi-class classification
)

In [None]:
# Load the test set
# Rebinds test_generator with the same settings as the earlier cell.
# shuffle=False keeps the batch order aligned with test_generator.classes, which the
# classification report and confusion matrix cells rely on.
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(256, 256),
    batch_size=32,
    class_mode='categorical',
    shuffle=False
)

In [None]:
# Model Architecture
# Four Conv2D(3x3, ReLU) + MaxPooling2D(2x2) stages with 32, 64, 128 and 128 filters.
model = models.Sequential()

# The first stage also declares the input shape: 256x256 images with 3 channels
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(256, 256, 3)))
model.add(layers.MaxPooling2D((2, 2)))

# Remaining stages differ only in their filter count
for n_filters in (64, 128, 128):
    model.add(layers.Conv2D(n_filters, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))


In [None]:
# Classifier head appended to the convolutional stack:
# flatten -> 512-unit ReLU layer -> 50% dropout against overfitting -> 5-way softmax output
for head_layer in (
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(5, activation='softmax'),  # one output unit per class (5 classes)
):
    model.add(head_layer)

In [None]:
# Compile the model
# categorical_crossentropy matches the one-hot labels produced by class_mode='categorical'
# and the softmax output layer.
model.compile(
    optimizer='adam',  # You can try other optimizers like RMSprop, SGD
    loss='categorical_crossentropy',  # Multi-class classification loss
    metrics=['accuracy']  # Track accuracy during training
)


In [None]:
# Train the model
# steps_per_epoch / validation_steps are intentionally not passed: for these
# directory iterators Keras then runs len(generator) batches per epoch, which
# includes the final partial batch. The previous `samples // batch_size` values
# silently dropped that batch and evaluate to 0 steps when a split holds fewer
# images than one batch.
# NOTE(review): the test generator doubles as validation data here, so the
# reported validation metrics are not an independent hold-out once they are
# used to pick epochs or hyper-parameters — consider a separate validation split.
history = model.fit(
    train_generator,
    epochs=70,  # Adjust the number of epochs based on overfitting/underfitting
    validation_data=test_generator
)

In [None]:
# Evaluate the model
# Loss and accuracy over the test generator (rescaled images only, no augmentation).
test_loss, test_acc = model.evaluate(test_generator, verbose=2)
print(f"Test Accuracy: {test_acc}")

In [None]:
# Evaluate the model
# Loss and accuracy over the training generator. train_generator applies the random
# augmentation configured in train_datagen, so this is accuracy on augmented images and
# can differ from run to run.
train_loss, train_acc = model.evaluate(train_generator, verbose=2)
print(f"Train Accuracy: {train_acc}")

In [None]:
# Class probabilities for every test image, then the index of the most probable class per image
Y_pred = model.predict(test_generator)
y_pred = Y_pred.argmax(axis=1)

In [None]:
# Classification report for precision, recall, f1-score
print('Classification Report')
# Derive the row names from the generator instead of hard-coding them.
# flow_from_directory numbers classes by sub-folder name in alphanumeric order,
# so a hand-written list in any other order attaches the wrong name to each row.
target_names = [
    name
    for name, _ in sorted(test_generator.class_indices.items(), key=lambda item: item[1])
]
print(classification_report(
    test_generator.classes,
    y_pred,
    # Pin the label set so the report keeps one row per class even if a class
    # never appears in the true or predicted labels.
    labels=list(range(len(target_names))),
    target_names=target_names,
))


In [None]:
from sklearn.metrics import classification_report

print('Classification Report')
# Same report with 4 decimal places.
# Row names come from the generator: flow_from_directory numbers classes by
# sub-folder name in alphanumeric order, so a hand-written list in another
# order would mislabel the rows.
target_names = [
    name
    for name, _ in sorted(test_generator.class_indices.items(), key=lambda item: item[1])
]
print(classification_report(
    test_generator.classes,
    y_pred,
    # Pin the label set so every class gets a row even if it is absent from the data.
    labels=list(range(len(target_names))),
    target_names=target_names,
    digits=4,
))


In [None]:
import numpy as np
from sklearn.metrics import confusion_matrix

# Class labels, ordered by the integer index the generator assigned to each class.
# flow_from_directory numbers classes by sub-folder name in alphanumeric order, so
# the names are read from class_indices rather than hard-coded in another order.
class_names = [
    name
    for name, _ in sorted(test_generator.class_indices.items(), key=lambda item: item[1])
]

# Get the confusion matrix (rows = true class, columns = predicted class).
# labels= pins the matrix to one row/column per class even if a class is absent.
# NOTE(review): the name `cm` shadows the `matplotlib.cm` module imported at the
# top of the notebook; kept as-is so later references to `cm` are unaffected.
cm = confusion_matrix(
    test_generator.classes,
    y_pred,
    labels=list(range(len(class_names))),
)

# Calculate per-class accuracy: correctly predicted samples of a class divided by
# all true samples of that class (i.e. per-class recall).
print("Per-Class Accuracy:")
for i, class_name in enumerate(class_names):
    correct = cm[i, i]
    total = cm[i].sum()
    accuracy = correct / total if total > 0 else 0  # avoid dividing by zero for empty classes
    print(f"{class_name}: {accuracy:.4f}")


In [None]:
# Confusion matrix for detailed error analysis
# Rows are true class indices, columns are predicted class indices.
print('Confusion Matrix')
print(confusion_matrix(test_generator.classes, y_pred))

In [None]:
# Generate the confusion matrix
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix # Import confusion_matrix

# Get predictions for the test set
y_pred = model.predict(test_generator).argmax(axis=1)  # Get predicted class labels

# Class names ordered by their integer index. Passing the class_indices dict
# itself as tick labels only works through implicit dict iteration; an explicit
# list makes the row/column order unambiguous.
class_labels = sorted(test_generator.class_indices, key=test_generator.class_indices.get)

# labels= keeps the matrix square with one row/column per class, so it always
# matches the tick labels even if a class is never seen or never predicted.
conf_matrix = confusion_matrix(
    test_generator.classes,
    y_pred,
    labels=list(range(len(class_labels))),
)
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_labels,
            yticklabels=class_labels)
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix')
plt.show()

In [None]:
# Save the entire model to a file
# model.save() needs a filesystem path; a drive.google.com URL is not one. Write
# into the Drive mount created by drive.mount('/content/drive') instead.
# NOTE(review): change model_dir if the model belongs in a different Drive folder.
model_dir = '/content/drive/MyDrive/Master'
os.makedirs(model_dir, exist_ok=True)
model.save(os.path.join(model_dir, 'CNN_model.h5'))