In [None]:
pip install opendatasets

In [None]:
import os

import opendatasets

# Kaggle dataset that holds the mango-leaf images.
dataset_url = 'https://www.kaggle.com/datasets/aryashah2k/mango-leaf-disease-dataset'
opendatasets.download(dataset_url)

# List the downloaded folder so its contents can be checked by eye.
downloaded_entries = os.listdir('/content/mango-leaf-disease-dataset')
print(downloaded_entries)

In [None]:
import os
import numpy as np
from PIL import Image

# Root folder of the dataset; every entry inside it is treated as one class.
data_path = '/content/mango-leaf-disease-dataset'

# Side length (in pixels) that every image is resized to.
img_size = 224

# List the class folders once so the class count and the label indices below
# come from the same listing. The order is kept exactly as os.listdir returns
# it: label i corresponds to position i in os.listdir(data_path).
class_dirs = os.listdir(data_path)
num_classes = len(class_dirs)

# Preprocessed images and their integer labels, filled in parallel.
X = []
y = []

for i, class_dir in enumerate(class_dirs):
    class_path = os.path.join(data_path, class_dir)

    for img_file in os.listdir(class_path):
        img_path = os.path.join(class_path, img_file)

        # Open inside a context manager so the file handle is released
        # right away instead of lingering until garbage collection.
        with Image.open(img_path) as raw_img:
            # Resize first, then collapse to a single grayscale channel.
            img = raw_img.resize((img_size, img_size)).convert('L')

        # Scale the grayscale pixel values into the [0, 1] range.
        img = np.array(img) / 255.0

        X.append(img)
        y.append(i)

# Stack the per-image arrays / labels into single numpy arrays.
X = np.array(X)
y = np.array(y)

# Report the shapes rather than dumping the arrays themselves.
print("X shape:", X.shape)
print("y shape:", y.shape)


In [None]:
# Integer labels are assigned by enumerating os.listdir(data_path), and that
# listing order is filesystem-dependent. Build the index -> class-name table
# from the same listing instead of hard-coding one particular order, so the
# names always line up with the labels the classifiers were trained on.
class_names = os.listdir(data_path)
print(class_names)
class_mapping = dict(enumerate(class_names))


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing. The fixed seed makes the split
# (and every metric computed from it) reproducible across runs, and
# stratifying on y keeps the class proportions the same in both subsets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

# Flatten every image into a 1-D feature vector so the data is a 2-D
# (n_samples, n_features) array for the estimators below.
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

# Initialize the classifiers. The forest is seeded so repeated runs build the
# same trees and report the same scores.
svc = SVC(kernel='linear')
rfc = RandomForestClassifier(n_estimators=100, random_state=42)
knn = KNeighborsClassifier(n_neighbors=5)

# Fit each classifier and predict the test set exactly once; the stored
# predictions feed both the accuracy and the classification report, so the
# (expensive) inference on the flattened images is not repeated.
svc.fit(X_train, y_train)
svc_test_pred = svc.predict(X_test)
svc_acc = accuracy_score(y_test, svc_test_pred)
print("Support Vector Machine accuracy: {:.2f}%".format(svc_acc*100))

rfc.fit(X_train, y_train)
rfc_test_pred = rfc.predict(X_test)
rfc_acc = accuracy_score(y_test, rfc_test_pred)
print("Random Forest Classifier accuracy: {:.2f}%".format(rfc_acc*100))

knn.fit(X_train, y_train)
knn_test_pred = knn.predict(X_test)
knn_acc = accuracy_score(y_test, knn_test_pred)
print("K-Nearest Neighbors Classifier accuracy: {:.2f}%".format(knn_acc*100))

# Per-class precision / recall / F1 for each model.
print("SVM classification report:")
print(classification_report(y_test, svc_test_pred))

print("Random forest classification report:")
print(classification_report(y_test, rfc_test_pred))

print("KNN classification report:")
print(classification_report(y_test, knn_test_pred))

In [None]:
from sklearn.metrics import *
# Weighted F1 on the held-out test set for each fitted classifier.
svc_f1 = f1_score(y_test, svc.predict(X_test), average='weighted')
rfc_f1 = f1_score(y_test, rfc.predict(X_test), average='weighted')
knn_f1 = f1_score(y_test, knn.predict(X_test), average='weighted')

print("SVM F1 score:", svc_f1)
print("Random forest F1 score:", rfc_f1)
print("KNN F1 score:", knn_f1)

# Pick the classifier with the highest F1. max() returns the first maximal
# entry, so ties resolve in the listed order (SVM, then random forest, then
# KNN). best_clf is always the estimator object itself, never its score.
candidates = [
    ("SVM", svc, svc_f1),
    ("Random forest", rfc, rfc_f1),
    ("KNN", knn, knn_f1),
]
best_name, best_clf, best_f1 = max(candidates, key=lambda entry: entry[2])
print(f"{best_name} classifier has the best performance.")

In [None]:
#predicting the image
import cv2
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

# Load the new image
img_path = '/content/mango-leaf-disease-dataset/Powdery Mildew/20211109_121223 (Custom).jpg'
img_size = 224
try:
    # The context manager closes the file once the resized copy exists.
    with Image.open(img_path) as source_img:
        # tt keeps the resized colour image for display further down.
        tt = source_img.resize((img_size, img_size))
except FileNotFoundError:
    print(f"FileNotFoundError: Could not load image at {img_path}")
    # Re-raise so only this cell stops, with a traceback; calling exit() in a
    # notebook asks the whole kernel session to end instead.
    raise

# Apply the grayscale conversion and [0, 1] scaling used for the training images.
img = tt.convert('L')
img = np.array(img) / 255.0

# Flatten to a single-row feature matrix (1 sample, all pixels).
new_image = img.reshape(1, -1)

# Use the trained random forest classifier to predict the label of the new
# image. Predict once and reuse the result.
rfc_pred = rfc.predict(new_image)

# Get the predicted class name
predicted_class_index = rfc_pred[0]
predicted_class_name = class_mapping[predicted_class_index]

# Show the image with the predicted class drawn just above it
plt.imshow(tt)

plt.text(0, -5, predicted_class_name, fontsize=12, color='white', backgroundcolor='gray')
plt.axis('off')
plt.show()
print("Random forest classifier label")

print("The image depicts:", predicted_class_name)


In [None]:
#Experiments on Random Forest
from sklearn.model_selection import *

# Set up the parameter grid for the random forest classifier
param_grid = {'n_estimators': [100],
    'max_depth': [10, 20, 30],
    'min_samples_split': [2, 4],
    'min_samples_leaf': [1, 2]}

# Seed the base estimator so the search and the reported scores are repeatable.
rfc = RandomForestClassifier(random_state=42)
rfc_grid = GridSearchCV(
    rfc,
    param_grid,
    cv=5,
    n_jobs=-1,  # use all available CPU cores
    verbose=2
)

rfc_grid.fit(X_train, y_train)

# Best model found by the search, with its mean cross-validation score and
# hyperparameters.
best_model = rfc_grid.best_estimator_
best_score = rfc_grid.best_score_
best_params = rfc_grid.best_params_

# Predict the test set once with the tuned model. Storing the result in
# rfc_pred ensures the report below is computed from these predictions and
# not from a stale value left over from an earlier cell.
rfc_pred = best_model.predict(X_test)
test_accuracy = accuracy_score(y_test, rfc_pred)
print('Test accuracy:', test_accuracy)

# Print the best hyperparameters and the corresponding classification report
print("Best RFC hyperparameters:", best_params)
print("RFC classification report:")
print(classification_report(y_test, rfc_pred))

# best_estimator_ is already fitted with the best parameters, so reuse it as
# the tuned forest instead of training a second, identical configuration.
rfc = best_model
y_pred = rfc_pred
accuracy = test_accuracy
print("Mean cross-validation score:", best_score)
print("Test accuracy:", accuracy)
accuracy=accuracy*100
print("test accuracy",accuracy,"%")

In [None]:
#experiments on svm
from sklearn.model_selection import *
# Set up the parameter grid for the SVM classifier
param_grid = {'C': [0.1],
              'kernel': ['poly', 'rbf'],
              'gamma': ['scale']}

# svc only serves as the template estimator; the kernel is taken from the grid.
svc_grid = GridSearchCV(
    svc,
    param_grid,
    cv=5,
    n_jobs=-1,  # use all available CPU cores
    verbose=2
)

svc_grid.fit(X_train, y_train)

# Predict the test set once with the best model found by GridSearchCV and
# derive every metric from that single set of predictions.
best_model = svc_grid.best_estimator_
svc_pred = best_model.predict(X_test)
test_accuracy = accuracy_score(y_test, svc_pred)
print('Test accuracy:', test_accuracy)

# Print the best hyperparameters and the corresponding classification report
print("Best SVM hyperparameters:", svc_grid.best_params_)
accuracy = test_accuracy
print("accuracy",accuracy)

print("SVM classification report:")
print(classification_report(y_test, svc_pred))
