In [1]:
import matplotlib.pyplot as plt
import pandas as pd 
import numpy as np # linear algebra

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
test_path = '/content/drive/MyDrive/plant_data/archive/Test/Test'
train_path = '/content/drive/MyDrive/plant_data/archive/Train/Train'

In [10]:
import os
import cv2

# Load the dataset
class_names_train = os.listdir(train_path)
class_names_test = os.listdir(test_path)

num_classes_train = len(class_names_train)
num_classes_test = len(class_names_test)
X_train = []
X_test = []
y_train = []
y_test = []
label_map_train = {class_names_train[i]: i for i in range(num_classes_train)}
label_map_test = {class_names_test[i]: i for i in range(num_classes_test)}
 

for class_name in class_names_train:
    class_dir = os.path.join(train_path, class_name)
    for filename in os.listdir(class_dir):
        img_path = os.path.join(class_dir, filename)
        img = cv2.imread(img_path)
        img = cv2.resize(img, (128, 128))
        X_train.append(img.flatten())
        y_train.append(label_map_train[class_name])


for class_name in class_names_test:
    class_dir = os.path.join(test_path, class_name)
    for filename in os.listdir(class_dir):
        img_path = os.path.join(class_dir, filename)
        img = cv2.imread(img_path)
       
        if img is not None:
            img = cv2.resize(img, (128, 128))
            X_test.append(img.flatten())
            y_test.append(label_map_test[class_name])

In [11]:
print("X_train: {} \nX_test: {}".format((len(X_train), len(X_train[0])), (len(X_test), len(X_test[0]))))

X_train: (1322, 49152) 
X_test: (150, 49152)


In [12]:
# Since size of columns is too big, we will be using PCA as it is bound to have 
# some columns with high correlation

In [13]:
from sklearn.decomposition import PCA

In [56]:
# # pca = PCA(n_components=0.95, svd_solver='full')
# pca = PCA(n_components=100)
# X_train_pca = pca.fit_transform(X_train)
# X_test_pca = pca.transform(X_test)

In [102]:
X = np.concatenate((X_train, X_test))

# fit PCA on the entire dataset
pca = PCA(n_components=100)
X_pca = pca.fit_transform(X)

# split the transformed data back into training and test sets
X_train_pca = X_pca[:len(X_train)]
X_test_pca = X_pca[len(X_train):]

In [103]:
print("X_train after pca: {} \nX_test after pca: {}".format((len(X_train_pca), 
                                                        len(X_train_pca[0])), (len(X_test_pca), len(X_test_pca[0]))))

X_train after pca: (1322, 100) 
X_test after pca: (150, 100)


In [104]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV

In [105]:
param_grid_knn = {'n_neighbors': [3, 5, 7],
              'weights': ['uniform', 'distance'],
              # 'metric': ['euclidean', 'manhattan', 'chebyshev'],
              'p': [1, 2]}

In [106]:
knn = KNeighborsClassifier()

In [107]:
grid_search_knn = GridSearchCV(knn, param_grid_knn, cv=5)

In [108]:
grid_search_knn.fit(X_train_pca, y_train)

In [109]:
print("Best hyperparameters:", grid_search_knn.best_params_)
print("Best accuracy score:", grid_search_knn.best_score_)

Best hyperparameters: {'n_neighbors': 7, 'p': 2, 'weights': 'distance'}
Best accuracy score: 0.6990337335620354


In [110]:
y_pred = grid_search_knn.predict(X_test_pca)

In [111]:
accuracy = accuracy_score(y_test, y_pred)
accuracy

0.26

In [112]:
target_names = ['class 0', 'class 1', 'class 2']
cr = classification_report(y_test, y_pred, target_names=target_names)
print(cr)

              precision    recall  f1-score   support

     class 0       0.22      0.44      0.29        50
     class 1       0.04      0.02      0.03        50
     class 2       0.67      0.32      0.43        50

    accuracy                           0.26       150
   macro avg       0.31      0.26      0.25       150
weighted avg       0.31      0.26      0.25       150



In [28]:
from sklearn.svm import SVC

In [99]:
param_grid_svm = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'degree': [2, 3, 4],
    'gamma': ['scale', 'auto'] + [0.1, 1, 10],
}
svm = SVC()

In [100]:
grid_search_svm = GridSearchCV(estimator=svm, param_grid=param_grid_svm, cv=5, n_jobs=-1)

In [None]:
grid_search_svm.fit(X_train_pca, y_train)

In [None]:
print("Best parameters: ", grid_search_svm.best_params_)
print("Best accuracy score: ", grid_search_svm.best_score_)

In [31]:
svm.score(X_test_pca, y_test)

0.2

In [None]:
y_pred = grid_search_svm.predict(X_test_pca)

In [None]:
accuracy = accuracy_score(y_test, y_pred)
accuracy

In [None]:
target_names = ['class 0', 'class 1', 'class 2']
cr = classification_report(y_test, y_pred, target_names=target_names)
print(cr)

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
param_grid_rf = {
    'n_estimators': [50, 100, 200],
    'max_depth': [5, 10, 15, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}
rf = RandomForestClassifier(random_state=56)

In [None]:
grid_search_rf = GridSearchCV(estimator=rf, param_grid=param_grid_rf, cv=5, scoring='accuracy')

In [None]:
grid_search_rf.fit(X_train_pca, y_train)

In [None]:
print("Best hyperparameters: ", grid_search_rf.best_params_)
print("Best score: ", grid_search_rf.best_score_)

In [None]:
y_pred = grid_search_rf.predict(X_test_pca)

In [None]:
accuracy = accuracy_score(y_test, y_pred)
accuracy

In [None]:
target_names = ['class 0', 'class 1', 'class 2']
cr = classification_report(y_test, y_pred, target_names=target_names)
print(cr)

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

In [None]:
knn = KNeighborsClassifier(n_neighbors=5)

In [None]:
train_path

'/Users/vaibhav/Downloads/archive (1)/Train/Train'

In [None]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load the dataset
data_dir = '/Users/vaibhav/Downloads/archive (1)/Train/Train'
data_test = '/Users/vaibhav/Downloads/archive (1)/Test/Test'
class_names = os.listdir(data_dir)
class_names1 = os.listdir(data_test)
num_classes = len(class_names)
num_classes1 = len(class_names1)
X_train = []
X_test = []
y_train = []
y_test = []
label_map = {class_names[i]: i for i in range(num_classes)}
label_map1 = {class_names[i]: i for i in range(num_classes1)}
 

for class_name in class_names:
    class_dir = os.path.join(data_dir, class_name)
    for filename in os.listdir(class_dir):
        img_path = os.path.join(class_dir, filename)
        img = cv2.imread(img_path)
        img = cv2.resize(img, (128, 128))
        X_train.append(img.flatten())
        y_train.append(label_map[class_name])


for class_name1 in class_names1:
    class_dir1 = os.path.join(data_test, class_name1)
    for filename1 in os.listdir(class_dir1):
        img_path = os.path.join(class_dir1, filename1)
        img = cv2.imread(img_path)
       
        if img is not None:
            img = cv2.resize(img, (128, 128))
            X_test.append(img.flatten())
            y_test.append(label_map1[class_name1])
        

In [None]:
# Reshape the data
X_train = np.array(X_train).reshape(len(X_train), -1)
y_train = np.array(y_train)

# Reshape the data
X_test = np.array(X_test).reshape(len(X_test), -1)
y_test = np.array(y_test)
        

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from skopt import gp_minimize
from skopt.space import Real, Integer
from skopt.utils import use_named_args


# Define the SVM classifier and its hyperparameters
def svm_classifier(C, gamma):
    svm = SVC(C=C, gamma=gamma, random_state=42)
    svm.fit(X_train, y_train)
    y_pred = svm.predict(X_test)
    return -accuracy_score(y_test, y_pred)

# Define the search space for hyperparameters
search_space = [
    Real(1e-3, 1e3, "log-uniform", name="C"),
    Real(1e-6, 1e1, "log-uniform", name="gamma"),
]

# Use Bayesian optimization to find the best hyperparameters
@use_named_args(search_space)
def objective(**params):
    return svm_classifier(**params)

result = gp_minimize(objective, search_space, n_calls=20, random_state=42)

# Print the best hyperparameters and accuracy score
print("Best hyperparameters: C={}, gamma={}".format(result.x[0], result.x[1]))
print("Best accuracy: {:.3f}".format(-result.fun))

In [None]:
# Train the KNN model
knn = KNeighborsClassifier(n_neighbors=7, metric='chebyshev')
knn.fit(X_train, y_train)

# Evaluate the model
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.32


In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Generate the confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)

# Display the confusion matrix as a table
print("Confusion Matrix:")
print(conf_matrix)

# Display the confusion matrix as an image
sns.set(font_scale=1.4)
sns.heatmap(conf_matrix, annot=True, fmt='g', cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()

In [None]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

svm = SVC(kernel='rbf', gamma='scale')
svm.fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)
accuracy_svm = accuracy_score(y_test, y_pred_svm)

In [None]:

# Train the Random Forest model
rf = RandomForestClassifier(n_estimators=50, random_state=56)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
accuracy_rf = accuracy_score(y_test, y_pred_rf)

In [None]:
print("Accuracy SVM:", accuracy_svm)
print("Accuracy rf:", accuracy_rf)


Accuracy SVM: 0.7133333333333334
Accuracy rf: 0.5866666666666667


In [None]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Load the dataset
data_dir = '/Users/vaibhav/Downloads/archive (1)/Train/Train'
data_test = '/Users/vaibhav/Downloads/archive (1)/Test/Test'
class_names = os.listdir(data_dir)
class_names1 = os.listdir(data_test)
num_classes = len(class_names)
num_classes1 = len(class_names1)
X = []
y = []
X_test = []
y_test = []
input_shape = (128, 128, 3)
os.environ['TF_XLA_FLAGS'] = '--tf_xla_cpu_global_jit'

label_map = {class_names[i]: i for i in range(num_classes)}
label_map1 = {class_names1[i]: i for i in range(num_classes1)}

for class_name in class_names:
    class_dir = os.path.join(data_dir, class_name)
    for filename in os.listdir(class_dir):
        img_path = os.path.join(class_dir, filename)
        img = cv2.imread(img_path)
        img = cv2.resize(img, input_shape[:2])
        X.append(img)
        y.append(label_map[class_name])

for class_name1 in class_names1:
    class_dir1 = os.path.join(data_test, class_name1)
    for filename1 in os.listdir(class_dir1):
        img_path = os.path.join(class_dir1, filename1)
        img = cv2.imread(img_path)
        if img is not None:
            img = cv2.resize(img, input_shape[:2])
            X_test.append(img)
            y_test.append(label_map1[class_name1])

X = np.array(X)
y = to_categorical(y, num_classes=num_classes)
X_test = np.array(X_test)
y_test = to_categorical(y_test, num_classes=num_classes1)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the model architecture


In [None]:
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(256, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_val, y_val))

# Evaluate the model on the test set
test_loss, test_acc = model.evaluate(X_test, y_test)
print('Test accuracy:', test_acc)
