In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras.applications import VGG16
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.applications import Xception, VGG19, MobileNet, VGG16, EfficientNetB0
from keras.models import Model
from keras.layers import GlobalAveragePooling2D
from sklearn.feature_selection import SelectKBest
from sklearn.discriminant_analysis import StandardScaler
from sklearn.feature_selection import f_classif
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import ModelCheckpoint
from sklearn.linear_model import LogisticRegression
from keras.utils import to_categorical
from sklearn.metrics import accuracy_score
import joblib

In [2]:
base_dir = "F:\Resources & Codes\Lab Dataset\Plant Village Dataset\segmented\Apple"
image_size = 224
batch_size = 32

In [3]:
feature_extraction_datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1/255.0)


In [4]:
# Use ImageDataGenerator for loading and augmenting images
datagen = keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2  # Adjust validation split as needed
)

# Load and label images using flow_from_directory
generator = datagen.flow_from_directory(
    base_dir,
    target_size=(image_size, image_size),
    batch_size=32,  # Adjust batch size as needed
    class_mode='categorical',  # Use 'categorical' for multi-class classification
    subset='training'  # Use 'validation' for validation set
)

Found 2537 images belonging to 4 classes.


In [5]:
all_images = []
all_labels = []

for class_folder in os.listdir(base_dir):
    class_path = os.path.join(base_dir, class_folder)
    if os.path.isdir(class_path):
        for filename in os.listdir(class_path):
            img_path = os.path.join(class_path, filename)
            img = keras.preprocessing.image.load_img(img_path, target_size=(image_size, image_size))
            img_array = keras.preprocessing.image.img_to_array(img)
            all_images.append(img_array)
            all_labels.append(class_folder)


In [6]:
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(all_labels)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(
    all_images, y_encoded, test_size=0.1, random_state=42, stratify=y_encoded
)

In [8]:
X_train = np.reshape(X_train, (-1, image_size, image_size, 3))

In [9]:
train_data = feature_extraction_datagen.flow(
    X_train, keras.utils.to_categorical(y_train), batch_size=batch_size, shuffle=False
)

In [10]:
X_test = np.reshape(X_test, (-1, image_size, image_size, 3))

In [11]:
test_data = feature_extraction_datagen.flow(
    X_test, keras.utils.to_categorical(y_test), batch_size=batch_size, shuffle=False
)

In [12]:
# Define a function for feature extraction using a pre-trained model
def extract_features(model, X_data):
    base_model = model(weights='imagenet', include_top=False)
    model = Model(inputs=base_model.input, outputs=GlobalAveragePooling2D()(base_model.output))

    features = []
    for img in X_data:
        img = np.expand_dims(img, axis=0)
        features.append(model.predict(img))

    return np.vstack(features)

# Define the models
models = [MobileNet, VGG16]

# Initialize lists to store extracted features
X_train_features = []
X_test_features = []

# Extract features using each model
for model in models:
    X_train_features.append(extract_features(model, X_train))
    X_test_features.append(extract_features(model, X_test))

# Convert lists to numpy arrays
X_train_features = np.concatenate(X_train_features, axis=1)
X_test_features = np.concatenate(X_test_features, axis=1)

# Standardize the features
scaler = StandardScaler()
X_train_features_std = scaler.fit_transform(X_train_features)
X_test_features_std = scaler.fit_transform(X_test_features)




In [13]:
# Perform ANOVA analysis
k_best = SelectKBest(f_classif, k='all')
X_train_selected = k_best.fit_transform(X_train_features_std, y_train)
X_test_selected = k_best.transform(X_test_features_std)  # Use transform instead of fit_transform

# Print selected features
selected_features_indices = np.where(k_best.get_support())[0]
print("Selected Features Indices:", selected_features_indices)

Selected Features Indices: [   0    1    2 ... 1533 1534 1535]


Logistic Regression

In [14]:
logreg = LogisticRegression(random_state=42,max_iter=1000)

In [15]:
history = logreg.fit(X_train_selected, y_train)

In [16]:
# Save the trained model to a file
joblib.dump(logreg, 'logistic_regression_model.joblib')

# Load the saved model
loaded_model = joblib.load('logistic_regression_model.joblib')

In [17]:
# Predict on the test set using the loaded model
predictions = loaded_model.predict(X_test_selected)

# Evaluate accuracy
accuracy = accuracy_score(y_test, predictions)
print("Logistic Regression Test Accuracy:", accuracy*100)

Logistic Regression Test Accuracy: 98.11320754716981


In [18]:
# Predict on the test set using the loaded model
predictions = loaded_model.predict(X_train_selected)

# Evaluate accuracy
accuracy = accuracy_score(y_train, predictions)
print("Logistic Regression Train Accuracy:", accuracy*100)

Logistic Regression Train Accuracy: 100.0


Define the DNN model

In [19]:
# One-hot encoding
y_train_one_hot = to_categorical(y_train, num_classes=4)
y_test_one_hot = to_categorical(y_test, num_classes=4)

In [20]:
# Define the DNN model
def build_dnn_model(input_dim, num_of_classes):
    model = Sequential([
        Dense(256, activation='relu', input_dim=input_dim),
        Dropout(0.5),
        Dense(128, activation='relu'),
        Dropout(0.3),
        Dense(num_of_classes, activation='softmax')  # Use 'softmax' for multi-class classification
    ])

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Check the number of features after ANOVA feature selection
num_selected_features = len(selected_features_indices)

# Build the DNN model
dnn_model = build_dnn_model(num_selected_features, 4)

In [21]:
# Define the ModelCheckpoint callback
checkpoint = ModelCheckpoint('model_checkpoint.h5', 
                             monitor='val_accuracy', 
                             save_best_only=True, 
                             mode='max', 
                             verbose=1)

In [22]:
# Train the DNN model with one-hot encoded labels
history = dnn_model.fit(X_train_selected, y_train_one_hot,
                        epochs=100,
                        batch_size=32,
                        validation_data=(X_test_selected, y_test_one_hot), callbacks=[checkpoint])

Epoch 1/100
Epoch 1: val_accuracy improved from -inf to 0.94025, saving model to model_checkpoint.h5
Epoch 2/100
Epoch 2: val_accuracy improved from 0.94025 to 0.96226, saving model to model_checkpoint.h5
Epoch 3/100
Epoch 3: val_accuracy did not improve from 0.96226
Epoch 4/100
Epoch 4: val_accuracy improved from 0.96226 to 0.96855, saving model to model_checkpoint.h5
Epoch 5/100
Epoch 5: val_accuracy did not improve from 0.96855
Epoch 6/100
Epoch 6: val_accuracy did not improve from 0.96855
Epoch 7/100
Epoch 7: val_accuracy did not improve from 0.96855
Epoch 8/100
Epoch 8: val_accuracy improved from 0.96855 to 0.97170, saving model to model_checkpoint.h5
Epoch 9/100
Epoch 9: val_accuracy did not improve from 0.97170
Epoch 10/100
Epoch 10: val_accuracy did not improve from 0.97170
Epoch 11/100
Epoch 11: val_accuracy improved from 0.97170 to 0.97799, saving model to model_checkpoint.h5
Epoch 12/100
Epoch 12: val_accuracy did not improve from 0.97799
Epoch 13/100
Epoch 13: val_accuracy 