# In [None]:
import os
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import tensorflow as tf
import imageio

# Root folders of the two image sets. Every image's parent directory name is
# used as its class label.
train_dataset_path = Path("/input/traindataset")  # Update the path to the "train" folder
test_dataset_path = Path("/input/testdataset")    # Update the path to the "test" folder

# Load train images.
# The glob result is sorted because directory traversal order is not
# guaranteed; a fixed starting order is what makes the seeded shuffle below
# (and any split taken from it) reproducible across machines.
train_images = sorted(train_dataset_path.glob(r'**/*.jpg'))
# `.name` rather than `.stem`: `.stem` would cut a folder such as "v1.5" at
# the last dot and silently merge or rename classes.
train_labels = [x.parent.name for x in train_images]

# Load test images (same procedure as for the train set).
test_images = sorted(test_dataset_path.glob(r'**/*.jpg'))
test_labels = [x.parent.name for x in test_images]

# Fail early when either folder yields no images.
if not train_images or not test_images:
    raise ValueError("No images found in either the 'train' or 'test' folder.")

# Convert to pandas Series of strings; the Series names become the DataFrame
# column names used by the generators ("mptraintest" = file path).
train_images = pd.Series(train_images, name="mptraintest").astype(str)
train_labels = pd.Series(train_labels, name="Labels").astype(str)

test_images = pd.Series(test_images, name="mptraintest").astype(str)
test_labels = pd.Series(test_labels, name="Labels").astype(str)

# Create two-column DataFrames: file path + label.
train_data = pd.concat([train_images, train_labels], axis=1)
test_data = pd.concat([test_images, test_labels], axis=1)

# Shuffle the rows with a fixed seed and renumber the index from zero.
train_data = train_data.sample(frac=1, random_state=42).reset_index(drop=True)
test_data = test_data.sample(frac=1, random_state=42).reset_index(drop=True)

# Notebook-style preview of the first rows; the value is discarded when this
# file is run as a plain script.
train_data.head(10)

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import mobilenet
from tensorflow.keras.applications.vgg19 import preprocess_input as vgg19_preprocess_input

# The preprocessing function must match the pretrained backbone. The model
# built further down is VGG19, so its own `preprocess_input` is used here;
# swap this together with the backbone if a different network is chosen.
# (The variable names below are kept unchanged so other cells keep working.)
train_generator_mobile_net = ImageDataGenerator(
    preprocessing_function=vgg19_preprocess_input,
    validation_split=0.05  # Use 5% of the train data for validation
)

# Options shared by every iterator built from the DataFrames.
common_flow_args = dict(
    x_col="mptraintest",
    y_col="Labels",
    target_size=(256, 256),
    color_mode="rgb",
    class_mode="categorical",
    batch_size=32,
)

# Training portion (95%) of the train DataFrame.
train = train_generator_mobile_net.flow_from_dataframe(
    dataframe=train_data,
    shuffle=True,
    seed=42,
    subset='training',
    **common_flow_args
)

# Validation portion (5%) of the same train DataFrame.
validation = train_generator_mobile_net.flow_from_dataframe(
    dataframe=train_data,
    shuffle=True,
    seed=42,
    subset='validation',
    **common_flow_args
)

# Keep the test data for evaluation purposes: same preprocessing, no split.
test_generator_mobile_net = ImageDataGenerator(
    preprocessing_function=vgg19_preprocess_input
)

# Augmentation options (rotation/zoom/shift) are ImageDataGenerator
# constructor arguments, not flow_from_dataframe arguments, so they are not
# passed here; test images should not be augmented in any case.
# `classes` pins the label -> index mapping to the training one so the
# one-hot targets line up with the model's output units even if the test
# folder's label set differs (rows with labels unknown to training are
# expected to be dropped by Keras — verify against the installed version).
test = test_generator_mobile_net.flow_from_dataframe(
    dataframe=test_data,
    classes=sorted(train.class_indices, key=train.class_indices.get),
    shuffle=False,  # keep row order so predictions align with the labels
    **common_flow_args
)



from tensorflow.keras.applications import VGG19   # Swap in another DCNN model here, e.g. VGG16, VGG19, ResNet50

# Pretrained convolutional base: ImageNet weights, no classifier head,
# global average pooling on top, for 256x256 RGB inputs.
VGG_ = VGG19(
    weights="imagenet",
    include_top=False,
    pooling='avg',
    input_shape=(256, 256, 3),
)

# Freeze the base so that only layers added on top of it are trained.
VGG_.trainable = False


from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy

# Number of output units is taken from the training iterator instead of a
# hard-coded 30, so the head always matches the one-hot targets it is fed.
num_classes = len(train.class_indices)

# Building the predictor layers on top of the frozen base's pooled output.
x = Dense(256, activation='relu')(VGG_.output)
x = Dense(128, activation='relu')(x)
x = Dropout(0.4)(x)

# Softmax over the classes discovered in the training data.
outputs = Dense(num_classes, activation='softmax')(x)

# Full model: base input -> base -> dense head.
VGG = Model(inputs=VGG_.inputs, outputs=outputs)

# Adam with default settings; categorical cross-entropy pairs with the
# one-hot labels produced by class_mode="categorical".
VGG.compile(
    optimizer=Adam(),
    loss=CategoricalCrossentropy(),
    metrics=[CategoricalAccuracy()]
)


# Directory handed to the ModelCheckpoint callback during training.
CHECKPOINTS = Path("./checkpoints")
# Create it if needed; an already existing directory is not an error.
CHECKPOINTS.mkdir(parents=False, exist_ok=True)


from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

# Training.
# `batch_size` is deliberately not passed to fit(): the iterators already
# yield batches of 32, and Keras documents that batch_size must not be
# specified when the data comes from a generator / Sequence.
results = VGG.fit(
    train,
    validation_data=validation,
    epochs=30,
    callbacks=[
        # Stop after 4 epochs without val_loss improvement and roll back to
        # the best weights seen.
        EarlyStopping(
            monitor="val_loss",
            patience=4,
            restore_best_weights=True
        ),
        # Lower the learning rate after 2 stagnant epochs (default monitor).
        ReduceLROnPlateau(patience=2),
        # Persist the model whenever val_loss reaches a new best.
        ModelCheckpoint(
            str(CHECKPOINTS),
            monitor="val_loss",
            save_best_only=True
        ),
    ]
)


# Per-epoch metrics recorded by fit(), one column per metric.
history_frame = pd.DataFrame(results.history)

# One figure per metric family: training curve next to validation curve.
for chart_title, columns in (
    ("Accuracy", ['categorical_accuracy', 'val_categorical_accuracy']),
    ("Loss", ['loss', 'val_loss']),
):
    history_frame[columns].plot()
    plt.title(chart_title)
    plt.show()


# Export the four tracked curves to CSV without the index column.
df = history_frame[['loss', 'val_loss', 'categorical_accuracy', 'val_categorical_accuracy']]
df.to_csv('VGG19.csv', index=False)


# Target directory for the exported model; created on first use.
MODEL_PATH = Path("./saved_model")
MODEL_PATH.mkdir(parents=False, exist_ok=True)

saved_model_dir = str(MODEL_PATH)
VGG.save(saved_model_dir)


# Dump the training history alongside the model.
np.save(file='model_history.npy', arr=results.history)

# To read it back later:
# history = np.load('model_history.npy', allow_pickle=True).item()


# Testing the model
# NOTE: this rebinds `results`, which until here held the History object
# returned by fit(); anything that needs the history must run before this.
results = VGG.evaluate(test)


from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

# Single forward pass over the test set (previously predict() ran twice and
# the first result was thrown away). `predictions` holds the raw class
# probabilities, one row per test image.
predictions = VGG.predict(test)

# Model outputs follow the training label -> index mapping, so invert that
# one to name the predicted classes.
labels = dict((v, k) for k, v in train.class_indices.items())
predicted_labels = [labels[i] for i in np.argmax(predictions, axis=1)]

# Ground truth is read from the iterator itself rather than from the
# DataFrame, so it stays aligned with `predictions` even if the iterator
# dropped rows (e.g. unreadable file names) when it was created.
test_index_to_label = {v: k for k, v in test.class_indices.items()}
actual_labels = [test_index_to_label[i] for i in test.classes]

# Compute the classification report
print(classification_report(actual_labels, predicted_labels))

# Compute and plot the confusion matrix.
# The axis labels are the union of true and predicted classes and are passed
# explicitly to confusion_matrix, so tick labels always match the matrix
# rows/columns (a class that is only ever predicted used to shift the ticks).
class_names = sorted(set(actual_labels) | set(predicted_labels))
cf = confusion_matrix(actual_labels, predicted_labels, labels=class_names, normalize="true")
plt.figure(figsize=(16, 8))
sns.heatmap(cf, annot=True, xticklabels=class_names, yticklabels=class_names, cmap='Blues')
plt.title('VGG19')
plt.savefig('VGG19.png', bbox_inches='tight', dpi=500)
plt.show()



# Additionally store the trained model as a single HDF5 file.
VGG.save(filepath='VGG19.h5')