In [None]:
# One-time setup: this cell was run once to create the data1/split folder (train/validation/test); it is kept commented out for reference.
# import os
# import shutil
# import random

# # Define data directory
# data_dir = "data1/Indian-monuments/images"
# train_dir = "data1/split/Indian-monuments/images/train"
# val_dir = "data1/split/Indian-monuments/images/validation"
# test_dir = "data1/split/Indian-monuments/images/test"

# # Create directories for train, validation, and test sets if they don't exist
# os.makedirs(train_dir, exist_ok=True)
# os.makedirs(val_dir, exist_ok=True)
# os.makedirs(test_dir, exist_ok=True)

# # Define the ratio for splitting the data
# train_ratio = 0.8
# val_ratio = 0.1
# test_ratio = 0.1

# # Get the list of class directories
# class_directories = [
#     directory
#     for directory in os.listdir(data_dir)
#     if os.path.isdir(os.path.join(data_dir, directory))
# ]

# # Iterate over each class directory
# for class_dir in class_directories:
#     # Get the list of image filenames for the current class
#     class_images = os.listdir(os.path.join(data_dir, class_dir))
#     # Shuffle the list of image filenames
#     random.shuffle(class_images)

#     # Calculate the number of images for each subset
#     num_train = int(len(class_images) * train_ratio)
#     num_val = int(len(class_images) * val_ratio)
#     num_test = len(class_images) - num_train - num_val

#     # Split the images into train, validation, and test sets
#     train_images = class_images[:num_train]
#     val_images = class_images[num_train : num_train + num_val]
#     test_images = class_images[num_train + num_val :]

#     # Copy images to their respective directories
#     for image in train_images:
#         src = os.path.join(data_dir, class_dir, image)
#         dst = os.path.join(train_dir, class_dir, image)
#         os.makedirs(os.path.dirname(dst), exist_ok=True)
#         shutil.copyfile(src, dst)

#     for image in val_images:
#         src = os.path.join(data_dir, class_dir, image)
#         dst = os.path.join(val_dir, class_dir, image)
#         os.makedirs(os.path.dirname(dst), exist_ok=True)
#         shutil.copyfile(src, dst)

#     for image in test_images:
#         src = os.path.join(data_dir, class_dir, image)
#         dst = os.path.join(test_dir, class_dir, image)
#         os.makedirs(os.path.dirname(dst), exist_ok=True)
#         shutil.copyfile(src, dst)

# # Now you can use the directories train_dir, val_dir, and test_dir for training, validation, and testing respectively.

In [None]:
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.applications import MobileNetV2
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
import pandas as pd

In [None]:
def list_image_files(root, extensions=("jpg", "jpeg", "png")):
    """Recursively collect image files below ``root``.

    Every extension is searched with a recursive ``**/`` pattern, because the
    images live in per-class sub-folders of each split directory. (The previous
    code searched only ``*.jpg`` recursively, so ``.jpeg`` and ``.png`` files
    inside class folders were silently skipped.)

    Args:
        root: Directory to search (``str`` or ``pathlib.Path``).
        extensions: File extensions to match, without the leading dot.

    Returns:
        list[pathlib.Path]: Matches grouped by extension in the order given;
        paths are sorted within each group so the result does not depend on
        file-system enumeration order.
    """
    root = Path(root)
    found = []
    for extension in extensions:
        found.extend(sorted(root.glob(f"**/*.{extension}")))
    return found


train_dir = Path("data1/split/Indian-monuments/images/train")
train_filepaths = list_image_files(train_dir)

val_dir = Path("data1/split/Indian-monuments/images/validation")
val_filepaths = list_image_files(val_dir)

test_dir = Path("data1/split/Indian-monuments/images/test")
test_filepaths = list_image_files(test_dir)

In [None]:
def proc_img(filepath):
    """Build a shuffled DataFrame of image paths and their class labels.

    The label of an image is the name of the directory that directly contains
    it (e.g. ``.../train/<label>/<image>.jpg``).

    Args:
        filepath: Sequence of image paths (``pathlib.Path`` or ``str``).

    Returns:
        pandas.DataFrame: Columns ``"Filepath"`` (paths as strings) and
        ``"Label"``, with rows shuffled using a fixed seed (42) and the index
        reset to ``0..n-1``.
    """
    # Path.parent.name works with the platform's own separator; splitting on
    # "\\" only worked on Windows and raised IndexError on POSIX-style paths.
    labels = [Path(path).parent.name for path in filepath]
    filepath = pd.Series(filepath, name="Filepath").astype(str)
    labels = pd.Series(labels, name="Label")
    df = pd.concat([filepath, labels], axis=1)
    # Fixed random_state keeps the shuffle reproducible between runs.
    df = df.sample(frac=1, random_state=42).reset_index(drop=True)
    return df


# Build one shuffled (Filepath, Label) frame per split, in train/val/test order.
train_df, val_df, test_df = map(
    proc_img, (train_filepaths, val_filepaths, test_filepaths)
)

In [None]:
# Lift pandas' display truncation limits (rows, columns, cell width) so that
# frames and long file paths are shown in full.
for display_option in (
    "display.max_rows",
    "display.max_columns",
    "display.max_colwidth",
):
    pd.set_option(display_option, None)

In [None]:
# Keep the first row seen for each label; reset_index() stores the previous
# row position in an "index" column. drop_duplicates already returns a new
# frame, so train_df itself is left untouched.
unique_labels = train_df.drop_duplicates(subset=["Label"]).reset_index()

# Number of distinct classes in the training split (displayed as cell output).
train_df["Label"].nunique()

In [None]:
# Number of distinct classes in the validation split (displayed as cell output).
val_df["Label"].nunique()

In [None]:
# Number of distinct classes in the test split (displayed as cell output).
test_df["Label"].nunique()

In [None]:
from tensorflow.keras.applications import ResNet50

# The backbone below is ResNet50, so its own preprocessing must be used; the
# MobileNetV2 variant scales pixels differently from what the ImageNet ResNet50
# weights were trained on.
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.models import Model

# Fix one label -> index mapping (taken from the training labels) and share it
# across all three splits, so that class indices agree even when a split is
# missing some class.
class_names = sorted(train_df["Label"].unique())

# Augmentation is configured on the ImageDataGenerator constructor. Passing
# these options to flow_from_dataframe (as before) does not apply them.
# NOTE(review): vertical_flip is kept from the original settings; confirm that
# upside-down monuments are a desired augmentation.
train_generator = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    zoom_range=0.15,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode="nearest",
)
# Validation and test images are only preprocessed, never augmented.
val_generator = ImageDataGenerator(preprocessing_function=preprocess_input)
test_generator = ImageDataGenerator(preprocessing_function=preprocess_input)

# Settings common to every split.
flow_options = dict(
    x_col="Filepath",
    y_col="Label",
    target_size=(224, 224),
    color_mode="rgb",
    class_mode="categorical",
    classes=class_names,
    batch_size=32,
)

train_images = train_generator.flow_from_dataframe(
    dataframe=train_df, shuffle=True, seed=0, **flow_options
)
# Order is irrelevant for validation metrics, so no shuffling is needed.
val_images = val_generator.flow_from_dataframe(
    dataframe=val_df, shuffle=False, **flow_options
)
# shuffle=False keeps predictions aligned with test_images.labels.
test_images = test_generator.flow_from_dataframe(
    dataframe=test_df, shuffle=False, **flow_options
)

# Frozen ImageNet backbone; pooling="avg" makes the output a flat feature vector.
base_model = ResNet50(
    weights="imagenet", include_top=False, pooling="avg", input_shape=(224, 224, 3)
)
base_model.trainable = False


# Custom classification head: two hidden layers followed by a softmax with one
# unit per training class.
x = base_model.output
x = Dense(128, activation="relu")(x)
x = Dense(128, activation="relu")(x)
predictions = Dense(len(class_names), activation="softmax")(x)
model = Model(inputs=base_model.input, outputs=predictions)
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

history = model.fit(train_images, validation_data=val_images, epochs=30, verbose=1)

In [None]:
# Training vs. validation loss per epoch.
loss_fig, loss_ax = plt.subplots()
for history_key, line_color, legend_name in (
    ("loss", "red", "train"),
    ("val_loss", "blue", "validation"),
):
    loss_ax.plot(history.history[history_key], color=line_color, label=legend_name)
loss_ax.legend()
plt.show()

In [None]:
# Training vs. validation accuracy per epoch.
acc_fig, acc_ax = plt.subplots()
for history_key, line_color, legend_name in (
    ("accuracy", "red", "train"),
    ("val_accuracy", "blue", "validation"),
):
    acc_ax.plot(history.history[history_key], color=line_color, label=legend_name)
acc_ax.legend()
plt.show()

In [None]:
from sklearn.metrics import accuracy_score

# Class-probability rows for every test image, in test_images order.
predicted_labels = model.predict(test_images)

# index -> class name as seen by the model (taken from the training iterator).
label_mapping = {v: k for k, v in train_images.class_indices.items()}
# The true labels are indices in the *test* iterator's own numbering, so they
# are decoded with the test iterator's mapping. Decoding them with the training
# mapping is only correct when both splits contain exactly the same classes.
test_label_mapping = {v: k for k, v in test_images.class_indices.items()}

# Convert predicted labels to class names (one argmax over the class axis).
predicted_classes = [
    label_mapping[class_index] for class_index in np.argmax(predicted_labels, axis=1)
]

# Convert true labels to class names
true_classes = [test_label_mapping[class_index] for class_index in test_images.labels]

# Preview at most the first 10 results; slicing avoids an IndexError when the
# test split has fewer than 10 images.
for true_name, predicted_name in zip(true_classes[:10], predicted_classes[:10]):
    print(f"True label: {true_name}, Predicted label: {predicted_name}")

acc = accuracy_score(true_classes, predicted_classes)
print(f"Accuracy: {100 * acc:.2f}%")

In [None]:
# Define the file path to save the predicted classes
# Destination for the predicted class names (one name per line).
output_file = "predicted_classes.txt"

# Stream each prediction to the file, newline-terminated.
with open(output_file, "w") as handle:
    handle.writelines(class_name + "\n" for class_name in predicted_classes)

print("Predicted classes saved to", output_file)

In [None]:
# Save the trained model to an .h5 file; the test accuracy (in percent, two
# decimals) is embedded in the file name.
# NOTE(review): the file name says "resnetv2" while the backbone built above is
# ResNet50 — confirm the intended naming.
model.save(f"model_with_resnetv2_{100 * acc:.2f}.h5")

In [None]:
import keras


# Save the same model again in the .keras format, with the test accuracy (in
# percent, two decimals) embedded in the file name.
# NOTE(review): this uses the standalone `keras` package while the model was
# built via `tensorflow.keras` — confirm both resolve to the same Keras install.
keras.saving.save_model(model, f"model_with_v2resnet_{100 * acc:.2f}.keras")

In [None]:
import pickle

# Persist the index -> class-name mapping so predictions can be decoded later
# without rebuilding the data generators.
mapping_file = "label_mapping.pkl"
with open(mapping_file, "wb") as handle:
    pickle.dump(label_mapping, handle)

print("Label mapping saved successfully.")

In [None]:
# Print the layer-by-layer summary of the assembled model.
model.summary()