# Importing the necessary libraries

In [None]:
import os
import joblib
import shutil
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model
from sklearn.metrics import classification_report
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Defining the paths

In [None]:
# Root folder of the dataset; every path below is built relative to it.
root_dir = "dataset"

# CSV files (loaded with pandas in a later cell) and the folders that hold
# the original image files referenced by those CSVs.
train_csv_path = f"{root_dir}/train.csv"
test_csv_path = f"{root_dir}/test.csv"
train_folder_path = f"{root_dir}/train"
test_folder_path = f"{root_dir}/test"

# Loading the CSV data

In [None]:
# Read the train/test CSVs into DataFrames. Later cells use their
# "filename" and "category" columns.
train_data = pd.read_csv(train_csv_path)
test_data = pd.read_csv(test_csv_path)

# Defining Image directories and copying the images

In [None]:
# Folders the Keras generators read from; they are filled with the files
# listed in the corresponding CSV.
train_images_dir = f"{root_dir}/train/images"
test_images_dir = f"{root_dir}/test/images"


def _copy_listed_images(filenames, source_dir, target_dir, split_name):
    """Copy the listed image files from ``source_dir`` into ``target_dir``.

    Only files that are not yet present in ``target_dir`` are copied, so a
    copy that was interrupted on a previous run is completed instead of being
    silently treated as finished.

    Args:
        filenames: Iterable of file names relative to ``source_dir``.
        source_dir: Folder holding the original files.
        target_dir: Destination folder; created if it does not exist.
        split_name: Word used in the "skipping" message
            (e.g. ``"training"`` or ``"testing"``).
    """
    dir_existed = os.path.isdir(target_dir)
    os.makedirs(target_dir, exist_ok=True)

    pending = [
        name for name in filenames if not os.path.exists(f"{target_dir}/{name}")
    ]
    if dir_existed and not pending:
        print(f"{target_dir} already exists, skipping copy for {split_name} images.")
        return

    for name in pending:
        shutil.copy(os.path.join(source_dir, name), f"{target_dir}/{name}")


# Only the "filename" column is needed, so iterate over it directly rather
# than building a full row Series per image with iterrows().
_copy_listed_images(
    train_data["filename"], train_folder_path, train_images_dir, "training"
)
_copy_listed_images(
    test_data["filename"], test_folder_path, test_images_dir, "testing"
)

# Data preprocessing: creating the training and validation data generators

In [None]:
# Sanity check: number of entries in the training image folder.
train_image_count = len(os.listdir(f"{root_dir}/train/images"))
print(train_image_count)

# A single generator object serves both subsets: pixel values are scaled by
# 1/255 and 20% of the rows are reserved for validation.
train_datagen = ImageDataGenerator(validation_split=0.2, rescale=1.0 / 255)

# Arguments shared by the training and validation flows; only `subset` differs.
shared_flow_kwargs = dict(
    dataframe=train_data,
    directory=f"{root_dir}/train/images",
    x_col="filename",
    y_col="category",
    target_size=(256, 256),
    batch_size=32,
    class_mode="categorical",
)

train_generator = train_datagen.flow_from_dataframe(
    subset="training", **shared_flow_kwargs
)

validation_generator = train_datagen.flow_from_dataframe(
    subset="validation", **shared_flow_kwargs
)

In [None]:
# Keep the generator's class mapping; it is reused when the labels are saved.
# NOTE(review): per the Keras docs this is a dict of class name -> integer index.
class_indices = train_generator.class_indices
# Bare expression so the notebook displays the mapping.
class_indices

# Load and preprocess image

In [None]:
def load_and_preprocess_image(image_path, target_size=(256, 256)):
    """Load an image file and return it as a normalised RGB array.

    The image is converted to 3-channel RGB so that grayscale, palette and
    RGBA files all produce an array with three channels, matching the
    ``(256, 256, 3)`` input declared for the model in this notebook.

    Args:
        image_path: Path of the image file to open.
        target_size: Size passed to ``PIL.Image.resize`` (PIL interprets it
            as ``(width, height)``). Defaults to ``(256, 256)``.

    Returns:
        A float NumPy array with three channels and values scaled to
        ``[0, 1]``.
    """
    # The context manager closes the underlying file handle, which a bare
    # Image.open() call leaves open.
    with Image.open(image_path) as img:
        rgb_img = img.convert("RGB")
        resized = rgb_img.resize(target_size)
        return np.asarray(resized) / 255.0  # Normalize pixel values

# Define model architecture

In [None]:
# One output unit per class known to the training generator.
num_classes = len(train_generator.class_indices)

# Convolutional feature extractor: three Conv2D + MaxPooling2D stages with
# 32, 64 and 128 filters. Only the first layer declares the input shape.
conv_stack = [
    Conv2D(32, (3, 3), activation="relu", input_shape=(256, 256, 3)),
    MaxPooling2D((2, 2)),
]
for conv_filters in (64, 128):
    conv_stack += [
        Conv2D(conv_filters, (3, 3), activation="relu"),
        MaxPooling2D((2, 2)),
    ]

# Classifier head: flatten, one hidden dense layer with dropout, softmax output.
classifier_head = [
    Flatten(),
    Dense(512, activation="relu"),
    Dropout(0.5),
    Dense(num_classes, activation="softmax"),
]

model = Sequential(conv_stack + classifier_head)

# Compiling and Training the model

In [None]:
# Configure the optimiser, loss and reported metric.
model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

# Train for 100 epochs, evaluating on the validation generator after each
# epoch; the returned history is plotted in a later cell.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=100,
)

# Saving the model

In [None]:
# Create the output folder first: neither model.save() nor joblib.dump()
# creates missing parent directories, and failing here would throw away the
# whole training run.
os.makedirs("models", exist_ok=True)

model.save("models/table_classification_model_100.keras")

# Store class names ordered by their integer index so that labels[i] is
# guaranteed to be the name for model output i, rather than relying on the
# dict's insertion order.
joblib.dump(sorted(class_indices, key=class_indices.get), "models/labels.pkl")

# Visualizing the results

In [None]:
# Extracting data from the history object
acc = history.history["accuracy"]
val_acc = history.history["val_accuracy"]
loss = history.history["loss"]
val_loss = history.history["val_loss"]

epochs = range(len(acc))

# Plotting training and validation accuracy
plt.figure(figsize=(14, 5))

plt.subplot(1, 2, 1)
plt.plot(epochs, acc, "bo-", label="Training accuracy")
plt.plot(epochs, val_acc, "ro-", label="Validation accuracy")
plt.title("Training and Validation Accuracy")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend()

# Plotting training and validation loss
plt.subplot(1, 2, 2)
plt.plot(epochs, loss, "bo-", label="Training loss")
plt.plot(epochs, val_loss, "ro-", label="Validation loss")
plt.title("Training and Validation Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()

plt.tight_layout()
plt.show()

In [None]:
# Test images get the same 1/255 rescaling as the training images, with no
# validation split.
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

# shuffle=False keeps the batches in DataFrame order, so predictions can be
# compared position-by-position with test_generator.classes.
# NOTE(review): no explicit `classes` list is passed, so the class indices are
# presumably inferred from the categories present in test_data -- confirm they
# match the training generator's mapping.
test_flow_options = {
    "dataframe": test_data,
    "directory": test_images_dir,
    "x_col": "filename",
    "y_col": "category",
    "target_size": (256, 256),
    "batch_size": 32,
    "class_mode": "categorical",
    "shuffle": False,
}

test_generator = test_datagen.flow_from_dataframe(**test_flow_options)

# Loading Model

In [None]:
# Reload the saved model and label list from disk; `model` is rebound to the
# loaded copy.
model_path = "models/table_classification_model_100.keras"
model = load_model(model_path)
# NOTE(review): joblib.load unpickles the file, so only load files you trust.
# `labels` is not referenced by the later cells visible here.
labels = joblib.load("models/labels.pkl")

In [None]:
# Evaluate on the test generator. The model was compiled with a single
# "accuracy" metric, so evaluate() is unpacked into (loss, accuracy).
test_loss, test_accuracy = model.evaluate(test_generator)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

In [None]:
# Per-sample model outputs for the test generator.
test_predictions = model.predict(test_generator)
# Index of the highest-scoring output for each sample.
test_predicted_classes = np.argmax(test_predictions, axis=1)
# Class indices as assigned by the test generator. The generator was created
# with shuffle=False, so these line up with the prediction order.
true_classes = test_generator.classes

In [None]:
# Class names in the order stored by the test generator; used as row labels
# in the report.
class_labels = list(test_generator.class_indices)

# Per-class precision / recall / F1 summary of the test predictions.
report = classification_report(
    y_true=true_classes,
    y_pred=test_predicted_classes,
    target_names=class_labels,
)
print(report)