## Loading all of the necessary libraries

In [None]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
 
from sklearn.metrics import classification_report, confusion_matrix
 
# deep learning libraries
import tensorflow as tf
import keras
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import applications
from keras.models import Sequential, load_model
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, Flatten, Dense, Dropout
from keras.preprocessing import image
from random import sample
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
from sklearn.metrics import classification_report

import cv2
 
import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): this also hides warnings raised by the libraries used below
# (e.g. deprecation notices) — confirm that silencing all of them is intended.
warnings.filterwarnings('ignore')


# Directories holding the train, test and validation images
train_path, test_path, val_path = (
    '/kaggle/input/notebook5ef36487a5/train/',
    '/kaggle/input/notebook5ef36487a5/test/',
    '/kaggle/input/notebook5ef36487a5/val/',
)

## Load the CSV file with the names of the image files belonging to each class

In [None]:
# Label table: one row per image file together with its class
labels_csv_path = "/kaggle/input/nazwy-plikow/nazwy_plikow.csv"
labels = pd.read_csv(labels_csv_path, sep=',')

# Show the first rows of the DataFrame as a quick sanity check
labels_preview = labels.head()
print(labels_preview)

## Adding jpg extension to our files

In [None]:
def to_jpg(id):
    """Return ``id`` with a ``.jpg`` extension, adding it only when missing.

    The function is idempotent: applying it to a name that already ends in
    ``.jpg`` returns the name unchanged, so re-running the notebook cell that
    applies it to a column does not produce ``name.jpg.jpg``.

    Args:
        id: File identifier (string), with or without the ``.jpg`` suffix.

    Returns:
        The identifier ending in ``.jpg``.

    Raises:
        TypeError: If ``id`` is not a string (raised by the concatenation).
    """
    # Only strings can already carry the suffix; anything else falls through
    # to the concatenation below and fails there with TypeError.
    if isinstance(id, str) and id.endswith(".jpg"):
        return id
    return id + ".jpg"


# Rewrite the image-name column with to_jpg applied to every entry
image_names = labels['zdjecie']
labels['zdjecie'] = image_names.apply(to_jpg)

## Data augmentation and pre-processing using tensorflow

In [None]:
# Augmentation settings used for the training images
augmentation_options = dict(
    rescale=1./255.,
    horizontal_flip=True,
    rotation_range=20,       # rotate images by up to 20 degrees
    width_shift_range=0.1,   # shift horizontally by up to 10% of the width
    height_shift_range=0.1,  # shift vertically by up to 10% of the height
    shear_range=0.1,         # shear transformations
)
gen = ImageDataGenerator(**augmentation_options)

# How the training images are read from the label DataFrame
train_flow_options = dict(
    directory=train_path,
    x_col='zdjecie',
    y_col='rasa',
    color_mode="rgb",
    target_size=(256, 256),  # image height, image width
    class_mode="sparse",
    batch_size=32,
    shuffle=True,
    seed=42,
)
train_generator = gen.flow_from_dataframe(labels, **train_flow_options)


# Validation images are only rescaled; no augmentation is applied.
val_gen = ImageDataGenerator(rescale=1./255.)
validation_generator = val_gen.flow_from_dataframe(
    labels, # dataframe
    directory = val_path,
    x_col = 'zdjecie',
    y_col = 'rasa',
    color_mode="rgb",
    target_size = (256,256), # image height , image width
    # Reuse the exact class list (and order) of the train generator. Without
    # this, classes are inferred from whichever rows match files in val_path,
    # so a class missing from the validation set would shift the integer
    # labels and make validation metrics silently wrong.
    classes=list(train_generator.class_indices),
    class_mode="sparse",
    batch_size=32,
    # Evaluation data needs no shuffling; a fixed order keeps runs reproducible.
    shuffle=False,
    seed=42,
)

In [None]:
# Keep the class-index mapping exposed by the train generator for later use
class_indices = train_generator.class_indices
# Bare expression: the notebook shows the mapping as the cell output
class_indices

## Building our model

In [None]:
# InceptionResNetV2 with ImageNet weights and without its classification top
# serves as the feature-extraction base.
base_model = tf.keras.applications.InceptionResNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=(256, 256, 3),
)

# Freeze the base: with trainable = False its weights are not updated by
# fit() and its internal state stays unchanged during training.
base_model.trainable = False

# Classification head stacked on top of the frozen base
classifier_head = [
    tf.keras.layers.BatchNormalization(renorm=True),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.BatchNormalization(renorm=True),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(120, activation='softmax'),
]

model = tf.keras.Sequential([base_model, *classifier_head])

## Defining an optimizer for our model

In [None]:
# Adam with a small learning rate; labels are integer-encoded, hence the
# sparse variant of categorical cross-entropy.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.00001)
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer overview of the assembled model
model.summary()

## Training model for 25 epochs

In [None]:
batch_size=32

# Number of batches needed to see every sample once. Ceiling division is used
# so that the trailing partial batch is not dropped (floor division skipped up
# to batch_size - 1 samples per pass and yields 0 steps when n < batch_size).
STEP_SIZE_TRAIN = (train_generator.n + train_generator.batch_size - 1) // train_generator.batch_size
STEP_SIZE_VALID = (validation_generator.n + validation_generator.batch_size - 1) // validation_generator.batch_size

# fit model
history = model.fit(train_generator,
                    steps_per_epoch=STEP_SIZE_TRAIN,
                    validation_data=validation_generator,
                    validation_steps=STEP_SIZE_VALID,
                    epochs=25)

## Saving model

In [None]:
# Write the trained model to an .h5 file in the current working directory.
# NOTE(review): the later cells load '/kaggle/input/model-final/Model_Final_.h5',
# not this path — confirm the saved file is what gets published there.
model.save("Model_Final_.h5")


## Plot the accuracy

In [None]:
# Per-epoch metrics recorded during training
training_log = history.history
acc, val_acc = training_log['accuracy'], training_log['val_accuracy']
loss, val_loss = training_log['loss'], training_log['val_loss']

# Accuracy panel (top half of a two-row figure)
plt.figure(figsize=(10, 16))
plt.rcParams.update({
    'figure.figsize': [16, 9],
    'font.size': 14,
    'axes.grid': True,
    'figure.facecolor': 'white',
})
accuracy_ax = plt.subplot(2, 1, 1)
accuracy_ax.plot(acc, label='Training Accuracy')
accuracy_ax.plot(val_acc, label='Validation Accuracy')
accuracy_ax.legend(loc='lower right')
accuracy_ax.set_ylabel('Accuracy')
accuracy_ax.set_title(f'\nTraining and Validation Accuracy. \nTrain Accuracy: {str(acc[-1])}\nValidation Accuracy: {str(val_acc[-1])}')


## Plot the loss

In [None]:
# Loss panel (bottom half of a two-row layout)
loss_ax = plt.subplot(2, 1, 2)
loss_ax.plot(loss, label='Training Loss')
loss_ax.plot(val_loss, label='Validation Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_ylabel('Cross Entropy')
loss_ax.set_title(f'Training and Validation Loss. \nTrain Loss: {str(loss[-1])}\nValidation Loss: {str(val_loss[-1])}')
loss_ax.set_xlabel('epoch')
plt.tight_layout(pad=3.0)
plt.show()



## Creating the report for our model

In [None]:
# Load the trained model
model = load_model('/kaggle/input/model-final/Model_Final_.h5')

# Load the class mapping from CSV
# NOTE(review): row position in this file is used as the predicted class index
# and column 0 as the class name — confirm the file matches the model's classes.
csv_file_path = '/kaggle/input/class-indexes/class_indexes.csv'
df = pd.read_csv(csv_file_path)

# Specify the test directory
test_dir = '/kaggle/input/notebook5ef36487a5/test/'

# List all files in the test directory (sorted so every run visits them in
# the same order)
all_files = sorted(os.listdir(test_dir))

# Build the file-name -> true-class lookup once instead of scanning the whole
# DataFrame for every image. drop_duplicates keeps the first occurrence, which
# matches the previous `.values[0]` behaviour for repeated file names.
true_class_by_file = (
    labels.drop_duplicates(subset='zdjecie')
          .set_index('zdjecie')['rasa']
          .to_dict()
)

# Initialize lists to store true labels and predicted labels
true_labels = []
predicted_labels = []
# Files present in the directory but absent from the label table
unlabeled_files = []

for file_name in all_files:
    # Without a ground-truth label the file cannot contribute to the report;
    # skip it before spending time on a prediction (previously this raised
    # IndexError after the prediction had already been computed).
    if file_name not in true_class_by_file:
        unlabeled_files.append(file_name)
        continue
    true_class = true_class_by_file[file_name]

    # Load and preprocess each image for prediction
    img_path = os.path.join(test_dir, file_name)
    img = image.load_img(img_path, target_size=(256, 256))
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    img_array /= 255.0  # Normalize the image data

    # Make predictions (verbose=0 avoids one progress bar per image)
    predictions = model.predict(img_array, verbose=0)

    # Get the index of the predicted class
    predicted_class_index = np.argmax(predictions)

    # Convert predicted class index to class name
    predicted_class = df.iloc[predicted_class_index, 0]

    # Append true and predicted labels to the lists
    true_labels.append(true_class)
    predicted_labels.append(predicted_class)

if unlabeled_files:
    print(f"Skipped {len(unlabeled_files)} file(s) without a label entry")

# Calculate precision, recall and F1 score per class
report = classification_report(true_labels, predicted_labels)
report_dict = classification_report(true_labels, predicted_labels, output_dict=True)
report_df = pd.DataFrame(report_dict).transpose()

# Save the DataFrame to a CSV file
report_df.to_csv('/kaggle/working/classification_report_final.csv', index=True)

## Visualizing the metrics from the report (precision, recall and F1 score) as box plots

In [None]:
# Load the classification report DataFrame from the CSV file
report_df = pd.read_csv('/kaggle/working/classification_report_final.csv', index_col=0)

# The report written by classification_report(output_dict=True) contains
# aggregate rows next to the per-class rows. They are not classes, so they
# are removed before plotting; otherwise they show up as extra data points
# and distort the distributions. errors='ignore' keeps this safe when a row
# is absent.
summary_rows = ['accuracy', 'micro avg', 'macro avg', 'weighted avg']
per_class_report_df = report_df.drop(index=summary_rows, errors='ignore')

# Plot precision, recall, and F1 score
sns.set(style="whitegrid")

# Create box plots for precision, recall, and F1 score
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 5))

# Box plot for precision
sns.boxplot(y=per_class_report_df['precision'], ax=axes[0], color='blue')
axes[0].set_title('Precision')

# Box plot for recall
sns.boxplot(y=per_class_report_df['recall'], ax=axes[1], color='green')
axes[1].set_title('Recall')

# Box plot for F1 score
sns.boxplot(y=per_class_report_df['f1-score'], ax=axes[2], color='orange')
axes[2].set_title('F1 Score')

plt.tight_layout()
plt.show()


## Creating a generator for the test set

In [None]:
# Test images are only rescaled; no augmentation is applied.
test_datagen = image.ImageDataGenerator(rescale=1./255)

# Specified batch size
batch_size = 32

test_generator = test_datagen.flow_from_dataframe(
    labels, # dataframe
    directory=test_path,  # images data path
    x_col='zdjecie',
    y_col='rasa',
    color_mode="rgb",
    target_size=(256, 256), 
    batch_size=batch_size,
    # Reuse the exact class list (and order) of the train generator. Without
    # this, classes are inferred from whichever rows match files in test_path,
    # so a class missing from the test set would shift the integer labels and
    # make the evaluation silently wrong.
    classes=list(train_generator.class_indices),
    class_mode='sparse',
    # A fixed order keeps evaluation reproducible and lets predictions be
    # aligned with the generator's file list.
    shuffle=False,
    seed=42
)

## Evaluation of the model for the test data set

In [None]:
# Reload the saved model and score it on the test generator
model = load_model('/kaggle/input/model-final/Model_Final_.h5')
accuracy_score = model.evaluate(test_generator)
print(accuracy_score)

# evaluate() returned the loss first and the accuracy metric second
test_loss, test_accuracy = accuracy_score[0], accuracy_score[1]
print("Accuracy: {:.4f}%".format(test_accuracy * 100))

print("Loss: ", test_loss)

## Evaluation of the model for the validation data set

In [None]:
# Reload the saved model and score it on the validation generator
model = load_model('/kaggle/input/model-final/Model_Final_.h5')
accuracy_score = model.evaluate(validation_generator)
print(accuracy_score)

# evaluate() returned the loss first and the accuracy metric second
validation_loss, validation_accuracy = accuracy_score[0], accuracy_score[1]
print("Accuracy: {:.4f}%".format(validation_accuracy * 100))

print("Loss: ", validation_loss)

## Count the classes whose precision is below the average, based on the created report

In [None]:
# NOTE(review): this reads a report from /kaggle/input, not the file written
# to /kaggle/working by the report cell — confirm it is the intended report.
report_df = pd.read_csv('/kaggle/input/classification-report/classification_report.csv', index_col=0)

# A report produced by classification_report(output_dict=True) carries
# aggregate rows in addition to the per-class rows. They are dropped so that
# neither the mean nor the count below includes non-class entries.
# errors='ignore' keeps this safe when a row is absent.
summary_rows = ['accuracy', 'micro avg', 'macro avg', 'weighted avg']
per_class_report_df = report_df.drop(index=summary_rows, errors='ignore')

# Calculate the average precision over the classes
average_precision = per_class_report_df['precision'].mean()

# Filter rows where precision is below the average
below_average_precision = per_class_report_df[per_class_report_df['precision'] < average_precision]

# Display the average and the number of classes below it
print("Average precision:")
print(average_precision)
print("------------------")
print("Classes with Precision Below Average:")
print(len(below_average_precision))