## Importing the libraries

In [114]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from PIL import Image
from sklearn.metrics import confusion_matrix , classification_report


## Dataset preparation

In [115]:
directory = 'animals'
filepaths =[]
labels=[]
folders = os.listdir(directory)
for folder in folders:
    folder_path = os.path.join(directory, folder)
    filenames= os.listdir(folder_path)
    for filename in filenames:
        filepath = os.path.join(folder_path, filename)
        filepaths.append(filepath)
        labels.append(folder)
        
file_series = pd.Series(filepaths , name='filepaths')
label_series = pd.Series(labels , name='labels')
element_series = pd.concat([file_series , label_series], axis=1)

## Preparing the model using cnn

In [116]:
cnn = tf.keras.Sequential()
cnn.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(64,64,3))) #1st convolutional layer
cnn.add(tf.keras.layers.MaxPooling2D(pool_size = 2, strides = 2))
cnn.add(tf.keras.layers.Conv2D(64, (3, 3), activation='relu'))
cnn.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))

cnn.add(tf.keras.layers.Conv2D(128, (3, 3), activation='relu'))
cnn.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))

cnn.add(tf.keras.layers.Flatten())

cnn.add(tf.keras.layers.Dense(512, activation='relu'))
cnn.add(tf.keras.layers.Dropout(0.5))  # Adding dropout for regularization

cnn.add(tf.keras.layers.Dense(256, activation='relu'))
cnn.add(tf.keras.layers.Dropout(0.5))  # Adding dropout for regularization

# Output layer with softmax activation for multiclass classification
cnn.add(tf.keras.layers.Dense(90, activation='softmax'))

cnn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [117]:
def load_image(filepath, target_size=(224, 224)):
    img = Image.open(filepath)
    img = img.resize(target_size)
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    img_array = preprocess_input(img_array)
    return img_array

## Applying 3 fold cross verification

In [118]:
n_splits =3
skf = StratifiedKFold(n_splits=n_splits , random_state= 42, shuffle=True)
fold_indices = list(skf.split(element_series['filepaths'], element_series['labels']))
for fold , (train_index, val_index)in enumerate(fold_indices):
    print(f"Fold {fold + 1}:")
    train_fold_df = element_series.iloc[train_index]
    val_fold_df = element_series.iloc[val_index]
    print("Training dataset size:", len(train_fold_df))
    print("Validation dataset size:", len(val_fold_df))

    train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest'
    )

    # Validation data should only be rescaled
    test_datagen = ImageDataGenerator(rescale=1./255)
    
    # Create data generators for train and validation sets
    train_generator = train_datagen.flow_from_dataframe(
        dataframe=train_fold_df,
        x_col='filepaths',
        y_col='labels',
        target_size=(64, 64),  # Adjust according to your model's input shape
        batch_size=32,
        class_mode='categorical'  # or 'categorical' for multiclass classification
    )
    
    validation_generator = test_datagen.flow_from_dataframe(
        dataframe=val_fold_df,
        x_col='filepaths',
        y_col='labels',
        target_size=(64, 64),  # Adjust according to your model's input shape
        batch_size=32,
        class_mode='categorical'  # or 'categorical' for multiclass classification
    )
    
    # Train your model using the data generators
    history = cnn.fit(
        train_generator,
        steps_per_epoch=len(train_generator),
        epochs=20,
        validation_data=validation_generator,
        validation_steps=len(validation_generator)
    )
    # Evaluate accuracy
    val_loss, val_acc = cnn.evaluate(validation_generator)
    print(f"Validation Accuracy: {val_acc}")

    # Calculate confusion matrix
    val_labels = np.array([validation_generator.class_indices[label] for label in val_fold_df['labels']])

    # Predict labels for validation data
    val_pred = cnn.predict(validation_generator)
    val_pred = np.argmax(val_pred, axis=1)
    
    # Calculate confusion matrix
    cm = confusion_matrix(val_labels, val_pred)
    print(f"Confusion Matrix for Fold {fold + 1}:\n{cm}\n")

    

    
    
    

Fold 1:
Training dataset size: 3600
Validation dataset size: 1800
Found 3600 validated image filenames belonging to 90 classes.
Found 1800 validated image filenames belonging to 90 classes.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Validation Accuracy: 0.07333333045244217
Confusion Matrix for Fold 1:
[[0 2 1 ... 0 0 0]
 [0 1 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 1 ... 0 0 0]
 [0 1 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]

Fold 2:
Training dataset size: 3600
Validation dataset size: 1800
Found 3600 validated image filenames belonging to 90 classes.
Found 1800 validated image filenames belonging to 90 classes.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epo