## Importing the libraries

In [65]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from PIL import Image
from sklearn.metrics import confusion_matrix , classification_report
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.utils import to_categorical


## Dataset preparation

In [66]:
directory = 'animals'
filepaths =[]
labels=[]
folders = os.listdir(directory)
for folder in folders:
    folder_path = os.path.join(directory, folder)
    filenames= os.listdir(folder_path)
    for filename in filenames:
        filepath = os.path.join(folder_path, filename)
        filepaths.append(filepath)
        labels.append(folder)
        
file_series = pd.Series(filepaths , name='filepaths')
label_series = pd.Series(labels , name='labels')
element_series = pd.concat([file_series , label_series], axis=1)
# Modify the labels for 5-vs-rest classification
element_series['labels_modified'] = np.where(element_series['labels'].isin(selected_classes), '1', '0')

# Convert labels to one-hot encoded format
labels_one_hot = to_categorical(element_series['labels_modified'], num_classes=5)

# Update the label column in element_series
element_series['labels_modified'] = labels_one_hot.tolist()

# Print the first few rows of the modified DataFrame
print(element_series.head())

                         filepaths    labels            labels_modified
0  animals\antelope\02f4b3be2d.jpg  antelope  [0.0, 1.0, 0.0, 0.0, 0.0]
1  animals\antelope\03d7fc0888.jpg  antelope  [0.0, 1.0, 0.0, 0.0, 0.0]
2  animals\antelope\058fa9a60f.jpg  antelope  [0.0, 1.0, 0.0, 0.0, 0.0]
3  animals\antelope\0a37838e99.jpg  antelope  [0.0, 1.0, 0.0, 0.0, 0.0]
4  animals\antelope\0b1a3af197.jpg  antelope  [0.0, 1.0, 0.0, 0.0, 0.0]


## Preparing the model using cnn

In [67]:
cnn = tf.keras.Sequential()
cnn.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(64,64,3))) #1st convolutional layer
cnn.add(tf.keras.layers.MaxPooling2D(pool_size = 2, strides = 2))
cnn.add(tf.keras.layers.Conv2D(64, (3, 3), activation='relu'))
cnn.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))

cnn.add(tf.keras.layers.Conv2D(128, (3, 3), activation='relu'))
cnn.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))

cnn.add(tf.keras.layers.Flatten())

cnn.add(tf.keras.layers.Dense(512, activation='relu'))
cnn.add(tf.keras.layers.Dropout(0.5))  # Adding dropout for regularization

cnn.add(tf.keras.layers.Dense(256, activation='relu'))
cnn.add(tf.keras.layers.Dropout(0.5))  # Adding dropout for regularization

# Output layer with softmax activation for multiclass classification
cnn.add(tf.keras.layers.Dense(1, activation='sigmoid'))


cnn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [68]:
def load_image(filepath, target_size=(224, 224)):
    img = Image.open(filepath)
    img = img.resize(target_size)
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    img_array = preprocess_input(img_array)
    return img_array

## Applying 3 fold cross verification

In [None]:
n_splits =3
skf = StratifiedKFold(n_splits=n_splits , random_state= 42, shuffle=True)
fold_indices = list(skf.split(element_series['filepaths'], element_series['labels']))
for fold , (train_index, val_index)in enumerate(fold_indices):
    print(f"Fold {fold + 1}:")
    train_fold_df = element_series.iloc[train_index]
    val_fold_df = element_series.iloc[val_index]
    # Convert labels_modified column values to strings
    train_fold_df['labels_modified'] = train_fold_df['labels_modified'].astype(str)
    val_fold_df['labels_modified'] = val_fold_df['labels_modified'].astype(str)
    print("Training dataset size:", len(train_fold_df))
    print("Validation dataset size:", len(val_fold_df))

    train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest'
    )

    # Validation data should only be rescaled
    test_datagen = ImageDataGenerator(rescale=1./255)
    
    # Create data generators for train and validation sets
    train_generator = train_datagen.flow_from_dataframe(
        dataframe=train_fold_df,
        x_col='filepaths',
        y_col='labels_modified',
        target_size=(64, 64),  # Adjust according to your model's input shape
        batch_size=32,
        class_mode='binary'  # or 'categorical' for multiclass classification
    )
    
    validation_generator = test_datagen.flow_from_dataframe(
        dataframe=val_fold_df,
        x_col='filepaths',
        y_col='labels_modified',
        target_size=(64, 64),  # Adjust according to your model's input shape
        batch_size=32,
        class_mode='binary'  # or 'categorical' for multiclass classification
    )
    
    # Train your model using the data generators
    history = cnn.fit(
        train_generator,
        epochs=1,
        validation_data=validation_generator,
        validation_steps=len(validation_generator)
    )
        # Evaluate accuracy
    val_loss, val_acc = cnn.evaluate(validation_generator)
    print(f"Validation Accuracy: {val_acc}")
    
    # Get predicted labels
    val_pred = cnn.predict(validation_generator)
    val_pred_labels = np.argmax(val_pred, axis=1)
    
    # Calculate confusion matrix
    cm = confusion_matrix(validation_generator.classes, val_pred_labels)
    print(f"Confusion Matrix for Fold {fold + 1}:\n{cm}\n")


Fold 1:
Training dataset size: 3600
Validation dataset size: 1800
Found 3600 validated image filenames belonging to 2 classes.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_fold_df['labels_modified'] = train_fold_df['labels_modified'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  val_fold_df['labels_modified'] = val_fold_df['labels_modified'].astype(str)


Found 1800 validated image filenames belonging to 2 classes.


  return dispatch_target(*args, **kwargs)


