# Importing Libraries

In [None]:
import os
import numpy as np
import pandas as pd
#visualization libraries
import seaborn as sns
import matplotlib.pyplot as plt
plt.style.use('seaborn-v0_8-dark')
#image processing libraries
import glob as gb
from PIL import Image
from tensorflow.keras.preprocessing.image import  ImageDataGenerator,load_img, img_to_array
from tensorflow.keras.preprocessing import image
#Model building essential libraries
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.models import Sequential,Model, load_model
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Conv2D, Dense, Dropout, BatchNormalization, Flatten
from tensorflow.keras.callbacks import ReduceLROnPlateau,EarlyStopping,ModelCheckpoint
#library for splitting the dataset into train, validation and test sets
from sklearn.model_selection import train_test_split
#to check the time for execution
import time

In [None]:

# Report how many GPUs TensorFlow can see in this runtime.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))


In [None]:
# Mount Google Drive inside the Colab VM so files under /content/drive are reachable.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Extract the dataset archive stored on the mounted Drive.
# NOTE(review): on a re-run unzip may prompt before overwriting already-extracted files — confirm and consider a non-interactive flag.
!unzip "/content/drive/MyDrive/Specie_finder_5C.zip"

# Setting the basic stuff

In [None]:
# Root folder of the extracted dataset; each sub-folder is treated as one class.
data_dir = 'Specie_finder_5C'
# classes = ['char', 'perch', 'tilapia', 'trout', 'pikeperch']
# os.listdir returns entries in arbitrary order, so sort them to keep the
# class-name -> class-index mapping identical from one run to the next.
classes = sorted(os.listdir(data_dir))
# Input resolution fed to the network.
img_height, img_width = 224, 224
# Training hyper-parameters shared by the experiments below.
batch_size = 16
num_epochs = 100

In [None]:
import os
import numpy as np
import tensorflow as tf
from PIL import Image
from concurrent.futures import ThreadPoolExecutor

def load_and_preprocess_image(img_path):
    """Load one image file as a normalized RGB array.

    Args:
        img_path: Path of the image file to read.

    Returns:
        A float32 NumPy array of the resized RGB image with pixel values
        scaled to the range [0, 1].
    """
    # The context manager releases the file handle once the pixels have been
    # read, so loading many files does not leak open descriptors.
    with Image.open(img_path) as raw_img:
        # PIL's resize takes (width, height), not (height, width).
        resized = raw_img.convert('RGB').resize((img_width, img_height))
    # Dividing a uint8 array by 255.0 would yield float64; building the array
    # as float32 first halves the memory needed for the whole dataset.
    return np.array(resized, dtype=np.float32) / 255.0

def load_images_from_folder(folder):
    """Load every image under `folder/<class_name>/` using a thread pool.

    Args:
        folder: Dataset root containing one sub-folder per entry of `classes`.

    Returns:
        A tuple `(images, labels)` of NumPy arrays, where `labels[i]` is the
        index in `classes` of the folder that `images[i]` was read from.
    """
    pending = []
    labels = []

    with ThreadPoolExecutor() as executor:
        for class_idx, class_name in enumerate(classes):
            class_folder = os.path.join(folder, class_name)
            # Sort the listing: os.listdir order is arbitrary, and a stable
            # sample order is needed for seeded splits to be reproducible.
            for filename in sorted(os.listdir(class_folder)):
                img_path = os.path.join(class_folder, filename)
                pending.append(executor.submit(load_and_preprocess_image, img_path))
                labels.append(class_idx)

    # Futures are collected in submission order, so images stay aligned with labels.
    images = [future.result() for future in pending]

    return np.array(images), np.array(labels)

# Read the whole dataset into memory as (images, integer labels)
images, labels = load_images_from_folder(data_dir)

# One-hot encode the integer class labels
num_classes = len(classes)
labels_one_hot = tf.keras.utils.to_categorical(labels, num_classes=num_classes)


# Loading the Dataset

In [None]:
def load_images_from_folder(folder):
    """Serially load every image under `folder/<class_name>/`.

    Args:
        folder: Dataset root containing one sub-folder per entry of `classes`.

    Returns:
        A tuple `(images, labels)` of NumPy arrays: `images` holds float32 RGB
        arrays scaled to [0, 1], and `labels[i]` is the index in `classes` of
        the folder that `images[i]` was read from.
    """
    images = []
    labels = []
    for class_idx, class_name in enumerate(classes):
        class_folder = os.path.join(folder, class_name)
        # Sort the listing: os.listdir order is arbitrary, and a stable sample
        # order is needed for seeded splits to be reproducible.
        for filename in sorted(os.listdir(class_folder)):
            img_path = os.path.join(class_folder, filename)
            # Close the file handle as soon as the pixels have been read.
            with Image.open(img_path) as raw_img:
                # PIL's resize takes (width, height), not (height, width).
                resized = raw_img.convert('RGB').resize((img_width, img_height))
            # float32 halves the memory of the float64 produced by uint8 / 255.0.
            images.append(np.array(resized, dtype=np.float32) / 255.0)
            labels.append(class_idx)

    return np.array(images), np.array(labels)

# Read the whole dataset into memory as (images, integer labels).
# NOTE(review): the dataset was already loaded and one-hot encoded in the previous
# cell; this cell repeats that work with the serial loader defined just above —
# confirm whether both cells are meant to be run.
images, labels = load_images_from_folder(data_dir)

# One-hot encode the integer class labels
num_classes = len(classes)
labels_one_hot = tf.keras.utils.to_categorical(labels, num_classes=num_classes)


In [None]:
# Sanity check: show the shape of the loaded image array.
print(images.shape)

# Exploratory Data Analysis on the Fish Image Data

**Plotting the training history of a trained model**

In [None]:
def plot_history(history):
    """Plot accuracy and loss curves from a Keras training history.

    Args:
        history: Object whose `.history` dict holds per-epoch lists under the
            keys "accuracy" and "loss", and optionally "val_accuracy" and
            "val_loss" when validation data was used during training.

    The validation curves are drawn only when those keys are present, so the
    function also works for a fit run without validation data.
    """
    metrics = history.history
    fig, axs = plt.subplots(2)

    # create accuracy subplot
    axs[0].plot(metrics["accuracy"], label='train accuracy')
    if "val_accuracy" in metrics:
        # "val_*" metrics come from the validation data, not from a test set.
        axs[0].plot(metrics["val_accuracy"], label='validation accuracy')
    axs[0].set_ylabel("Accuracy")
    axs[0].legend(loc='lower right')
    axs[0].set_title("Accuracy eval")

    # create loss subplot
    axs[1].plot(metrics["loss"], label='train error')
    if "val_loss" in metrics:
        axs[1].plot(metrics["val_loss"], label='validation error')
    axs[1].set_ylabel("Error")
    axs[1].set_xlabel("Epochs")
    axs[1].legend(loc='upper right')
    axs[1].set_title("Error eval")

    plt.show()

**Checking the number of images in each class**

In [None]:
# Non-image files that may sit next to the class folders and must be skipped.
non_class_entries = ('Segmentation_example_script.m', 'README.txt', 'license.txt')

all_path = []  # full path of every image, across all classes
for entry in os.listdir(data_dir):
    if entry not in non_class_entries:
        # Every file with an extension inside this class folder
        class_files = gb.glob(pathname=data_dir + '/' + entry + '/*.*')
        print(' found {} in {} '.format(len(class_files), entry))
        all_path.extend(class_files)

In [None]:
# Total number of image files found across all classes.
total_image_count = len(all_path)
print(total_image_count)

**Creating the Dataframe from images**

In [None]:
# One row per image: its path and the class label taken from the parent folder name.
images_df = pd.DataFrame({'FilePath': all_path})
images_df['Label'] = images_df['FilePath'].apply(lambda x: x.split('/')[-2])
# Widen column display so full file paths are visible.
pd.options.display.max_colwidth = 200

# Shuffle the rows with a fixed seed: without it the row order differs on every
# run, so the later seeded train/test split would still not be reproducible.
images_df = images_df.sample(frac=1, random_state=42).reset_index(drop=True)
images_df.head(5)

**Check the Distribution of data for each class**

In [None]:
# Per-class image counts, computed once and reused for the pie chart.
label_counts = images_df['Label'].value_counts()

plt.figure(figsize=(15, 5))

# Left panel: bar chart of images per class
plt.subplot(1, 2, 1)
sns.countplot(data=images_df, x='Label')
plt.xticks(rotation=60)

# Right panel: the same distribution as percentages
plt.subplot(1, 2, 2)
plt.pie(x=label_counts.values, labels=label_counts.index, autopct='%1.1f%%')

plt.suptitle('Distribution of each class in data', size=20)
plt.show()

**Displaying some pictures from the image dataset**

In [None]:
# 4 x 5 grid showing the first 20 rows of the shuffled DataFrame, ticks hidden.
fig, axes = plt.subplots(
    nrows=4,
    ncols=5,
    figsize=(15, 7),
    subplot_kw={'xticks': [], 'yticks': []},
)
for row_idx, axis in enumerate(axes.flat):
    axis.imshow(plt.imread(images_df['FilePath'][row_idx]))
    axis.set_title(images_df['Label'][row_idx])

plt.tight_layout()
plt.show()

# Splitting the Image Dataset

In [None]:
# First split: 70% training, 30% held out.
X_train, X_temp, y_train, y_temp = train_test_split(
    images,
    labels_one_hot,
    test_size=0.3,
    random_state=42,
)
# Second split: the held-out part becomes validation (67%) and test (33%).
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.33,
    random_state=42,
)

# **1st Experiment: Setting up the VGG16 base and applying it to the loaded images**

In [None]:
# ImageNet-pretrained VGG16 without its classifier head; global average pooling
# turns the final feature maps into a flat feature vector.
vgg_pretrained_model = VGG16(
    weights='imagenet',
    include_top=False,
    pooling='avg',
    input_shape=(img_height, img_width, 3),
)

# Freeze the convolutional base so only the new head is trained.
vgg_pretrained_model.trainable = False

# Early stopping, reducing the learning rate on plateau, and saving the best model weights

In [None]:
# Stop when val_loss has not improved for 3 epochs and roll back to the best weights.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    verbose=1,
    restore_best_weights=True,
)

# Multiply the learning rate by 0.1 after 2 epochs without val_loss improvement.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    patience=2,
    verbose=0,
    factor=0.1,
)

# Keep on Drive only the model with the best val_accuracy seen so far.
model_check_point = ModelCheckpoint(
    monitor='val_accuracy',
    filepath='/content/drive/MyDrive/kanwal_work/vgg16_fish/bestmodel.h5',
    save_best_only=True,
    verbose=True,
)

In [None]:
# Classification head stacked on top of the frozen VGG16 feature extractor.
inputs = vgg_pretrained_model.input

x = Dense(128, activation='relu')(vgg_pretrained_model.output)
x = Dense(128, activation='relu')(x)
x = Dropout(0.3)(x)
x = BatchNormalization()(x)
x = Dense(64, activation='relu')(x)

# softmax instead of sigmoid: each image has exactly one class (one-hot targets)
# and the loss is categorical_crossentropy, which goes with a probability
# distribution over the classes. This also matches the head of the 2nd experiment.
outputs = Dense(len(classes), activation='softmax')(x)

model = Model(inputs=inputs, outputs=outputs)

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# validation_split holds out a fraction of X_train for the val_* metrics that
# the callbacks monitor.
# NOTE(review): X_val / y_val from the split cell are not used here — confirm
# whether validation_data=(X_val, y_val) was intended instead.
history = model.fit(
    X_train,
    y_train,
    epochs=num_epochs,
    batch_size=batch_size,
    callbacks=[early_stopping, reduce_lr, model_check_point],
    validation_split=0.1
)

**Plot the History of the VGG image Model**

In [None]:
# Draw the accuracy and loss curves recorded by the fit call above.
plot_history(history)

In [None]:
# Reload the checkpoint written by ModelCheckpoint during training.
best_model_path = '/content/drive/MyDrive/kanwal_work/vgg16_fish/bestmodel.h5'
vgg_load_model = load_model(best_model_path)

In [None]:
# Evaluate the reloaded model on each split, in the order test, validation, train.
evaluation_sets = (
    ('Testing', X_test, y_test),
    ('validation', X_val, y_val),
    ('Training', X_train, y_train),
)
for split_name, split_inputs, split_targets in evaluation_sets:
    split_loss, split_accuracy = vgg_load_model.evaluate(split_inputs, split_targets)
    print(f'{split_name} Accuracy : {split_accuracy*100:.2f}')


In [None]:
# Inspect the one-hot encoded labels of the test split.
print(y_test)

# **2nd Experiment: Training VGG16 using another approach (DataFrame)**

**Splitting the Dataset into Training and Testing**

In [None]:
# Hold out 10% of the image rows as the test set.
training_df, testing_df = train_test_split(
    images_df,
    test_size=0.1,
    shuffle=True,
    random_state=1,
)

for split_name, split_df in (('training', training_df), ('testing', testing_df)):
    print(f'The dimension of {split_name} data :', split_df.shape)

**Creating the image data generators for training and testing**

In [None]:
# preprocess_input must be passed by keyword: the first positional parameter of
# ImageDataGenerator is `featurewise_center`, so a positional function would
# never be applied to the images.
training_generator = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.vgg16.preprocess_input,
    # Reserve 10% of the training DataFrame for the 'validation' subset.
    validation_split=0.1,
)

testing_generator = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.vgg16.preprocess_input,
)

**Split the Dataset into Training, validation and Testing**

In [None]:
# Options common to the three iterators: which columns to read, the label
# encoding, and the image size / colour mode / batch size.
shared_flow_options = dict(
    x_col='FilePath',
    y_col='Label',
    class_mode='categorical',
    target_size=(224, 224),
    color_mode='rgb',
    batch_size=32,
)

# Training subset of training_df (shuffled, seeded).
training_images = training_generator.flow_from_dataframe(
    dataframe=training_df,
    shuffle=True,
    seed=42,
    subset='training',
    **shared_flow_options,
)

# Validation subset, carved out of training_df by the generator's validation_split.
validation_images = training_generator.flow_from_dataframe(
    dataframe=training_df,
    shuffle=True,
    seed=42,
    subset='validation',
    **shared_flow_options,
)

# Test images are read in DataFrame order (no shuffling).
testing_images = testing_generator.flow_from_dataframe(
    dataframe=testing_df,
    shuffle=False,
    **shared_flow_options,
)


In [None]:
# Fresh ImageNet-pretrained VGG16 base (no classifier head, global average pooling)
# for the DataFrame-based experiment.
vgg_pretrained_model = VGG16(
    weights='imagenet',
    include_top=False,
    pooling='avg',
    input_shape=(img_height, img_width, 3),
)

# Freeze the convolutional base so only the new head is trained.
vgg_pretrained_model.trainable = False

In [None]:
from tensorflow.keras.callbacks import Callback

class CustomEarlyStopping(Callback):
    """Keras callback that stops training once training accuracy hits a target.

    Args:
        target_accuracy: Threshold compared against the 'accuracy' value in the
            epoch logs; training stops when that value is greater than or equal
            to the threshold.
    """

    def __init__(self, target_accuracy):
        super().__init__()
        self.target_accuracy = target_accuracy

    def on_epoch_end(self, epoch, logs=None):
        """Check the epoch's training accuracy and request a stop if the target is met.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Dict of metric values for the epoch; may be None.
        """
        # `logs` defaults to None and may lack 'accuracy' (e.g. when the model
        # was compiled without that metric); do nothing instead of raising.
        accuracy = (logs or {}).get('accuracy')
        if accuracy is None:
            return
        if accuracy >= self.target_accuracy:
            print(f"\nTraining accuracy reached {self.target_accuracy*100}%.\nTraining stopped.")
            self.model.stop_training = True

# Stop training as soon as the training accuracy reaches 0.92.
custom_early_stopping = CustomEarlyStopping(target_accuracy=0.92)

In [None]:
from tensorflow.keras.optimizers import Adam

# Classification head stacked on top of the frozen VGG16 feature extractor.
inputs = vgg_pretrained_model.input

x = Dense(128, activation='relu')(vgg_pretrained_model.output)
x = Dense(128, activation='relu')(x)
x = Dropout(0.3)(x)
x = BatchNormalization()(x)
x = Dense(64, activation='relu')(x)

# Lower learning rate than Adam's default for this experiment.
optimizer = Adam(learning_rate=0.0001)

outputs = Dense(len(classes), activation='softmax')(x)
model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# No batch_size here: the data iterators already yield batches (batch_size=32 was
# set in flow_from_dataframe), and `fit` must not be given batch_size for
# generator-style inputs.
history = model.fit(
    training_images,
    validation_data=validation_images,
    epochs=num_epochs,
    callbacks=[custom_early_stopping]
)

In [None]:
# Save the model trained in the DataFrame experiment to Google Drive as an .h5 file
model.save("/content/drive/MyDrive/kanwal_work/fish_specie_5C_v2.h5")

In [None]:
# Draw the accuracy and loss curves recorded by the fit call above.
plot_history(history)

In [None]:
# List the matplotlib style names available in this environment.
print(plt.style.available)


**Loading the VGG16 Model (DataFrame)**

In [None]:
# Reload the model saved at the end of the DataFrame experiment.
dataframe_model_path = '/content/drive/MyDrive/kanwal_work/fish_specie_5C_v2.h5'
vgg_dataframe_model = load_model(dataframe_model_path)

**Evaluation of VGG16 Model**

In [None]:
# evaluate() returns [loss, accuracy] for the metrics the model was compiled with.
Eval = vgg_dataframe_model.evaluate(testing_images)
test_loss, test_accuracy = Eval[0], Eval[1]
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")
print(f"Test Loss: {test_loss:.5f}")

**Summary Model of the VGG16 using DataFrame Approach**

In [None]:
# Print the layer-by-layer summary of `model`, the network built in the DataFrame experiment.
model.summary()