In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Input, GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.preprocessing import LabelEncoder
import zipfile
import xml.etree.ElementTree as ET
import tensorflow as tf
from sklearn.metrics import classification_report

In [3]:
class EpochProgressCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        print(f"\nEpoch {epoch+1}/{self.params['epochs']} - loss: {logs['loss']:.4f} - accuracy: {logs['accuracy']:.4f} - val_loss: {logs['val_loss']:.4f} - val_accuracy: {logs['val_accuracy']:.4f}")


In [4]:
def parse_xml_annotation(xml_file):
    tree = ET.parse(xml_file)
    root = tree.getroot()

    image_paths = []
    disease_labels = []

    for object_tag in root.findall('object'):
        name = object_tag.find('name').text
        bndbox = object_tag.find('bndbox')

        image_paths.append(os.path.join(train_image_folder, root.find('filename').text))
        disease_labels.append(name)

    return image_paths, disease_labels


In [5]:
zip_file_path = "/content/drive/MyDrive/Colab Notebooks/fish.zip"
train_folder = 'train'
test_folder = 'test'


In [6]:
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall()

train_image_folder = train_folder
test_image_folder = test_folder


In [7]:
def load_data(image_folder):
    images = []
    labels = []

    for filename in os.listdir(image_folder):
        if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
            img_file_path = os.path.join(image_folder, filename)
            xml_file_name = os.path.splitext(filename)[0] + ".xml"
            xml_file_path = os.path.join(image_folder, xml_file_name)

            try:
                image = cv2.imread(img_file_path)
                if image is None:
                    print(f"Failed to load image: {img_file_path}")
                    continue

                image = cv2.resize(image, (224, 224))
                image = image / 255.0
                images.append(image)
                image_paths, disease_labels = parse_xml_annotation(xml_file_path)
                labels.append(disease_labels[0])
            except Exception as e:
                print(f"Error while processing image: {img_file_path}")
                print(e)

    return np.array(images), np.array(labels)


In [8]:
X_train, y_train = load_data(train_image_folder)
X_test, y_test = load_data(test_image_folder)


In [9]:
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)


In [10]:
num_classes = len(np.unique(y_train_encoded))
y_train = tf.keras.utils.to_categorical(y_train_encoded, num_classes)
y_test = tf.keras.utils.to_categorical(y_test_encoded, num_classes)


In [11]:
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)


In [12]:
datagen = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest'
)


In [13]:
class_weights = {class_index: 1.0 / np.sum(y_train_encoded == class_index) for class_index in range(num_classes)}


In [14]:
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5


In [15]:
for layer in base_model.layers:
    layer.trainable = False


In [16]:
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01))(x)
x = Dropout(0.5)(x)
predictions = Dense(num_classes, activation='softmax')(x)


In [17]:
model = Model(inputs=base_model.input, outputs=predictions)


In [18]:
optimizer = Adam(learning_rate=0.0001)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])


In [19]:
lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=2, min_lr=1e-7)


In [20]:
early_stopping_callback = tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)


In [21]:
history = model.fit(
    datagen.flow(X_train, y_train, batch_size=32),
    steps_per_epoch=len(X_train) / 32,
    epochs=50,
    validation_data=(X_val, y_val),
    callbacks=[EpochProgressCallback(), lr_scheduler, early_stopping_callback],
    class_weight=class_weights
)


Epoch 1/50
Epoch 1/50 - loss: 7.3464 - accuracy: 0.3368 - val_loss: 8.5987 - val_accuracy: 0.4583
Epoch 2/50
Epoch 2/50 - loss: 7.2414 - accuracy: 0.3368 - val_loss: 8.5280 - val_accuracy: 0.3750
Epoch 3/50
Epoch 3/50 - loss: 7.1256 - accuracy: 0.3579 - val_loss: 8.4240 - val_accuracy: 0.1250
Epoch 4/50
Epoch 4/50 - loss: 7.0262 - accuracy: 0.3684 - val_loss: 8.2971 - val_accuracy: 0.0833
Epoch 5/50
Epoch 5/50 - loss: 6.9234 - accuracy: 0.3684 - val_loss: 8.1509 - val_accuracy: 0.1667
Epoch 6/50
Epoch 6/50 - loss: 6.8239 - accuracy: 0.4105 - val_loss: 7.9920 - val_accuracy: 0.2500
Epoch 7/50
Epoch 7/50 - loss: 6.7231 - accuracy: 0.4526 - val_loss: 7.8316 - val_accuracy: 0.3750
Epoch 8/50
Epoch 8/50 - loss: 6.6230 - accuracy: 0.4526 - val_loss: 7.6682 - val_accuracy: 0.4583
Epoch 9/50
Epoch 9/50 - loss: 6.5225 - accuracy: 0.5053 - val_loss: 7.5209 - val_accuracy: 0.5417
Epoch 10/50
Epoch 10/50 - loss: 6.4285 - accuracy: 0.5158 - val_loss: 7.3876 - val_accuracy: 0.5833
Epoch 11/50
Epoch 

In [22]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss:.4f} - Test Accuracy: {accuracy:.4f}")


Test Loss: 4.1239 - Test Accuracy: 0.8750


In [23]:
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test, axis=1)




In [24]:

# Convert class indices back to disease labels
predicted_diseases = label_encoder.inverse_transform(y_pred_classes)
actual_diseases = label_encoder.inverse_transform(y_test_classes)


In [25]:
print(classification_report(actual_diseases, predicted_diseases))

              precision    recall  f1-score   support

        Carp       1.00      1.00      1.00         2
       Catla       1.00      0.50      0.67         2
      Kendai       1.00      1.00      1.00         2
      Silver       0.67      1.00      0.80         2

    accuracy                           0.88         8
   macro avg       0.92      0.88      0.87         8
weighted avg       0.92      0.88      0.87         8

