In [1]:
# Genrate name files
import os

# Path to your dataset folders
train_dir = "dataset/Freshwater Fish Disease Aquaculture in south asia/Train"
val_dir   = "dataset/Freshwater Fish Disease Aquaculture in south asia/Test"

# Output text files
train_file = "trainimages.txt"
val_file   = "validationimages.txt"

def save_image_list(directory, output_file):
    """
    Walks through subfolders in a directory and writes all image filenames to a text file.
    """
    image_list = []
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.lower().endswith(('.jpg', '.jpeg', '.png')):  # only images
                image_list.append(file)

    # Save to text file
    with open(output_file, 'w') as f:
        for filename in image_list:
            f.write(filename + "\n")

    print(f"Saved {len(image_list)} images to {output_file}")

# Generate train and validation image lists
save_image_list(train_dir, train_file)
save_image_list(val_dir, val_file)


Saved 1747 images to trainimages.txt
Saved 697 images to validationimages.txt


In [10]:
import os

# Set this to your train or test folder path
BASE_DIR = "dataset/Freshwater Fish Disease Aquaculture in south asia/Train"
OUTPUT_FILE = "train_descriptions.txt"

# List of your class names (folder names)
class_names = [
    'Bacterial Red disease',
    'Bacterial diseases - Aeromoniasis',
    'Bacterial gill disease',
    'Fungal diseases Saprolegniasis',
    'Healthy Fish',
    'Parasitic diseases',
    'Viral diseases White tail disease'
]

# Simple template captions per image
caption_templates = [
    "A fish showing symptoms of {}",
    "This fish is affected by {}",
    "{} symptoms are visible on the fish",
    "Signs of {} are present on this fish",
    "The fish has indications of {}"
]

with open(OUTPUT_FILE, "w") as f_out:
    for class_name in class_names:
        class_path = os.path.join(BASE_DIR, class_name)
        if not os.path.exists(class_path):
            print(f"Folder not found: {class_path}")
            continue
        for img_file in os.listdir(class_path):
            if img_file.lower().endswith(('.jpg', '.jpeg', '.png')):
                for idx, template in enumerate(caption_templates):
                    caption = template.format(class_name)
                    f_out.write(f"{img_file}#{idx} {caption}\n")

print(f"Descriptions saved to {OUTPUT_FILE}")


Descriptions saved to train_descriptions.txt


In [3]:
import os

# Set this to your train or test folder path
BASE_DIR = "dataset/Freshwater Fish Disease Aquaculture in south asia/Test"
OUTPUT_FILE = "test_descriptions.txt"

# List of your class names (folder names)
class_names = [
    'Bacterial Red disease',
    'Bacterial diseases - Aeromoniasis',
    'Bacterial gill disease',
    'Fungal diseases Saprolegniasis',
    'Healthy Fish',
    'Parasitic diseases',
    'Viral diseases White tail disease'
]

# Simple template captions per image
caption_templates = [
    "A fish showing symptoms of {}",
    "This fish is affected by {}",
    "{} symptoms are visible on the fish",
    "Signs of {} are present on this fish",
    "The fish has indications of {}"
]

with open(OUTPUT_FILE, "w") as f_out:
    for class_name in class_names:
        class_path = os.path.join(BASE_DIR, class_name)
        if not os.path.exists(class_path):
            print(f"Folder not found: {class_path}")
            continue
        for img_file in os.listdir(class_path):
            if img_file.lower().endswith(('.jpg', '.jpeg', '.png')):
                for idx, template in enumerate(caption_templates):
                    caption = template.format(class_name)
                    f_out.write(f"{img_file}#{idx} {caption}\n")

print(f"Descriptions saved to {OUTPUT_FILE}")


Descriptions saved to test_descriptions.txt


In [4]:
import os
import numpy as np
import pickle
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.applications import DenseNet201
from tensorflow.keras.applications.densenet import preprocess_input
from tqdm import tqdm

# Paths to dataset
train_dir = "dataset/Freshwater Fish Disease Aquaculture in south asia/Train"
val_dir   = "dataset/Freshwater Fish Disease Aquaculture in south asia/Test"

# Output pickle files
train_features_file = "features_train.pkl"
val_features_file   = "features_val.pkl"

# Load DenseNet201 without top
conv_base = DenseNet201(weights='imagenet', include_top=False, pooling='avg')  # pooling='avg' to get 1920-d vector

def extract_features(directory):
    features = {}
    # Walk through all subfolders and images
    for root, dirs, files in os.walk(directory):
        for file in tqdm(files, desc=f"Processing {directory}"):
            if file.lower().endswith(('.jpg', '.jpeg', '.png')):
                filepath = os.path.join(root, file)
                # Load image
                image = load_img(filepath, target_size=(224, 224))
                image = img_to_array(image)
                image = np.expand_dims(image, axis=0)
                image = preprocess_input(image)
                # Extract features
                feature = conv_base.predict(image, verbose=0)
                # Save with filename (without extension) as key
                key = os.path.splitext(file)[0]
                features[key] = feature
    return features

# Extract train and validation features
train_features = extract_features(train_dir)
val_features   = extract_features(val_dir)

# Save features to pickle
with open(train_features_file, 'wb') as f:
    pickle.dump(train_features, f)
print(f"Saved train features to {train_features_file}")

with open(val_features_file, 'wb') as f:
    pickle.dump(val_features, f)
print(f"Saved validation features to {val_features_file}")







Processing dataset/Freshwater Fish Disease Aquaculture in south asia/Train: 0it [00:00, ?it/s]
Processing dataset/Freshwater Fish Disease Aquaculture in south asia/Train: 100%|████| 250/250 [00:56<00:00,  4.41it/s]
Processing dataset/Freshwater Fish Disease Aquaculture in south asia/Train: 100%|████| 250/250 [00:48<00:00,  5.14it/s]
Processing dataset/Freshwater Fish Disease Aquaculture in south asia/Train: 100%|████| 250/250 [00:49<00:00,  5.08it/s]
Processing dataset/Freshwater Fish Disease Aquaculture in south asia/Train: 100%|████| 250/250 [01:00<00:00,  4.15it/s]
Processing dataset/Freshwater Fish Disease Aquaculture in south asia/Train: 100%|████| 250/250 [01:13<00:00,  3.41it/s]
Processing dataset/Freshwater Fish Disease Aquaculture in south asia/Train: 100%|████| 250/250 [01:12<00:00,  3.46it/s]
Processing dataset/Freshwater Fish Disease Aquaculture in south asia/Train: 100%|████| 250/250 [01:09<00:00,  3.62it/s]
Processing dataset/Freshwater Fish Disease Aquaculture in south a

Saved train features to features_train.pkl
Saved validation features to features_val.pkl





In [11]:
# =====================================
#  Fish Image Captioning - DenseNet201 + LSTM
# =====================================
import os
import numpy as np
from pickle import load, dump
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical, plot_model
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, LSTM, Embedding, Dropout, add
from tensorflow.keras.callbacks import ModelCheckpoint

# ----------------------------
# 1. Utility Functions
# ----------------------------

def load_doc(filename):
    """Load text file into memory."""
    with open(filename, 'r') as file:
        text = file.read()
    return text

def load_set(filename):
    """Load image names (without extension) from a file."""
    doc = load_doc(filename)
    dataset = [line.split('.')[0] for line in doc.split('\n') if len(line) > 0]
    return set(dataset)

def load_clean_descriptions(filename, dataset):
    """Load clean descriptions for given dataset image IDs."""
    doc = load_doc(filename)
    descriptions = {}
    for line in doc.split('\n'):
        tokens = line.split('#')
        if len(tokens) < 2:
            continue
        image_id_no = tokens[1][0]
        image_id, image_desc = tokens[0].split(".")[0], tokens[1][1:].strip()
        if image_id in dataset:
            if image_id not in descriptions:
                descriptions[image_id] = []
            desc = 'startseq ' + image_desc + ' endseq'
            descriptions[image_id].append(desc)
    return descriptions

def load_photo_features(filename, dataset):
    """Load pre-extracted DenseNet201 features (1920-D) from pickle file."""
    all_features = load(open(filename, 'rb'))
    features = {k: all_features[k] for k in dataset if k in all_features}
    return features

def to_lines(descriptions):
    """Flatten a dictionary of descriptions to a list."""
    all_desc = []
    for key in descriptions.keys():
        all_desc.extend(descriptions[key])
    return all_desc

def create_tokenizer(descriptions):
    """Create a tokenizer from the training descriptions."""
    lines = to_lines(descriptions)
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(lines)
    return tokenizer

def max_length(descriptions):
    """Calculate the maximum caption length."""
    lines = to_lines(descriptions)
    return max(len(d.split()) for d in lines)

def create_sequences(tokenizer, max_length, descriptions, photos, vocab_size):
    """Create input-output sequences for the model."""
    X1, X2, y = [], [], []
    for key, desc_list in descriptions.items():
        for desc in desc_list:
            seq = tokenizer.texts_to_sequences([desc])[0]
            for i in range(1, len(seq)):
                in_seq, out_seq = seq[:i], seq[i]
                in_seq = pad_sequences([in_seq], maxlen=max_length, padding="post")[0]
                out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]
                X1.append(photos[key][0])  # DenseNet201 feature
                X2.append(in_seq)
                y.append(out_seq)
    return np.array(X1), np.array(X2), np.array(y)

# ----------------------------
# 2. Model Definition
# ----------------------------

def define_model(vocab_size, max_length):
    """Define the DenseNet201 + LSTM captioning model."""
    # Feature extractor branch
    inputs1 = Input(shape=(1920,))  # <-- DenseNet201 output dimension
    fe1 = Dropout(0.5)(inputs1)
    fe2 = Dense(256, activation='relu')(fe1)

    # Sequence processor branch
    inputs2 = Input(shape=(max_length,))
    se1 = Embedding(vocab_size, 256, mask_zero=True)(inputs2)
    se2 = Dropout(0.5)(se1)
    se3 = LSTM(256)(se2)

    # Decoder (merge)
    decoder1 = add([fe2, se3])
    decoder2 = Dense(256, activation='relu')(decoder1)
    outputs = Dense(vocab_size, activation='softmax')(decoder2)

    model = Model(inputs=[inputs1, inputs2], outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam')

    print(model.summary())
    plot_model(model, to_file='fish_caption_model.png', show_shapes=True)
    return model

# ----------------------------
# 3. Load Datasets
# ----------------------------

train_images_file = 'trainimages.txt'
val_images_file   = 'validationimages.txt'
train_descriptions_file = 'train_descriptions.txt'
val_descriptions_file   = 'test_descriptions.txt'
train_features_file = 'features_train.pkl'
val_features_file   = 'features_val.pkl'

# Training data
train_set = load_set(train_images_file)
train_descriptions = load_clean_descriptions(train_descriptions_file, train_set)
train_features = load_photo_features(train_features_file, train_set)

# Validation data
val_set = load_set(val_images_file)
val_descriptions = load_clean_descriptions(val_descriptions_file, val_set)
val_features = load_photo_features(val_features_file, val_set)

print(f"Training images: {len(train_set)}")
print(f"Validation images: {len(val_set)}")




Training images: 1639
Validation images: 589


In [12]:
# ----------------------------
# 4. Tokenizer and Data Prep
# ----------------------------

tokenizer = create_tokenizer(train_descriptions)
vocab_size = len(tokenizer.word_index) + 1
max_len = max_length(train_descriptions)

print(f"Vocab size: {vocab_size}")
print(f"Max caption length: {max_len}")

# Save tokenizer for inference later
with open('tokenizer.pkl', 'wb') as f:
    dump(tokenizer, f)

# Create train and validation sequences
X1train, X2train, ytrain = create_sequences(tokenizer, max_len, train_descriptions, train_features, vocab_size)
X1val, X2val, yval = create_sequences(tokenizer, max_len, val_descriptions, val_features, vocab_size)


Vocab size: 33
Max caption length: 14


In [8]:
print(len(train_set))
print(list(train_set)[:10])


1639
['Bacterial diseases - Aeromoniasis (209)', 'Healthy Fish (103)', 'Parasitic diseases (229)', 'Bacterial Red disease (134)', 'Parasitic diseases (115)', 'Parasitic diseases (67)', 'Healthy Fish (12)', 'Healthy Fish (15)', 'Bacterial diseases - Aeromoniasis (172)', 'Healthy Fish (223)']


In [14]:
# ----------------------------
# 5. Define and Train Model
# ----------------------------

model = define_model(vocab_size, max_len)

# Save the best model (lowest val_loss) during training in .hdf5 format
checkpoint = ModelCheckpoint(
    'best_fish_caption_model.hdf5',  # <-- note the .hdf5 extension
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    mode='min'
)

# Train the model
history = model.fit(
    [X1train, X2train], ytrain,
    epochs=30,
    verbose=2,
    batch_size=64,
    validation_data=([X1val, X2val], yval),
    callbacks=[checkpoint]
)

# After training, load the best weights before final saving
model.load_weights('best_fish_caption_model.hdf5')

# Save final model again (same best weights, but as a full Model object)
model.save('final_fish_caption_model.hdf5')

print("✅ Training complete. Best model saved as 'best_fish_caption_model.hdf5' and final model as 'final_fish_caption_model.hdf5'.")


Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_5 (InputLayer)        [(None, 14)]                 0         []                            
                                                                                                  
 input_4 (InputLayer)        [(None, 1920)]               0         []                            
                                                                                                  
 embedding_1 (Embedding)     (None, 14, 256)              8448      ['input_5[0][0]']             
                                                                                                  
 dropout_2 (Dropout)         (None, 1920)                 0         ['input_4[0][0]']             
                                                                                            

In [9]:
print(len(train_descriptions))
print(list(train_descriptions.items())[:3])


0
[]
