In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the input tree recursively and print the full path of every file found
# (one output line per file, so large datasets produce long output).
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Import necessary libraries
import cv2
import numpy as np
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import precision_score, recall_score, accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score
from scipy.optimize import minimize
from skimage.feature import hog
import os

# 1. Load and Preprocess Data with Masks
def load_fish4knowledge_data(data_dir, mask_dir):
    """Load paired grayscale fish images and masks, with one label per pair.

    Every ``.png`` under ``data_dir`` is matched to a ``.png`` under
    ``mask_dir`` by file name (without extension). A sample is kept only when
    BOTH the image and its mask can be read, so the three returned arrays
    always have the same length and stay index-aligned.

    Args:
        data_dir: Root directory of the fish images (searched recursively).
        mask_dir: Root directory of the mask images (searched recursively).

    Returns:
        Tuple ``(images, masks, labels)`` of NumPy arrays:
        images -- grayscale images resized to 200x200,
        masks  -- masks resized to 200x200 and divided by 255.0,
        labels -- species name for images stored in a ``fish_XX`` folder that
                  maps to a known species, otherwise the folder name itself.
    """
    species_mapping = [
        'Dascyllus reticulatus', 'Plectroglyphidodon dickii', 'Chromis chrysura',
        'Amphiprion clarkia', 'Chaetodon lunulatus', 'Chaetodon trifascialis',
        'Myripristis kuntee', 'Acanthurus nigrofuscus', 'Hemigymnus fasciatus',
        'Neoniphon samara', 'Abudefduf vaigiensis', 'Canthigaster valentine',
        'Pomacentrus moluccensis', 'Zebrasoma scopas', 'Hemigymnus melapterus',
        'Lutjanus fulvus', 'Scolopsis bilineata', 'Scaridae',
        'Pempheris vanicolensis', 'Zanclus cornutus', 'Neoglyphidodon nigroris',
        'Balistapus undulatus', 'Siganus fuscescens',
    ]

    # The i-th species (1-based) is looked up in folder 'fish_XX'.
    folder_species = {
        species: f'fish_{index:02}'
        for index, species in enumerate(species_mapping, start=1)
    }
    species_by_folder = {folder: species for species, folder in folder_species.items()}

    # Number of files directly inside each folder of the image tree.
    folder_file_counts = {}
    for root, _, files in os.walk(data_dir):
        folder_name = os.path.basename(root) or root
        folder_file_counts[folder_name] = len(files)

    # Informational summary only. The label list is built per loaded sample
    # below; it must NOT alias species_mapping, otherwise extending it while
    # iterating over the species would never terminate.
    for s in species_mapping:
        print(f'species: {s}      files: {folder_file_counts.get(folder_species[s], 0)}')

    # Collect fish images and corresponding masks, keyed by base file name
    fish_images = {os.path.splitext(file)[0]: os.path.join(root, file)
                   for root, _, files in os.walk(data_dir) for file in files if file.endswith('.png')}
    fish_masks = {os.path.splitext(file)[0]: os.path.join(root, file)
                  for root, _, files in os.walk(mask_dir) for file in files if file.endswith('.png')}

    images, masks, labels = [], [], []
    for key, image_path in fish_images.items():
        mask_path = fish_masks.get(key)
        if mask_path is None and key.startswith('fish_'):
            # NOTE(review): fallback assumes a mask may share the image's id but
            # carry a 'mask_' prefix instead of 'fish_' -- confirm against the
            # dataset layout. Exact-name matches always take precedence.
            mask_path = fish_masks.get('mask_' + key[len('fish_'):])
        if mask_path is None:
            continue

        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if image is None or mask is None:
            # Skip the whole sample so images, masks and labels stay aligned
            continue

        # Resize for consistency
        images.append(cv2.resize(image, (200, 200)))
        # Scale mask values from 0..255 down to 0..1
        masks.append(cv2.resize(mask, (200, 200)) / 255.0)

        folder = os.path.basename(os.path.dirname(image_path))
        labels.append(species_by_folder.get(folder, folder))

    return np.array(images), np.array(masks), np.array(labels)

# Define data directories
# Both trees sit under the same Kaggle input dataset folder.
data_dir = '/kaggle/input/fish4knowledge/fish_image'
mask_dir = '/kaggle/input/fish4knowledge/mask_image'

# Load data with images and masks
images, masks, labels = load_fish4knowledge_data(data_dir, mask_dir)
# Print the returned label array as a quick sanity check
print(labels)

In [None]:
import cv2
import os
from PIL import ImageDraw, Image
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
from scipy.ndimage import gaussian_filter, maximum_filter
import matplotlib.pyplot as plt

# image = cv2.imread('/kaggle/input/fish23/fish_000007390001_01020.png')
# blur = cv2.GaussianBlur(image, (5,5), 0)
# blur_hsv = cv2.cvtColor(blur, cv2.COLOR_BGR2HSV)

# # create NumPy arrays from the boundaries
# lower = np.array([0,0,0], dtype = "uint8")
# upper = np.array([180,255,40], dtype = "uint8")

# # find the colors within the specified boundaries and apply
# mask = cv2.inRange(blur_hsv, lower, upper)  
# mask = 255 - mask
# output = cv2.bitwise_and(image, image, mask = mask)
# output = Image.fromarray(output)
# mask = Image.fromarray(mask)
# # show the images
# output.save('output.png')
# mask.save("mask.png")

In [None]:
def gaussian_kernel(size: int, mean: float, std: float):
    """Build a normalised 2D Gaussian kernel for convolution.

    The 1D Normal(mean, std) density is sampled at the integer offsets
    -size..size, combined with itself as an outer product, and the resulting
    (2*size + 1) x (2*size + 1) kernel is scaled so its entries sum to 1.
    """
    offsets = tf.range(start=-size, limit=size + 1, dtype=tf.float32)
    densities = tfp.distributions.Normal(mean, std).prob(offsets)

    # Outer product of the 1D profile with itself gives the separable 2D kernel
    unnormalised = tf.einsum('i,j->ij', densities, densities)

    total = tf.reduce_sum(unnormalised)
    return unnormalised / total


def getSaliencyMap(image, mask=None):
    """
    Get a saliency map using the PFT (phase spectrum) method, keep only its
    local maxima, and optionally keep only the maxima inside a mask.

    input:
        image: H*W*3 colour image (channels beyond the third are ignored)
               or an H*W grayscale image
        mask: optional mask of the fish region, shape H*W, with values in
              0..255 or 0..1; it is resized to the image's H*W when the shapes
              differ. With None, no masking is applied.
    output:
        numpy array of shape H*W holding the saliency value at every retained
        local maximum and 0 everywhere else
    """
    image = np.asarray(image)
    if image.ndim == 3 and image.shape[-1] >= 3:
        gray = tf.image.rgb_to_grayscale(tf.cast(image[..., :3], tf.float32))
        # Drop the channel axis: fft2d transforms the two innermost axes, which
        # must be (H, W) rather than (W, 1)
        gray = tf.squeeze(gray, axis=-1)
    elif image.ndim == 3:
        gray = tf.cast(image[..., 0], tf.float32)
    else:
        gray = tf.cast(image, tf.float32)

    # Keep only the phase of the spectrum (unit magnitude) and transform back
    spectrum = tf.signal.fft2d(tf.cast(gray, tf.complex64))
    phase = tf.math.angle(spectrum)
    unit_spectrum = tf.complex(real=tf.math.cos(phase), imag=tf.math.sin(phase))
    reconstruction = tf.signal.ifft2d(unit_spectrum)
    salientmap = tf.pow(tf.abs(reconstruction), 2).numpy()

    salientmap = gaussian_filter(salientmap, sigma=0.1)

    # Non-maximum suppression: a pixel survives only if it equals the maximum
    # of its 10x10 neighbourhood
    suppressed_map = maximum_filter(salientmap, size=10)
    suppressed_map = np.where(salientmap == suppressed_map, salientmap, 0)

    if mask is None:
        return suppressed_map

    mask_resized = np.asarray(mask, dtype=np.float32)
    if mask_resized.shape != suppressed_map.shape:
        height, width = suppressed_map.shape
        # PIL sizes are given as (width, height)
        mask_resized = np.array(Image.fromarray(mask_resized).resize((width, height)))

    # Normalize the mask only if it is still on a 0..255 scale
    if mask_resized.size and mask_resized.max() > 1.0:
        mask_resized = mask_resized / 255.0

    # Apply the mask to keep only maxima inside the fish region
    final_map = suppressed_map * mask_resized

    return final_map

def getTopMaxima(saliency_map, top_n=6):
    """
    Function to extract the top N maxima from the saliency map.

    input:
        saliency_map: array-like (H * W), the saliency map after applying the mask
        top_n: Number of top maxima to extract (default is 6); values <= 0
               yield an empty result

    output:
        top_coords: List of (row, col) index pairs -- i.e. (y, x) -- of the
                    top N maxima, ordered from highest to lowest value
        top_values: Array of the corresponding saliency values
    """
    saliency_map = np.asarray(saliency_map)

    # Flatten the saliency map
    flattened_map = saliency_map.flatten()

    # A slice of [-0:] would select the WHOLE array, so handle a non-positive
    # top_n (and an empty map) explicitly
    if top_n <= 0 or flattened_map.size == 0:
        return [], flattened_map[:0]

    # Get the indices of the top N values, sorted in descending order
    top_indices = np.argsort(flattened_map)[-top_n:][::-1]

    # Convert flat indices back to 2D coordinates
    top_coords = np.unravel_index(top_indices, saliency_map.shape)

    # Get the top N values
    top_values = flattened_map[top_indices]

    # Return coordinates and values
    return list(zip(top_coords[0], top_coords[1])), top_values


def drawMaximaOnImage(image, top_coords, radius=5, color='red'):
    """
    Outline each of the given maxima with a circle on the image.

    input:
        image: PIL Image, or a numpy array that is converted to a PIL Image first
        top_coords: iterable of coordinate pairs; each pair is unpacked as (y, x)
        radius: circle radius in pixels
        color: outline colour of the circles
    output:
        the PIL Image with the circles drawn on it (a PIL Image passed in is
        drawn on directly)
    """
    # Work on a PIL image: convert numpy input, use PIL input as-is
    canvas = Image.fromarray(image) if isinstance(image, np.ndarray) else image
    pen = ImageDraw.Draw(canvas)

    for row, col in top_coords:
        # Bounding box of the circle centred on column `col`, row `row`
        left, top = col - radius, row - radius
        right, bottom = col + radius, row + radius
        pen.ellipse((left, top, right, bottom), outline=color, width=3)

    return canvas

def load_fish_images_and_masks(base_dir_fish, base_dir_mask):
    """Collect the paths of all ``.png`` files under the two directory trees.

    Both lists are returned in sorted order. ``os.walk`` yields entries in
    filesystem order, so without sorting the two lists could not be paired
    positionally (e.g. with ``zip``) in a reproducible way.

    Args:
        base_dir_fish: Root directory of the fish images (searched recursively).
        base_dir_mask: Root directory of the mask images (searched recursively).

    Returns:
        Tuple ``(image_paths, mask_paths)`` of sorted lists of file paths.
    """
    def _collect_png_paths(base_dir):
        # Recursively gather every '.png' path below base_dir, sorted
        return sorted(
            os.path.join(root, name)
            for root, _, names in os.walk(base_dir)
            for name in names
            if name.endswith('.png')
        )

    return _collect_png_paths(base_dir_fish), _collect_png_paths(base_dir_mask)


# if __name__ == '__main__':
#     base_dir_fish = '/kaggle/input/fish4knowledge/fish_image'
#     base_dir_mask = '/kaggle/input/fish4knowledge/mask_image'
    
#     image_paths, mask_paths = load_fish_images_and_masks(base_dir_fish, base_dir_mask)
    
#     for image_path, mask_path in zip(image_paths, mask_paths):
#         image = Image.open(image_path)  # Load the image
#         mask = Image.open(mask_path)    # Load the mask

#         image = np.array(image)
#         image_rgb = image[:, :, :3]

#         mask = np.array(mask)           # Convert mask to numpy array
#         mask = mask / 255.0    # Convert mask to binary (assuming mask is grayscale or binary)

#         # Get the saliency map with the mask applied
#         final_saliency_map = getSaliencyMap(image_rgb, mask)

#     #     # Convert to uint8 for saving as an image
#     #     final_saliency_map = (final_saliency_map * 255).astype(np.uint8)

    #     # Save the resulting saliency map with mask applied
    # #     Image.fromarray(final_saliency_map).save('final_saliency2.png')
    #     top_coords, top_values = getTopMaxima(final_saliency_map, top_n=6)

    #     print("Top 6 Maxima Coordinates:", top_coords)
    #     print("Top 6 Maxima Values:", top_values)

    #     image_with_maxima = drawMaximaOnImage(image_rgb, top_coords, radius=5, color='red')
    #     image_with_maxima = np.array(image_with_maxima)
    #     # Save the resulting image with highlighted maxima
    #     Image.fromarray(image_with_maxima).save('highlighted_maxima_{}.png'.format(image_path.split('/')[-1]))

In [None]:
# 2. Saliency-based Part Initialization and Feature Extraction
def compute_saliency_map(image):
    """Phase-only Fourier saliency: rebuild the image from its phase spectrum.

    The 2D FFT of ``image`` is reduced to unit magnitude (phase kept), inverse
    transformed, and the absolute value of the real part is smoothed with a
    5x5 Gaussian blur.
    """
    spectrum = np.fft.fft2(image)
    # Discard the magnitude; only the phase angle feeds the reconstruction
    unit_phase_spectrum = np.exp(1j * np.angle(spectrum))
    reconstruction = np.fft.ifft2(unit_phase_spectrum).real
    return cv2.GaussianBlur(np.abs(reconstruction), (5, 5), 0)

# def extract_salient_points(image, num_points=6):
#     saliency_map = compute_saliency_map(image)
#     keypoints = cv2.goodFeaturesToTrack(saliency_map, num_points, 0.01, 10)
#     return np.int0(keypoints).reshape(-1, 2)

# def extract_salient_points(image, num_points=6):
    # saliency_map = compute_saliency_map(image)
    
    # # Normalize the saliency map to the range [0, 255] and convert to uint8
    # saliency_map = cv2.normalize(saliency_map, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
    
    # # Detect keypoints based on saliency
    # keypoints = cv2.goodFeaturesToTrack(saliency_map, num_points, 0.01, 10)
    # return np.int0(keypoints).reshape(-1, 2)

# def extract_salient_points(image, num_points=6):
#     saliency_map = compute_saliency_map(image)
    
#     # Normalize the saliency map to the range [0, 255] and convert to uint8
#     saliency_map = cv2.normalize(saliency_map, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
    
#     # Detect keypoints based on saliency
#     keypoints = cv2.goodFeaturesToTrack(saliency_map, num_points, 0.01, 10)
#     return np.intp(keypoints).reshape(-1, 2)  # Use np.intp instead of np.int0

# def extract_salient_points(image, num_points=6):
#     saliency_map = compute_saliency_map(image)
    
#     # Normalize the saliency map to the range [0, 255] and convert to uint8
#     saliency_map = cv2.normalize(saliency_map, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
    
#     # Detect keypoints based on saliency
#     keypoints = cv2.goodFeaturesToTrack(saliency_map, num_points, 0.01, 10)
    
#     # Check if keypoints were found; if not, return an empty array
#     if keypoints is None:
#         print("No keypoints detected, returning an empty array.")
#         return np.array([])  # Return an empty array if no keypoints detected
    
#     # If keypoints are detected, reshape and return them
#     return np.intp(keypoints).reshape(-1, 2)  # Use np.intp instead of np.int0

def extract_salient_points(image, mask=None, num_points=6):
    """Pick ``num_points`` salient keypoints of an image as (x, y) pixel pairs.

    Corners are first detected on the normalised saliency map with
    ``cv2.goodFeaturesToTrack``. If that yields fewer than ``num_points``
    points, the top maxima of the (optionally masked) saliency map from
    ``getSaliencyMap`` are used instead.

    Args:
        image: Grayscale H*W image.
        mask: Optional H*W mask restricting the fallback maxima; None means
            the whole image is eligible.
        num_points: Number of keypoints requested.

    Returns:
        Integer array of shape (N, 2) whose rows are (x, y) coordinates.
    """
    # First attempt to detect keypoints using cv2.goodFeaturesToTrack
    saliency_map = compute_saliency_map(image)
    saliency_map = cv2.normalize(saliency_map, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
    keypoints = cv2.goodFeaturesToTrack(saliency_map, num_points, 0.005, 5)

    if keypoints is None or len(keypoints) < num_points:
        # If no keypoints are detected or fewer than required, use fallback
        print("Using fallback method to get top maxima.")

        # getSaliencyMap documents an H*W*3 image and an H*W mask as input, so
        # replicate a grayscale image across three channels and substitute an
        # all-inclusive mask when none was supplied
        fallback_image = np.asarray(image)
        if fallback_image.ndim == 2:
            fallback_image = np.repeat(fallback_image[:, :, np.newaxis], 3, axis=2)
        if mask is None:
            mask = np.full(fallback_image.shape[:2], 255, dtype=np.uint8)

        final_saliency_map = getSaliencyMap(fallback_image, mask)
        fallback_keypoints, _ = getTopMaxima(final_saliency_map, top_n=num_points)

        # getTopMaxima yields (row, col); swap to (x, y) so both detection
        # paths return the same coordinate order
        keypoints = np.array([(col, row) for row, col in fallback_keypoints])

    return np.asarray(keypoints).astype(np.intp).reshape(-1, 2)



# def extract_features(image, keypoints):
#     # Extract SIFT and color histogram features at keypoint locations
#     sift = cv2.SIFT_create()
#     keypoints = [cv2.KeyPoint(x, y, 48) for (x, y) in keypoints]
#     _, descriptors = sift.compute(image, keypoints)
#     hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
#     color_hist = cv2.calcHist([hsv], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
#     return descriptors, color_hist.flatten()

# def extract_features(image, keypoints):
#     # Extract SIFT and color histogram features at keypoint locations
#     sift = cv2.SIFT_create()
    
#     # Ensure keypoints are in float format
#     keypoints = [cv2.KeyPoint(float(x), float(y), 48) for (x, y) in keypoints]
#     _, descriptors = sift.compute(image, keypoints)
    
#     # Convert image to HSV and calculate color histogram
#     hsv = cv2.cvtColor(cv2.cvtColor(image, cv2.COLOR_GRAY2BGR), cv2.COLOR_BGR2HSV)
#     color_hist = cv2.calcHist([hsv], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
#     return descriptors, color_hist.flatten()

def extract_features(image, keypoints):
    """Compute SIFT descriptors at the keypoints plus a global colour histogram.

    Args:
        image: Grayscale image (it is converted GRAY -> BGR -> HSV for the
            histogram).
        keypoints: Array-like of (x, y) keypoint coordinates; may be empty.

    Returns:
        Tuple ``(descriptors, color_hist)``:
        descriptors -- 2D array with one 128-value SIFT descriptor per row; an
                       empty (0, 128) array when there is nothing to describe,
                       so callers can stack or average it without special cases
        color_hist  -- flattened 8x8x8 HSV histogram (512 values); all zeros
                       when no keypoints were given
    """
    keypoints = np.asarray(keypoints)

    # If no keypoints were detected, return empty descriptors and a zero histogram
    if keypoints.size == 0:
        return np.empty((0, 128), dtype=np.float32), np.zeros(512, dtype=np.float32)

    # Extract SIFT descriptors at the keypoint locations (keypoint size 48)
    sift = cv2.SIFT_create()
    cv_keypoints = [cv2.KeyPoint(float(x), float(y), 48) for (x, y) in keypoints]
    _, descriptors = sift.compute(image, cv_keypoints)
    if descriptors is None:
        # NOTE(review): presumably returned when no keypoint could be described;
        # normalise it to the same empty shape as above
        descriptors = np.empty((0, 128), dtype=np.float32)

    # Convert image to HSV and calculate color histogram
    hsv = cv2.cvtColor(cv2.cvtColor(image, cv2.COLOR_GRAY2BGR), cv2.COLOR_BGR2HSV)
    color_hist = cv2.calcHist([hsv], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256]).flatten()

    return descriptors, color_hist


# Extract features for all images
# The three lists are filled in lockstep, so entry i of each list belongs to images[i].
all_descriptors, all_histograms, all_keypoints = [], [], []
for image in images:
    # No mask is passed here, so extract_salient_points uses its default mask=None
    keypoints = extract_salient_points(image)
    descriptors, color_hist = extract_features(image, keypoints)
    all_keypoints.append(keypoints)
    all_descriptors.append(descriptors)
    all_histograms.append(color_hist)

In [None]:
# 3. Non-Rigid Part Model - EM-like Optimization
def initialize_non_rigid_model(descriptors, num_parts=6, n_components=128):
    """Fit a PCA on all descriptors stacked together and project them.

    Args:
        descriptors: Iterable of per-image descriptor arrays (rows are
            descriptors). Entries that are None or empty are skipped.
        num_parts: Currently unused; kept for interface compatibility.
        n_components: Upper bound on the number of PCA components. It is
            lowered to min(n_samples, n_features) when the data are smaller,
            because PCA cannot fit more components than that.

    Returns:
        Tuple ``(reduced_descriptors, pca)``: the projected descriptors and
        the fitted PCA model.

    Raises:
        ValueError: If no usable descriptors are supplied.
    """
    usable = [np.asarray(desc) for desc in descriptors if desc is not None and len(desc) > 0]
    if not usable:
        raise ValueError("No descriptors available to fit PCA.")

    stacked = np.vstack(usable)

    # PCA for dimensionality reduction
    pca = PCA(n_components=min(n_components, *stacked.shape))
    reduced_descriptors = pca.fit_transform(stacked)
    return reduced_descriptors, pca

# Fit the PCA on the descriptors of every image and keep both the projected
# descriptors and the fitted model.
reduced_descriptors, pca_model = initialize_non_rigid_model(all_descriptors)


In [None]:
# 4. Hierarchical Partial Classification - SVM with Hierarchy
def train_hierarchical_svm(features, labels, penalty_param=1.0):
    """Fit and return an RBF-kernel SVC on the given features and labels.

    The classifier uses balanced class weights, ``C=penalty_param`` and has
    probability estimates enabled. This is a single flat SVC; no class
    hierarchy is built here.
    """
    svc_options = {
        'kernel': 'rbf',
        'class_weight': 'balanced',
        'C': penalty_param,
        'probability': True,
    }
    model = SVC(**svc_options)
    model.fit(features, labels)
    return model

# Prepare training data (combine SIFT descriptors and color histograms)
# Every image must have exactly one label, otherwise features and labels would
# be paired incorrectly.
if len(labels) != len(all_descriptors):
    raise ValueError(
        f'Label/feature count mismatch: {len(labels)} labels for {len(all_descriptors)} images'
    )

# Images without any descriptor cannot produce a mean descriptor, so skip them
# together with their labels.
valid_indices = [
    idx for idx, desc in enumerate(all_descriptors)
    if desc is not None and len(desc) > 0
]

# One feature vector per image: mean SIFT descriptor followed by the color histogram
train_features = np.array([
    np.concatenate((all_descriptors[idx].mean(axis=0), all_histograms[idx]))
    for idx in valid_indices
])
train_labels = np.asarray(labels)[valid_indices]
X_train, X_test, y_train, y_test = train_test_split(train_features, train_labels, test_size=0.2, random_state=42)

# Train the model
svm_model = train_hierarchical_svm(X_train, y_train)

In [None]:
# 5. Testing and Evaluation
def evaluate_model(model, X_test, y_test):
    """Score a fitted model on held-out data.

    Returns a tuple ``(accuracy, precision, recall)``; precision and recall
    are weighted averages over the classes.
    """
    predictions = model.predict(X_test)
    averaging = {'average': 'weighted'}
    return (
        accuracy_score(y_test, predictions),
        precision_score(y_test, predictions, **averaging),
        recall_score(y_test, predictions, **averaging),
    )

In [None]:
# Evaluate
# Score the trained SVM on the held-out split; precision and recall are the
# weighted averages computed by evaluate_model.
accuracy, precision, recall = evaluate_model(svm_model, X_test, y_test)
print(f'Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}')

# End-to-end testing is now set up for the Fish4Knowledge dataset
