In [73]:
import pandas as pd
import numpy as np

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense
from sklearn.model_selection import StratifiedKFold

In [None]:
# Body-part labels handed to calculate_features. The position of each label
# selects which x/y column pair is read for it, so the order matters.
body_parts = [
    'mouth',
    'eye',
    'skull',
    'upper tail bone',
    'lower tail bone',
    'upper tail',
    'lower tail',
    'pectoral fin',
    'anal fin start',
    'anal fin mid',
    'dorsal fin_base',
    'dorsal fin_tip',
    'stomach',
    'middle',
]

In [None]:
def calculate_features(data_numeric, target_length=141, body_parts=None, num_individuals=8):
    """Compute per-frame speed and direction for every individual/body-part pair.

    For individual ``k`` (1-based) and body part ``i`` the coordinates are read
    from the columns ``x.<n>`` / ``y.<n>`` with ``n = (k - 1) * len(body_parts) + i``;
    for ``n == 0`` the bare names ``x`` / ``y`` are used.
    NOTE(review): this presumably mirrors how pandas renames repeated ``x``/``y``
    headers on load -- confirm against the CSV layout.

    Parameters
    ----------
    data_numeric : pandas.DataFrame
        Table holding the coordinate columns described above. Rows are
        treated positionally (row 0 is the first frame), whatever the index.
    target_length : int, default 141
        Number of frames considered. Rows at positions ``>= target_length``
        are ignored; shorter inputs are zero-filled.
    body_parts : list of str, optional
        Body-part labels in column order. Defaults to the 14-part list below.
    num_individuals : int, default 8
        Number of individuals whose columns are looked up.

    Returns
    -------
    dict
        Maps ``'individual<k>_<body part>'`` to a DataFrame with columns
        ``'Speed'`` and ``'Direction'`` (degrees). Each frame has
        ``target_length + 1`` rows: a leading zero row is prepended to the
        frame-aligned values. Pairs whose x or y column is missing from
        ``data_numeric`` are skipped.
    """
    if body_parts is None:
        body_parts = [
            'mouth', 'eye', 'skull', 'upper tail bone', 'lower tail bone',
            'upper tail', 'lower tail', 'pectoral fin', 'anal fin start',
            'anal fin mid', 'dorsal fin_base', 'dorsal fin_tip', 'stomach', 'middle'
        ]

    def process_column(column, target_length):
        """Return, per frame, the change since the previous non-missing sample.

        Frames without a value, and the first valid frame, stay at zero.
        """
        result_array = np.zeros(target_length)
        # Work on positions, not index labels, so a non-default index cannot
        # raise KeyError or silently select the wrong rows. Slicing the mask
        # keeps every write inside result_array.
        valid_mask = column.notna().to_numpy()[:target_length]
        valid_positions = np.flatnonzero(valid_mask)
        if valid_positions.size > 1:
            valid_values = column.to_numpy()[valid_positions].astype(float)
            # Each difference is stored at the position of the later sample.
            result_array[valid_positions[1:]] = np.diff(valid_values)
        return result_array

    features_per_individual_and_bodypart = {}

    for individual in range(1, num_individuals + 1):
        column_offset = (individual - 1) * len(body_parts)

        for i, body_part in enumerate(body_parts):
            column_number = column_offset + i
            x_col_name = f'x.{column_number}' if column_number > 0 else 'x'
            y_col_name = f'y.{column_number}' if column_number > 0 else 'y'

            if x_col_name not in data_numeric.columns or y_col_name not in data_numeric.columns:
                continue

            delta_x = process_column(data_numeric[x_col_name], target_length)
            delta_y = process_column(data_numeric[y_col_name], target_length)

            if len(delta_x) > 0 and len(delta_y) > 0:
                # The leading zero is kept so the output length (target_length + 1)
                # stays what downstream cells were built against.
                speed = np.insert(np.sqrt(delta_x**2 + delta_y**2), 0, 0)
                direction = np.insert(np.arctan2(delta_y, delta_x), 0, 0)
                direction_degrees = np.degrees(direction)
                features_per_individual_and_bodypart[f'individual{individual}_{body_part}'] = pd.DataFrame({
                    'Speed': speed,
                    'Direction': direction_degrees
                })

    return features_per_individual_and_bodypart


def calculate_average_features(features_per_individual_and_bodypart):
    """Average the body-part features of each individual into one table.

    Keys of the input are expected to look like ``'<individual>_<body part>'``;
    everything before the first underscore identifies the individual. All
    frames (a Series is first converted to a one-column frame) belonging to
    the same individual are placed side by side, then every column whose
    label contains ``'Speed'`` / ``'Direction'`` is averaged row-wise.

    Note that the direction is a plain arithmetic mean of the angle values.

    Returns
    -------
    dict
        Maps each individual identifier to a DataFrame with the columns
        ``'Speed'`` and ``'Direction'``.
    """
    side_by_side = {}
    for feature_key, frame in features_per_individual_and_bodypart.items():
        owner = feature_key.split('_')[0]

        if isinstance(frame, pd.Series):
            frame = frame.to_frame()

        if owner not in side_by_side:
            side_by_side[owner] = frame
            continue

        # Append this body part's columns to what was gathered so far.
        side_by_side[owner] = pd.concat([side_by_side[owner], frame], axis=1)

    averaged = {}
    for owner, wide_frame in side_by_side.items():
        speed_labels = [label for label in wide_frame.columns if 'Speed' in label]
        direction_labels = [label for label in wide_frame.columns if 'Direction' in label]

        averaged[owner] = pd.DataFrame({
            'Speed': wide_frame[speed_labels].mean(axis=1),
            'Direction': wide_frame[direction_labels].mean(axis=1)
        })

    return averaged

In [95]:
# --- First video: 8 individuals, target_length=141 ---
# skiprows=3 drops the first three lines of the file before the header row is read
# (presumably extra header rows of the tracking export -- verify against the CSV).
first_video_data_numeric = pd.read_csv('Collecteddata_first_video.csv', skiprows=3)
first_video_data = calculate_features(first_video_data_numeric, target_length=141, body_parts=body_parts, num_individuals=8)
first_video_data_individual = calculate_average_features(first_video_data)
# One (frames, 2) array per individual, stacked into a single float32 array.
data_first_video = [df.values for df in first_video_data_individual.values()]
first_video_data_padded = pad_sequences(data_first_video, padding='post', dtype='float32')

# --- Second video: 11 individuals, target_length=56 ---
second_video_data_numeric = pd.read_csv('new_Collecteddata_second_video.csv', skiprows=3)
second_video_data = calculate_features(second_video_data_numeric, target_length=56, body_parts=body_parts, num_individuals=11)
second_video_data_individual = calculate_average_features(second_video_data)
data_second_video = [df.values for df in second_video_data_individual.values()]
second_video_data_padded = pad_sequences(data_second_video, padding='post', dtype='float32')

# Species ids, one entry per individual:
#   0 - black
#   1 - yellow
#   2 - grey
#   3 - present in the second video only, absent from the first
firs_video_labels = np.array([0, 0, 2, 2, 1, 2, 1, 1])
second_video_labels = np.array([0, 1, 0, 2, 1, 3, 3, 1, 1, 1, 3])

# Species 3 never occurs in the first video, so letting to_categorical infer the
# width would give 3 columns there and 4 for the second video. Encode both with
# the same number of classes so the label arrays are interchangeable.
label_class_count = int(max(firs_video_labels.max(), second_video_labels.max())) + 1
firs_video_labels_categorical = to_categorical(firs_video_labels, num_classes=label_class_count)
second_video_labels_categorical = to_categorical(second_video_labels, num_classes=label_class_count)

In [None]:
# Total number of species ids used in the labels (0-3).
number_of_species = 4
# Individuals tracked in each video; these match the num_individuals values
# passed to calculate_features and the lengths of the label lists.
first_video_number_of_individuals = 8
second_video_number_of_individuals = 11

## CHOOSE WHICH VIDEO TO USE FOR TRAINING AND WHICH FOR TESTING

In [None]:
# Current choice: train on the first video, test on the second.
train_data = first_video_data_padded
train_labels = firs_video_labels_categorical
test_data_padded = second_video_data_padded

## MODEL CREATION

In [88]:
def create_model(input_shape, num_classes):
    """Build and compile a single-layer LSTM classifier.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sequence, passed to the LSTM layer as ``input_shape``.
    num_classes : int
        Width of the softmax output layer; must equal the number of columns
        of the one-hot labels the model is trained on.

    Returns
    -------
    A compiled ``Sequential`` model (Adam optimizer, categorical cross-entropy
    loss, accuracy metric).
    """
    model = Sequential([
        LSTM(50, activation='relu', input_shape=input_shape),
        # Size the output from the argument rather than a notebook-level
        # global, so the function honours what the caller asks for.
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

## TRAINING

In [89]:
n_splits = 3
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

# StratifiedKFold stratifies on class ids and rejects one-hot (2-D) targets,
# so recover the integer class of every sample from the one-hot rows.
train_class_ids = np.argmax(train_labels, axis=1)
model_input_shape = (train_data.shape[1], train_data.shape[2])
num_label_classes = train_labels.shape[1]

for train_index, test_index in skf.split(train_data, train_class_ids):
    # Use the arrays selected in the train/test cell; the names data_padded and
    # labels_categorical are not defined anywhere in this notebook.
    X_train, X_val = train_data[train_index], train_data[test_index]
    y_train, y_val = train_labels[train_index], train_labels[test_index]

    # A fresh model per fold; `model` and `history` keep the last fold's objects.
    model = create_model(model_input_shape, num_label_classes)
    history = model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val))





Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [90]:
# Write the trained model to disk. `model` is reassigned on every fold of the
# training loop, so this saves the network fitted on the last fold only.
model.save('model_fish_detection.h5')

## TEST

In [94]:
# load_model is already imported in the imports cell at the top of the notebook.

# Load the saved model
model_loaded = load_model('model_fish_detection.h5')

# Predict class probabilities for the test sequences
predictions = model_loaded.predict(test_data_padded)

# Convert the predicted probabilities into concrete class ids
predicted_classes = np.argmax(predictions, axis=1)

# Print the predicted class of every test individual
print(predicted_classes)

[2 2 2 2 1 2 2 0 2 0 2]


In [None]:
# Ground-truth species ids for the second video. NOTE: this rebinds
# second_video_labels from the NumPy array created in the loading cell to a
# plain list with the same values.
second_video_labels =   [0, 1, 0, 2, 1, 3, 3, 1, 1, 1, 3]
# Predicted class ids, copied by hand from the output printed by the test cell;
# they are not refreshed automatically when the model is retrained.
predicted_test = [2, 2, 2, 2, 1, 2, 2, 0, 2, 0, 2]