In [1]:
import pandas as pd
import numpy as np

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential, load_model
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.layers import LSTM, Dense, Masking

from sklearn.model_selection import KFold

In [2]:
# Tracked landmarks. prepare_individuals only uses each name's position in this
# list to work out which x/y column pair to read; the names themselves are not
# matched against the CSV header.
body_parts = [
    'mouth',
    'eye',
    'skull',
    'upper tail bone',
    'lower tail bone',
    'upper tail',
    'lower tail',
    'pectoral fin',
    'anal fin start',
    'anal fin mid',
    'dorsal fin_base',
    'dorsal fin_tip',
    'stomach',
    'middle',
]

In [3]:
import numpy as np
import pandas as pd

def prepare_individuals(data_numeric, target_length=141, body_parts=None, num_individuals=8):
    """Build per-individual speed/direction time series from x/y coordinate columns.

    Coordinate columns are looked up by position: the first pair is ``'x'``/``'y'``
    and pair number ``k`` (k >= 1) is ``'x.k'``/``'y.k'``, where
    ``k = (individual - 1) * len(body_parts) + body_part_index``.
    (Presumably these are the names pandas gives to repeated ``x``/``y`` header
    labels -- confirm against the CSV layout.)

    Parameters
    ----------
    data_numeric : pandas.DataFrame
        Frame holding the coordinate columns, one row per time step. Missing
        observations are NaN.
    target_length : int, default 141
        Length of the per-column difference arrays. Must be at least the row
        position of the last non-NaN value of every processed column.
    body_parts : list of str, optional
        Body-part names; only their count and order are used. Defaults to the
        14-landmark list below.
    num_individuals : int, default 8
        Number of individuals to look for (numbered from 1).

    Returns
    -------
    dict of str -> pandas.DataFrame
        Maps ``'individual<k>'`` to a frame with ``target_length + 1`` rows and
        two integer-labelled columns per body part found (speed, then direction
        in degrees). Individuals with no matching columns are omitted.

    Raises
    ------
    IndexError
        If a column has two or more non-NaN values and one of them (other than
        the first) sits at a row position >= ``target_length``.
    """
    if body_parts is None:
        body_parts = [
            'mouth', 'eye', 'skull', 'upper tail bone', 'lower tail bone',
            'upper tail', 'lower tail', 'pectoral fin', 'anal fin start',
            'anal fin mid', 'dorsal fin_base', 'dorsal fin_tip', 'stomach', 'middle'
        ]

    def process_column(column, target_length):
        """Return differences between consecutive non-NaN values of ``column``.

        Each difference is stored at the row position of the later of the two
        observations; every other position is 0. Differences are not divided by
        the size of the gap between the two observations.
        """
        deltas = np.zeros(target_length)
        # Work on the raw array so that row *positions* are used throughout;
        # indexing the Series itself would treat the positions as index labels
        # and fail (or pick wrong rows) when the index is not 0..n-1.
        values = column.to_numpy()
        valid_pos = np.flatnonzero(column.notna().to_numpy())
        if valid_pos.size > 1:
            deltas[valid_pos[1:]] = np.diff(values[valid_pos])
        return deltas

    n_parts = len(body_parts)
    available = data_numeric.columns
    individual_features = {}

    for individual in range(1, num_individuals + 1):
        features_list = []

        for idx in range(n_parts):
            # Pair 0 has no numeric suffix; every later pair is 'x.<k>' / 'y.<k>'.
            offset = (individual - 1) * n_parts + idx
            suffix = f'.{offset}' if offset else ''
            x_col_name = f'x{suffix}'
            y_col_name = f'y{suffix}'

            if x_col_name not in available or y_col_name not in available:
                continue

            delta_x = process_column(data_numeric[x_col_name], target_length)
            delta_y = process_column(data_numeric[y_col_name], target_length)

            if delta_x.size > 0 and delta_y.size > 0:
                # A leading 0 is prepended, so each feature has target_length + 1
                # entries; downstream cells rely on that length.
                speed = np.insert(np.sqrt(delta_x**2 + delta_y**2), 0, 0)
                direction_degrees = np.degrees(np.insert(np.arctan2(delta_y, delta_x), 0, 0))

                features_list.append(speed)
                features_list.append(direction_degrees)

        if features_list:
            # Rows of the temporary frame are features; transpose to time-major.
            individual_features[f'individual{individual}'] = pd.DataFrame(features_list).transpose()

    return individual_features

In [4]:
# Load the labelled coordinates; the first three lines of the file are skipped,
# so the fourth line provides the column names.
jaime_data_numeric = pd.read_csv('CollectedData_jaime.csv', skiprows=3)
print(jaime_data_numeric.shape)

jaime_data = prepare_individuals(
    jaime_data_numeric,
    target_length=141,
    body_parts=body_parts,
    num_individuals=8,
)

# individual8 is excluded from the dataset (no-op if it was never produced).
jaime_data.pop('individual8', None)

# Report how many feature columns each remaining individual has.
for key, features in jaime_data.items():
    print(f"{key}: {len(features.columns)} columns")

(141, 227)
individual1: 28 columns
individual2: 28 columns
individual3: 28 columns
individual4: 28 columns
individual5: 28 columns
individual6: 28 columns
individual7: 28 columns


In [5]:
# Class label (0, 1 or 2) for each individual, in individual1, individual2, ...
# order. The list has eight entries; only as many as there are retained
# individuals are consumed downstream.
jaime_labels = [
    0, 0, 2, 2,
    1, 2, 1, 1,
]

In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.utils import to_categorical
import numpy as np
from sklearn.model_selection import train_test_split

# Labels come from jaime_labels (defined in the previous cell) and are matched
# to jaime_data by insertion order: individual1, individual2, ...
n_individuals = len(jaime_data)
assert len(jaime_labels) >= n_individuals, "not enough labels for the individuals in jaime_data"

# Stack the per-individual DataFrames into one (samples, time steps, features) array.
all_data = np.stack([df.values for df in jaime_data.values()], axis=0)

# One-hot encode the labels for the three classes.
all_labels = to_categorical(jaime_labels[:n_individuals], num_classes=3)

# Split data and labels into training and validation sets (80% - 20%).
train_data, validation_data, train_labels, validation_labels = train_test_split(
    all_data, all_labels, test_size=0.2, random_state=42)

# Masking has to be the FIRST layer so the LSTM skips time steps whose features
# are all zero (e.g. zero padding). Its input shape is read from the data rather
# than hard-coded, so it always matches what prepare_individuals produced.
n_timesteps, n_features = train_data.shape[1], train_data.shape[2]

model = Sequential([
    Masking(mask_value=0., input_shape=(n_timesteps, n_features)),
    LSTM(50),                        # 50 LSTM units
    Dense(3, activation='softmax'),  # output layer for the 3 classes
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

history = model.fit(train_data, train_labels, epochs=10, validation_data=(validation_data, validation_labels))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [7]:
# Load the second labelled dataset; the first three lines of the file are
# skipped, so the fourth line provides the column names.
katia_data_numeric = pd.read_csv('new_CollectedData_katia.csv', skiprows=3)
print(katia_data_numeric.shape)

katia_data = prepare_individuals(
    katia_data_numeric,
    target_length=56,
    body_parts=body_parts,
    num_individuals=11,
)

# individual11 is excluded from the dataset (no-op if it was never produced).
katia_data.pop('individual11', None)

# Report how many feature columns each remaining individual has.
for key, features in katia_data.items():
    print(f"{key}: {len(features.columns)} columns")

(56, 311)
individual1: 28 columns
individual2: 28 columns
individual3: 28 columns
individual4: 28 columns
individual5: 28 columns
individual6: 28 columns
individual7: 28 columns
individual8: 28 columns
individual9: 28 columns
individual10: 28 columns


In [8]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

# The model expects sequences as long as the ones it was trained on; read that
# length from the training array instead of hard-coding it.
n_timesteps = train_data.shape[1]

# Zero-pad (or truncate) every individual's (time steps, features) matrix at the
# end so all of them have n_timesteps rows. The result is a single
# (samples, time steps, features) float32 array.
test_data_padded = pad_sequences(
    [df.values for df in katia_data.values()],
    maxlen=n_timesteps,
    dtype='float32',
    padding='post',
    truncating='post',
    value=0.0,
)

# Class probabilities for each individual.
predictions = model.predict(test_data_padded)

# Most probable class per individual.
predicted_labels = np.argmax(predictions, axis=1)
predicted_labels




array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2], dtype=int64)