In [10]:
import pandas as pd
import numpy as np

In [11]:
# Load the CSV file and skip the first three rows of metadata to directly inspect the data columns
data_numeric = pd.read_csv('Collecteddata_first_video.csv', skiprows=3)

In [12]:
# Display the actual column names from the data
actual_data_columns = data_numeric.columns.tolist()

actual_data_columns

['coords',
 'Unnamed: 1',
 'Unnamed: 2',
 'x',
 'y',
 'x.1',
 'y.1',
 'x.2',
 'y.2',
 'x.3',
 'y.3',
 'x.4',
 'y.4',
 'x.5',
 'y.5',
 'x.6',
 'y.6',
 'x.7',
 'y.7',
 'x.8',
 'y.8',
 'x.9',
 'y.9',
 'x.10',
 'y.10',
 'x.11',
 'y.11',
 'x.12',
 'y.12',
 'x.13',
 'y.13',
 'x.14',
 'y.14',
 'x.15',
 'y.15',
 'x.16',
 'y.16',
 'x.17',
 'y.17',
 'x.18',
 'y.18',
 'x.19',
 'y.19',
 'x.20',
 'y.20',
 'x.21',
 'y.21',
 'x.22',
 'y.22',
 'x.23',
 'y.23',
 'x.24',
 'y.24',
 'x.25',
 'y.25',
 'x.26',
 'y.26',
 'x.27',
 'y.27',
 'x.28',
 'y.28',
 'x.29',
 'y.29',
 'x.30',
 'y.30',
 'x.31',
 'y.31',
 'x.32',
 'y.32',
 'x.33',
 'y.33',
 'x.34',
 'y.34',
 'x.35',
 'y.35',
 'x.36',
 'y.36',
 'x.37',
 'y.37',
 'x.38',
 'y.38',
 'x.39',
 'y.39',
 'x.40',
 'y.40',
 'x.41',
 'y.41',
 'x.42',
 'y.42',
 'x.43',
 'y.43',
 'x.44',
 'y.44',
 'x.45',
 'y.45',
 'x.46',
 'y.46',
 'x.47',
 'y.47',
 'x.48',
 'y.48',
 'x.49',
 'y.49',
 'x.50',
 'y.50',
 'x.51',
 'y.51',
 'x.52',
 'y.52',
 'x.53',
 'y.53',
 'x.54',
 '

In [34]:
body_parts = [
            'mouth', 'eye', 'skull', 'upper tail bone', 'lower tail bone',
            'upper tail', 'lower tail', 'pectoral fin', 'anal fin start',
            'anal fin mid', 'dorsal fin_base', 'dorsal fin_tip', 'stomach', 'middle'
        ]

In [38]:
import numpy as np
import pandas as pd

def calculate_features(data_numeric, target_length=141, body_parts=None, num_individuals=8):
    if body_parts is None:
        body_parts = [
            'mouth', 'eye', 'skull', 'upper tail bone', 'lower tail bone',
            'upper tail', 'lower tail', 'pectoral fin', 'anal fin start',
            'anal fin mid', 'dorsal fin_base', 'dorsal fin_tip', 'stomach', 'middle'
        ]

    def process_column(column, target_length):
        result_array = np.zeros(target_length)
        non_nan_indices = np.where(~column.isna())[0]
        if len(non_nan_indices) > 1:
            valid_values = column[non_nan_indices]
            differences = np.diff(valid_values)
            for i, diff in enumerate(differences):
                result_array[non_nan_indices[i + 1]] = diff
        return result_array

    features_per_individual_and_bodypart = {}
    
    for individual in range(1, num_individuals + 1):
        column_offset = (individual - 1) * len(body_parts)

        for i, body_part in enumerate(body_parts):
            x_col_name = f'x.{column_offset + i}' if column_offset + i > 0 else 'x'
            y_col_name = f'y.{column_offset + i}' if column_offset + i > 0 else 'y'

            if x_col_name in data_numeric.columns and y_col_name in data_numeric.columns:
                delta_x = process_column(data_numeric[x_col_name], target_length)
                delta_y = process_column(data_numeric[y_col_name], target_length)

                if len(delta_x) > 0 and len(delta_y) > 0:
                    speed = np.insert(np.sqrt(delta_x**2 + delta_y**2), 0, 0)
                    direction = np.insert(np.arctan2(delta_y, delta_x), 0, 0)
                    direction_degrees = np.degrees(direction)
                    features_per_individual_and_bodypart[f'individual{individual}_{body_part}'] = pd.DataFrame({
                        'Speed': speed,
                        'Direction': direction_degrees
                    })

    return features_per_individual_and_bodypart


import pandas as pd

def calculate_average_features(features_per_individual_and_bodypart):
    # Nuevo diccionario para almacenar los DataFrames combinados por individuo de manera adecuada
    average_features_per_individual = {}

    # Iterar sobre los elementos del diccionario y combinar los DataFrames correctamente
    for key, df in features_per_individual_and_bodypart.items():
        # Extraer el número del individuo de la clave
        individual_number = key.split('_')[0]

        # Asegurarse de que df es un DataFrame y no una Serie
        if isinstance(df, pd.Series):
            df = df.to_frame()

        # Agregar el DataFrame actual al DataFrame combinado del individuo correspondiente
        if individual_number in average_features_per_individual:
            # Concatenar horizontalmente para mantener las columnas de Speed y Direction separadas
            average_features_per_individual[individual_number] = pd.concat([
                average_features_per_individual[individual_number],
                df
            ], axis=1)
        else:
            average_features_per_individual[individual_number] = df

    # Calcular la media de 'Speed' y 'Direction' para cada individuo y almacenar en un nuevo DataFrame
    for individual, combined_df in average_features_per_individual.items():
        # Calcular la media de todas las columnas de Speed y Direction, respectivamente
        speed_columns = [col for col in combined_df.columns if 'Speed' in col]
        direction_columns = [col for col in combined_df.columns if 'Direction' in col]
        average_speed = combined_df[speed_columns].mean(axis=1)
        average_direction = combined_df[direction_columns].mean(axis=1)

        # Crear un nuevo DataFrame con las medias calculadas
        average_features_per_individual[individual] = pd.DataFrame({
            'Speed': average_speed,
            'Direction': average_direction
        })

    return average_features_per_individual



In [37]:
first_video_data = calculate_features(data_numeric, target_length=141, body_parts=body_parts, num_individuals=8)

print(first_video_data.keys())
first_video_data['individual1_mouth']

dict_keys(['individual1_mouth', 'individual1_eye', 'individual1_skull', 'individual1_upper tail bone', 'individual1_lower tail bone', 'individual1_upper tail', 'individual1_lower tail', 'individual1_pectoral fin', 'individual1_anal fin start', 'individual1_anal fin mid', 'individual1_dorsal fin_base', 'individual1_dorsal fin_tip', 'individual1_stomach', 'individual1_middle', 'individual2_mouth', 'individual2_eye', 'individual2_skull', 'individual2_upper tail bone', 'individual2_lower tail bone', 'individual2_upper tail', 'individual2_lower tail', 'individual2_pectoral fin', 'individual2_anal fin start', 'individual2_anal fin mid', 'individual2_dorsal fin_base', 'individual2_dorsal fin_tip', 'individual2_stomach', 'individual2_middle', 'individual3_mouth', 'individual3_eye', 'individual3_skull', 'individual3_upper tail bone', 'individual3_lower tail bone', 'individual3_upper tail', 'individual3_lower tail', 'individual3_pectoral fin', 'individual3_anal fin start', 'individual3_anal fin 

Unnamed: 0,Speed,Direction
0,0.000000,0.000000
1,0.000000,0.000000
2,12.466371,28.220251
3,1.969532,98.327717
4,5.953209,-9.393528
...,...,...
137,28.674713,-24.281933
138,57.889672,2.629704
139,0.000000,0.000000
140,0.000000,0.000000


In [40]:
first_video_data_individual = calculate_average_features(first_video_data)

print(first_video_data_individual.keys())
print(first_video_data_individual['individual1'])

dict_keys(['individual1', 'individual2', 'individual3', 'individual4', 'individual5', 'individual6', 'individual7', 'individual8'])
         Speed  Direction
0     0.000000   0.000000
1     0.000000   0.000000
2    11.978222 -37.339047
3    11.844991  33.781048
4     5.986275  -6.425180
..         ...        ...
137  24.128041 -19.081253
138  35.426386  -3.869269
139  28.052819  -7.363651
140  25.670810 -12.221839
141  23.649012 -10.605975

[142 rows x 2 columns]


In [14]:
# Showing the first 10 column names to understand the naming pattern
actual_data_columnsfeatures_per_individual_and_bodypart = pd.DataFrame()

def process_column(column, target_length=141):
    # Initialize a result array with zeros of the target size
    result_array = np.zeros(target_length)
    
    # Find indices of non-NaN values
    non_nan_indices = np.where(~column.isna())[0]
    
    # Only calculate differences for non-NaN values, without adding a 0 at the beginning
    if len(non_nan_indices) > 1:  # Needs at least two values to compute a difference
        valid_values = column[non_nan_indices]
        differences = np.diff(valid_values)
        
        # Insert the calculated differences into the result array
        # Note: Here we assume we want the difference at the index of the second value in each pair of consecutive non-NaN values
        for i, diff in enumerate(differences):
            # Place the difference at the index of the second value of each pair
            result_array[non_nan_indices[i + 1]] = diff

    return result_array

# Initialize a dictionary to store the calculated features for each individual's body parts
features_per_individual_and_bodypart = {}
features_per_bodypart = {}
body_parts = [
    'mouth',
    'eye',
    'skull',
    'upper tail bone',
    'lower tail bone',
    'upper tail',
    'lower tail',
    'pectoral fin',
    'anal fin start',
    'anal fin mid',
    'dorsal fin_base',
    'dorsal fin_tip',
    'stomach',
    'middle'
]
# Assuming you have 4 individuals and 14 body parts per individual
num_individuals = 8
num_body_parts = 14

for individual in range(1, num_individuals + 1):
    # Offset to get to the correct set of columns for the current individual
    column_offset = (individual - 1) * num_body_parts

    for i, body_part in enumerate(body_parts):
        # Adjusted column names for the current individual
        x_col_name = f'x.{column_offset + i}' if column_offset + i > 0 else 'x'
        y_col_name = f'y.{column_offset + i}' if column_offset + i > 0 else 'y'

        if x_col_name in data_numeric.columns and y_col_name in data_numeric.columns:
            # Process each column separately
            delta_x = process_column(data_numeric[x_col_name])
            #print(delta_x)
            delta_y = process_column(data_numeric[y_col_name])
            #print(delta_y)

            # Calculate speed and direction if possible
            if len(delta_x) > 0 and len(delta_y) >= 0:
                speed = np.insert(np.sqrt(delta_x**2 + delta_y**2), 0, 0)
                direction = np.insert(np.arctan2(delta_y, delta_x), 0, 0)
                direction_degrees = np.degrees(direction)
                # Store the results
                features_per_individual_and_bodypart[f'individual{individual}_{body_part}'] = pd.DataFrame({
                    'Speed': speed,
                    'Direction': direction_degrees
                })

In [15]:
# Display features for a specific body part of a specific individual
# print(features_per_individual_and_bodypart['individual2_dorsal fin_base'])
print(features_per_individual_and_bodypart.keys())
features_per_individual_and_bodypart['individual1_mouth']

dict_keys(['individual1_mouth', 'individual1_eye', 'individual1_skull', 'individual1_upper tail bone', 'individual1_lower tail bone', 'individual1_upper tail', 'individual1_lower tail', 'individual1_pectoral fin', 'individual1_anal fin start', 'individual1_anal fin mid', 'individual1_dorsal fin_base', 'individual1_dorsal fin_tip', 'individual1_stomach', 'individual1_middle', 'individual2_mouth', 'individual2_eye', 'individual2_skull', 'individual2_upper tail bone', 'individual2_lower tail bone', 'individual2_upper tail', 'individual2_lower tail', 'individual2_pectoral fin', 'individual2_anal fin start', 'individual2_anal fin mid', 'individual2_dorsal fin_base', 'individual2_dorsal fin_tip', 'individual2_stomach', 'individual2_middle', 'individual3_mouth', 'individual3_eye', 'individual3_skull', 'individual3_upper tail bone', 'individual3_lower tail bone', 'individual3_upper tail', 'individual3_lower tail', 'individual3_pectoral fin', 'individual3_anal fin start', 'individual3_anal fin 

Unnamed: 0,Speed,Direction
0,0.000000,0.000000
1,0.000000,0.000000
2,12.466371,28.220251
3,1.969532,98.327717
4,5.953209,-9.393528
...,...,...
137,28.674713,-24.281933
138,57.889672,2.629704
139,0.000000,0.000000
140,0.000000,0.000000


In [19]:
# Asumiendo que 'features_per_individual_and_bodypart' es tu diccionario original con los datos

# Nuevo diccionario para almacenar los DataFrames combinados por individuo de manera adecuada
average_features_per_individual = {}

# Iterar sobre los elementos del diccionario y combinar los DataFrames correctamente
for key, df in features_per_individual_and_bodypart.items():
    # Extraer el número del individuo de la clave
    individual_number = key.split('_')[0]
    
    # Asegurarse de que df es un DataFrame y no una Serie
    if isinstance(df, pd.Series):
        df = df.to_frame()
    
    # Agregar el DataFrame actual al DataFrame combinado del individuo correspondiente
    if individual_number in average_features_per_individual:
        # Concatenar horizontalmente para mantener las columnas de Speed y Direction separadas
        average_features_per_individual[individual_number] = pd.concat([
            average_features_per_individual[individual_number],
            df
        ], axis=1)
    else:
        average_features_per_individual[individual_number] = df

# Calcular la media de 'Speed' y 'Direction' para cada individuo y almacenar en un nuevo DataFrame
for individual, combined_df in average_features_per_individual.items():
    # Calcular la media de todas las columnas de Speed y Direction, respectivamente
    # Esto asume que todas las columnas alternas son 'Speed' y las siguientes son 'Direction'
    speed_columns = [col for col in combined_df.columns if 'Speed' in col]
    direction_columns = [col for col in combined_df.columns if 'Direction' in col]
    average_speed = combined_df[speed_columns].mean(axis=1)
    average_direction = combined_df[direction_columns].mean(axis=1)
    
    # Crear un nuevo DataFrame con las medias calculadas
    average_features_per_individual[individual] = pd.DataFrame({
        'Speed': average_speed,
        'Direction': average_direction
    })

# Mostrar el DataFrame resultante para 'individual1' como ejemplo
average_features_per_individual.keys()


dict_keys(['individual1', 'individual2', 'individual3', 'individual4', 'individual5', 'individual6', 'individual7', 'individual8'])

In [26]:
import pandas as pd

# Assuming 'features_per_individual_and_bodypart' is your dictionary
number_of_individuals = 8
number_of_species = 3
labels = [1, 1, 2, 2, 3, 2, 3, 3]

In [33]:
import numpy as np
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.model_selection import StratifiedKFold

# Asumiendo que 'average_features_per_individual' es tu diccionario con los datos
# y 'labels' es una lista de tus etiquetas

# Convertir los DataFrames a una lista de matrices numpy
data = [df.values for df in average_features_per_individual.values()]

# Aplicar padding a los datos para asegurar que todas las secuencias tienen la misma longitud
data_padded = pad_sequences(data, padding='post', dtype='float32')

# Convertir las etiquetas a formato categórico
labels_categorical = to_categorical(labels)
labels = np.array(labels)  # Asegurarse de que 'labels' es un array de numpy para StratifiedKFold

# Definir el modelo dentro de una función para facilitar la reutilización
def create_model(input_shape, num_classes):
    model = Sequential([
        LSTM(50, activation='relu', input_shape=input_shape),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Preparar la validación cruzada estratificada
n_splits = 3
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

# Iterar sobre los folds
for train_index, test_index in skf.split(data_padded, labels):
    X_train, X_val = data_padded[train_index], data_padded[test_index]
    y_train, y_val = labels_categorical[train_index], labels_categorical[test_index]

    # Crear un nuevo modelo para este fold
    model = create_model((data_padded.shape[1], data_padded.shape[2]), len(labels_categorical[0]))

    # Entrenar el modelo
    history = model.fit(X_train, y_train, epochs=100, validation_data=(X_val, y_val))






Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
# TESTING

In [None]:
import pandas as pd

# Assuming 'features_per_individual_and_bodypart' is your dictionary
number_of_individuals = 8
number_of_species = 3
labels = [1, 1, 2, 2, 3, 2, 3, 3]

# This list will hold the data for all individuals
individual_sequences = []

# Loop over each individual
for i in range(1, number_of_individuals + 1):  # Adjust the range according to your number of individuals
    individual_key_prefix = f'individual{i}_'
    # Filter keys for the current individual
    individual_keys = [key for key in features_per_individual_and_bodypart.keys() if key.startswith(individual_key_prefix)]
    
    # Combine the features for all body parts of the individual into a single DataFrame
    individual_data_frames = [features_per_individual_and_bodypart[key] for key in individual_keys]
    individual_combined = pd.concat(individual_data_frames, axis=1)
    
    # You may want to flatten the DataFrame here so that each time step is a single vector
    individual_flattened = individual_combined.values.reshape(individual_combined.shape[0], -1)
    
    individual_sequences.append(individual_flattened)

# At this point, 'individual_sequences' is a list of 2D numpy arrays where each array represents an individual
print(len(individual_sequences))
print(individual_sequences[0].shape)

from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Assuming 'data' is your preprocessed data with all individuals' sequences

# Scale features
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(individual_sequences)

# Prepare the input X and the output y
X = np.array([scaled_data[individual] for individual in range(number_of_individuals)])
y = labels

# Define the LSTM model
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.2))
model.add(LSTM(units=50))
model.add(Dropout(0.2))
model.add(Dense(units=number_of_species, activation='softmax'))  # number of species is the number of prediction classes

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Fit the model
model.fit(X, y, epochs=100, batch_size=32)

8
(142, 28)


ValueError: Found array with dim 3. MinMaxScaler expected <= 2.

In [None]:
# 1. Formatting the Data

# Example conversion to 3D array
# Assuming `features_per_individual_and_bodypart` contains your data
num_samples = len(features_per_individual_and_bodypart)
num_time_steps = 142  # Based on your setup
num_features = 2  # Speed and direction

# Initialize an empty array to hold the reshaped data
data_3d = np.zeros((num_samples, num_time_steps, num_features))
#print(data_3d.shape)

# Fill in your data
for i, key in enumerate(features_per_individual_and_bodypart.keys()):
    data_3d[i, :, 0] = features_per_individual_and_bodypart[key]['Speed'].values  # Speed
    print(data_3d.shape)
    print(data_3d[i, :, 0])
    data_3d[i, :, 1] = features_per_individual_and_bodypart[key]['Direction'].values  # Direction

# 2. Normalizing the Data

from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Initialize the scaler
scaler = StandardScaler()

# Reshape data for normalization: from 3D to 2D
# The shape of data_3d is (num_samples, num_time_steps, num_features)
# We want to normalize across all samples for each feature, so we reshape to (-1, num_features)
data_reshaped = data_3d.reshape(-1, num_features)

# Normalize the reshaped data
data_normalized = scaler.fit_transform(data_reshaped)

# Reshape back to 3D after normalization
data_3d_normalized = data_normalized.reshape(num_samples, num_time_steps, num_features)

data_3d_normalized.shape

# Check for NaNs across the entire 3D array
nan_in_data_3d = np.isnan(data_3d).any()
# Check for NaNs in the normalized 3D data
nan_in_data_3d_normalized = np.isnan(data_3d_normalized).any()

nan_in_data_3d, nan_in_data_3d_normalized

(112, 142, 2)
[  0.           0.          12.46637109   1.96953227   5.95320862
   6.65992188  16.53796113  34.07972025  20.59696114  14.98219924
  15.33986395   0.           0.           0.           0.
   0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.
   0.           0.           0.         158.63263236   5.61555995
   6.33852084   7.5143035    2.39334753  14.12284737  10.76061727
  10.97615891  10.42895554  16.19784232  27.98980304  49.90770169
  49.02656475  35.76375458  21.38187952  12.45211566  27.57546262
   7.55182817  12.71404741  16.37687063   9.31772471  12.01379737
  11.19906521   9.92341158  14.75557971  11.12520543  11.50446872
  11.18594403  10.62680623   7.26985399   8.12099392

(False, False)

In [None]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import numpy as np

# Create species assignments according to your specification
# First 14 samples are 'species1', next 14 samples are 'species2', and so on
species_labels = ['species1'] * 14 + ['species1'] * 14 + ['species2'] * 14 + ['species2'] * 14 + ['species3'] * 14 + ['species2'] * 14 + ['species3'] * 14 + ['species3'] * 14 

# Label Encoding
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(species_labels)

# One-Hot Encoding
onehot_encoder = OneHotEncoder(sparse=False)
integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)  # Reshape for OneHotEncoder
onehot_encoded = onehot_encoder.fit_transform(integer_encoded)

# 'onehot_encoded' is your labels array ready for training, matching the 56 samples
labels = onehot_encoded
print(labels)
print(labels.shape)


[[1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0.



In [None]:
from sklearn.model_selection import train_test_split

# Ensure `data_3d_normalized` is your input features in 3D shape ready for LSTM
# `labels` is your one-hot encoded labels matching the samples in `data_3d_normalized`

# Correctly using `data_3d_normalized` for splitting
X_train, X_test, y_train, y_test = train_test_split(data_3d_normalized, labels, test_size=0.2, random_state=42)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Masking

model = Sequential([
    LSTM(50, activation='tanh', recurrent_activation='sigmoid', input_shape=(num_time_steps, num_features), use_bias=True),
    Dense(4, activation='softmax')  # Assuming 4 classes for classification
])
model.add(Masking(mask_value=0., input_shape=(141, 2)))  # Assuming each body part has 2 features: speed and direction
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [None]:
# Before training, check for NaNs in your data
assert not np.isnan(X_train).any(), "NaNs in X_train"
assert not np.isnan(y_train).any(), "NaNs in y_train"

# Adjust the learning rate if necessary
from tensorflow.keras.optimizers import Adam
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# Use gradient clipping in the optimizer
optimizer = Adam(learning_rate=0.001, clipvalue=0.5)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Fit the model with adjusted batch size if necessary
model.fit(X_train, y_train, epochs=200, validation_split=0.2)


Epoch 1/200


ValueError: in user code:

    c:\Users\JAIME\anaconda3\envs\gpu0\lib\site-packages\keras\engine\training.py:853 train_function  *
        return step_function(self, iterator)
    c:\Users\JAIME\anaconda3\envs\gpu0\lib\site-packages\keras\engine\training.py:842 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    c:\Users\JAIME\anaconda3\envs\gpu0\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1286 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    c:\Users\JAIME\anaconda3\envs\gpu0\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2849 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    c:\Users\JAIME\anaconda3\envs\gpu0\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3632 _call_for_each_replica
        return fn(*args, **kwargs)
    c:\Users\JAIME\anaconda3\envs\gpu0\lib\site-packages\keras\engine\training.py:835 run_step  **
        outputs = model.train_step(data)
    c:\Users\JAIME\anaconda3\envs\gpu0\lib\site-packages\keras\engine\training.py:788 train_step
        loss = self.compiled_loss(
    c:\Users\JAIME\anaconda3\envs\gpu0\lib\site-packages\keras\engine\compile_utils.py:201 __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    c:\Users\JAIME\anaconda3\envs\gpu0\lib\site-packages\keras\losses.py:141 __call__
        losses = call_fn(y_true, y_pred)
    c:\Users\JAIME\anaconda3\envs\gpu0\lib\site-packages\keras\losses.py:245 call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    c:\Users\JAIME\anaconda3\envs\gpu0\lib\site-packages\tensorflow\python\util\dispatch.py:206 wrapper
        return target(*args, **kwargs)
    c:\Users\JAIME\anaconda3\envs\gpu0\lib\site-packages\keras\losses.py:1665 categorical_crossentropy
        return backend.categorical_crossentropy(
    c:\Users\JAIME\anaconda3\envs\gpu0\lib\site-packages\tensorflow\python\util\dispatch.py:206 wrapper
        return target(*args, **kwargs)
    c:\Users\JAIME\anaconda3\envs\gpu0\lib\site-packages\keras\backend.py:4839 categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)
    c:\Users\JAIME\anaconda3\envs\gpu0\lib\site-packages\tensorflow\python\framework\tensor_shape.py:1161 assert_is_compatible_with
        raise ValueError("Shapes %s and %s are incompatible" % (self, other))

    ValueError: Shapes (None, 3) and (None, 4) are incompatible


In [None]:
# Evaluate the model on the test data
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=1)

# Print the results
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

Test Loss: 0.03762766718864441
Test Accuracy: 0.95652174949646
