In [None]:
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt

In [None]:
import random

# Single seed shared by every generator seeded in this cell.
SEED = 10

# Seed Python's generator (as before) and NumPy's global generator, so code
# that draws from either one is repeatable across kernel restarts.
# NOTE(review): TensorFlow keeps its own generator; model training is
# presumably still non-deterministic until that one is seeded too
# (tf.random.set_seed) -- confirm.
random.seed(SEED)
np.random.seed(SEED)

# Carga de datos

In [26]:
# Read the training and validation label tables (one row per sequence_id/target pair).
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in ("data/train_letters.csv", "data/validation_letters.csv")
)

In [27]:
# Preview the first rows of the training label table.
train_data.head()

Unnamed: 0,sequence_id,target
0,1,o
1,2,e
2,3,y
3,4,h
4,5,k


# Analisis Exploratorio

In [28]:
# Summary of the training labels: row count and the sorted distinct targets.
print("--------------------Train data--------------------")
print(f"Cantidad de filas : {len(train_data)}")
train_letters = sorted(train_data["target"].unique())
print(f"Frases unicas : {train_letters}")

--------------------Train data--------------------
Cantidad de filas : 183
Frases unicas : ['a', 'b', 'c', 'e', 'g', 'h', 'i', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'ñ']


In [29]:
# Summary of the validation labels: row count and the sorted distinct targets.
print("--------------------Validation data--------------------")
print(f"Cantidad de filas : {len(test_data)}")
val_letters = sorted(test_data["target"].unique())
print(f"Frases unicas : {val_letters}")

--------------------Validation data--------------------
Cantidad de filas : 23
Frases unicas : ['a', 'b', 'c', 'e', 'g', 'h', 'i', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'ñ']


In [30]:
# Stop early unless both splits contain exactly the same sorted list of letters.
if not train_letters == val_letters:
    raise ValueError("Error between target and train")

In [None]:
# Summary statistics of the training label table.
train_data.describe()

# Preprocesamiento del Modelo

Solo se obtienen los 21 puntos (índices 0 a 20) de las coordenadas x e y, ya que son los únicos que han sido altamente entrenados en el modelo MediaPipe de Google.

In [None]:
def get_needed_cols(hands=("Left",), num_landmarks=21):
    """Return the landmark coordinate column names used as model features.

    For every landmark index ``0 .. num_landmarks - 1`` and every hand in
    ``hands``, the names ``x_<hand>_hand_<i>`` and ``y_<hand>_hand_<i>`` are
    produced, in that order.

    Args:
        hands: Hand prefixes to include. The default keeps only the left
            hand, which reproduces the previous hard-coded behaviour; pass
            ``("Right", "Left")`` to include both hands.
        num_landmarks: Number of landmark indices per hand (default 21).

    Returns:
        list[str]: ``2 * len(hands) * num_landmarks`` column names.
    """
    return [
        f'{axis}_{hand}_hand_{i}'
        for i in range(num_landmarks)
        for hand in hands
        for axis in ('x', 'y')
    ]

In [None]:
# Landmark table for every sequence, split by whether each row's sequence_id
# is listed in the validation or in the training label table.
df = pd.read_csv("data/data_letters.csv")
df_test = df.loc[df.sequence_id.isin(test_data.sequence_id)]
df_train = df.loc[df.sequence_id.isin(train_data.sequence_id)]

In [None]:
# Preview the first rows of the landmark table.
df.head()

In [None]:
# Total landmark rows, then whether each split has exactly as many landmark
# rows as its label table has rows.
print(df.shape[0])
print(df_test.shape[0] == test_data.shape[0])
print(df_train.shape[0] == train_data.shape[0])

# Modelo

In [None]:
# Number of rows recorded for each sequence_id.
video_lengths = df.groupby('sequence_id').size()
# Longest sequence; padding_videos pads shorter sequences up to this length.
max_seq_length = video_lengths.max()

# Distribution of the sequence lengths (bin count can be tuned).
fig, ax = plt.subplots()
ax.hist(video_lengths, bins=30)
ax.set_xlabel('Video Length')
ax.set_ylabel('Frequency')
ax.set_title('Histogram of Video Lengths')
plt.show()

In [None]:
# Display the longest sequence length (rows per sequence_id).
video_lengths.max()

In [None]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
# Shared label encoder (label <-> integer code): padding_labels fits it and the
# prediction cells use it to map predicted class indices back to labels.
label_encoder = LabelEncoder()

In [None]:
def padding_videos(df, seq_length=None):
    """Zero-pad every sequence in ``df`` to a common number of rows.

    Rows are grouped by ``sequence_id``. Each group shorter than
    ``seq_length`` gets extra rows whose values are all 0, except that
    ``sequence_id`` and ``target`` repeat the group's own values. Groups that
    are already long enough are left unchanged (they are not truncated).
    Any remaining NaN in the result is replaced by 0.

    Args:
        df: DataFrame with at least the columns ``sequence_id`` and ``target``.
        seq_length: Number of rows every sequence is padded to. When omitted,
            the notebook-level ``max_seq_length`` is used, as before.

    Returns:
        tuple: ``(filled_df, target)`` where ``filled_df`` is the padded frame
        with a fresh 0..n-1 index and ``target`` is a list holding the first
        ``target`` value of each group, in group order.
    """
    if seq_length is None:
        # Backward-compatible default: the length computed from the full data set.
        seq_length = max_seq_length

    padded_groups = []
    target = []

    for _, group in df.groupby('sequence_id'):
        sequence_id = group['sequence_id'].iloc[0]
        label = group['target'].iloc[0]
        remaining_rows = seq_length - len(group)

        # Only build and concatenate a padding frame when there is something
        # to add; concatenating an empty frame is wasted work and can affect
        # the resulting dtypes.
        if remaining_rows > 0:
            zeros_df = pd.DataFrame(0, index=range(remaining_rows), columns=group.columns)
            zeros_df['sequence_id'] = sequence_id
            zeros_df['target'] = label
            group = pd.concat([group, zeros_df])

        padded_groups.append(group)
        target.append(label)

    if not padded_groups:
        # No sequences at all: return an empty frame that keeps the columns.
        return df.iloc[0:0].reset_index(drop=True), target

    # Collect the pieces and concatenate once. DataFrame.append no longer
    # exists in pandas 2.x, and appending inside the loop was quadratic.
    filled_df = pd.concat(padded_groups).reset_index(drop=True).fillna(0)
    return filled_df, target

def padding_labels(target):
    """One-hot encode a sequence of class labels.

    The shared ``label_encoder`` is (re)fitted on ``target`` to turn each
    label into an integer code, and each code is then expanded into a one-hot
    row. Column ``j`` of the result corresponds to ``label_encoder.classes_[j]``.

    Because the encoder is refitted on every call, two calls produce
    comparable columns only when they receive the same set of labels.

    Args:
        target: Sequence of labels, one per sample.

    Returns:
        numpy.ndarray: Float array of shape ``(len(target), number_of_classes)``.
    """
    integer_encoded = label_encoder.fit_transform(target)

    # Rows of the identity matrix are the one-hot vectors. This gives the same
    # dense array as a OneHotEncoder fitted on the integer codes, without the
    # `sparse=` keyword that newer scikit-learn releases no longer accept.
    num_classes = len(label_encoder.classes_)
    return np.eye(num_classes)[integer_encoded]

In [None]:
# Pad the training sequences, then one-hot encode their per-sequence labels.
X_train, target = padding_videos(df=df_train)
y_train = padding_labels(target=target)

In [None]:

# Keep only the feature columns. drop() returns a new frame instead of
# deleting columns in place, and errors="ignore" makes the cell safe to
# re-run (the in-place `del` raised KeyError on a second execution).
X_train = X_train.drop(columns=["sequence_id", "target"], errors="ignore")

print(len(X_train), len(y_train))

In [None]:
# Same preparation for the validation split. Note that `target` now holds the
# validation labels and that padding_labels refits the shared label encoder.
X_test, target = padding_videos(df=df_test)
y_test = padding_labels(target=target)
# Keep only the feature columns.
X_test = X_test.drop(columns=["sequence_id", "target"])

print(len(X_test), len(y_test))

In [None]:
# Check that train + validation labels account for every distinct sequence_id in df.
len(y_train) + len(y_test) == len(df["sequence_id"].unique())

In [None]:
# Number of rows in the padded training feature table.
len(X_train)

In [None]:
# num_samples_train = int(len(X_train)/max_seq_length)
# num_features_train = len(get_needed_cols())

# Number of classes = width of the one-hot label matrix. Reading it from the
# shape avoids indexing a specific sample (y_train[1] needs at least two rows).
num_classes_train = y_train.shape[1]

# X_train = X_train.values.reshape(num_samples_train, max_seq_length, num_features_train)

In [None]:
# X_train = flat_X(X_train)
# X_test = flat_X(X_test)

In [None]:
# Shapes of the feature and label arrays for both splits.
print(f"Train: {X_train.shape} {y_train.shape}")
print(f"Test: {X_test.shape} {y_test.shape}")

# Modelo
La entrada son las coordenadas de la mano. Cada video cuenta con n cantidad de filas y 42 columnas (21 puntos por cada coordenada, x e y, de una sola mano, tal como las define `get_needed_cols`).
La salida es la frase. La frase se representa por un entero que da el one hot encoder.

Se usa convoluciones para resaltar las caracteristicas en la entrada. Debido a que la entrada son coordenadas normalizadas de un video, se supone que funciona igual que si la entrada fuera una imagen. Estas redes extraen caracteristicas de forma automatica para clasificar objetos luego. Al buscar patrones, se espera que pueda predecir un video que ya ha sido entrenado previamente.

Se reduce el tamaño de la entrada haciendo uso de max pooling y flatten.

Se hace uso de Dense para conectar entradas con salidas.

Se hace uso de Dropout para evitar el sobreajuste.

Relu elimina negativos. 
Sigmoid nos ayuda a obtener la probabilidad de que un ejemplo pertenezca a la clase positiva.
Softmax hace clasificacion multiclase (en nuestro caso las palabras a predecir).

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

In [None]:
# model = Sequential()
# model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(max_seq_length, num_features_train)))
# model.add(LSTM(128, return_sequences=True, activation='relu'))
# model.add(LSTM(64, return_sequences=False, activation='relu'))
# model.add(Dense(64, activation='relu'))
# model.add(Dense(32, activation='relu'))
# model.add(Dense(num_classes_train, activation='softmax'))

In [None]:
# # Add input layer with appropriate input shape (84 features)
# model = Sequential()
# model.add(Dense(128, activation='relu', input_shape=(len(get_needed_cols()),)))
# model.add(Dense(64, activation='relu'))
# model.add(Dense(32, activation='relu'))

# # Output layer (adjust units according to your task)
# model.add(Dense(num_classes_train, activation='softmax'))  # For classification, or 'linear' for regression


In [None]:
# Dropout is used below, but the Keras import cell only brings in LSTM and
# Dense; import it here so this cell runs on a fresh kernel.
from tensorflow.keras.layers import Dropout

# Fully connected classifier over the landmark features.
# Input width is len(get_needed_cols()): one value per selected coordinate column.
model = Sequential()
model.add(Dense(128, activation='relu', input_shape=(len(get_needed_cols()),)))
model.add(Dropout(0.2))  # Dropout after each hidden layer for regularization
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))

# Output layer: one softmax unit per class of the one-hot training labels.
model.add(Dense(num_classes_train, activation='softmax'))


In [None]:
# Configure training: Adam optimizer, categorical cross-entropy on the one-hot
# labels, accuracy tracked as the only metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit on the training split; the returned object is kept in `history`.
history = model.fit(x=X_train, y=y_train, batch_size=8, epochs=300)

# Evaluate on the held-out split; index 1 is the accuracy metric compiled above.
score = model.evaluate(x=X_test, y=y_test)
print('Test accuracy:', score[1])

In [None]:
# Per-class scores for every row of the held-out features.
predictions = model.predict(X_test)

# Index of the highest-scoring class for each row.
most_likely_predictions = predictions.argmax(axis=1)

In [None]:
# Display the predicted class index of every held-out row.
most_likely_predictions

In [None]:
# Map predicted class indices back to labels with the shared label encoder.
predicted_labels = label_encoder.inverse_transform(most_likely_predictions)

In [None]:
# Ground-truth label of every held-out sample, in the same row order as
# y_test (and therefore as the predictions made from X_test). The unique
# letters of the training CSV used before were in order of first appearance
# there, unrelated to the order of the test samples, so comparing them
# position by position with the predictions was meaningless.
expected_labels = label_encoder.inverse_transform(np.argmax(y_test, axis=1))

In [None]:
# Count (and report) the positions where prediction and expected label agree.
correct = 0
for i, predicted in enumerate(predicted_labels):
    if predicted == expected_labels[i]:
        correct += 1
        print("Correctamente predicha: ", predicted)

In [None]:
# Accuracy ratio over the samples that were actually evaluated. The previous
# denominator was the number of distinct letters in the validation CSV, which
# only matches the sample count when every letter appears exactly once.
total = len(predicted_labels)
print("Correct >> ", correct)
print("Ratio >> ", str(correct / total if total else 0.0))