In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import joblib

In [None]:
import random

# Fix the seed of Python's built-in RNG so any later use of `random` is repeatable.
# Note: this seeds only the stdlib `random` module.
SEED = 10
random.seed(SEED)

# Carga de datos

In [None]:
# Read the train / validation split tables from disk.
train_csv_path = "data/train_letters.csv"
validation_csv_path = "data/validation_letters.csv"

train_data = pd.read_csv(train_csv_path)
test_data = pd.read_csv(validation_csv_path)

In [None]:
# Preview the first rows of the training split table.
train_data.head()

# Analisis Exploratorio

In [None]:
# Summarize the training split: row count and the sorted list of distinct labels.
train_row_count = len(train_data)
train_letters = sorted(train_data["target"].unique())

print("--------------------Train data--------------------")
print(f"Cantidad de filas : {train_row_count}")
print(f"Frases unicas : {train_letters}")

In [None]:
# Summarize the validation split: row count and the sorted list of distinct labels.
val_row_count = len(test_data)
val_letters = sorted(test_data["target"].unique())

print("--------------------Validation data--------------------")
print(f"Cantidad de filas : {val_row_count}")
print(f"Frases unicas : {val_letters}")

In [None]:
# Both splits must contain exactly the same sorted list of labels; abort otherwise.
labels_match = train_letters == val_letters
if not labels_match:
    raise ValueError("Error between target and train")

In [None]:
# Descriptive statistics of the training split table.
train_data.describe()

# Preprocesamiento del Modelo

Solo se usan los 21 puntos de referencia de cada mano (índices 0 a 20), con sus coordenadas x e y, ya que son los únicos que han sido altamente entrenados en el modelo MediaPipe de Google.

In [None]:
def get_needed_cols():
    """Return the x/y coordinate column names for both hands.

    For every landmark index 0..20 the names are emitted in the order
    x_Right, y_Right, x_Left, y_Left, giving 84 names in total.
    """
    return [
        f'{axis}_{hand}_hand_{idx}'
        for idx in range(21)
        for hand in ('Right', 'Left')
        for axis in ('x', 'y')
    ]

In [None]:
# Load the full table and split its rows by which split table lists their sequence_id.
df = pd.read_csv("data/data_letters.csv")

in_test_split = df['sequence_id'].isin(test_data['sequence_id'])
in_train_split = df['sequence_id'].isin(train_data['sequence_id'])

df_test = df[in_test_split]
df_train = df[in_train_split]

In [None]:
# Preview the first rows of the full table.
df.head()

In [None]:
# Sanity checks: total row count, and whether each subset has as many rows
# as its split table.
total_rows = len(df)
test_rows_match = len(df_test) == len(test_data)
train_rows_match = len(df_train) == len(train_data)

print(total_rows)
print(test_rows_match)
print(train_rows_match)

# Preparación de secuencias y etiquetas

In [None]:
# Number of rows recorded for each sequence_id, and the largest such count.
video_lengths = df.groupby('sequence_id').size()
max_seq_length = video_lengths.max()
# max_seq_length = 30

# Distribution of sequence lengths (30 bins; adjust as needed).
fig, ax = plt.subplots()
ax.hist(video_lengths, bins=30)
ax.set_xlabel('Video Length')
ax.set_ylabel('Frequency')
ax.set_title('Histogram of Video Lengths')
plt.show()

In [None]:
# Largest number of rows found for a single sequence_id.
video_lengths.max()

In [None]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Shared label encoder (labels <-> integer codes). It is fitted inside
# padding_labels() and reused later to decode predictions.
label_encoder = LabelEncoder()

In [None]:
def padding_videos(df):
    """Regroup the rows of ``df`` by ``sequence_id`` and replace missing values with 0.

    Despite its name, this function does not pad sequences to a common
    length: it only orders the rows group by group (groups sorted by
    ``sequence_id``), resets the index and fills NaN with 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing a ``sequence_id`` column.

    Returns
    -------
    filled_df : pandas.DataFrame
        Rows of ``df`` grouped by ``sequence_id``, with a fresh 0..n-1 index
        and NaN replaced by 0. An empty, column-less frame is returned when
        ``df`` has no groups.
    target : list
        Always an empty list; kept so existing callers can still unpack two values.
    """
    target = []

    # Collect the groups first and concatenate once: DataFrame.append was
    # removed in pandas 2.0, and appending inside a loop copies the
    # accumulated frame on every iteration (quadratic cost).
    groups = [group for _, group in df.groupby('sequence_id')]

    # pd.concat raises on an empty list, so fall back to an empty frame.
    filled_df = pd.concat(groups) if groups else pd.DataFrame()

    filled_df = filled_df.reset_index(drop=True).fillna(0)
    return filled_df, target

def padding_labels(target):
    """One-hot encode a sequence of class labels.

    The labels are first mapped to integer codes with the module-level
    ``label_encoder`` (re-fitted on ``target`` on every call) and then
    expanded into a dense one-hot matrix.

    Parameters
    ----------
    target : array-like of shape (n_samples,)
        Class label of each sample.

    Returns
    -------
    numpy.ndarray of shape (n_samples, n_classes)
        Dense one-hot matrix, one column per distinct label in ``target``.

    Notes
    -----
    Because both encoders are fitted on each call, the column layout of two
    results is only comparable when both inputs contain the same set of labels.
    """
    integer_encoded = label_encoder.fit_transform(target).reshape(-1, 1)

    # scikit-learn 1.2 renamed `sparse` to `sparse_output` and 1.4 removed the
    # old keyword; try the new name first and fall back for older releases.
    try:
        onehot_encoder = OneHotEncoder(sparse_output=False)
    except TypeError:
        onehot_encoder = OneHotEncoder(sparse=False)

    return onehot_encoder.fit_transform(integer_encoded)

In [None]:
# Regroup the training rows by sequence_id and fill missing values with 0.
# The second return value (`target`) is always an empty list.
X_train, target = padding_videos(df_train)

In [None]:
# One-hot encode the label of every training row (this fits `label_encoder`).
y_train = padding_labels(X_train["target"])

In [None]:

# Remove the identifier and the label so they are not used as model features.
# `drop(..., errors="ignore")` keeps this cell safe to re-run, whereas `del`
# raises KeyError once the columns are already gone.
# NOTE(review): every remaining column becomes a feature; get_needed_cols()
# is never applied to select columns — confirm that is intended.
X_train = X_train.drop(columns=["sequence_id", "target"], errors="ignore")

print(len(X_train), len(y_train))

In [None]:
# Build the evaluation features/labels the same way as the training ones.
# Note: padding_labels() re-fits `label_encoder` on the labels passed here.
test_padded, target = padding_videos(df_test)
y_test = padding_labels(test_padded["target"])
X_test = test_padded.drop(columns=["sequence_id", "target"])

print(len(X_test), len(y_test))

In [None]:
# Do the train and test label rows together match the number of distinct sequence ids?
len(y_train) + len(y_test) == len(df["sequence_id"].unique())

In [None]:
# Number of training rows.
len(X_train)

In [None]:
# num_samples_train = int(len(X_train)/max_seq_length)
# num_features_train = len(get_needed_cols())

# Number of classes = width of the one-hot label matrix. Read it from the
# shape instead of from row 1, so it does not fail when there is one sample.
num_classes_train = y_train.shape[1]

# X_train = X_train.values.reshape(num_samples_train, max_seq_length, num_features_train)

In [None]:
# X_train = flat_X(X_train)
# X_test = flat_X(X_test)

In [None]:
# print('Train:', X_train.shape, y_train.shape)
# print('Test:', X_test.shape, y_test.shape)

# Modelo
La entrada son las coordenadas de la mano, que están relacionadas en su totalidad con la letra (target).
Es un tipo de GLM donde se asume que la distribución es gaussiana (normal).
No necesita épocas porque se ajusta en un solo paso por mínimos cuadrados: calcula directamente la ecuación que minimiza el error de predicción.

In [None]:
# Fit a linear regression on the one-hot targets (fit() returns the estimator itself),
# then score every test row: one output column per class.
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

In [None]:
# Class index with the highest score per row, for predictions and for the one-hot truth.
most_likely_predictions = y_pred.argmax(axis=1)
expected_labels = y_test.argmax(axis=1)

In [None]:
# Predicted class index of every test row.
most_likely_predictions

In [None]:
# True class index of every test row.
expected_labels

In [None]:
# Map the predicted class indices back to their original labels.
predicted_labels = label_encoder.inverse_transform(most_likely_predictions)

In [None]:
# Decode the true class indices straight from y_test instead of overwriting
# `expected_labels` with a transform of itself: re-running this cell then
# gives the same result instead of failing on already-decoded labels.
expected_labels = label_encoder.inverse_transform(np.argmax(y_test, axis=1))

In [None]:
# Walk predictions and ground truth in lockstep, reporting each row and
# counting the matches in `correct`.
correct = 0
for predicted, expected in zip(predicted_labels, expected_labels):
    if predicted != expected:
        print(">> Incorrecta: ", predicted, "contra", expected)
        continue
    correct += 1
    print("Correctamente predicha: ", predicted)

In [None]:
# Accuracy = correct predictions / number of evaluated rows. The denominator
# must be the number of predictions, not the number of distinct labels:
# the two only coincide when the test set has exactly one row per label.
total_evaluated = len(predicted_labels)
# Guard against an empty test set instead of raising ZeroDivisionError.
accuracy = correct / total_evaluated if total_evaluated else 0.0

print("Correct >> ", correct)
print("Expected >> ", total_evaluated)
print("Ratio >> ", str(accuracy))

# Reutilizando el modelo

In [None]:
# Save the model to a file
model_filename = 'letters_model.joblib'
joblib.dump(model, model_filename)

In [None]:

# Load the model back from the file.
# NOTE: joblib.load unpickles the file and can execute arbitrary code;
# only load model files from a trusted source.
loaded_model = joblib.load(model_filename)

In [None]:
# Predict the label of the first test row with the reloaded model.
# `iloc[[0]]` keeps a one-row DataFrame, so the column names seen at fit time
# are preserved; a list holding a Series drops them and makes scikit-learn
# warn about missing feature names.
first_sample = X_test.iloc[[0]]
predicted_class = np.argmax(loaded_model.predict(first_sample), axis=1)
predicted_label = label_encoder.inverse_transform(predicted_class)
"".join(predicted_label)