In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
import chardet
import re
import warnings
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout, Embedding, Concatenate, Input
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.models import Model
from keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler

# Modelo devolucion

In [2]:
df_devolucion = pd.read_csv('data_output/full_sequence.csv', low_memory=False)

In [3]:
df_devolucion.head()

Unnamed: 0,1stIn,2ndIn,1st_final,2nd_final,Pt,Set1,Set2,Gm1,Gm2,TbSet,...,Ret,isAce,isUnret,isRallyWinner,isForced,isUnforced,isDouble,PtWinner,isSvrWinner,rallyCount
0,1,,6 f2n#,,1,0,0,0.0,0.0,1,...,2,False,False,False,True,False,False,1,1,1
1,0,1.0,6 b19 f1 b2 s1 f3 f2 j2*,6 b19 f1 b2 s1 f3 f2 j2*,2,0,0,0.0,0.0,1,...,2,False,False,True,False,False,False,2,0,8
2,0,1.0,4 b28 f2 o1*,4 b28 f2 o1*,3,0,0,0.0,0.0,1,...,2,False,False,True,False,False,False,2,0,4
3,1,,6 s28 f3*,,4,0,0,0.0,0.0,1,...,2,False,False,True,False,False,False,1,1,3
4,1,,4 b37 b3*,,5,0,0,0.0,0.0,1,...,2,False,False,True,False,False,False,1,1,3


In [4]:
df_devolucion = df_devolucion[df_devolucion['1st_final'].apply(lambda x: len(x.split())) >= 2]
df_devolucion = df_devolucion.sample(frac=0.5, random_state=42).reset_index(drop=True)

# Preprocesamiento de los datos

In [5]:
context_data_devolucion = df_devolucion[['Pt', 'Set1', 'Set2', 'Gm1', 'Gm2']]

scaler = MinMaxScaler()
context_data_normalized = scaler.fit_transform(context_data_devolucion)

In [6]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df_devolucion['1st_final'])
word_index = tokenizer.word_index
vocab_size = len(word_index) + 1

In [7]:
sequences_tokenized = tokenizer.texts_to_sequences(df_devolucion['1st_final'])

In [8]:
# Preparar X (golpe actual) e y (siguiente golpe) a partir de secuencias
X = []
y = []
context_features = []

for seq in sequences_tokenized:
    for i in range(len(seq) - 1):
        X.append(seq[i])   # Golpe actual
        y.append(seq[i + 1])  # Siguiente golpe
        context_features.append(context_data_normalized[i])

X = np.array(X).reshape(-1, 1)  # Redimensionar X para que sea una matriz de una columna
y = np.array(y)
X_context = np.array(context_features)

print(f"Verificación")
print(f"Entrada X (golpe actual): {X[0][0]}")
print(f"Salida y (siguiente golpe): {y[0]}")

sequences_tokenized[0]

Verificación
Entrada X (golpe actual): 4
Salida y (siguiente golpe): 14


[4, 14, 10, 1, 1, 7, 48]

# Modelo

In [9]:
# Definir la entrada para el golpe actual
input_golpe = Input(shape=(1,))
embedding_layer = Embedding(input_dim=vocab_size, output_dim=32)(input_golpe)
lstm_layer = LSTM(32)(embedding_layer)

# Definir la entrada para las características contextuales
input_context = Input(shape=(X_context.shape[1],))

# Concatenar las dos entradas (golpe actual + contexto)
concat_layer = Concatenate()([lstm_layer, input_context])

# Añadir una capa densa para la predicción del siguiente golpe
output = Dense(vocab_size, activation='softmax')(concat_layer)

# Definir el modelo con las dos entradas
model = Model(inputs=[input_golpe, input_context], outputs=output)

# Compilar el modelo
model.compile(optimizer='RMSprop', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [10]:
from sklearn.model_selection import train_test_split

# Dividir los datos en conjuntos de entrenamiento y testeo (80%-20%)
X_train_golpe, X_test_golpe, X_train_context, X_test_context, y_train, y_test = train_test_split(
    X, X_context, y, test_size=0.2, random_state=42
)

# Entrenar el modelo con las dos entradas
model.fit(
    [X_train_golpe, X_train_context],
    y_train,
    epochs=10,
    batch_size=128,
    validation_data=([X_test_golpe, X_test_context], y_test)
)

Epoch 1/10




[1m8830/8830[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 2ms/step - accuracy: 0.1676 - loss: 3.6242 - val_accuracy: 0.2109 - val_loss: 3.1618
Epoch 2/10
[1m8830/8830[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2ms/step - accuracy: 0.2118 - loss: 3.1590 - val_accuracy: 0.2131 - val_loss: 3.1481
Epoch 3/10
[1m8830/8830[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2ms/step - accuracy: 0.2127 - loss: 3.1523 - val_accuracy: 0.2138 - val_loss: 3.1451
Epoch 4/10
[1m8830/8830[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2ms/step - accuracy: 0.2129 - loss: 3.1491 - val_accuracy: 0.2138 - val_loss: 3.1444
Epoch 5/10
[1m8830/8830[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2ms/step - accuracy: 0.2130 - loss: 3.1484 - val_accuracy: 0.2141 - val_loss: 3.1439
Epoch 6/10
[1m8830/8830[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 2ms/step - accuracy: 0.2129 - loss: 3.1450 - val_accuracy: 0.2141 - val_loss: 3.1427
Epoch 7/10
[1m8830/8

<keras.src.callbacks.history.History at 0x1d3ab58b1f0>

In [11]:
# Guardar el modelo
#model.save('model_one_shot_devolucion.keras')

In [12]:
# Ejemplo de predicción con el golpe 'f2' y algunas características contextuales
input_golpe_test = 'f3'
input_golpe_sequence = tokenizer.texts_to_sequences([input_golpe_test])
input_context_test = context_data_normalized[0]  # Usamos la primera fila de las características normalizadas como ejemplo

# Realizar la predicción
predicted_probabilities = model.predict([np.array(input_golpe_sequence), np.array([input_context_test])])

# Obtener los tres índices con las mayores probabilidades
top_3_indices = predicted_probabilities[0].argsort()[-3:][::-1]  # Ordenar y obtener los 3 índices más altos

# Convertir los índices a los golpes correspondientes
top_3_golpes = [tokenizer.index_word[idx] for idx in top_3_indices]

# Mostrar resultados
print(f"Golpe actual: {input_golpe_test}")
print(f"Los 3 golpes más probables que siguen: {top_3_golpes}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 119ms/step
Golpe actual: f3
Los 3 golpes más probables que siguen: ['b3', 'b2', 's3']
