In [2]:
import pandas as pd
import tensorflow as tf
import numpy as np

  # Cargar el dataset
data = pd.read_csv('FIS\dataset.csv')

  # Mostrar las primeras filas del dataset
print(data.head())

          P         D          L          R P_fuzzy_set  P_membership  \
0 -0.344708  0.267949  50.949905  24.525778       lejos      0.861770   
1 -0.512704 -0.725829  90.972135 -53.639732       lejos      0.812159   
2  0.388981 -0.031825 -49.601000  63.894966       cerca      0.972454   
3 -0.918004 -0.325898  92.482375 -50.164266   muy lejos      0.863339   
4  0.046981 -0.488745  51.090865  -2.041475          ok      0.882547   

   D_fuzzy_set  D_membership                     L_fuzzy_set  L_membership  \
0  acercandose      1.000000      medio rápido hacia delante      0.905009   
1   alejandose      1.000000        muy rápido hacia delante      1.000000   
2   sin cambio      0.840877  más o menos rápido hacia atrás      0.759975   
3   alejandose      1.000000        muy rápido hacia delante      1.000000   
4   alejandose      1.000000      medio rápido hacia delante      0.890913   

                      R_fuzzy_set  R_membership  \
0         despacito hacia delante      0.

In [3]:
X = data[['P','D','L','R',]]
y = data['CAT']
z = data[['P_fuzzy_set', 'D_fuzzy_set', 'L_fuzzy_set', 'R_fuzzy_set']].apply(lambda x: ' '.join(x), axis=1)

In [5]:
from tensorflow.keras.preprocessing.text import Tokenizer

tokenizer = Tokenizer()
tokenizer.fit_on_texts(y)
num_decoder_tokens = len(tokenizer.word_index) + 1
y_tokenized = tokenizer.texts_to_sequences(y)

In [6]:
from sklearn.model_selection import train_test_split

# Dividir los datos en conjunto de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y_tokenized, test_size=0.2, random_state=42)

# Dividir el conjunto de entrenamiento en conjunto de entrenamiento y validación
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)

# Obtener el índice de las filas de X_train, X_val y X_test
idx_train = X_train.index
idx_val = X_val.index
idx_test = X_test.index

In [7]:
# Dividir z en z_train, z_val y z_test
z_train = z.loc[idx_train].values
z_val = z.loc[idx_val].values
z_test = z.loc[idx_test].values

In [8]:
z_train = tokenizer.texts_to_sequences(z_train)
z_val = tokenizer.texts_to_sequences(z_val)
z_test = tokenizer.texts_to_sequences(z_test)

In [9]:
X_train = X_train[['P','D','L','R']].values
X_val = X_val[['P','D','L','R']].values
X_test = X_test[['P','D','L','R']].values

In [10]:
from sklearn.preprocessing import StandardScaler

# Ajusta las columnas numéricas con StandardScaler
scaler_values = StandardScaler()

scaler_values.fit(X_train)

In [11]:
X_train_scaled = scaler_values.transform(X_train)
X_val_scaled =scaler_values.transform(X_val)
X_test_scaled =scaler_values.transform(X_test)

In [12]:
max_len = max(len(s) for s in y_tokenized)

In [13]:
from tensorflow.keras.preprocessing.sequence import pad_sequences


y_train = pad_sequences(y_train, maxlen=num_decoder_tokens, padding='post', truncating='post')
y_val = pad_sequences(y_val, maxlen=num_decoder_tokens, padding='post', truncating='post')
y_test = pad_sequences(y_test, maxlen=num_decoder_tokens, padding='post', truncating='post')
z_train = pad_sequences(z_train, maxlen=num_decoder_tokens, padding='post', truncating='post')
z_val = pad_sequences(z_val, maxlen=num_decoder_tokens, padding='post', truncating='post')
z_test = pad_sequences(z_test, maxlen=num_decoder_tokens, padding='post', truncating='post')

In [14]:
print('tamaño X train_scaled', X_train_scaled.shape)
print('tamaño y train', y_train.shape)
print('tamaño z train', z_train.shape)


print('tamaño X val_scaled', X_val_scaled.shape)
print('tamaño y val', y_val.shape)
print('tamaño z val_scaled', z_val.shape)

print('tamaño X test_scaled', X_test_scaled.shape)
print('tamaño y test', y_test.shape)
print('tamaño z test', z_test.shape)


tamaño X train_scaled (5999, 4)
tamaño y train (5999, 40)
tamaño z train (5999, 40)
tamaño X val_scaled (2000, 4)
tamaño y val (2000, 40)
tamaño z val_scaled (2000, 40)
tamaño X test_scaled (2000, 4)
tamaño y test (2000, 40)
tamaño z test (2000, 40)


In [15]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Embedding

# Definir dimension de entrada para el encoder
encoder_inputs = Input(shape=(1, 4))

# Capa LSTM en el encoder
encoder_lstm = LSTM(64, return_state=True)
_, state_h, state_c = encoder_lstm(encoder_inputs)

# Se descartan las salidas del encoder y solo se toman los estados
encoder_states = [state_h, state_c]

In [16]:
# Definir dimension de entrada para el decoder
decoder_inputs = Input(shape=(None,))

# Capa de embedding en el decoder
decoder_embedding = Embedding(num_decoder_tokens, output_dim=100)
decoder_inputs_embedded = decoder_embedding(decoder_inputs)

# Capa LSTM en el decoder, con los estados del encoder como inicialización
decoder_lstm = LSTM(64, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs_embedded, initial_state=encoder_states)

# Capa densa con activación softmax en el output
decoder_dense = Dense(num_decoder_tokens, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Definir modelo encoder-decoder
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

# Compilar el modelo
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

In [26]:
# Entrenar el modelo
model.fit([X_train_scaled.reshape(5999,1,4), z_train], 
    y_train.reshape(y_train.shape[0], y_train.shape[1], 1),
     batch_size=128, 
     epochs=35, 
     validation_data=([X_val_scaled.reshape(X_val.shape[0],1,4), z_val], y_val.reshape(y_val.shape[0], y_val.shape[1], 1)))

Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35
Epoch 24/35
Epoch 25/35
Epoch 26/35
Epoch 27/35
Epoch 28/35
Epoch 29/35
Epoch 30/35
Epoch 31/35
Epoch 32/35
Epoch 33/35
Epoch 34/35
Epoch 35/35


<keras.callbacks.History at 0x1ec0907ed30>

In [27]:
model.evaluate([X_test.reshape(X_test.shape[0],1,4),z_test])



0.0

In [28]:
def predictions(preds):
    preds_words = []
    for i in range(preds.shape[0]):
        preds_row = preds[i]
        preds_row_words = []
        for idx in preds_row:
            if idx in tokenizer.index_word:
                preds_row_words.append(tokenizer.index_word[idx])
        preds_words.append(preds_row_words)

    return preds_words

In [35]:
inPrueba = np.array([[0.431366405,0.75319591,-86.86335949,83.72671899]])
inPrueba = scaler_values.transform(inPrueba).reshape(1,1,4)

In [36]:
z_prueba = ['muy lejos alejandose muy rápido hacia delante más o menos rápido hacia atrás']
z_prueba = tokenizer.texts_to_sequences(z_prueba)
z_prueba = pad_sequences(z_prueba, maxlen=num_decoder_tokens, padding='post', truncating='post')

In [37]:
inPrueba

array([[[ 0.65919191,  1.4731019 , -1.97016084,  2.00444061]]])

In [38]:
z_prueba

array([[15, 27, 15, 23,  4, 14, 24, 38, 39, 23,  4,  9,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0]])

In [39]:
preds = model.predict([inPrueba, np.array(z_prueba)])
preds = np.argmax(preds, axis=-1)



In [40]:
frasesnlg = predictions(preds)
print(frasesnlg)

[['cuando', 'estés', 'muy', 'cerca', 'a', 'la', 'pared', 'y', 'te', 'estás', 'acercando', 'aun', 'a', 'ella', 'gira', 'tu', 'rueda', 'izquierda', 'izquierda', 'despacito', 'hacia', 'atrás', 'y', 'gira', 'tu', 'rueda', 'derecha', 'despacito', 'hacia', 'atrás']]
