# Data

In [1]:
%%capture
%pip install pandas
%pip install scikit-learn
%pip install numpy
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import numpy as np

# Read the raw action log and discard the 'Usuari' and 'Representat' columns
# in a single expression, so re-running the cell always starts from the CSV.
initial_df = pd.read_csv('./datasets/accions.csv').drop(columns=['Usuari', 'Representat'])

# Keep only the sessions that contain at least `n` logged actions.
n = 4
# One row per session: its 'Sessio' id and how many rows it has ('count').
session_counts = initial_df.groupby('Sessio').size().rename('count').reset_index()
sessions_to_keep = session_counts.loc[session_counts['count'] >= n]
# Rows of the original frame whose session survived the filter.
df = initial_df.loc[initial_df['Sessio'].isin(sessions_to_keep['Sessio'])]

# Encode action values as integer ids in a new 'action_id' column.
# `assign` returns a new frame, so the filtered view of `initial_df` is never
# written to in place.
# NOTE(review): 'Accio_Tramit' is built and then dropped without being used;
# the encoder is fit on 'Tramit' alone. Confirm whether the combined column
# was meant to be the encoded one.
label_encoder = LabelEncoder()
df = (
    df.assign(
        Accio_Tramit=lambda frame: frame['Accio'] + '_' + frame['Tramit'],
        action_id=lambda frame: label_encoder.fit_transform(frame['Tramit']),
    )
    .drop(columns=['Accio', 'Tramit', 'Accio_Tramit'])
)

# Map each session id to its list of action ids, ordered by the 'Data' column
# (rows are sorted by session first, then by 'Data', before grouping).
df_sorted = df.sort_values(by=['Sessio', 'Data'])
session_sequences = {
    session_id: group['action_id'].tolist()
    for session_id, group in df_sorted.groupby('Sessio')
}

# Generate sequences + label
# Length of every one-hot vector: the number of classes the encoder learned.
num_actions = len(label_encoder.classes_)
def one_hot_encode(action_id, num_classes):
    """Return a one-hot vector for ``action_id``.

    Args:
        action_id: Index of the position to set to 1.
        num_classes: Length of the returned vector.

    Returns:
        A 1-D ``np.int8`` array of length ``num_classes`` that is all zeros
        except for a 1 at index ``action_id``.
    """
    encoded = np.zeros(shape=(num_classes,), dtype=np.int8)
    encoded[action_id] = 1
    return encoded
# Slide a window of 4 consecutive actions over every session. Each window
# becomes a tuple (input1, input2, input3, label) of one-hot vectors: the
# first three actions are the inputs and the fourth is the action to predict.
sequence_data = [
    tuple(
        one_hot_encode(action, num_actions)
        for action in action_sequence[start:start + 4]
    )
    for action_sequence in session_sequences.values()
    for start in range(len(action_sequence) - 3)
]

# RNN

## Imports

In [2]:
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense, Input, LSTM, GRU
from sklearn.model_selection import train_test_split
import numpy as np

2024-11-23 19:40:36.249037: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-23 19:40:36.264262: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-23 19:40:36.281813: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-23 19:40:36.286941: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-23 19:40:36.300213: I tensorflow/core/platform/cpu_feature_guar

## Data

In [7]:
# Maximum number of windows converted to arrays. The value 10 keeps only the
# first 10 windows of sequence_data; set it to None to use every window.
MAX_SAMPLES = 10
aux = sequence_data[:MAX_SAMPLES]

# Convert the (input1, input2, input3, label) tuples to numpy arrays
X = np.array([np.stack(seq[:3]) for seq in aux])  # Shape: (num_samples, 3, num_actions)
y = np.array([seq[3] for seq in aux])  # Shape: (num_samples, num_actions)

# Split into train and test sets (fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


## Model

### Simple RNN

In [None]:
# Simple RNN classifier: reads 3 one-hot encoded actions and outputs a
# probability distribution over the next action.
model = Sequential([
    # 3 timesteps, each a one-hot vector of length num_actions. The shape is
    # declared once here instead of also being passed to the recurrent layer.
    Input(shape=(3, num_actions)),
    SimpleRNN(32, activation='tanh'),
    # One output unit per action class; softmax turns them into probabilities.
    Dense(num_actions, activation='softmax'),
])

# Compile the model. The labels are one-hot vectors, so this is multi-class
# classification: categorical cross-entropy replaces mean squared error.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


### LSTM

In [None]:
# LSTM classifier: reads 3 one-hot encoded actions and outputs a probability
# distribution over the next action.
model = Sequential([
    # 3 timesteps, each a one-hot vector of length num_actions. An explicit
    # Input layer replaces the `input_shape` argument on the LSTM layer.
    Input(shape=(3, num_actions)),
    LSTM(64, activation='tanh', return_sequences=False),
    Dense(32, activation='relu'),
    # One output unit per action class; softmax turns them into probabilities.
    Dense(num_actions, activation='softmax'),
])

# Compile the model. The labels are one-hot vectors, so this is multi-class
# classification: categorical cross-entropy replaces mean squared error.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

### GRU

In [None]:
# GRU classifier: reads 3 one-hot encoded actions and outputs a probability
# distribution over the next action.
model = Sequential([
    # 3 timesteps, each a one-hot vector of length num_actions. The shape is
    # declared once here instead of also being passed to the recurrent layer.
    Input(shape=(3, num_actions)),
    GRU(64, activation='tanh', return_sequences=False),
    Dense(32, activation='relu'),
    # One output unit per action class; softmax turns them into probabilities.
    Dense(num_actions, activation='softmax'),
])
# Compile the model. The labels are one-hot vectors, so this is multi-class
# classification: categorical cross-entropy replaces mean squared error.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


## Training

In [10]:
# Train the model; the test split is passed as validation data so that
# validation metrics are recorded in `history` alongside the training ones.
history = model.fit(
    X_train,
    y_train,
    epochs=5,
    batch_size=4,
    validation_data=(X_test, y_test),
)


Epoch 1/5



KeyboardInterrupt



## Testing

In [None]:
# `plt` was used below without ever being imported, which raises NameError on
# a fresh kernel; import it here so this cell runs on its own.
import matplotlib.pyplot as plt

# Evaluate the model on the test set
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc}")

# Plot the per-epoch accuracy recorded during training
plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Accuracy per epoch')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()