In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from sklearn.metrics import classification_report

# 1. Cargar los datos
def load_data(train_path, valid_path):
    train_df = pd.read_csv(train_path)
    valid_df = pd.read_csv(valid_path)
    return train_df, valid_df

train_path = "sent_train.csv"
valid_path = "sent_valid.csv"
train_df, valid_df = load_data(train_path, valid_path)

# 2. Preprocesamiento de texto
MAX_NUM_WORDS = 10000  # Máximo número de palabras en el vocabulario
MAX_SEQUENCE_LENGTH = 50  # Longitud máxima de las secuencias
EMBEDDING_DIM = 100  # Dimensión de las incrustaciones de palabras

tokenizer = Tokenizer(num_words=MAX_NUM_WORDS, oov_token="<OOV>")
tokenizer.fit_on_texts(train_df["text"])

X_train = tokenizer.texts_to_sequences(train_df["text"])
X_valid = tokenizer.texts_to_sequences(valid_df["text"])

X_train = pad_sequences(X_train, maxlen=MAX_SEQUENCE_LENGTH, padding="post", truncating="post")
X_valid = pad_sequences(X_valid, maxlen=MAX_SEQUENCE_LENGTH, padding="post", truncating="post")

y_train = to_categorical(train_df["label"], num_classes=3)
y_valid = to_categorical(valid_df["label"], num_classes=3)

# 3. Definir el modelo LSTM
def build_lstm_model():
    model = Sequential([
        Embedding(MAX_NUM_WORDS, EMBEDDING_DIM, input_length=MAX_SEQUENCE_LENGTH),
        LSTM(128, return_sequences=True),
        LSTM(64),
        Dropout(0.5),
        Dense(32, activation='relu'),
        Dense(3, activation='softmax')  # 3 clases de salida
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

model = build_lstm_model()

# 4. Entrenar el modelo
EPOCHS = 10
BATCH_SIZE = 32

history = model.fit(X_train, y_train, validation_data=(X_valid, y_valid), epochs=EPOCHS, batch_size=BATCH_SIZE)

# 5. Evaluar el modelo
y_pred = model.predict(X_valid)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_valid, axis=1)

print(classification_report(y_true, y_pred_classes, target_names=["Bearish", "Bullish", "Neutral"]))


Epoch 1/10




[1m299/299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 49ms/step - accuracy: 0.6366 - loss: 0.9283 - val_accuracy: 0.6558 - val_loss: 0.8783
Epoch 2/10
[1m299/299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 48ms/step - accuracy: 0.6378 - loss: 0.9120 - val_accuracy: 0.6558 - val_loss: 0.8814
Epoch 3/10
[1m299/299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 51ms/step - accuracy: 0.6447 - loss: 0.9066 - val_accuracy: 0.6558 - val_loss: 0.8724
Epoch 4/10
[1m299/299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 51ms/step - accuracy: 0.6557 - loss: 0.8727 - val_accuracy: 0.6558 - val_loss: 0.8752
Epoch 5/10
[1m299/299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 51ms/step - accuracy: 0.6514 - loss: 0.8717 - val_accuracy: 0.6558 - val_loss: 0.8656
Epoch 6/10
[1m299/299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 49ms/step - accuracy: 0.6479 - loss: 0.8744 - val_accuracy: 0.6558 - val_loss: 0.8715
Epoch 7/10
[1m299/299[0m 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
