# Recurrent Neural Network

In [None]:
import sys
import os

# Put the directory two levels above the current working directory on
# sys.path (once), so that project packages rooted there can be imported
# by the cells below.
module_path = os.path.abspath(os.path.join('../..'))
if not any(entry == module_path for entry in sys.path):
    sys.path.append(module_path)

In [None]:
from src.trainers.utils import build_datasets, save_metrics
from constants.constants_twitter import TWITTER_DATASET_TRAIN_PATH

# Split the Twitter training file into train / test / validation sets.
# test_size=0.3 holds out 30% of the data; per the original author's note,
# val_size=0.5 is applied to that held-out 30% (TODO confirm against
# build_datasets). random_state is fixed so the split is reproducible.
dataset_train, dataset_test, dataset_val = build_datasets(
    TWITTER_DATASET_TRAIN_PATH,
    random_state=42,
    val_size=0.5,
    test_size=0.3,
)

# Report the shape of each split, in train / test / val order.
for split in (dataset_train, dataset_test, dataset_val):
    print(split.shape)

In [None]:
from src.preprocesamiento.nlp_spacy import Tokenizer

tokenizer = Tokenizer()

# Each tokenized split is a plain dict with two entries:
#   'tokens'   -> output of tokenizer.tokenize on the split's 'text' column
#   'polarity' -> the split's 'polarity' column as a NumPy array
# NOTE(review): the meaning of the second positional argument (True) to
# tokenize() is not visible from this notebook -- confirm in nlp_spacy.
dataset_train_tokenized = {
    'tokens': tokenizer.tokenize(dataset_train['text'], True),
    'polarity': dataset_train['polarity'].to_numpy(),
}
dataset_val_tokenized = {
    'tokens': tokenizer.tokenize(dataset_val['text'], True),
    'polarity': dataset_val['polarity'].to_numpy(),
}
dataset_test_tokenized = {
    'tokens': tokenizer.tokenize(dataset_test['text'], True),
    'polarity': dataset_test['polarity'].to_numpy(),
}

## Entrenar RNN

In [None]:
import numpy as np
from src.trainers.utils import ModelArgs, EarlyStopping 

# --- Search space -----------------------------------------------------------
# Optimizers to try, and 5 learning rates log-spaced between 10**-3 and
# 10**-0.4 (about 0.398).
list_optim = ["adam", "sgd"]
list_lr = np.logspace(-3, -0.4, num=5)
print(list_lr)

# --- Training budget --------------------------------------------------------
batch_size = 64
epochs = 100

# --- Early-stopping settings (passed to EarlyStopping in the training cell) --
patience = 15
min_delta = 1e-4

# --- Fixed architecture settings shared by every run ------------------------
model_args = ModelArgs()
model_args.hidden_size = 128
model_args.num_layers = 2
model_args.output_size = 3
model_args.dropout = 0.20

In [None]:

import os
from src.trainers.train_rnn import train_rnn
from src.trainers.utils import show_loss_val_curves, save_model_torch
from constants.constants_twitter import RNN_LOSS_CURVES_DIR, RNN_MODEL_PATH, TWITTER_RNN_METRICS_PATH, EMBEDDING_W2V_TWITTER_PATH

# Grid search over every (optimizer, learning rate) pair. Each run's metrics
# are appended via save_metrics, its loss curves are displayed, and the model
# with the highest reported accuracy so far is written to RNN_MODEL_PATH.
best_accuracy = -1
# Running index of the training run. It used to be reset to 0 inside the
# loop, so every curve file name ended in "_0"; it now counts runs.
cont = 0
for optim in list_optim:
    for lr in list_lr:
        model, metrics, train_losses, val_losses = train_rnn(
            dataset_train=dataset_train_tokenized,
            dataset_val=dataset_val_tokenized,
            embeddings_path=EMBEDDING_W2V_TWITTER_PATH,
            model_args=model_args,
            # A new EarlyStopping object per run so its state starts over.
            early_stopping=EarlyStopping(patience, min_delta),
            batch_size=batch_size,
            lr=lr,
            epochs=epochs,
            optim=optim,
            use_class_weights=True
        )
        print(f"[RNN {optim} {lr:.5f} {model_args.dropout}] acc: {metrics['accuracy']:.4f}")
        save_metrics(metrics, TWITTER_RNN_METRICS_PATH)

        # Build a per-run file name for the loss-curve image; the decimal
        # point of the learning rate is replaced by '-'.
        title = f"RNN_{metrics['optim']} {'-'.join(str(lr).split('.'))}"
        path = os.path.join(RNN_LOSS_CURVES_DIR, f"{title}_{str(cont)}.png")
        # NOTE(review): `path` is computed but never handed to
        # show_loss_val_curves, so nothing is saved under RNN_LOSS_CURVES_DIR
        # from this cell. Confirm whether the helper accepts a save path.
        show_loss_val_curves(train_losses, val_losses, len(train_losses))

        # Keep only the best checkpoint seen in this notebook session.
        if metrics['accuracy'] > best_accuracy:
            best_accuracy = metrics['accuracy']
            save_model_torch(model.get_model(), RNN_MODEL_PATH)

        cont += 1

## Modelo con mayor accuracy

In [None]:
import pandas as pd
from constants.constants_twitter import TWITTER_RNN_METRICS_PATH, EMBEDDING_W2V_TIKTOK_SENTENCES_PATH

# Pick the hyperparameter row with the highest accuracy from the saved
# metrics table.
df_metrics = pd.read_csv(TWITTER_RNN_METRICS_PATH)

# idxmax gives the index label of the first row holding the maximum accuracy;
# .loc then pulls that row out as a Series.
best_row_label = df_metrics['accuracy'].idxmax()
best_acc = df_metrics.loc[best_row_label]
print(best_acc)

In [None]:
from constants.constants_twitter import RNN_MODEL_PATH
from src.trainers.trainer_rnn import evaluate_model
from src.trainers.train_rnn import RNNModel
from src.trainers.utils import load_model_torch

batch_size = 64

# Rebuild the architecture described by the best metrics row, then load the
# saved weights into it.
# Values read from a pandas row can come back as floats or NumPy scalars, so
# every numeric hyperparameter is converted explicitly (input_size and
# dropout were previously passed through unconverted, unlike the others).
# NOTE(review): this assumes the checkpoint at RNN_MODEL_PATH was trained with
# the same hyperparameters as the best row of the metrics CSV -- confirm,
# especially if the CSV accumulates rows across several sessions.
model_args = ModelArgs()
model_args.input_size = int(best_acc['embedding_dim'])
model_args.hidden_size = int(best_acc['hidden_size'])
model_args.num_layers = int(best_acc['num_layers'])
model_args.output_size = 3
model_args.dropout = float(best_acc['dropout'])

model = RNNModel(model_args)
model = load_model_torch(model, RNN_MODEL_PATH)

# Evaluate on the held-out test split.
# NOTE(review): evaluate_model is imported from `src.trainers.trainer_rnn`
# while the other RNN helpers come from `src.trainers.train_rnn` -- confirm
# the module name is intended.
metrics = evaluate_model(
    model,
    dataset_test_tokenized,
    "RNN",
    EMBEDDING_W2V_TWITTER_PATH,
    batch_size,
)
print(metrics)

In [None]:
from src.trainers.train_rnn import RNNModel
from constants.constants_twitter import RNN_MODEL_PATH
from src.trainers.utils import load_model_torch
from src.trainers.train_rnn import SentimentAnalysis

from src.preprocesamiento.nlp_spacy import Tokenizer

# Assemble the inference wrapper: a fresh tokenizer, the RNN rebuilt from
# `model_args` with the weights stored at RNN_MODEL_PATH, the Twitter
# word2vec embeddings path, and "cpu" as the last argument.
tokenizer = Tokenizer()
model = load_model_torch(RNNModel(model_args), RNN_MODEL_PATH)
cls = SentimentAnalysis(
    model,
    tokenizer,
    EMBEDDING_W2V_TWITTER_PATH,
    "cpu",
)