In [1]:
!pip install --upgrade tensorflow --user
!pip install transformers
!pip install torch

Requirement already up-to-date: tensorflow in c:\users\ignac\appdata\roaming\python\python38\site-packages (2.6.0)


# Redes recurrentes con capas de atención

In [2]:
from importlib import reload
import nltk
nltk.download('stopwords')
nltk.download('wordnet')

import pandas as pd
import numpy as np
import re
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Concatenate, Dense, Input, LSTM, Embedding, Dropout, Activation, GRU, Flatten
from tensorflow.keras.layers import Bidirectional, GlobalMaxPool1D
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Convolution1D
from tensorflow.keras import initializers, regularizers, constraints, optimizers, layers

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\ignac\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\ignac\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Primero se leen las frases financieras en las que el 100 % de los anotadores está de acuerdo (fichero `Sentences_AllAgree`) y se añade una columna de sentimiento; también preparamos un segundo dataset donde iremos guardando los datos limpios. Tras esto realizamos un bucle en el que separamos la frase del sentimiento.

In [3]:
# Load the raw corpus and create the `text` and `sentiment` columns.
# The location is kept in a single constant so it can be repointed without
# touching the parsing code.
DATA_PATH = 'C:\\Users\\ignac\\OneDrive\\Escritorio\\BME\\Master\\CLASES\\Modulo 4\\Lenguage Natural\\Practica\\Financial_Phrases\\Sentences_AllAgree.txt'

# A separator longer than one character is interpreted by pandas as a regex,
# which only the python engine supports; naming the engine explicitly avoids
# the ParserWarning fallback. The separator is presumably chosen so that it
# never matches inside a line, leaving each line in a single column.
data = pd.read_csv(DATA_PATH, sep='\n ', header=None, engine='python')
data.columns = ["text"]
data.dropna(inplace=True)
# Placeholder column; it is filled in once the label is split off the text.
data.insert(1, 'sentiment', 0)
# Independent working copy that will hold the cleaned data.
dataclean = data.copy()

  data=pd.read_csv('C:\\Users\\ignac\\OneDrive\\Escritorio\\BME\\Master\\CLASES\\Modulo 4\\Lenguage Natural\\Practica\\Financial_Phrases\\Sentences_AllAgree.txt',sep='\n ', header=None)


In [None]:
# Split every raw line "sentence@label" into its sentence and its sentiment
# label. The greedy first group makes the split happen at the LAST '@', so an
# '@' inside the sentence itself cannot be mistaken for the delimiter.
parts = data['text'].str.extract(r'^(?P<text>.*)@(?P<sentiment>[^@]*)$')

# Fail loudly, with a count, if some line carries no label at all.
unlabelled = parts['sentiment'].isna()
if unlabelled.any():
    raise ValueError(
        '{} line(s) have no "@<label>" suffix'.format(int(unlabelled.sum()))
    )

# Whole-column assignment (aligned on the shared index) instead of writing
# cell by cell through iloc.
dataclean['text'] = parts['text']
dataclean['sentiment'] = parts['sentiment']

In [None]:
# Text-cleaning tools used by clean_text below: the set of English stop words
# (a set, so membership tests are cheap) and a WordNet lemmatizer.
stop_words = set(stopwords.words("english")) 
lemmatizer = WordNetLemmatizer()


def clean_text(text):
    """Normalise one sentence for the tokenizer.

    Steps, in order: strip punctuation, lower-case, lemmatize every token as a
    noun and then as a verb, drop English stop words, and re-join with single
    spaces.

    Relies on the module-level ``lemmatizer`` and ``stop_words`` objects.

    Args:
        text: the raw sentence.

    Returns:
        The cleaned sentence as a single space-separated string.
    """
    # `flags` must be passed by keyword: the fourth positional argument of
    # re.sub is `count`, so passing re.UNICODE there capped the substitution
    # at 32 replacements and left later punctuation in place.
    text = re.sub(r'[^\w\s]', '', text, flags=re.UNICODE)
    # split() without an argument also discards the empty tokens that runs of
    # whitespace would otherwise produce.
    tokens = text.lower().split()
    # Noun pass first, then verb pass, as two chained lemmatizations.
    tokens = [lemmatizer.lemmatize(lemmatizer.lemmatize(token), "v") for token in tokens]
    return " ".join(token for token in tokens if token not in stop_words)

# Store the cleaned version of every sentence next to the original text.
dataclean['Processed_Text'] = dataclean['text'].map(clean_text)
dataclean.head()

In [None]:
# Average number of space-separated tokens per (uncleaned) sentence.
dataclean['text'].str.split(" ").str.len().mean()

In [None]:
# Encode the sentiment labels as integers so the model can consume them, then
# split the data into train and test sets.

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
dataclean['sentiment'] = le.fit_transform(dataclean['sentiment'])

# A fixed seed makes the split (and every score computed from it) reproducible
# across kernel restarts; stratifying keeps the class proportions identical in
# both partitions.
X_train, X_test, y_train, y_test = train_test_split(dataclean['Processed_Text'],
                                                    dataclean['sentiment'],
                                                    test_size=0.2,
                                                    random_state=42,
                                                    stratify=dataclean['sentiment'])

In [None]:
# List the distinct (already integer-encoded) sentiment labels.
np.unique(dataclean['sentiment'])

In [None]:
# Hyper-parameters shared by the networks defined below.

# Output dimension of the Embedding layer.
EMBED_SIZE = 5
# Number of units of each LSTM direction.
RNN_CELL_SIZE = 64
# Length of the model's input sequences.
MAX_LEN = 25   # Since our mean length is 22.45


In [None]:
# Fit a whitespace tokenizer on the training texts only, turn them into
# integer sequences and pad them to a fixed length.

tokenizer = Tokenizer(split=" ")
tokenizer.fit_on_texts(X_train)
sequences = tokenizer.texts_to_sequences(X_train)
# Pad to MAX_LEN (the same constant the model's Input layer is built with)
# rather than a second hard-coded literal that could drift from it.
x_train = pad_sequences(sequences, maxlen=MAX_LEN, padding="pre")
word_index = tokenizer.word_index
# One more than the vocabulary size; used as the Embedding input dimension.
MAX_FEATURES = len(word_index) + 1
max_sequence = x_train.shape[1]

In [None]:
# Encode the test texts with the tokenizer fitted on the training set and pad
# them to MAX_LEN, the length the model's Input layer expects.
sequences_test = tokenizer.texts_to_sequences(X_test)
x_test = pad_sequences(sequences_test, maxlen=MAX_LEN, padding="pre")

In [None]:
# Attention layer (Bahdanau-style, i.e. additive attention).

class Attention(tf.keras.Model):
    """Additive attention over a sequence, queried by a single hidden state.

    Args:
        units: width of the two projection layers (``W1`` for the sequence,
            ``W2`` for the query) that are summed before scoring.
    """

    def __init__(self, units):
        super().__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, features, hidden):
        """Return ``(context_vector, attention_weights)``.

        Expected shapes, as stated by the original comments (assumed, not
        enforced here):
            features: (batch_size, max_length, hidden_size)
            hidden:   (batch_size, hidden_size)
        """
        # Add a time axis -> (batch_size, 1, hidden_size) so the projected
        # query broadcasts against every time step of the projected features.
        query = tf.expand_dims(hidden, 1)

        # (batch_size, max_length, units) before the scoring layer V ...
        projected = tf.nn.tanh(self.W1(features) + self.W2(query))
        # ... and (batch_size, max_length, 1) after it.
        logits = self.V(projected)

        # Normalise over the time axis: one weight per time step.
        attention_weights = tf.nn.softmax(logits, axis=1)

        # Weighted sum over time -> (batch_size, hidden_size).
        context_vector = tf.reduce_sum(attention_weights * features, axis=1)

        return context_vector, attention_weights

In [None]:
# Functional-API entry point: integer token ids of length MAX_LEN, mapped to
# EMBED_SIZE-dimensional vectors by a trainable Embedding layer.
sequence_input = Input(shape=(MAX_LEN,), dtype="int32")
embedded_sequences = Embedding(MAX_FEATURES, EMBED_SIZE)(sequence_input) 

In [None]:
# With the embedding in place any layers can be stacked on top of it; here a
# first bidirectional LSTM is connected to the embedding output.
lstm = Bidirectional(LSTM(RNN_CELL_SIZE, return_sequences = True), name="bi_lstm_0")(embedded_sequences)
# Second bidirectional LSTM. return_state=True additionally yields the final
# hidden (h) and cell (c) states of the forward and the backward direction.
(lstm, forward_h, forward_c, backward_h, backward_c) = Bidirectional(LSTM(RNN_CELL_SIZE, return_sequences=True, return_state=True), name="bi_lstm_1")(lstm)

In [None]:
# Because the model uses a bidirectional RNN, the states of both directions are
# concatenated before computing the attention weights and the weighted sum.
# First concatenate the forward and backward hidden states and cell states.
state_h = Concatenate()([forward_h, backward_h])
# NOTE(review): state_c is not used again in this cell.
state_c = Concatenate()([forward_c, backward_c])
# The attention layer receives the LSTM output sequence and the hidden state.
context_vector, attention_weights = Attention(10)(lstm, state_h)
# The context vector feeds a dense layer followed by dropout for the
# classification task.

dense2 = Dense(20, activation="relu")(context_vector)
dropout2 = Dropout(0.5)(dense2)
# Three classes (positive, negative, neutral) -> 3-unit softmax output layer.
output = Dense(3, activation="softmax")(dropout2)
# Assemble the model from its input and output tensors.
model = keras.Model(inputs=sequence_input, outputs=output)

In [None]:
# Show the model summary. summary() prints the table itself and returns None,
# so wrapping it in print() only appended a stray "None" line to the output.
model.summary()

In [None]:
#pintamos el modelo 
!pip install pydot
#!pip install graphviz
#!winget install graphviz

keras.utils.plot_model(model, show_shapes=True, dpi=90)

In [None]:
# Compile the model.
# NOTE(review): METRICS is built here but is not what compile() receives below
# (it is given metrics=['acc']), so none of these metric objects are attached
# to the model in this cell.
METRICS = [
      keras.metrics.TruePositives(name='tp'),
      keras.metrics.FalsePositives(name='fp'),
      keras.metrics.TrueNegatives(name='tn'),
      keras.metrics.FalseNegatives(name='fn'), 
      keras.metrics.Accuracy(name='accuracy'),
      keras.metrics.Precision(name='precision'),
      keras.metrics.Recall(name='recall'),
      keras.metrics.AUC(name='auc'),
]

# Sparse categorical cross-entropy: the targets are integer class ids.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['acc'])

In [None]:
# Train the attention model for EPOCHS epochs in mini-batches of BATCH_SIZE
# samples, holding out 20% of the training data for validation.
BATCH_SIZE = 100
EPOCHS = 30
history = model.fit(x_train,y_train,
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS,
                    validation_split=0.2)

In [None]:
# Evaluate on the test set and report its accuracy.
from sklearn.metrics import accuracy_score
prediction = model.predict(x_test)
# y_pred stays a 2-D boolean matrix (True at each row's most probable class) so
# code that later calls np.argmax(y_pred, axis=1) keeps working. The previous
# `prediction > 0.5` mask was all-False whenever no class reached 0.5, and
# argmax then silently reported class 0 for that row.
y_pred = prediction == prediction.max(axis=1, keepdims=True)
accuracy_score(y_test, np.argmax(prediction, axis=1))

In [None]:
#Definimos la matriz de confusion y la ploteamos
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (classification_report,
                             confusion_matrix,
                             roc_auc_score)
%matplotlib inline
%config InlineBackend.figure_format = 'retina'


def plot_cm(labels, predictions, p=0.5):
    """Plot the (non-normalised) confusion matrix of ``predictions`` vs ``labels``.

    Args:
        labels: true class ids, one per sample.
        predictions: either class ids (1-D) or one row of per-class
            scores / one-hot flags per sample (2-D); 2-D input is reduced to
            class ids with an argmax over the class axis.
        p: unused; kept so existing calls that pass it keep working.
    """
    # Use the arguments that were passed in. Reading the notebook-level
    # y_test / y_pred here would plot the same matrix whatever the caller
    # supplied.
    predictions = np.asarray(predictions)
    if predictions.ndim > 1:
        predictions = np.argmax(predictions, axis=1)
    cm = confusion_matrix(labels, predictions)
    plt.figure(figsize=(5, 5))
    sns.heatmap(cm, annot=True, fmt="d")
    plt.title("Confusion matrix (non-normalized))")
    plt.ylabel("Actual label")
    plt.xlabel("Predicted label")


plot_cm(y_test, y_pred)

# CONVOLUCIONAL

In [None]:
from tensorflow.keras.layers import Conv1D, GlobalMaxPooling1D, Dropout

In [None]:
# Reusing the preprocessing of the previous section, combine recurrent and
# convolutional layers in a Sequential model.
model = tf.keras.Sequential()
# Embedding layer.
model.add(tf.keras.layers.Embedding(MAX_FEATURES,5,input_length=x_train.shape[1]))
# Bidirectional LSTM on top of the embedding.
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, dropout=0.3,return_sequences=True)))
# Convolutional layer followed by global max pooling and dropout, added to
# limit overfitting.
model.add(Conv1D(10, 2,padding="valid",activation="selu",strides=1))
model.add(GlobalMaxPooling1D())
model.add(Dropout(0.7))
# Dense layer.
model.add(tf.keras.layers.Dense(64, activation='selu'))
# Three classes (positive, negative, neutral) -> 3-unit softmax output layer.
model.add(tf.keras.layers.Dense(3, activation='softmax'))
# Compile the model.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['acc']) 

In [None]:
# Train the current `model` for EPOCHS epochs in mini-batches of BATCH_SIZE
# samples, holding out 20% of the training data for validation.
BATCH_SIZE = 100
EPOCHS = 30
history = model.fit(x_train,y_train,
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS,
                    validation_split=0.2)

In [None]:
# Evaluate on the test set and report its accuracy.
from sklearn.metrics import accuracy_score
prediction = model.predict(x_test)
# y_pred stays a 2-D boolean matrix (True at each row's most probable class) so
# code that later calls np.argmax(y_pred, axis=1) keeps working. The previous
# `prediction > 0.5` mask was all-False whenever no class reached 0.5, and
# argmax then silently reported class 0 for that row.
y_pred = prediction == prediction.max(axis=1, keepdims=True)
accuracy_score(y_test, np.argmax(prediction, axis=1))

In [None]:
#Definimos la matriz de confusion y la ploteamos
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (classification_report,
                             confusion_matrix,
                             roc_auc_score)
%matplotlib inline
%config InlineBackend.figure_format = 'retina'


def plot_cm(labels, predictions, p=0.5):
    """Plot the (non-normalised) confusion matrix of ``predictions`` vs ``labels``.

    Args:
        labels: true class ids, one per sample.
        predictions: either class ids (1-D) or one row of per-class
            scores / one-hot flags per sample (2-D); 2-D input is reduced to
            class ids with an argmax over the class axis.
        p: unused; kept so existing calls that pass it keep working.
    """
    # Use the arguments that were passed in. Reading the notebook-level
    # y_test / y_pred here would plot the same matrix whatever the caller
    # supplied.
    predictions = np.asarray(predictions)
    if predictions.ndim > 1:
        predictions = np.argmax(predictions, axis=1)
    cm = confusion_matrix(labels, predictions)
    plt.figure(figsize=(5, 5))
    sns.heatmap(cm, annot=True, fmt="d")
    plt.title("Confusion matrix (non-normalized))")
    plt.ylabel("Actual label")
    plt.xlabel("Predicted label")


plot_cm(y_test, y_pred)

# BERT

In [None]:
!pip install -q tensorflow-text
!pip install -q tf-models-official
!pip install -q -U keras-tuner
import kerastuner as kt

import os
import shutil

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
import official.nlp.optimization
from official.nlp import optimization  # to create AdamW optmizer

import matplotlib.pyplot as plt

tf.get_logger().setLevel('ERROR')

In [None]:
# Split and shuffle the data for the BERT experiment, which works on the raw
# (uncleaned) text.
from sklearn.utils import shuffle
# Fixed seed + stratification: a reproducible split with the same class
# proportions in both partitions.
X_train, X_test, y_train, y_test = train_test_split(dataclean['text'],
                                                    dataclean['sentiment'],
                                                    test_size=0.2,
                                                    random_state=42,
                                                    stratify=dataclean['sentiment'])
data = pd.DataFrame({'text': X_train, 'sentiment': y_train})

# The held-out frame must be built from the TEST partition; it was previously
# filled with the training rows.
datatest = pd.DataFrame({'text': X_test, 'sentiment': y_test})

dataset = shuffle(data, random_state=42).reset_index(drop=True)

In [None]:
dataset

In [None]:
# Catalogue of the available BERT-family encoders on TF Hub and the
# preprocessing model each one needs. `bert_model_name` selects the entry that
# is resolved into the two handles at the end of the cell.

bert_model_name = 'small_bert/bert_en_uncased_L-2_H-128_A-2'

_HUB_ROOT = 'https://tfhub.dev'
_UNCASED_PREPROCESS = _HUB_ROOT + '/tensorflow/bert_en_uncased_preprocess/1'

# The small-BERT family is a regular grid: every depth L combined with every
# (hidden size H, attention heads A) pair.
_SMALL_BERT_NAMES = [
    'small_bert/bert_en_uncased_L-{}_H-{}_A-{}'.format(depth, hidden, heads)
    for depth in (2, 4, 6, 8, 10, 12)
    for hidden, heads in ((128, 2), (256, 4), (512, 8), (768, 12))
]

# Model name -> encoder handle.
map_name_to_handle = {
    name: '{}/tensorflow/{}/3'.format(_HUB_ROOT, name)
    for name in (
        'bert_en_uncased_L-12_H-768_A-12',
        'bert_en_cased_L-12_H-768_A-12',
        'bert_multi_cased_L-12_H-768_A-12',
    )
}
map_name_to_handle.update(
    (name, '{}/tensorflow/{}/1'.format(_HUB_ROOT, name))
    for name in _SMALL_BERT_NAMES
)
map_name_to_handle.update({
    'albert_en_base': _HUB_ROOT + '/tensorflow/albert_en_base/2',
    'electra_small': _HUB_ROOT + '/google/electra_small/2',
    'electra_base': _HUB_ROOT + '/google/electra_base/2',
    'experts_pubmed': _HUB_ROOT + '/google/experts/bert/pubmed/2',
    'experts_wiki_books': _HUB_ROOT + '/google/experts/bert/wiki_books/2',
    'talking-heads_base': _HUB_ROOT + '/tensorflow/talkheads_ggelu_bert_en_base/1',
})

# Model name -> preprocessing handle. Everything that is not cased,
# multilingual or ALBERT shares the uncased English preprocessor.
map_model_to_preprocess = {
    'bert_en_uncased_L-12_H-768_A-12': _UNCASED_PREPROCESS,
    'bert_en_cased_L-12_H-768_A-12':
        _HUB_ROOT + '/tensorflow/bert_en_cased_preprocess/1',
}
map_model_to_preprocess.update(dict.fromkeys(_SMALL_BERT_NAMES, _UNCASED_PREPROCESS))
map_model_to_preprocess.update({
    'bert_multi_cased_L-12_H-768_A-12':
        _HUB_ROOT + '/tensorflow/bert_multi_cased_preprocess/1',
    'albert_en_base':
        _HUB_ROOT + '/tensorflow/albert_en_preprocess/1',
})
map_model_to_preprocess.update(dict.fromkeys(
    (
        'electra_small',
        'electra_base',
        'experts_pubmed',
        'experts_wiki_books',
        'talking-heads_base',
    ),
    _UNCASED_PREPROCESS,
))

tfhub_handle_encoder = map_name_to_handle[bert_model_name]
tfhub_handle_preprocess = map_model_to_preprocess[bert_model_name]

In [None]:
# Build the BERT-based classifier.
def build_classifier_model(encoder, processer, epochs_set, initial_learning_rate,
                           num_train_examples=None, batch_size=32):
    """Build and compile a 3-class text classifier on top of a TF-Hub encoder.

    Args:
        encoder: TF-Hub handle of the encoder model.
        processer: TF-Hub handle of the matching preprocessing model.
        epochs_set: number of epochs the model will be trained for; sizes the
            learning-rate schedule.
        initial_learning_rate: peak learning rate of the AdamW optimizer.
        num_train_examples: number of training samples. Defaults to the number
            of rows of the notebook-level ``dataset``; pass the exact figure
            when training on a subset (validation split, CV fold).
        batch_size: batch size that will be used in ``fit``. The default, 32,
            is what Keras uses when ``fit`` is called without ``batch_size``.

    Returns:
        The compiled ``tf.keras.Model`` (string input -> 3-way softmax).

    Raises:
        ValueError: if ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError('batch_size must be a positive integer')

    # Model input: one raw string per example.
    text_input = tf.keras.layers.Input(
        shape=(),
        dtype=tf.string,
        name='text'
    )
    # Preprocessing layer that turns the text into the encoder's inputs.
    preprocessing_layer = hub.KerasLayer(
        processer,
        name='preprocessing'
    )
    encoder_inputs = preprocessing_layer(text_input)

    # Trainable encoder. A separate local name keeps the `encoder` argument
    # (the hub handle) from being shadowed by the layer built from it.
    encoder_layer = hub.KerasLayer(
        encoder,
        trainable=True,
        name='BERT_encoder'
    )
    outputs = encoder_layer(encoder_inputs)

    # Representation of the whole input text.
    net = outputs['pooled_output']

    # Task-specific head: dropout, a funnel of SELU dense layers, and a
    # 3-way softmax.
    net = tf.keras.layers.Dropout(0.5)(net)
    net = tf.keras.layers.Dense(units = 1024, activation='selu')(net)
    net = tf.keras.layers.Dense(units = 512, activation='selu')(net)
    net = tf.keras.layers.Dense(units = 256, activation='selu')(net)
    net = tf.keras.layers.Dense(units = 128, activation='selu')(net)
    net = tf.keras.layers.Dense(units = 64, activation='selu')(net)
    net = tf.keras.layers.Dense(units = 32, activation='selu')(net)
    net = tf.keras.layers.Dense(units = 16, activation='selu')(net)
    net = tf.keras.layers.Dense(8, activation='selu')(net)
    net = tf.keras.layers.Dense(3, activation='softmax', name='classifier')(net)

    classifier_model = tf.keras.Model(text_input, net)

    # Optimizer schedule. One optimizer step is taken per BATCH, so an epoch
    # has ceil(samples / batch_size) steps. Using the raw sample count here
    # stretched the warm-up and decay far beyond the actual training run.
    if num_train_examples is None:
        num_train_examples = dataset.shape[0]
    epochs = epochs_set
    steps_per_epoch = (num_train_examples + batch_size - 1) // batch_size
    num_train_steps = steps_per_epoch * epochs
    num_warmup_steps = int(0.1*num_train_steps)

    optimizer = optimization.create_optimizer(
        init_lr=initial_learning_rate,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
        optimizer_type='adamw'
      )
    # Compile; the targets are integer class ids.
    classifier_model.compile(
        optimizer=optimizer,
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"]
      )

    return classifier_model

    

In [None]:
# Build the model with the function defined in the previous cell and print its
# summary.
# Number of epochs handed to the builder.
epocas = 200

model = build_classifier_model(
    tfhub_handle_encoder,
    tfhub_handle_preprocess,
    epocas,
    5e-5
)
model.summary()

In [None]:
# Train the model. The epoch count is the same `epocas` value the model was
# built with, so training does not run past the end of the learning-rate
# schedule that was sized from it.
history = model.fit(x=dataset.text,
                    y=dataset.sentiment,
                    validation_split=0.2,
                    epochs=epocas,
                    )

In [None]:
# Sanity-check the results with 10-fold cross-validation: train a fresh model
# on each training partition and evaluate it on the corresponding held-out fold.

from sklearn.model_selection import KFold
n_split = 10

epocas = 30
# One model.evaluate() result per fold.
fold_scores = []
for train_index, test_index in KFold(n_split).split(dataset):
    # Fold-local names: reusing x_train / x_test / y_train / y_test here would
    # overwrite the notebook-level train/test split that later cells evaluate
    # against.
    fold_x_train, fold_x_val = dataset.text[train_index], dataset.text[test_index]
    fold_y_train, fold_y_val = dataset.sentiment[train_index], dataset.sentiment[test_index]

    model = build_classifier_model(
        tfhub_handle_encoder,
        tfhub_handle_preprocess,
        epocas,
        3e-5
    )

    model.fit(fold_x_train, fold_y_train, epochs=epocas)
    # Evaluate once and reuse the result for both the record and the log line.
    fold_score = model.evaluate(fold_x_val, fold_y_val)
    fold_scores.append(fold_score)
    print('Model evaluation ', fold_score)

In [None]:
# Evaluate on the test set, the same way as for the previous models, so that
# the comparison is fair.
from sklearn.metrics import accuracy_score
prediction = model.predict(X_test)
# y_pred stays a 2-D boolean matrix (True at each row's most probable class) so
# code that later calls np.argmax(y_pred, axis=1) keeps working. The previous
# `prediction > 0.5` mask was all-False whenever no class reached 0.5, and
# argmax then silently reported class 0 for that row.
y_pred = prediction == prediction.max(axis=1, keepdims=True)
accuracy_score(y_test, np.argmax(prediction, axis=1))

In [None]:
#Definimos la matriz de confusion y la ploteamos
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (classification_report,
                             confusion_matrix,
                             roc_auc_score)
%matplotlib inline
%config InlineBackend.figure_format = 'retina'


def plot_cm(labels, predictions, p=0.5):
    """Plot the (non-normalised) confusion matrix of ``predictions`` vs ``labels``.

    Args:
        labels: true class ids, one per sample.
        predictions: either class ids (1-D) or one row of per-class
            scores / one-hot flags per sample (2-D); 2-D input is reduced to
            class ids with an argmax over the class axis.
        p: unused; kept so existing calls that pass it keep working.
    """
    # Use the arguments that were passed in. Reading the notebook-level
    # y_test / y_pred here would plot the same matrix whatever the caller
    # supplied.
    predictions = np.asarray(predictions)
    if predictions.ndim > 1:
        predictions = np.argmax(predictions, axis=1)
    cm = confusion_matrix(labels, predictions)
    plt.figure(figsize=(5, 5))
    sns.heatmap(cm, annot=True, fmt="d")
    plt.title("Confusion matrix (non-normalized))")
    plt.ylabel("Actual label")
    plt.xlabel("Predicted label")


plot_cm(y_test, y_pred)

Respecto al rendimiento de los modelos, considerando que el benchmark tiene un 0.98 de accuracy, estamos un poco lejos en la mayoría, aunque son rendimientos aceptables. Vemos que el mejor accuracy lo proporciona el modelo BERT, aunque los otros modelos realizados obtienen resultados decentes. Personalmente me sorprenden los buenos resultados obtenidos por el modelo que combina capas recurrentes y convolucionales, ya que no esperaba que un modelo a priori tan sencillo obtuviera esos resultados, aunque predice muy mal una de las clases (la negativa).
También observamos que todos los modelos terminan sobreajustando en mayor o menor medida a pesar de incluir capas de regularización.

Utilizamos un modelo de sentimiento ya implementado para comparar con el nuestro posteriormente

In [None]:
from nltk.sentiment import SentimentIntensityAnalyzer
nltk.download('vader_lexicon')
sia = SentimentIntensityAnalyzer()
# One score dict per test sentence, with the aggregate 'compound' entry left
# out so that only the per-polarity scores remain.
result = [
    {
        polarity: score
        for polarity, score in sia.polarity_scores(sentence).items()
        if polarity != 'compound'
    }
    for sentence in X_test
]

In [None]:
# Turn the list of per-sentence score dicts into a DataFrame (one column per
# score key).
result =pd.DataFrame.from_dict(result)
# NOTE(review): this head() is not the cell's last expression, so nothing is
# displayed for it.
result.head()

# Rename the columns to 0, 1, 2 so that a column label can be compared directly
# with the integer-encoded sentiment. Assumes the column order matches the
# label encoding used for `sentiment` -- TODO confirm.
result.columns =[0, 1,2] 

In [None]:
pred = result.idxmax(axis=1)

In [None]:
accuracy_score(y_test, pred)

In [None]:
plot_cm(y_test, pred)

Observamos que el accuracy del modelo ya implementado es menor que el conseguido con cualquiera de nuestros modelos, por lo que se podría decir que los modelos entrenados son mejores que el modelo ya implementado.