# GRU + ELMo



In [1]:
import pandas as pd
# Read the Spanish train and dev splits; both TSV files are indexed by "id".
df_train = pd.read_table("../../../data/es/train_es.tsv", index_col="id")
df_dev = pd.read_table("../../../data/es/dev_es.tsv", index_col="id")

# Tweet text and the "HS" column (used as the binary target further down),
# pulled out as NumPy arrays.
text_train = df_train["text"].values
y_train = df_train["HS"].values
text_dev = df_dev["text"].values
y_dev = df_dev["HS"].values

Tengo que hacer dos cosas:

- Primero, convertir los tweets a secuencias de tokens
- Luego, llevar las secuencias a una longitud fija, truncando o rellenando con padding (Keras necesita esto para poder paralelizar el cálculo)

In [2]:
from nltk.tokenize import TweetTokenizer
from keras.preprocessing.sequence import pad_sequences


# Fixed number of tokens every tweet is cut or padded to.
max_length = 30

# NLTK tweet tokenizer shared by the preprocessing below: case is not
# preserved, and the reduce_len / strip_handles options are switched on.
tokenizer = TweetTokenizer(
    strip_handles=True,
    reduce_len=True,
    preserve_case=False,
)

def preprocess_tweet(tweet):
    """Tokenize ``tweet`` and return a list of exactly ``max_length`` tokens.

    The module-level ``tokenizer`` splits the text. Results longer than
    ``max_length`` keep only their first ``max_length`` tokens; shorter
    results are right-padded with empty strings.
    """
    clipped = tokenizer.tokenize(tweet)[:max_length]
    # Zero when the tweet was already long enough, so nothing is appended.
    missing = max_length - len(clipped)
    return clipped + [''] * missing


# Tokenise and pad every tweet of both splits. This rebinds text_train and
# text_dev from arrays of raw strings to lists of fixed-length token lists.
text_train = list(map(preprocess_tweet, df_train["text"].values))
text_dev = list(map(preprocess_tweet, df_dev["text"].values))

Using TensorFlow backend.


In [3]:
from elmoformanylangs import Embedder

# Build the ELMoForManyLangs embedder from the model directory under
# models/elmo/es/; `e` is used below to turn token lists into vectors.
e = Embedder("../../../models/elmo/es/")

2019-01-10 17:23:13,504 INFO: char embedding size: 2637
2019-01-10 17:23:14,437 INFO: word embedding size: 185214
2019-01-10 17:23:23,671 INFO: Model(
  (token_embedder): ConvTokenEmbedder(
    (word_emb_layer): EmbeddingLayer(
      (embedding): Embedding(185214, 100, padding_idx=3)
    )
    (char_emb_layer): EmbeddingLayer(
      (embedding): Embedding(2637, 50, padding_idx=2634)
    )
    (convolutions): ModuleList(
      (0): Conv1d(50, 32, kernel_size=(1,), stride=(1,))
      (1): Conv1d(50, 32, kernel_size=(2,), stride=(1,))
      (2): Conv1d(50, 64, kernel_size=(3,), stride=(1,))
      (3): Conv1d(50, 128, kernel_size=(4,), stride=(1,))
      (4): Conv1d(50, 256, kernel_size=(5,), stride=(1,))
      (5): Conv1d(50, 512, kernel_size=(6,), stride=(1,))
      (6): Conv1d(50, 1024, kernel_size=(7,), stride=(1,))
    )
    (highways): Highway(
      (_layers): ModuleList(
        (0): Linear(in_features=2048, out_features=4096, bias=True)
        (1): Linear(in_features=2048, out_fe

Calculemos los embeddings de ELMo para cada tweet

In [4]:
import numpy as np

# Show one preprocessed tweet to check the tokenisation and the padding.
print(text_train[0])


# Feed every token list through ELMo and gather the per-tweet results of
# each split into a single array.
X_train = np.asarray(e.sents2elmo(text_train))
X_dev = np.asarray(e.sents2elmo(text_dev))

['easyjet', 'quiere', 'duplicar', 'el', 'número', 'de', 'mujeres', 'piloto', "'", 'verás', 'tú', 'para', 'aparcar', 'el', 'avión', '..', 'http://t.co/46NuLkm09x', '', '', '', '', '', '', '', '', '', '', '', '', '']


2019-01-10 17:23:30,714 INFO: 70 batches, avg len: 32.0
2019-01-10 17:23:33,326 INFO: Finished 1000 sentences.
2019-01-10 17:23:35,286 INFO: Finished 2000 sentences.
2019-01-10 17:23:37,306 INFO: Finished 3000 sentences.
2019-01-10 17:23:39,181 INFO: Finished 4000 sentences.
2019-01-10 17:23:41,085 INFO: 8 batches, avg len: 32.0


In [5]:
# Sanity check: each array should be (n_tweets, max_length, 1024), as in the
# recorded output below.
X_train.shape, X_dev.shape

((4469, 30, 1024), (500, 30, 1024))

In [6]:
from keras.models import Sequential
from keras.layers import Dense, Embedding, GRU, Dropout, LSTM
from keras.optimizers import Adam
from keras.preprocessing import sequence

embedding_dim = 1024  # width of each ELMo token vector fed to the network

# Adam hyper-parameters; the later model cells reuse this dict.
optimizer_args = {
    "lr": 0.0005,
    "decay": 0.01
}

# Single-direction GRU encoder followed by a small dense classifier.
model = Sequential()
model.add(GRU(256, input_shape=(max_length, embedding_dim)))
model.add(Dropout(0.75))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
# One sigmoid unit: binary target trained with binary cross-entropy.
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer=Adam(**optimizer_args),
              metrics=['accuracy'])
# summary() prints the layer table itself and returns None; wrapping it in
# print() only appended a stray "None" line to the cell output.
model.summary()

model.fit(X_train, y_train, validation_data=(X_dev, y_dev), epochs=20, batch_size=32)


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru_1 (GRU)                  (None, 256)               983808    
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               65792     
_________________________________________________________________
dropout_2 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 257       
Total params: 1,049,857
Trainable params: 1,049,857
Non-trainable params: 0
_________________________________________________________________
None
Train on 4469 samples, validate on 500 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epo

<keras.callbacks.History at 0x7f50c252f9b0>

In [7]:
from hate.utils import print_evaluation

# Dev-set report from the project helper (the recorded output lists loss,
# accuracy, precision, recall and F1). `model` is whichever network was
# trained last -- at this point the unidirectional GRU.
print_evaluation(model, X_dev, y_dev)

Loss        : 0.4543
Accuracy    : 0.8020
Precision   : 0.7808
Recall      : 0.7703
F1          : 0.7755


## Bidirectional GRU

In [8]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import GRU, Dropout, LSTM, Bidirectional
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from keras.optimizers import Adam


embedding_dim = 1024  # width of each ELMo token vector fed to the network

# Bidirectional GRU encoder followed by the same dense classifier head as
# the unidirectional model. Reuses `optimizer_args` from the GRU cell.
model = Sequential()
# input_shape goes on the Bidirectional wrapper -- the layer Sequential
# actually receives -- as in the Keras documentation, not on the wrapped GRU.
model.add(Bidirectional(GRU(256), input_shape=(max_length, embedding_dim)))
model.add(Dropout(0.75))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.50))
# One sigmoid unit: binary target trained with binary cross-entropy.
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer=Adam(**optimizer_args),
              metrics=['accuracy'])



In [9]:

# Train the bidirectional GRU for 25 epochs, validating on the dev split
# after each epoch.
model.fit(X_train, y_train, validation_data=(X_dev, y_dev), epochs=25, batch_size=32)


Train on 4469 samples, validate on 500 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x7f507c146fd0>

In [10]:
from hate.utils import print_evaluation

# Dev-set report for the model trained last (here the bidirectional GRU);
# the recorded output lists loss, accuracy, precision, recall and F1.
print_evaluation(model, X_dev, y_dev)

Loss        : 0.4378
Accuracy    : 0.8120
Precision   : 0.7832
Recall      : 0.7973
F1          : 0.7902


## Conv+GRU

In [11]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import GRU, Dropout, LSTM, Bidirectional, Conv1D, MaxPooling1D
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence

embedding_dim = 1024  # width of each ELMo token vector fed to the network

# 1-D convolution + max-pooling front end, then a bidirectional GRU and the
# same dense classifier head as the other models. Relies on `Adam`,
# `optimizer_args` and `max_length` from earlier cells.
model = Sequential()
# Declare the input shape on the first layer, as the other two models do;
# previously `embedding_dim` was assigned here but never used.
model.add(Conv1D(filters=128, kernel_size=3, padding='same', activation='relu',
                 input_shape=(max_length, embedding_dim)))
model.add(MaxPooling1D(pool_size=2))
model.add(Bidirectional(GRU(256)))
model.add(Dropout(0.75))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
# One sigmoid unit: binary target trained with binary cross-entropy.
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer=Adam(**optimizer_args),
              metrics=['accuracy'])



In [12]:


# Train the Conv+GRU model for 25 epochs, validating on the dev split after
# each epoch.
model.fit(X_train, y_train, validation_data=(X_dev, y_dev), epochs=25, batch_size=32)

Train on 4469 samples, validate on 500 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x7f50426e2ef0>

In [13]:
from hate.utils import print_evaluation

# Dev-set report for the model trained last (here the Conv+GRU network);
# the recorded output lists loss, accuracy, precision, recall and F1.
print_evaluation(model, X_dev, y_dev)

Loss        : 1.3088
Accuracy    : 0.7840
Precision   : 0.7689
Recall      : 0.7342
F1          : 0.7512
