In [1]:
from operator import itemgetter
from pathlib import Path

import keras
import numpy as np
import tensorflow as tf
from absl import flags
from keras.optimizers import Adam
from sklearn.metrics import classification_report, recall_score, precision_score, f1_score
from sklearn.utils import class_weight

Using TensorFlow backend.


In [2]:
import importlib

# Local module holding the network definition; reloaded so that edits made to
# it after the first import are picked up without restarting the kernel.
import model as nn
importlib.reload(nn)

# Short alias for the model factory used by the training cell below.
elsa_architecture = nn.elsa_architecture

In [3]:
# Run configuration: device selection, data locations and training schedule.
import os
# NOTE(review): presumably pins the run to the GPU with index 1 — confirm this
# matches the machine the notebook is executed on.
os.environ['CUDA_VISIBLE_DEVICES'] = "1"

lang = "ja"  # prefix of the data files ("<lang>_wv.npy", ...) and of the checkpoint name
batch_size = 250  # used for both training and prediction
lr = 1e-3  # learning rate handed to the Adam optimizer
epochs = 100  # upper bound on epochs; early stopping may end training sooner
patience = 3  # EarlyStopping patience, monitored on val_loss
data_dir = "/data/elsa2"  # directory containing the input .npy / emoji files
checkpoint_dir = "./ckpt"  # directory where the model checkpoint is written
optimizer = "adam"  # the training cell handles 'adam' and 'rmsprop'

In [4]:
# Model hyper-parameters and task switches.
# NOTE(review): lstm_hidden and lstm_drop are not referenced by any other
# visible cell — confirm whether elsa_architecture is meant to receive them.
lstm_hidden = 512
lstm_drop = 0.5
final_drop = 0.5  # passed to elsa_architecture as final_dropout_rate
embed_drop = 0.0  # passed to elsa_architecture as embed_dropout_rate
highway = False  # passed to elsa_architecture as `high`
compute_class_weight = False  # only consulted when multilabel is False
multilabel = True  # True: one target per class with binary cross-entropy

In [5]:
# Resolve the per-language input files underneath data_dir.
data_dir = Path(data_dir)
wv_path = str(data_dir / "{:s}_wv.npy".format(lang))
X_path = str(data_dir / "{:s}_X.npy".format(lang))
y_path = str(data_dir / "{:s}_y.npy".format(lang))
emoji_path = str(data_dir / "{:s}_emoji.txt".format(lang))

# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects, so
# these files must come from a trusted source.
wv = np.load(wv_path, allow_pickle=True)
input_vec = np.load(X_path, allow_pickle=True)
input_label = np.load(y_path, allow_pickle=True)

# Dimensions are read off the loaded arrays rather than configured by hand.
nb_tokens, embed_dim = len(wv), wv.shape[1]
input_len, nb_classes = len(input_label), input_label.shape[1]
maxlen = input_vec.shape[1]

# Sequential 70 % / 20 % / 10 % train / validation / test split (no shuffling).
train_end = int(input_len * 0.7)
val_end = int(input_len * 0.9)
split_points = [train_end, val_end]

X_train, X_val, X_test = np.split(input_vec, split_points)
y_train, y_val, y_test = np.split(input_label, split_points)

if multilabel:
    def to_multilabel(y):
        """Return the columns of ``y`` as a list with one 1-D array per class."""
        return [y[:, col] for col in range(nb_classes)]

    # Each split becomes a list of nb_classes target vectors.
    y_train, y_val, y_test = map(to_multilabel, (y_train, y_val, y_test))

# Gather the constructor arguments first so the configuration that reaches
# elsa_architecture can be inspected in one place.
# NOTE(review): lstm_hidden / lstm_drop from the hyper-parameter cell are not
# forwarded here — confirm whether that is intentional.
model_kwargs = dict(
    nb_classes=nb_classes,
    nb_tokens=nb_tokens,
    maxlen=maxlen,
    final_dropout_rate=final_drop,
    embed_dropout_rate=embed_drop,
    load_embedding=True,
    pre_embedding=wv,  # pre-trained embedding matrix loaded from <lang>_wv.npy
    high=highway,
    embed_dim=embed_dim,
    multilabel=multilabel,
)
model = elsa_architecture(**model_kwargs)
model.summary()

# Choose the loss, optionally derive balanced class weights, and compile.
# `computed_class_weight` stays None unless the single-label branch computes it.
computed_class_weight = None

if multilabel:
    loss = "binary_crossentropy"
else:
    loss = "categorical_crossentropy"
    if compute_class_weight:
        # Column index of every non-zero label entry, in row order.
        y_train_sps = np.nonzero(y_train)[1]
        # `classes` and `y` are passed by keyword: recent scikit-learn releases
        # no longer accept them positionally.
        balanced_weights = class_weight.compute_class_weight(
            'balanced', classes=np.arange(nb_classes), y=y_train_sps)
        print("computed class weight = {:s}".format(str(balanced_weights)))
        # Keras documents `class_weight` as a {class_index: weight} dict, so the
        # array returned by scikit-learn is converted before it reaches fit().
        computed_class_weight = dict(enumerate(balanced_weights))

if optimizer == 'adam':
    adam = Adam(clipnorm=1, lr=lr)
    model.compile(loss=loss, optimizer=adam, metrics=['accuracy'])
elif optimizer == 'rmsprop':
    # NOTE(review): the optimizer is given by name here, so `lr` is not
    # forwarded to it — confirm that is intended.
    model.compile(loss=loss, optimizer='rmsprop', metrics=['accuracy'])
else:
    # Fail loudly instead of carrying an uncompiled model into fit().
    raise ValueError(
        "unsupported optimizer: {!r} (expected 'adam' or 'rmsprop')".format(optimizer))

# Prepare the checkpoint location. parents/exist_ok make this safe for nested
# paths and for re-running the cell, without a separate exists() check.
checkpoint_dir = Path(checkpoint_dir)
checkpoint_dir.mkdir(parents=True, exist_ok=True)
checkpoint_weight_path = str(checkpoint_dir / "elsa_{:s}.hdf5".format(lang))

# Stop once val_loss has not improved for `patience` epochs, and keep the best
# full model (not only its weights) on disk.
# NOTE(review): EarlyStopping is not asked to restore the best weights, so the
# in-memory model after fit() is the one from the last epoch run, not
# necessarily the checkpointed one — confirm which should be evaluated.
callbacks = [
    keras.callbacks.EarlyStopping(
        monitor='val_loss', min_delta=0, patience=patience, verbose=0, mode='auto'),
    keras.callbacks.ModelCheckpoint(checkpoint_weight_path, monitor='val_loss',
                                    verbose=0, save_best_only=True, save_weights_only=False, mode='auto', period=1)
]
model.fit(X_train,
          y_train,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(X_val, y_val),
          class_weight=computed_class_weight,
          callbacks=callbacks,
          verbose=1)

# Read "<emoji> <count>" pairs. The file is opened as UTF-8 explicitly (emoji
# are not decodable under every locale default) and closed deterministically;
# blank or malformed lines are skipped instead of raising IndexError.
freq = {}
with open(emoji_path, encoding="utf-8") as emoji_file:
    for line in emoji_file:
        fields = line.split()
        if len(fields) < 2:
            continue
        freq[fields[0]] = int(fields[1])

# The nb_classes most frequent emoji, used as display names in the reports.
# NOTE(review): assumes label column k corresponds to the k-th most frequent
# emoji in this file — confirm against the preprocessing step.
freq_topn = sorted(freq.items(), key=itemgetter(1), reverse=True)[:nb_classes]

if multilabel:
    # One prediction array per class head; squeeze drops the trailing unit axis.
    y_pred = model.predict([X_test], batch_size=batch_size)
    y_pred = [np.squeeze(p) for p in y_pred]

    # Pooled binary view: every (class, sample) decision counted once.
    y_test_1d = np.array(y_test).flatten()
    y_pred_1d = np.array(y_pred).flatten()
    print(f1_score(y_test_1d, y_pred_1d > 0.5))
    print(classification_report(y_test_1d, y_pred_1d > 0.5))

    # Per-emoji view: each decision becomes label c+1 when class c is on and 0
    # when it is off. Built with array operations instead of a Python loop over
    # every sample/class pair; rows are samples, columns are classes.
    class_ids = np.arange(1, nb_classes + 1)
    gold = np.where(np.array(y_test).T == 1.0, class_ids, 0).ravel()
    pred = np.where(np.array(y_pred).T > 0.5, class_ids, 0).ravel()

    target_name = [""] + [e[0] for e in freq_topn]
    # Explicit labels keep names aligned even when a class never occurs in the
    # test split or in the predictions.
    print(classification_report(gold, pred,
                                labels=list(range(nb_classes + 1)),
                                target_names=target_name))
else:
    _, acc = model.evaluate(X_test, y_test, batch_size=batch_size, verbose=0)
    print(acc)

    # Same batch size as evaluate(); this only affects speed, not the result.
    y_pred = model.predict(X_test, batch_size=batch_size)
    print(classification_report(y_test.argmax(axis=1), y_pred.argmax(axis=1),
                                labels=list(range(nb_classes)),
                                target_names=[e[0] for e in freq_topn]))

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 50)           0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 50, 200)      65831000    input_1[0][0]                    
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 50, 200)      0           embedding[0][0]                  
__________________________________________________________________________________________________
bi_lstm_0 (Bidirectio

Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Train on 1721192 samples, validate on 491770 samples
Epoch 1/100
Epoch 2/100


    250/1721192 [..............................] - ETA: 1:01:08 - loss: 3.2896 - sigmoid_0_loss: 0.2371 - sigmoid_1_loss: 0.1863 - sigmoid_2_loss: 0.2301 - sigmoid_3_loss: 0.0875 - sigmoid_4_loss: 0.1461 - sigmoid_5_loss: 0.0867 - sigmoid_6_loss: 0.1147 - sigmoid_7_loss: 0.1655 - sigmoid_8_loss: 0.1096 - sigmoid_9_loss: 0.0645 - sigmoid_10_loss: 0.0885 - sigmoid_11_loss: 0.1067 - sigmoid_12_loss: 0.1153 - sigmoid_13_loss: 0.0880 - sigmoid_14_loss: 0.0339 - sigmoid_15_loss: 0.0593 - sigmoid_16_loss: 0.0778 - sigmoid_17_loss: 0.0143 - sigmoid_18_loss: 0.0670 - sigmoid_19_loss: 0.0905 - sigmoid_20_loss: 0.1229 - sigmoid_21_loss: 0.0105 - sigmoid_22_loss: 0.0121 - sigmoid_23_loss: 0.0150 - sigmoid_24_loss: 0.0267 - sigmoid_25_loss: 0.0326 - sigmoid_26_loss: 0.0722 - sigmoid_27_loss: 0.0356 - sigmoid_28_loss: 0.0612 - sigmoid_29_loss: 0.0639 - sigmoid_30_loss: 0.0117 - sigmoid_31_loss: 0.0113 - sigmoid_32_loss: 0.0144 - sigmoid_33_loss: 0.0112 - sigmoid_34_loss: 0.0047 - sigmoid_35_loss: 0.

Epoch 3/100


    250/1721192 [..............................] - ETA: 1:03:34 - loss: 3.4276 - sigmoid_0_loss: 0.1961 - sigmoid_1_loss: 0.1542 - sigmoid_2_loss: 0.1483 - sigmoid_3_loss: 0.1427 - sigmoid_4_loss: 0.1525 - sigmoid_5_loss: 0.1749 - sigmoid_6_loss: 0.1592 - sigmoid_7_loss: 0.0481 - sigmoid_8_loss: 0.0990 - sigmoid_9_loss: 0.0782 - sigmoid_10_loss: 0.1252 - sigmoid_11_loss: 0.0736 - sigmoid_12_loss: 0.1130 - sigmoid_13_loss: 0.0428 - sigmoid_14_loss: 0.0275 - sigmoid_15_loss: 0.0797 - sigmoid_16_loss: 0.0314 - sigmoid_17_loss: 0.0814 - sigmoid_18_loss: 0.0519 - sigmoid_19_loss: 0.0673 - sigmoid_20_loss: 0.0577 - sigmoid_21_loss: 0.0509 - sigmoid_22_loss: 0.0939 - sigmoid_23_loss: 0.0268 - sigmoid_24_loss: 0.0770 - sigmoid_25_loss: 0.0610 - sigmoid_26_loss: 0.0539 - sigmoid_27_loss: 0.0376 - sigmoid_28_loss: 0.0393 - sigmoid_29_loss: 0.0195 - sigmoid_30_loss: 0.0383 - sigmoid_31_loss: 0.0548 - sigmoid_32_loss: 0.0097 - sigmoid_33_loss: 0.0056 - sigmoid_34_loss: 0.0010 - sigmoid_35_loss: 0.

Epoch 4/100


    250/1721192 [..............................] - ETA: 1:00:37 - loss: 3.4147 - sigmoid_0_loss: 0.1537 - sigmoid_1_loss: 0.2111 - sigmoid_2_loss: 0.1851 - sigmoid_3_loss: 0.0970 - sigmoid_4_loss: 0.1226 - sigmoid_5_loss: 0.1329 - sigmoid_6_loss: 0.0940 - sigmoid_7_loss: 0.0789 - sigmoid_8_loss: 0.0816 - sigmoid_9_loss: 0.0439 - sigmoid_10_loss: 0.0605 - sigmoid_11_loss: 0.1187 - sigmoid_12_loss: 0.0789 - sigmoid_13_loss: 0.0553 - sigmoid_14_loss: 0.0056 - sigmoid_15_loss: 0.0504 - sigmoid_16_loss: 0.0526 - sigmoid_17_loss: 0.0907 - sigmoid_18_loss: 0.0794 - sigmoid_19_loss: 0.0378 - sigmoid_20_loss: 0.1253 - sigmoid_21_loss: 0.0715 - sigmoid_22_loss: 0.0538 - sigmoid_23_loss: 0.0313 - sigmoid_24_loss: 0.0800 - sigmoid_25_loss: 0.0226 - sigmoid_26_loss: 0.0388 - sigmoid_27_loss: 0.1017 - sigmoid_28_loss: 0.0633 - sigmoid_29_loss: 0.0205 - sigmoid_30_loss: 0.0806 - sigmoid_31_loss: 0.0237 - sigmoid_32_loss: 0.0069 - sigmoid_33_loss: 0.0046 - sigmoid_34_loss: 6.2210e-04 - sigmoid_35_loss

Epoch 5/100


    250/1721192 [..............................] - ETA: 59:52 - loss: 3.4080 - sigmoid_0_loss: 0.1655 - sigmoid_1_loss: 0.1733 - sigmoid_2_loss: 0.1220 - sigmoid_3_loss: 0.0841 - sigmoid_4_loss: 0.1137 - sigmoid_5_loss: 0.0848 - sigmoid_6_loss: 0.0540 - sigmoid_7_loss: 0.1258 - sigmoid_8_loss: 0.0746 - sigmoid_9_loss: 0.0596 - sigmoid_10_loss: 0.0482 - sigmoid_11_loss: 0.1662 - sigmoid_12_loss: 0.0880 - sigmoid_13_loss: 0.0548 - sigmoid_14_loss: 0.0363 - sigmoid_15_loss: 0.0413 - sigmoid_16_loss: 0.0197 - sigmoid_17_loss: 0.0676 - sigmoid_18_loss: 0.0664 - sigmoid_19_loss: 0.0694 - sigmoid_20_loss: 0.0496 - sigmoid_21_loss: 0.0733 - sigmoid_22_loss: 0.0590 - sigmoid_23_loss: 0.0540 - sigmoid_24_loss: 0.0757 - sigmoid_25_loss: 0.0561 - sigmoid_26_loss: 0.0705 - sigmoid_27_loss: 0.0270 - sigmoid_28_loss: 0.0414 - sigmoid_29_loss: 0.0187 - sigmoid_30_loss: 0.0239 - sigmoid_31_loss: 0.0531 - sigmoid_32_loss: 0.0089 - sigmoid_33_loss: 0.0033 - sigmoid_34_loss: 4.5690e-04 - sigmoid_35_loss: 

0.2366327890991568
              precision    recall  f1-score   support

         0.0       0.99      1.00      0.99  15490755
         1.0       0.80      0.14      0.24    245885

    accuracy                           0.99  15736640
   macro avg       0.89      0.57      0.61  15736640
weighted avg       0.98      0.99      0.98  15736640

              precision    recall  f1-score   support

                   0.99      1.00      0.99  15490755
           ✨       0.88      0.31      0.46     21680
           😭       0.64      0.14      0.23     17030
           😊       0.84      0.13      0.22     15699
           💦       0.76      0.16      0.26      9767
           ❤       0.86      0.20      0.32      9994
           ☺       0.85      0.04      0.07     10480
           😂       0.56      0.01      0.02      9401
           💕       0.71      0.09      0.16      6472
           🤔       0.52      0.16      0.24      8166
           ❗       0.89      0.24      0.38      5328
     