In [1]:
!pip install keras==2.3.1

Looking in indexes: http://mirrors.tencentyun.com/pypi/simple
Collecting keras==2.3.1
  Downloading http://mirrors.tencentyun.com/pypi/packages/ad/fd/6bfe87920d7f4fd475acd28500a42482b6b84479832bdc0fe9e589a60ceb/Keras-2.3.1-py2.py3-none-any.whl (377 kB)
[K     |████████████████████████████████| 377 kB 8.9 MB/s eta 0:00:01
Installing collected packages: keras
Successfully installed keras-2.3.1


In [1]:
import os
import keras.backend as K

from data import DATA_SET_DIR
from elmo.lm_generator import LMDataGenerator
from elmo.model import ELMo

Using TensorFlow backend.


In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Read the advertiser-id vocabulary file into a list with one entry per line.
# NOTE(review): the slice removes the last TWO characters of every line, not
# just a trailing newline — presumably each line carries a one-character
# suffix before the line break; confirm against the file format. A final line
# without a trailing newline would lose two real characters.
with open('./data/datasets/txt/advertiser_id.vocab') as vocab_file:
    vocab = [entry[:-2] for entry in vocab_file]

In [4]:
# Show the GPU devices reported by the Keras TensorFlow backend.
# NOTE(review): _get_available_gpus is an underscore-prefixed (private) helper
# of the backend module, so it may not exist in other Keras versions.
K.tensorflow_backend._get_available_gpus()

['/job:localhost/replica:0/task:0/device:GPU:0',
 '/job:localhost/replica:0/task:0/device:GPU:1']

In [None]:
# Configuration shared by the data generators and the ELMo model that are
# created further down in this cell.
parameters = {
    # --- Execution ---
    'multi_processing': True,
    'n_threads': -1,
    # Enabled only when the Keras/TensorFlow backend reports at least one GPU.
    'cuDNN': bool(K.tensorflow_backend._get_available_gpus()),

    # --- Data files (joined with DATA_SET_DIR when the generators are built) ---
    'train_dataset': 'txt/advertiser_id.train.tokens',
    'valid_dataset': 'txt/advertiser_id.dev.tokens',
    'test_dataset': 'txt/advertiser_id.test.tokens',
    # NOTE(review): the datasets are named advertiser_id.* and an earlier cell
    # reads advertiser_id.vocab, but this entry points at ad_id.vocab — confirm
    # this is the intended vocabulary file.
    'vocab': 'txt/ad_id.vocab',

    # --- Remaining settings, handed to ELMo(parameters) / the generators ---
    'vocab_size': 56044,
    'num_sampled': 500,
    'charset_size': 262,
    'sentence_maxlen': 100,
    'token_maxlen': 50,
    'token_encoding': 'word',
    'epochs': 3,
    'patience': 2,
    'batch_size': 4,
    'clip_value': 1,
    'cell_clip': 5,
    'proj_clip': 5,
    'lr': 0.2,
    'shuffle': True,
    'n_lstm_layers': 2,
    'n_highway_layers': 2,
    # Currently disabled entries: [4, 128] and [6, 512].
    'cnn_filters': [[1, 32], [2, 32], [3, 64], [5, 256], [7, 512]],
    'lstm_units_size': 400,
    'hidden_units_size': 200,
    'char_embedding_size': 16,
    'dropout_rate': 0.1,
    'word_dropout_rate': 0.05,
    'weight_tying': True,
}

# Set-up Generators
def _build_lm_generator(dataset_key):
    """Build an LMDataGenerator for one dataset split.

    Args:
        dataset_key: key in ``parameters`` whose value is the token-file path
            relative to DATA_SET_DIR (e.g. ``'train_dataset'``).

    Returns:
        An LMDataGenerator constructed from that token file, the shared
        vocabulary file (``parameters['vocab']``) and the sentence/token
        length, batch size, shuffle and token-encoding values in ``parameters``.
    """
    return LMDataGenerator(
        os.path.join(DATA_SET_DIR, parameters[dataset_key]),
        os.path.join(DATA_SET_DIR, parameters['vocab']),
        sentence_maxlen=parameters['sentence_maxlen'],
        token_maxlen=parameters['token_maxlen'],
        batch_size=parameters['batch_size'],
        shuffle=parameters['shuffle'],
        token_encoding=parameters['token_encoding'],
    )


# All three splits share every setting except the token file.
# NOTE(review): the validation and test generators also receive
# parameters['shuffle']; confirm shuffling is wanted for evaluation and for
# extracting embeddings from the test split.
train_generator = _build_lm_generator('train_dataset')
val_generator = _build_lm_generator('valid_dataset')
test_generator = _build_lm_generator('test_dataset')

# Instantiate ELMo from the `parameters` dict and compile it
# (print_summary=True requests a printed model summary).
elmo_model = ELMo(parameters)
elmo_model.compile_elmo(print_summary=True)

# Train ELMo on the training generator, using the validation generator as validation data
elmo_model.train(train_data=train_generator, valid_data=val_generator)

# Persist the ELMo bidirectional language model to disk
elmo_model.save(sampled_softmax=False)

# Evaluate the bidirectional language model on the test generator
elmo_model.evaluate(test_generator)

# Build the ELMo meta-model intended for deployment and persist it to disk (save=True)
elmo_model.wrap_multi_elmo_encoder(print_summary=True, save=True)

# Load the ELMo encoder
elmo_model.load_elmo_encoder()

# Get ELMo embeddings to feed as inputs for downstream tasks.
# NOTE(review): test_generator is built with shuffle=parameters['shuffle'] (True),
# so the row order of elmo_embeddings may not follow the order of the test file —
# confirm how LMDataGenerator applies shuffling before aligning rows with inputs.
elmo_embeddings = elmo_model.get_outputs(test_generator, output_type='word', state='mean')

# Next step (not implemented here): build and train a new Keras model for the
# downstream task (e.g., text classification) on top of elmo_embeddings.


Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
word_indices (InputLayer)       (None, None)         0                                            
__________________________________________________________________________________________________
token_encoding (Embedding)      (None, None, 200)    11208800    word_indices[0][0]               
__________________________________________________________________________________________________
spatial_dropout1d_6 (SpatialDro (None, None, 200)    0           token_encoding[0][0]             
__________________________________________________________________________________________________
timestep_dropout_2 (TimestepDro (None, None, 200)    0           spatial_dropout1d_6[0][0]        
___________

In [4]:
# Display the embeddings returned by elmo_model.get_outputs(...) in the training cell.
elmo_embeddings

array([[ -5.963724 ,  15.389417 , -14.823316 , ...,  -2.264004 ,
          2.0712223,   7.9908032],
       [ -0.765509 ,   3.2414885,  -1.96873  , ...,  -0.4388038,
          0.3472507,   1.1737093],
       [ -5.7815394,  15.513747 , -14.713231 , ...,  -2.2619948,
          2.0751145,   7.97468  ],
       ...,
       [ -5.811864 ,  15.317462 , -14.770313 , ...,  -2.2618427,
          2.0867224,   7.939979 ],
       [ -3.583551 ,  11.9998865,  -9.664206 , ...,  -2.7930157,
          2.3017042,   2.5163767],
       [ -5.783086 ,  14.875971 , -13.583462 , ...,  -2.2849314,
          2.0599265,   7.4962587]], dtype=float32)