From d186d1e928a32eecf8e76bc33efac2cdb2d02684 Mon Sep 17 00:00:00 2001
From: king-menin <login-const@mail.ru>
Date: Sun, 7 Jul 2019 00:11:22 +0300
Subject: [PATCH] add new version

---
 .gitignore                                    |  107 +-
 LICENSE                                       |   21 -
 README.md                                     |  177 +-
 convert_tf_checkpoint_to_pytorch.py           |  107 -
 examples/__init__.py                          |    0
 examples/atis-joint-nmt.ipynb                 |  504 -
 examples/atis-joint.ipynb                     |  708 --
 examples/atis-nmt.ipynb                       |  545 --
 examples/atis.ipynb                           |  689 --
 examples/conll-2003-nmt.ipynb                 |  731 --
 examples/conll-2003.ipynb                     | 3710 --------
 examples/factrueval-nmt.ipynb                 |  423 -
 examples/factrueval.ipynb                     |  790 --
 examples/samples.ipynb                        | 1019 ---
 examples_elmo/__init__.py                     |    0
 examples_elmo/factrueval-nmt.ipynb            |  438 -
 examples_elmo/factrueval.ipynb                |  697 --
 examples_elmo/samples.ipynb                   | 2345 -----
 ...onll2003 BERTBiLSTMAttnCRF base BERT.ipynb |  580 ++
 ...nll2003 BERTBiLSTMAttnNCRF base BERT.ipynb |  602 ++
 exps/conll2003 BERTBiLSTMCRF base BERT.ipynb  | 1027 +++
 exps/conll2003 BERTBiLSTMCRF.ipynb            | 1063 +++
 exps/fre BERTAttnCRF.ipynb                    |  557 ++
 exps/fre BERTBiLSTMAttnCRF-fit_BERT.ipynb     |  478 +
 exps/fre BERTBiLSTMAttnCRF.ipynb              |  582 ++
 exps/fre BERTBiLSTMAttnNCRF-fit_BERT.ipynb    | 1778 ++++
 exps/fre BERTBiLSTMAttnNCRF.ipynb             | 8071 +++++++++++++++++
 exps/fre BERTBiLSTMCRF.ipynb                  | 1137 +++
 exps/fre BERTBiLSTMNCRF.ipynb                 |  604 ++
 exps/fre BERTCRF.ipynb                        |  475 +
 exps/fre BERTNCRF.ipynb                       |  519 ++
 exps/prc fre.ipynb                            |  220 +
 modules/__init__.py                           |    9 +-
 modules/{utils => analyze_utils}/__init__.py  |    0
 modules/analyze_utils/main_metrics.py         |  214 +
 .../{utils => analyze_utils}/plot_metrics.py  |   78 +-
 modules/{utils => analyze_utils}/utils.py     |   27 +-
 modules/data/bert_data.py                     |  689 +-
 modules/data/bert_word_data.py                |  211 -
 modules/data/conll2003/__init__.py            |    4 +
 modules/data/conll2003/prc.py                 |   67 +
 modules/data/elmo_data.py                     |  444 -
 modules/data/fre/__init__.py                  |    5 +
 modules/data/fre/bilou/__init__.py            |    1 +
 modules/data/fre/bilou/from_bilou.py          |  100 +
 modules/data/fre/bilou/to_bilou.py            |   49 +
 modules/data/fre/entity/__init__.py           |    1 +
 modules/data/fre/entity/document.py           |  164 +
 modules/data/fre/entity/taggedtoken.py        |   24 +
 modules/data/fre/entity/token.py              |   52 +
 modules/data/fre/prc.py                       |   45 +
 modules/data/fre/reader.py                    |   68 +
 modules/data/fre/utils.py                     |   10 +
 modules/data/tokenization.py                  |  291 -
 modules/layers/bert_modeling.py               |  474 -
 modules/layers/crf.py                         |    9 +-
 modules/layers/decoders.py                    |  397 +-
 modules/layers/embedders.py                   |  131 +-
 modules/layers/encoders.py                    |   65 -
 modules/layers/layers.py                      |   30 +
 modules/layers/ncrf.py                        |  322 +-
 modules/models/__init__.py                    |    4 -
 modules/models/bert_models.py                 |  787 +-
 modules/models/elmo_models.py                 |  227 -
 modules/models/released_models.py             |   10 -
 modules/train/clr.py                          |  160 -
 modules/train/train.py                        |  112 +-
 modules/utils.py                              |   69 +
 requirements.txt                              |   14 +-
 69 files changed, 19875 insertions(+), 16193 deletions(-)
 delete mode 100644 LICENSE
 delete mode 100644 convert_tf_checkpoint_to_pytorch.py
 delete mode 100644 examples/__init__.py
 delete mode 100644 examples/atis-joint-nmt.ipynb
 delete mode 100644 examples/atis-joint.ipynb
 delete mode 100644 examples/atis-nmt.ipynb
 delete mode 100644 examples/atis.ipynb
 delete mode 100644 examples/conll-2003-nmt.ipynb
 delete mode 100644 examples/conll-2003.ipynb
 delete mode 100644 examples/factrueval-nmt.ipynb
 delete mode 100644 examples/factrueval.ipynb
 delete mode 100644 examples/samples.ipynb
 delete mode 100644 examples_elmo/__init__.py
 delete mode 100644 examples_elmo/factrueval-nmt.ipynb
 delete mode 100644 examples_elmo/factrueval.ipynb
 delete mode 100644 examples_elmo/samples.ipynb
 create mode 100644 exps/conll2003 BERTBiLSTMAttnCRF base BERT.ipynb
 create mode 100644 exps/conll2003 BERTBiLSTMAttnNCRF base BERT.ipynb
 create mode 100644 exps/conll2003 BERTBiLSTMCRF base BERT.ipynb
 create mode 100644 exps/conll2003 BERTBiLSTMCRF.ipynb
 create mode 100644 exps/fre BERTAttnCRF.ipynb
 create mode 100644 exps/fre BERTBiLSTMAttnCRF-fit_BERT.ipynb
 create mode 100644 exps/fre BERTBiLSTMAttnCRF.ipynb
 create mode 100644 exps/fre BERTBiLSTMAttnNCRF-fit_BERT.ipynb
 create mode 100644 exps/fre BERTBiLSTMAttnNCRF.ipynb
 create mode 100644 exps/fre BERTBiLSTMCRF.ipynb
 create mode 100644 exps/fre BERTBiLSTMNCRF.ipynb
 create mode 100644 exps/fre BERTCRF.ipynb
 create mode 100644 exps/fre BERTNCRF.ipynb
 create mode 100644 exps/prc fre.ipynb
 rename modules/{utils => analyze_utils}/__init__.py (100%)
 create mode 100644 modules/analyze_utils/main_metrics.py
 rename modules/{utils => analyze_utils}/plot_metrics.py (54%)
 rename modules/{utils => analyze_utils}/utils.py (86%)
 delete mode 100644 modules/data/bert_word_data.py
 create mode 100644 modules/data/conll2003/__init__.py
 create mode 100644 modules/data/conll2003/prc.py
 delete mode 100644 modules/data/elmo_data.py
 create mode 100644 modules/data/fre/__init__.py
 create mode 100644 modules/data/fre/bilou/__init__.py
 create mode 100644 modules/data/fre/bilou/from_bilou.py
 create mode 100644 modules/data/fre/bilou/to_bilou.py
 create mode 100644 modules/data/fre/entity/__init__.py
 create mode 100644 modules/data/fre/entity/document.py
 create mode 100644 modules/data/fre/entity/taggedtoken.py
 create mode 100644 modules/data/fre/entity/token.py
 create mode 100644 modules/data/fre/prc.py
 create mode 100644 modules/data/fre/reader.py
 create mode 100644 modules/data/fre/utils.py
 delete mode 100644 modules/data/tokenization.py
 delete mode 100644 modules/layers/bert_modeling.py
 delete mode 100644 modules/layers/encoders.py
 delete mode 100644 modules/models/elmo_models.py
 delete mode 100644 modules/models/released_models.py
 delete mode 100644 modules/train/clr.py
 create mode 100644 modules/utils.py

diff --git a/.gitignore b/.gitignore
index 2b2e64f..b3df64e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,105 @@
-*/__pycache__/*
-*.ipynb_checkpoints/*
-*.idea/*
+# Byte-compiled / optimized / DLL files
 __pycache__/
+*.py[cod]
+*$py.class
+.idea/
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
diff --git a/LICENSE b/LICENSE
deleted file mode 100644
index 23bffaf..0000000
--- a/LICENSE
+++ /dev/null
@@ -1,21 +0,0 @@
-MIT License
-
-Copyright (c) 2018 Emelyanov Anton
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
diff --git a/README.md b/README.md
index 1032cea..c825799 100644
--- a/README.md
+++ b/README.md
@@ -1,155 +1,36 @@
-# PyTorch solution of NER task with Google AI's BERT model
-## 0. Introduction
+## 
+This repository contains a solution to the NER task based on BERT without fine-tuning the BERT model.
 
-This repository contains solution of NER task based on PyTorch [reimplementation](https://github.com/huggingface/pytorch-pretrained-BERT) of [Google's TensorFlow repository for the BERT model](https://github.com/google-research/bert) that was released together with the paper [BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding](https://arxiv.org/abs/1810.04805) by Jacob Devlin, Ming-Wei Chang, Kenton Lee and Kristina Toutanova.
+### Papers
+There are two solutions based on this architecture.
+1. [BSNLP 2019 ACL workshop](http://bsnlp.cs.helsinki.fi/shared_task.html): [solution](https://github.com/king-menin/slavic-ner) and [paper](https://arxiv.org/abs/1906.09978) for the multilingual shared task.
+2. The second-place [solution](https://github.com/king-menin/AGRR-2019) for the [Dialogue AGRR-2019](https://github.com/dialogue-evaluation/AGRR-2019) task.
 
-This implementation can load any pre-trained TensorFlow checkpoint for BERT (in particular [Google's pre-trained models](https://github.com/google-research/bert)) and a conversion script is provided (see below).
-
-## 1. Loading a TensorFlow checkpoint (e.g. [Google's pre-trained models](https://github.com/google-research/bert#pre-trained-models))
-
-You can convert any TensorFlow checkpoint for BERT (in particular [the pre-trained models released by Google](https://github.com/google-research/bert#pre-trained-models)) in a PyTorch save file by using the [`convert_tf_checkpoint_to_pytorch.py`](convert_tf_checkpoint_to_pytorch.py) script.
-
-This script takes as input a TensorFlow checkpoint (three files starting with `bert_model.ckpt`) and the associated configuration file (`bert_config.json`), and creates a PyTorch model for this configuration, loads the weights from the TensorFlow checkpoint in the PyTorch model and saves the resulting model in a standard PyTorch save file that can be imported using `torch.load()`.
-
-You only need to run this conversion script **once** to get a PyTorch model. You can then disregard the TensorFlow checkpoint (the three files starting with `bert_model.ckpt`) but be sure to keep the configuration file (`bert_config.json`) and the vocabulary file (`vocab.txt`) as these are needed for the PyTorch model too.
-
-To run this specific conversion script you will need to have TensorFlow and PyTorch installed (`pip install tensorflow`). The rest of the repository only requires PyTorch.
-
-Here is an example of the conversion process for a pre-trained `BERT-Base Uncased` model:
-
-```shell
-export BERT_BASE_DIR=/path/to/bert/multilingual_L-12_H-768_A-12
-
-python3 convert_tf_checkpoint_to_pytorch.py \
-    --tf_checkpoint_path $BERT_BASE_DIR/bert_model.ckpt \
-    --bert_config_file $BERT_BASE_DIR/bert_config.json \
-    --pytorch_dump_path $BERT_BASE_DIR/pytorch_model.bin
-```
-
-You can download Google's pre-trained models for the conversion [here](https://github.com/google-research/bert#pre-trained-models).
-
-There is used the [BERT-Base, Multilingual](https://storage.googleapis.com/bert_models/2018_11_03/multilingual_L-12_H-768_A-12.zip) and [BERT-Cased, Multilingual](https://storage.googleapis.com/bert_models/2018_11_23/multi_cased_L-12_H-768_A-12.zip) (recommended) in this solution.
 
 ## 2. Results
-We didn't search best parametres and obtained the following results for no more than <b>10 epochs</b>.
+We did not search for the best parameters and obtained the following results.
 
-### Only NER models
-#### Model: `BertBiLSTMAttnCRF`.
-
-| Dataset | Lang | IOB precision | Span precision | Total spans in test set | Notebook
+| Model | Data set | Dev F1 tok | Dev F1 span | Test F1 tok | Test F1 span
 |-|-|-|-|-|-|
-| [FactRuEval](https://github.com/dialogue-evaluation/factRuEval-2016) | ru | <b>0.937</b> | <b>0.883</b> | 4 | [factrueval.ipynb](examples/factrueval.ipynb)
-| [Atis](https://github.com/Microsoft/CNTK/tree/master/Examples/LanguageUnderstanding/ATIS/Data) | en | 0.852 | 0.787 | 65 | [conll-2003.ipynb](examples/conll-2003.ipynb)
-| [Conll-2003](https://github.com/kyzhouhzau/BERT-NER/tree/master/NERdata) | en | <b>0.945</b> | 0.858 | 5 | [atis.ipynb](examples/atis.ipynb)
-
-* Factrueval (f1): 0.9163±0.006, best **0.926**.
-* Atis (f1): 0.882±0.02, best **0.896**
-* Conll-2003 (f1, dev): 0.949±0.002, best **0.951**; 0.892 (f1, test).
-
-#### Model: `BertBiLSTMAttnNMT`.
-
-| Dataset | Lang | IOB precision | Span precision | Total spans in test set | Notebook
+|**OURS**||||||
+| M-BERTCRF-IO | [FactRuEval](https://github.com/dialogue-evaluation/factRuEval-2016) | - | - | 0.8598 | 0.7676
+| M-BERTNCRF-IO | [FactRuEval](https://github.com/dialogue-evaluation/factRuEval-2016) | - | - | 0.8603 | 0.7783
+| M-BERTBiLSTMCRF-IO | [FactRuEval](https://github.com/dialogue-evaluation/factRuEval-2016) | - | - | 0.8780 | 0.8108
+| M-BERTBiLSTMCRF-BIO | [FactRuEval](https://github.com/dialogue-evaluation/factRuEval-2016) | - | - | 0.8263 | 0.8051
+| M-BERTBiLSTMNCRF-IO | [FactRuEval](https://github.com/dialogue-evaluation/factRuEval-2016) | - | - | 0.8594 | 0.7842
+| M-BERTAttnCRF-IO | [FactRuEval](https://github.com/dialogue-evaluation/factRuEval-2016) | - | - | 0.8630 | 0.7879
+| M-BERTBiLSTMAttnCRF-IO | [FactRuEval](https://github.com/dialogue-evaluation/factRuEval-2016) | - | - | 0.8851 | 0.8244
+| M-BERTBiLSTMAttnNCRF-IO | [FactRuEval](https://github.com/dialogue-evaluation/factRuEval-2016) | - | - | 0.8609 | 0.7869
+| M-BERTBiLSTMAttnNCRF-fit_BERT-IO | [FactRuEval](https://github.com/dialogue-evaluation/factRuEval-2016) | - | - |  0.8739 | 0.8201
 |-|-|-|-|-|-|
-| [FactRuEval](https://github.com/dialogue-evaluation/factRuEval-2016) | ru | 0.925 | 0.827 | 4 | [factrueval-nmt.ipynb](examples/factrueval-nmt.ipynb)
-| [Atis](https://github.com/Microsoft/CNTK/tree/master/Examples/LanguageUnderstanding/ATIS/Data) | en | <b>0.919</b> | <b>0.829</b> | 65 | [atis-nmt.ipynb](examples/atis-nmt.ipynb)
-| [Conll-2003](https://github.com/kyzhouhzau/BERT-NER/tree/master/NERdata) | en | 0.936 | <b>0.900</b> | 5 | [conll-2003-nmt.ipynb](examples/conll-2003-nmt.ipynb)
-
-### Joint Models
-#### Model: `BertBiLSTMAttnCRFJoint`
-
-| Dataset | Lang | IOB precision | Span precision | Clf precision | Total spans in test set | Total classes | Notebook
-|-|-|-|-|-|-|-|-|
-| [Atis](https://github.com/Microsoft/CNTK/tree/master/Examples/LanguageUnderstanding/ATIS/Data) | en | 0.877 | 0.824 | 0.894 | 65 | 17 | [atis-joint.ipynb](examples/atis-joint.ipynb)
-
-#### Model: `BertBiLSTMAttnNMTJoint`
-
-| Dataset | Lang | IOB precision | Span precision | Clf precision | Total spans in test set | Total classes | Notebook
-|-|-|-|-|-|-|-|-|
-| [Atis](https://github.com/Microsoft/CNTK/tree/master/Examples/LanguageUnderstanding/ATIS/Data) | en | 0.913 | 0.820 | 0.888 | 65 | 17 | [atis-joint-nmt.ipynb](examples/atis-joint-nmt.ipynb)
-
-### Comprasion with ELMo model
-We tested `BertBiLSTMCRF`, `BertBiLSTMAttnCRF` and `BertBiLSTMAttnNMT` on russian dataset [FactRuEval](https://github.com/dialogue-evaluation/factRuEval-2016) with freezed `ElmoEmbedder`:
-
-#### Model `BertBiLSTMCRF`:
-
-| Dataset | Lang | IOB precision | Span precision | Total spans in test set | Notebook
-|-|-|-|-|-|-|
-| [FactRuEval](https://github.com/dialogue-evaluation/factRuEval-2016) | ru | 0.903 | 0.851 | 4 | [samples.ipynb](examples_elmo/samples.ipynb)
-
-#### Model `BertBiLSTMAttnCRF`:
-
-| Dataset | Lang | IOB precision | Span precision | Total spans in test set | Notebook
-|-|-|-|-|-|-|
-| [FactRuEval](https://github.com/dialogue-evaluation/factRuEval-2016) | ru | 0.899 | 0.819 | 4 | [factrueval.ipynb](examples_elmo/factrueval.ipynb)
-
-#### Model `BertBiLSTMAttnNMT`:
-
-| Dataset | Lang | IOB precision | Span precision | Total spans in test set | Notebook
-|-|-|-|-|-|-|
-| [FactRuEval](https://github.com/dialogue-evaluation/factRuEval-2016) | ru | 0.902 | 0.752 | 4 | [factrueval-nmt.ipynb](examples_elmo/factrueval.ipynb)
-
-
-## 3. Installation, requirements, test
-
-This code was tested on Python 3.5+. The requirements are:
-
-- PyTorch (>= 0.4.1)
-- tqdm
-- tensorflow (for convertion)
-
-To install the dependencies:
-
-````bash
-pip install -r ./requirements.txt
-````
-
-## PyTorch neural network models
-
-All models are organized as `Encoder`-`Decoder`. `Encoder` is a freezed and <i>weighted</i> (as proposed in [elmo](https://allennlp.org/elmo)) bert output from 12 layers. There are three models that is obtained by using different `Decoder`.
-
-`Encoder`: BertBiLSTM
-
-1. `BertBiLSTMCRF`: `Encoder` + `Decoder` (BiLSTM + CRF)
-2. `BertBiLSTMAttnCRF`: `Encoder` + `Decoder` (BiLSTM + MultiHead Attention + CRF)
-3. `BertBiLSTMAttnNMT`: `Encoder` + `Decoder` (LSTM + Bahdanau Attention - NMT Decode)
-4. `BertBiLSTMAttnCRFJoint`: `Encoder` + `Decoder` (BiLSTM + MultiHead Attention + CRF) + (PoolingLinearClassifier - for classification) - joint model with classification.
-5. `BertBiLSTMAttnNMTJoint`: `Encoder` + `Decoder` (LSTM + Bahdanau Attention - NMT Decode) + (LinearClassifier - for classification) - joint model with classification.
-
-
-## Usage
-
-### 1. Loading data:
-
-```from modules.bert_data import BertNerData as NerData```
-
-```data = NerData.create(train_path, valid_path, vocab_file)```
-
-### 2. Create model:
-
-```from modules.bert_models import BertBiLSTMCRF```
-
-```model = BertBiLSTMCRF.create(len(data.label2idx), bert_config_file, init_checkpoint_pt, enc_hidden_dim=256)```
-
-### 3. Create learner:
-
-```from modules.train import NerLearner```
-
-```learner = NerLearner(model, data, best_model_path="/datadrive/models/factrueval/exp_final.cpt", lr=0.01, clip=1.0, sup_labels=data.id2label[5:], t_total=num_epochs * len(data.train_dl))```
-
-### 4. Learn your NER model:
-
-```learner.fit(2, target_metric='prec')```
-
-### 5. Predict on new data:
-
-```from modules.data.bert_data import get_bert_data_loader_for_predict```
-
-```dl = get_bert_data_loader_for_predict(data_path + "valid.csv", learner)```
-
-```learner.load_model(best_model_path)```
-
-```preds = learner.predict(dl)```
-
-
-* For more detailed instructions of using BERT model see [samples.ipynb](examples/samples.ipynb).
-* For more detailed instructions of using ELMo model see [samples.ipynb](examples_elmo/samples.ipynb).
+| BERTBiLSTMCRF-IO | [CoNLL-2003](https://github.com/synalp/NER/tree/master/corpus/CoNLL-2003) | 0.9624 | 0.9273 | - | -
+| BERTBiLSTMCRF-BIO | [CoNLL-2003](https://github.com/synalp/NER/tree/master/corpus/CoNLL-2003) | 0.9530 | 0.9236 | - | -
+| B-BERTBiLSTMCRF-IO | [CoNLL-2003](https://github.com/synalp/NER/tree/master/corpus/CoNLL-2003) | 0.9635 | 0.9277 | - | -
+| B-BERTBiLSTMCRF-BIO | [CoNLL-2003](https://github.com/synalp/NER/tree/master/corpus/CoNLL-2003) | 0.9536 | 0.9156 | - | -
+| B-BERTBiLSTMAttnCRF-IO | [CoNLL-2003](https://github.com/synalp/NER/tree/master/corpus/CoNLL-2003) | 0.9571 | 0.9114 | - | -
+| B-BERTBiLSTMAttnNCRF-IO | [CoNLL-2003](https://github.com/synalp/NER/tree/master/corpus/CoNLL-2003) | 0.9631 | 0.9197 | - | -
+|**Current SOTA**||||||
+| DeepPavlov-RuBERT-NER | [FactRuEval](https://github.com/dialogue-evaluation/factRuEval-2016) | - | - | - | **0.8266**
+| CSE | [CoNLL-2003](https://github.com/synalp/NER/tree/master/corpus/CoNLL-2003) | - | - | **0.931** | -
+| BERT-LARGE | [CoNLL-2003](https://github.com/synalp/NER/tree/master/corpus/CoNLL-2003) | 0.966 | - | 0.928 | -
+| BERT-BASE | [CoNLL-2003](https://github.com/synalp/NER/tree/master/corpus/CoNLL-2003) | 0.964 | - | 0.924 | -
diff --git a/convert_tf_checkpoint_to_pytorch.py b/convert_tf_checkpoint_to_pytorch.py
deleted file mode 100644
index 9cdc434..0000000
--- a/convert_tf_checkpoint_to_pytorch.py
+++ /dev/null
@@ -1,107 +0,0 @@
-# coding=utf-8
-# Copyright 2018 The HugginFace Inc. team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Convert BERT checkpoint."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import re
-import argparse
-import tensorflow as tf
-import torch
-import numpy as np
-
-from modules.layers.bert_modeling import BertConfig, BertModel
-
-parser = argparse.ArgumentParser()
-
-# Required parameters
-parser.add_argument("--tf_checkpoint_path",
-                    default=None,
-                    type=str,
-                    required=True,
-                    help="Path the TensorFlow checkpoint path.")
-parser.add_argument("--bert_config_file",
-                    default=None,
-                    type=str,
-                    required=True,
-                    help="The config json file corresponding to the pre-trained BERT model. \n"
-                         "This specifies the model architecture.")
-parser.add_argument("--pytorch_dump_path",
-                    default=None,
-                    type=str,
-                    required=True,
-                    help="Path to the output PyTorch model.")
-
-args = parser.parse_args()
-
-
-def convert():
-    # Initialise PyTorch model
-    config = BertConfig.from_json_file(args.bert_config_file)
-    model = BertModel(config)
-
-    # Load weights from TF model
-    path = args.tf_checkpoint_path
-    print("Converting TensorFlow checkpoint from {}".format(path))
-
-    init_vars = tf.train.list_variables(path)
-    names = []
-    arrays = []
-    for name, shape in init_vars:
-        print("Loading {} with shape {}".format(name, shape))
-        array = tf.train.load_variable(path, name)
-        print("Numpy array shape {}".format(array.shape))
-        names.append(name)
-        arrays.append(array)
-
-    for name, array in zip(names, arrays):
-        name = name[5:]  # skip "bert/"
-        print("Loading {}".format(name))
-        name = name.split('/')
-        if name[0] in ['redictions', 'eq_relationship']:
-            print("Skipping")
-            continue
-        pointer = model
-        for m_name in name:
-            if re.fullmatch(r'[A-Za-z]+_\d+', m_name):
-                l = re.split(r'_(\d+)', m_name)
-            else:
-                l = [m_name]
-            if l[0] == 'kernel':
-                pointer = getattr(pointer, 'weight')
-            else:
-                pointer = getattr(pointer, l[0])
-            if len(l) >= 2:
-                num = int(l[1])
-                pointer = pointer[num]
-        if m_name[-11:] == '_embeddings':
-            pointer = getattr(pointer, 'weight')
-        elif m_name == 'kernel':
-            array = np.transpose(array)
-        try:
-            assert pointer.shape == array.shape
-        except AssertionError as e:
-            e.args += (pointer.shape, array.shape)
-            raise
-        pointer.data = torch.from_numpy(array)
-
-    # Save pytorch-model
-    torch.save(model.state_dict(), args.pytorch_dump_path)
-
-
-if __name__ == "__main__":
-    convert()
diff --git a/examples/__init__.py b/examples/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/examples/atis-joint-nmt.ipynb b/examples/atis-joint-nmt.ipynb
deleted file mode 100644
index 58bfce8..0000000
--- a/examples/atis-joint-nmt.ipynb
+++ /dev/null
@@ -1,504 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Atis joint nmt"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%reload_ext autoreload\n",
-    "%autoreload 2\n",
-    "%matplotlib inline\n",
-    "\n",
-    "import pandas as pd\n",
-    "import warnings\n",
-    "import os\n",
-    "import sys\n",
-    "\n",
-    "sys.path.append(\"../\")\n",
-    "warnings.filterwarnings(\"ignore\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Download atis dataset from [here](https://github.com/Microsoft/CNTK/tree/master/Examples/LanguageUnderstanding/ATIS/Data)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Run NER model"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "\n",
-    "\n",
-    "data_path = \"/datadrive/JointSLU/data/\"\n",
-    "train_path = os.path.join(data_path, \"train_filtered.csv\")\n",
-    "valid_path = os.path.join(data_path, \"valid_filtered.csv\")\n",
-    "model_dir = \" /datadrive/models/multi_cased_L-12_H-768_A-12/\"\n",
-    "init_checkpoint_pt = os.path.join(\"/datadrive/models/multi_cased_L-12_H-768_A-12/\", \"pytorch_model.bin\")\n",
-    "bert_config_file = os.path.join(\"/datadrive/bert/multi_cased_L-12_H-768_A-12/\", \"bert_config.json\")\n",
-    "vocab_file = os.path.join(\"/datadrive/bert/multi_cased_L-12_H-768_A-12/\", \"vocab.txt\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(True, 0)"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import torch\n",
-    "torch.cuda.set_device(0)\n",
-    "torch.cuda.is_available(), torch.cuda.current_device()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### Create data loaders"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:summarizer.preprocessing.cleaner:'pattern' package not found; tag filters are not available for English\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules import BertNerData as NerData"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data = NerData.create(train_path, valid_path, vocab_file, data_type=\"bert_uncased\", is_cls=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(9445, 888)"
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "len(data.train_dl.dataset), len(data.valid_dl.dataset)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(154, 17)"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "len(data.label2idx), len(data.id2cls)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "106"
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "sup_labels = list(pd.read_csv(\"/datadrive/JointSLU/data/slt_flt.csv\").slots)\n",
-    "len(sup_labels)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### Create Ner model"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Set params of encoder and decoder as proposed [here](https://arxiv.org/pdf/1609.01454.pdf)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.models.bert_models import BertBiLSTMAttnNMTJoint"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "model = BertBiLSTMAttnNMTJoint.create(len(data.label2idx), len(data.cls2idx),\n",
-    "                                      bert_config_file, init_checkpoint_pt, enc_hidden_dim=256)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "NMTJointDecoder(\n",
-       "  (embedding): Embedding(154, 64)\n",
-       "  (lstm): LSTM(576, 256, batch_first=True)\n",
-       "  (attn): Linear(in_features=256, out_features=256, bias=True)\n",
-       "  (slot_out): Linear(in_features=512, out_features=154, bias=True)\n",
-       "  (loss): CrossEntropyLoss()\n",
-       "  (intent_loss): CrossEntropyLoss()\n",
-       "  (intent_out): Linears(\n",
-       "    (linears): ModuleList(\n",
-       "      (0): Linear(in_features=512, out_features=128, bias=True)\n",
-       "    )\n",
-       "    (output_linear): Linear(in_features=128, out_features=17, bias=True)\n",
-       "  )\n",
-       ")"
-      ]
-     },
-     "execution_count": 11,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.decoder"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### Create learner"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules import NerLearner"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:Don't use lr scheduler...\n"
-     ]
-    }
-   ],
-   "source": [
-    "num_epochs = 100\n",
-    "learner = NerLearner(model, data,\n",
-    "                     best_model_path=\"/datadrive/models/atis/joint_nmt.cpt\",\n",
-    "                     lr=0.01, clip=1.0, sup_labels=sup_labels,\n",
-    "                     t_total=num_epochs * len(data.train_dl))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "learner.fit(num_epochs, target_metric='prec')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Get best results"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "learner.load_model()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### Get span results for valid ds (where train support > 3)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pandas as pd\n",
-    "sup_slots = list(pd.read_csv(\"/datadrive/JointSLU/data/sup_slots.csv\").sup_slots)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.data.bert_data import get_bert_data_loader_for_predict"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dl = get_bert_data_loader_for_predict(data_path + \"valid_filtered.csv\", learner)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=56), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "preds, preds_cls = learner.predict(dl)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=56), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "from modules.train.train import validate_step\n",
-    "\n",
-    "\n",
-    "clf_report, clf_report_cls = validate_step(\n",
-    "    learner.data.valid_dl, learner.model, learner.data.id2label, learner.sup_labels, learner.data.id2cls)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Mean IOB precision"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.9129245283018869"
-      ]
-     },
-     "execution_count": 23,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import numpy as np\n",
-    "\n",
-    "\n",
-    "np.mean([float(line.split()[1]) for line in clf_report.split(\"\\n\")[2:-5] if int(line.split()[-1]) > 0])"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Span mean precision"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.8206811594202899"
-      ]
-     },
-     "execution_count": 24,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "from modules.utils.plot_metrics import get_bert_span_report\n",
-    "\n",
-    "\n",
-    "clf_report = get_bert_span_report(dl, preds)\n",
-    "np.mean([float(line.split()[1]) for line in clf_report.split(\"\\n\")[2:-5] if int(line.split()[-1]) > 0])"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Classification mean"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.8878125"
-      ]
-     },
-     "execution_count": 25,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "np.mean([float(line.split()[1]) for line in clf_report_cls.split(\"\\n\")[2:-5] if int(line.split()[-1]) > 0])"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/examples/atis-joint.ipynb b/examples/atis-joint.ipynb
deleted file mode 100644
index 468ef83..0000000
--- a/examples/atis-joint.ipynb
+++ /dev/null
@@ -1,708 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Atis example"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%reload_ext autoreload\n",
-    "%autoreload 2\n",
-    "%matplotlib inline\n",
-    "\n",
-    "import pandas as pd\n",
-    "import warnings\n",
-    "import os\n",
-    "import sys\n",
-    "\n",
-    "sys.path.append(\"../\")\n",
-    "warnings.filterwarnings(\"ignore\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Download atis dataset from [here](https://github.com/Microsoft/CNTK/tree/master/Examples/LanguageUnderstanding/ATIS/Data)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Run NER model"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "\n",
-    "\n",
-    "data_path = \"/datadrive/JointSLU/data/\"\n",
-    "train_path = os.path.join(data_path, \"train_filtered.csv\")\n",
-    "valid_path = os.path.join(data_path, \"valid_filtered.csv\")\n",
-    "model_dir = \"/datadrive/models/multilingual_L-12_H-768_A-12/\"\n",
-    "init_checkpoint_pt = \"/datadrive/models/multilingual_L-12_H-768_A-12/pytorch_model.bin\"\n",
-    "bert_config_file = os.path.join(model_dir, \"bert_config.json\")\n",
-    "vocab_file = os.path.join(model_dir, \"vocab.txt\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(True, 0)"
-      ]
-     },
-     "execution_count": 11,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import torch\n",
-    "torch.cuda.set_device(0)\n",
-    "torch.cuda.is_available(), torch.cuda.current_device()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### Create data loaders"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules import BertNerData as NerData"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data = NerData.create(train_path, valid_path, vocab_file, data_type=\"bert_uncased\", is_cls=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(9445, 888)"
-      ]
-     },
-     "execution_count": 14,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "len(data.train_dl.dataset), len(data.valid_dl.dataset)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(144, 17)"
-      ]
-     },
-     "execution_count": 15,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "len(data.label2idx), len(data.id2cls)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "106"
-      ]
-     },
-     "execution_count": 16,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "sup_labels = list(pd.read_csv(\"/datadrive/JointSLU/data/slt_flt.csv\").slots)\n",
-    "len(sup_labels)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### Create Ner model"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Set params of encoder and decoder as proposed [here](https://arxiv.org/pdf/1609.01454.pdf)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.models.bert_models import BertBiLSTMAttnCRFJoint"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "model = BertBiLSTMAttnCRFJoint.create(len(data.label2idx), len(data.cls2idx),\n",
-    "                                      bert_config_file, init_checkpoint_pt, enc_hidden_dim=256)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "AttnCRFJointDecoder(\n",
-       "  (attn): MultiHeadAttention(\n",
-       "    (attention): _MultiHeadAttention(\n",
-       "      (attention): ScaledDotProductAttention(\n",
-       "        (softmax): Softmax()\n",
-       "        (dropout): Dropout(p=0.5)\n",
-       "      )\n",
-       "    )\n",
-       "    (proj): Linear(in_features=192, out_features=256, bias=True)\n",
-       "    (dropout): Dropout(p=0.5)\n",
-       "    (layer_norm): LayerNormalization()\n",
-       "  )\n",
-       "  (linear): Linears(\n",
-       "    (linears): ModuleList(\n",
-       "      (0): Linear(in_features=256, out_features=128, bias=True)\n",
-       "    )\n",
-       "    (output_linear): Linear(in_features=128, out_features=144, bias=True)\n",
-       "  )\n",
-       "  (crf): CRF()\n",
-       "  (intent_out): PoolingLinearClassifier(\n",
-       "    (dropout): Dropout(p=0.5)\n",
-       "    (linear): Linears(\n",
-       "      (linears): ModuleList(\n",
-       "        (0): Linear(in_features=768, out_features=128, bias=True)\n",
-       "      )\n",
-       "      (output_linear): Linear(in_features=128, out_features=17, bias=True)\n",
-       "    )\n",
-       "  )\n",
-       "  (intent_loss): CrossEntropyLoss()\n",
-       ")"
-      ]
-     },
-     "execution_count": 19,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.decoder"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### Create learner"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules import NerLearner"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:Don't use lr scheduler...\n"
-     ]
-    }
-   ],
-   "source": [
-    "num_epochs = 100\n",
-    "learner = NerLearner(model, data,\n",
-    "                     best_model_path=\"/datadrive/models/atis/joint.cpt\",\n",
-    "                     lr=0.01, clip=1.0, sup_labels=sup_labels,\n",
-    "                     t_total=num_epochs * len(data.train_dl))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 65,
-   "metadata": {
-    "scrolled": false
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:Resuming train... Current epoch 43.\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=591), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "epoch 44, average train epoch loss=0.095276\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=56), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:on epoch 10 by max_prec: 0.877\n",
-      "INFO:root:on epoch {} classification report:\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "                              precision    recall  f1-score   support\n",
-      "\n",
-      "           B_toloc.city-name      0.971     0.996     0.983       712\n",
-      "                B_round-trip      1.000     0.973     0.986        73\n",
-      "                I_round-trip      1.000     1.000     1.000        71\n",
-      "                         I_O      0.976     0.992     0.984       123\n",
-      "             B_cost-relative      1.000     0.973     0.986        37\n",
-      "               B_fare-amount      1.000     1.000     1.000         2\n",
-      "               I_fare-amount      1.000     1.000     1.000         2\n",
-      "    B_arrive-date.month-name      0.714     0.833     0.769         6\n",
-      "    B_arrive-date.day-number      0.714     0.833     0.769         6\n",
-      "         I_fromloc.city-name      0.967     1.000     0.983       235\n",
-      "         B_stoploc.city-name      1.000     1.000     1.000        20\n",
-      " B_arrive-time.time-relative      0.967     0.935     0.951        31\n",
-      "          B_arrive-time.time      0.892     0.971     0.930        34\n",
-      "          I_arrive-time.time      0.947     0.973     0.960        37\n",
-      "          B_toloc.state-code      1.000     1.000     1.000        18\n",
-      "           I_toloc.city-name      0.963     0.994     0.978       338\n",
-      "         I_stoploc.city-name      1.000     1.000     1.000        14\n",
-      "          B_meal-description      1.000     0.700     0.824        10\n",
-      "    B_depart-date.month-name      0.982     0.964     0.973        56\n",
-      "    B_depart-date.day-number      0.981     0.964     0.972        55\n",
-      "              B_airline-name      0.980     1.000     0.990        96\n",
-      "              I_airline-name      1.000     0.971     0.985        68\n",
-      " B_depart-time.period-of-day      0.975     0.921     0.947       126\n",
-      "      B_depart-date.day-name      0.986     0.990     0.988       210\n",
-      "          B_toloc.state-name      0.839     0.929     0.881        28\n",
-      " B_depart-time.time-relative      0.968     0.968     0.968        63\n",
-      "          B_depart-time.time      0.859     1.000     0.924        55\n",
-      "        B_toloc.airport-name      1.000     1.000     1.000         3\n",
-      "        I_toloc.airport-name      1.000     1.000     1.000         3\n",
-      " B_depart-date.date-relative      0.944     1.000     0.971        17\n",
-      "                        B_or      0.429     1.000     0.600         3\n",
-      "              B_airline-code      0.912     0.912     0.912        34\n",
-      "                B_class-type      0.960     1.000     0.980        24\n",
-      "                I_class-type      1.000     1.000     1.000        17\n",
-      "             I_cost-relative      1.000     0.909     0.952        33\n",
-      "          I_depart-time.time      0.946     1.000     0.972        53\n",
-      "      B_fromloc.airport-name      0.500     1.000     0.667        12\n",
-      "      I_fromloc.airport-name      0.457     1.000     0.627        16\n",
-      "                 B_city-name      0.897     0.456     0.605        57\n",
-      "                B_flight-mod      1.000     1.000     1.000        24\n",
-      "                      B_meal      0.941     1.000     0.970        16\n",
-      "                      I_meal      0.941     1.000     0.970        16\n",
-      "                   B_economy      1.000     1.000     1.000         6\n",
-      "           B_fare-basis-code      0.944     1.000     0.971        17\n",
-      "          I_meal-description      0.000     0.000     0.000         2\n",
-      "           I_fare-basis-code      1.000     1.000     1.000         6\n",
-      "    I_depart-date.day-number      1.000     0.952     0.976        21\n",
-      "B_depart-date.today-relative      0.889     0.889     0.889         9\n",
-      "               B_flight-stop      1.000     1.000     1.000        21\n",
-      "               I_flight-stop      1.000     1.000     1.000        21\n",
-      "              B_airport-code      0.600     0.333     0.429         9\n",
-      "              I_airport-code      0.667     0.444     0.533         9\n",
-      "        B_fromloc.state-name      0.944     1.000     0.971        17\n",
-      "        I_fromloc.state-name      1.000     1.000     1.000         1\n",
-      "                 I_city-name      0.889     0.432     0.582        37\n",
-      "                I_flight-mod      1.000     0.615     0.762        13\n",
-      "                   B_connect      1.000     1.000     1.000         6\n",
-      "      B_arrive-date.day-name      0.846     1.000     0.917        11\n",
-      "        B_fromloc.state-code      0.885     1.000     0.939        23\n",
-      "          B_depart-date.year      1.000     1.000     1.000         3\n",
-      "    B_depart-time.start-time      1.000     1.000     1.000         3\n",
-      "    I_depart-time.start-time      1.000     1.000     1.000         1\n",
-      "      B_depart-time.end-time      1.000     1.000     1.000         3\n",
-      "      I_depart-time.end-time      1.000     1.000     1.000         3\n",
-      "    B_arrive-time.start-time      1.000     0.875     0.933         8\n",
-      "      B_arrive-time.end-time      1.000     0.875     0.933         8\n",
-      "      I_arrive-time.end-time      1.000     0.875     0.933         8\n",
-      " I_depart-time.period-of-day      1.000     0.667     0.800         6\n",
-      "               B_flight-days      1.000     1.000     1.000        10\n",
-      "                       B_mod      1.000     0.500     0.667         2\n",
-      "             B_flight-number      0.833     1.000     0.909        10\n",
-      "          I_toloc.state-name      1.000     1.000     1.000         1\n",
-      "                 B_meal-code      1.000     1.000     1.000         1\n",
-      "              B_airport-name      0.800     0.381     0.516        21\n",
-      "              I_airport-name      0.857     0.400     0.545        30\n",
-      "            B_transport-type      1.000     1.000     1.000        10\n",
-      "            I_transport-type      1.000     0.833     0.909         6\n",
-      "                B_state-code      1.000     1.000     1.000         1\n",
-      "             B_aircraft-code      1.000     0.879     0.935        33\n",
-      "        B_toloc.country-name      1.000     1.000     1.000         1\n",
-      "        B_toloc.airport-code      1.000     1.000     1.000         3\n",
-      "        I_toloc.airport-code      1.000     1.000     1.000         3\n",
-      " B_return-date.date-relative      1.000     0.667     0.800         3\n",
-      " I_return-date.date-relative      1.000     0.667     0.800         3\n",
-      "               B_flight-time      0.500     1.000     0.667         1\n",
-      "      B_fromloc.airport-code      0.800     1.000     0.889         4\n",
-      "      I_fromloc.airport-code      0.500     1.000     0.667         4\n",
-      " B_arrive-time.period-of-day      0.750     1.000     0.857         6\n",
-      "    B_depart-time.period-mod      1.000     1.000     1.000         5\n",
-      "      I_depart-date.day-name      0.333     1.000     0.500         1\n",
-      "             I_flight-number      0.000     0.000     0.000         1\n",
-      "               I_flight-time      1.000     1.000     1.000         1\n",
-      " B_arrive-date.date-relative      1.000     1.000     1.000         2\n",
-      "          B_restriction-code      1.000     1.000     1.000         4\n",
-      "          I_restriction-code      1.000     1.000     1.000         4\n",
-      "             I_aircraft-code      1.000     0.963     0.981        27\n",
-      "             B_period-of-day      1.000     0.333     0.500         3\n",
-      "      I_arrive-date.day-name      0.000     0.000     0.000         1\n",
-      "    I_arrive-time.start-time      1.000     1.000     1.000         1\n",
-      "                 B_days-code      0.000     0.000     0.000         1\n",
-      " I_arrive-time.time-relative      1.000     1.000     1.000         4\n",
-      " I_depart-time.time-relative      0.000     0.000     0.000         1\n",
-      "                  B_day-name      1.000     0.500     0.667         2\n",
-      "                  I_day-name      0.000     0.000     0.000         1\n",
-      "              I_airline-code      0.000     0.000     0.000         2\n",
-      "             I_period-of-day      0.000     0.000     0.000         1\n",
-      "\n",
-      "                   micro avg      0.952     0.948     0.950      3341\n",
-      "                   macro avg      0.855     0.842     0.833      3341\n",
-      "                weighted avg      0.954     0.948     0.945      3341\n",
-      "\n",
-      "                precision    recall  f1-score   support\n",
-      "\n",
-      "  abbreviation      1.000     0.970     0.985        33\n",
-      "      aircraft      0.692     1.000     0.818         9\n",
-      "       airfare      0.923     1.000     0.960        48\n",
-      "       airline      1.000     1.000     1.000        38\n",
-      "       airport      1.000     0.889     0.941        18\n",
-      "      capacity      1.000     1.000     1.000        21\n",
-      "          city      0.600     0.500     0.545         6\n",
-      "      distance      1.000     0.900     0.947        10\n",
-      "        flight      0.984     0.984     0.984       632\n",
-      "flight+airfare      0.833     0.417     0.556        12\n",
-      "     flight_no      1.000     0.875     0.933         8\n",
-      "   flight_time      1.000     1.000     1.000         1\n",
-      "   ground_fare      1.000     0.714     0.833         7\n",
-      "ground_service      0.973     1.000     0.986        36\n",
-      "          meal      0.833     0.833     0.833         6\n",
-      "      quantity      0.429     1.000     0.600         3\n",
-      "   restriction      0.000     0.000     0.000         0\n",
-      "\n",
-      "     micro avg      0.968     0.968     0.968       888\n",
-      "     macro avg      0.839     0.828     0.819       888\n",
-      "  weighted avg      0.972     0.968     0.968       888\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "learner.fit(num_epochs, target_metric='prec')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Get best results"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "learner.load_model()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### Get span results for valid ds (where train support > 3)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pandas as pd\n",
-    "sup_slots = list(pd.read_csv(\"/datadrive/JointSLU/data/sup_slots.csv\").sup_slots)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 44,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.data.bert_data import get_bert_data_loader_for_predict"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 45,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dl = get_bert_data_loader_for_predict(data_path + \"valid_filtered.csv\", learner)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 55,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=56), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "preds, preds_cls = learner.predict(dl)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 56,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=56), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "from modules.train.train import validate_step\n",
-    "\n",
-    "\n",
-    "clf_report, clf_report_cls = validate_step(\n",
-    "    learner.data.valid_dl, learner.model, learner.data.id2label, learner.sup_labels, learner.data.id2cls)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Mean IOB precision"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 57,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.8768301886792453"
-      ]
-     },
-     "execution_count": 57,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import numpy as np\n",
-    "\n",
-    "\n",
-    "np.mean([float(line.split()[1]) for line in clf_report.split(\"\\n\")[2:-5] if int(line.split()[-1]) > 0 and line.split()[0] in ss])"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Span mean precision"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 61,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.8235797101449275"
-      ]
-     },
-     "execution_count": 61,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "from modules.utils.plot_metrics import get_bert_span_report\n",
-    "\n",
-    "\n",
-    "clf_report = get_bert_span_report(dl, preds)\n",
-    "np.mean([float(line.split()[1]) for line in clf_report.split(\"\\n\")[2:-5] if int(line.split()[-1]) > 0])"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Classification mean"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 64,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.8943125000000001"
-      ]
-     },
-     "execution_count": 64,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "np.mean([float(line.split()[1]) for line in clf_report_cls.split(\"\\n\")[2:-5] if int(line.split()[-1]) > 0])"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/examples/atis-nmt.ipynb b/examples/atis-nmt.ipynb
deleted file mode 100644
index 85722b6..0000000
--- a/examples/atis-nmt.ipynb
+++ /dev/null
@@ -1,545 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Atis nmt evaluation"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%reload_ext autoreload\n",
-    "%autoreload 2\n",
-    "%matplotlib inline\n",
-    "\n",
-    "import pandas as pd\n",
-    "import warnings\n",
-    "import os\n",
-    "import sys\n",
-    "\n",
-    "sys.path.append(\"../\")\n",
-    "\n",
-    "warnings.filterwarnings(\"ignore\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Download atis dataset from [here](https://github.com/Microsoft/CNTK/tree/master/Examples/LanguageUnderstanding/ATIS/Data)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Run NER model"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "\n",
-    "\n",
-    "data_path = \"/datadrive/JointSLU/data/\"\n",
-    "train_path = os.path.join(data_path, \"train_filtered.csv\")\n",
-    "valid_path = os.path.join(data_path, \"valid_filtered.csv\")\n",
-    "model_dir = \" /datadrive/models/multi_cased_L-12_H-768_A-12/\"\n",
-    "init_checkpoint_pt = os.path.join(\"/datadrive/models/multi_cased_L-12_H-768_A-12/\", \"pytorch_model.bin\")\n",
-    "bert_config_file = os.path.join(\"/datadrive/bert/multi_cased_L-12_H-768_A-12/\", \"bert_config.json\")\n",
-    "vocab_file = os.path.join(\"/datadrive/bert/multi_cased_L-12_H-768_A-12/\", \"vocab.txt\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(True, 0)"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import torch\n",
-    "torch.cuda.set_device(0)\n",
-    "torch.cuda.is_available(), torch.cuda.current_device()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### Create data loaders"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:summarizer.preprocessing.cleaner:'pattern' package not found; tag filters are not available for English\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules import BertNerData as NerData"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data = NerData.create(train_path, valid_path, vocab_file, data_type=\"bert_cased\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(9445, 888)"
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "len(data.train_dl.dataset), len(data.valid_dl.dataset)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "154"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "len(data.label2idx) #, len(data.cls2idx)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### Create Ner model"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Set params of encoder and decoder as proposed [here](https://arxiv.org/pdf/1609.01454.pdf)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.models.bert_models import BertBiLSTMAttnNMT"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "model = BertBiLSTMAttnNMT.create(len(data.label2idx), bert_config_file, init_checkpoint_pt,\n",
-    "                                 enc_hidden_dim=256, dec_hidden_dim=256)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "1928231"
-      ]
-     },
-     "execution_count": 24,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.get_n_trainable_params()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sup_labels = list(pd.read_csv(\"/datadrive/JointSLU/data/slt_flt.csv\").slots)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "106"
-      ]
-     },
-     "execution_count": 26,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "len(sup_labels)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### Create learner"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules import NerLearner"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:Don't use lr scheduler...\n"
-     ]
-    }
-   ],
-   "source": [
-    "num_epochs = 250\n",
-    "learner = NerLearner(model, data,\n",
-    "                     best_model_path=\"/datadrive/models/atis/attn_final_nmt.cpt\",\n",
-    "                     lr=0.01, clip=1.0, sup_labels=sup_labels,\n",
-    "                     t_total=num_epochs * len(data.train_dl))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "learner.fit(num_epochs, target_metric='prec')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Get best results"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "learner.load_model()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### Get span results for valid ds (where train support > 3)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pandas as pd\n",
-    "sup_slots = list(pd.read_csv(\"/datadrive/JointSLU/data/sup_slots.csv\").sup_slots)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.data.bert_data import get_bert_data_loader_for_predict"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dl = get_bert_data_loader_for_predict(data_path + \"valid_filtered.csv\", learner)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 33,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=56), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "preds = learner.predict(dl)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 35,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=56), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "from modules.train.train import validate_step\n",
-    "\n",
-    "\n",
-    "rep = validate_step(learner.data.valid_dl, learner.model, learner.data.id2label, learner.sup_labels)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Mean IOB precision"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 37,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.9191981132075471"
-      ]
-     },
-     "execution_count": 37,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import numpy as np\n",
-    "\n",
-    "\n",
-    "np.mean([float(line.split()[1]) for line in rep.split(\"\\n\")[2:-5] if int(line.split()[-1]) > 0])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 39,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "                            precision    recall  f1-score   support\n",
-      "\n",
-      "                       mod      1.000     0.500     0.667         2\n",
-      " return-date.date-relative      1.000     0.333     0.500         3\n",
-      "      arrive-date.day-name      0.714     0.909     0.800        11\n",
-      "                   connect      1.000     1.000     1.000         6\n",
-      "               compartment      0.000     0.000     0.000         1\n",
-      "                   economy      1.000     0.333     0.500         6\n",
-      "           toloc.city-name      0.955     0.980     0.967       712\n",
-      "    depart-time.period-mod      0.833     1.000     0.909         5\n",
-      "                      meal      0.941     1.000     0.970        16\n",
-      "        fromloc.state-name      1.000     1.000     1.000        17\n",
-      "                 days-code      1.000     1.000     1.000         1\n",
-      "              airline-name      0.959     0.979     0.969        96\n",
-      "                  day-name      1.000     0.500     0.667         2\n",
-      "           fare-basis-code      0.941     0.941     0.941        17\n",
-      "      fromloc.airport-code      0.800     1.000     0.889         4\n",
-      "         fromloc.city-name      0.982     0.986     0.984       700\n",
-      "      stoploc.airport-code      0.000     0.000     0.000         1\n",
-      "    arrive-time.start-time      1.000     1.000     1.000         8\n",
-      "                state-name      0.000     0.000     0.000         7\n",
-      "                flight-mod      1.000     1.000     1.000        24\n",
-      "    arrive-date.day-number      0.714     0.833     0.769         6\n",
-      "      return-date.day-name      0.000     0.000     0.000         2\n",
-      "                state-code      1.000     1.000     1.000         1\n",
-      "depart-date.today-relative      1.000     0.889     0.941         9\n",
-      "                 city-name      0.914     0.561     0.696        57\n",
-      " depart-time.time-relative      0.954     0.984     0.969        63\n",
-      "            transport-type      1.000     1.000     1.000        10\n",
-      "                    flight      0.000     0.000     0.000         1\n",
-      "              airport-code      0.571     0.444     0.500         9\n",
-      "         stoploc.city-name      0.909     1.000     0.952        20\n",
-      "             aircraft-code      1.000     0.879     0.935        33\n",
-      "          arrive-time.time      0.914     0.941     0.928        34\n",
-      "        toloc.country-name      1.000     1.000     1.000         1\n",
-      "               flight-time      1.000     1.000     1.000         1\n",
-      " depart-time.period-of-day      0.992     0.929     0.959       126\n",
-      "      arrive-time.end-time      1.000     1.000     1.000         8\n",
-      "          meal-description      1.000     1.000     1.000        10\n",
-      "             flight-number      0.909     1.000     0.952        10\n",
-      "          depart-date.year      0.667     0.667     0.667         3\n",
-      "        toloc.airport-name      1.000     1.000     1.000         3\n",
-      "               flight-stop      1.000     1.000     1.000        21\n",
-      "                         O      0.991     0.993     0.992      5490\n",
-      "      fromloc.airport-name      0.462     1.000     0.632        12\n",
-      "          depart-time.time      0.857     0.982     0.915        55\n",
-      "          toloc.state-code      0.833     0.833     0.833        18\n",
-      "    depart-date.month-name      0.945     0.929     0.937        56\n",
-      "             cost-relative      1.000     0.973     0.986        37\n",
-      "          restriction-code      1.000     1.000     1.000         4\n",
-      "             booking-class      0.000     0.000     0.000         1\n",
-      "              airport-name      0.643     0.429     0.514        21\n",
-      "                round-trip      1.000     0.973     0.986        73\n",
-      " arrive-date.date-relative      0.500     0.500     0.500         2\n",
-      "    depart-date.day-number      0.944     0.927     0.936        55\n",
-      "                 meal-code      1.000     1.000     1.000         1\n",
-      "        toloc.airport-code      1.000     1.000     1.000         3\n",
-      "    arrive-date.month-name      0.714     0.833     0.769         6\n",
-      "        fromloc.state-code      0.958     1.000     0.979        23\n",
-      "              airline-code      0.969     0.912     0.939        34\n",
-      "          toloc.state-name      0.893     0.893     0.893        28\n",
-      "      depart-time.end-time      1.000     1.000     1.000         3\n",
-      "               flight-days      1.000     1.000     1.000        10\n",
-      "    depart-time.start-time      1.000     1.000     1.000         3\n",
-      "                        or      0.600     1.000     0.750         3\n",
-      "      depart-date.day-name      0.990     0.990     0.990       210\n",
-      "               fare-amount      0.500     0.500     0.500         2\n",
-      "                class-type      0.960     1.000     0.980        24\n",
-      "             period-of-day      1.000     0.333     0.500         3\n",
-      " depart-date.date-relative      0.944     1.000     0.971        17\n",
-      " arrive-time.period-of-day      0.750     1.000     0.857         6\n",
-      " arrive-time.time-relative      0.933     0.903     0.918        31\n",
-      "\n",
-      "                 micro avg      0.978     0.978     0.978      8298\n",
-      "                 macro avg      0.829     0.807     0.804      8298\n",
-      "              weighted avg      0.978     0.978     0.977      8298\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules.utils.plot_metrics import get_bert_span_report\n",
-    "\n",
-    "\n",
-    "clf_report = get_bert_span_report(dl, preds, learner.sup_labels)\n",
-    "print(clf_report)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/examples/atis.ipynb b/examples/atis.ipynb
deleted file mode 100644
index ccb0093..0000000
--- a/examples/atis.ipynb
+++ /dev/null
@@ -1,689 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Atis example"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%reload_ext autoreload\n",
-    "%autoreload 2\n",
-    "%matplotlib inline\n",
-    "\n",
-    "import pandas as pd\n",
-    "import warnings\n",
-    "import os\n",
-    "import sys\n",
-    "\n",
-    "sys.path.append(\"../\")\n",
-    "\n",
-    "warnings.filterwarnings(\"ignore\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Download atis dataset from [here](https://github.com/Microsoft/CNTK/tree/master/Examples/LanguageUnderstanding/ATIS/Data)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Run NER model"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "\n",
-    "\n",
-    "data_path = \"/datadrive/JointSLU/data/\"\n",
-    "train_path = os.path.join(data_path, \"train_filtered.csv\")\n",
-    "valid_path = os.path.join(data_path, \"valid_filtered.csv\")\n",
-    "model_dir = \"/datadrive/models/multi_cased_L-12_H-768_A-12/\"\n",
-    "init_checkpoint_pt = os.path.join(\"/datadrive/models/multi_cased_L-12_H-768_A-12/\", \"pytorch_model.bin\")\n",
-    "bert_config_file = os.path.join(\"/datadrive/bert/multi_cased_L-12_H-768_A-12/\", \"bert_config.json\")\n",
-    "vocab_file = os.path.join(\"/datadrive/bert/multi_cased_L-12_H-768_A-12/\", \"vocab.txt\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(True, 0)"
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import torch\n",
-    "torch.cuda.set_device(0)\n",
-    "torch.cuda.is_available(), torch.cuda.current_device()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### Create data loaders"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:summarizer.preprocessing.cleaner:'pattern' package not found; tag filters are not available for English\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules import BertNerData as NerData"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data = NerData.create(train_path, valid_path, vocab_file, data_type=\"bert_cased\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(9445, 888)"
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "len(data.train_dl.dataset), len(data.valid_dl.dataset)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "154"
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "len(data.label2idx) #, len(data.cls2idx)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### Create Ner model"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Set params of encoder and decoder as proposed [here](https://arxiv.org/pdf/1609.01454.pdf)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.models.bert_models import BertBiLSTMAttnCRF"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "model = BertBiLSTMAttnCRF.create(len(data.label2idx), bert_config_file, init_checkpoint_pt, enc_hidden_dim=256)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### Create learner"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules import NerLearner"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:Use lr OneCycleScheduler...\n"
-     ]
-    }
-   ],
-   "source": [
-    "num_epochs = 250\n",
-    "learner = NerLearner(model, data,\n",
-    "                     best_model_path=\"/datadrive/models/atis/base_line_final.cpt\",\n",
-    "                     lr=0.01, clip=1.0, sup_labels=data.id2label[5:],\n",
-    "                     t_total=num_epochs * len(data.train_dl))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "learner.fit(num_epochs, target_metric='prec')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Get best results"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 34,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "learner.load_model()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### Get span results for valid ds (where train support > 3)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pandas as pd\n",
-    "sup_slots = list(pd.read_csv(\"/datadrive/JointSLU/data/sup_slots.csv\").sup_slots)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.data.data import get_bert_data_loader_for_predict"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dl = get_bert_data_loader_for_predict(data_path + \"valid_filtered.csv\", learner)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 371,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "span_df = span_df[[s in sup_slots for s in list(span_df.slots)]]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=56), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r"
-     ]
-    }
-   ],
-   "source": [
-    "preds = learner.predict(dl)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ss = [\"B_\"+s for s in sup_slots] + [\"I_\"+s for s in sup_slots]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 38,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=56), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r"
-     ]
-    }
-   ],
-   "source": [
-    "from modules.train.train import validate_step\n",
-    "\n",
-    "\n",
-    "rep = validate_step(learner.data.valid_dl, learner.model, learner.data.id2label, learner.sup_labels)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Mean IOB precision"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 50,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.8524716981132077"
-      ]
-     },
-     "execution_count": 50,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import numpy as np\n",
-    "\n",
-    "\n",
-    "np.mean([float(line.split()[1]) for line in rep.split(\"\\n\")[2:-5] if int(line.split()[-1]) > 0 and line.split()[0] in ss])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 52,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "                            precision    recall  f1-score   support\n",
-      "\n",
-      "                         O      0.993     0.992     0.993      5495\n",
-      "          arrive-time.time      0.865     0.941     0.901        34\n",
-      "               flight-stop      1.000     1.000     1.000        21\n",
-      "                  day-name      1.000     0.500     0.667         2\n",
-      "                 days-code      0.000     0.000     0.000         1\n",
-      "                state-name      0.000     0.000     0.000         7\n",
-      "        fromloc.state-name      1.000     1.000     1.000        17\n",
-      "            transport-type      1.000     1.000     1.000        10\n",
-      "                 city-name      0.971     0.579     0.725        57\n",
-      " arrive-date.date-relative      0.500     0.500     0.500         2\n",
-      "        fromloc.state-code      0.920     1.000     0.958        23\n",
-      " arrive-time.time-relative      0.933     0.903     0.918        31\n",
-      "      return-date.day-name      0.000     0.000     0.000         2\n",
-      "          restriction-code      1.000     1.000     1.000         4\n",
-      "                   connect      1.000     1.000     1.000         6\n",
-      "      fromloc.airport-name      0.444     1.000     0.615        12\n",
-      "          toloc.state-name      0.931     0.964     0.947        28\n",
-      " arrive-time.period-of-day      0.750     1.000     0.857         6\n",
-      "                state-code      1.000     1.000     1.000         1\n",
-      "      arrive-date.day-name      0.714     0.909     0.800        11\n",
-      " return-date.date-relative      1.000     0.333     0.500         3\n",
-      "        toloc.airport-name      1.000     1.000     1.000         3\n",
-      "    depart-date.month-name      0.946     0.946     0.946        56\n",
-      "          toloc.state-code      0.833     0.833     0.833        18\n",
-      "         fromloc.city-name      0.983     0.989     0.986       700\n",
-      "    depart-time.start-time      1.000     1.000     1.000         3\n",
-      " depart-date.date-relative      0.944     1.000     0.971        17\n",
-      "              airline-code      0.912     0.912     0.912        34\n",
-      "         stoploc.city-name      1.000     1.000     1.000        20\n",
-      "        toloc.airport-code      1.000     1.000     1.000         3\n",
-      "             aircraft-code      1.000     0.848     0.918        33\n",
-      "               fare-amount      0.500     0.500     0.500         2\n",
-      "                       mod      0.333     0.500     0.400         2\n",
-      "           fare-basis-code      0.941     0.941     0.941        17\n",
-      "      fromloc.airport-code      0.800     1.000     0.889         4\n",
-      "    depart-date.day-number      0.945     0.945     0.945        55\n",
-      "    arrive-date.month-name      0.667     0.667     0.667         6\n",
-      "depart-date.today-relative      1.000     0.889     0.941         9\n",
-      "      stoploc.airport-code      0.000     0.000     0.000         1\n",
-      "      arrive-time.end-time      1.000     0.875     0.933         8\n",
-      "               compartment      0.000     0.000     0.000         1\n",
-      "      depart-time.end-time      1.000     1.000     1.000         3\n",
-      "                      meal      0.941     1.000     0.970        16\n",
-      "               flight-time      1.000     1.000     1.000         1\n",
-      "             period-of-day      1.000     0.333     0.500         3\n",
-      "      depart-date.day-name      0.990     0.981     0.986       210\n",
-      "                 meal-code      0.000     0.000     0.000         1\n",
-      "              airline-name      0.970     1.000     0.985        96\n",
-      "    depart-time.period-mod      1.000     1.000     1.000         5\n",
-      "                flight-mod      0.821     0.958     0.885        24\n",
-      "             cost-relative      1.000     0.973     0.986        37\n",
-      "          depart-date.year      0.667     0.667     0.667         3\n",
-      "                        or      0.500     1.000     0.667         3\n",
-      "              airport-code      0.667     0.444     0.533         9\n",
-      "        toloc.country-name      1.000     1.000     1.000         1\n",
-      "                    flight      0.000     0.000     0.000         1\n",
-      "                round-trip      1.000     0.986     0.993        73\n",
-      "    arrive-time.start-time      1.000     0.875     0.933         8\n",
-      "              airport-name      0.467     0.333     0.389        21\n",
-      " depart-time.period-of-day      0.991     0.921     0.955       126\n",
-      "             booking-class      0.000     0.000     0.000         1\n",
-      "               flight-days      1.000     1.000     1.000        10\n",
-      " depart-time.time-relative      0.969     0.984     0.976        63\n",
-      "             flight-number      0.833     1.000     0.909        10\n",
-      "                class-type      0.960     1.000     0.980        24\n",
-      "           toloc.city-name      0.964     0.986     0.975       712\n",
-      "          depart-time.time      0.831     0.982     0.900        55\n",
-      "                   economy      1.000     1.000     1.000         6\n",
-      "    arrive-date.day-number      0.667     0.667     0.667         6\n",
-      "          meal-description      1.000     1.000     1.000        10\n",
-      "\n",
-      "                 micro avg      0.979     0.979     0.979      8303\n",
-      "                 macro avg      0.787     0.779     0.772      8303\n",
-      "              weighted avg      0.978     0.979     0.978      8303\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules.utils.plot_metrics import get_bert_span_report\n",
-    "\n",
-    "\n",
-    "clf_report = get_bert_span_report(dl, preds, list(set(learner.data.id2label) - set(ss)))\n",
-    "print(clf_report)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Get mean and stdv on 10 runs"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=591), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "from modules.utils.plot_metrics import *\n",
-    "from modules import NerLearner\n",
-    "\n",
-    "\n",
-    "num_runs = 10\n",
-    "best_reports = []\n",
-    "for i in range(num_runs):\n",
-    "    model = BertBiLSTMAttnCRF.create(len(data.label2idx), bert_config_file, init_checkpoint_pt, enc_hidden_dim=256)\n",
-    "    best_model_path = \"/datadrive/models/atis/exp_{}_attn_cased.cpt\".format(i)\n",
-    "    learner = NerLearner(model, data,\n",
-    "                         best_model_path=best_model_path, verbose=False,\n",
-    "                         base_lr=0.0001, lr_max=0.001, clip=5.0, use_lr_scheduler=True, sup_labels=data.id2label[5:])\n",
-    "    learner.fit(100, target_metric='prec')\n",
-    "    idx, res = get_mean_max_metric(learner.history, \"f1\", True)\n",
-    "    best_reports.append(learner.history[idx])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def get_mean_max_metric_(rep, metric_):\n",
-    "    idx = 0\n",
-    "    if metric_ == \"rec\":\n",
-    "        idx = 1\n",
-    "    elif metric_ == \"f1\":\n",
-    "        idx = 2\n",
-    "    idx += 1\n",
-    "    return np.mean([float(line.split()[idx]) for line in rep.split(\"\\n\")[2:-5] if int(line.split()[-1]) > 0 and line.split()[0] in ss])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 33,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 34,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(0.8822570175438595, 0.02)"
-      ]
-     },
-     "execution_count": 34,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "np.mean([get_mean_max_metric_(r, \"f1\") for r in best_reports]), np.round(np.std([get_mean_max_metric_(r, \"f1\") for r in best_reports]), 3)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Best"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 35,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.8956666666666666"
-      ]
-     },
-     "execution_count": 35,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "np.max([get_mean_max_metric_(r, \"f1\") for r in best_reports])"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### precision"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Mean and std"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 36,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(0.9077385964912281, 0.021)"
-      ]
-     },
-     "execution_count": 36,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "np.mean([get_mean_max_metric_(r, \"prec\") for r in best_reports]), np.round(np.std([get_mean_max_metric_(r, \"prec\") for r in best_reports]), 3)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Best"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 38,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.9281929824561402"
-      ]
-     },
-     "execution_count": 38,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "np.max([get_mean_max_metric_(r, \"prec\") for r in best_reports])"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/examples/conll-2003-nmt.ipynb b/examples/conll-2003-nmt.ipynb
deleted file mode 100644
index fa6599d..0000000
--- a/examples/conll-2003-nmt.ipynb
+++ /dev/null
@@ -1,731 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Conll 2003 nmt evaluation\n",
-    "\n",
-    "Data downloaded from [here](https://github.com/kyzhouhzau/BERT-NER/tree/master/NERdata)."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%reload_ext autoreload\n",
-    "%autoreload 2\n",
-    "%matplotlib inline\n",
-    "\n",
-    "import pandas as pd\n",
-    "import warnings\n",
-    "import os\n",
-    "import sys\n",
-    "\n",
-    "sys.path.append(\"../\")\n",
-    "\n",
-    "warnings.filterwarnings(\"ignore\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 65,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data_path = \"/datadrive/conll-2003/\"\n",
-    "\n",
-    "train_path = data_path + \"train.txt\"\n",
-    "dev_path = data_path + \"dev.txt\"\n",
-    "test_path = data_path + \"test.txt\""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 0. Prc data for csv format"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 66,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import codecs\n",
-    "\n",
-    "\n",
-    "def read_data(input_file):\n",
-    "    \"\"\"Reads a BIO data.\"\"\"\n",
-    "    with codecs.open(input_file, \"r\", encoding=\"utf-8\") as f:\n",
-    "        lines = []\n",
-    "        words = []\n",
-    "        labels = []\n",
-    "        for line in f:\n",
-    "            contends = line.strip()\n",
-    "            word = line.strip().split(' ')[0]\n",
-    "            label = line.strip().split(' ')[-1]\n",
-    "            if contends.startswith(\"-DOCSTART-\"):\n",
-    "                words.append('')\n",
-    "                continue\n",
-    "            \n",
-    "            if len(contends) == 0 and not len(words):\n",
-    "                words.append(\"\")\n",
-    "            \n",
-    "            if len(contends) == 0 and words[-1] == '.':\n",
-    "                l = ' '.join([label for label in labels if len(label) > 0])\n",
-    "                w = ' '.join([word for word in words if len(word) > 0])\n",
-    "                lines.append([l, w])\n",
-    "                words = []\n",
-    "                labels = []\n",
-    "                continue\n",
-    "            words.append(word)\n",
-    "            labels.append(label.replace(\"-\", \"_\"))\n",
-    "        return lines\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 67,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "train_f = read_data(train_path)\n",
-    "dev_f = read_data(dev_path)\n",
-    "test_f = read_data(test_path)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "[l for l in train_f]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 68,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(6973, 1739, 1559)"
-      ]
-     },
-     "execution_count": 68,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "len(train_f), len(dev_f), len(test_f)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 69,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['B_ORG O B_MISC O O O B_MISC O O',\n",
-       " 'EU rejects German call to boycott British lamb .']"
-      ]
-     },
-     "execution_count": 69,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "train_f[0]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 70,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pandas as pd"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 71,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "train_df = pd.DataFrame(train_f, columns=[\"0\", \"1\"])\n",
-    "train_df.to_csv(data_path + \"train.csv\", index=False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 72,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "valid_df = pd.DataFrame(dev_f, columns=[\"0\", \"1\"])\n",
-    "valid_df.to_csv(data_path + \"valid.csv\", index=False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 73,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "test_df = pd.DataFrame(test_f, columns=[\"0\", \"1\"])\n",
-    "test_df.to_csv(data_path + \"test.csv\", index=False)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 1. Create data loaders"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "\n",
-    "data_path = \"/datadrive/conll-2003/\"\n",
-    "train_path = data_path + \"train.csv\"\n",
-    "valid_path = data_path + \"valid.csv\"\n",
-    "test_path = data_path + \"test.csv\"\n",
-    "\n",
-    "model_dir = \" /datadrive/models/multi_cased_L-12_H-768_A-12/\"\n",
-    "init_checkpoint_pt = os.path.join(\"/datadrive/models/multi_cased_L-12_H-768_A-12/\", \"pytorch_model.bin\")\n",
-    "bert_config_file = os.path.join(\"/datadrive/bert/multi_cased_L-12_H-768_A-12/\", \"bert_config.json\")\n",
-    "vocab_file = os.path.join(\"/datadrive/bert/multi_cased_L-12_H-768_A-12/\", \"vocab.txt\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(True, 1)"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import torch\n",
-    "torch.cuda.set_device(1)\n",
-    "torch.cuda.is_available(), torch.cuda.current_device()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:summarizer.preprocessing.cleaner:'pattern' package not found; tag filters are not available for English\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules import BertNerData as NerData"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data = NerData.create(train_path, valid_path, vocab_file)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(6973, 1739)"
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "len(data.train_dl.dataset), len(data.valid_dl.dataset)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "['<pad>', '[CLS]', '[SEP]', 'B_ORG', 'B_O', 'I_O', 'B_MISC', 'B_PER', 'I_PER', 'B_LOC', 'I_LOC', 'I_ORG', 'I_MISC']\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(data.id2label)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sup_labels = ['B_ORG', 'B_MISC', 'B_PER', 'I_PER', 'B_LOC', 'I_LOC', 'I_ORG', 'I_MISC']"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "424"
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "max([len(f.labels_ids) for f in data.train_dl.dataset])"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 2. Create model"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.models.bert_models import BertBiLSTMAttnNMT"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "model = BertBiLSTMAttnNMT.create(len(data.label2idx), bert_config_file, init_checkpoint_pt,\n",
-    "                                 enc_hidden_dim=128, dec_hidden_dim=128, dec_embedding_dim=16)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "652906"
-      ]
-     },
-     "execution_count": 12,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.get_n_trainable_params()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### TODO: fix bug with len"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 3. Create Learner"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules import NerLearner"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:Don't use lr scheduler...\n"
-     ]
-    }
-   ],
-   "source": [
-    "num_epochs = 100\n",
-    "learner = NerLearner(model, data,\n",
-    "                     best_model_path=\"/datadrive/models/conll-2003/bilstm_attn_cased.cpt\",\n",
-    "                     lr=0.01, clip=1.0, sup_labels=data.id2label[5:],\n",
-    "                     t_total=num_epochs * len(data.train_dl))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 4. Start learning"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "learner.fit(num_epochs, target_metric='prec')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 5. Evaluate dev set"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.data.bert_data import get_bert_data_loader_for_predict\n",
-    "dl = get_bert_data_loader_for_predict(data_path + \"valid.csv\", learner)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "learner.load_model()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "preds = learner.predict(dl)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "IOB precision"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.922     0.934     0.928      1282\n",
-      "      B_MISC      0.924     0.892     0.908       905\n",
-      "       B_PER      0.973     0.970     0.972      1686\n",
-      "       I_PER      0.985     0.974     0.980      3488\n",
-      "       B_LOC      0.953     0.958     0.956      1669\n",
-      "       I_LOC      0.956     0.936     0.946      1913\n",
-      "       I_ORG      0.910     0.927     0.918      2129\n",
-      "      I_MISC      0.860     0.838     0.849      1061\n",
-      "\n",
-      "   micro avg      0.946     0.940     0.943     14133\n",
-      "   macro avg      0.936     0.928     0.932     14133\n",
-      "weighted avg      0.946     0.940     0.943     14133\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules.train.train import validate_step\n",
-    "print(validate_step(learner.data.valid_dl, learner.model, learner.data.id2label, learner.sup_labels))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Span precision"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "        MISC      0.870     0.863     0.866       905\n",
-      "         ORG      0.815     0.836     0.826      1282\n",
-      "         PER      0.930     0.928     0.929      1686\n",
-      "           O      0.990     0.989     0.990     41801\n",
-      "         LOC      0.895     0.904     0.899      1669\n",
-      "\n",
-      "   micro avg      0.977     0.977     0.977     47343\n",
-      "   macro avg      0.900     0.904     0.902     47343\n",
-      "weighted avg      0.978     0.977     0.978     47343\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules.utils.plot_metrics import get_bert_span_report\n",
-    "clf_report = get_bert_span_report(dl, preds, [])\n",
-    "print(clf_report)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 6. Evaluate test set"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.data.bert_data import get_bert_data_loader_for_predict\n",
-    "dl = get_bert_data_loader_for_predict(data_path + \"test.csv\", learner)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=98), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "preds = learner.predict(dl)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "IOB precision"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.922     0.934     0.928      1282\n",
-      "      B_MISC      0.924     0.892     0.908       905\n",
-      "       B_PER      0.973     0.970     0.972      1686\n",
-      "       I_PER      0.985     0.974     0.980      3488\n",
-      "       B_LOC      0.953     0.958     0.956      1669\n",
-      "       I_LOC      0.956     0.936     0.946      1913\n",
-      "       I_ORG      0.910     0.927     0.918      2129\n",
-      "      I_MISC      0.860     0.838     0.849      1061\n",
-      "\n",
-      "   micro avg      0.946     0.940     0.943     14133\n",
-      "   macro avg      0.936     0.928     0.932     14133\n",
-      "weighted avg      0.946     0.940     0.943     14133\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules.train.train import validate_step\n",
-    "print(validate_step(learner.data.valid_dl, learner.model, learner.data.id2label, learner.sup_labels))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Span precision"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "        MISC      0.758     0.778     0.768       688\n",
-      "         ORG      0.656     0.683     0.669      1533\n",
-      "         PER      0.864     0.859     0.861      1566\n",
-      "           O      0.980     0.977     0.979     37690\n",
-      "         LOC      0.834     0.851     0.843      1570\n",
-      "\n",
-      "   micro avg      0.955     0.955     0.955     43047\n",
-      "   macro avg      0.818     0.830     0.824     43047\n",
-      "weighted avg      0.955     0.955     0.955     43047\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules.utils.plot_metrics import get_bert_span_report\n",
-    "clf_report = get_bert_span_report(dl, preds, [])\n",
-    "print(clf_report)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/examples/conll-2003.ipynb b/examples/conll-2003.ipynb
deleted file mode 100644
index ece92e6..0000000
--- a/examples/conll-2003.ipynb
+++ /dev/null
@@ -1,3710 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Conll 2003 evaluation\n",
-    "\n",
-    "Data downloaded from [here](https://github.com/kyzhouhzau/BERT-NER/tree/master/NERdata)."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%reload_ext autoreload\n",
-    "%autoreload 2\n",
-    "%matplotlib inline\n",
-    "\n",
-    "import pandas as pd\n",
-    "import warnings\n",
-    "import os\n",
-    "import sys\n",
-    "\n",
-    "sys.path.append(\"../\")\n",
-    "\n",
-    "warnings.filterwarnings(\"ignore\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data_path = \"/datadrive/conll-2003/\"\n",
-    "\n",
-    "train_path = data_path + \"train.txt\"\n",
-    "dev_path = data_path + \"dev.txt\"\n",
-    "test_path = data_path + \"test.txt\""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 0. Prc data for csv format"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 66,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import codecs\n",
-    "\n",
-    "\n",
-    "def read_data(input_file):\n",
-    "    \"\"\"Reads a BIO data.\"\"\"\n",
-    "    with codecs.open(input_file, \"r\", encoding=\"utf-8\") as f:\n",
-    "        lines = []\n",
-    "        words = []\n",
-    "        labels = []\n",
-    "        for line in f:\n",
-    "            contends = line.strip()\n",
-    "            word = line.strip().split(' ')[0]\n",
-    "            label = line.strip().split(' ')[-1]\n",
-    "            if contends.startswith(\"-DOCSTART-\"):\n",
-    "                words.append('')\n",
-    "                continue\n",
-    "            \n",
-    "            if len(contends) == 0 and not len(words):\n",
-    "                words.append(\"\")\n",
-    "            \n",
-    "            if len(contends) == 0 and words[-1] == '.':\n",
-    "                l = ' '.join([label for label in labels if len(label) > 0])\n",
-    "                w = ' '.join([word for word in words if len(word) > 0])\n",
-    "                lines.append([l, w])\n",
-    "                words = []\n",
-    "                labels = []\n",
-    "                continue\n",
-    "            words.append(word)\n",
-    "            labels.append(label.replace(\"-\", \"_\"))\n",
-    "        return lines\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 67,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "train_f = read_data(train_path)\n",
-    "dev_f = read_data(dev_path)\n",
-    "test_f = read_data(test_path)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "[l for l in train_f]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 68,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(6973, 1739, 1559)"
-      ]
-     },
-     "execution_count": 68,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "len(train_f), len(dev_f), len(test_f)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 69,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['B_ORG O B_MISC O O O B_MISC O O',\n",
-       " 'EU rejects German call to boycott British lamb .']"
-      ]
-     },
-     "execution_count": 69,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "train_f[0]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 70,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pandas as pd"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 71,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "train_df = pd.DataFrame(train_f, columns=[\"0\", \"1\"])\n",
-    "train_df.to_csv(data_path + \"train.csv\", index=False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 72,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "valid_df = pd.DataFrame(dev_f, columns=[\"0\", \"1\"])\n",
-    "valid_df.to_csv(data_path + \"valid.csv\", index=False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 73,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "test_df = pd.DataFrame(test_f, columns=[\"0\", \"1\"])\n",
-    "test_df.to_csv(data_path + \"test.csv\", index=False)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 1. Create data loaders"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "\n",
-    "data_path = \"/datadrive/conll-2003/\"\n",
-    "train_path = data_path + \"train.csv\"\n",
-    "valid_path = data_path + \"valid.csv\"\n",
-    "test_path = data_path + \"test.csv\"\n",
-    "\n",
-    "model_dir = \" /datadrive/models/multi_cased_L-12_H-768_A-12/\"\n",
-    "init_checkpoint_pt = os.path.join(\"/datadrive/models/multi_cased_L-12_H-768_A-12/\", \"pytorch_model.bin\")\n",
-    "bert_config_file = os.path.join(\"/datadrive/bert/multi_cased_L-12_H-768_A-12/\", \"bert_config.json\")\n",
-    "vocab_file = os.path.join(\"/datadrive/bert/multi_cased_L-12_H-768_A-12/\", \"vocab.txt\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(True, 0)"
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import torch\n",
-    "torch.cuda.set_device(0)\n",
-    "torch.cuda.is_available(), torch.cuda.current_device()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:summarizer.preprocessing.cleaner:'pattern' package not found; tag filters are not available for English\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules import BertNerData as NerData"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=6973), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=1739), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "data = NerData.create(train_path, valid_path, vocab_file)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(6973, 1739)"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "len(data.train_dl.dataset), len(data.valid_dl.dataset)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "['<pad>', '[CLS]', '[SEP]', 'B_ORG', 'B_O', 'I_O', 'B_MISC', 'B_PER', 'I_PER', 'B_LOC', 'I_LOC', 'I_ORG', 'I_MISC']\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(data.id2label)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sup_labels = ['B_ORG', 'B_MISC', 'B_PER', 'I_PER', 'B_LOC', 'I_LOC', 'I_ORG', 'I_MISC']"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "424"
-      ]
-     },
-     "execution_count": 10,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "max([len(f.labels_ids) for f in data.train_dl.dataset])"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 2. Create model"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.models.bert_models import BertBiLSTMAttnCRF"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "model = BertBiLSTMAttnCRF.create(len(data.label2idx), bert_config_file, init_checkpoint_pt, enc_hidden_dim=256)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "1151739"
-      ]
-     },
-     "execution_count": 13,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.get_n_trainable_params()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### TODO: fix bug with len"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 3. Create Learner"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules import NerLearner"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "num_epochs = 100\n",
-    "learner = NerLearner(model, data,\n",
-    "                     best_model_path=\"/datadrive/models/conll-2003/bilstm_attn_cased.cpt\",\n",
-    "                     lr=0.001, clip=1.0, sup_labels=data.id2label[5:],\n",
-    "                     t_total=num_epochs * len(data.train_dl))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 4. Start learning"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "learner.fit(num_epochs, target_metric='f1')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 5. Evaluate dev set"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=1739), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "from modules.data.bert_data import get_bert_data_loader_for_predict\n",
-    "dl = get_bert_data_loader_for_predict(data_path + \"valid.csv\", learner)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "learner.load_model()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "preds = learner.predict(dl)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "IOB precision"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.925     0.949     0.937      1282\n",
-      "      B_MISC      0.934     0.906     0.920       905\n",
-      "       B_PER      0.969     0.984     0.976      1686\n",
-      "       I_PER      0.980     0.983     0.981      3488\n",
-      "       B_LOC      0.970     0.955     0.963      1669\n",
-      "       I_LOC      0.977     0.921     0.948      1913\n",
-      "       I_ORG      0.916     0.936     0.926      2129\n",
-      "      I_MISC      0.917     0.838     0.876      1061\n",
-      "\n",
-      "   micro avg      0.955     0.945     0.950     14133\n",
-      "   macro avg      0.949     0.934     0.941     14133\n",
-      "weighted avg      0.955     0.945     0.950     14133\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules.train.train import validate_step\n",
-    "print(validate_step(learner.data.valid_dl, learner.model, learner.data.id2label, learner.sup_labels))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Span precision"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "        MISC      0.902     0.886     0.894       905\n",
-      "         LOC      0.925     0.914     0.920      1669\n",
-      "           O      0.992     0.992     0.992     41803\n",
-      "         ORG      0.849     0.875     0.862      1282\n",
-      "         PER      0.936     0.950     0.943      1686\n",
-      "\n",
-      "   micro avg      0.982     0.982     0.982     47345\n",
-      "   macro avg      0.921     0.923     0.922     47345\n",
-      "weighted avg      0.982     0.982     0.982     47345\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules.utils.plot_metrics import get_bert_span_report\n",
-    "clf_report = get_bert_span_report(dl, preds, [])\n",
-    "print(clf_report)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 6. Evaluate test set"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=1559), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "from modules.data.bert_data import get_bert_data_loader_for_predict\n",
-    "dl = get_bert_data_loader_for_predict(data_path + \"test.csv\", learner)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=98), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "preds = learner.predict(dl)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=6973), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=1559), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "data = NerData.create(train_path, data_path + \"test.csv\", vocab_file)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "IOB precision"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=98), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.874     0.906     0.890      1533\n",
-      "      B_MISC      0.824     0.830     0.827       688\n",
-      "       B_PER      0.961     0.964     0.962      1566\n",
-      "       I_PER      0.964     0.972     0.968      3347\n",
-      "       B_LOC      0.931     0.917     0.924      1570\n",
-      "       I_LOC      0.904     0.862     0.883      1444\n",
-      "       I_ORG      0.865     0.922     0.893      2546\n",
-      "      I_MISC      0.630     0.651     0.640       839\n",
-      "\n",
-      "   micro avg      0.896     0.909     0.902     13533\n",
-      "   macro avg      0.869     0.878     0.873     13533\n",
-      "weighted avg      0.897     0.909     0.903     13533\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules.train.train import validate_step\n",
-    "print(validate_step(data.valid_dl, learner.model, learner.data.id2label, learner.sup_labels))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Span precision"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "        MISC      0.759     0.794     0.776       688\n",
-      "         LOC      0.865     0.852     0.859      1570\n",
-      "           O      0.979     0.977     0.978     37693\n",
-      "         ORG      0.635     0.665     0.650      1533\n",
-      "         PER      0.878     0.879     0.878      1566\n",
-      "\n",
-      "   micro avg      0.955     0.955     0.955     43050\n",
-      "   macro avg      0.823     0.833     0.828     43050\n",
-      "weighted avg      0.955     0.955     0.955     43050\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules.utils.plot_metrics import get_bert_span_report\n",
-    "clf_report = get_bert_span_report(dl, preds, [])\n",
-    "print(clf_report)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 7. Get mean and stdv on 10 runs"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009988594480391467\n",
-      "INFO:root:\n",
-      "epoch 1, average train epoch loss=8.9381\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009954377770788668\n",
-      "INFO:root:\n",
-      "epoch 2, average train epoch loss=2.6673\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009897506632721709\n",
-      "INFO:root:\n",
-      "epoch 3, average train epoch loss=1.5642\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009818136929810007\n",
-      "INFO:root:\n",
-      "epoch 4, average train epoch loss=0.95785\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009829645918699543\n",
-      "INFO:root:\n",
-      "epoch 5, average train epoch loss=0.67193\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009863965743218892\n",
-      "INFO:root:\n",
-      "epoch 6, average train epoch loss=0.45879\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009920732813267874\n",
-      "INFO:root:\n",
-      "epoch 7, average train epoch loss=0.34029\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.00099997931621759\n",
-      "INFO:root:\n",
-      "epoch 8, average train epoch loss=0.29414\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009899006236929822\n",
-      "INFO:root:\n",
-      "epoch 9, average train epoch loss=0.2418\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009775817476504662\n",
-      "INFO:root:\n",
-      "epoch 10, average train epoch loss=0.19245\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009630791719937361\n",
-      "INFO:root:\n",
-      "epoch 11, average train epoch loss=0.19405\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009464079206871408\n",
-      "INFO:root:\n",
-      "epoch 12, average train epoch loss=0.13536\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009564965809716738\n",
-      "INFO:root:\n",
-      "epoch 13, average train epoch loss=0.13186\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009688253964396686\n",
-      "INFO:root:\n",
-      "epoch 14, average train epoch loss=0.12742\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009833179407998218\n",
-      "INFO:root:\n",
-      "epoch 15, average train epoch loss=0.11116\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009999593729379682\n",
-      "INFO:root:\n",
-      "epoch 16, average train epoch loss=0.11768\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009812650576713183\n",
-      "INFO:root:\n",
-      "epoch 17, average train epoch loss=0.11684\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009603700114833495\n",
-      "INFO:root:\n",
-      "epoch 18, average train epoch loss=0.11418\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009373700593990951\n",
-      "INFO:root:\n",
-      "epoch 19, average train epoch loss=0.10928\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009122796832781196\n",
-      "INFO:root:\n",
-      "epoch 20, average train epoch loss=0.070534\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009309836066412781\n",
-      "INFO:root:\n",
-      "epoch 21, average train epoch loss=0.087153\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009518882336136918\n",
-      "INFO:root:\n",
-      "epoch 22, average train epoch loss=0.075947\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009748785163573578\n",
-      "INFO:root:\n",
-      "epoch 23, average train epoch loss=0.10177\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009999401492651726\n",
-      "INFO:root:\n",
-      "epoch 24, average train epoch loss=0.10823\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009729410859499124\n",
-      "INFO:root:\n",
-      "epoch 25, average train epoch loss=0.080489\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009437793207548021\n",
-      "INFO:root:\n",
-      "epoch 26, average train epoch loss=0.063801\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.000912588600293419\n",
-      "INFO:root:\n",
-      "epoch 27, average train epoch loss=0.058319\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0008793828838811322\n",
-      "INFO:root:\n",
-      "epoch 28, average train epoch loss=0.059022\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009063912086071017\n",
-      "INFO:root:\n",
-      "epoch 29, average train epoch loss=0.045475\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.00093556220888635\n",
-      "INFO:root:\n",
-      "epoch 30, average train epoch loss=0.059438\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009667436089027941\n",
-      "INFO:root:\n",
-      "epoch 31, average train epoch loss=0.059907\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009999216192338662\n",
-      "INFO:root:\n",
-      "epoch 32, average train epoch loss=0.051479\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009649174653736884\n",
-      "INFO:root:\n",
-      "epoch 33, average train epoch loss=0.056105\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009277872664856822\n",
-      "INFO:root:\n",
-      "epoch 34, average train epoch loss=0.053368\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0008887013224597101\n",
-      "INFO:root:\n",
-      "epoch 35, average train epoch loss=0.064154\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0008476730889217369\n",
-      "INFO:root:\n",
-      "epoch 36, average train epoch loss=0.040207\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0008826861700161747\n",
-      "INFO:root:\n",
-      "epoch 37, average train epoch loss=0.050739\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009198252707618027\n",
-      "INFO:root:\n",
-      "epoch 38, average train epoch loss=0.028476\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009589022306493978\n",
-      "INFO:root:\n",
-      "epoch 39, average train epoch loss=0.079055\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009999037578156096\n",
-      "INFO:root:\n",
-      "epoch 40, average train epoch loss=0.054015\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009571833584706517\n",
-      "INFO:root:\n",
-      "epoch 41, average train epoch loss=0.040161\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009123722482727018\n",
-      "INFO:root:\n",
-      "epoch 42, average train epoch loss=0.048201\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0008656759614480127\n",
-      "INFO:root:\n",
-      "epoch 43, average train epoch loss=0.041866\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0008171074681076045\n",
-      "INFO:root:\n",
-      "epoch 44, average train epoch loss=0.050747\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0008598364725683528\n",
-      "INFO:root:\n",
-      "epoch 45, average train epoch loss=0.025911\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009046561634211117\n",
-      "INFO:root:\n",
-      "epoch 46, average train epoch loss=0.028397\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009513437902791463\n",
-      "INFO:root:\n",
-      "epoch 47, average train epoch loss=0.031361\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.000999886540885058\n",
-      "INFO:root:\n",
-      "epoch 48, average train epoch loss=0.034734\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009497283188137605\n",
-      "INFO:root:\n",
-      "epoch 49, average train epoch loss=0.02559\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0008975134451128505\n",
-      "INFO:root:\n",
-      "epoch 50, average train epoch loss=0.029419\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0008434814169959513\n",
-      "INFO:root:\n",
-      "epoch 51, average train epoch loss=0.035056\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0007876447365778154\n",
-      "INFO:root:\n",
-      "epoch 52, average train epoch loss=0.025335\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0008378112532693288\n",
-      "INFO:root:\n",
-      "epoch 53, average train epoch loss=0.038271\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0008900343980120909\n",
-      "INFO:root:\n",
-      "epoch 54, average train epoch loss=0.026468\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009440580786370757\n",
-      "INFO:root:\n",
-      "epoch 55, average train epoch loss=0.019331\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009998699451873722\n",
-      "INFO:root:\n",
-      "epoch 56, average train epoch loss=0.027795\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.000942542276910978\n",
-      "INFO:root:\n",
-      "epoch 57, average train epoch loss=0.025859\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0008831907872805493\n",
-      "INFO:root:\n",
-      "epoch 58, average train epoch loss=0.020375\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0008220877110217311\n",
-      "INFO:root:\n",
-      "epoch 59, average train epoch loss=0.017998\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0007592450991395642\n",
-      "INFO:root:\n",
-      "epoch 60, average train epoch loss=0.05226\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0008165807627441099\n",
-      "INFO:root:\n",
-      "epoch 61, average train epoch loss=0.028018\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0008759402249750965\n",
-      "INFO:root:\n",
-      "epoch 62, average train epoch loss=0.016244\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009370352549418169\n",
-      "INFO:root:\n",
-      "epoch 63, average train epoch loss=0.010752\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.00099985394830681\n",
-      "INFO:root:\n",
-      "epoch 64, average train epoch loss=0.023369\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009356155266044499\n",
-      "INFO:root:\n",
-      "epoch 65, average train epoch loss=0.18327\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0008693849292195535\n",
-      "INFO:root:\n",
-      "epoch 66, average train epoch loss=0.15579\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0008014659471328631\n",
-      "INFO:root:\n",
-      "epoch 67, average train epoch loss=0.02635\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0007318701965169524\n",
-      "INFO:root:\n",
-      "epoch 68, average train epoch loss=0.014648\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0007961163250546575\n",
-      "INFO:root:\n",
-      "epoch 69, average train epoch loss=0.013891\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0008623546073673774\n",
-      "INFO:root:\n",
-      "epoch 70, average train epoch loss=0.01578\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009302658334936922\n",
-      "INFO:root:\n",
-      "epoch 71, average train epoch loss=0.015194\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009998385286364502\n",
-      "INFO:root:\n",
-      "epoch 72, average train epoch loss=0.02194\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009289387119604383\n",
-      "INFO:root:\n",
-      "epoch 73, average train epoch loss=0.018623\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0008560772234129887\n",
-      "INFO:root:\n",
-      "epoch 74, average train epoch loss=0.026317\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0007815882715959252\n",
-      "INFO:root:\n",
-      "epoch 75, average train epoch loss=0.019105\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0007054830535392704\n",
-      "INFO:root:\n",
-      "epoch 76, average train epoch loss=0.009021\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0007763902989674113\n",
-      "INFO:root:\n",
-      "epoch 77, average train epoch loss=0.013462\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0008492591951499509\n",
-      "INFO:root:\n",
-      "epoch 78, average train epoch loss=0.014465\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009237406708624169\n",
-      "INFO:root:\n",
-      "epoch 79, average train epoch loss=0.015538\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.000999823665349005\n",
-      "INFO:root:\n",
-      "epoch 80, average train epoch loss=0.024188\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009225028146322963\n",
-      "INFO:root:\n",
-      "epoch 81, average train epoch loss=0.02295\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0008432496951962235\n",
-      "INFO:root:\n",
-      "epoch 82, average train epoch loss=0.033862\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0007624278357146333\n",
-      "INFO:root:\n",
-      "epoch 83, average train epoch loss=0.016722\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0006800480291987818\n",
-      "INFO:root:\n",
-      "epoch 84, average train epoch loss=0.14866\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0007573760406183898\n",
-      "INFO:root:\n",
-      "epoch 85, average train epoch loss=0.0059774\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0008366363004022818\n",
-      "INFO:root:\n",
-      "epoch 86, average train epoch loss=0.008101\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009174509535371078\n",
-      "INFO:root:\n",
-      "epoch 87, average train epoch loss=0.017693\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009998093383686922\n",
-      "INFO:root:\n",
-      "epoch 88, average train epoch loss=0.018855\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0009162991416794254\n",
-      "INFO:root:\n",
-      "epoch 89, average train epoch loss=0.019623\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0008308850184785628\n",
-      "INFO:root:\n",
-      "epoch 90, average train epoch loss=0.022019\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.0007439587595654187\n",
-      "INFO:root:\n",
-      "epoch 91, average train epoch loss=0.017575\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=109), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=436), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "from modules.utils.plot_metrics import *\n",
-    "from modules import NerLearner\n",
-    "\n",
-    "\n",
-    "num_runs = 10\n",
-    "best_reports = []\n",
-    "num_epochs = 100\n",
-    "for i in range(num_runs):\n",
-    "    model = BertBiLSTMAttnCRF.create(len(data.label2idx), bert_config_file, init_checkpoint_pt, enc_hidden_dim=256)\n",
-    "    best_model_path = \"/datadrive/models/conll-2003/exp_{}_attn_cased.cpt\".format(i)\n",
-    "    learner = NerLearner(model, data,\n",
-    "                         best_model_path=best_model_path, verbose=False,\n",
-    "                         lr=0.001, clip=5.0, sup_labels=data.id2label[5:], t_total=num_epochs * len(data.train_dl))\n",
-    "    learner.fit(num_epochs, target_metric='f1')\n",
-    "    idx, res = get_mean_max_metric(learner.history, \"f1\", True)\n",
-    "    best_reports.append(learner.history[idx])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### f1"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Mean and std"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(0.949, 0.002)"
-      ]
-     },
-     "execution_count": 18,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "np.mean([get_mean_max_metric([r]) for r in best_reports]), np.round(np.std([get_mean_max_metric([r]) for r in best_reports]), 3)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Best"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.951"
-      ]
-     },
-     "execution_count": 19,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "get_mean_max_metric(best_reports)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### precision"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Mean and std"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(0.9558333333333332, 0.002)"
-      ]
-     },
-     "execution_count": 20,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "np.mean([get_mean_max_metric([r], \"prec\") for r in best_reports]), np.round(np.std([get_mean_max_metric([r], \"prec\") for r in best_reports]), 3)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Best"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.959"
-      ]
-     },
-     "execution_count": 21,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "get_mean_max_metric(best_reports, \"prec\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### Test set"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "idx = np.array([get_mean_max_metric([r]) for r in best_reports]).argmax()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "learner.load_model(\"/datadrive/models/conll-2003/exp_{}_attn_cased.cpt\".format(idx))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.data.bert_data import get_bert_data_loader_for_predict\n",
-    "dl = get_bert_data_loader_for_predict(data_path + \"test.csv\", learner)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 37,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=98), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "         I_O      0.968     0.972     0.970     10257\n",
-      "      B_MISC      0.873     0.817     0.844       688\n",
-      "       B_PER      0.961     0.963     0.962      1566\n",
-      "       I_PER      0.970     0.971     0.970      3347\n",
-      "       B_LOC      0.932     0.926     0.929      1570\n",
-      "       I_LOC      0.904     0.870     0.887      1444\n",
-      "       I_ORG      0.888     0.922     0.905      2546\n",
-      "      I_MISC      0.746     0.611     0.672       839\n",
-      "\n",
-      "   micro avg      0.942     0.937     0.940     22257\n",
-      "   macro avg      0.905     0.882     0.892     22257\n",
-      "weighted avg      0.941     0.937     0.939     22257\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules.train.train import validate_step\n",
-    "print(validate_step(dl, learner.model, learner.data.id2label, learner.sup_labels))"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/examples/factrueval-nmt.ipynb b/examples/factrueval-nmt.ipynb
deleted file mode 100644
index 53fe637..0000000
--- a/examples/factrueval-nmt.ipynb
+++ /dev/null
@@ -1,423 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### FactRuEval nmt evaluation"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%reload_ext autoreload\n",
-    "%autoreload 2\n",
-    "%matplotlib inline\n",
-    "\n",
-    "import warnings\n",
-    "import sys\n",
-    "\n",
-    "sys.path.append(\"../\")\n",
-    "\n",
-    "warnings.filterwarnings(\"ignore\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "\n",
-    "\n",
-    "data_path = \"/home/lis/ner/ulmfit/data/factrueval/\"\n",
-    "train_path = os.path.join(data_path, \"train_with_pos.csv\")\n",
-    "valid_path = os.path.join(data_path, \"valid_with_pos.csv\")\n",
-    "model_dir = \" /datadrive/models/multi_cased_L-12_H-768_A-12/\"\n",
-    "init_checkpoint_pt = os.path.join(\"/datadrive/models/multi_cased_L-12_H-768_A-12/\", \"pytorch_model.bin\")\n",
-    "bert_config_file = os.path.join(\"/datadrive/bert/multi_cased_L-12_H-768_A-12/\", \"bert_config.json\")\n",
-    "vocab_file = os.path.join(\"/datadrive/bert/multi_cased_L-12_H-768_A-12/\", \"vocab.txt\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(True, 1)"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import torch\n",
-    "torch.cuda.set_device(1)\n",
-    "torch.cuda.is_available(), torch.cuda.current_device()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 1. Create dataloaders"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:summarizer.preprocessing.cleaner:'pattern' package not found; tag filters are not available for English\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules import BertNerData as NerData"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data = NerData.create(train_path, valid_path, vocab_file)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "For factrueval we use the following sample of labels:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "{'<pad>': 0, '[CLS]': 1, '[SEP]': 2, 'B_O': 3, 'I_O': 4, 'B_ORG': 5, 'I_ORG': 6, 'B_LOC': 7, 'I_LOC': 8, 'B_PER': 9, 'I_PER': 10}\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(data.label2idx)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 2. Create model\n",
-    "For creating pytorch model we need to create `NerModel` object."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.models.bert_models import BertBiLSTMAttnNMT"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "model = BertBiLSTMAttnNMT.create(len(data.label2idx), bert_config_file, init_checkpoint_pt,\n",
-    "                                 enc_hidden_dim=128, dec_hidden_dim=128, dec_embedding_dim=16)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "NMTDecoder(\n",
-       "  (embedding): Embedding(11, 16)\n",
-       "  (lstm): LSTM(272, 128, batch_first=True)\n",
-       "  (attn): Linear(in_features=128, out_features=128, bias=True)\n",
-       "  (slot_out): Linear(in_features=256, out_features=11, bias=True)\n",
-       "  (loss): CrossEntropyLoss()\n",
-       ")"
-      ]
-     },
-     "execution_count": 11,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.decoder"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "652360"
-      ]
-     },
-     "execution_count": 12,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.get_n_trainable_params()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 3. Create learner\n",
-    "\n",
-    "For training our pytorch model we need to create `NerLearner` object."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules import NerLearner"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:Don't use lr scheduler...\n"
-     ]
-    }
-   ],
-   "source": [
-    "num_epochs = 100\n",
-    "learner = NerLearner(model, data,\n",
-    "                     best_model_path=\"/datadrive/models/factrueval/final_attn_cased_nmt.cpt\",\n",
-    "                     lr=0.01, clip=1.0, sup_labels=data.id2label[5:],\n",
-    "                     t_total=num_epochs * len(data.train_dl))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 4. Learn your NER model\n",
-    "Call `learner.fit`"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "learner.fit(num_epochs, target_metric='prec')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 5. Evaluate\n",
-    "Create new data loader from existing path."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.data.bert_data import get_bert_data_loader_for_predict"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dl = get_bert_data_loader_for_predict(data_path + \"valid_with_pos.csv\", learner)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "learner.load_model()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "preds = learner.predict(dl)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "IOB precision"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.870     0.776     0.820       259\n",
-      "       I_ORG      0.937     0.775     0.848      1000\n",
-      "       B_LOC      0.914     0.880     0.897       192\n",
-      "       I_LOC      0.894     0.835     0.863       303\n",
-      "       B_PER      0.958     0.979     0.968       188\n",
-      "       I_PER      0.974     0.978     0.976       649\n",
-      "\n",
-      "   micro avg      0.935     0.856     0.894      2591\n",
-      "   macro avg      0.925     0.871     0.896      2591\n",
-      "weighted avg      0.934     0.856     0.892      2591\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules.train.train import validate_step\n",
-    "print(validate_step(learner.data.valid_dl, learner.model, learner.data.id2label, learner.sup_labels))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Span precision"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "         LOC      0.840     0.823     0.832       192\n",
-      "         PER      0.870     0.888     0.879       188\n",
-      "         ORG      0.770     0.726     0.748       259\n",
-      "\n",
-      "   micro avg      0.822     0.803     0.812       639\n",
-      "   macro avg      0.827     0.812     0.819       639\n",
-      "weighted avg      0.821     0.803     0.811       639\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules.utils.plot_metrics import get_bert_span_report\n",
-    "clf_report = get_bert_span_report(dl, preds)\n",
-    "print(clf_report)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/examples/factrueval.ipynb b/examples/factrueval.ipynb
deleted file mode 100644
index e785964..0000000
--- a/examples/factrueval.ipynb
+++ /dev/null
@@ -1,790 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### FactRuEval example (Cased model), MutiHeadAttention"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%reload_ext autoreload\n",
-    "%autoreload 2\n",
-    "%matplotlib inline\n",
-    "\n",
-    "import warnings\n",
-    "import sys\n",
-    "\n",
-    "sys.path.append(\"../\")\n",
-    "\n",
-    "warnings.filterwarnings(\"ignore\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "\n",
-    "\n",
-    "data_path = \"/home/lis/ner/ulmfit/data/factrueval/\"\n",
-    "train_path = os.path.join(data_path, \"train_with_pos.csv\")\n",
-    "valid_path = os.path.join(data_path, \"valid_with_pos.csv\")\n",
-    "model_dir = \" /datadrive/models/multi_cased_L-12_H-768_A-12/\"\n",
-    "init_checkpoint_pt = os.path.join(\"/datadrive/models/multi_cased_L-12_H-768_A-12/\", \"pytorch_model.bin\")\n",
-    "bert_config_file = os.path.join(\"/datadrive/bert/multi_cased_L-12_H-768_A-12/\", \"bert_config.json\")\n",
-    "vocab_file = os.path.join(\"/datadrive/bert/multi_cased_L-12_H-768_A-12/\", \"vocab.txt\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(True, 1)"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import torch\n",
-    "torch.cuda.set_device(1)\n",
-    "torch.cuda.is_available(), torch.cuda.current_device()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 1. Create dataloaders"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:summarizer.preprocessing.cleaner:'pattern' package not found; tag filters are not available for English\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules import BertNerData as NerData"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=3728), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=415), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r"
-     ]
-    }
-   ],
-   "source": [
-    "data = NerData.create(train_path, valid_path, vocab_file)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "For factrueval we use the following sample of labels:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "{'<pad>': 0, '[CLS]': 1, '[SEP]': 2, 'B_O': 3, 'I_O': 4, 'B_ORG': 5, 'I_ORG': 6, 'B_LOC': 7, 'I_LOC': 8, 'B_PER': 9, 'I_PER': 10}\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(data.label2idx)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 2. Create model\n",
-    "For creating pytorch model we need to create `NerModel` object."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.models.bert_models import BertBiLSTMAttnCRF"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "model = BertBiLSTMAttnCRF.create(len(data.label2idx), bert_config_file, init_checkpoint_pt, enc_hidden_dim=256)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "AttnCRFDecoder(\n",
-       "  (attn): MultiHeadAttention(\n",
-       "    (attention): _MultiHeadAttention(\n",
-       "      (attention): ScaledDotProductAttention(\n",
-       "        (softmax): Softmax()\n",
-       "        (dropout): Dropout(p=0.5)\n",
-       "      )\n",
-       "    )\n",
-       "    (proj): Linear(in_features=192, out_features=256, bias=True)\n",
-       "    (dropout): Dropout(p=0.5)\n",
-       "    (layer_norm): LayerNormalization()\n",
-       "  )\n",
-       "  (linear): Linears(\n",
-       "    (linears): ModuleList(\n",
-       "      (0): Linear(in_features=256, out_features=128, bias=True)\n",
-       "    )\n",
-       "    (output_linear): Linear(in_features=128, out_features=11, bias=True)\n",
-       "  )\n",
-       "  (crf): CRF()\n",
-       ")"
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.decoder"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "1151425"
-      ]
-     },
-     "execution_count": 18,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.get_n_trainable_params()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 3. Create learner\n",
-    "\n",
-    "For training our pytorch model we need to create `NerLearner` object."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules import NerLearner"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "num_epochs = 100\n",
-    "learner = NerLearner(model, data,\n",
-    "                     best_model_path=\"/datadrive/models/factrueval/exp_final_attn_cased1.cpt\",\n",
-    "                     lr=0.001, clip=1.0, sup_labels=data.id2label[5:],\n",
-    "                     t_total=num_epochs * len(data.train_dl))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 4. Learn your NER model\n",
-    "Call `learner.fit`"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "learner.fit(num_epochs, target_metric='f1')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 5. Evaluate\n",
-    "Create new data loader from existing path."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.data.bert_data import get_bert_data_loader_for_predict"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=415), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r"
-     ]
-    }
-   ],
-   "source": [
-    "dl = get_bert_data_loader_for_predict(data_path + \"valid.csv\", learner)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "learner.load_model()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r"
-     ]
-    }
-   ],
-   "source": [
-    "preds = learner.predict(dl)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "IOB precision"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.846     0.869     0.857       259\n",
-      "       I_ORG      0.935     0.855     0.893      1000\n",
-      "       B_LOC      0.926     0.911     0.919       192\n",
-      "       I_LOC      0.909     0.861     0.885       303\n",
-      "       B_PER      0.969     0.984     0.976       188\n",
-      "       I_PER      0.982     0.983     0.982       649\n",
-      "\n",
-      "   micro avg      0.937     0.903     0.919      2591\n",
-      "   macro avg      0.928     0.911     0.919      2591\n",
-      "weighted avg      0.937     0.903     0.919      2591\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules.train.train import validate_step\n",
-    "print(validate_step(learner.data.valid_dl, learner.model, learner.data.id2label, learner.sup_labels))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Tokens report"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 72,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from sklearn_crfsuite.metrics import flat_classification_report"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 73,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.utils.utils import bert_labels2tokens"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 74,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "pred_tokens, pred_labels = bert_labels2tokens(dl, preds)\n",
-    "true_tokens, true_labels = bert_labels2tokens(dl, [x.labels for x in dl.dataset])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 75,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "assert pred_tokens == true_tokens\n",
-    "tokens_report = flat_classification_report(true_labels, pred_labels)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 76,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       I_LOC       0.93      0.90      0.92       230\n",
-      "         I_O       0.99      0.99      0.99      7203\n",
-      "       I_ORG       0.92      0.87      0.89       543\n",
-      "       I_PER       0.98      0.98      0.98       321\n",
-      "\n",
-      "   micro avg       0.98      0.98      0.98      8297\n",
-      "   macro avg       0.96      0.94      0.95      8297\n",
-      "weighted avg       0.98      0.98      0.98      8297\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(tokens_report)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 133,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.utils.plot_metrics import analyze_bert_errors"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 134,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "res_tokens, res_labels, errors = analyze_bert_errors(dl, preds)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 136,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "88"
-      ]
-     },
-     "execution_count": 136,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "len([error for error in errors if error])"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Span precision"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 79,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.utils.utils import voting_choicer"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 80,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "         ORG      0.809     0.834     0.821       259\n",
-      "         LOC      0.851     0.859     0.855       192\n",
-      "         PER      0.936     0.936     0.936       188\n",
-      "\n",
-      "   micro avg      0.858     0.872     0.865       639\n",
-      "   macro avg      0.865     0.877     0.871       639\n",
-      "weighted avg      0.859     0.872     0.865       639\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(get_bert_span_report(dl, preds, fn=voting_choicer))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 6. Get mean and stdv on 10 runs"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.utils.plot_metrics import *\n",
-    "\n",
-    "\n",
-    "num_runs = 10\n",
-    "best_reports = []\n",
-    "try:\n",
-    "    for i in range(num_runs):\n",
-    "        model = BertBiLSTMAttnCRF.create(len(data.label2idx), bert_config_file, init_checkpoint_pt, enc_hidden_dim=256)\n",
-    "        best_model_path = \"/datadrive/models/factrueval/exp_{}_attn_cased.cpt\".format(i)\n",
-    "        learner = NerLearner(model, data,\n",
-    "                             best_model_path=best_model_path, verbose=False,\n",
-    "                             base_lr=0.0001, lr_max=0.001, clip=5.0, use_lr_scheduler=True, sup_labels=data.id2label[5:])\n",
-    "        learner.fit(100, target_metric='prec')\n",
-    "        idx, res = get_mean_max_metric(learner.history, \"f1\", True)\n",
-    "        best_reports.append(learner.history[idx])\n",
-    "except KeyboardInterrupt:\n",
-    "    print(\"End of exp\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 37,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### f1"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Mean and std"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 45,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(0.9163, 0.006)"
-      ]
-     },
-     "execution_count": 45,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "np.mean([get_mean_max_metric([r]) for r in best_reports]), np.round(np.std([get_mean_max_metric([r]) for r in best_reports]), 3)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Best"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 41,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.926"
-      ]
-     },
-     "execution_count": 41,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "get_mean_max_metric(best_reports)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### precision"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Mean and std"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 46,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(0.9253000000000002, 0.007)"
-      ]
-     },
-     "execution_count": 46,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "np.mean([get_mean_max_metric([r], \"prec\") for r in best_reports]), np.round(np.std([get_mean_max_metric([r], \"prec\") for r in best_reports]), 3)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Best"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 43,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.934"
-      ]
-     },
-     "execution_count": 43,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "get_mean_max_metric(best_reports, \"prec\")"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/examples/samples.ipynb b/examples/samples.ipynb
deleted file mode 100644
index 8cb408a..0000000
--- a/examples/samples.ipynb
+++ /dev/null
@@ -1,1019 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### FactRuEval example (uncased model)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%reload_ext autoreload\n",
-    "%autoreload 2\n",
-    "%matplotlib inline\n",
-    "\n",
-    "import warnings\n",
-    "import sys\n",
-    "\n",
-    "sys.path.append(\"../\")\n",
-    "\n",
-    "warnings.filterwarnings(\"ignore\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 0. Download pretrained bert model\n",
-    "Download pretrained bert [uncased](https://storage.googleapis.com/bert_models/2018_11_03/multilingual_L-12_H-768_A-12.zip), [cased](https://storage.googleapis.com/bert_models/2018_11_23/multi_cased_L-12_H-768_A-12.zip) (recommended) and unzip.\n",
-    "\n",
-    "Use the following commands to convert the TensorFlow model to PyTorch:\n",
-    "\n",
-    "\n",
-    "```export BERT_BASE_DIR=/path/to/bert/multilingual_L-12_H-768_A-12```\n",
-    "\n",
-    "```python3 convert_tf_checkpoint_to_pytorch.py \\```\n",
-    "\n",
-    "```  --tf_checkpoint_path $BERT_BASE_DIR/bert_model.ckpt \\ ```\n",
-    "\n",
-    "```  --bert_config_file $BERT_BASE_DIR/bert_config.json \\```\n",
-    "\n",
-    "```  --pytorch_dump_path $BERT_BASE_DIR/pytorch_model.bin```"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "\n",
-    "\n",
-    "data_path = \"/home/lis/ner/ulmfit/data/factrueval/\"\n",
-    "train_path = os.path.join(data_path, \"train_with_pos.csv\")\n",
-    "valid_path = os.path.join(data_path, \"valid_with_pos.csv\")\n",
-    "model_dir = \"/datadrive/models/multilingual_L-12_H-768_A-12/\"\n",
-    "init_checkpoint_pt = \"/datadrive/models/multilingual_L-12_H-768_A-12/pytorch_model.bin\"\n",
-    "bert_config_file = os.path.join(model_dir, \"bert_config.json\")\n",
-    "vocab_file = os.path.join(model_dir, \"vocab.txt\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(True, 1)"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import torch\n",
-    "torch.cuda.set_device(1)\n",
-    "torch.cuda.is_available(), torch.cuda.current_device()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 1. Data preparation"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Train and validation data should be presented in the following format."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>0</th>\n",
-       "      <th>1</th>\n",
-       "      <th>3</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>O O O O O O O O O O O O O O O O O O O O</td>\n",
-       "      <td>Мифология солнцеворота , собственно , и сводит...</td>\n",
-       "      <td>NOUN NOUN PNCT ADVB PNCT CONJ VERB PREP NOUN N...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>O O O O O O B_ORG I_ORG O B_ORG I_ORG O O O O ...</td>\n",
-       "      <td>По его словам , с покупкой Caramba TV « СТС Ме...</td>\n",
-       "      <td>PREP NPRO NOUN PNCT PREP NOUN &lt;unk&gt; &lt;unk&gt; PNCT...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>O O O O O O O O O O O O O O O O B_LOC O</td>\n",
-       "      <td>Такое десятилетие , по его словам « необходимо...</td>\n",
-       "      <td>ADJF NOUN PNCT PREP NPRO NOUN PNCT ADJS ADJF P...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>O O O O O O O O O O O O O O</td>\n",
-       "      <td>Правительство уволило часть врачей , обвинив и...</td>\n",
-       "      <td>NOUN VERB NOUN NOUN PNCT GRND NPRO PREP NOUN N...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>O O O B_PER I_PER O O O O O O B_ORG I_ORG I_OR...</td>\n",
-       "      <td>Министр сельского хозяйства Николай Федоров пр...</td>\n",
-       "      <td>NOUN ADJF NOUN NOUN NOUN VERB PNCT CONJ PRTF V...</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                                   0  \\\n",
-       "0            O O O O O O O O O O O O O O O O O O O O   \n",
-       "1  O O O O O O B_ORG I_ORG O B_ORG I_ORG O O O O ...   \n",
-       "2            O O O O O O O O O O O O O O O O B_LOC O   \n",
-       "3                        O O O O O O O O O O O O O O   \n",
-       "4  O O O B_PER I_PER O O O O O O B_ORG I_ORG I_OR...   \n",
-       "\n",
-       "                                                   1  \\\n",
-       "0  Мифология солнцеворота , собственно , и сводит...   \n",
-       "1  По его словам , с покупкой Caramba TV « СТС Ме...   \n",
-       "2  Такое десятилетие , по его словам « необходимо...   \n",
-       "3  Правительство уволило часть врачей , обвинив и...   \n",
-       "4  Министр сельского хозяйства Николай Федоров пр...   \n",
-       "\n",
-       "                                                   3  \n",
-       "0  NOUN NOUN PNCT ADVB PNCT CONJ VERB PREP NOUN N...  \n",
-       "1  PREP NPRO NOUN PNCT PREP NOUN <unk> <unk> PNCT...  \n",
-       "2  ADJF NOUN PNCT PREP NPRO NOUN PNCT ADJS ADJF P...  \n",
-       "3  NOUN VERB NOUN NOUN PNCT GRND NPRO PREP NOUN N...  \n",
-       "4  NOUN ADJF NOUN NOUN NOUN VERB PNCT CONJ PRTF V...  "
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import pandas as pd\n",
-    "\n",
-    "\n",
-    "df = pd.read_csv(train_path)\n",
-    "df.head()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Train and valid .csv files must have columns named (0, 1). Column 3 isn't necessary (it is not used now).\n",
-    "* Column 0 contains labels in IOB format.\n",
-    "* Column 1 contains tokenized and separated (by whitespace) text.\n",
-    "\n",
-    "To use the data in the model, we need to create a `NerData` object.\n",
-    "\n",
-    "* `train_path` - path to train .csv file\n",
-    "* `valid_path` - path to valid .csv file\n",
-    "* `vocab_file` - path to google bert pretrained vocab\n",
-    "* `batch_size` - batch size (default `16`)\n",
-    "* `cuda` - using cuda or cpu (default `True`)\n",
-    "* `is_cls` - create data for joint model (default `False`)\n",
-    "* `data_type` - type of input embeddings (default `bert`)\n",
-    "* `max_seq_len` - max sequence len for BERT tokens (default `424`)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:summarizer.preprocessing.cleaner:'pattern' package not found; tag filters are not available for English\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules import BertNerData as NerData"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=3728), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=415), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r"
-     ]
-    }
-   ],
-   "source": [
-    "data = NerData.create(train_path, valid_path, vocab_file)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "For factrueval we use the following sample of labels:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "{'<pad>': 0, '[CLS]': 1, '[SEP]': 2, 'B_O': 3, 'I_O': 4, 'B_ORG': 5, 'I_ORG': 6, 'B_LOC': 7, 'B_PER': 8, 'I_PER': 9, 'I_LOC': 10}\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(data.label2idx)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 2. Create model\n",
-    "For creating pytorch model we need to create `NerModel` object.\n",
-    "\n",
-    "* `label_size` - number of labels: `len(data.label2idx)`,\n",
-    "\n",
-    "BertEmbedder params\n",
-    "* `bert_config_file` - path to google bert pretrained config\n",
-    "* `init_checkpoint_pt` - path to google bert pretrained weights\n",
-    "* `embedding_dim` - output dim from bert model (default `768`)\n",
-    "* `bert_mode` - how the BERT output is returned. If `last`, return the output of the last layer. If `weighted`, return a weighted sum of all BERT output layers; the weights are learnable (as in ELMo).\n",
-    "* `freeze` - freezing bert model (default `True`)\n",
-    "\n",
-    "BertBiLSTMEncoder params\n",
-    "* `enc_hidden_dim` - dim of rnn layer or hidden layer (default `128`)\n",
-    "* `rnn_layers` - number of rnn layers in encoder\n",
-    "\n",
-    "CRFDecoder params\n",
-    "* `input_dropout` - dropout param (default `0.5`),\n",
-    "\n",
-    "Gpu or cpu:\n",
-    "* `use_cuda` - use cuda or cpu (default `True`)."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.models import BertBiLSTMCRF"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "model = BertBiLSTMCRF.create(len(data.label2idx), bert_config_file, init_checkpoint_pt, enc_hidden_dim=128, freeze=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "436161"
-      ]
-     },
-     "execution_count": 12,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.get_n_trainable_params()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 3. Create learner\n",
-    "\n",
-    "For training our pytorch model we need to create `NerLearner` object.\n",
-    "\n",
-    "* `model: NerModel` - pytorch model\n",
-    "* `data: NerData` - train and valid dataloaders\n",
-    "* `best_model_path` - path for storing the best model\n",
-    "* `lr` - starting learning rate (default `0.001`)\n",
-    "* `betas` - params for default optimizer (default `[0.8, 0.9]`)\n",
-    "* `clip` - grad clipping (default `5`)\n",
-    "* `verbose` - printing to console reports (default `True`)\n",
-    "* `sup_labels` - list of supported labels for calculating `target_metric` metric. For FactRuEval use: `['B_LOC', 'I_LOC', 'B_ORG', 'I_ORG', 'B_PER', 'I_PER']` (default `None`)\n",
-    "* `t_total` - total optimization steps, used for the lr scheduler; if -1, the lr is not scaled after each batch iteration (default `-1`). Usually t_total = num_epochs * train_size / batch_size\n",
-    "* `warmup` - portion of t_total for the warmup, -1  means no warmup (default `0.1`)\n",
-    "* `weight_decay` - weight decay (default `0.01`)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules import NerLearner"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "num_epochs = 1\n",
-    "\n",
-    "learner = NerLearner(model, data,\n",
-    "                     best_model_path=\"/datadrive/models/factrueval/exp_final.cpt\",\n",
-    "                     lr=0.001, clip=5.0, sup_labels=data.id2label[5:],\n",
-    "                     t_total=num_epochs * len(data.train_dl))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 4. Learn your NER model\n",
-    "Call `learner.fit`\n",
-    "* `epochs` - number of train iterations (default `100`)\n",
-    "* `resume_history` - whether to keep appending results to the existing history or create a new one (default `True`)\n",
-    "* `target_metric` - the mean metric used to pick the best epoch (default `f1`)."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:Resuming train... Current epoch 0.\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=233), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "epoch 1, average train epoch loss=14.153\n",
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:on epoch 0 by max_f1: 0.342\n",
-      "INFO:root:Saving new best model...\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.435     0.181     0.256       259\n",
-      "       I_ORG      0.657     0.458     0.540       506\n",
-      "       B_LOC      0.320     0.286     0.302       192\n",
-      "       B_PER      0.407     0.314     0.354       188\n",
-      "       I_PER      0.527     0.640     0.578       136\n",
-      "       I_LOC      0.500     0.012     0.023        84\n",
-      "\n",
-      "   micro avg      0.509     0.352     0.416      1365\n",
-      "   macro avg      0.474     0.315     0.342      1365\n",
-      "weighted avg      0.511     0.352     0.399      1365\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "learner.fit(num_epochs, target_metric='f1')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Fit for the best model"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "learner.fit(num_epochs - 1, target_metric='f1')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 5. Predict on new data\n",
-    "Create new data loader from existing path."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 269,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.data.bert_data import get_bert_data_loader_for_predict"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 270,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dl = get_bert_data_loader_for_predict(data_path + \"valid.csv\", learner)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Load our best model."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 296,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "learner.load_model()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Call predict from learner."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 297,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "preds = learner.predict(dl)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 6. Transform predictions to tokens and spans"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 298,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "['Сделка', 'состоится', ',', 'если', 'будет', 'одобрена', 'регуляторами', ',', 'из-за', 'которых', 'в', 'начале', 'года', 'сорвалось', 'слияние', 'NYSE', 'Euronext', 'с', 'Deutsche', 'Börse']\n",
-      "['B_O', 'B_O', 'B_O', 'B_O', 'B_O', 'B_O', 'B_O', 'B_O', 'B_O', 'B_O', 'B_O', 'B_O', 'B_O', 'B_O', 'B_O', 'B_ORG', 'I_ORG', 'B_O', 'B_ORG', 'I_ORG']\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules.utils.utils import bert_labels2tokens, tokens2spans\n",
-    "\n",
-    "\n",
-    "tp, lp = bert_labels2tokens(dl, preds)\n",
-    "print(tp[0])\n",
-    "print(lp[0])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 299,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sp = tokens2spans(tp, lp)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 300,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[('Сделка', 'O'), ('состоится', 'O'), (',', 'O'), ('если', 'O'), ('будет', 'O'), ('одобрена', 'O'), ('регуляторами', 'O'), (',', 'O'), ('из-за', 'O'), ('которых', 'O'), ('в', 'O'), ('начале', 'O'), ('года', 'O'), ('сорвалось', 'O'), ('слияние', 'O'), ('NYSE Euronext', 'ORG'), ('с', 'O'), ('Deutsche Börse', 'ORG')]\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(sp[0])"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 7. Evaluate"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "IOB precision"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 306,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.873     0.826     0.849       259\n",
-      "       I_ORG      0.931     0.808     0.865       898\n",
-      "       B_LOC      0.935     0.901     0.918       192\n",
-      "       I_LOC      0.929     0.856     0.891       277\n",
-      "       B_PER      0.972     0.936     0.954       188\n",
-      "       I_PER      0.978     0.949     0.964       613\n",
-      "\n",
-      "   micro avg      0.941     0.869     0.903      2427\n",
-      "   macro avg      0.937     0.879     0.907      2427\n",
-      "weighted avg      0.940     0.869     0.902      2427\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules.train.train import validate_step\n",
-    "print(validate_step(learner.data.valid_dl, learner.model, learner.data.id2label, learner.sup_labels))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Span precision"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 305,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "         LOC      0.870     0.839     0.854       192\n",
-      "         ORG      0.833     0.788     0.810       259\n",
-      "         PER      0.945     0.910     0.927       188\n",
-      "\n",
-      "   micro avg      0.877     0.839     0.858       639\n",
-      "   macro avg      0.883     0.845     0.863       639\n",
-      "weighted avg      0.877     0.839     0.857       639\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules.utils.plot_metrics import get_bert_span_report\n",
-    "clf_report = get_bert_span_report(dl, preds)\n",
-    "print(clf_report)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 8. Try cased bert model"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "\n",
-    "\n",
-    "data_path = \"/home/lis/ner/ulmfit/data/factrueval/\"\n",
-    "train_path = os.path.join(data_path, \"train_with_pos.csv\")\n",
-    "valid_path = os.path.join(data_path, \"valid_with_pos.csv\")\n",
-    "model_dir = \" /datadrive/models/multi_cased_L-12_H-768_A-12/\"\n",
-    "init_checkpoint_pt = os.path.join(\"/datadrive/models/multi_cased_L-12_H-768_A-12/\", \"pytorch_model.bin\")\n",
-    "bert_config_file = os.path.join(\"/datadrive/bert/multi_cased_L-12_H-768_A-12/\", \"bert_config.json\")\n",
-    "vocab_file = os.path.join(\"/datadrive/bert/multi_cased_L-12_H-768_A-12/\", \"vocab.txt\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=3728), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=415), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r"
-     ]
-    }
-   ],
-   "source": [
-    "from modules import BertNerData as NerData\n",
-    "\n",
-    "\n",
-    "data = NerData.create(train_path, valid_path, vocab_file)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.models import BertBiLSTMCRF\n",
-    "\n",
-    "\n",
-    "model = BertBiLSTMCRF.create(len(data.label2idx), bert_config_file, init_checkpoint_pt, enc_hidden_dim=256)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 320,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:Use lr OneCycleScheduler...\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules import NerLearner\n",
-    "\n",
-    "\n",
-    "learner = NerLearner(model, data,\n",
-    "                     best_model_path=\"/datadrive/models/factrueval/exp_final_cased.cpt\",\n",
-    "                     lr=0.001, clip=1.0, sup_labels=data.id2label[5:],\n",
-    "                     t_total=num_epochs * len(data.train_dl))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "learner.fit(100, target_metric='prec')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "IOB report"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 322,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "learner.load_model()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 323,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.845     0.842     0.843       259\n",
-      "       I_ORG      0.920     0.836     0.876      1000\n",
-      "       B_LOC      0.927     0.865     0.895       192\n",
-      "       I_LOC      0.915     0.818     0.864       303\n",
-      "       B_PER      0.973     0.957     0.965       188\n",
-      "       I_PER      0.984     0.957     0.970       649\n",
-      "\n",
-      "   micro avg      0.933     0.876     0.903      2591\n",
-      "   macro avg      0.927     0.879     0.902      2591\n",
-      "weighted avg      0.932     0.876     0.903      2591\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules.train.train import validate_step\n",
-    "\n",
-    "\n",
-    "print(validate_step(learner.data.valid_dl, learner.model, learner.data.id2label, learner.sup_labels))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Span report"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 324,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "         LOC      0.850     0.797     0.823       192\n",
-      "         ORG      0.812     0.815     0.813       259\n",
-      "         PER      0.908     0.894     0.901       188\n",
-      "\n",
-      "   micro avg      0.851     0.833     0.842       639\n",
-      "   macro avg      0.857     0.835     0.845       639\n",
-      "weighted avg      0.852     0.833     0.842       639\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules.data.bert_data import get_bert_data_loader_for_predict\n",
-    "from modules.utils.plot_metrics import get_bert_span_report\n",
-    "\n",
-    "\n",
-    "dl = get_bert_data_loader_for_predict(data_path + \"valid.csv\", learner)\n",
-    "\n",
-    "preds = learner.predict(dl)\n",
-    "\n",
-    "clf_report = get_bert_span_report(dl, preds)\n",
-    "print(clf_report)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/examples_elmo/__init__.py b/examples_elmo/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/examples_elmo/factrueval-nmt.ipynb b/examples_elmo/factrueval-nmt.ipynb
deleted file mode 100644
index 4544856..0000000
--- a/examples_elmo/factrueval-nmt.ipynb
+++ /dev/null
@@ -1,438 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### FactRuEval nmt evaluation"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%reload_ext autoreload\n",
-    "%autoreload 2\n",
-    "%matplotlib inline\n",
-    "\n",
-    "import warnings\n",
-    "import sys\n",
-    "\n",
-    "sys.path.append(\"../\")\n",
-    "\n",
-    "warnings.filterwarnings(\"ignore\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "\n",
-    "\n",
-    "data_path = \"/home/lis/ner/ulmfit/data/factrueval/\"\n",
-    "train_path = os.path.join(data_path, \"train_with_pos.csv\")\n",
-    "valid_path = os.path.join(data_path, \"valid_with_pos.csv\")\n",
-    "model_dir = \"/datadrive/elmo/\"\n",
-    "config_name = \"cnn_50_100_512_4096_sample.json\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(True, 1)"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import torch\n",
-    "torch.cuda.set_device(1)\n",
-    "torch.cuda.is_available(), torch.cuda.current_device()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 1. Create dataloaders"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:summarizer.preprocessing.cleaner:'pattern' package not found; tag filters are not available for English\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules.data.elmo_data import ElmoNerData as NerData"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data = NerData.create(train_path, valid_path, model_dir, config_name)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "For factrueval we use the following sample of labels:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "{'<pad>': 0, '<bos>': 1, '<eos>': 2, 'O': 3, 'B_ORG': 4, 'I_ORG': 5, 'B_LOC': 6, 'B_PER': 7, 'I_PER': 8, 'I_LOC': 9}\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(data.label2idx)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 2. Create model\n",
-    "For creating pytorch model we need to create `NerModel` object."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.models.elmo_models import ElmoBiLSTMAttnNMT"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:char embedding size: 3896\n",
-      "INFO:root:word embedding size: 329681\n"
-     ]
-    }
-   ],
-   "source": [
-    "model = ElmoBiLSTMAttnNMT.create(len(data.label2idx), model_dir, config_name,\n",
-    "                                 enc_hidden_dim=128, dec_hidden_dim=128, dec_embedding_dim=16)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "NMTDecoder(\n",
-       "  (embedding): Embedding(10, 16)\n",
-       "  (lstm): LSTM(272, 128, batch_first=True)\n",
-       "  (attn): Linear(in_features=128, out_features=128, bias=True)\n",
-       "  (slot_out): Linear(in_features=256, out_features=10, bias=True)\n",
-       "  (loss): CrossEntropyLoss()\n",
-       ")"
-      ]
-     },
-     "execution_count": 22,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.decoder"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "783146"
-      ]
-     },
-     "execution_count": 23,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.get_n_trainable_params()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 3. Create learner\n",
-    "\n",
-    "For training our pytorch model we need to create `NerLearner` object."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules import NerLearner"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:Don't use lr scheduler...\n"
-     ]
-    }
-   ],
-   "source": [
-    "num_epochs = 100\n",
-    "learner = NerLearner(model, data,\n",
-    "                     best_model_path=\"/datadrive/models/factrueval/elmo_attn_nmt.cpt\",\n",
-    "                     lr=0.01, clip=1.0, sup_labels=data.id2label[4:],\n",
-    "                     t_total=num_epochs * len(data.train_dl))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 4. Learn your NER model\n",
-    "Call `learner.fit`"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "learner.fit(num_epochs, target_metric='prec')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 5. Evaluate\n",
-    "Create new data loader from existing path."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 34,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.data.elmo_data import get_elmo_data_loader_for_predict"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 35,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dl = get_elmo_data_loader_for_predict(data_path + \"valid_with_pos.csv\", learner)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 36,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "learner.load_model()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 37,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "preds = learner.predict(dl)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "IOB precision"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.823     0.769     0.795       260\n",
-      "       I_ORG      0.806     0.601     0.688       283\n",
-      "       B_LOC      0.898     0.862     0.880       195\n",
-      "       B_PER      0.932     0.932     0.932       191\n",
-      "       I_PER      0.952     0.915     0.933       130\n",
-      "       I_LOC      1.000     0.143     0.250        35\n",
-      "\n",
-      "   micro avg      0.873     0.768     0.817      1094\n",
-      "   macro avg      0.902     0.704     0.746      1094\n",
-      "weighted avg      0.872     0.768     0.805      1094\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules.train.train import validate_step\n",
-    "print(validate_step(learner.data.valid_dl, learner.model, learner.data.id2label, learner.sup_labels))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Span precision"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 92,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.utils.plot_metrics import get_elmo_span_report"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 93,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "         PER      0.782     0.791     0.786       191\n",
-      "         LOC      0.793     0.764     0.778       195\n",
-      "         ORG      0.682     0.700     0.691       260\n",
-      "\n",
-      "   micro avg      0.744     0.746     0.745       646\n",
-      "   macro avg      0.752     0.752     0.752       646\n",
-      "weighted avg      0.745     0.746     0.745       646\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "clf_report = get_elmo_span_report(dl, preds)\n",
-    "print(clf_report)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/examples_elmo/factrueval.ipynb b/examples_elmo/factrueval.ipynb
deleted file mode 100644
index 3352c21..0000000
--- a/examples_elmo/factrueval.ipynb
+++ /dev/null
@@ -1,697 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### FactRuEval elmo, MutiHeadAttention"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%reload_ext autoreload\n",
-    "%autoreload 2\n",
-    "%matplotlib inline\n",
-    "\n",
-    "import warnings\n",
-    "import sys\n",
-    "\n",
-    "sys.path.append(\"../\")\n",
-    "\n",
-    "warnings.filterwarnings(\"ignore\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "\n",
-    "\n",
-    "data_path = \"/home/lis/ner/ulmfit/data/factrueval/\"\n",
-    "train_path = os.path.join(data_path, \"train_with_pos.csv\")\n",
-    "valid_path = os.path.join(data_path, \"valid_with_pos.csv\")\n",
-    "model_dir = \"/datadrive/elmo/\"\n",
-    "config_name = \"cnn_50_100_512_4096_sample.json\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(True, 1)"
-      ]
-     },
-     "execution_count": 16,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import torch\n",
-    "torch.cuda.set_device(1)\n",
-    "torch.cuda.is_available(), torch.cuda.current_device()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 1. Create dataloaders"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.data.elmo_data import ElmoNerData as NerData"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data = NerData.create(train_path, valid_path, model_dir, config_name)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "For factrueval we use the following sample of labels:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "{'<pad>': 0, '<bos>': 1, '<eos>': 2, 'O': 3, 'B_ORG': 4, 'I_ORG': 5, 'B_LOC': 6, 'B_PER': 7, 'I_PER': 8, 'I_LOC': 9}\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(data.label2idx)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 2. Create model\n",
-    "For creating pytorch model we need to create `NerModel` object."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.models.elmo_models import ElmoBiLSTMAttnCRF"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:char embedding size: 3896\n",
-      "INFO:root:word embedding size: 329681\n"
-     ]
-    }
-   ],
-   "source": [
-    "model = ElmoBiLSTMAttnCRF.create(len(data.label2idx), model_dir, config_name, enc_hidden_dim=128)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "AttnCRFDecoder(\n",
-       "  (attn): MultiHeadAttention(\n",
-       "    (attention): _MultiHeadAttention(\n",
-       "      (attention): ScaledDotProductAttention(\n",
-       "        (softmax): Softmax()\n",
-       "        (dropout): Dropout(p=0.5)\n",
-       "      )\n",
-       "    )\n",
-       "    (proj): Linear(in_features=192, out_features=128, bias=True)\n",
-       "    (dropout): Dropout(p=0.5)\n",
-       "    (layer_norm): LayerNormalization()\n",
-       "  )\n",
-       "  (linear): Linears(\n",
-       "    (linears): ModuleList(\n",
-       "      (0): Linear(in_features=128, out_features=64, bias=True)\n",
-       "    )\n",
-       "    (output_linear): Linear(in_features=64, out_features=10, bias=True)\n",
-       "  )\n",
-       "  (crf): CRF()\n",
-       ")"
-      ]
-     },
-     "execution_count": 22,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.decoder"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "665818"
-      ]
-     },
-     "execution_count": 23,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.get_n_trainable_params()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 3. Create learner\n",
-    "\n",
-    "For training our pytorch model we need to create `NerLearner` object."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules import NerLearner"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:Use lr OneCycleScheduler...\n"
-     ]
-    }
-   ],
-   "source": [
-    "num_epochs = 100\n",
-    "learner = NerLearner(model, data,\n",
-    "                     best_model_path=\"/datadrive/models/factrueval/elmo_attn_cased.cpt\",\n",
-    "                     lr=0.01, clip=1.0, sup_labels=data.id2label[4:],\n",
-    "                     t_total=num_epochs * len(data.train_dl))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 4. Learn your NER model\n",
-    "Call `learner.fit`"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {
-    "scrolled": false
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:Resuming train... Current epoch 0.\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=233), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "lr after epoch: 0.004996961373853585\n",
-      "INFO:root:\n",
-      "epoch 1, average train epoch loss=3.3685\n",
-      "\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:on epoch 0 by max_prec: 0.822\n",
-      "INFO:root:Saving new best model...\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.838     0.777     0.806       260\n",
-      "       I_ORG      0.802     0.728     0.763       283\n",
-      "       B_LOC      0.868     0.841     0.854       195\n",
-      "       B_PER      0.938     0.942     0.940       191\n",
-      "       I_PER      0.927     0.977     0.951       130\n",
-      "       I_LOC      0.560     0.400     0.467        35\n",
-      "\n",
-      "   micro avg      0.858     0.816     0.837      1094\n",
-      "   macro avg      0.822     0.778     0.797      1094\n",
-      "weighted avg      0.853     0.816     0.833      1094\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "learner.fit(num_epochs, target_metric='f1')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 5. Evaluate\n",
-    "Create new data loader from existing path."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.data.elmo_data import get_elmo_data_loader_for_predict"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dl = get_elmo_data_loader_for_predict(data_path + \"valid_with_pos.csv\", learner)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "learner.load_model()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r"
-     ]
-    }
-   ],
-   "source": [
-    "preds = learner.predict(dl)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "IOB precision"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.785     0.842     0.813       260\n",
-      "       I_ORG      0.833     0.777     0.804       283\n",
-      "       B_LOC      0.936     0.826     0.877       195\n",
-      "       B_PER      0.963     0.948     0.955       191\n",
-      "       I_PER      0.954     0.954     0.954       130\n",
-      "       I_LOC      0.923     0.343     0.500        35\n",
-      "\n",
-      "   micro avg      0.877     0.838     0.857      1094\n",
-      "   macro avg      0.899     0.782     0.817      1094\n",
-      "weighted avg      0.880     0.838     0.854      1094\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules.train.train import validate_step\n",
-    "print(validate_step(learner.data.valid_dl, learner.model, learner.data.id2label, learner.sup_labels))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Span precision"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "         LOC      0.866     0.764     0.812       195\n",
-      "         PER      0.873     0.864     0.868       191\n",
-      "         ORG      0.719     0.777     0.747       260\n",
-      "\n",
-      "   micro avg      0.804     0.799     0.801       646\n",
-      "   macro avg      0.819     0.802     0.809       646\n",
-      "weighted avg      0.809     0.799     0.802       646\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules.utils.plot_metrics import get_elmo_span_report\n",
-    "clf_report = get_elmo_span_report(dl, preds)\n",
-    "print(clf_report)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 6. Get mean and stdv on 10 runs"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.utils.plot_metrics import *\n",
-    "\n",
-    "\n",
-    "num_runs = 10\n",
-    "best_reports = []\n",
-    "try:\n",
-    "    for i in range(num_runs):\n",
-    "        model = ElmoBiLSTMAttnCRF.create(len(data.label2idx), model_dir, config_name, enc_hidden_dim=128)\n",
-    "        best_model_path = \"/datadrive/models/factrueval/elmo_{}_attn_cased.cpt\".format(i)\n",
-    "        learner = NerLearner(model, data,\n",
-    "                             best_model_path=best_model_path, verbose=False,\n",
-    "                             base_lr=0.0001, lr_max=0.001, clip=5.0, use_lr_scheduler=True, sup_labels=data.id2label[4:])\n",
-    "        learner.fit(50, target_metric='f1')\n",
-    "        idx, res = get_mean_max_metric(learner.history, \"f1\", True)\n",
-    "        best_reports.append(learner.history[idx])\n",
-    "except KeyboardInterrupt:\n",
-    "    print(\"End of exp\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### f1"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Mean and std"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(0.8689, 0.006)"
-      ]
-     },
-     "execution_count": 20,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "np.mean([get_mean_max_metric([r]) for r in best_reports]), np.round(np.std([get_mean_max_metric([r]) for r in best_reports]), 3)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Best"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.882"
-      ]
-     },
-     "execution_count": 21,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "get_mean_max_metric(best_reports)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### precision"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Mean and std"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(0.8894, 0.016)"
-      ]
-     },
-     "execution_count": 22,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "np.mean([get_mean_max_metric([r], \"prec\") for r in best_reports]), np.round(np.std([get_mean_max_metric([r], \"prec\") for r in best_reports]), 3)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Best"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.913"
-      ]
-     },
-     "execution_count": 23,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "get_mean_max_metric(best_reports, \"prec\")"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/examples_elmo/samples.ipynb b/examples_elmo/samples.ipynb
deleted file mode 100644
index 447fde4..0000000
--- a/examples_elmo/samples.ipynb
+++ /dev/null
@@ -1,2345 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### FactRuEval example (uncased model)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%reload_ext autoreload\n",
-    "%autoreload 2\n",
-    "%matplotlib inline\n",
-    "\n",
-    "import warnings\n",
-    "import sys\n",
-    "\n",
-    "sys.path.append(\"../\")\n",
-    "\n",
-    "warnings.filterwarnings(\"ignore\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 0. Download pretrained elmo model\n",
-    "Download pretrained ELMo for [russian](http://vectors.nlpl.eu/repository/11/170.zip) from [implementation](https://github.com/HIT-SCIR/ELMoForManyLangs) of elmo in pytorch and unzip.\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "\n",
-    "\n",
-    "data_path = \"/home/lis/ner/ulmfit/data/factrueval/\"\n",
-    "train_path = os.path.join(data_path, \"train_with_pos.csv\")\n",
-    "valid_path = os.path.join(data_path, \"valid_with_pos.csv\")\n",
-    "model_dir = \"/datadrive/elmo/\"\n",
-    "config_name = \"cnn_50_100_512_4096_sample.json\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(True, 1)"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import torch\n",
-    "torch.cuda.set_device(1)\n",
-    "torch.cuda.is_available(), torch.cuda.current_device()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 1. Data preparation"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Data train and validation should be presented in the following format."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>0</th>\n",
-       "      <th>1</th>\n",
-       "      <th>3</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>O O O O O O O O O O O O O O O O O O O O</td>\n",
-       "      <td>Мифология солнцеворота , собственно , и сводит...</td>\n",
-       "      <td>NOUN NOUN PNCT ADVB PNCT CONJ VERB PREP NOUN N...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>O O O O O O B_ORG I_ORG O B_ORG I_ORG O O O O ...</td>\n",
-       "      <td>По его словам , с покупкой Caramba TV « СТС Ме...</td>\n",
-       "      <td>PREP NPRO NOUN PNCT PREP NOUN &lt;unk&gt; &lt;unk&gt; PNCT...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>O O O O O O O O O O O O O O O O B_LOC O</td>\n",
-       "      <td>Такое десятилетие , по его словам « необходимо...</td>\n",
-       "      <td>ADJF NOUN PNCT PREP NPRO NOUN PNCT ADJS ADJF P...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>O O O O O O O O O O O O O O</td>\n",
-       "      <td>Правительство уволило часть врачей , обвинив и...</td>\n",
-       "      <td>NOUN VERB NOUN NOUN PNCT GRND NPRO PREP NOUN N...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>O O O B_PER I_PER O O O O O O B_ORG I_ORG I_OR...</td>\n",
-       "      <td>Министр сельского хозяйства Николай Федоров пр...</td>\n",
-       "      <td>NOUN ADJF NOUN NOUN NOUN VERB PNCT CONJ PRTF V...</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                                   0  \\\n",
-       "0            O O O O O O O O O O O O O O O O O O O O   \n",
-       "1  O O O O O O B_ORG I_ORG O B_ORG I_ORG O O O O ...   \n",
-       "2            O O O O O O O O O O O O O O O O B_LOC O   \n",
-       "3                        O O O O O O O O O O O O O O   \n",
-       "4  O O O B_PER I_PER O O O O O O B_ORG I_ORG I_OR...   \n",
-       "\n",
-       "                                                   1  \\\n",
-       "0  Мифология солнцеворота , собственно , и сводит...   \n",
-       "1  По его словам , с покупкой Caramba TV « СТС Ме...   \n",
-       "2  Такое десятилетие , по его словам « необходимо...   \n",
-       "3  Правительство уволило часть врачей , обвинив и...   \n",
-       "4  Министр сельского хозяйства Николай Федоров пр...   \n",
-       "\n",
-       "                                                   3  \n",
-       "0  NOUN NOUN PNCT ADVB PNCT CONJ VERB PREP NOUN N...  \n",
-       "1  PREP NPRO NOUN PNCT PREP NOUN <unk> <unk> PNCT...  \n",
-       "2  ADJF NOUN PNCT PREP NPRO NOUN PNCT ADJS ADJF P...  \n",
-       "3  NOUN VERB NOUN NOUN PNCT GRND NPRO PREP NOUN N...  \n",
-       "4  NOUN ADJF NOUN NOUN NOUN VERB PNCT CONJ PRTF V...  "
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import pandas as pd\n",
-    "\n",
-    "\n",
-    "df = pd.read_csv(train_path)\n",
-    "df.head()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Train and valid .csv files must have columns named (0, 1). Column 3 is't necessary (does not used now).\n",
-    "* Column 0 contains labels in IOB format.\n",
-    "* Column 1 contains tokenized and separated (by whitespace) text.\n",
-    "\n",
-    "For using data in model we need to create `NerData` object.\n",
-    "\n",
-    "* `train_path` - path to train .csv file\n",
-    "* `valid_path` - path to valid .csv file\n",
-    "* `model_dir` - path to ELMo pretrained model's dir\n",
-    "* `config_name` - name of config in `model_dir` folder\n",
-    "* `batch_size` - batch size (default `16`)\n",
-    "* `cuda` - using cuda or cpu (default `True`)\n",
-    "* `is_cls` - create data for joint model (default `False`)\n",
-    "* `oov` - default unknown in ELMo model (default `'<oov>'`)\n",
-    "* `pad` - default pad sym in ELMo model (default `'<pad>'`)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 41,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.data.elmo_data import ElmoNerData as NerData"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 44,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=3728), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=415), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "data = NerData.create(train_path, valid_path, model_dir, config_name)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "For factrueval we use the following sample of labels:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "{'<pad>': 0, '<bos>': 1, '<eos>': 2, 'O': 3, 'B_ORG': 4, 'I_ORG': 5, 'B_LOC': 6, 'B_PER': 7, 'I_PER': 8, 'I_LOC': 9}\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(data.label2idx)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 2. Create model\n",
-    "For creating pytorch model we need to create `NerModel` object.\n",
-    "\n",
-    "* `label_size` - number of labels: `len(data.label2idx)`,\n",
-    "\n",
-    "ElmoEmbedder params\n",
-    "* `model_dir` - path to ELMo pretrained model's dir\n",
-    "* `config_name` - name of config in `model_dir` folder\n",
-    "* `embedding_dim` - output dim from bert model (default `768`)\n",
-    "* `elmo_mode` - mode of how bert output will be returned. If `avg` return mean of all outputs from ELMo. If `weighted` return weighted sum of all bert output layers, weights are learnable (aka ELMO).\n",
-    "* `freeze` - freezing bert model (default `True`)\n",
-    "\n",
-    "ElmoBiLSTMEncoder params\n",
-    "* `enc_hidden_dim` - dim of rnn layer or hidden layer (default `128`)\n",
-    "* `rnn_layers` - number of rnn layers in encoder\n",
-    "\n",
-    "CRFDecoder params\n",
-    "* `input_dropout` - dropout param (default `0.5`),\n",
-    "\n",
-    "Gpu or cpu:\n",
-    "* `use_cuda` - use cuda or cpu (default `True`)."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.models.elmo_models import ElmoBiLSTMCRF"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:char embedding size: 3896\n",
-      "INFO:root:word embedding size: 329681\n"
-     ]
-    }
-   ],
-   "source": [
-    "model = ElmoBiLSTMCRF.create(len(data.label2idx), model_dir, config_name, enc_hidden_dim=128, freeze=False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "CRFDecoder(\n",
-       "  (input_dropout): Dropout(p=0.5)\n",
-       "  (linear): Linears(\n",
-       "    (linears): ModuleList(\n",
-       "      (0): Linear(in_features=128, out_features=64, bias=True)\n",
-       "    )\n",
-       "    (output_linear): Linear(in_features=64, out_features=10, bias=True)\n",
-       "  )\n",
-       "  (crf): CRF()\n",
-       ")"
-      ]
-     },
-     "execution_count": 11,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.decoder"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 3. Create learner\n",
-    "\n",
-    "For training our pytorch model we need to create `NerLearner` object.\n",
-    "\n",
-    "* `model: NerModel` - pytorch model\n",
-    "* `data: NerData` - train and valid dataloaders\n",
-    "* `best_model_path` - path for store best model\n",
-    "* `lr` - starting learning rate (default `0.001`)\n",
-    "* `betas` - params for default optimizer (default `[0.8, 0.9]`)\n",
-    "* `clip` - grad clipping (default `5`)\n",
-    "* `verbose` - printing to console reports (default `True`)\n",
-    "* `sup_labels` - list of supported labels for calculating `target_metric` metric. For FactRuEval use: `['B_LOC', 'I_LOC', 'B_ORG', 'I_ORG', 'B_PER', 'I_PER']` (default `None`)\n",
-    "* `t_total` - total optimization steps, used for lr scheduler, if -1, don't scale lr after batch iteration (default `-1`), usally t_total = num_epochs * train_size / batch_size\n",
-    "* `warmup` - portion of t_total for the warmup, -1  means no warmup (default `0.1`)\n",
-    "* `weight_decay` - weight decay (default `0.01`)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 40,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules import NerLearner"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "num_epochs = 100\n",
-    "learner = NerLearner(model, data,\n",
-    "                     best_model_path=\"/datadrive/models/factrueval/elmo_bilmcrf.cpt\",\n",
-    "                     lr=0.0001, clip=1.0, sup_labels=data.id2label[4:],\n",
-    "                     t_total=num_epochs * len(data.train_dl))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 4. Learn your NER model\n",
-    "Call `learner.fit`\n",
-    "* `epochs` - number of train iterations (default `100`)\n",
-    "* `resume_history` - resuming appending results to history or create new (default `True`)\n",
-    "* `target_metric` - mean metric, that want you see to pick best epochs (default `f1`)."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:Resuming train... Current epoch 0.\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=233), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "epoch 1, average train epoch loss=10.427\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:on epoch 0 by max_f1: 0.089\n",
-      "INFO:root:Saving new best model...\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.538     0.027     0.051       260\n",
-      "       I_ORG      0.181     0.046     0.073       283\n",
-      "       B_LOC      0.833     0.026     0.050       195\n",
-      "       B_PER      0.706     0.126     0.213       191\n",
-      "       I_PER      0.353     0.092     0.146       130\n",
-      "       I_LOC      0.000     0.000     0.000        35\n",
-      "\n",
-      "   micro avg      0.384     0.056     0.097      1094\n",
-      "   macro avg      0.435     0.053     0.089      1094\n",
-      "weighted avg      0.488     0.056     0.095      1094\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "learner.fit(1, target_metric='f1')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Fit for the best model"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:Resuming train... Current epoch 1.\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=233), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "epoch 2, average train epoch loss=10.293\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:on epoch 1 by max_f1: 0.093\n",
-      "INFO:root:Saving new best model...\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.556     0.038     0.072       260\n",
-      "       I_ORG      0.187     0.049     0.078       283\n",
-      "       B_LOC      0.625     0.026     0.049       195\n",
-      "       B_PER      0.590     0.120     0.200       191\n",
-      "       I_PER      0.371     0.100     0.158       130\n",
-      "       I_LOC      0.000     0.000     0.000        35\n",
-      "\n",
-      "   micro avg      0.371     0.059     0.102      1094\n",
-      "   macro avg      0.388     0.056     0.093      1094\n",
-      "weighted avg      0.439     0.059     0.100      1094\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=233), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "epoch 3, average train epoch loss=9.8236\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:on epoch 2 by max_f1: 0.132\n",
-      "INFO:root:Saving new best model...\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.462     0.046     0.084       260\n",
-      "       I_ORG      0.206     0.078     0.113       283\n",
-      "       B_LOC      0.750     0.062     0.114       195\n",
-      "       B_PER      0.559     0.173     0.264       191\n",
-      "       I_PER      0.413     0.146     0.216       130\n",
-      "       I_LOC      0.000     0.000     0.000        35\n",
-      "\n",
-      "   micro avg      0.386     0.090     0.145      1094\n",
-      "   macro avg      0.398     0.084     0.132      1094\n",
-      "weighted avg      0.443     0.090     0.141      1094\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=233), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "epoch 4, average train epoch loss=8.9581\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:on epoch 3 by max_f1: 0.218\n",
-      "INFO:root:Saving new best model...\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.446     0.096     0.158       260\n",
-      "       I_ORG      0.266     0.177     0.212       283\n",
-      "       B_LOC      0.653     0.251     0.363       195\n",
-      "       B_PER      0.554     0.267     0.360       191\n",
-      "       I_PER      0.364     0.154     0.216       130\n",
-      "       I_LOC      0.000     0.000     0.000        35\n",
-      "\n",
-      "   micro avg      0.416     0.178     0.250      1094\n",
-      "   macro avg      0.381     0.157     0.218      1094\n",
-      "weighted avg      0.431     0.178     0.246      1094\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=233), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "epoch 5, average train epoch loss=7.8177\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:on epoch 4 by max_f1: 0.336\n",
-      "INFO:root:Saving new best model...\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.561     0.265     0.360       260\n",
-      "       I_ORG      0.296     0.258     0.275       283\n",
-      "       B_LOC      0.656     0.626     0.640       195\n",
-      "       B_PER      0.496     0.293     0.368       191\n",
-      "       I_PER      0.593     0.269     0.370       130\n",
-      "       I_LOC      0.000     0.000     0.000        35\n",
-      "\n",
-      "   micro avg      0.484     0.324     0.388      1094\n",
-      "   macro avg      0.434     0.285     0.336      1094\n",
-      "weighted avg      0.484     0.324     0.379      1094\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=233), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "epoch 6, average train epoch loss=6.6099\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:on epoch 5 by max_f1: 0.389\n",
-      "INFO:root:Saving new best model...\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.554     0.415     0.475       260\n",
-      "       I_ORG      0.374     0.290     0.327       283\n",
-      "       B_LOC      0.700     0.754     0.726       195\n",
-      "       B_PER      0.424     0.319     0.364       191\n",
-      "       I_PER      0.608     0.346     0.441       130\n",
-      "       I_LOC      0.000     0.000     0.000        35\n",
-      "\n",
-      "   micro avg      0.513     0.405     0.453      1094\n",
-      "   macro avg      0.443     0.354     0.389      1094\n",
-      "weighted avg      0.499     0.405     0.443      1094\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=233), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "epoch 7, average train epoch loss=5.4815\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:on epoch 6 by max_f1: 0.461\n",
-      "INFO:root:Saving new best model...\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.617     0.508     0.557       260\n",
-      "       I_ORG      0.494     0.424     0.456       283\n",
-      "       B_LOC      0.714     0.779     0.745       195\n",
-      "       B_PER      0.511     0.372     0.430       191\n",
-      "       I_PER      0.673     0.508     0.579       130\n",
-      "       I_LOC      0.000     0.000     0.000        35\n",
-      "\n",
-      "   micro avg      0.573     0.495     0.531      1094\n",
-      "   macro avg      0.501     0.432     0.461      1094\n",
-      "weighted avg      0.571     0.495     0.527      1094\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=233), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "epoch 8, average train epoch loss=4.3492\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:on epoch 7 by max_f1: 0.61\n",
-      "INFO:root:Saving new best model...\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.669     0.677     0.673       260\n",
-      "       I_ORG      0.632     0.473     0.541       283\n",
-      "       B_LOC      0.791     0.795     0.793       195\n",
-      "       B_PER      0.803     0.812     0.807       191\n",
-      "       I_PER      0.882     0.808     0.843       130\n",
-      "       I_LOC      0.000     0.000     0.000        35\n",
-      "\n",
-      "   micro avg      0.733     0.663     0.696      1094\n",
-      "   macro avg      0.630     0.594     0.610      1094\n",
-      "weighted avg      0.709     0.663     0.682      1094\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=233), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "epoch 9, average train epoch loss=3.3389\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:on epoch 8 by max_f1: 0.658\n",
-      "INFO:root:Saving new best model...\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.724     0.708     0.716       260\n",
-      "       I_ORG      0.692     0.611     0.649       283\n",
-      "       B_LOC      0.830     0.826     0.828       195\n",
-      "       B_PER      0.833     0.937     0.882       191\n",
-      "       I_PER      0.916     0.838     0.876       130\n",
-      "       I_LOC      0.000     0.000     0.000        35\n",
-      "\n",
-      "   micro avg      0.780     0.737     0.758      1094\n",
-      "   macro avg      0.666     0.653     0.658      1094\n",
-      "weighted avg      0.753     0.737     0.744      1094\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=233), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "epoch 10, average train epoch loss=2.5398\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:on epoch 9 by max_f1: 0.685\n",
-      "INFO:root:Saving new best model...\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.744     0.762     0.753       260\n",
-      "       I_ORG      0.735     0.668     0.700       283\n",
-      "       B_LOC      0.866     0.862     0.864       195\n",
-      "       B_PER      0.862     0.948     0.903       191\n",
-      "       I_PER      0.926     0.862     0.892       130\n",
-      "       I_LOC      0.000     0.000     0.000        35\n",
-      "\n",
-      "   micro avg      0.805     0.775     0.790      1094\n",
-      "   macro avg      0.689     0.683     0.685      1094\n",
-      "weighted avg      0.782     0.775     0.778      1094\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=233), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "epoch 11, average train epoch loss=1.9537\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:on epoch 10 by max_f1: 0.713\n",
-      "INFO:root:Saving new best model...\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.777     0.804     0.790       260\n",
-      "       I_ORG      0.775     0.693     0.731       283\n",
-      "       B_LOC      0.851     0.877     0.864       195\n",
-      "       B_PER      0.910     0.953     0.931       191\n",
-      "       I_PER      0.950     0.885     0.916       130\n",
-      "       I_LOC      0.111     0.029     0.045        35\n",
-      "\n",
-      "   micro avg      0.830     0.799     0.814      1094\n",
-      "   macro avg      0.729     0.707     0.713      1094\n",
-      "weighted avg      0.812     0.799     0.804      1094\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=233), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "epoch 12, average train epoch loss=1.6208\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:on epoch 11 by max_f1: 0.766\n",
-      "INFO:root:Saving new best model...\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.809     0.800     0.805       260\n",
-      "       I_ORG      0.820     0.710     0.761       283\n",
-      "       B_LOC      0.855     0.877     0.866       195\n",
-      "       B_PER      0.934     0.958     0.946       191\n",
-      "       I_PER      0.952     0.923     0.938       130\n",
-      "       I_LOC      0.467     0.200     0.280        35\n",
-      "\n",
-      "   micro avg      0.857     0.814     0.835      1094\n",
-      "   macro avg      0.806     0.745     0.766      1094\n",
-      "weighted avg      0.848     0.814     0.828      1094\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=233), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "epoch 13, average train epoch loss=1.2925\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:on epoch 12 by max_f1: 0.811\n",
-      "INFO:root:Saving new best model...\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.798     0.819     0.808       260\n",
-      "       I_ORG      0.842     0.735     0.785       283\n",
-      "       B_LOC      0.890     0.872     0.881       195\n",
-      "       B_PER      0.939     0.963     0.951       191\n",
-      "       I_PER      0.945     0.923     0.934       130\n",
-      "       I_LOC      0.625     0.429     0.508        35\n",
-      "\n",
-      "   micro avg      0.865     0.832     0.848      1094\n",
-      "   macro avg      0.840     0.790     0.811      1094\n",
-      "weighted avg      0.862     0.832     0.845      1094\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=233), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "epoch 14, average train epoch loss=1.0895\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:on epoch 13 by max_f1: 0.825\n",
-      "INFO:root:Saving new best model...\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.790     0.838     0.813       260\n",
-      "       I_ORG      0.823     0.756     0.788       283\n",
-      "       B_LOC      0.881     0.872     0.876       195\n",
-      "       B_PER      0.948     0.963     0.956       191\n",
-      "       I_PER      0.953     0.938     0.946       130\n",
-      "       I_LOC      0.643     0.514     0.571        35\n",
-      "\n",
-      "   micro avg      0.858     0.846     0.852      1094\n",
-      "   macro avg      0.840     0.814     0.825      1094\n",
-      "weighted avg      0.857     0.846     0.851      1094\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=233), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "epoch 15, average train epoch loss=0.89711\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:on epoch 14 by max_f1: 0.831\n",
-      "INFO:root:Saving new best model...\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.797     0.831     0.814       260\n",
-      "       I_ORG      0.861     0.721     0.785       283\n",
-      "       B_LOC      0.882     0.882     0.882       195\n",
-      "       B_PER      0.948     0.958     0.953       191\n",
-      "       I_PER      0.961     0.938     0.949       130\n",
-      "       I_LOC      0.679     0.543     0.603        35\n",
-      "\n",
-      "   micro avg      0.872     0.837     0.854      1094\n",
-      "   macro avg      0.855     0.812     0.831      1094\n",
-      "weighted avg      0.871     0.837     0.852      1094\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=233), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "epoch 16, average train epoch loss=0.74681\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:on epoch 14 by max_f1: 0.831\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.790     0.838     0.813       260\n",
-      "       I_ORG      0.805     0.760     0.782       283\n",
-      "       B_LOC      0.885     0.872     0.879       195\n",
-      "       B_PER      0.953     0.958     0.956       191\n",
-      "       I_PER      0.961     0.946     0.953       130\n",
-      "       I_LOC      0.633     0.543     0.585        35\n",
-      "\n",
-      "   micro avg      0.855     0.848     0.852      1094\n",
-      "   macro avg      0.838     0.820     0.828      1094\n",
-      "weighted avg      0.855     0.848     0.851      1094\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=233), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "epoch 17, average train epoch loss=0.63229\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:on epoch 16 by max_f1: 0.835\n",
-      "INFO:root:Saving new best model...\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.795     0.835     0.814       260\n",
-      "       I_ORG      0.831     0.763     0.796       283\n",
-      "       B_LOC      0.881     0.872     0.876       195\n",
-      "       B_PER      0.953     0.953     0.953       191\n",
-      "       I_PER      0.960     0.931     0.945       130\n",
-      "       I_LOC      0.690     0.571     0.625        35\n",
-      "\n",
-      "   micro avg      0.864     0.846     0.855      1094\n",
-      "   macro avg      0.852     0.821     0.835      1094\n",
-      "weighted avg      0.863     0.846     0.854      1094\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=233), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "epoch 18, average train epoch loss=0.54294\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:on epoch 17 by max_f1: 0.839\n",
-      "INFO:root:Saving new best model...\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.808     0.823     0.815       260\n",
-      "       I_ORG      0.855     0.753     0.801       283\n",
-      "       B_LOC      0.887     0.887     0.887       195\n",
-      "       B_PER      0.953     0.963     0.958       191\n",
-      "       I_PER      0.961     0.938     0.949       130\n",
-      "       I_LOC      0.690     0.571     0.625        35\n",
-      "\n",
-      "   micro avg      0.875     0.846     0.861      1094\n",
-      "   macro avg      0.859     0.823     0.839      1094\n",
-      "weighted avg      0.874     0.846     0.859      1094\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=233), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "epoch 19, average train epoch loss=0.44378\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:on epoch 18 by max_f1: 0.84\n",
-      "INFO:root:Saving new best model...\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.797     0.831     0.814       260\n",
-      "       I_ORG      0.819     0.784     0.801       283\n",
-      "       B_LOC      0.858     0.867     0.862       195\n",
-      "       B_PER      0.958     0.948     0.953       191\n",
-      "       I_PER      0.976     0.946     0.961       130\n",
-      "       I_LOC      0.639     0.657     0.648        35\n",
-      "\n",
-      "   micro avg      0.857     0.854     0.855      1094\n",
-      "   macro avg      0.841     0.839     0.840      1094\n",
-      "weighted avg      0.858     0.854     0.856      1094\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=233), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "epoch 20, average train epoch loss=0.38945\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:on epoch 19 by max_f1: 0.854\n",
-      "INFO:root:Saving new best model...\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.806     0.831     0.818       260\n",
-      "       I_ORG      0.872     0.770     0.818       283\n",
-      "       B_LOC      0.877     0.877     0.877       195\n",
-      "       B_PER      0.958     0.963     0.961       191\n",
-      "       I_PER      0.961     0.946     0.953       130\n",
-      "       I_LOC      0.706     0.686     0.696        35\n",
-      "\n",
-      "   micro avg      0.877     0.856     0.866      1094\n",
-      "   macro avg      0.863     0.846     0.854      1094\n",
-      "weighted avg      0.878     0.856     0.866      1094\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=233), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "epoch 21, average train epoch loss=0.32141\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:on epoch 19 by max_f1: 0.854\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.786     0.846     0.815       260\n",
-      "       I_ORG      0.851     0.767     0.807       283\n",
-      "       B_LOC      0.872     0.877     0.875       195\n",
-      "       B_PER      0.958     0.963     0.961       191\n",
-      "       I_PER      0.969     0.954     0.961       130\n",
-      "       I_LOC      0.710     0.629     0.667        35\n",
-      "\n",
-      "   micro avg      0.867     0.857     0.862      1094\n",
-      "   macro avg      0.858     0.839     0.847      1094\n",
-      "weighted avg      0.868     0.857     0.862      1094\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=233), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "epoch 22, average train epoch loss=0.28334\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:on epoch 19 by max_f1: 0.854\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.801     0.838     0.820       260\n",
-      "       I_ORG      0.882     0.763     0.818       283\n",
-      "       B_LOC      0.864     0.882     0.873       195\n",
-      "       B_PER      0.958     0.958     0.958       191\n",
-      "       I_PER      0.961     0.946     0.953       130\n",
-      "       I_LOC      0.733     0.629     0.677        35\n",
-      "\n",
-      "   micro avg      0.877     0.854     0.865      1094\n",
-      "   macro avg      0.867     0.836     0.850      1094\n",
-      "weighted avg      0.878     0.854     0.864      1094\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=233), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "epoch 23, average train epoch loss=0.24262\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:on epoch 19 by max_f1: 0.854\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.827     0.846     0.837       260\n",
-      "       I_ORG      0.894     0.777     0.832       283\n",
-      "       B_LOC      0.876     0.872     0.874       195\n",
-      "       B_PER      0.963     0.958     0.961       191\n",
-      "       I_PER      0.961     0.946     0.953       130\n",
-      "       I_LOC      0.657     0.657     0.657        35\n",
-      "\n",
-      "   micro avg      0.887     0.858     0.872      1094\n",
-      "   macro avg      0.863     0.843     0.852      1094\n",
-      "weighted avg      0.887     0.858     0.872      1094\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=233), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "epoch 24, average train epoch loss=0.20977\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:on epoch 19 by max_f1: 0.854\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.809     0.831     0.820       260\n",
-      "       I_ORG      0.892     0.760     0.821       283\n",
-      "       B_LOC      0.881     0.872     0.876       195\n",
-      "       B_PER      0.958     0.963     0.961       191\n",
-      "       I_PER      0.969     0.946     0.957       130\n",
-      "       I_LOC      0.667     0.629     0.647        35\n",
-      "\n",
-      "   micro avg      0.883     0.850     0.866      1094\n",
-      "   macro avg      0.863     0.833     0.847      1094\n",
-      "weighted avg      0.884     0.850     0.865      1094\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=233), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:\n",
-      "epoch 25, average train epoch loss=0.19112\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:root:on epoch 19 by max_f1: 0.854\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.821     0.846     0.833       260\n",
-      "       I_ORG      0.874     0.784     0.827       283\n",
-      "       B_LOC      0.890     0.872     0.881       195\n",
-      "       B_PER      0.953     0.963     0.958       191\n",
-      "       I_PER      0.968     0.938     0.953       130\n",
-      "       I_LOC      0.667     0.629     0.647        35\n",
-      "\n",
-      "   micro avg      0.883     0.859     0.871      1094\n",
-      "   macro avg      0.862     0.839     0.850      1094\n",
-      "weighted avg      0.883     0.859     0.870      1094\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "0507d55c65da43418043e442a82ebf0b",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=233), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "learner.fit(num_epochs - 1, True, target_metric='f1')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 37,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.utils.plot_metrics import plot_by_class_curve"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 34,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA20AAAJCCAYAAABTfy+dAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvqOYd8AAAIABJREFUeJzs3Xl8VNX5+PHPnSWTSWYmmewrScieEAgBZFEUtyC2ohWrtRa3r2391qVq7aLt94fVqtVabSt209alVq3FVm1VNpFFWWQXQsKWdbKRPTPJTDLL/f0xMBJIIEBCAjzv1yuvJDNnzpy7zJ373HPOcxVVVRFCCCGEEEIIMTppRroBQgghhBBCCCEGJkGbEEIIIYQQQoxiErQJIYQQQgghxCgmQZsQQgghhBBCjGIStAkhhBBCCCHEKCZBmxBCCCGEEEKMYhK0CSGEEEIIIcQoJkGbEEIIIYQQQoxiErQJIYQQQgghxCimG6k3joqKUlNTU0fq7QfU1dVFaGjoSDdDnOVkPxOng+xnYrjJPiZOB9nPxOkwUvvZ5s2bm1VVjT5euREL2lJTU9m0adNIvf2AVq5cyaxZs0a6GeIsJ/uZOB1kPxPDTfYxcTrIfiZOh5HazxRFqRpMORkeKYQQQgghhBCjmARtQgghhBBCCDGKSdAmhBBCCCGEEKPYiM1p64/b7cZms+FyuUasDWFhYZSWlo7Y+x8pODiYpKQk9Hr9SDdFCCGEEEIIMQJGVdBms9kwm82kpqaiKMqItMFut2M2m0fkvY+kqiotLS3YbDbS0tJGujlCCCGEEEKIETCqhke6XC4iIyNHLGAbbRRFITIyckR7HoUQQgghhBAja1QFbYAEbEeQ9SGEEEIIIcS5bdQFbUIIIYQQQgghviRBmxBCCCGEEEKMYhK0HSE8PJzCwkImTJhAUVERa9euPWb5kpISLrnkErKzs8nMzOSxxx5DVVUAXnnlFaKjoyksLCQnJ4fnnnuuz2tff/11xo8fT35+PhMmTOCOO+6gvb192JZNCCGEEEIIceaRoO0IRqORbdu2sX37dp588kkeeuihAcs6nU7mzp3LT37yE3bv3s327dtZu3Ytv//97wNlbrjhBrZt28Znn33G448/Tk1NDQCLFy/mueee46OPPqKkpIQtW7YwY8YMGhsbh30ZhRBCCCGEEGeOUZXy/3A//08Ju+o6h7TOvAQLC67KH3T5zs5OrFbrgM+/8cYbnH/++RQXFwMQEhLCwoULmTVrFnfddVefspGRkWRkZFBfX09ycjKPP/44zzzzDImJiQBotVpuv/32k1gqIYQQQgghxNls1AZtI8XpdFJYWIjL5aK+vp4VK1YMWLakpIRJkyb1eSw9PR2Hw0FnZ9+As7q6GpfLxfjx4wOvLSoqGvoFEEIIIYQQQpxVRm3QdiI9YkPp0PBIgHXr1nHzzTezc+fOk069/49//IPVq1dTVlbGwoULCQ4OPqrMjh07mD9/Pna7nSeeeIIbbrjhlJZBCCGEEEIIcfaQOW3HMH36dJqbm2lqaur3+by8PDZv3tznsfLyckwmExaLBfDPafviiy9Yu3YtP/nJT2hoaAAgPz+fLVu2AFBQUMC2bduYM2cOTqdzGJdICCGEEEIIcaaRoO0YysrK8Hq9REZG9vv8TTfdxKeffsry5csB/9DKe++9lx/96EdHlZ08eTLz58/nt7/9LQAPPfQQDz74IDabLVBGAjYhhBBCCCHEkUbt8MiRcmhOG4Cqqrz66qtotdp+yxqNRt577z3uuece7rrrLrxeL/Pnz+fuu+/ut/yPf/xjioqKePjhh7nyyitpampizpw5eL1ewsPDGTduHLNnzx62ZRNCCCGEEEKceSRoO0J7eztms3nQ5QsKCli5cmW/z916663ceuutgf8TEhICwyMBbrnlFm655ZaTbaoQQgghhBDiHCDDI4UQQgghhBBiFJO
etkE4lN3xcAaDgQ0bNoxQi4QQQgghhBDnCgnaBuFQdkchhBBCCCGEON1keKQQQgghhBBCjGLS0yaEEEKMMl63j4aKDmxlbdTtbccaH8rkOamYrIZB19Hd2cuWxVVU72o5ZjlzpJHkXCtJORFEJoSiaJRTbf6o5Gjrwba7FVtpG20NXcSmhZGUYyUx24rBOPKnQ13tPaz5xx68XpXk3AiSc62Ex4agKGfm9uhxeqjb00ZNWRv1+9rxun0DltUFaUnIDCcpx0pCZjhBwcfeHr0uD3V72wOfD0+vd8CyWr2G+IxwknOsJGZZCTrFbd3V0YOtrA1baSsHqu2oPnXAssEmPUnZVpJyI4hNs6DVSl+JOHkjf5QSQowK3o4OesrLMY4bh6LXD2ndnrY2esrKCEpPRx8TM6R1DxdVVXFXV9NbY8MwNg1dfPwJnTx529txlZXhczgGLqTVYsjMRJ+YeMaemImhofpUmmsd2ErbsO1uPXgi6kNRIDLJROlndZStq2fCJUlMLE4hOHTgz2iv08O25dVsW16Dp9fLmPxI9Ib+b12jqiottV18tsgf2BnNepJyIkjKsZKcG4E5InhYlvd0ODxosJW20tbQDfhPpCPiQyldW8eOlTYUBWJSLSTn+pc7Li0Mrf70nlzX7+9g8Z920OvyEGIJovKLZgBCww0k5/hP+pNyrISGDT5oP928Hh+NFR3UlLZhK2ulsdIf0Oj0GuLSw465zzodvexcVcv2j2vQaBRixx7aHhHEpJpRfSr1+9r927KslcbyTnw+Fa1OQ1y6hbBo44B1u7rclH5ax45PbCgahdhUM0k5/qA4Ni0Mre7Y27rX5aFuTzs1Za3YytporesCwBCqI35sGLqg/j9bAJ3NTjZ+WMnGDyrRG7QkZIWTfPDzFZEQKsd9cUIkaBPiHOfr7qb1b6/T8tJL+Ox2tGFhmC67FMvs2YROm4YSFHRS9aqqSvfGjbS//U/sS5ei9vYCoIuOJjg/v8+PPnZkAznV56O3qgpXyS5cJSW4du3CtWsXPrs9UEZrtRKclxdosy8lmx3be+jp9jB9djRq+R7/aw/+uGtrB/3+mrAwgvNyMebn403LpzkomfpGULQKF1yXidF8ctvgSIf3NLTWdw1Jnf3RG768aj6YE2Cv20d9eQe20lYayjuwRBtJzokgMdtKiOXUlt1/ItlJTVkr9fvaMVuDB30CrKoqHU3OwFV1d6+XxCx/MBOVZDrlHqnOZn/dh04GXQ43ANa4EHJnJPTpBepocvL5f8vZsrSanavrKJo9hvEXJ/cJxjxuLyWr69j0USUuh5v0omimzh2LNS70uG2xt7r8y1nWSk1ZG3s3NgJgiQrGEDLwybbd7uPA2o2ntB6Gg9fjo62hOxA0JGSFk3t+Asm5ViIT/NvuyCBj8+IqNn1YiS5Ic9weLmt8iD+oyI44od7PI6mqSsmaOtb8Yw+miGDmfr+QyETTwf3Ov19U7mihbL3/dkFhMcbj9kKNBFVVaW/sDlxoiEm1MOmKFJKyrcSNHVwQ7On1Ur+/w78Plrbx+X8r+Pw/FeiDtXg9Krs8W0CBmDFmCi8fQ1Ku9bhB0yFet4+G8o7AZ23zR5X+bW3QEh5jHHBb+7w+2uq7/QGiXkNCRhjZU+NO6Bjg6nJTu6cNW6n/s1614+AFEksQpvDRG4SfjSISQrns1ryRbsZJU1R14G7d4TR58mR106ZNfR4rLS0lNzd3RNpziMlkwnGMK+MlJSXcc8891NbW4vP5uPnmm/nZz36Goii88sor/PCHPyQxMRGXy8V3v/td7r///sBrX3/9dZ5++mm8Xi86nY4pU6bwzDPPEB4efsw2jYb1IobWypUrmTVr1oi2Qe3tpW3RIpr/8Ae8Tc2YLrkEy5wrcKxZg2PFJ/gcDjRhYZgvuQTLFbMJnT59UAGcp62Njn+/S/vbb9NbWYnGbCZs7lxMF11Ib0UFzpISXCW76C0vh4PHH210FPr4BBjg+09BQRs
dRVBSMvrkJIKSk9EnJaNPSkRzWJu8ji7ctTbcNTX01hz8bavB29ExcIO9PnorKwM9YkpQEIbsbILz/QFaUPIYesr3HwzGdtFVYaM6/iJqki/Fp/GfPAU7Wygo+TOmrnr0yckHA7s8jPn5aK3WAd/a53Jh37mXmh0N1DeoNCmxOI3+ADbIbcejMxKkOpnkWEqYt/m46/5IrXYX7ohsmnWJtOiTcGj9bQnyOQnzNqGc7PFfUdCGh6GLjkZjNh+12brtbpqqOlFV0AVpSMgMJzHdRIR9P7r1S+itq6VTG0mLLolmfSKtujh8ih5F9WH2ttCtC8ODf7tarZCUYWHMpGQSc6MCQYrX4cBts9FbU4O7xobb5t/mSkgovunFtFoyqS3vonZvO54eb6DHyt7qoqfLA/i/wJNyrCTnRJCQ5R+S5bT3YtvdRk2pP7i1t7oAMFkN6A3aL3trQvUkZlsDwwqPdaX/EJfDjW33l4FRZ5MTAIOmhyh3LbFhLhLHGAlLj/Pv48nJaMPD+5xQNtscbHhvP5U7WgixBDHlK6nkzIhn78YDfP7fchytPSTlWJk6NxXzgT10LlmMa9euE9q8KuDQWGnWJ9Gmi8V3jOu7bo8bvW5oe+aHhorZ20qUx0a4pxEtAw/NO8RNEK36eJp1STg1A9+vVVUUOvSx9OI/4Q6zQGKGmTFFiSTlxwxqqKWqqvQcaGH1m7vZW9ZDvLmLIt0mlLoqfC7nEUtC4PPSpotFHaXpCIw+O5GeWiI9dejV3lOur1cx0KJLoEWfSG+vm3jlAJGeOoLUnlOu260E+evWJR5zW/v3ozYiPTasnka0DDwMc7CciolmfSItugQ8igRtp5PFArN/c9uAz4/UuZmiKJtVVZ183HIStPV1rKDN6XQybtw4/vCHP1BcXEx3dzfz5s3jq1/9KnfddRevvPIKmzZtYuHChbS0tJCdnc3WrVtJTk5m8eLF/PSnP+X9998nMTERr9fLq6++yvnnn092dvYx2zQa1ov4Uv3+DrYurSJnejxpE6L6nFB57XZa/vpXuj/v/8qzXRNOhT4flW7OmxaJZUIewfl5aE2mAd/P19WFq7QUV0kJzpIS3LV1x2yfLiYa46FerLw8tGFhfZ5XfT46P/iQpt/9DndNDcbJk4h54AeEFE388j17e+n69DPsSxZj/3iFP4CzWDCOy0djCUNrsaANs6CxWNBawtCGWUCjwb54CfZly1DdbowTJxJ+/fVYrpiNxnj0Ca2vqwtXWVmgd8vTcox5Nz4f7gONuGtsqD2HfWErCrrYWLQRVjwNjXhbW/u8TGM2E5Sc7A+cjnHVXJ+cFFhnhoyMfoeHetxedq6qZfNHlbi6PIyJdpHl2UpPt8pG33l4VB0X3zCWrJmpAy/HQapPZf/WJrYtr+ZApT+40Ru0JGRYiIvwENlTjaHiC5rrnWzSXkAPRsb7NpKsVhy/bqBRSWC/JpdWokDRoFE9RKpNRKsNRKkNWGgfKD4eFJ/TiXP7dvB40CcnY5ldjHn2FQSPyw98HnqcHmw7Gqj4pJTaym4cqv/ESO9xoGh19Cr+YXcmtSPQrkj1AHpfD70HmmhpU2gNHUubNZv2sLGoGj2K6sXiaUbpcaJ6PH3apOh0KEFBdClmevX+9wrFTmKqkbSLckkaH0dwqN4/DNHm8AdlZa3U7evA6/ahaMBkUrB3+r8T9UEQH6clIUFHfIIWi0VBGxpCryWW+novtWX+wK6rw39yao4MPmaPS6/TS0udA1TQ6yBS00RY+XrC6rZjpgNDRgbu+jq8TX2Dc01oKPrkZAzp6Yf1TudxoMHDunf3U7+vA61eg9ftI3qMiYk5bkxfLMO+bDnelhYUoxHjhAkouuHpnWltbSUiImJY6h61fD7czc20tnhpNabSas2mPTwTnzYIRfUR5j1AlLeeKG8dVm8TmiMCRp/dTmejnS/Sv4Xdkkpq5UekVX6APjoKfVISmtDj94yea87J/UwMuaCxacQ9/PC
Az0vQNoDjBm0f/QQadgztm8YVwJxfHrPIsYK2v/zlL6xatYrXXnst8Nj+/fuZNWsWNTU1fYI2gGnTpvG73/2O8847j5kzZ/Loo49y8cUXn3CzJWg7vVRVxV1VhTYiAq3F0ufxQ8NYUMHnU4lNszD9mnTiU4y0/f0NWv78Z7wdHRgnTkQxfHkC100oe3TjsWnHosWLV9GjdztIqVpKYt0qjMmJgROyoLQ0/1C9Xf5gpreiItAjpYuJISg1FTQDXGlVVdw2W5+heYf3/Oiioml99VV6ysow5OYSc/99hM6cecxhQL7eXro++wz70mX0lpfj7ewM/OB29ymrCQsj7Oq5WL/+dQyZmSex9o9NVVU8TU3+ZTysN83T1oo+Lr5PL1xQctJRAevJ8Hl9lK1vYON/K3C09ZCcF8G0q8cSk/LlvtHV3sPiP++gobyTotljmHp1Opp+hs2oqoqttI117+6nqdqONS6EjMmxJOdYiRlgkrrT0cuSF0uo3d1GwUWJnP/1zAHnYNTtbWfdv/cHhhgGRTu54IqJwzJHx9PWhmPFCjoXL6Fr3Tp/AJeUhHl2MYbMTBwrPsGxejWqy4U2KgrdpV/BnjmTA73hKBrFP08nJ4LQAYYHqT4fnuZm3DYb3RXV1O9po77eR1uPEcVgQDEEozEYUIL9vzkYlISY9cQaOzHv+RTviv/iaWpCMRgInXkBluJiFJ3Ov98c7Jlz1dTS0h1Ca3gWXaHxWDqriGgrw+SoQaP23zujMZnQJyejT0zCFZdBS/AYmnrCcHsHHqal+DyEdZRj2r6U0PoSdCFGf+/2FbMJveAC/zLgH67srq3t00Z3dTWuPXvw1NcH6gtKScGQl09b0mRqXNHEdJZhXvMmvtZWFKMR06yLsMy+AtOFM9GEhJzsZj6u0TBqYKSoqoq3tRV3TQ3OShsNe5upq/XQ2G2mXYkERYNW9RDhayTKV0+0rwGz2kZ7cDKbNDPwouOC6Roypo9Bn5jY78Ut4Xcu72fi9BntQdvoGxg9ipWUlDBp0qQ+j6Wnp+NwOOjs7OzzeHV1NS6Xi/HjxwdeW1RUdNraejbwtLbi6+pCn5R0wpN1vQ4H7ro6glJSAidDx6KqKj1lZXQuWYJ98RJ6KytBr8c0YwbmK67AeOEsPvuwntLP6hmTH8llt+VSsb2Zz/9TzrvPbSXSsZ+0sn8SXzSemPvuIzjPP2baae9l80dV7FhtQ0GhcFYiRVeksHLpZ/RWxLNPfy21uVeR7dtB1LaP6Pzww0CbdLGxBOfnY/nKlQTn52PMz0cXHd1v+/3zUVqp3d2OzqAlITmISHct6j5/D51r507sixcDoB8zhoRfP4NlzhyUgYK/w2iCgjBffDHmIy44qKqKr6ubpn1N1Oxq4YCtm4j0GELHxaBNtQxQW1/dnb3+OVZlbaBCYrb1mHONFEVBHxPjT2ZynM+Ty+GmYvMBaspa8bl9JOZYBz3/xOv1caCiMzC3p72xm5hUC5fekktSztFXe0PDDVxzfxFr3t7DliXVNNU4KP6f/D4T7xsrOln37n5qd7dhjgjm0ltzyTovrt/g7nBGUxBz753Aun/vZ9vyGpprHcz+9rg+66jZZmf9u+VU7WwhNCyIWTdlkzMjnjVrVpOYNfDwzFOhs1oJnzeP8Hnz8La3Y/94BZ1LFtP66mvg8aCNjiL82msxXzGbkEmTULTHn3dyOEWjCWzrkKIiooCCE6phBuqCB3Fu2ULn4iXYly7FsfzjwLPaqCiCkpIwFRUSkZzkH24bFwva8wes0WfvpNdmw22r9V80qCjHu3oVYb29DObygMZkwnTJxViuuJPQ88/v99ikCQnBkJnZ70UPT0tL4GKOq6QE5/at6D76kDRACQnBNOsizIcCNQkAhp2iKOgiI9FFRmIsLCQCODRbpqfbTe2e9oPzBC2UNiRSij/RS0+XB0u0kTl3FhARL71qQojBGb1B23F6xEarf/zjH6xevZqysjIWLlxIcPDRmbd
27NjB/PnzsdvtPPHEE9xwww0j0NLRydPUROeyZdgXL6F70ybw+dBYLAcTQOQFhrDpx4wJBHJeux3XrtI+SSB6Kyv9Fep0GDIyAvOTjPn5GLKz0QQH+wO10lI6Fy+hc8li3FXVoNEQMvU8rPO/hdtWS+eSxTSv387OcW10mlMYN9bFjG+loAvVk2TfgW77QircKVSNvZJNk39CxqQYLJGpaI7I3pYzI54pX0kLZGIzRijMuXYKtt1trH93P9sqigifeQFTLo0hMbQNQ0oKuqioAdeTq8vdJ5tVe6N/no3RrMfT66NktffghO1CkmZdQtL/WkmOBF+9jeDc3FPKDtnZcjAxw8GkBU67v7fNFGGg6uM6tiyrRafXEJ8ZTlJ236QNvS4P9fsOTgYvbaOl1t+rbQjxH4pK1/p7EiISQv0ZtnIHl/4ZDk5i3/flRPOmGjuoEBSsRaPTBCbyW+NCAtnxDiV5UFWV1rquQFKIuj3tuHv86zA21cKcOwuOGgp7JK1ew6ybcohJsbDqrd3888mNzLmzAI1Ww4b3yynf2oTRrGfmDZnkX5B4Qj1fGq2G86/LJCbFworXSvnnExu54rsFGM16Nrxfwd6NjRhCdEy/Np3xs5IGNTF/KGnDwwmfdy3h867F29FBr81GcE7OCQdqQ03RaAiZPJmQyZOJffghXLtKUYL0BCUlDVnvk+rz4Wlqxm2rwedyDVhOExRE8IQJfeZgnihdZCSmmTMxzZwZeMzT2kpvVRXBublo+vm+ESPDEKJnbGE0Ywv9F9oOJQGqKW1FH6Rl+rUZo+I2A0KIM4ccMU5AXl4eq1ev7vNYeXk5JpMJy8FhdDfccAMLFy5k06ZNFBcXM3fuXOLi4sjPz2fLli1cfPHFFBQUsG3bNu6++26cTmd/b3VO8TQ10bl06ZeBmqoSNHYsUXd+F11sXODKcttrf6P14HA8jdmMITvLP1SuqjpQlzY+gZ686bRPu5VufTgpSgW6/ZtwfLyCjnf+dbCQFkN6Oj6XC3d1NWi1hE49j8jb/wfz5ZehO2zcvOdr32b1H7bhcXkorF1ExMpP2Pc3Pfq4ONw1NRgyM5jxw2u5aNpMti+vYdvHNezf2kRQsJaebs9xs7clZVuZ96NJVGxvZv27+1n2RiWRSSbCouqB+n5f42hz0VRt9yd5MGhJzApn3IWJgRTCPp/KgUr7wQxcrWxbVs2WJVVo9RrixlowrC07qe10KLDpOJg8IcQSRHJeRCB9sckaTK/Tf++cQ4HTun/vZ92/9xMcqscSbaS5xo7P60/THJ8RxrRrxvqDumT/HKTmGrs/cCptZeeaWravOJj+Oc1yzAyKri43jRWdeD0+NFqFuLFhTL0qzZ8uOsWMoii01DkCWeIOT/UdPcaMva0HZ6d/blJ4bAjZ0+L8QV2W9ZhpqvuTd0ECEYmhLP7jDhb9cjM+rw+dQct5V6Ux4dLkU8r8ljklFmt8CB/9cQf//vUWUEGjU5h0RQoTi8ccM8vf6aINC8M4BMNSh5qi0WAclz8s9epjY0YsA6ouIqLPMUuMTiargZxp8eRMix/ppgghzlAStJ2Am266iSeeeILly5dz2WWX4XQ6uffee/nRj350VNnJkyczf/58fvvb3/Lkk0/y0EMP8eCDD/Lee++RlJQEcE4HbO4DB7AvXYZ98WK6N2/2B2oZ6UR973tYrpjd79AgtbeXnn37DmYfLKFn9x4MmVlor7yeVlM6jd1m6iq66On2QIM/a92+3izSzzuf836RhlntDLzWVbILFIi8438wX345uiMy/Kmqyo6VNj775z7/MJYfFWCNuwzXzp10Ll6Ma2cJUXd9j7Crrgr0JkydO5aCWUlsXlxJV3sPE4tTiB3EMEFFURhbGE3q+Ch2r2+gZE0tHU3dA5Y3hOiZfGWq/2adqZaj5jdptQrx6WHEp4cx5StpfW5CWr+vPZBW/GRY40IomJVEUq6ViPij7zETZNSROj6K1PH+XsLDb0La0ey
k8LJkknIiiE/vP01zTIqFmBQLRbNT8Li9NOzvoKasjbo97cdcJ1q9loJZiSTlRpCQEd7vPamiksxEJZmZePmYPqm+6/a2H+wR9M+xGor7UsWlhfH1h6ew+q09mCODmXRFCkbT0KTtj0oy8/WHprD6zd0Eh+qZdGXqqL53kxBCCCFOnQRtJ8BoNPLee+9xzz33cNddd+H1epk/fz533313v+V//OMfU1RUxMMPP8yVV15JU1MTc+bMwev1Eh4ezrhx45g9e/ZpXoqR4248gH3pUjqXLMa5ecugArXDKUFB6DKy6TEm0hw9nQNpndTubsde6gJ8mKwu0gqjST447E0fpA0MUSzf1nRwiOJMYi6/fMD3OJSSe+/GRsq3NZE6PorLbssLDGMxFhRgLBh4Zk2IJYiZ12ed1PrRaBRyZ8STO2Nor8QGBetILYgitWDg4ZbDJTTMQPbUOLKnxp3wa3V67cFhjEPfi6DVaUjItJKQOTzzvcC/7HO+e2KzsAYrOFRP8R3jhqVuIYQQQow+ErQdob6+/yFphxQUFLBy5cp+n7v11lu59dZbA/8nJCTQ0NAQ+P+WW27hlltuGYpmnjHcjQewL1lM5+IlOLduBVXFkJlB1F13+QO1jIwBX+v1+GiqtnOgyk5TdSdN1XZa6/03SwUINulJyAynaPYY/32S+rlB5nlXHez9OpgMZM+GRsbNSgz0fAw0D0of7B/ONnlO6infQFcIIYQQQohTIUGbGDb2lSupvff7qL29GLKyiLrnbiyzZ2NITz/ua9sauvjwDzv6JNiIHmMmdXwUMWMsRKeYMVkNg8oqaTQHccH1mYy/NImN/63gi49r2PVpHdHJ5qPmQZ331TSSc/3zoDT9pF8XQgghhBDidJOgrR+HsjsezmAwsGHDhhFq0ZnH/skn2O79PsGZmSQ88ysMY8cO+rXl25pY/soudHoNl9+eR3xG+KADtGOxRBq59JY8Ci8fw8b/VNDR7GTcrESScyKIzwg7pQQRQgghhBBCDBc5S+3HoeyO4uTYV3yC7fvfJzgrizF//cugb3Cs+lQ+/28Fmz6sJCbFzBXfLRiSpBBHikwwccUwzTUSQgghhBBiqEnQJoaUfcUKbN+/j+DsbMb89S9ozOZBva4ICDigAAAgAElEQVSn282yl3dRtaOFnBnxXHRjFjr9yN7jSQghhBBCiNFAgjYxZOwff4ztvvsJzslhzF9ewu7SsejnawgxBx11M+PDtdQ5+OgPO7C3uLjoxizyL0w85aGQQgghhBBCnC0kaBNDIhCw5eYy5qUXUUxmPv6j/+a/5sjgL29mrFGISTGTnOsP4ro7e1nxtzL0Bi1XPzCRhIzwkV4UIYQQQgghRhUJ2o4QHh5OQUEBqqqi1WpZuHAhM2bM6LdsZWUlX/3qV9m5c2efx1VV5fHHH+fVV19FURQSExNZuHAh+fn5ADgcDn7wgx+wfPlywsPDMZvNPPXUU0ydOnXYl284dC5bRu39DxCcn8eYl15CazazZUkVDeUdXH57HlnnxeF1+2io6MBW1kZNaSubP6pk04eVAMSmWbjiOwWYrHKDYCGEEEIIIY4kQdsRjEZjIAnJkiVLeOihh1i1atUJ1fHCCy+wdu1atm/fTkhICEuXLmXu3LmUlJQQHBzMHXfcQVpaGnv37kWj0VBRUcGuXbuGY3GGnf3jj6m9/wGM+fkkv/QiWrOZZpuDDe+Xk14UTeaUWAC0eg2JWVYSs6xMnTuWHqeH2t1tOO295EyLR6uX9PpCCCGEEEL0R4K2Y+js7MRqtZ7w65566ilWrVpFSEgIAMXFxcyYMYO///3vzJo1iw0bNvD3v/8djcYfqKSlpZGWljakbT8dvJ2d1D38U4Jzckj+y0toTSa8bh/LX96FIVTPRd/MHnBumsGoY2xh9GlusRBCCCGEEGeeURu0PfX5U5S1lg1pnTkROfz4vB8fs4zT6aSwsBCXy0V9fT0rVqw4offo7Oykq6uLsUfcl2z
y5MmUlJQQHR1NYWEhWu2Znxmx5aW/4OvsJP4Xj6E1mQD4/IMKWmodXPm98RhNQSPcQiGEEEIIIc58ozZoGymHD49ct24dN998Mzt37pRshkdwNzbS+tprWK76KsE5OQA0lHewdUkVuTPiSRsfNcItFEIIIYQQ4uwwaoO24/WInQ7Tp0+nubmZpqYmYmJiBvUai8VCaGgo5eXlfXrbNm/ezEUXXUR+fj7bt2/H6/We0b1tzQtfQPV6ib73XgDcPV6Wv7ILkzWYC76eOcKtE0IIIYQQ4uwh2R+OoaysDK/XS2Rk5Am97oc//CH33nsvTqcTgOXLl/Ppp5/yzW9+k/T0dCZPnsyCBQtQVRXwZ6H84IMPhrz9w6WnvJz2d97BeuM3CEpKAmDdv/bRccDJpbfkEmQctdcChBBCCCGEOOPI2fURDs1pA3/q/ldfffWYPWK7d+8m6WDgAvDcc89xzz330NbWRkFBAVqtlri4ON577z2MRiMAL730Ej/4wQ/IyMjAaDQSFRXFr371q+FdsCHU9NxzaIxGou68E4CaXa3sWFXLhEuSScw+8cQtQgghhBBCiIFJ0HaE9vZ2zGbzoMqmpqbidrv7fW7BggUsWLCg3+csFgsvvvjiSbdxJDm3bcO+bDnR378XXUQEPd1uPn6tFGtcCNOuGXv8CoQQQgghhBAnRIZHikFTVZXGZ55BGxVFxC23APDp23vp7uzlstvy0AWduXP0hBBCCCGEGK2kp20QduzYwfz58/s8ZjAY2LBhwwi1aGQ4Vq3CuWkzcQv+H5qQEGpKWylb38CkOSnEpFhGunlCCCGEEEKclSRoG4SCgoLAbQDOVarXS9Ovn0WfMobw667D0+tl5Ru7CYs2MvnK1JFunhBCCCGEEGctGR4pBqXj/f/Qs3cvMfffj6LXs+nDSjqbnFx0UzY6vQyLFEIIIYQQYrhI0CaOy9fTQ9PzvyN43DjMs2fTUudg69JqsqfFkZwTMdLNE0IIIYQQ4qwmQZs4rrY33sRTV0/Mgz8AFVa+vpsgo47zr8sY6aYJIYQQQghx1htU0KYoyhWKouxWFGWfoig/6ef5FEVRPlYU5QtFUVYqipLUXz3izOPt7KTlj38k9IILCJ02jZJP62go7+D86zIwmoJGunlCCCGEEEKc9Y4btCmKogVeAOYAecCNiqLkHVHsGeA1VVXHA48CTw51Q0+X8PBwCgsLmTBhAkVFRaxdu3bAspWVlRiNRgoLC8nLy+POO+/E5/P1efzQz2uvvQb47+1WUFDA+PHjueiii6iqqjpdi3ZSDjz3HN6ODmIeuJ+ujh7W/Xs/idnhZE+LG+mmCSGEEEIIcU4YTPbI84B9qqqWAyiK8hZwNbDrsDJ5wAMH//4EeHcoG3k6GY3GQKbIJUuW8NBDD7Fq1aoBy6enp7Nt2zY8Hg+XXHIJ7777LkVFRYHH+/PJJ58QFRXFggUL+MUvfjFqb7RtX7GC9jffIuK22wjOy2PJSzvxun3M+mYOiqKMdPOEEEIIIYQ4JwwmaEsEag773wZMPaLMduBa4LfA1wCzoiiRqqq2HF5IUZTvAN8BiI2NZeXKlX0qCQsLw263A9D+62fp3bNn0AsyGEFZWYT/4IHjljvUhsbGRsxmc+D/IzkcDnw+X+D5yZMnU1JSQlZWVp/HD6eqKg6HA4PBwIQJE1i3bt2A9R/icrmOWlfDTdPeTuRjv8CbnExp0UQ+f+MTqjepRI9T2Fb6OZSe1uacdRwOx2nfpuLcI/uZGG6yj4nTQfYzcTqM9v1sqO7T9iCwUFGUW4HVQC3gPbKQqqp/Bv4MMHnyZHXWrFl9ni8tLcVsNgPQFaTHpx3aVPL6IH2g/oE4nU5mzpyJy+Wivr6eFStWDPgak8mERqPBbDbT3d3NmjVrePTRRzGZTFRUVDBz5sxA2eeff56ZM2eiKAomkwmz2czq1au57rrrjtum4OBgJk6ceOILfJJUn4+aO+6g2+cj409/QpM4hjd/vgF
rnIZ53z0PrV7y15yqlStXcuT+L8RQk/1MDDfZx8TpIPuZOB1G+342mKCtFkg+7P+kg48FqKpah7+nDUVRTMA8VVXbT6VhcQ8/fCovP2mHD49ct24dN998Mzt37hxwOOD+/fspLCxEURSuvvpq5syZQ2Vl5TGHR1588cW0trZiMpl47LHHhm1ZTlbry6/QtXYdcY/+HMPYNNa+sw97q4uv/aBIAjYhhBBCCCFOs8GcgW8EMhVFSVMUJQj4BvD+4QUURYlSFOVQXQ8Bfx3aZo6M6dOn09zcTFNT04BlDgVnW7du5ZFHHhlUvZ988glVVVUUFhayYMGCIWrt0HCWlHDgN7/BfPnlhH/96zRV29n2cQ1558eTkBk+0s0TQgghhBDinHPcoE1VVQ9wN7AE/0ymt1VVLVEU5VFFUeYeLDYL2K0oyh4gFnh8mNp7WpWVleH1eomMjBzyunU6Hb/5zW947bXXaG1tHfL6T4avu5u6HzyILiKC+McepaPJyX8XbifEEsT0a+WebEIIIYQQQoyEQc1pU1X1Q+DDIx77f4f9vQhYNLRNGxlOp5PCwkLAnzTk1VdfRXsSc+sODZs85Pbbb+fee+/tUyY+Pp4bb7yRF154gf/7v/87tYYPgcYnn6S3qooxL79Mty+Y93+zBZ9P5Wv3TyQ4VD/SzTutVFWVDJlCCCGEEGJUGKpEJGeN9vb24yYGOSQ1NZWdO3f2+7jT6ez3NZWVlX3+f/7550+4jcOhc8lS2v+5iMjvfAclbyLv/3oLPd1urnmgiIj40JFu3mn1We1nPLLuEa7Pup5vj//2SDdHCCGEEEKc4ySrhMBdX0/9//t/BBcUYPmfO/nP89twtLr4yt0TiB4zuAD2bOD2unl207PcufxOWp2t/H7776noqBjpZgkhhBBCiHOcBG2DsGPHDgoLC/v8TJ165K3qzkyqz0fdj36M6nYT88RTfPCnXbTWdTHnzgISMkZv4pF2VzuL9ixiceViajprUFX1lOqr6azh5o9u5uWSl7k+63re/9r7BGuDeXLDk6dctxBCCCGEEKdChkcOQkFBwYDp+890rpJddG/cSNRPf8bHH7bTWNHB7G+PY0z+0CdfOdKbZW/y0hcvMS1hGrNTZzM9fjp67cBz51RVZVPjJv65558sr1qO2+cOPGcOMpMXkUde5Jc/yebkQc1L+6D8Ax5b/xgaRcOzs57l8pTLAbh74t388vNfsqxqGcWpxae+wCPE5XHxWd1nLK1cyubGzVwy5hK+M/47RBmjRrppQgghhBBiECRoO8e5SnbiUzR83pxJze42Lr0ll/SimGF/39d3vc5TG58i25rNJ9Wf8P7+9zHrzVw85mKKU4qZnjCdIG0QAK2uVt7f9z7v7H2Hys5KzHoz12Vdx9cyvoYPH7tadgV+Xi99PRDMmfVmciNzjwrkNAfvTtHt7uaJDU/w3v73mBgzkV/O/CUJpoRAG2/IvoF3973L0xuf5oLECwjRhwz7ehkqLo+LT2s/ZWnVUlbVrKLb0024IZyCqALe3v027+57l2/lfovbxt2GOejcGQIrhBBCCHEmkqDtHNe9Yye7x91G/W4HM2/IJGd6/LC/5992/Y2nNz7NZWMu4+mLngYV1tWvY0nlEj6p8QdwJr2JWcmz8Pg8LK9ejsfnYWLMRL49/ttcnnI5Rp0xUF9+ZH7gb7fXzb72fX0CuTdK36DX1wuASW8iNzKX3IhcVttWU9VZxXfGf4f/nfC/6DR9Pw46jY6fTv0p8z+azx+/+CMPTHrguMumqirPbXmOpZVLiQ+NJ8mcRJIpyf/74N8RwRFDtCb9fKqPZmczNruNans1n9V+xirbKpweJ1aDlTlpcyhOLWZK3BT0Gj1VnVW8sPUFXtzxIm/veZs7xt3BN3K+QbAueEjbNRiqqlLrqKW0tZTYkFjGR48/7W0YjXq8PWw7sA2b3YY5yIzFYMESdPDHYMGkNwUuPggxHFRVpbKzkpKWEopiivpc0Bo
K3e5uPq7+mMbuRjp7OunsPfjT00lHbwedPZ0oisLMxJkUpxZTFFOEVnPimZxHE6fHydYDW9nXtu/L5T24zIf+tvfa0Wl0X37eD37mD/0dbggn3uT/bkk0JWLQGkZ6sQbk9rlpcDRQY6+hobsBq8EaaPeZdBFUiNFCgrZzXM2+LuojL2HKV1IZf3HysL/fayWv8atNv+LylMt56sKn0Gv8wyEvTLqQC5MuxO11s65+HUsrl7KiZgUKCt/I/gbzMueRYT3+veL0Wr0/KIvMZR7zAP8Xx/72/X0CubfK3iI8OJyXil/ivPjzBqyvMKaQazKu4W8lf+Oa9GsYGz52wLKqqvL4hsf5x+5/MC1+Gr3eXtbWruWA80CfckadEati5Z0V7wQCumRzMkmmJBJMCYHgye1zB77M7b32wJd7i6sFm92GzWHDZrdR66ilx9sTqN9qsPKVsV+hOMUfqB0ZjKZYUnj6oqe5bdxt/Hbrb/n15l/zt9K/8b0J3+PqjKuPKj8YLo+L1bbV+FTfUScZ5iAzWo0WVVWxOWx9tsOull109nYG6pmTNocHJz9ITMjgens7ejr4686/sqJ6BSmWlD69qseq4/BAt6GrgUmxk4gNjT3h5e6PqqrsadvDsqplfFLzCQatoU+70sPTA/v94e0pay1jff161tetZ8uBLX226ZE0igaT3kSyOblPb3JWeNYxhxgLcSzNzubAPri+fj2N3Y0AKCicn3g+12Vdx4VJFx61/56Idlc7b5a9yRtlb9De0w5AkCaozzEjJiSGjPAMut3dvLvvXd7a/RZRxiguHXMps1NnDxjA+VQfB7oPYLPbqOuqw+11H1VmqEQERxw3APH6vOxq2eVfp/Xr2Xpga2AkiIKCKcjUJzCLCYnBEmTxH/sPHu+r7dV0tvi/A5yeo7NSx4TE9LkwGGWMQjMC6QpUVNp72v3fTQe/n+q76vGpvn7LRwZH9rmYGRMSg1YZOCjfbd9Ny56W4Wq+OEdYg61cMuaSkW7GSVNGKsnC5MmT1U2bNvV5rLS0lNzc3BFpzyF2u33QKf9Pl+FaL76eHt689RV6IlO45bfFaLXDe6B/teRVntn0zFEB20A8Pg/ASQURx+P2udGgGdSV2xZnC1e9exV5EXm8WPxiv/PkfKqPJzY8wT92/4Pbxt3G/UX3B8q5PC7qHHXYHDZq7DXY7Da2V27HaXBS66g96ovYarDi8rr6/YI+JFQfGgj0Du/NSzQlkmROOqF1trFhI7/Z8hu+aPqCMeYxXJ99PXPT52INth73tR09Hby9+21eL32dVtfAN4k36U0AONwOwL9NM8MzA8FGbkQuq2tX89cdf0Wn0XHnhDv5Vu63BgxAut3dvFH2Bn/d8VccbgfT4qfR2N1IRUcFKv5jWpQxKlC/WW+m1lE7YKBr0pv44ZQf8rWMr53U/fkOBWpLKpewrGoZlZ2VaBQNk2InAVDaUhpY9iBNEFnWrMBw3R3NO/i84fPACWxGeAbT4qcxPWE6GeEZONyOo3oiOns76ejpoKKjgl2tu7D32o9arzkROVTtqyInJ6ffNhu0hsD+Em4IPyPuS2iz29h6YOuAJ4IA8aHxFMUWDctx40x3+IWgw3u1SppLWF+/nn3t+wAIM4QxNW4q0xKmkRuRyyrbKv61918c6D5AtDGaazKu4drMa0kyJ7Fy5UpmzZp13Peuc9Tx2q7X+Nfef+H0OLko6SJuzb+VcVHjjtnL3+3uZnXtapZWLmWNbQ0ur4vI4EguS7mMZHMytY7awHG1zlEXGFVxOh0ZgJiDzGw9sJXPGz4PfDazrdlMi5/GtIRpjIsch8VgOeHecrfXTVtPG3WOOv8yH3Y8s9ltgSB7JB25Lg79jguN+zKoO9juwQR3Qgyl8VHj+ftX/j7g84M9ng01RVE2q6o6+bjlJGjry2Qy4XA4+n2usrKS3NxcsrOz6e3t5cILL+T3v/891dXVgccPeeCBB7j55ptJTU3FbDajKApWq5X
XXnuNlJSUE2rTcK2X6mWb+c87HUwa52Xa3ZcPef2He2XnK/x6868pTinmlxf+8pSu1I6Et8re4vENj/P0hU8zJ21On+d8qo9frP8F/9zzT24fdzv3Fd133BPgQwcGVVWP6jlr7G7EqDP2OzTGYrAQYYggzBA2pCfZqqqyomYFr+x8hW1N29Br9Fw25jKuy7qOKXFTjnqvxq5G/rbrb/xzzz/p9nRzfuL53JJ3C9HG6AGH/Xh8HrKsWeRH5ZMZnhmYs3i4ms4ant74NCttK0kLS+PhqQ8zLX5a4Hm31807e9/hT1/8iWZnM7OSZnFP0T1kWbMA/wleWWtZoBevtLWU8o5yfKqPUH3oUUFukjkJk97Eb7f8lk2Nm5iRMINHpj9CvGlww4T3t+/ng/IPWFq1lKrOKjSKhilxUyhOKebSMZcSafQn9PGpPmrsNX16GEtbSrG77cSGxAZO5qbGTSU6JPqEt53NbqOktaRP3Yf3YB5PqD7UH8Adtl4sQZYBy2sVLRNjJg5Z7+Sx1NhrWFq5lKVVS9nVsmtQr7EarFyacimXp1zOeXHnHTOAq3fUB3pC9rTtIdoYffRJ53HWx7F4fB7KO8qpsdeQakkl1ZJ6Wob5qarKlgNbWLRnEZsaN9HR0zHghSCD1kBRTBHTEqYxLX4aORE5RwUUHp+HT2s/ZdGeRaypXYOqqkxPmE5aTxozJ87sc6wyB5kD63xP2x5e3vkyH1V8hILClWOv5Nb8W8m0Zp7wMnW7u1lTu8YfwNWuwelxBnqcjwwSDh+1MNRUVfX31DtsRwUhDd0N+FQfCaEJgfV5Xtx5gWPBcOrx9tDmahv29xmIJchyUsMe3T43rc7WwAW3/qxbt47p06efSvOEQK/RH/OzKEHbAM7UoO2rX/0qO3fuxOPxcMkll3DfffdRVFQUePxIqampbNq0iaioKBYsWEBdXR0vvvjiCbVpuNbLf/7vQ+rqfXzr4QmEjh2+oZEv73yZZzc/y+zU2fxy5i/PyCvgXp+XGz+4kWZnM//52n8I1ftvOO5TfTy2/jEW7VnEHQV3cO/EewcVTI3UgWEw9rXt45297/D+/vfp7O1kjHkM87LmcXX61XT0dPByycv8t/y/qKrK7NTZ3D7udrIjso9f8QlYbVvNkxuexOawUZxSzIOTH2TLgS0s3LoQm8NGUUwR9026j4kxE49bV7e7m15v7zEDXZ/q4+3db/Ps5mfRKBoenPwg8zLn9Vve5XGxtGopi/YsYuuBrWgVrT9QS/UHaoOds+hTfbT3tGM1WIe8l0tVVQ50H2DN2jVMmzat3zJOjzNwhf7IK/Yur2tQ71MYXUhxajGXp1xOXGjckLW/urOapVVLWVq5lNLWUgAKogooTinmgsQLBjwZV1HZ3bqbpZVLWWlbidPjJNwQzqVjLvUPF46fQre7m40NGwOBWlVnFeDvIciPyqfV2YrNYQv0fB5iCbL0O0c1yezvRdBr9Hh8nr5DsVt3sad1T5/1adQZyYnICfQw50XmkRaWNmTHxXZXO+/vf59FexdR0VGBSW9iZtJMooxRA14IOtG5UQ1dDfx77795Z+87A/bumPQmTEEmGroaMOqMzMucx815Nw/6gsjxuDwuerw9WIIso6qX2O31D22MCI4YVe06043m70xx9pCgbQDHC9rWvL2H5pr+g6eTFZVsYub1WccsM9igDeAnP/kJERERXH/99YMK2hYvXszvfvc7PvzwwxNq93AEbR1NTl7/v7WkNq7myn89MuRfLk6Pk1p7LUuqlvDH7X/kitQreHLmk2dkwHbIF01fcNOHN3FL3i08OOVBfKqPR9c9yjt73+HbBd/mnon3DHo9nglfQC6Pi2VVy1i0ZxFbDmxBp+jwqB6CtcFcm3ktN+ffTKIpcdjev8fbw8s7X+alHS/R6+1FRSXbms33i77PBYkXDMsJkc1uY8HaBXze8DnT4qfx8xk/DyRg2Nu2l0V7FvGf8v9g77WTYknhuszruCr
9qtNyFf1knMx+dqgXocvdNWAZp8cZ6PHY3bYbgAnREyhOKaY4tfiEArgebw972/YGAp0vmr9gb9tewD+U5VBQeKKJMFweF5/VfsaSqiWBDKomvYkudxcqKkadkSlxU/y9nPHTyAjP6LNP2XvtXwa1Rwa2Dltg+Db4ex5jQmJodbUGht2G6EICwVleZB4plhQqOysDy1nWWhbo+QrWBpMRnvFlj9FhAWFsSOxxe+YO3Q5l0Z5FLKtahtvnZnz0eK7LvI7ZqbOHLemD1+fl7eVvk1OY0+8Q3s7eTlItqVyffT1hhrBhaYM4N5wJ35nizDfag7Yz9wx6hHV3d/Pxxx/z6KOPArB//34KCwsDzz///PPMnDmzz2sWL17MNddcc1rbOZDty6tRVB+ZEa3HPPl1eVxsatw04HhzVT04+fiIMerNzuZAmTmpc3hi5hNndMAGMD56PPMy5/F66evMzZjLG6VvnFTAdqYI1gVzVfpVXJV+FeXt5by3/z1CdCFcn339oOa7nSqD1sCdE+7kqvSreLP0TXIjc5mTNmdYsyYmmZN4sfhFFu1ZxK83/Zqvvfc1vpn7TTY2bGR703b/sNGUy/h61teZHDv5rNvmAIqiEB0STTTHHqaZG5nLd8Z/h8qOSpZVLWNp1VJ+telX/GrTr8i0ZhIVHHVUj86hv1tdrYHhq/va9uFR/QFQmCGMvIg8rp58NcUpxafUKxOsC+bSlEu5NOVSXB4Xa+vWstq2mtiQWKbGT6UguuCYw7TNQWZyInLIiTh6TqDX56XJ2RSYS2Vz+IO5yODIPkHakfvq+OjxzE2fG6jj8CBuX/s+drbsZFnVssD6AP88xYTQhEDvfn86ejqo66rDrDfz9ayvMy9rXmDI8HDSarTEB8UPqsdbCCHEqRm1PW0jZbBz2hRF4eqrr+aRRx45qgfucIfmtLW2tmIymdi0adMJJzoZ6vXidPTy2kNria5ZywUXhRBz/30Dln1287O8vPPl49apoBAXGnfUvIJkczL5UflnTXryNlcbV717FV6fF4fbwXfHf5e7Cu864ZN3uWo4+tU56liwdgHr69eTaknluqzrBp2gZbQ43ftZVWcVy6qWsaVxSyBt++FzGg8Xbgjvk1UzLzKPhNCEszIQPhEen4fG7sajeviOlZhIr9Fz8ZiLj7odyukgxzJxOsh+Jk4H6Wk7y6Snp7Nt27YTes0nn3xCeHg4N910EwsWLODZZ58dptYNzs5VtXjcPsZULyM4/6EBy7k8Lv61919cmHQh/zvhfwcsZ9KbSDAl9JtY4mxjDbZyf9H9PLLuEe6ccCffm/C9c/4k82yVYErgz5f/mcbuRmJDYmU7D0KKJYU7Cu6Agr6Pq6qK0+MMZL00B5mJD42XddoPnUZHoimRRFMiU+OnjnRzhBBCjBIStJ0mOp2O3/zmNxQUFPCzn/2MiIihvcHyYHl6vexYaSMhwkVodwPGceMGLPtRxUd09HQE0jILv3lZ8zg/8Xw5kT8HKIoypAk2zlWKohCiDyFEHyLrUwghhDgJZ8eYtVHg0Jy2Qz+/+93vjioTHx/PjTfeyAsvvDACLfQrW9+A0+5mrLsErdWKLr7/OSOqqvLW7rfICM9gcuxxe2zPOXGhcRKwCSGEEEKI00J62o5QX18/4HOpqakDzltzOvufb1BZWdnn/+eff/6U2ncqfD6VbcuqiUkxY1r/Gfpx4wYMPHY072BXyy5+OvWnEpwIIYQQQggxgqSn7RxSsb2JjiYnE2bF07tvH8H5eQOWfavsLUL1oVyVftVpbKEQQgghhBDiSNLT1o8dO3Ywf/78Po8ZDAY2bNgwQi06daqqsnVpNZaoYBINTVR7vQPOZ2t1tbK4cjHzMucdM820EEIIIYQQYvhJ0NaPgoKCE84QOdrV7++gsaKTmTdk0bNrFQDB+fn9lv3X3n/h9rm5MefG09lEIYQQQgghRD9keOQ5YuvSagyhOnJnxOMqKUEbGYku7ugsbl6fl7d
3v83UuKmMDR87Ai0VQgghzlFuF3h6R7oVQohRSHrazgFtDV1UftHM5CtT0Ru0uHbuJDg/r98EI6tsqygzb0MAACAASURBVKjvqudHU340Ai0VQggxKnQ1Q8UqsG2GsCRIKIS48WAwnXrdqgp1W2HXuxAaDfGFED8egsMOK6LS6/Vh0GlP/f3OBF43rP8DrHoKvL0Qk+df5/GF/t8xeaAzjHQrhRAj6P+zd9/xUVX5/8dfd1pm0ishvVKTUCO9I1Ysq66K3V1Xt7q7lv26VXddt6tb9KerrmVXEMEGKgIqAtJCDYRQkpCEhCSkkJ6ZybT7++OGEghkEhISwuf5eMwjZOaWM5lJuO8553yOhLZLQPaXpegNOjJmxeKx2Wg9dIiAeZd3uO3iA4uJ9I1kVtysC9tIIYTobQ4r6PQXx8WvqkJdMVRkw9EcCIyBEdeB/6DeOZ/DCiWboHCtdjuao92vN2khAgAFwodA1OgTYULvsnp/Dnsj5CyFHW/C0T2gM4DHdeJhNTSFhuCR7HIlsKxqEGsbY5gzOpWHZqYwbHBAzz3P/NXa+ZNmQsJU0PfxpVDhOljxONQchCFXwKARUJ4NuR9qPysAnREiR0L0WEicrrXdP6JPmy2EuLAktF0Cyg7WkZAehm+gCeuuXeDxdDifrbihmM0Vm/nhmB9i0MlbQwjRTW5X318I2xuhYrcWesqzta/HDgEqGCxgCQZz8ClfQyAwGiZ9D/zCe64dqgqlWZC3EvQ+HZy37avTelp7d4O9XjuGogPVAyse00LGyBtgxPUQENn9drld2rkKv9JCQ2mWFs70JoibCHN+DcmztYBmPdb+51i8UQtfwHSA/akne4Sixmj7mANPPv+yHbDjDdj7gfY8IzPg2mch45uoLgdFORspzd2EUrGb5GObma18zGzAbdKTtW8kb+6ZgDX5au6cM57LEkO6vgyNowXyVsG+ZVpgc7YFza+fBd9wLQyn3QgJ0y7s+7ahDM+qX6Lb9yF2/ziyxv+LraYJDA6wMO3acBJDLSj1h9v/7PeeEuQi0yF5lnZLmAImKRwmxEAmV+aniYqKorm5ucPHiouLmT9//hlrtamqyjPPPMNbb72FoijExMTwwgsvkNYWjJqbm3n00Uf54osvCA4OJiAggD//+c9MnDix158PgMPmwhxgBMC+NxfouAjJuwffxaAzcPPQmy9Iu4QQA0TLMW0o3fFemsYyiL3s5AVlzHjQG3u3DaqqDbfb/7F2gVt76ORjgTFamMj4ptbTZqvXApGtHuwN0HAEKvdCY7l2QXzt3yDtG+fXHmst7F6sHa/mICh6UN2d76czQmSaFiJOHRp37JD2/HI/0sLbise1AJd2Y1sPXCScK8yoKtTkn3ydir6G1gbtscEZMPG72msVPxlMvid2a3W5aVCDaAiaQoPpMhoinTQMc9JafxRLTQ6G0s2MMVQRUbgBn73vnTxfaIoW3mrytJ+t0Q9P+i1UDVlAgWEIJXU2Cj4v4/P9RymtNWLQzWJK6i1cmzGYKxINhDTsQ1+8gYl7P2JK/X9wlb7BltdH8O/g2QyfdQczxo5Epzv5fF1uDxUNdkprrZTUWqmsqSH+2AZG1K4hpWETRk8rzcZQDoZcxYHQuVT4DSepIYsRtWtI3bUY0443aDGEkBcyk/2hczgcOA5V6dolktnVSHzjTgZb82g2hlHnE0O9OZoGUxQe3cn3v9VqJa3kbW5oXIROdfOS62ZerrmO1hoTOuUQHlXbLibYwtTUMKamTmDKpGuICPABj7stbK/Vbltfgc0vaO+buAknf+eix/X9Byeqir3qEEU5G2gs2Ys5PJ7E9KkEJYzq2b8Hble7HtszKDowmHrufMepqvZBh6qefRu9UfubI0QPkNDWA1588UU2bdrE7t278fX1ZfXq1Vx//fXk5uZiNpt54IEHSEpKIj8/H51OR1FREfv27btg7XPY3PiYtZfanpuLPjwcQ2T7T2itTisfFXzEFQlXEG7
pwU+ZRY8qqGomp6yeG8fEyKLnovtKsiB/1bkvNgzms/cKGXygbPuZQ+l8giBpuhYiDm+CtX+CtX8Ekz8kTtOGdCXP0oZ/9eT7t3IffPYzKP4aAqK0kDh6wcmeH2+HkVXug2Xfh6X3aUPTrnm2a0PQVFV73jve1Hp13K0QkwnXvwDpN2k9ba2NYKs7JTS2fdUZtJAzaGTHF5iRI7Xb7F9A1X4tvO07HuAe03rIOnqtLMHQ2qT1pjWVa8cKTtDCXvIsSJpxRs+ix6OyufAYi7aWsDr3KE732d4ng1C4geOPhtHAaH0xU31LGW0rJiVvIy2GYFYFP8yS1okUZOlwb64DtgJg0uuYnBLGj2YPYd7ISEL8TnneEdGQejn6uU/C0RzUnA8Zlf0e05pewL38RbJXZGALGYbHWo/S2oDJ2UggLSQpLYyhBV+lFYBqNYil6kxWqpPY4R6Bp0IHFQA1QAqQgpl7mE42V3q2MKvqM8ZWf0SL6sMBEslVk9lHEvvUJAqJwc3JC3ATDsaSx2Qlh0nKXtI5hF4582flURWOEsoRIilVBzFel0ci5eT4T2XLkMeIiR7Cm6G+xIf5MjjQTGmtlQ0FNWwsqGFVbiVLth8BYPjgAKakhDMmPoqMEd8lYeoj6Fw2KN2ivb6Fa+GrP8BXz4BPoPY7lzxLu4UP7fx3TlW7/3upqlBbiKc8m+q8LJylOwlu2I+/2syI49uUADvBgZEavyEQNYbwoRMwxY3T3vdeBjmPR6XqWA3Nuz/Gkv8xkdUbMXhaz7mPU+eDXR+I3RCA3RBAqz7gxL9dOjMqHT9vBRWT24rZ1YSPuxGzq0m7tf1br54jLLZp1fueOHerIaBdO9xKL3+YJdrRBccy8daLt2ZDvw1tX735ClWHC3v0mIMSkpl934M9ekyAP//5z6xbtw5fX+3TySuuuIIpU6awcOFCZs2aRVZWFgsXLkSn04p1JiUlkZSU1OPt6Ijb6cHt8mCyaP/R2HM7LkLySeEnNDubpcx/P1ZyzMrtr2ymptlBVWMrD81M6esm9QsHjjby5f4qLEY9QRajdvM1nvy3xYjZ2AufdLY2a0OtPC6tV6a3e5J6QmUufPk05H2mffqsnO3nop77k+vjTgyl+1XbULox7T/dt9ZC8YaT4S5vpXa/f+TJAJc8C4Jiuvd87I1aMMx6GXwC4NrnYPx9Xn+yfXrvTL3Nh7Fz3mV82dsY1v9J64269m+QdtPZL2Y9Hq0n6dCXsGshHMvXwuv4e2HcvTD4tPUwfUO12/kYNEK7zf45VB2Agi+gpbp9EGyu0nq5bHVaL0zS9LaQNhNCO/7/p7qplfd2HGHxthIOH7MSZDFy58QEUgf5n/hdCjzl9yrQbGDdunUMHTPxxM+wpHYcu2qtLK+1crjWit6jEOfvy7BIX+aF+hIf6kvcKQFFr+skJCgKRI3CGDUK47zf4Crfw6F1C4ko+JTQmo+x6rQLYY9fMDpLDDr/UNxBYbgDw9AnTiEifjJ36PTc0ekP9Qbti9MGBV/gV7yB8eXZjD+6DpyfaY8ZLFqv5OB0qC2Cks3gsmu/R7GXQfItbT3L47QhpXXFUHcYXV0x0W23CXUHtCA97x9kDL2CjA5akhjuR2K4H3dNSsDtUcktbzgR4t7OOszrG4sACPAxkBYTSEbMYNJjHiRjzOMkWlrRHf765O/cwRXaQQOiUJNn4jKH4WyuxW2tQ7XVo9jq0Lc2YHA2YnDbqbfEUek3nEr/EVT5D6fKbxgOw2nFZ1QPwfYjRDYfILJ5P4NaDjCo+QBmdzM6IFg1cECNZ7d5GkSPIXLYJIZmZHKkKI/S3E04S3cR1rSftOaPMBUsAsClGKnxS6XSTztvpf8IanxTtB5KVSW3oJXFh74irnodE61fM13ZzWDFyVE1hEWeGVQSdtZX1oCbALeVQJeVoNYWAmkmSCklmBaCaMGHc1frtGKmET8aVD+O4kcDg2lUU2nAj2YseM4S+ABMOAlytxCoaOc
KpIUgpYpBWAmkBQNe/J0VPeZQ9TBAQtslq7GxkZaWFpKT25fHz8zMJDc3l4iICMaMGYNe3zfd4w679gfBZDHgsVppPVRIwLwr2m2jqiqLDy5meOhwRkeM7otmik7Utji4942tON0qs4ZF8MfPDjA4yMwNY7y/2LU73Xy4q4y5wwcxKNDci63tfaqqsrWolpfXHeKrg9Wdbh8XauG2zDi+mRlH5Pk899ZmLXjs+wjyvwCXTbt/7R9h9i+1i3td362koqoqOw7XsWhrCav2HsXjcROWtYZhpmPc73yHKdY1tOr92BL3fQ7E3YHbcPafhVnnIcJgJ0RvJUSxEqg0E6C24OtpwsdjwxmRRl34eBpcRhpsThoanTRUHqXB5sTl8RATrF2gxydeTdDI67WD1pe2Dc1bp82lylmi3R825GSAS5ymXdSe+4nCniXw+a+1cDLuHpj7JPideeHWYHOeEii02/Hvy+psuDxn9oxYjGl8I/b/8ZOW5xn03rdQ936Acu1zJ+eQ1R2GwrU4C9agFK3HYK8F4Ih/Bptjf8EGn2nUlOtpOFRHg20NDVYnbo/KggnxfH92KqF+3g/VsjvdfLqnApvTrf08Q32JDrZgMrS9zwYN126nOB5Gjz/fequTQIuBIKORoGNGgqz1J4KXv4+BzYXHeGdrCatzK3F5VCYkhfLTy4dyVfrgTj/w0OsU4tqC2BSvn1U3KQqGmNEMu2M08BcAenwWl9Gi9RSPuE773uPWhpWeOq9sz1Ktoub4+0/OJzs+h++4wGjtlnB+PxW9TmFUbDCjYoP5/qxUnG4PeZVN7C1rIKesgZyyRt7afBiHy3NiH0UxAvOAecRSxVRdLlPrc5icvQJfWmloCyAN+NGo+tNAJA2qH3aMDGkuI926jRE1q04c75Anir1qEsfUQEYoJaTpighUtL99raqR/Woc6z0TOWIehiVhPMnpmUweEsVo//YFf0ZmjGdkxngArA4X24qOsS93Nw2HthHSsI/0hiIymlYyWvkAAIeq56AaR44niTG6Bqbr9mDERaNPBEXR36R16PUED5vGghA/jPre+7trAjr5iyQuEiM636Rf67ehrTd6xC5FJ0Kb2YD9wEGtCEl6+/lsO6t2kl+Xz1OTn5Ihd/2QzeHmgbe2UVHfwrqMVURUbeI3Md/jsaW7iQjwYUpK58NZa1scPPjf7Ww/XEeQxchT14/s2SGWbqc2XMs/AuImgbF3QqHHo/L5/kpeXneIXSX1hPmZeHTeUO6YGI9OUbTw0MFtY0ENf1udx/Nf5DN3+CAWTIxnxpCIc37KX9VkZ29ZAyXlVcTWrCe56nPiazdh8LTSYgqnKPI6DkXMw1+xMbHwRfzf/zaeDX9Hd/lTkDq3Z4f/daLe6uD9nWW8s7WEgqpm/H0MXDsqCtfRA3zDvZTJdR/jRs+7ppt4xX09ZYU+OPJLungWAxAEBKEooKoOYLNXewaaDcSHHe9lySQychpBCU8S4ygkui6LsMpN+O56G2Xbq1oPYFAshCR2cEvS5p2teFyrdBg9Dtdtiyj3HUlJuZWS2pJ2oayk1kqDzdmuLSG+RuJDfcmICeLajKgTQSgu1Bd/HwPbimvZWFDDhoIaJlc/wQP6FTxy4D1ceZkcCp1OVMMuIpzaMMNjaggbPelscKez0ZNOvTOMQLuRIIuTIAtE+PuQGqH1UtVZnby+sYjF20p5cEYy356WhJ/P2f8LbrA5eXvLYd7YWERNc/ueAJ0CUUGWE22PDDJT3dR6MozW23B3EEbPJdjXyH1TErl9Qjypg3qgrP9AodOfDMajb+/r1mDU60iLDiItOojbLtPuc7o95Fc2s7esgSN1p1fzTAWmkA/kA2aTvt0ohEiLkaEWI8EWE/5mw8m/ic1VWjGc8mxSKrJJKc/WenMHp0PUHSeGHvsMGsEYvZExXXweviYDM4dFMnPYFcApHySrKtQVQXk2popsMsqzyajYiV01YBz7IIy8kcDYywjsww/HhOg
r/Ta0XSwCAwPx8/OjsLCwXW/bjh07mDlzJmlpaezevRu3290nvW0Omzbx3WQxYN+jFVAxp7UfrrP4wGICTAFck3zNBW+fODe3R+XHi3ext7SGdSmLidy/AiwhPG1/nBT/BXzvvwrvfm8qwwcHnvUYRTUt3P/GVsob7Dx53Ug+2VPBT9/dzad7KnjmGxmd9jw53R4+3VPBaxsKKauznTFMKtrQzF2lTxLftBMAl86HyuAxlIVMpCx0IrUBw1AVPS6PSuNpYerU700G3cmhU6fdAi1GlmWX8e/1hRRWtxAXauHpG9L4ZmZcu56AkNN7MKoOwKpf8IPmfKwjRrDdmcD7xeE8vi8On+DB3JoZx62XxaJXFHLKGsgtPUZd8W70lbtJaM0nQ1fEVOUwPoqLSjWYt90zWeGeyHb7MDyNOjgE4IeOX3OdbhOPViwlfuHN5Boz+DLme6ixE/DzOfvvvaIoBJgNBJqNZwzt9DPpzxmqj/c2vrO1hBV7j+JweRgTF8zz1ydxdUQV5sMf4t73EnrVCZn3YpjxMxYERrGgbd+OephOHhvsLjcN1jNfpwabkya7C8spF3/Bpw1H1ekUyupsZwSoA0eb+GJfFQ6355SzpQFpGLmfsUo+Uw0HGNpQSXxTFdGHcwjx1J3RvhZ9EItCfsL/6qZT9lIVbk/liceMeoW4EF9iQ30ZHRfULpTFhfoSaD73MNYr0gZzRdpgACoabGwsGMdzuVdxddEfSalew25DBp8F38ixyClYokYQH+bHt0N9eSrEl0CL4Zyv2Y/mpPK31Qd57vM8/ru5mB/OTmXBxPh2a5EdbbDznw2FLMoqocXhZsbQCL47I5mkCD9KjlnP+Jl+eaCKmuZWwvxMxIX6MiYumOtHR7cbhhjia6TJ7qLB5qTeeuaHGikRflyZ1nmvmuifjHodI6MDGRl99v8Husx/EAyZp92OO585b95SFAhN1m7pN50475Z165g1a1bvnluIfk5CWw94/PHHefjhh1m6dCkWi4UvvviCDRs28O9//xuLxUJmZiZPPvkkTz/9NIqiUFxcTG5uLtdee22vt63VdnJ4pD03F31EOMbIk+v8VFur+eLwF9wx4g4sBkuvt0d4T1VVnlqey/p9JayJeY2oIxvh8qcg81sonzzC/XvfJkOXzWP/+Qmv/GA+0cFnvn7bimv5zn+3o1MUltw9lDH2LO65YjCLSmJ5Zk05855bx2+uS+PmcWf2ulkdLt7dVsprXxdRVm9jyCB/rsmIorHt4q/B5sSvZg/fsv2RYLWRx10PUq0GMU23l6k1e5lQ+084BPWqH5s8aaz1jOYTdToWi++J0BfsayIhzI8gixG7001JrZUth47x4a6ydjUytJ4dGBkVyD8XjOWa9MEYzjUcprXp5Fwnkx8kz8K3MpcZx1YxA8AMtc5wdqxLYMnaBMJoIF1XxENKKT6K1jPj8PHDHp4O8VfROvxaAmMncJui47bTTtVkd1JaZ6WkdizLqu8mpvBd5lS9xcPF32f1ofFke1JPzGcIOu2rr2LHqppPDFcqPmXYUjN+WA0BNOFPI35tN+3fTfjh9CjoHE1c5lPCv+KPMclSSlBdLqw+WTWxZtB0Im/7B4S1n/+oKApG/bkvvkwGHYFmI3Hn3OrsAqOMjIg68yLS3UF4P3lLo9HmZN0p97VaG/GzlRFsLyPCVYFJdbFKfyVBxkjGRvpyw2lBP9KbeVJeigqycMv4WBgfi6regMejMk2vY1o3jzckMoB/353JzpI6/rLyAE99vI/XNhTxyLyhpMcE8er6Qj7KLsPtUZk/KpqHZiaTFn1ywemoIAsTk88cAupye879+4DWs3FeQ4OF6KuRODICSAhAQluXHTx4kNjY2BPfP//88/zoRz+irq6OjIwM9Ho9gwcPZtmyZVgs2kX0a6+9xqOPPkpqaioWi4Xw8HD++te/XpD2Oo6HNrOepty9WE7rZVuStwS36ub2YX0/7EO099K6Qyzbso/Pw/9
F7LEcuO4fWpEFgJtfg5TZjPv0Mf7n+CnPvXKYx370Y4IsJ3sRlmWX8fjS3cwPKODpuO34LfkM3A70wN3A7aGJbGuNZ90HMfw1awz33nwjkZGDqW1x8NamYv67uZg6q5PMhBB+e30ac4YPaldim11vwye/hKBI1Ns+4KmwdNynJK2m5kr0h7/Gt3g9VxWv5Zqmrfw56DOU2b+EUbeds1hEq8vdrqemvMHO5OQwpg8JP/eQTlWFnPdg9a+g+SiMvVsLuser49kbtUV1y7MJrchmVulOLq/fiVPvhz0iHSX+aogbB1FjMIUmY/JiCI7FpGdQoJnxCceLS/wWWh+HrJeYt/EfXNG6A1Xvg2oOQjUHt92ita9Gf8y2RoKsWjVBnf0YekchRmcjBs+5J8e3Gnzx0bUNhSoHguK06oNjFkDUWIgew/5te4kM618Fa/Q6hRA/05m9ol5we1Scbg//1wc9QoqioO8k6HprXHwI73xnEl/n1/DnlQd4ZMluAHwMOhZMiOc705OJC/Xt5CgndRbYhBBCXPwktJ2moqLirI8lJibidDo7fOzJJ5/kySef7PCxwMBAXn311R5pX1cdn9NmxImjsIjAK686+ZjbwZKDS5gZO5O4wO5+ni56wwc7j/D6yq2sCPobMdYS+OYb7deNUhQYexe62AmYFt3D03VPs+JfOcx9+CVMPhbeWL2No+tfZ615PdH2MigNgsxvw6hbtWpyFdkYy7OZXJHNFPt6qHoHXvo/Kk3xLLOP5QvnBMYPm8h3Z6WQmXhapTuXA1b9HLa9plWiu+UNFL+wMwsCmGMhfAGMX6CFqUNrUL78HXz0Pdj4D5j7Gxh2TYefovoY9CRH+JMc0YW5NZX7tLlOhzdA9Fi4fRHEjj+tTW1lsBO1vhIjgNOGSe/jVUDzmo8/zHgcZcrDoHpQjJaz1hfzOcv9OG2nrSfW/quPvUELo8fL2vfkgtD9lF6noB8gax4pisKMoRFMSw1nZe5RjtRZuXlcLGH+Z31HCCGEuIRJaBvgjs9pU0sLzyhCsrJ4JbX2Wu4Y0XkxZHHhbMiv4R/vfcnHfn9isKce5Y7FkHp5xxtHDMXv+2s59M6jXFP4NqXP51LrE8tdDesxGd14oidD5pMw8nqtKtpxqXMBtCBhraXy4Ba+XLOa+IbtfFv3CQ/6LIOGRMi/EUxti/wqCjRVwpJ7tHWBpjysVezzZgFXRdHOmTJHW79qzdOw+A6tTPblT50IUd3SXAUbnoesf2uhbP7ftWqC3l7cG3txWLDhPC7AjRbtFhjVc+0R/Y5Op3BNhrzGQgghzk1CWwdycnK4++67293n4+NDVlZWH7Wo+44Pj3QX7AfAnKaFNlVVWbh/IclByUyKmtRn7RMaj0dlV2k9K3Iq2LZ1E+/7PEOYwY1y1zKIm3DunY1mUu55kc/eH8/EPb/G315BTsxtjLvxx+hOKwXeId9QIsdewx1jr8Hp9qC318GBT7Sy9pv+BRv/ri3GO/xabcFhWz3c/B/IuKXrT1RRtEV9h8+H7IXavLM3r4WUuTDtJ1rlyY4WFj6dx6OVjN/xprYOkcetDR2d+5vzXwNLCCGEEKKfkdDWgYyMDLKzs/u6GT3CYXOhN+hw7s/FEBGBcZBWhGR39W72HdvHryf9Wsr89wBVVTlU3cLGghpMO14hqTmbr1MeIyFpCOkxQQyJ9D9jHRmPR2VnSR2f5lSwcu9RKhrszDbksND0Ir5mM8q9n0Bk2lnOeKarbrqftwZNZ1CAiWvGJHTreRj1Om2tq/H3ajdrrRbgcj+Cra9AYAw88Lm2wOz50Bu044+6Fba+Cl8/C29dB0ZfSJh6cs2uQSPbr3vWdFSbS7fzv1B/GCyhMPG7WmALH3J+bRJCCCGE6Kf6XWhTVVVCxClUtWtr7ZzOYXdhsuix7c3FnH6yCMnC/QsJMAUwP3n++TZxQHC4PO1L0dudmPS6dmXY/U2GdoU4qhrtbDxUw4b8Y2wsqOFoo52bdet
51vQyAMNzd/NY9kP8zDMek0HHiMEBpMcEMTI6kLX7W3li0xqONtox6XXMHhLMW3GfMLTgdQgbDgve0Uoed4GiKNw3vYeDi2+oNtRw3D1aRUaDxbvhkN4yWmDqw5B5PxSu0xZeLlwLq3+pPe4Xoc2bi5uoLch88DNQ3ZA0Q+tVG3Hd+Q1BFEIIIYS4CPSr0GY2mzl27BhhYWES3NAC27FjxzCbu1+m2WFzYfLR4SgsJPDqqwE42nKUzw9/zt0j78bX6H2FsgutqKaFg0cb29aJcp1RHrzJrq3tder6UKeuIeZrMtDc6mxba6r9/qeXHLc53Z22R6dwYm0yBSg+plXuC/E1MiU1nFuC8pi14zVInAVX/pGgDx/itaPPUph8J0tCvkN2hZ3l2eUszCrBoIPZw8N4ImM4l0dZ8f/4ISjeAePvhyv/AKZ++Lr4BPTusUfM124ADUe0EFfUFuT2vge+4TDlhzDu3jNK2AshhBBCDGT9KrTFxsZy5MgRqqur+6wNdrv9vEJSTzObze2WGOgqh92NASeo6okiJEsOLkFF5fbh/a/M/6HqZlbsqeDTnAoOHG1q99ipoSnIYiTQbKTV5aaopuVE+LI7PWc5Mvi1LQZ8/BgJYb7tw57vycAXaDbidHvOGvJanR4WTIhnamo4I6MC0R3dDW8+ChEj4Nb/gTkQ5YEv4PMnSc56iSci98Atr+MJm8SROhu5u7K4+vJMrTz96z/RhgDe+l8YeUNv/4gvDkGxMPZO7aaqUFesDc30Zr6bEEIIIcQA069Cm9FoJCkpqU/bsHbtWsaOHdunbehJDpsLvUPrEbKkpWF32Vmat5RZsbOI8Y/p49ZpCqqaWZFTwYpTgtr4hBB+PX8kE5NCzzo8sSOtLveJnjmrw0WAWQtiAWbDGXPKekzdYVh0K1hC4M6lWgVD0IbtXf0nbW7Wsu/DKzPRXf0X4sfexWHs8NEPIPttbejfza9BcHzvtO9ipygQ2rd/F4QQQggh+lK/Cm2i57XaXPhYIwQe7AAAIABJREFUGzBERmKIiODj/A+pb63nzhF39kl7nG4P+ZXN7C1vYG9ZA1mFtRys1IJaZkIIv5k/kqszBhMV1L0y7D4GPYMC9AzqxZF87Vhr4e2bwWWHe5Z3XJ592FXw3Y3wwXdg+Q8hfzXji3eArRxmPA4zn+jZeWJCCCGEEGJAkSvFAc5hd2FprMU8fDiqqrLowCKGhAzhssGXXZDzF1Y3s624lpyyBnLKGjlQ0UirSxvC6O9jICMmiCevG8nV6VEMDuo/w1K94rTBO7dDfQnc8xGcq7x+YBTcs0xbT+yrP6A3BsG9y7WCGkIIIYQQQpyDV6FNUZSrgH8AeuA1VVX/dNrj8cBbQHDbNk+oqrqih9squsFhc6N3NKOPCmJH5Q4O1B7gqclPXZBCL+/vOMJj7+1GVSHAx0BaTCD3TE4gPSaI9JggksL8Oh3u2G953PD+A1C6Fb75JiRM6XwfnR5mPAZp32Dbzv1Mk8AmhBBCCCG80GloUxRFD7wIzAOOANsURVmuquq+Uzb7FbBEVdWXFEUZCawAEnuhvaILVI+Kw+5C72hBsVhYdGARQT5BXJN8Ta+f+70dR3j8vd1MSQnj6RvSSbzYAlpTJdjqzv741le09cuu+pO2WHRXhKXgMpaeX/uEEEIIIcQlw5uetglAgaqqhQCKoiwGbgBODW0q0FZ9gSCgvCcbKbrH6XCDCnp7Mza9P1+WfMl9afdhMXRvvpi3lm4v5Wfv72FqSjiv3pOJxaTv1fP1mPoS2LdMW0i6bHvn20/+IUz6Xu+3SwghhBBCXNKUzhZvVhTlFuAqVVUfaPv+bmCiqqo/PGWbKGA1EAL4AZerqrqjg2M9CDwIEBkZOX7x4sU99Tx6THNzM/7+/n3djB7htKrkLVcZlvcOlcMq+MukUp6MeZJQQ2ivnfPrI05e3+tgZJiOH48zY9L37941s62SiOpNRFRvJLApH4Am/xSqI6Zgsww
+634ugx91IaNB6V5FyoH0PhP9l7zPRG+T95i4EOR9Ji6EvnqfzZ49e4eqqpmdbddThUgWAG+qqvqsoiiTgf8pipKuqmq7RbNUVX0FeAUgMzNTnTVrVg+dvuesXbuW/tiu7qgtbyFveRYGp5ViXQVzE+Zy06ybeu18S7aV8nruHqYN0XrYzMZ+3MO2/xP4+lko36l9HzUGJj4FI28gIDSZ3i4+OZDeZ6L/kveZ6G3yHhMXgrzPxIXQ399n3oS2MiDulO9j2+471beBqwBUVd2sKIoZCAeqeqKRonscdhcABredBsXeq2X+391Wwv+9n8OMoRG8cvf4/h3Ytr0Gnz4GEcPg8t9qC1rLOmBCCCGEEKKf8ia0bQOGKIqShBbWbgfuOG2bEmAu8KaiKCMAM1Ddkw0VXddqawttLhshQYMZN2hcr5znna0l/PyDHGYOjeDf/TmwqarWu7bmaRh6lVb10di78/uEEEIIIYQ4X52GNlVVXYqi/BBYhVbO/3VVVXMVRfkdsF1V1eXAo8CriqL8FK0oyX1qZ5PlRK9ztIU2vdtORlxmr5T5X7q9lJ9/kMOsYRG8fFc/D2yrfwWbX4BRt8ENL4Le2NetEkIIIYQQolNezWlrW3NtxWn3/eaUf+8DpvZs08T5cpzS05YW0/O9bHanm2dW7GdCUmjPBjZ7IxzdAxW7weOCqNHazRLSveO5XfDJj2HX2zDhIa1Mv657BUSEEEIIIYS40HqqEInohxx2NwAGl52I0LhOtu66FTkV1Fud/GTukO4HNnujFs4qsqE8W/t6rKDjbUMStYIh0WNOfu0syDnt8P63tTXVZj4Bs56AC7CwuBBCCCGEED1FQtsA1tDUhIoHvbsVnaXn524tzCohOdyPySlh3TtAznvwwYOgauGSwBgtjI26DaLHar1rOgOU7zoZ6sp3wr6PTh4jOKF9iIsaA75tSxq0NsHiO6Fonda7JmuqCSGEEEKIi5CEtgGsuLoEt+JBQUVnNvfosfdXNLLjcB2/unZE9+bKNVfDp49qwWz2L7Sw5R/R8bapc7Xbcdba9j1z5dnaotjHBcdrx6srhspcuPFlGLOg620UQgghhBCiH5DQNoCV1R3FXwkGQDH3bE/bwqzDmAw6bhkf270DrPoFOFrgxpdg0PCu7esbCilztNtx1tqTwywrdmtBzlYHt/0Phl/bvTYKIYQQQgjRD0hoG6CsTiv1jY2E6YMA0Fl6rqetudXFhzvLmD8qimBfU9cPcGgN5CyBGY93PbCdjW8opMzWbsepqsxfE0IIIYQQFz0poTdAbSzfiMFtwteovcQ9OadteXY5LQ43d05M6PrOTht88giEpsD0x3qsTR2SwCaEEEIIIQYACW0D1JclX2Lx+OOn16o6Kj0U2lRV5e0thxkRFci4+OCuH2DdX6CuCOY/D8aenWcnhBBCCCHEQCShbQByepysL11PECEYFScoCoqpG8MYO5BdWs++ikbunBjf9QIklftg0z9h9B2QPLNH2iOEEEIIIcRAJ6FtANp2dBtNziZ8PL4YPA4Ui6V7FR47sDCrBD+TnhvHxnRtR48HPvkJ+ATCFb/vkbYIIYQQQghxKZDQNgCtKVmDxWABhw6Dp+fWaKu3Ovh4dzk3jI3B36eLNWx2vAGlWXDlM+DXzXXdhBBCCCGEuARJ9cgBxqN6+Kr0K6ZETsXt9GDQ2Xtsjbb3d5bR6vJwV1cLkDQdhS9+C0kzYLSslyaEEEIIIURXSE/bAJNbk0uVtYpZkdoaZnqXHaUHyv2rqsrCrMOMjQ9mZHRg13Ze+QS47DD/71LRUQghhBBCiC6S0DbArCldg17Rkxk6EQCDswVdDyysvaWwlsLqlq6X+c9bDbkfamuyhaWcdzuEEEIIIYS41EhoG2DWlKwhMzITH7cW1AwOa4/MaXs76zBBFiPzR0V5v5PTBp8+CuHDYOqPz7sNQgghhBBCXIoktA0gRQ1FFDYUMid+Dg6bCwBda/N5D4+sbmp
l1d6j3DI+FrNR7/2OO96EhhK45q9g6JklB4QQQgghhLjUSGgbQNaUrAFgTvwcWttCm97eeN7DI5dsL8XlUbljYrz3OzltsOF5SJwua7IJIYQQQghxHiS0DSBrStaQFpbGYL/BOO1toc3WhO48etrcHpVFWSVMTg4jJcLf+x23vwHNlTDr590+txBCCCGEEEJC24BRZa1iT80e5sRrVSNbbW4A9NZGlPPoaVufV01ZvY27JnWhAInDqvWyJc2AxKndPrcQQgghhBBC1mkbMNaWrgVgTpwW2hzHe9pa6s6rEMnCrMOE+/swb2Sk9zvteANaqmDWW90+rxBCCCGEEEIjPW0DxJclX5IQmEBKsFZW32FzoTMoYGvpdiGSsnobaw5UcdtlsZgMXr5VHFbY8HdImgkJU7p1XiGEEEIIIcRJEtoGgEZHI1srtjInbg5K2+LVDpsLk9kAHk+3C5Es3lqCCtx+WRcKkGz/T1svm8xlE0IIIYQQoidIaBsA1pWuw6W6TsxnA3DY3Zh8tJe3O4VInG4Pi7eVMnvYIOJCfb3bydECG/8BybMgYXKXzymEEEIIIYQ4k4S2i1yVtYrndjxHYmAioyJGnbjfYXNhMmm9bko35rR9sa+S6qZW7uxKmf9t/4GWaullE0IIIYQQogdJIZKLmMPt4JG1j9DibOHVea+iU05mcIfdhdGo/bs7wyMXZpUQE2xh1rBBXjbmeC/bbIif1OXzCSGEEEIIITomPW0XsT9t/RO7q3fz9NSnSQ1JbfeYw+bCaFCBrg+PLKppYUNBDQsmxKHXKd7ttO01sNZIL5sQQgghhBA9TELbReqD/A9YmreU+9Pv58rEK894vNXmwqj3AHR5nbZFWYcx6BRuzYzzbofWZq2XLWUOxE/s0rmEEEIIIYQQ5ybDIy9COdU5/H7L75kUNYmHxz7c4TZOuxujrq2nzdf70GZ3ulm64whXpEUyKNDLHrptr4H1mPSyCSGEEEII0QsktF1kjtmO8dO1PyXCEsFfZ/wVg+7Ml1BVVW145PHQZvZ+eORneyuotzq5c2KCdzu0NsOmf0LKXIib4PV5hBBCCCGEEN6R0HYRcXqcPLbuMepb6/nf1f8j2Bzc8XatblQVDKoT6NrwyLe3lJAc7seUlDDvdtj2qtbLNvsXXp9DCCGEEEII4T2Z03YReW77c2yv3M6Tk59kRNiIs27nsLkBMKgOwPtCJPsrGtlxuI47JsafWKT7nBwtsOlfkHo5xGZ6dQ4hhBBCCCFE10hou0h8Wvgpb+9/mztH3Ml1Kdedc1uHzQWA8URo866nbVFWCSaDjpvHxXrXqO1vaL1sM//Pu+2FEEIIIYQQXSah7SJQ2VLJU5ueYnzkeB7NfLTT7R12LbQZ3K2Ad4trt7S6+HBXGfNHRRHiZ+q8UU6bNpctaYbMZRNCCCGEEKIXSWi7CGRXZ2N323k883GMOmOn2x/vaTO4baAoKKbOQ9iy7HKaW13eFyDZ9TY0V8KMn3m3vRBCCCGEEKJbJLRdBArqC9ApOlKCU7zavrUttOkdVhSLpdP5aaqqsjDrMMMHBzAuvuPiJu24HLDh7xA3CRKnedUmIYQQQgghRPdIaLsI5NflEx8Qj9ngXUERp10rRKJ3tnhV7n/3kQZyyxu5c1KCdwVIdr8DjUdg5uPgzfZCCCGEEEKIbpPQdhHIr8tnSMgQr7c/3tNmaG3xqgjJwi2H8TPp+cbYmM4P7nbBhucgeqy2NpsQQgghhBCiV0lo6+esTiulTaUMCfY+tB0vRKKzN6N0Uu6/webk4z3l3DA2Bn8fL5bt2/se1BVrc9mkl00IIYQQQoheJ6GtnytsKERF7VJPm8PmwmjWo9qt6DpZWHvl3grsTg+3ZcZ1fmCPG75+FiLTYehVXrdHCCGEEEII0X0S2vq5/Lp8gC6HNh+LAdVm73RO27LscpLC/RgVG9T5gfctg5o8mP4o6OStI4QQQgghxIUgV979XF5dHma9mVh/Lxe
8Bhx2NyaLAY/djuJ79p62qkY7mwuPcd3o6M4LkHg8sP5vED4URt7gdVuEEEIIIYQQ50dCWz9XUF9ASnAKep3e630cNhcmsx7Vbjvn8MiP91SgqnD96OjOD5q3Eqpy23rZvG+LEEIIIYQQ4vxIaOvnulo5EtpCm8WAx2pDd45CJMuzy0iPCSR1kP+5D6iqsP4vEJII6bd0qS1CCCGEEEKI8+NVaFMU5SpFUQ4qilKgKMoTHTz+vKIo2W23PEVR6nu+qZeeWnstx+zHSA1O7dJ+Drsbk7lteORZetqKalrYfaSBG0Z7Ueb/0JdQvgumPQJ6LypMCiGEEEIIIXpMp1fgiqLogReBecARYJuiKMtVVd13fBtVVX96yvY/Asb2QlsvOd0pQgLaOm0miwHVZjvrOm0f7y5HUWD+6KhzH0xVYd1fITAWRi/oUjuEEEIIIYQQ58+bnrYJQIGqqoWqqjqAxcC5KlEsAN7picZd6o6HtqEhQ7u0n7NtTpvHbu9wnTZVVfkou4wJiaFEBXWy+HbxBijdAtN+AgZTl9ohhBBCCCGEOH/ejHWLAUpP+f4IMLGjDRVFSQCSgDVnefxB4EGAyMhI1q5d25W2XhDNzc39pl3rj63HX+dPzpaczqs7tlE9Ki6nSln5YUI9HoorKth32vM53OimsNrOjEHOTp/rsAP/IkLvy6amBDz95OcyEPSn95kYuOR9JnqbvMfEhSDvM3Eh9Pf3WU9PULodeE9VVXdHD6qq+grwCkBmZqY6a9asHj79+Vu7di39pV2vfPoKI/1HMnv2bK/3sTU72LdkA0NT4gFIHTmS0NOezx9X7MegK+LHN80kxO8cvWceD2x/EIZfyYy5V3TnKVySyvMOUH24iNHzrj7rNv3pfSYGLnmfid4m7zFxIcj7TFwI/f195s3wyDIg7pTvY9vu68jtyNDIHuFRPRTUF3SjcqSWl416DwDKaXPaPB6V5bvLmTk04tyBDaAiG5orYehVXWrDpe7rd97ki9depLKwoK+bIoQQQgghBgBvQts2YIiiKEmKopjQgtny0zdSFGU4EAJs7tkmXprKmsuwuWwMCe56uX8Ag6J9PX2dtm3FtVQ02Ll+jBdrs+WvBhRIvbxLbbiUWRsbKNuv1ejZ+O7/+rg1QgghhBBiIOg0tKmq6gJ+CKwC9gNLVFXNVRTld4qiXH/KprcDi1VVVXunqZeW7laOdNi1sGY8HtpOK0SybHc5FqOeeSMjOz9Y3kqIvQz8wrvUhktZ4Y6tqKqHoZOnU5S9g7ID+zrfSQghhBBCiHPwap02VVVXqKo6VFXVFFVVn2m77zeqqi4/ZZunVFU9Yw030T3HQ1tKcEqX9jvR06Y6ANqt0+ZweViRU8G8kZH4mjqZzthUqa3NNvTKLp3/UlewfQsB4RFc+d2H8Q0KZsO7/0U+xxBCCCGEEOfDq9AmLrz8+nxi/GPwM/p1ab8Toc3TCrTvadtQUE291ckNXg+NREJbFzjtdg7v3kXqZZMwmS1M/MatHNm3l8M52X3dNCGEEEIIcRGT0NZP5dfld3loJIDDrhUiMXi0nrZTF9dell1OsK+R6UMiOj9Q3koIjIHI9C634VJVvHsnLqeD1MzJAIy6/GoCwiLYuFh624QQQgghRPdJaOuHHG4HhxsPd7kICUDr8Z42lx04OTzS6nCxOreSq9OjMBk6edldrVC4FoZcAV6uDyegYNtmzP4BxI5IA8BgNDLp5ts5eiifQ9uz+rh1QgghhBDiYiWhrR8qbCjErboZGjK0y/s67S50BgXFYQVODo/8Yn8VNqfbu6GRhzeCo1lK/XeB2+Xi0M6tpIyfgE6vP3F/2sy5hERFs3HJ26geTx+2UAghhBBCXKwktPVD3a0cCdBqc2MyG1Btx3vatNC2PLuMwYFmJiSGdn6QvFVgMEPSjC6f/1J1ZP9eWltaSLlsUrv79QYDk795JzUlxRzY/HUftU4IIYQQQlzMJLT1Q/n1+Rh1RuID47u
8r8PmwmQx4LFroU3n60u91cG6vGquHxONTtfJcEdV1eazJc0Ak293mn9JKti2BYPJh8RRY894bPjk6YTHJ7J56UI8bvdZj6GqKnu+XMmbj35flgoQQgghhBAnSGjrh/Lr8kkOSsaoM3Z5X4fdhY/FgGq3gaKgmEx8tvcoTrfK9aO9GBpZkw91xVI1sgtUVaVg+xYSR4/F6GM+43FFp2PqrXdRV1FO7rovOzxGY3UV7z3zaz5/5QXqj5bzyd//hLWxobebLoQQQgghLgIS2vqh7laOhLaeNrMej9WGYrGgKAqf7CknOcKPtOhAL06+Svs6REKbtyoLC2g+VkPqZZPPuk1K5kQGpw5l8/vv4HI6T9yvejzs/nwFbz72AyryD3L5A99nwe+fxdbcxGcvPCvz4IQQQgghhIS2/qahtYFKayWpwand2v/k8Egburb5bPsrmpiYFIbiTSXIvFUwKA2C47p1/ktRwbbNKDodyeMuO+s2iqIw9ba7aaqpZs8XKwFoqDrKe8/8ii9e+39EDRnGfX97kdHzriEyKYXZ9z5I8e6dZH209EI9DSGEEEII0U8Z+roBor2C+gKge0VIABw2NyaLVohEZ7HQ3OqitsVBfKgX89Ns9XB4E0z9cbfOfakq2LaF2BHpWALO3ZOZkDGG2JHpZH34LqHpY3nr9RdQdArzHvwhGXOubBeqR11+FUf272XTkoXEDBtBXNqo3n4aQgghhBCin5Ketn7meOXI7pT7B21Om8msFSJRLGZKa7XS/3Ghlk72BA6tAdUtpf67oLa8jGNHSkg9rWpkRxRFYdpt92BtqOfIxq+IHjaCe//2IqPmXnVGL6iiKMz7zg8IHhzFp//8Ky31db31FIQQQgghRD8noa2fya/LJ8AYQKRvZJf3VVW1bXikHo/Nis5sORnaQrzoactbBZZQiM3s8rkvVQXbNgN4FdoAYoaPZMad95M4+ypu/sXvCAwfdNZtTRZfrvvpE7S2tLDiX3/D4zl75UkhhBBCCDFwSWjrZ/LrtSIkXs0/O42z1Y2qcmKdNp3ZTGmdDaDz4ZEeN+SvhiHzQKc/97bihILtWxiUlHLO8HW6y66/mbDh6V69xhEJScz51ncp2bubrA+WnE9ThRBCCCHERUpCWz+iqioFdQXnNZ8NOLFOm2LRetr8fQwE+3ayfEDZDrDVSqn/Lmiuq6Ui74DXvWzdlT57HiOnz2bTe4so2bu7V88lhBBCCCH6Hwlt/UiltZImZxNDgrsZ2uwugBPrtOnaQltsiKXzXp28laDoIWVut859KTq0PQuAIeco9d8TFEXh8gd+QGh0rMxvE0IIIYS4BElo60fy6vKA86kcqYU2o1mPx2ZHZzFTWmclzpvKkXmrIX4yWIK7de5LUcH2LQRHRhEWl9Dr5zKazVz30ydw2G0sf/YPEtwuAYU7t7Hx3f+hqmpfN0UIIYQQfUxCWz9yvHJkSnBKt/Y/tafNY7OhmM2U1to6n8/WcAQqc2DoFd0676Wo1dpCSc5uUidM7tb8w+4Ij0vgqu/9hMqiAt585Hvs//oruaAfoMrz9rP8uT+w5YN3Kdq1va+bI4QQQog+JqGtH8mvzyfSN5Ign6Bu7X/qnDbVZsOuN2FzuokL6aTcf94q7auU+vda0a7teNwuUjN7dz7b6YZNns7df/4nITGxrHjhWT7669M01x67oG0Qvauxuoplf3uGgNBwgiOj+Pqdt6RyqBBCCHGJk9DWj+TX5Xd7aCScNjzSbqepbe30TodH5q+GkEQI797acJei/G1b8A0KJmrosAt+7rCYOG7/7Z+Zefe3KdmTzZuPfZ/cdV9Kr9sA4LBZ+fAvv8PtdHLj//2GaQvuoaakmP1fr+3rpgkhhBCiDxn6ugFC4/Q4KWwoZGrM1G4f4/jwSKNBBY+Heo+Wyc8Z2pw2KFwH4+6BCzTM72Lncjop2rWd4VNnoOuj5RF0Oj2Z879B8rgJrHr5H6z8f8+Tt2UDl3/nBwSEhvd
Jm8T58XjcrHjhWY4dKeGmn/+WsJg4QqNiiEwewsYlbzNs8nQMJtMFaYvL6USv16Po5HM90XeaamtwO11nfdwvJASjyecCtuj8uV0umo7V9Nn5A8LC0Ru6fumnejw01lT32YeDrY311Fce7ZNzi4FDbzRc1NdIEtr6iZLGElweV7crRwK0tvW0GdytANS5tUBxzoW1q/aDywZJ07t93kvNwU3rcdptvV7q3xuh0THc9tQfyV75CV+/81/eevQHpIyf0O2L7cCISCbfsuCCzdMTJ3296C0Obc9izre+S+KosQAoOh0z7ryPpU//kuzVn5I5/xu93g6P282iXz6C0Wzhm7/6/QULikKcaueKZXz11qvn3MY3KJjpd9xH2ow5F8UHDEXZO/jqzX9TV1HeZ20Ijoxi9n0PkjzuMq/3Kc/bz5evv0xV0aFebFnn9i58rU/PLy5+UanDuOOZZ/u6Gd0moa2fOF6EZGhI94coOm1ujGY9OLTQVu1QCA/wwWI6R29QbaH2NbR7xU8uNS31daz972tEDR1O4uhxfd0cQOt1G3fNDSSNu4yv3vg3Rw7kdus4LocDa0M9KZkTiUyS98OFtPerz9n+8QeMufJaxl45v91j8emjSRg1lqwPl5Ax5wp8fP16vS3Vh4sAWPXyP7jmR49JiBcXVHNdLRvefZv49FGMnNHxMjSqx8OeNatY9dLf2fPlSube/10ik1MvcEu901BVyVdvvcqh7VsIiYrh8gd+0CcfhridTnZ8+hEf/vm3JI+fwOx7HyQ4cvBZt7c21LN+0Zvkrv0C/5BQZt/7HXz8/C9gi086cOAAw4cP75Nzi4HDEhjY1004LxLa+om8ujz0ip6koKRuH6PV7jpRORKgyglxoZ0UIanVLs4ISez2eS8la15/GafdxpUP/bjPhkaeTcjgaG76+W+7vb+1sYF/f/ceDm7+WkLbBXRk314+f/VF4jPGMPveBzvcZvod9/H2Ez9m2/L3mXb7Pb3WFqfdzqb3FhE9bCTJYzPZsPi/hMXGM+mm23rtnEKcbsM7b+FxOZn3nR8RPDjqrNulzZxL7vo1rF/4Bgt/8Qij5l3N1NvuwuIfcAFbe3Yuh4Nty99n60dLQacwbcG9jL/2RgxGY5+1KW3WXHauWM7m997hzUe/x4QbbuGyG25pN8zU43aTvXoFm5a8jbPVTuZ1NzH55tsxWbxYPqiXVKt60mbO6rPzC9EfSGjrJ/Lr80kMTMSk7/6nbw6bC6PZgGq3A1DR2snQSNB62gKiwdR3f4wvFnlZG8nL2si02+8hLDaur5vT43wDg0jIGMPBTeuZvuBe6V25AOorj7LsuT8QFDmY6376BDp9xx8ERCalMHzqTHZ8uowxV1yLf2hYr7Rn52fLaamr5bqfPEH0sBHUlpWy8d3/ERodw9BJ03rlnEKcqqLgILnrvmTCDbecM7CBNnw4fdblpF42iU1LF5K98lPyNn/NtAX3kjF7Xp8OmTy0I4uv3nqVhsqjDJ08nZl3fYvA8Ig+a89xeoORy66/meHTZrLuf6+z+b132Ld+DbPufZCU8RMoO7iPNa+/TPXhIuIzxjDn/ocIixl4/98JcTGS0NZPFNYXMjz0/Lr+HTYXPhb9iZ62CjtM6qxyZF0RhCaf13kvBbamRr78z0sMSkoh87qb+ro5vWbYlBms/H/PU5F/kOihMhSlN7VarXz0l9+Bx8M3/u83mDsZdjT1trvJ27KRze+9w7wHf9jj7bE1NbJ12XukZE4kZvhIAOY99DD1lUf57MXnCRo0uN8OPxvI3C4n+zesIyAsnISMMX3dnF6lejx89cYr+AWHMPEbt3q9n9nPnzn3PUT6rHmseeNlPn/lX+SsWcXcb32PwSldmyeuqiqHtmdRdnBfV5t/Qk1JMcW7dxIaE8ctv/p9v3zdAkLDmf/jnzFq7lWseeNllv31acLjE6kpKSaRHt2HAAAgAElEQVQgLILrfvoEQyZOlQ/vhOhHJLT1E/W
t9YRbzq+ijcPmwuxnxGNrAcCmM3oxPLIQhsw7r/NeCta+9Sr/n707D4+yPPc4/n1mJjOZSQgk7BB2QUBUBMRdcNe6L9Wqdeux2u10t4ttbWu11tra1h57Tj2eLnbR2rrhiiuuqMhSkU0gLAk7JCR5Z1/e88ckgZBJMklmyYTf57rORTPzZuZOeHs6P+7nuZ+Q1cilt97erclbheKQo4/F6XKxZuGbCm1ZZCcSPH//vdRureGy791B+bARnX7PgKHDOPKMc1j24rPMPO8iKkZUZrSm9554lGgo1Gr5pauoiAu/+T3+euvXePLnt3PVT+8t6MlbhWbjh0t59Y+/p25rDZD87+fcaz9L/yFD81xZdqx6awHb1q3h7C98rVtL8YaMHc8VP7qbVW8t4I2//oG/fe/rHHHqWZx45bV4+3W+l2XPlmpe+9MDbPpwKc6iom536oo8xZz86c8w45zzcbrytxQyHaOnHcE1d9/HsvnP8O+XX2D2RZ/k2IuvoKi4ON+licgB+u6nzwJi2zb+qJ+Sop4NGIiE4pQN8rYsjww53R0vjwxbYO1Qp60TVUsXsfLN1zj20k8xZGzf/l15fCWMO2oWaxa+yZxrPtPr9u31Fe898SjrP3iXU66/idHTjkj7+4695Ao+WvAybz3yEBd8/daM1dOwayfL5j/DYXNPY9CoMa2e8/UfwMXf/iEP/+AWnrrnDq740c8o8ugDXTY17N7JgoceZO177zBg2HAu+tYP2F29mXcff4Q/ff3zzL74kxx9/qV9arJnJBjgjb//iWGHTGLqSad0+3WMMUw96RQmzDyGhf/6G0uef7ppWfs1HH7aWSn/f1okFOTdxx5h8bNPUeTxcOoNN3PkGZ9od7lyX+N0uZh57kXMPPeifJciIh3o/TNyDwKheIi4HafU3bOpTJFgDLfXRSLYHNqKOj6jra5pCIlCW7vCAT8v/e/9DKwczTEXHxzDGA49/mT8dbVsWd395UHSvqqli3j7n39jykmncNTZ53fpe339B3D0+Zew9r132Prx6ozV9Pajf8UYB8dddlXK5wePHsu5X76FHRvW88L9v8JOJDL23rJPLBrlvSce5Y9f+zwbli7mhCuu4bp77mfCzGM45qJPcsO9/8P4mbN559G/8edvfpGqJYvyXXLGvPfkP/HX1XLq9TdnZC+ax+dj7rWf5dq772PQ6DG8/ODv+Pv3vtHqvze2bbP6nTf449c+x6J5jzHlpLl85te/56izzz9oApuIFA6Ftl7AH00uZywtykBoK3Zih5J72mJFbob37+BfxFsmR3Z/YmVf98Zf/4i/tpazPv+VvE78yqUJM2bj8nhY886b+S6lz6nbvpXn7vsFg8eM44zPfrFb+0VmnncRvv4DePPhP2XkoNtdmzaw8s3XmH72eR0OSpgwczZzrr6Bj997m3f+9fcev6+0tmHZYh665Yu89chDjJs+kxvu/W+OveSKVt20skHJvUaXfe8OHE4nT9z9Y564+8cFf+jw3h3bWfzME0w9+VSGTzw0o689aPRYLr/tLj7x5Vvw19Xy8A++yfz/+Q01qz7iX3d8j2d/83N8ZQO48if3cPbnv4qv/4CMvr+ISKZoeWQvYEUsgB4tj4zHE8SiiWSnbU8ytJVX9MPl7CCXt5zRptCWyqbly/jwlReYdf4lDD8ksx8kerOi4mImzJjNx+++xak33JyRf3Fu2L2THRvWM/6oWRnf4xEO+Nm0fBkTjz6uW/9CX7V0EYPHjOvyXi3btnn1j79n29rVnPYfn+/0HomEgsz7xZ0Yh4MLv/G9bi8xdBd7Oe7SK3nlD//NuvcXMvGY47v1Os3efPjPeHw+Zl/0yU6vnXnexezZUs27jz3CwMrRTD7+5B69dy5ZdbU8ftcPGT7xUE6++jN4fL1nYu7rf/0DHzz9OOXDR3Lprbd3egbkmCOmc+09v20Z3f6Hr9yUt6WS8XicD//wX+0+P2TcBE684hoqp05r95rX//IgDqe
Lk668LhslYoxhyglzmDDjaBY+9ghLnnuKj157ieKSUk77jy9wxOmpl02KiPQmCm29QCY6bdFgHAB3sYtE0562IYM6+RfD2irwDYLi/t1+374qGgrx0gO/pXz4CI6//Op8l5Nzh55wMmsWvsnmj/7do0PEY9EoHzz9OO898SixSJjyEZWcesPNjD3iqIzUGY/FeOoXd1K94kNmX/TJLn/oW/7qi7z4+/voP2QoV/z47i4Ft3f++XeWzX8Gt9fLw9+/hVkXXMLxl12V8sOzbdvM/5/72FNTzSW3/rjHgyQOP+0slr34LPN+dRdHnHYWJ34qvUELB6peuZwNSz/gpKuuT+tsK2MMp9/4BWq3bmH+//yGgSNHMXhM7/9Hn3gsytO/+hm1W2vYvXkTG5Yu5syb/7NH93amrHzzNT54+nGOOO1sTrnh5rQ7+vuPbv/w5ReIhsNZrjS1mupqKkelHglvx+N8/O5b/OPH32Hc9JmceOV1bfYFb/pwGesWvcuJn7o2a0dZNHN7fcz59GeYNvcMNi1fyuQT5uAr0//+iUhhUGjrBaxozztt4WAMALfXhR0KksAwYkgn/2NUW3XQ7mfbu30bm5Yva/f5zSs+pH7nDq744c9aHTp6sBh35EzcXh9rFr7Z7Q+2G5Z+wKt/+j17t29j4jHHM/GYE3jn0b/y2J0/YOIxxzP32hspGzSk2zXats2rf/gfqld8yIhJU3j/yX9SMaKSw+acltb3V6/4kJcfvJ8Rk6awu3oj/7rjB1zxo5+l9SHuw1de4N3HHuawuacz99obeeOvf2DRU/9i/aJ3OevzX20zefODZ57g44VvctJV12cksDpdLq78yS/2DVp4N3l+4OGnnZl2x8C2bd78258orRjIUeekv7fO6Srigq9/l7985ys89cs7+fRPf01xac+WdmfbgoceZOualZz31W/Tb+Bg5v/3r3nsp7dx+KlnMuea/8Dj69kQqO7aubGKlx74LyqnTuPUz3yuW5Np+1UM4oTLP52F6tKzYMEC5s6d2+7zJ155LUtfeIb3n/onf/n2l5l8whyOv/xqyoeNIBGP89qfH6D/0GE5HYIxsHJUnzxrU0T6NoW2XqA5tPVkEEkklAxtHq+LiBUg7Cxi1MBOPojUbYQxPVtaVaie/92v2NrJOTwzz7u4wyU9fZnL7eaQo49l7fvvcPqNX+jSksb6nTt47c//y/oP3qV8RGWr5V4TZx/P4mee4N3H/8GGpYs55uLLmXX+Jd3aL7j0+Xl8+EpyRPXxn7yax++6jZce+C0Dhg5vOWesPXu3b2PevXcxYNgILvnuj9i5sYrHf/pDHvvpbVx+2087/BBftWQRLz/4O8ZOn8kZn/0STpeLM2/+MpOOPZEXf/9bHrntW8w87yKOv/xqitweNi1fxpt/+xOTjjmBoy+4tMs/Z3uaBy1Mm3sGr/zxf3j5wftbzqZKZ1/QuvcXsm3dGs783Je7/A8TJQPKueDr3+UfP/ouz/32Hi769m29dnnZRwteZtn8Z5l1/iUcetxJAFxz932886+/88G8x9nw7yWcedN/Mm76zJzWFWxs4Klf3ElxaSnnfeXbffYokSJPMbMvvIwjTjubRU8/xpLn5vHxu29x+Kln4evfnz01m7ngm9/rU5MwRUSyoW/+r0SBaV4e2ZNOW6Sp01bkddK4t5Gws4jK8g7OaIuGoL7moOy01W7dwtY1Kznusis54vRzUl7jcDgO+g3pk48/mZVvvMrGfy9lwszZnV4fi0RYNO8x3n/ynxiHg5Ouup6Z517YKvC5ioo45uLLmXLSXBY89CBv/+MvrHj9ZU69/mbGHTUr7do2LP2ABQ/9H4ccfSwnXnENxuHgvK99l4e//w2e+uWdXH3nve0uQQz5LZ64+8cAXPyt2/D4Shg19XDO/8Z3eeqeO3ji7h9z6a23p9xztn3dxzz9658xeMw4zv/ad1p90B575Ayu+8X9vNG0P2n9B+9x4qeu4aUHf0fFyEr
O+vxXsnJQbfOghTXvvMHrf/k//v79bzDtlDM46arr2/2eRDzOmw//mYqRozjs5PQ6kwcaMWkKp95wEy8/+DsW/vPvnHDFNd38CbJn+/q1vPzg/YyedmSrpbMut5uTr7qeibOPY/5//4bH7/phS9e0s0POMyGRiPPsfffgr9vDFT++m5IB5Vl/z3wrLi3lpCuv46izz+fdxx5h+avzScTjjJ52JIfMOjbf5YmI9HoKbb1A8yCSnuxpi4SSe9o8Xhc7G/yEne6Ox/3v3QTYB2VoW/nGKxjj4IjTzqa0vCLf5fRaow+fTnFpP9a880anoa1h107+ecf32Lt9G5OOO4m51/wH/Qa2vz+sbNAQLvj6rS2HBz/+sx8xYdaxnHJd5wcH767exDO/uZtBY8Zyzpe+0TJ8xFvaj4u+dRt///43ePLnt/Op2+9pM2wiEY/zzK/vZu+O7Vz2/Z8wYNjwlufGH3U0n/jPb/Lsb+5h3r13cdEt328VOPdu38bjd/8YX9kALvnOj3AXt/1HEY/Pxxk3fSnZdXvgPp7+1c9we31c8I3vdeuw4HQZY5h8whzG7zdoYc3Ct3B4PKx/4m9tro9HYzTu2cWF3/x+jwbNHHH6OWxfv5Z3H/8HQ8YfwsSjj+vJj5FRgYZ65v3yp/j6D+Dcr3wr5c85/JBD+fRdv2bhYw+zaN5jrFu0sMO9gRUjKjn56hsYWDm6R7W99chf2PThUs68+csH1ZAjgNLyCk6/8QvMOu9iPnzlBY4845ys/GOGiEhfo9DWC2Sy0+YudhFs9BNyuZnWUWirPTjPaEsk4qx441XGTp+R9U3vhc7pcjHxmONZ/fYbRCPhdpfQBa1GHrvrhwQbGrjse3cw5ojpab/H2COO4rp7fsviZ5/i3ceaDg6+6JMcfUHqg4MDDfU8+fNkF+yiW37QJjhVjKjk/K9+l8fuuo3nfnsPF97y/VbL9l778/+2fFgeNfXwNq9/6HEnEQkGefH39/Hcfb9o+bAfaKjnsbtuw7ZtLr31x512RsYcMZ3r7vkvFj39OKOnHUnFiJFp/056Yv9BC0uee4qazZsYMiT1vsFBo8YwYdYxPXo/Ywynfebz7N68kRfuv5eKO+9l4Mj87xVKxOM8+5u7CTTs5crb7+lwn6LL7eakK69j4uzjWTb/WeKxaLvXbly2mIe+9Z8cdfb5HHfZVd2aQPnxu2+x6Kl/ccTpZ3P4qWd2+fv7igHDhnPy1TfkuwwRkYKh0NYLWFELt8ON29n9Nf2R/QaRRPwBoi43A0s6eL3mcf8H2Rltm5f/G2vPbuZec2O+SykIk48/meWvzGfD0g+YdMwJbZ6PRSI8dc8d1O/YxqW33s6ow47o8ns4XUXMvvAyppw4l9f/8n+888+/seKNVzjlus8yYea+UBGLRpn3yzvx19Vx+Y/uavdMsTFHTOfUGz7HK//3O97425+Ye81/ALBs/rMsm/8MM8+7uMMPy4efeiaRYIAFDz1I0QO/5dQbbubJu2/H2rOHT952JxUjKtP6udxeX94GRAysHMUZN32p0yERmeByuzn/67fy1+9+lXm/uJOr7rw37+P03/j7n9j80Yec/YWvMXT8IWl9z7AJEzn7C1/t8JpAQz1vPfIQi597itXvvMGcq29g8olz0+4U7a7exAu/+zXDJx7KKdffnNb3iIiIgA7X7hX8UX+PhpDAvkEkbq+TWCAAxcUdf5CorQJPf/AdXMsDV7z+CsUlpT3uMBwsKqdOw9d/AGvefqPNc3YiwfP/9Uu2rF7B2V/4WrcC2/76DRzEeV/9Np/8wZ04XUU8+fOfJA8O3r4N27Z5+cH72bJ6JWd9/iudLimbfuYnOOrs81n8zBMsf/XF5DLMP/2e8TOO5uSrr++0lpnnXsRxl13JigUv88evf57t69fyia/cwohJU3r0M/ZVZYMGc/5Xv03d9q288LtfYScSeatl9duvs/iZJ5h+1rlpTxJNl6+sP2fe9J9cdccv6FcxkOf
+65c8+uPvsmvzxk6/N+S3mPfLOykqLuaCr9/areE7IiJy8FKnrRfwR/09WhoJyU6bw2lwuhwkgiGcnZ3ZVFuVPFT7INpLEPJbrHt/IdNOPUMfmNLkcDiZdOyJfPTqi0SCgZZ9WbZts+ChB/n4vbeZ8+nPMPmEORl7z9HTjuTan/+Wpc/P451/PcyfvvkFxhw+naolizjusivTfq+5195I7dYaXn7wflxuDwNHjuLcL9+S9pTD4y67ikgwwOJnn+LUz3yuV+3X6o1GHXYEcz79GRY89CBv/P1PjD0i92eghfwW839/HyMOncrca7PXTR9+yKFcdccvWf7ai7z58EP85dtf5qizzmP8jPb3fi55/inqd+7gk7f9VEuzRUSkyxTaegEramUgtMVxe5N/nSYcwjVsWMffULcBhh/Zo/csNGveeZNYNMK0uWfku5SCMvn4k1k2/xnWL36fKSfOBWDxM0+w5Pl5zDjnAmaed3HG39PpcjHr/EuYfMIcXv/rH1j99utMOu4kjrv0yrRfw+F0ct5Xv83D3/8mQauRi751W5eGgRhjmHPNjcw87+IuHbp9MJvxiQvZvn4tHzz9OB88/Xheaigpr2ia7Jndf5gxjuQwo4nHnMDbjzzEkheeZsnz8zr8nlNvuJnKyYdltS4REembFNp6gUx02sLBGG6vi72BKEWxCM7SDj6cxqOwdzMclvkP273ZigUvM2jUGIaMm5DvUgrKiEmTKR04iNXvvMGUE+ey+u3Xef2vf2DSMScw99obszr5rbRiIOd++RZOuPzTlA0e0jIpMl3FJaVcfdeviEUiaR2afSBjjAJbFxhjOOdLX2fGORcQj8fyUsPAytF4S/vl7P28pf04/cYvMuu8S7D21rZ7XbGvhEGjx+asLhER6VsU2noBK2IxxJd6wlu6IqEY7mInm2sDFMcjFPXrIATWV0MidlBNjtxTU822dWuY8+nPaLx0FxmHg0OPO4mlzz/N2kULeeF3v2Lk5MNajdvPtv1H83eVu9ibcjy/ZIfD4UzrcO++ZsCw4T26T0VERDqS1icuY8zZxpg1xph1xpjvtHPN5caYlcaYFcaYv2e2zL4tU3vaPF4X1XUBPPEopWUdDDZpnhxZ4KEt5LewbTuta1e8/jLG4WDKSadkuaq+afLxJ5OIx5j3izvpP3Q4F97y/ZQj+UVEREQk8zoNbcYYJ3A/cA4wFbjSGDP1gGsmAt8FTrBt+zCg47nJ0ooVtXp0sDYk97QVFbuo3hPAE4tQVt5RaGs6o62Ax/1btXv43y/ewPP339tpcEvE46x88zXGHTWr0/O1JLWh4w+hfEQlJeUVXHrrj3O6/ExERETkYJfO8sjZwDrbtqsAjDGPABcCK/e75rPA/bZt1wHYtr0z04X2Zf6onxJ3ZjptNbsacGLjKe0ktLm80K+TYSW92AfPPkkkGGTVm69RMaKSYy+5ot1rN364BH9dLdPmnp7DCvsWYwyf/MEdOF1F3dobJiIiIiLdl05oGwlU7/d1DXDgIVeTAIwxbwNO4Ee2bb9w4AsZY24CbgIYOnQoCxYs6EbJ2WVZVk7ritkxwvEwu2p2saCx++8bsBLsqt3Omi11AKyvqeGjdn6OaWvfp9gzhA9ef73b75dPsVCQ5S88Q8XEKWAMb//jL2yrq6d8wqSU11e9OA9nsZfqxiBbesk9l+v7TA5Ous8k23SPSS7oPpNc6O33WaYGkbiAicBcoBJ4wxhzuG3be/e/yLbtB4AHAGbNmmXPnTs3Q2+fOQsWLCCXde0N7YXNMG3SNOZO6d772rbNykcXMG7CaGLLkr/ySYdPo7y9n+Ojb8Gow3P6c2bSwsceJhGLct5NX6R82Age/cmtbF4wn+NPOZWh4w9pdW3QamTZ//6aI844h1NPy+xBuz2R6/tMDk66zyTbdI9JLug+k1zo7fdZOoNItgCj9vu6sumx/dUA82zbjtq2vQH4mGSIk05YUQugR3vaYpEEdsLGVeykrrY
RAIe3nWl5iQTUbUwerF2AoqEQS55/mvEzjmbw6LG43G4u/Mb38JaV8eTPb8eq3dPq+tVvv048FtPZbCIiIiJSsNIJbYuAicaYccYYN/Ap4MATRJ8k2WXDGDOI5HLJqgzW2Wf5o36gZ6EtEkyehxQy4IyEgQ5CW+NWiIcLdnLk8tdeJNTYwOwLP9nyWMmAci7+1m2Eg0GevOcOouFQy3MrFrzC4DHjGDK2MH9eEREREZFOQ5tt2zHgS8B8YBXwqG3bK4wxtxtjLmi6bD6wxxizEngNuMW27T2pX1H219xp68kgknBTaGuIxvDEowCY9s6lKuBx//FYlA+efoKRkw9j5ORWA0wZPGYc5375m+zYsI4Xfvdr7ESC3Zs3sqNqrQaQiIiIiEhBS2tPm23bzwHPHfDYbfv9Zxv4etP/SRdkpNMWSoa22miM4ngEAIe3OPXFzaGtAMf9r3rrdRr37OL0z34h5fMTZh7DnKtv4PW//oF3Ro4iGg7hcDqZfOLc3BYqIiIiIpJBmRpEIt1kRZo6bT04XLt5eeSuUBRvU2gzxe2Ftg3gKIL+ld1+v3ywEwkWPfUvBo8Zx7jps9q9buZ5F7NnSzXvPvYwLo+H8TNma0S9iIiIiBS0dPa0SRb5Y5nY0xYHYHsgzFCPAcDh9aW+uLYKyseCw9nt98uHdR+8S+3WGmZfeBnGmHavM8Zw+o1foHLKNGLhMIdpaaSIiIiIFDh12vLMH0mGth512pqWR9ZYYQ732EBHyyM3FNzkSNu2ef+pf9F/6DAmHXtip9c7XUVc+M3vs/Hfi5kw4+gcVCgiIiIikj3qtOWZFbUwGLyudgaHpKF5eeSmxiBDi5KhLeXySNtOdtoKbAhJ9YoP2b7uY44+/1IczvQ6hMWlpUw+YQ7GoVtcRERERAqbPtHmmT/qp7SotMMlf51pnh65pTHMwKLmTluKEOjfBVF/wYW295/6FyUDyjlsTu85HFtEREREJFcU2vLMilo9GvcPEGiI4C5xYRsod8bBGIzH0/bCAhz3v6NqHZs+XMqMT1yIy+3OdzkiIiIiIjmn0JZnzZ22Hr1GXRhHSXJ7Yn+TwHi9qTt3BTju/70nH8XjK+HIMz6R71JERERERPJCoS3PrIjVoyEkAFZdmJg7+VdZakdxtDvuvwqMAwaM7tH75Urt1hrWvr+Q6Wedi8fXzjRMEREREZE+TqEtzzLRabP2hvC7wONy4Il3FNo2QP9R4CqMZYaLn3kSl6uIGedckO9SRERERETyRqEtz6xozzptsUicsD9GXSJOZbkXOxTCpBpCAgU3OXLDssWMn3E0vv4D8l2KiIiIiEjeKLTlmRW1KHV3v9Nm7Q0DsCMWY1SFj0QomHpyJDSFtsLYz9awayeNe3ZROXVavksREREREckrhbY880f9Peq0+euSoW1zMMzoCh92MJR6eWSgFkJ7C6bTVrPqIwBGTj4sz5WIiIiIiOSXQlseJexEj/e0teq0lftIBIOpl0fWbUj+WSihbfUKPL4SBo0ek+9SRERERETySqEtjwLRAECPOm1WXSj5p8NmVIUXOxRM3WmrbQptBTLuf8uqFYw4dAoOhzPfpYiIiIiI5JVCWx5ZUQugR502f10Yh8dB1JDc0xYM4fCl6LS1nNE2ttvvlSuB+r3Ubq2hcor2s4mIiIiIKLTlUUunzd2DTtveMAlvshuVHEQSwhSnCm0boN8IcPf+8862rF4JaD+biIiIiAgotOVVc6etxNWT5ZFhAk4YVOqmrLgIOxBoZ3lk4Yz7r1m9AleRm2ETDsl3KSIiIiIieafQlkctyyN7OPJ/dyLGlOFl2Lad7LR52wttY7v9Prm0ZfUKhk2chNNVlO9SRERERETyTqEtj/xRP9D9QSTxWIJgQ4Qt4WgytEWjkEjg8B6wBDLcCP6dBdFpiwQD7NxQRaWWRoqIiIiIAApteWVFejaIxN807n+vnWDK8H7YwSAAjgM7bXUbk38WQGjbumYVtp1gpIa
QiIiIiIgACm151dNOW/MZbZbDZsrwMhJNoc0cuKetZXJk7x/3X7N6JcbhYMSkyfkuRURERESkV1Boy6OWQSTdDG3+umRoC7kMEwaXtoQ2x4GHazeHtoreH9q2rF7BkLETcKeagCkiIiIichBSaMsjf9SP1+XF5XB16/utptA2ZKiPIqcDO5Q8aDtlaPMNguL+Pao322LRKNvWraFyytR8lyIiIiIi0msotOWRFbW63WUDsPaGiBqbiSPLAEgEk6GtzTlttRsKYj/b9vUfE49GtZ9NRERERGQ/Cm155I/4uz2EBKB2V5AGYzNlRHNoSx7W3WYQye6PYeCEbr9PrmxZtQKAkYeq0yYiIiIi0kyhLY962mmr3R2g0WEzdXgytDUvj2w1iMS/B6wdMKT3B6Etq1dQMXIUvrLevYxTRERERCSXFNryyB/tWactsDdCY9PkSNi3PLLVnradye4VQ3v3uWeJRJwta1bpfDYRERERkQMotOVRTzptiXgCOxjHLnZSXuIGwA6lmB65ozBC265NG4kEA1RO6d11ioiIiIjkmkJbHvmjfkrd3eu0BRoiGKCsYt9SyH2DSPZbHrljBfgGQunQnpSadVtWN+1nU2gTEREREWlFoS2PetJpq9ud7KoNG7bv+1Oe07ZzZXI/mzHdLzQHtqxaQb9BgykbNCTfpYiIiIiI9CoKbXli2zb+qL/boW3dxnoAxo4q2/eaoSAYg/F4kg8kErBzFQzt3SP0bdumZvUK7WcTEREREUlBoS1PIokIsUSs26Ft85YGAKZMKG95LBEMYbxeTHNXrW4DRAMwtHdPjqzbtpVA/V4qdT6biIiIiEgbCm15YkUsgG5Pj9y1I0AMm0mj9o3HT4SCOA7czwYwpHd3sFr2s6nTJuXKV6QAACAASURBVCIiIiLShkJbnvijfoBud9qsvWHCbgcu576/QjtwQGjbuRIwMGRyT0rNui2rV+DtV0bFyMp8lyIiIiIi0usotOWJFe1+p822beL+GM4SV6vHE6Hk8sgWO1ZAxThwd/8A71yoWfURIydP3besU0REREREWii05Ulzp607I/+3N4QojtmUlntaPZ4IBdue0dbLz2drrN1N/c4dWhopIiIiItIOhbY8ad7T1p3lkSu31tMvYRg8xNfqcTsY2rc8MhKA2qrev59tVXI/m4aQiIiIiIikptCWJz1ZHrl6Yz1ODKNG9mv1eCIY3Lc8ctcqwO71kyNrVq+kqNjLkLHj812KiIiIiEivpNCWJz0ZRFK1OXlG28DBB3Ta9p8euWNl8s9efkbbllUfMWLSZBxOZ75LERERERHplRTa8qSl09adPW3bm/bDHbinLRjCeJtC286V4PJC+dge1ZlNQauR3dWbGDm5d3cDRURERETySaEtT/xRPy6HC7fD3aXvC0biBPeGASgtL271XCIUwuFt6r7t+AiGTAFH7+1gbV2T7AZqP5uIiIiISPsU2vLEiliUFpV2ecz9mh2NlCQMOMBbWtTqOTt4wPLIXr6fbe/27QAMGjUmz5WIiIiIiPReaYU2Y8zZxpg1xph1xpjvpHj+emPMLmPMsqb/uzHzpfYt/qi/W/vZVm1roF/C4O3vwTj2BT7btpsGkRSDtRMCu3v95MiQP7lE1FPSu8+RExERERHJJ1dnFxhjnMD9wBlADbDIGDPPtu2VB1z6D9u2v5SFGvskK2p1a3Lkqm0NDMBB/4rWSyPtaBQSCRzF3uTSSOj1Z7SF/RZurw9HL17CKSIiIiKSb+l02mYD62zbrrJtOwI8AlyY3bL6vkA00O1OW7lxthlCYgeDADh83v0mR/bu0BbyWxSXdj24ioiIiIgcTDrttAEjger9vq4Bjklx3aXGmJOBj4Gv2bZdfeAFxpibgJsAhg4dyoIFC7pccLZZlpWTurbVbqPUWdql90rYNss3Bzg5Wkxd485W3+uo28tgYO2mzYzxL6bcXc7CRR9lvO5M2rp5E1GbXnkfZFuu7jM5uOk+k2z
TPSa5oPtMcqG332fphLZ0PA08bNt22BhzM/Bn4NQDL7Jt+wHgAYBZs2bZc+fOzdDbZ86CBQvIRV2/fOKXjKkYw9w56b/X5j0BeP41HLZhyhETOXLuqJbnwhs2UAVMnn4k/be+CJXTc/Jz9MS2V56lrLS019eZDbm6z+TgpvtMsk33mOSC7jPJhd5+n6WzPHILMGq/ryubHmth2/Ye27bDTV8+CMzMTHl9lxW1KHF3bXnkym0N9LOTw0dKBhywPDIUAsC43bBrTa9fGglNyyNLtDxSRERERKQj6YS2RcBEY8w4Y4wb+BQwb/8LjDHD9/vyAmBV5krsm/xRf5cHkaza1kBZU2hLdbA2gCO2F2Khgght4YAfj0KbiIiIiEiHOl0eadt2zBjzJWA+4AT+YNv2CmPM7cAHtm3PA75sjLkAiAG1wPVZrLngxRIxgrFglweRrNrWwFhvMVh220EkoaZBJMGtyQeG9O4z2gDClgaRiIiIiIh0Jq09bbZtPwc8d8Bjt+33n78LfDezpfVd/qgfoOudtu0NnFbswZgwvjJ3q+cSTdMjjVUNxgGDJ2em2CyJRSLEohEtjxQRERER6URah2tLZjWHtq502hpDUaprgwxyOvH19+Bwtv6ra1ke2bgRBh4CRcUpXqX32HewtkKbiIiIiEhHFNrywIomA0upO/3Asnp7IwAlcdNmaSTstzyyYV1h7GdrCW1dP6tORERERORgotCWB93ptK3a1gCAI5SgdEDb0NbcaTPWZhjS+0NbyEqGNi2PFBERERHpmEJbHliRpk5bF/a0rdrWQP/iIkINEUpSdNqa97Q5nDYM7f1DSJqXRyq0iYiIiIh0TKEtD7oziGTltkamDS0lFo5TOqDtfrXm5ZHGSYEtj1RoExERERHpiEJbHjTvaUt3eWQ8YbNmewNT+ievT7WnLREMYdxOjKcU+o/OXLFZ0tJp08h/EREREZEOKbTlQVf3tG3c4ycUTTDWlwxrKZdHhoI4XHbyfDZH7/9rbd7T5vFpEImIiIiISEd6/6f7Pqi50+Yr8qV1/cbdyZA30JE8Vi/VIBI7GMJhYgWxnw2SyyOLir04XWkdFSgiIiIictBSaMsDf9RPSVEJDpPer7+6NgCAN2YDUJJqemRDLcYZL4jJkQDhgF9DSERERERE0qDQlgfNoS1dm2uDeIuc2IEY3jI3Tlfbv7ZEw+6myZGFEdpCfotindEmIiIiItIphbY8sCJWlyZHVtcFGFXhxaqLpFwaCWA37k3uaSuQ5ZEhy8KjISQiIiIiIp1SaMsDf9TftdBWG2B0hQ//3lDKyZEACX8jptgD3vJMlZlVYb+l5ZEiIiIiImlQaMsDK2qlvTzStm2qawNUlvuw6sLtd9qCARwl/TJZZlaF/JbOaBMRERERSYNCWx74o35K3ekFllp/BH8kzqiyYsKBWMpx/8SjJEIRTL/C6LJB8542hTYRERERkc4otOVBVzptm5smRw4rKgJSj/tnzzoScXD0H5KxGrMpHosSC4cV2kRERERE0qDQlgf+SPp72qrrggBUGCcAJeXFbS/asQI7ZnCUD81YjdnUcrC2pkeKiIiIiHRKoS3HbNvuUqet+Yw2Xzz5dcqDtXesJBE3mPIRGaszm0L+ZGhTp01EREREpHMKbTkWjAWxsdPvtNUGGFTqJmpFAVLuabNrN4NtcPgKo3MVbgptGvkvIiIiItI5hbYcs6LJwFLiTn9P26gKH/66MJ4SF0VuZ5tr7D3VADi8KZZO9kLqtImIiIiIpE+hLceaQ1v6e9oCjCr3Ye0NUzogdShL7NkKgPF6M1NkloVb9rQptImIiIiIdEahLcf8ET9AWnvaYvEEW/eGGF3hw6pr52DtRJzE3p0AOAoktIUCyd+BOm0iIiIiIp1TaMuxluWRaYS2bfUh4gmbURVe/HvDqc9os3ZiR2MAmOLCWB7Z3Gkr1p42EREREZFOKbTlWCCanAaZzvLI5jPaRpYVE2yMpj6jrb6
GRNwA4CgukE6b38Ll8eB0FeW7FBERERGRXk+hLce60mlrHvc/yOkCSL08sr4aO9YU2nyFE9q0NFJEREREJD0KbTnWlUEkm2sDuBwGdyB5SFvZoBShbL9OW8Esj1RoExERERFJm0Jbjvmj6Q8i2VwbYMQAL7s3NYKBwaP7tb2ovoYEydcqmEEkfkuTI0VERERE0qTQlmNW1MLj9FDk7Hw/V3VdkNEVPnZsbKRieAnuYlfbi+prsN3lADgKpdNmWRpCIiIiIiKSJoW2HPNH/Gl12SC5p21UuZedGxsYOrYs9UX11SRcA4DCOact5PdreaSIiIiISJoU2nLMilpp7WezwjFq/REq3UWE/FGGtBvaakg4k8smC2p5pC+94CoiIiIicrBTaMsxfzS9TlvL5MhIcsjI0HEpQlskAMFaEvjAGIwnxXTJXiYeixENBbWnTUREREQkTQptOWZFLUrdnQeW5tDmro/hKnIwcESKoNewBYDw9gDusWMxxmS01mwIB5KDWLSnTUREREQkPQptOZZup635YO3IjiCDx/TD4UzxV1VfjW1DcO0WvEcdlelSsyJkJY880J42EREREZH0KLTlmBVJb09bTV2QMreLui3+DvezRRpdxBv9+GYURmgLB5KhTcsjRURERETSo9CWY13ptE3xeYnHEh1MjqwhuNsNUDCdtrA6bSIiIiIiXaLQlmNW1Ep7EMl4R/Jcto5CW2DvAJz9++MeNy6TZWZNyN8U2rSnTUREREQkLQptORSJR4gmop0uj7Rtm821AQZHDN5+RfQb2M6h2fXVBHe78E6fjnEUxl9lyJ8cRKLlkSIiIiIi6SmMT/p9hBVNdpk667TtagwTjiXwNsYYMras3amQsR3VRGpjeGfMyHit2RL2a3mkiIiIiEhXKLTlkD+a7DJ1NvK/ui6A24ZEfbT9pZGJBMH1uwDwHjU9o3VmU8hv4Spy43K7812KiIiIiEhBUGjLoebQ1lmnbXNtgGGx5F9Nu6EtsJvgTgNOB97DD89ondkUsiw82s8mIiIiIpI2hbYcsiLJpYGd7Wmrrg0yPJ78q+lo3H9wt5vi8SNxeL0ZrTObwn5LSyNFRERERLpAoS2HWpZHdhLaNtcGGGtc9B/ipbikKOU19p5NBGvdeI+YlvE6synktzSERERERESkCxTacijdQSTVtQGGRk37SyOB0PKl2HGDb/bxGa0x20J+i+KSzo88EBERERGRpLRCmzHmbGPMGmPMOmPMdzq47lJjjG2MmZW5EvuOdAeR7N4ZwBPrYGkkEFy+GgDvMSdmrsAcCKvTJiIiIiLSJZ2GNmOME7gfOAeYClxpjJma4rp+wFeA9zJdZF+RTqctEkvg3BsBYOi49kNbYG0NRf0MRcOGZbbILAtZ2tMmIiIiItIV6XTaZgPrbNuusm07AjwCXJjiup8AdwOhDNbXp1gRC6dxUuxs57BsYMveYHJypAMGVaYON7ZtE9xYj3d0v2yVmhWJRJxIMKBOm4iIiIhIF7jSuGYkUL3f1zXAMftfYIyZAYyybftZY8wt7b2QMeYm4CaAoUOHsmDBgi4XnG2WZWWtro9rP8ZjPLz++uvtXrN8Vyw5ObLU5q2330x5jWPPHgb7E0SG+Hrl77A9sVAQgJpt2wuq7mzI5n0m0kz3mWSb7jHJBd1nkgu9/T5LJ7R1yBjjAO4Fru/sWtu2HwAeAJg1a5Y9d+7cnr59xi1YsIBs1fXSWy8xYPuADl+/+p2NxGLrGX/YMObOPSzlNfVPPsFWYPjxsxnXC3+H7dm7fRv/BqZNn85hc+bmu5y8yuZ9JtJM95lkm+4xyQXdZ5ILvf0+S2d55BZg1H5fVzY91qwfMA1YYIzZCBwLzNMwkrb8UT8l7o4nJ27d3IAbw9iJFe1eE3z/bRyuBJ6phXOoNiQnRwJaHikiIiIi0gXphLZFwERjzDhjjBv4FDCv+Unbtutt2x5k2/ZY27bHAu8CF9i2/UFWKi5gVtSixNVxaKv
fkpwwOWx8B0NIln1I8cAopmJ0RuvLtubQppH/IiIiIiLp6zS02bYdA74EzAdWAY/atr3CGHO7MeaCbBfYl/gjnXfa4rvDxJwwYIgv9fOWn/CGGnyDItC/MhtlZk24JbSp0yYiIiIikq609rTZtv0c8NwBj93WzrVze15W32RFLUb2G9nhNb7GOPEBbozDpHw+9OG/wbbxDopAWcev1duErKblkaUKbSIiIiIi6erxIBJJXyAaoLSo/cCypz5ERQxcw7ztv8bSpWDAO7o/uDzZKDNrQuq0iYiIiIh0mUJbDllRq8ODtVev2oMDw5Cx7e9nCy5dhmewB+fgwjpUG5LLI50uFy53YYVNEREREZF8SmcQiWRAPBEnEOu407Z5bR0A4yalnhxpx+MEly3DOyRecPvZINlp85SUYkzqpZ8iIiIiItKWOm05EogFADrstNVVW9SbBIeM6Z/y+fC69SQsC19/P/QflfKa3ixsWVoaKSIiIiLSReq05Yg/mhzlX+puP7REd4XY44Gy4qKUzweXLgHAO8Aq4E6bxv2LiIiIiHSFQluOWJHkEI72Om3BxgiuYIJI/9SBDSC4dCnO8v4UlRbm8shwwK9Om4iIiIhIFym05YgVTYa29va07djYAIBnaHG7rxFYshTfoaMwhoIMbc172kREREREJH0KbTnSvDyyvU7b9g0NJLAZOCp1qInt2kW0uhrv2PLkA4W6p01ntImIiIiIdIlCW4501mmrXlfHbofN6CGpnw8sXQqAd7gTnB4oGZSdQrPETiQIaXmkiIiIiEiXaXpkjnQ2iKRucz3BhMWo/qmXRwaXLsO43RSXBSA8EgpsbH44GADb1vJIEREREZEuUqctR9obRBJauZKNn72ZaDDBnI1vMfi6C6n56teo+8ejRKqrW64LLl1K8bRpOPxbC3I/W9if/PnVaRMRERER6Rp12nKkudPmc/kAiGzaxK7f3EfDc88RHVgJhzt4fdihHDm9iODChTS+8AIARZWVlBx3HKEVK6i47lqo/z+YcErefo7uClnJ0ObRnjYRERERkS5RaMsRK2rhdXlJ7NrDzv/+HXv/9RimqIiBn7uZxJlXwH2r2DRqApU/+Sy2bRPZsBH/wnfwL1xIw/PPY0ej+GYfDa/eUZCdtpA6bSIiIiIi3aLQliOhhjquWpBg/T1nYcdilF9+OYM+/zlcgwez8cPdAPQr9wBgjMEzfhye8eOouPpq7FiM6LZtFJXE4VW7IEOblkeKiIiIiHSPQluOTHxyGYe/5affeecz+Mv/iXvUvpH9/vowABWDvCm/17hcyes3vp18oABDW3OnTYNIRERERES6RqEtR8o31rJ1dAmH3fPzNs81h7byivYP1gagvib5ZwGe0da8p03ntImIiIiIdI2mR+ZI+Q4/DcNSB5aGujABYzOg1NPxi9Q3TZMsG5nh6rIv7LdwOJ0UeToJpiIiIiIi0opCWw7EGxvp1xDDP7Ii5fP1dSH8xqbcV9TxC9XXgG8guH1ZqDK7wgE/npJSTIGdLyciIiIikm8KbTkQ2bABgHDloJTP+/eG8TtsBvjcHb9QfU1B7meD5PLI4pKSzi8UEREREZFWFNpyIFxVBUBi9PCUz4cao1jpdNoatkBZgYY2v6UhJCIiIiIi3aDQlgPBdWuJOcA1ckSb52zbJuqP4nfYlJf03U5b2G9p3L+IiIiISDcotOVAYP1atpdDqXdAm+dC/igkwHLYDOio0xaqh3BDwYY2ddpERERERLpHoS0Hohs2smWgodTdNrQE6iMA+A0M8HbQaWsZ91+ooc2vTpuIiIiISDcotGWZHY2SqN7KlkHQz92vzfPNZ7QlPA7crg7+Ogr4jDbbtpPLI3VGm4iIiIhIlym0ZVmkugYTj7OlwqQMbc2dNldpJ+ecN5/RVoCdtkgwiJ1IaHmkiIiIiEg3KLRlWaRqPQBbBxr6FbXfafP0S+OMNkcRlA7NeI3ZFvZbAFoeKSIiIiLSDQptWRa
uSp7RtnUg7e5pizqgrNTT8QvV10DZCHAU3l9ZSKFNRERERKTbCi8BFJhIVRWRilKCHkOZu6zN8/76CEEnnZ/RVl9TkPvZIHmwNqDlkSIiIiIi3aDQlmXhDVU0Du+P0zjxurxtng80hGk0Ccp96ZzRNjJLVWZXy/JIDSIREREREekyhbYssm2bSNUG9g4rodRdijGmzTX+vWHq7UTHZ7Ql4tCwtSCHkACEAloeKSIiIiLSXQptWRTbtYtEYyO7hxRTWtQ2sNi2jb8+gt9hd9xpa9wOdrxgQ1tYyyNFRERERLpNoS2LIk1DSLYPcqbczxYNxYlHE8mDtTvqtO1YkfyzYnw2ysy6kN+PMQ7cxcX5LkVEREREpOAotGVRZEMVADUVdsrJkc3j/q3OOm3rXgKXF0Ydm5U6sy3kt/CUlGAKcPKliIiIiEi+6VN0FoWrNuDw+djqC7VzRlvyYG2/sakoaSe02TasfRHGz4GiwuxUhf2W9rOJiIiIiHSTQlsWRaqqcI8fjxX1t3NG275OW7vLI/esg7qNMPHMLFaaXclOm0KbiIiIiEh3KLRlUbiqCvf4cTRGGts9ow2SnbZ2l0eufTH558QzslVm1oUtS+P+RURERES6SaEtSxJ+P7Ft23CPG4e/g06b7QDb5cDndqZ+oY/nw+ApMGB0livOHnXaRERERES6T6EtS8IbNwKQGD0cG7vdPW1xj4MBJUUpz3Aj3Aib3inoLhskQ1txSUm+yxARERERKUgKbVnSPO4/UjkEgH7utqEt0BAm7DLtL42seh0S0YLez2bbtgaRiIiIiIj0gEJblkQ2VIHDgX9oci9bytBWHyHQ0RCStS+CpwxGF+aof4BoOEQiHtfySBERERGRblJoy5Lw+irco0ZhEQJo55y2CI3tDSGxbVj7Ekw4BZwdHLzdy4UsC0CDSEREREREukmhLUuax/03RhqBtp22WCROJBijLhGnvCRFKNuxAhq3FvTSSIBwwA+g5ZEiIiIiIt2UVmgzxpxtjFljjFlnjPlOiuc/Z4xZboxZZox5yxgzNfOlFg47HieycSPu8eOwoslO04GDSJrH/e+OxxmQqtO2dn7yz0NOz2qt2RZu6rRpeaSIiIiISPd0GtqMMU7gfuAcYCpwZYpQ9nfbtg+3bXs68HPg3oxXWkCiW7ZgR6N4xo+nIdIAtO20NR+s3UCCipSh7SUYfiT0G5b1erMp5G9aHqnQJiIiIiLSLel02mYD62zbrrJtOwI8Aly4/wW2bTfs92UJYGeuxMITrqoCwD1uPFYkGVoO3NPWcrB2qkEkwTqofq/gl0bCfqFNe9pERERERLrFlcY1I4Hq/b6uAY458CJjzBeBrwNu4NRUL2SMuQm4CWDo0KEsWLCgi+Vmn2VZPa7L9+JL9AMWbdvKyshK3MbN22+83eqaPR8nc63f2NRUrWGBtb7lucE73+QwO8GSxsE09MLfUVfs+PcyABYtXYbLU5znanqPTNxnIp3RfSbZpntMckH3meRCb7/P0gltabFt+37gfmPMVcD3getSXPMA8ADArFmz7Llz52bq7TNmwYIF9LSurS+/jDVwIHPOPZfX3llE/2j/Nq+5cO96tjs2ETBw8jEzmDmmYt+TTzwC3gpmnH8jOJw9qiXf3t5ZQ40xnHbGmRiH5t40y8R9JtIZ3WeSbbrHJBd0n0ku9Pb7LJ1P0VuAUft9Xdn0WHseAS7qSVGFLlK1Ac+4cQA0RBraOaMtjNPrAkPrQSSJRHI/2yGnF3xgg+TIf4/Pp8AmIiIiItJN6XySXgRMNMaMM8a4gU8B8/a/wBgzcb8vzwXWZq7EwtM87h/Ailgpz2gL1Eewi5O//lbntG1dCoHdfWI/G0DYb2kIiYiIiIhID3S6PNK27Zgx5kvAfMAJ/MG27RXGmNuBD2zbngd8yRhzOhAF6kixNPJgEaurI753L+7xyU5bY6SR/p7+ba7z10eIuh2YEPT37jeIZO2LgIFDTstRxdkV8ls
a9y8iIiIi0gNp7Wmzbfs54LkDHrttv//8lQzXVbAi65MDRTzNnbaoRWW/yjbXBRrChAc4KSsuwukw+55Y+yJUHg2+ijbfU4hC6rSJiIiIiPSINhplWMu4/6bQ1hBpaLM8Mh5PEGyM4ndA+f7j/q2dsHUJTOobSyMhebi2QpuIiIiISPcptGVYpGoDxuOhaMQIILmn7cBBJMGG5BltjSRaDyFZ90ryzz6ynw2alkfqjDYRERERkW5TaMuw8IYq3OPGYRwOwvEwkUSEfkWtQ1vzwdp1dpyKkv1C29r5UDoMhh2Ry5KzKhzwq9MmIiIiItIDCm0ZFqnagGe/ISRAm05boD4MwK5onAHNyyPjMVj3Kkw8HYyhL4hGwsSjUQ0iERERERHpAYW2DEqEQkRranCPS+5naw5tB+5pa+60bY9E9o37r3kfwvV9amlk2LIAKC4pyXMlIiIiIiKFS6EtgyKbNoFtt4z7tyLJ0FLmLmt1nb8+DAZ2x+L7BpGsfREcLhh/Sk5rzqZwwA+gTpuIiIiISA8otGVQpGlyZPO4/5ZOW1Hr0BKoj+ApKcI27BtEsvYlGH0cFLcOeIUs1NJpU2gTEREREekuhbYMCldVgTG4x44FoDHa/p62opLkEXnlPjcEamHHRzB+Tk7rzbZ9nTYtjxQRERER6S6FtgyKVG2gaMQIHF4v0P4gEn99BIevObQVweZ3k0+MOSF3xeZAyK9Om4iIiIhITym0ZVC4qqrlUG3Yt6ctVact4Un+6gf43LD5HXC6YcSM3BWbA+Gm0KY9bSIiIiIi3afQliF2IkFkw75x/wANkQYcxoHP5Wt5LJGwCTRGibmTv/rykiLY9A6MnAlFxTmvO5vC/qblkT4tjxQRERER6S6FtgyJbduGHQq1jPsHsKIWpUWlmP3OXQtZUeyETSi5OpJyVxS2/RvGHJ/rkrMu5Lco8hTjdLnyXYqIiIiISMFSaMuQ0McfA+CZsC+0NUYaU+xnSx6s7Tc2xUUOincsgUQMRve90BYO+DWERERERESkhxTaMiS4ZCm4XBRPm9bymBWxUuxnSx6sXU+CCp8bNi0E44BRs3Naby6ELEtDSEREREREekihLUMCixdTfNjUlsmRkNzTduAZbc2dttp4PDmEZNPbMHRanzqfrZk6bSIiIiIiPafQlgGJcJjQ8uX4Zsxs9bgVbb/TtisWZZAPqPmgz436bxb2+zWERERERESkhxTaMiD00UfY0Si+Wa1DW6o9bYH6MB6fi9pQjMMdmyAWhDHH5bLcnAn5tTxSRERERKSnFNoyIPDBYgC8Rx3V6vFUe9r8DRF8/T3UBSIcHl+RfHB03wxt4YClM9pERERERHpIoS0DAksW4x4/HldFRctjCTvRMvK/1bX1YXxlbuqDUSYGP4SBh0DpkFyXnHV2IkE4EFBoExERERHpIYW2HrITCYJLluKb2XpppD/qx8ZOMfI/QlGpC9tOUNn4YZ88nw0gHAyAbVOsQSQiIiIiIj2i0NZD4bXrSDQ24p05o9XjjZFGgFahzbZtAvURHF4Xk0wNnlhDnzyfDSDstwDUaRMRERER6SGFth4KLP4AoE2nLVVoCwdixGMJ4sUOjnasST7YZ4eQ+AE08l9EREREpIcU2noouHgJrsGDKaqsbPV4c2jbf09b87j/aJHhGMcqIr5hMGBM7orNoXBTaCvWyH8RERERkR5RaOuhwJIleGfNxBjT6nErmlweWObed2i2vyF5sHbIYXO0Yw3RymPhgO/rK7Q8UkREREQkMxTaeiC6dSuxbdvaHKoN+3Xa3G07bXZkO8NMHY6xfXM/G0AokAxtOqdNREREdv0RMwAAHoNJREFURKRnFNp6ILA4eT6b74AhJJB6T5u/PtlpK2tYAkDxhBOzXWLehC112kREREREMkGhrQcCixfjKCnBc+ihbZ5rCW1F+0JboD6Cy+NkWOMi6inFDJ6Ss1pzLRzwY4wDd3FxvksRERERESloCm09EFy8BO9
RR2GczjbPWVGLYmcxRc6ilsf89WFKytyMavw3H7mmgqPv/vpDfj+ekhJMH/4ZRURERERyQZ+ouyleX0947dqUSyMh2Wnbfz8bJDttvlLD0GgN64sPz0WZeRP2Wxr3LyIiIiKSAQpt3RRYktyX5p3ZdggJJEPb/vvZoKnT5qwHoLrfUdktMM/CAT8ejfsXEREREekxhbZuCi5ZAkVFeA9P3TFrjDS22s8GTZ22xBaCeGgon5qLMvMmZFmaHCkiIiIikgEKbd0UWLwE79SpOLzelM9bUatVpy0SihENxykJfszSxET6l/pyVWpehAN+LY8UEREREckAhbZuSITDhJYvxzsr9dJIaLunrfmMNm9gNe/FD2WAr6i9b+0TQn512kREREREMkGhrRtCy5djR6P42tnPBm33tAUakme0lTpqed+eTIXPnfU68yns9+uMNhERERGRDFBo64bA4qYhJEe1P0zkwD1t/uZOm7OBpYlDGNCHQ1ssGiUWCWsQiYiIiIhIBii0dUNgyWLcEybgKi9P+Xw4HiaSiLTutDWFNnvQcEJ4KO/DyyPDfgtAyyNFRERERDLAle8CCo0djxNcspSyc85p9XhjbYj5//sRsUiCmB3jsr3fIr52CI889T6QXB7pIMreQZNhO5SX9N1OWzjgB9AgEpH/b+/eg+su7zuPvx9drast34SRwAZszJjYYGJuIW28SdqBEC7JpAnZZDfLZJekaQbabbfDttN0k2m7TbuTZHeTJpAQmu3sQlLCJjSlZXNBIaSGYBDYYGwQF9sSvluydI6soyPp2T/OkXx8l+1z89H7NeORzu/8kL5Iz/zg4+d5vo8kSVIeONOWY3T7dmp7XjvhPameHiaGho46VPv17j3semOQlnmzmNVWxVD9PuraAq3zZ9E6fxbnnDPGlc3fo7flMoCKbkQyknCmTZIkScoXZ9py7Pqvf0nbE09wYNE5zL7xxmPeM/zss8DRh2r3bumndUEDN35mFRv3bOTzj97HTe/+Ku86b1Xmhif+Gn72EPfU/zawizkNzrRJkiRJOjln2nIs+vM/I71kCW/9/h+w9xv3EGM86p6Dzz5HTXs7tR0dU9cmxid465V+Opdn9rgNpYcADtvTxs4XYe6F7Ew30FxfQ11N5f7oJ/e02T1SkiRJOnOVmxxOQ01bG/133UnrTTex5ytfYcef/AkxnZ56P8bI8LPP0vj2KwghTF3fvW2I0ZFxOi/JhrbRTGjLPaeNfT0wbxkDw+mKXhoJMJLMzLS5PFKSJEk6c4a2I9XWcu5ffZH5n/ltDjz0fbbdcQfjg4MAjL31FmM7d9JwxeFLI/u29APQcXEmtCVGMzNNrXWtmRsmJmDfazB/Gf3Do7RVcLt/yJlps+W/JEmSdMamFdpCCNeHELaEEHpCCHcf4/3/GELYFELYEEL4aQhhcf5LLZ4QAgvuvJNFf/EXDD+znq0f+xjpvj6Gn8ucz9a45oj9bJv7mdfRRGNrJoxNzbTVZmeaBnth7CDMW0r/jJhpS1BTW0dNXWWHU0mSJKkYThraQgjVwNeAG4AVwEdDCCuOuK0bWBNjXAU8BPxVvgsthTkf/ADnf+ubpHfu4o2P3MbA9x+mqrmZ+mXLpu4ZS4+z47UDdCw/dGbbUHqIqlBFY21j5sLeVzMf5y9jYHiUuRXc7h8yjUhsQiJJkiTlx3Rm2q4CemKMr8cYR4EHgVtyb4gxPh5jHM6+fArozG+ZpdN0zTUsefABqurrGX7qKRpWryZUV0+9v/P1QcbTE3ReMnfq2tDoEE21TVSF7I93X0/m47xl9CdnwPLIRMImJJIkSVKeTKflfwewPed1L3D1Ce7/JPBPx3ojhHAHcAdAe3s7XV1d06uyiBKJxDHrqrrrTloe/C4Dl17K6znv79owAQFe37mRrfszzUl69vZQO1479XWWvdJFe3UjP//VSwyOjDGwu4+urj1F+LcpjR19vUyMjZfl77dcHG+cSfnkOFOhOcZUDI4
zFUO5j7O8ntMWQvg4sAZ417HejzHeC9wLsGbNmrh27dp8fvu86Orq4rh13XzzUZe+/6v1tC+B9/zmmkPXfvp9FiQXHPo6274M7Zdw2dXXwY9/wuoVF7P2HUvyXXrZ6H3shzQuWHD8n6NOPM6kPHGcqdAcYyoGx5mKodzH2XSWR/YB5+W87sxeO0wI4b3AHwM3xxhT+SmvvI2OjLHrzaGp89kmDaWHDm/3v7dnaj8bUPGNSFLJhJ0jJUmSpDyZTmh7BlgWQrgghFAH3AY8kntDCGE1cA+ZwLY7/2WWp7deHSBORDouOSK0jQ4dOlh7NJnpHjlvGf3DmTPfKn1P28hw0j1tkiRJUp6cNLTFGMeAzwKPAS8D34sxvhRC+EIIYXK94F8DzcDfhxCeDyE8cpwvV1F6t/RTXVPFogtnH3Y9MZqgpTYb2va9lvk4fyn9ycxMWyWHthgjqWTCg7UlSZKkPJnWnrYY46PAo0dc+1zO5+/Nc11nhd7N/ZxzUSs1ddWHXT9spm1ftt3/vGUMbM/MtFXy8sj0yEHixIQt/yVJkqQ8mdbh2jrawcQo+3oTdC6fe9j1iThBIp04tKdt72S7/4voz+5pa6vgc9pGkgkAZ9okSZKkPDG0naa+LQMAdB6xny2ZThKJtNa1Zi7sexVmnwe1DfQPp6mtDjQdMTNXSVLJJIAzbZIkSVKeGNpOU++WfmpnVbNwccth1xOjmZmm5trJmbZXYd5SAAaGMwdrhxCKWmsxTYY2Z9okSZKk/DC0nabezfs5d9kcqqoP/xEOjg4CZPa0xQj7Mu3+Afqzoa2STS6PtOW/JEmSlB+GttMwtH+EA7sPHnU+G0AinZ1pq2uGoZ0wmoB52dCWTFd0ExKA1PDk8khn2iRJkqR8MLSdhr4t/cDR+9ng0PLI1rrWQ50j52eWR86ImbaEjUgkSZKkfDK0nYbeLf3Maq5l3rlHB5PJ5ZHNtc2Z/WxwaKZtOE1bU6XPtGVCW11jQ4krkSRJkiqDoe0UxRjp3dxP5/I2QtXRDUUml0e21LVk9rPVNEBrBzFGBoZHmVPhM22pZJL6xiaqqiq3Q6YkSZJUTIa2U3Rg90GSAyk6jrGfDTIHa0M2tE12jqyqIpEaY2wi0lbhe9pGkgnb/UuSJEl5ZGg7Rb2b9wPH3s8GmT1t9dX11FXXZTtHTrb7TwNU/kzbcNLOkZIkSVIeGdpOUe+Wfprb6pm94Nh7tgZHBzP72cZSMLA1Zz/bKMCMaERiExJJkiQpfwxtpyBORPq2DNB5SdtxD8hOpBOZpZH734A4kXNGW2amrdKXR6aGky6PlCRJkvLI0HYK9vYlGEmmj3k+26Sh0aFsE5LJzpGTyyMzM22Vvjwys6fNmTZJkiQpXwxtp6B3c+Z8to7lc497T2I0cagJCUyFtv5kJrTNbars0JZKJpnlTJskSZKUN4a2U9C3pZ857Y00t9Uf956pPW37eqD5HJjVCsD+4TQhwOyGyl0eOTE+TnrkoDNtkiRJUh4Z2qYhxsiLP+9l+6b9nL/i+LNskLOnbe+rU/vZAPYMpZjTUEv1Mc52qxQjycwZdfWNhjZJkiQpX2pKXUC5G0uP88QDr/Dyv+xg8dvmcdVNF5zw/sP2tK24der6prcOcMk5rYUut6RSw0kAl0dKkiRJeWRoO4Gh/SP88z0b2b11iDXvW8JV77+AcIKZstHxUVLjKVqohoP9UzNto2MTvLxjiNuvW1KkyksjlcjOtLk8UpIkScobQ9tx9L3Sz2PffJGx9AQ3fHolF16+4KT/zNDoEADNo8OZC9kz2rbsHGJ0fIJVnXMKVm85GMnOtNnyX5IkScofQ9sRYoy88LPt/PKhHmYvaODWT69k7qLphZBEOjPT1HJwMHNhfqZz5Ia+AQBWdc7Of8FlJJWcXB7pTJskSZKUL4a2HGOj4/Q9Hdn05qssWTWf37h9BXUN0/8RTc60tQzvg+o6mLMYgI29B5jTWEtnW0NB6i4XqclGJM6
0SZIkSXljaMvx4/s3ceBNuOqmC1hzw5IT7l87lqnQNrgb5l4IVdUAbOg9wMqO2YRQuZ0j4VD3SGfaJEmSpPyx5X+ONTcs4fxfC1x544kbjhzP1J62A71Th2qPpMd5ZddQxS+NhMxMW1V1DTV1xz/HTpIkSdKpMbTlWHB+Cy0dpz8bNrmnrbV/+1TnyJd3DDI2EVnZUdlNSCDT8r++qaniZxQlSZKkYjK05dHUTNvY6FTnyI19B4DKb0ICMJJIuDRSkiRJyjNDWx4NjQ4RCDTFODXT9sL2A8xvrmfR7Fklrq7wJmfaJEmSJOWPoS2PhkaHaK6qzfxQs3vaNvYNsKqz8puQQKblvzNtkiRJUn4Z2vIokU7QQhU0zoPGuSRTY/TsTrCyo/KXRkKme2R9ozNtkiRJUj4Z2vJocHSQ5vHxqVm2TTsGmYgzYz8buDxSkiRJKgRDWx4lRhO0pFNTTUg29GaakMyEmbYYo41IJEmSpAIwtOXRUGqAlvQIzM/uZ+sd4JzWWSxsrfwmJGOjKSbGx6g3tEmSJEl5ZWjLo8TIAC0T8dBMW98BVs6UpZHJJIAzbZIkSVKeGdryaDCdoHliAuYvY2gkzet7klw2Q0LbSDJzsLh72iRJkqT8MrTlyUScIDmeoiVGaLtg6lDtlZ1zSlxZcUzOtLk8UpIkScovQ1ueDKeHmSDSUj8HaurYOIOakMChmbZZtvyXJEmS8srQlieJdCa0tDQtBDL72TrbGpjbVFfKsoomNTw502ZokyRJkvLJ0JYngyOZmbXm1k4ANvYemDHnswGMJCb3tLk8UpIkSconQ1ueDPX3ANAy+3wGhkfZtn+YlR0zYz8bQGo4G9pcHilJkiTllaEtT3q2PgHAeYvWTDUhmUkzbalkktpZDVTX1JS6FEmSJKmiGNrypHvbz5g/AZ1L38eGbBOSt82QJiSQaUTifjZJkiQp/wxt+dD3LM+PJ1jdehGhupqNvQe4YH4TsxtqS11Z0aSSSQ/WliRJkgrA0JYHu9b9T/pqa1i99EYANvQOzJhW/5NSyYT72SRJkqQCmFZoCyFcH0LYEkLoCSHcfYz3fz2E8FwIYSyE8KH8l1nGhnbR/eZPAFjdcS17hlK8dWBkRu1nAxgZTro8UpIkSSqAk4a2EEI18DXgBmAF8NEQwoojbtsG/Dvg/+S7wLL37P1019XQUF3P8rnLebFvZh2qPcnlkZIkSVJhTKfV31VAT4zxdYAQwoPALcCmyRtijG9m35soQI3la2wUnrmP7oXzWbngMmqratnQe4AQ4NIZF9psRCJJkiQVwnRCWwewPed1L3D16XyzEMIdwB0A7e3tdHV1nc6XKahEIjHtuhbu6mLx8B4208BvHmyjq6uLx18YYVFjYP26JwtbaBmJMZIaTrJzz96y/J2Wo1MZZ9Lpcpyp0BxjKgbHmYqh3MdZUQ/VijHeC9wLsGbNmrh27dpifvtp6erqYlp1xQjf/DzrFlxIJM0HrvwA13Vcxx/+8ie8c/l81q69vOC1louRRILnvgHLV7yNt5fh77QcTXucSWfAcaZCc4ypGBxnKoZyH2fTaUTSB5yX87oze21m610Pbz3H84uvIBBYtWAVuwZH2D2UYtVMWxo5nABweaQkSZJUANMJbc8Ay0IIF4QQ6oDbgEcKW9ZZ4OlvQH0r3dWRZW3LaKlr4YXtAwCs7JxT4uKKaySRCW02IpEkSZLy76ShLcY4BnwWeAx4GfhejPGlEMIXQgg3A4QQrgwh9AK/BdwTQnipkEWX3OAO2PQDxi7/GC/se5HVC1cDsLHvANVVgRWLWktcYHGlhpOAM22SJElSIUxrT1uM8VHg0SOufS7n82fILJucGdbfBxPjvHrJexl+4h+nQtuG3gMsW9hMQ111iQssrlQyE9qcaZMkSZLyb1qHaytHegTW3w8XX0/3yG4AVi9cTYyRjX0HZtyh2gAjSfe0SZIkSYViaDtVLz0Mw3vhmk/Tvbub9sZ2FjUtYtv
+YfYnR2fcfjbInNEGzrRJkiRJhWBoOxUxwlNfhwWXwAXvont3N6sXruaVXQk+8e1fUVsdeMdF80pdZdGNJJOEUEXtrIZSlyJJkiRVHEPbqdj2FOzcAFd/ih3Jnewa3kXd2EV84G9+SXJ0nAf+wzVctGDmzTalhhPUNzURQih1KZIkSVLFKerh2me14f3w5Jdg1mxY9RHWb38cgAeeqOLyc1r4+sffTnvrrBIXWRqpZNKlkZIkSVKBGNpOZHg/bP4RvPQDeOPnMDEGa/+IgbFavvTE/yPGOj582VV8/uaV1NfMrI6RuUaSCZuQSJIkSQViaDtCTXoQnv0ObPoBvPFEJqjNWQzX/g6suJVNXMSnvvok+2dv5qJ5l/KXH7y81CWXXCqZpN6ZNkmSJKkgDG25/ulurnv6HmAC2pbAtZ+FS2+FRZdDCPzDC2/xnx76F2Y3jlM9ayc3LP1AqSsuCyPJBPPb5pa6DEmSJKkiGdpynbOSbed/kMU33AnnrIKcxhrr39zPXQ92c8X5bdz+Gynu/mXk8oXOsgGkhpPUNzvTJkmSJBWC3SNzrf4Yb1z4b2DRZYcFtsGRNHc9+DwdbQ3cf/uVvDH0EtWhmlULVpWw2PKRSiSob3RPmyRJklQIzrSdRIyRP/6/L7JzcIS///S1tMyqpXt3Nxe3XUxTrUFlbHSUsfSo3SMlSZKkAnGm7SQefq6Pf3jhLX73Pcu44vw20hNpNu7dyBXtV5S6tLKQGk4C2IhEkiRJKhBD2wls3Zfkcz98kauWzOUz/2opAFv2b+Hg2EH3s2WNJBMAtvyXJEmSCsTQdhzp8QnufPB5qqsCX77tcqqrMnvcund3A7B6wepSllc2UsnMTJvLIyVJkqTCcE/bcXzlJ6/wwvYBvvqvV9Mxp2HqevfubjqaO2hvai9hdeUjNTnTZiMSSZIkqSCcaTuGp17fx990vcaH13Ty/lXnTl2PMdK9u9ulkTlGpva0GdokSZKkQjC0HSExGvm97z7PknlN/OlNlx72Xm+il70H97o0MofLIyVJkqTCcnlkjhgjf/tSij1DEzz8mXfQVH/4j+f53c8DsLrd0DZpanmkoU2SJEkqCGfacnxz3XM8u38Xn33vuVx6bstR7z+3+zlaaltYOmdpCaorTyPJBDV19dTU1pa6FEmSJKkiOdOW4x+330/z0p9z33a47++gpbaF1vpWWusyfzb3b2bVwlVUBbPupFQy4X42SZIkqYAMbTn+7N2f4kdPncd5F3UwODqY+ZManPp8QcMCbl16a6nLLCupZNL9bJIkSVIBGdpyrFywkn0t+1i7Ym2pSzlrjCQTtvuXJEmSCsh1fjptE+Pj7N76BrMXemadJEmSVCiGNp22bS9tYGRokGXXXFfqUiRJkqSKZWjTaXvlqSepndXAksuuKHUpkiRJUsUytOm0jI+N8eqv1nHR26+itq6+1OVIkiRJFcvQptOyPbs0cvm1v1bqUiRJkqSKZmjTadmy7knqGlwaKUmSJBWaoU2nbHxsjJ5n1nHR26+mpq6u1OVIkiRJFc3QplO2/cUXGEkMcbFLIyVJkqSCM7TplG156knqGhpZsmp1qUuRJEmSKp6hTadkfGyMnl+tY+kal0ZKkiRJxWBo0ynZ9uILjCQTXHztO0tdiiRJkjQjGNp0Sras+wV1DY0sXmXXSEmSJKkYDG2atvGxND3PrGPplddQU1tb6nIkSZKkGcHQpmnbuvF5UsmkB2pLkiRJRWRo07S9su6X1Dc2sXjV5aUuRZIkSZoxDG2altylkdU1Lo2UJEmSisXQpmnZuuF5UsNJu0ZKkiRJRWZo07RsWfcL6puaWLzSpZGSJElSMRnadFJj6TSvrX+apWuudWmkJEmSVGSGNp3U1g3dpIaTLHdppCRJklR00wptIYTrQwhbQgg9IYS7j/F+fQjhu9n3nw4hLMl3oSqdV9b9gllNzZy/8rJSlyJJkiTNOCcNbSGEauBrwA3ACuCjIYQVR9z
2SaA/xrgU+DLwxXwXqtIYS6fpWf80S69yaaQkSZJUCjXTuOcqoCfG+DpACOFB4BZgU849twD/Jfv5Q8BXQwghxhjzWGvBdT/2I17/+eMMPf90qUspG6lkgtGDw1x8jUsjJUmSpFKYTmjrALbnvO4Frj7ePTHGsRDCAWAesDf3phDCHcAdAO3t7XR1dZ1e1QXS2/0cyT27OLhvT6lLKSut5y3hzX0DbC2z39fZLJFIlN34V+VxnKnQHGMqBseZiqHcx9l0QlvexBjvBe4FWLNmTVy7dm0xv/3JrV1LV1cXZVeXKo7jTMXgOFOhOcZUDI4zFUO5j7PpNCLpA87Led2ZvXbMe0IINcBsYF8+CpQkSZKkmWw6oe0ZYFkI4YIQQh1wG/DIEfc8Anwi+/mHgJ+dbfvZJEmSJKkcnXR5ZHaP2meBx4Bq4NsxxpdCCF8A1scYHwHuA/4uhNAD7CcT7CRJkiRJZ2hae9pijI8Cjx5x7XM5n48Av5Xf0iRJkiRJ0zpcW5IkSZJUGoY2SZIkSSpjhjZJkiRJKmOGNkmSJEkqY4Y2SZIkSSpjhjZJkiRJKmOGNkmSJEkqY4Y2SZIkSSpjhjZJkiRJKmOGNkmSJEkqY4Y2SZIkSSpjhjZJkiRJKmOGNkmSJEkqY4Y2SZIkSSpjhjZJkiRJKmOGNkmSJEkqY4Y2SZIkSSpjhjZJkiRJKmOGNkmSJEkqYyHGWJpvHMIeYGtJvvmJzQf2lroIVTzHmYrBcaZCc4ypGBxnKoZSjbPFMcYFJ7upZKGtXIUQ1scY15S6DlU2x5mKwXGmQnOMqRgcZyqGch9nLo+UJEmSpDJmaJMkSZKkMmZoO9q9pS5AM4LjTMXgOFOhOcZUDI4zFUNZjzP3tEmSJElSGXOmTZIkSZLKmKEtRwjh+hDClhBCTwjh7lLXo7NfCOG8EMLjIYRNIYSXQgh3Za/PDSH8OITwavZjW6lr1dkvhFAdQugOIfwo+/qCEMLT2Wfad0MIdaWuUWe3EMKcEMJDIYTNIYSXQwjX+jxTPoUQfi/738sXQwgPhBBm+SzTmQohfDuEsDuE8GLOtWM+u0LG/8iOtw0hhCtKV/khhrasEEI18DXgBmAF8NEQworSVqUKMAb8foxxBXAN8DvZcXU38NMY4zLgp9nX0pm6C3g55/UXgS/HGJcC/cAnS1KVKsl/B/45xngJcBmZ8ebzTHkRQugA7gTWxBjfBlQDt+GzTGfub4Hrj7h2vGfXDcCy7J87gK8XqcYTMrQdchXQE2N8PcY4CjwI3FLimnSWizHuiDE+l/18iMz/4HSQGVvfyd72HeDW0lSoShFC6ARuBL6VfR2AdwMPZW9xnOmMhBBmA78O3AcQYxyNMQ7g80z5VQM0hBBqgEZgBz7LdIZijE8A+4+4fLxn1y3A/4oZTwFzQgiLilPp8RnaDukAtue87s1ek/IihLAEWA08DbTHGHdk39oJtJeoLFWOrwB/CExkX88DBmKMY9nXPtN0pi4A9gD3Z5fhfiuE0ITPM+VJjLEP+G/ANjJh7QDwLD7LVBjHe3aVZSYwtElFEEJoBr4P/G6McTD3vZhp4WobV522EML7gd0xxmdLXYsqWg1wBfD1GONqIMkRSyF9nulMZPcU3ULmLwjOBZo4ekmblHdnw7PL0HZIH3BezuvO7DXpjIQQaskEtv8dY3w4e3nX5FR79uPuUtWninAdcHMI4U0yS7vfTWbv0ZzsEiPwmaYz1wv0xhifzr5+iEyI83mmfHkv8EaMcU+MMQ08TOb55rNMhXC8Z1dZZgJD2yHPAMuyHYrqyGx8faTENeksl91XdB/wcozxSzlvPQJ8Ivv5J4AfFrs2VY4Y43+OMXbGGJeQeXb9LMb4MeBx4EPZ2xxnOiMxxp3A9hDC8uyl9wCb8Hmm/NkGXBNCaMz+93NyjPksUyEc79n1CPBvs10krwEO5CyjLBkP184RQngfmX0h1cC3Y4x/XuKSdJYLIbwT+AWwkUN7jf6
IzL627wHnA1uBD8cYj9wgK52yEMJa4A9ijO8PIVxIZuZtLtANfDzGmCplfTq7hRAuJ9Pspg54HbidzF8A+zxTXoQQPg98hEz35W7g35PZT+SzTKcthPAAsBaYD+wC/hT4Acd4dmX/wuCrZJbmDgO3xxjXl6LuXIY2SZIkSSpjLo+UJEmSpDJmaJMkSZKkMmZokyRJkqQyZmiTJEmSpDJmaJMkSZKkMmZokyRJkqQyZmiTJEmSpDJmaJMkSZKkMvb/AVZmCyqo719pAAAAAElFTkSuQmCC\n",
-      "text/plain": [
-       "<Figure size 1080x720 with 1 Axes>"
-      ]
-     },
-     "metadata": {
-      "needs_background": "light"
-     },
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "plot_by_class_curve(learner.history, \"f1\", learner.sup_labels)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 5. Predict on new data\n",
-    "Create new data loader from existing path."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 38,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from modules.data.elmo_data import get_elmo_data_loader_for_predict"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dl = get_elmo_data_loader_for_predict(data_path + \"valid_with_pos.csv\", learner)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Load our best model."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "learner.load_model()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Call predict from learner."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 39,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "preds = learner.predict(dl)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 6. Transform predictions to tokens and spans"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "['<bos>', 'Сделка', 'состоится', ',', 'если', 'будет', 'одобрена', 'регуляторами', ',', 'из-за', 'которых', 'в', 'начале', 'года', 'сорвалось', 'слияние', 'NYSE', 'Euronext', 'с', 'Deutsche', 'Börse', '<eos>']\n",
-      "['<bos>', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B_ORG', 'I_ORG', 'O', 'B_ORG', 'I_ORG', '<eos>']\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules.utils.utils import tokens2spans\n",
-    "\n",
-    "\n",
-    "tp, lp = [x.tokens for x in dl.dataset], preds\n",
-    "print(tp[0])\n",
-    "print(lp[0])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sp = tokens2spans(tp, lp)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[('<bos>', '<bos>'), ('Сделка', 'O'), ('состоится', 'O'), (',', 'O'), ('если', 'O'), ('будет', 'O'), ('одобрена', 'O'), ('регуляторами', 'O'), (',', 'O'), ('из-за', 'O'), ('которых', 'O'), ('в', 'O'), ('начале', 'O'), ('года', 'O'), ('сорвалось', 'O'), ('слияние', 'O'), ('NYSE Euronext', 'ORG'), ('с', 'O'), ('Deutsche Börse', 'ORG')]\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(sp[0])"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 7. Evaluate"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "IOB precision"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HBox(children=(IntProgress(value=0, max=26), HTML(value='')))"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "       B_ORG      0.821     0.862     0.841       260\n",
-      "       I_ORG      0.917     0.784     0.846       283\n",
-      "       B_LOC      0.877     0.877     0.877       195\n",
-      "       B_PER      0.963     0.963     0.963       191\n",
-      "       I_PER      0.977     0.962     0.969       130\n",
-      "       I_LOC      0.639     0.657     0.648        35\n",
-      "\n",
-      "   micro avg      0.891     0.867     0.879      1094\n",
-      "   macro avg      0.866     0.851     0.857      1094\n",
-      "weighted avg      0.893     0.867     0.879      1094\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules.train.train import validate_step\n",
-    "print(validate_step(learner.data.valid_dl, learner.model, learner.data.id2label, learner.sup_labels))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Span precision"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "              precision    recall  f1-score   support\n",
-      "\n",
-      "         PER      0.880     0.885     0.883       191\n",
-      "         LOC      0.773     0.785     0.779       195\n",
-      "         ORG      0.760     0.815     0.787       260\n",
-      "\n",
-      "   micro avg      0.798     0.827     0.812       646\n",
-      "   macro avg      0.804     0.828     0.816       646\n",
-      "weighted avg      0.799     0.827     0.813       646\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "from modules.utils.plot_metrics import get_elmo_span_report\n",
-    "clf_report = get_elmo_span_report(dl, preds)\n",
-    "print(clf_report)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/exps/conll2003 BERTBiLSTMAttnCRF base BERT.ipynb b/exps/conll2003 BERTBiLSTMAttnCRF base BERT.ipynb
new file mode 100644
index 0000000..2a27085
--- /dev/null
+++ b/exps/conll2003 BERTBiLSTMAttnCRF base BERT.ipynb	
@@ -0,0 +1,580 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "\n",
+    "import sys\n",
+    "import warnings\n",
+    "\n",
+    "\n",
+    "warnings.filterwarnings(\"ignore\")\n",
+    "sys.path.append(\"../\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data.conll2003.prc import conll2003_preprocess"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_dir = \"/home/eartemov/ae/work/conll2003/\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "f14d2a1ce44947ce98b9f430cf82caf1",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Process /home/eartemov/ae/work/conll2003/eng.train', max=2195…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "fa6fddbba84a4de78a48e7503af8d616",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Process /home/eartemov/ae/work/conll2003/eng.testa', max=5504…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "e3c3e6e2ebdb4e51ba4051a1499e53cf",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Process /home/eartemov/ae/work/conll2003/eng.testb', max=5035…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "conll2003_preprocess(data_dir)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## IO markup"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Train"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Creating labels vocabs', max=6973, style=ProgressStyle(descri…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=\"/home/eartemov/ae/work/conll2003/eng.train.train.csv\",\n",
+    "    valid_df_path=\"/home/eartemov/ae/work/conll2003/eng.testa.dev.csv\",\n",
+    "    idx2labels_path=\"/home/eartemov/ae/work/conll2003/idx2labels.txt\",\n",
+    "    clear_cache=True,\n",
+    "    model_name=\"bert-base-cased\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTBiLSTMAttnCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = BERTBiLSTMAttnCRF.create(\n",
+    "    len(data.train_ds.idx2label), model_name=\"bert-base-cased\",\n",
+    "    lstm_dropout=0., crf_dropout=0.3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_epochs = 100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(\n",
+    "    model, data, \"/home/eartemov/ae/work/models/conll2003-BERTBiLSTMAttnCRF-base-IO.cpt\",\n",
+    "    t_total=num_epochs * len(data.train_dl))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2629775"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "learner.fit(epochs=num_epochs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Eval"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Load model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:pytorch_pretrained_bert.tokenization:The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n",
+      "INFO:pytorch_pretrained_bert.tokenization:loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt from cache at /home/eartemov/.pytorch_pretrained_bert/5e8a2b4893d13790ed4150ca1906be5f7a03d6c4ddf62296c383f6db42814db2.e13dbb970cb325137104fb2e5f36fe865f27746c6b526f6352861b1980eb80b1\n"
+     ]
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=\"/home/eartemov/ae/work/conll2003/eng.train.train.csv\",\n",
+    "    valid_df_path=\"/home/eartemov/ae/work/conll2003/eng.testa.dev.csv\",\n",
+    "    idx2labels_path=\"/home/eartemov/ae/work/conll2003/idx2labels.txt\",\n",
+    "    clear_cache=False,\n",
+    "    model_name=\"bert-base-cased\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTBiLSTMAttnCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:pytorch_pretrained_bert.modeling:loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased.tar.gz from cache at /home/eartemov/.pytorch_pretrained_bert/a803ce83ca27fecf74c355673c434e51c265fb8a3e0e57ac62a80e38ba98d384.681017f415dfb33ec8d0e04fe51a619f3f01532ecea04edbfd48c5d160550d9c\n",
+      "INFO:pytorch_pretrained_bert.modeling:extracting archive file /home/eartemov/.pytorch_pretrained_bert/a803ce83ca27fecf74c355673c434e51c265fb8a3e0e57ac62a80e38ba98d384.681017f415dfb33ec8d0e04fe51a619f3f01532ecea04edbfd48c5d160550d9c to temp dir /tmp/tmpk62t5dpm\n",
+      "INFO:pytorch_pretrained_bert.modeling:Model config {\n",
+      "  \"attention_probs_dropout_prob\": 0.1,\n",
+      "  \"hidden_act\": \"gelu\",\n",
+      "  \"hidden_dropout_prob\": 0.1,\n",
+      "  \"hidden_size\": 768,\n",
+      "  \"initializer_range\": 0.02,\n",
+      "  \"intermediate_size\": 3072,\n",
+      "  \"max_position_embeddings\": 512,\n",
+      "  \"num_attention_heads\": 12,\n",
+      "  \"num_hidden_layers\": 12,\n",
+      "  \"type_vocab_size\": 2,\n",
+      "  \"vocab_size\": 28996\n",
+      "}\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "model = BERTBiLSTMAttnCRF.create(\n",
+    "    len(data.train_ds.idx2label), model_name=\"bert-base-cased\",\n",
+    "    lstm_dropout=0., crf_dropout=0.3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(\n",
+    "    model, data, \"/home/eartemov/ae/work/models/conll2003-BERTBiLSTMAttnCRF-base-IO.cpt\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2629775"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner.load_model()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data.bert_data import get_data_loader_for_predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dl = get_data_loader_for_predict(data, df_path=data.valid_ds.config[\"df_path\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Predicting', max=109, style=ProgressStyle(description_width='…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "preds = learner.predict(dl)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn_crfsuite.metrics import flat_classification_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.utils import bert_labels2tokens, voting_choicer\n",
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pred_tokens, pred_labels = bert_labels2tokens(dl, preds)\n",
+    "true_tokens, true_labels = bert_labels2tokens(dl, [x.bert_labels for x in dl.dataset])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "assert pred_tokens == true_tokens\n",
+    "tokens_report = flat_classification_report(true_labels, pred_labels, labels=data.train_ds.idx2label[4:], digits=4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "       I_ORG     0.9441    0.9357    0.9399      2005\n",
+      "         I_O     0.9968    0.9964    0.9966     41651\n",
+      "      I_MISC     0.9147    0.8972    0.9059      1255\n",
+      "       I_PER     0.9769    0.9807    0.9788      2848\n",
+      "       I_LOC     0.9670    0.9615    0.9643      1922\n",
+      "\n",
+      "   micro avg     0.9904    0.9892    0.9898     49681\n",
+      "   macro avg     0.9599    0.9543    0.9571     49681\n",
+      "weighted avg     0.9903    0.9892    0.9897     49681\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(tokens_report)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report\n",
+    "from modules.analyze_utils.utils import voting_choicer, tokens2spans"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         LOC     0.9136    0.9020    0.9078      1664\n",
+      "        MISC     0.8788    0.8594    0.8690       903\n",
+      "           O     0.9895    0.9911    0.9903     41616\n",
+      "         ORG     0.8508    0.8302    0.8404      1278\n",
+      "         PER     0.9489    0.9506    0.9498      1681\n",
+      "\n",
+      "    accuracy                         0.9797     47142\n",
+      "   macro avg     0.9163    0.9067    0.9114     47142\n",
+      "weighted avg     0.9795    0.9797    0.9796     47142\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(get_bert_span_report(dl, preds, fn=voting_choicer))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/exps/conll2003 BERTBiLSTMAttnNCRF base BERT.ipynb b/exps/conll2003 BERTBiLSTMAttnNCRF base BERT.ipynb
new file mode 100644
index 0000000..ba6ad5a
--- /dev/null
+++ b/exps/conll2003 BERTBiLSTMAttnNCRF base BERT.ipynb	
@@ -0,0 +1,602 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "\n",
+    "import sys\n",
+    "import warnings\n",
+    "\n",
+    "\n",
+    "warnings.filterwarnings(\"ignore\")\n",
+    "sys.path.append(\"../\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data.conll2003.prc import conll2003_preprocess"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_dir = \"/home/eartemov/ae/work/conll2003/\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "f14d2a1ce44947ce98b9f430cf82caf1",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Process /home/eartemov/ae/work/conll2003/eng.train', max=2195…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "fa6fddbba84a4de78a48e7503af8d616",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Process /home/eartemov/ae/work/conll2003/eng.testa', max=5504…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "e3c3e6e2ebdb4e51ba4051a1499e53cf",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Process /home/eartemov/ae/work/conll2003/eng.testb', max=5035…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "conll2003_preprocess(data_dir)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## IO markup"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Train"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Creating labels vocabs', max=6973, style=ProgressStyle(descri…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=\"/home/eartemov/ae/work/conll2003/eng.train.train.csv\",\n",
+    "    valid_df_path=\"/home/eartemov/ae/work/conll2003/eng.testa.dev.csv\",\n",
+    "    idx2labels_path=\"/home/eartemov/ae/work/conll2003/idx2labels2.txt\",\n",
+    "    clear_cache=True,\n",
+    "    model_name=\"bert-base-cased\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTBiLSTMAttnNCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "build CRF...\n"
+     ]
+    }
+   ],
+   "source": [
+    "model = BERTBiLSTMAttnNCRF.create(\n",
+    "    len(data.train_ds.idx2label), model_name=\"bert-base-cased\",\n",
+    "    lstm_dropout=0., crf_dropout=0.3, nbest=len(data.train_ds.idx2label)-1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_epochs = 100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(\n",
+    "    model, data, \"/home/eartemov/ae/work/models/conll2003-BERTBiLSTMAttnNCRF-base-IO.cpt\",\n",
+    "    t_total=num_epochs * len(data.train_dl))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2630289"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "learner.fit(epochs=num_epochs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Eval"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Load model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:pytorch_pretrained_bert.tokenization:The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n",
+      "INFO:pytorch_pretrained_bert.tokenization:loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt from cache at /home/eartemov/.pytorch_pretrained_bert/5e8a2b4893d13790ed4150ca1906be5f7a03d6c4ddf62296c383f6db42814db2.e13dbb970cb325137104fb2e5f36fe865f27746c6b526f6352861b1980eb80b1\n"
+     ]
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=\"/home/eartemov/ae/work/conll2003/eng.train.train.csv\",\n",
+    "    valid_df_path=\"/home/eartemov/ae/work/conll2003/eng.testa.dev.csv\",\n",
+    "    idx2labels_path=\"/home/eartemov/ae/work/conll2003/idx2labels2.txt\",\n",
+    "    clear_cache=False,\n",
+    "    model_name=\"bert-base-cased\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTBiLSTMAttnNCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:pytorch_pretrained_bert.modeling:loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased.tar.gz from cache at /home/eartemov/.pytorch_pretrained_bert/a803ce83ca27fecf74c355673c434e51c265fb8a3e0e57ac62a80e38ba98d384.681017f415dfb33ec8d0e04fe51a619f3f01532ecea04edbfd48c5d160550d9c\n",
+      "INFO:pytorch_pretrained_bert.modeling:extracting archive file /home/eartemov/.pytorch_pretrained_bert/a803ce83ca27fecf74c355673c434e51c265fb8a3e0e57ac62a80e38ba98d384.681017f415dfb33ec8d0e04fe51a619f3f01532ecea04edbfd48c5d160550d9c to temp dir /tmp/tmplvm3oqv1\n",
+      "INFO:pytorch_pretrained_bert.modeling:Model config {\n",
+      "  \"attention_probs_dropout_prob\": 0.1,\n",
+      "  \"hidden_act\": \"gelu\",\n",
+      "  \"hidden_dropout_prob\": 0.1,\n",
+      "  \"hidden_size\": 768,\n",
+      "  \"initializer_range\": 0.02,\n",
+      "  \"intermediate_size\": 3072,\n",
+      "  \"max_position_embeddings\": 512,\n",
+      "  \"num_attention_heads\": 12,\n",
+      "  \"num_hidden_layers\": 12,\n",
+      "  \"type_vocab_size\": 2,\n",
+      "  \"vocab_size\": 28996\n",
+      "}\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "build CRF...\n"
+     ]
+    }
+   ],
+   "source": [
+    "model = BERTBiLSTMAttnNCRF.create(\n",
+    "    len(data.train_ds.idx2label), model_name=\"bert-base-cased\",\n",
+    "    lstm_dropout=0., crf_dropout=0.3, nbest=len(data.train_ds.idx2label)-1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(\n",
+    "    model, data, \"/home/eartemov/ae/work/models/conll2003-BERTBiLSTMAttnNCRF-base-IO.cpt\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2630289"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner.load_model()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data.bert_data import get_data_loader_for_predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dl = get_data_loader_for_predict(data, df_path=data.valid_ds.config[\"df_path\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Predicting', max=109, style=ProgressStyle(description_width='…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "preds = learner.predict(dl)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn_crfsuite.metrics import flat_classification_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.utils import bert_labels2tokens, voting_choicer\n",
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pred_tokens, pred_labels = bert_labels2tokens(dl, preds)\n",
+    "true_tokens, true_labels = bert_labels2tokens(dl, [x.bert_labels for x in dl.dataset])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "assert pred_tokens == true_tokens\n",
+    "tokens_report = flat_classification_report(true_labels, pred_labels, labels=data.train_ds.idx2label[4:], digits=4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "       I_ORG     0.9552    0.9461    0.9506      2005\n",
+      "         I_O     0.9966    0.9973    0.9970     41651\n",
+      "      I_MISC     0.9343    0.8956    0.9146      1255\n",
+      "       I_PER     0.9846    0.9863    0.9854      2848\n",
+      "       I_LOC     0.9727    0.9636    0.9681      1922\n",
+      "\n",
+      "   micro avg     0.9918    0.9907    0.9913     49681\n",
+      "   macro avg     0.9687    0.9578    0.9631     49681\n",
+      "weighted avg     0.9917    0.9907    0.9912     49681\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(tokens_report)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report\n",
+    "from modules.analyze_utils.utils import voting_choicer, tokens2spans"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         LOC     0.9227    0.9117    0.9172      1664\n",
+      "        MISC     0.9018    0.8639    0.8825       904\n",
+      "           O     0.9904    0.9920    0.9912     41631\n",
+      "         ORG     0.8654    0.8559    0.8606      1277\n",
+      "         PER     0.9460    0.9477    0.9468      1682\n",
+      "\n",
+      "    accuracy                         0.9815     47158\n",
+      "   macro avg     0.9253    0.9142    0.9197     47158\n",
+      "weighted avg     0.9813    0.9815    0.9814     47158\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(get_bert_span_report(dl, preds, fn=voting_choicer))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/exps/conll2003 BERTBiLSTMCRF base BERT.ipynb b/exps/conll2003 BERTBiLSTMCRF base BERT.ipynb
new file mode 100644
index 0000000..2b7f385
--- /dev/null
+++ b/exps/conll2003 BERTBiLSTMCRF base BERT.ipynb	
@@ -0,0 +1,1027 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "\n",
+    "import sys\n",
+    "import warnings\n",
+    "\n",
+    "\n",
+    "warnings.filterwarnings(\"ignore\")\n",
+    "sys.path.append(\"../\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data.conll2003.prc import conll2003_preprocess"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_dir = \"/home/eartemov/ae/work/conll2003/\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "f14d2a1ce44947ce98b9f430cf82caf1",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Process /home/eartemov/ae/work/conll2003/eng.train', max=2195…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "fa6fddbba84a4de78a48e7503af8d616",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Process /home/eartemov/ae/work/conll2003/eng.testa', max=5504…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "e3c3e6e2ebdb4e51ba4051a1499e53cf",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Process /home/eartemov/ae/work/conll2003/eng.testb', max=5035…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "conll2003_preprocess(data_dir)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## IO markup"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Train"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:pytorch_pretrained_bert.tokenization:The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n",
+      "INFO:pytorch_pretrained_bert.tokenization:loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt from cache at /home/eartemov/.pytorch_pretrained_bert/5e8a2b4893d13790ed4150ca1906be5f7a03d6c4ddf62296c383f6db42814db2.e13dbb970cb325137104fb2e5f36fe865f27746c6b526f6352861b1980eb80b1\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Creating labels vocabs', max=6973, style=ProgressStyle(descri…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=\"/home/eartemov/ae/work/conll2003/eng.train.train.csv\",\n",
+    "    valid_df_path=\"/home/eartemov/ae/work/conll2003/eng.testa.dev.csv\",\n",
+    "    idx2labels_path=\"/home/eartemov/ae/work/conll2003/idx2labels.txt\",\n",
+    "    clear_cache=True,\n",
+    "    model_name=\"bert-base-cased\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTBiLSTMCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = BERTBiLSTMCRF.create(\n",
+    "    len(data.train_ds.idx2label), model_name=\"bert-base-cased\",\n",
+    "    lstm_dropout=0., crf_dropout=0.3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_epochs = 100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(\n",
+    "    model, data, \"/home/eartemov/ae/work/models/conll2003-BERTBiLSTMCRF-base-IO.cpt\", t_total=num_epochs * len(data.train_dl))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2235023"
+      ]
+     },
+     "execution_count": 34,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "learner.fit(epochs=num_epochs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Eval"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Load model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Creating labels vocabs', max=6973, style=ProgressStyle(descri…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=\"/home/eartemov/ae/work/conll2003/eng.train.train.csv\",\n",
+    "    valid_df_path=\"/home/eartemov/ae/work/conll2003/eng.testa.dev.csv\",\n",
+    "    idx2labels_path=\"/home/eartemov/ae/work/conll2003/idx2labels.txt\",\n",
+    "    clear_cache=True,\n",
+    "    model_name=\"bert-base-cased\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTBiLSTMCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = BERTBiLSTMCRF.create(\n",
+    "    len(data.train_ds.idx2label), model_name=\"bert-base-cased\",\n",
+    "    lstm_dropout=0., crf_dropout=0.3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(\n",
+    "    model, data, \"/home/eartemov/ae/work/models/conll2003-BERTBiLSTMCRF-base-IO.cpt\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2235023"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner.load_model()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data.bert_data import get_data_loader_for_predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dl = get_data_loader_for_predict(data, df_path=data.valid_ds.config[\"df_path\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Predicting', max=109, style=ProgressStyle(description_width='…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "preds = learner.predict(dl)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn_crfsuite.metrics import flat_classification_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.utils import bert_labels2tokens, voting_choicer\n",
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pred_tokens, pred_labels = bert_labels2tokens(dl, preds)\n",
+    "true_tokens, true_labels = bert_labels2tokens(dl, [x.bert_labels for x in dl.dataset])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "assert pred_tokens == true_tokens\n",
+    "tokens_report = flat_classification_report(true_labels, pred_labels, labels=data.train_ds.idx2label[4:], digits=4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "       I_ORG     0.9504    0.9451    0.9477      2005\n",
+      "         I_O     0.9970    0.9969    0.9970     41651\n",
+      "      I_MISC     0.9372    0.9036    0.9201      1255\n",
+      "       I_PER     0.9852    0.9849    0.9851      2848\n",
+      "       I_LOC     0.9668    0.9688    0.9678      1922\n",
+      "\n",
+      "   micro avg     0.9918    0.9907    0.9912     49681\n",
+      "   macro avg     0.9673    0.9599    0.9635     49681\n",
+      "weighted avg     0.9917    0.9907    0.9912     49681\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(tokens_report)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report\n",
+    "from modules.analyze_utils.utils import voting_choicer, tokens2spans"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(get_bert_span_report(dl, preds, fn=voting_choicer))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## BIO markup"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Train"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Creating labels vocabs', max=6973, style=ProgressStyle(descri…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=\"/home/eartemov/ae/work/conll2003/eng.train.train.csv\",\n",
+    "    valid_df_path=\"/home/eartemov/ae/work/conll2003/eng.testa.dev.csv\",\n",
+    "    idx2labels_path=\"/home/eartemov/ae/work/conll2003/idx2labels.txt\",\n",
+    "    clear_cache=True,\n",
+    "    model_name=\"bert-base-cased\", markup=\"BIO\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTBiLSTMCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = BERTBiLSTMCRF.create(\n",
+    "    len(data.train_ds.idx2label), model_name=\"bert-base-cased\",\n",
+    "    lstm_dropout=0., crf_dropout=0.3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_epochs = 100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(\n",
+    "    model, data, \"/home/eartemov/ae/work/models/conll2003-BERTBiLSTMCRF-base-BIO.cpt\", t_total=num_epochs * len(data.train_dl))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2236155"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "learner.fit(epochs=num_epochs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Eval"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Load model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 79,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:pytorch_pretrained_bert.tokenization:The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n",
+      "INFO:pytorch_pretrained_bert.tokenization:loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt from cache at /home/eartemov/.pytorch_pretrained_bert/5e8a2b4893d13790ed4150ca1906be5f7a03d6c4ddf62296c383f6db42814db2.e13dbb970cb325137104fb2e5f36fe865f27746c6b526f6352861b1980eb80b1\n"
+     ]
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=\"/home/eartemov/ae/work/conll2003/eng.train.train.csv\",\n",
+    "    valid_df_path=\"/home/eartemov/ae/work/conll2003/eng.testa.dev.csv\",\n",
+    "    idx2labels_path=\"/home/eartemov/ae/work/conll2003/idx2labels.txt\",\n",
+    "    clear_cache=False,\n",
+    "    model_name=\"bert-base-cased\", markup=\"BIO\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTBiLSTMCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:pytorch_pretrained_bert.modeling:loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased.tar.gz from cache at /home/eartemov/.pytorch_pretrained_bert/a803ce83ca27fecf74c355673c434e51c265fb8a3e0e57ac62a80e38ba98d384.681017f415dfb33ec8d0e04fe51a619f3f01532ecea04edbfd48c5d160550d9c\n",
+      "INFO:pytorch_pretrained_bert.modeling:extracting archive file /home/eartemov/.pytorch_pretrained_bert/a803ce83ca27fecf74c355673c434e51c265fb8a3e0e57ac62a80e38ba98d384.681017f415dfb33ec8d0e04fe51a619f3f01532ecea04edbfd48c5d160550d9c to temp dir /tmp/tmpb9shk2lo\n",
+      "INFO:pytorch_pretrained_bert.modeling:Model config {\n",
+      "  \"attention_probs_dropout_prob\": 0.1,\n",
+      "  \"hidden_act\": \"gelu\",\n",
+      "  \"hidden_dropout_prob\": 0.1,\n",
+      "  \"hidden_size\": 768,\n",
+      "  \"initializer_range\": 0.02,\n",
+      "  \"intermediate_size\": 3072,\n",
+      "  \"max_position_embeddings\": 512,\n",
+      "  \"num_attention_heads\": 12,\n",
+      "  \"num_hidden_layers\": 12,\n",
+      "  \"type_vocab_size\": 2,\n",
+      "  \"vocab_size\": 28996\n",
+      "}\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "model = BERTBiLSTMCRF.create(\n",
+    "    len(data.train_ds.idx2label), model_name=\"bert-base-cased\",\n",
+    "    lstm_dropout=0., crf_dropout=0.3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(\n",
+    "    model, data, \"/home/eartemov/ae/work/models/conll2003-BERTBiLSTMCRF-base-BIO.cpt\", t_total=num_epochs * len(data.train_dl))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2236155"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner.load_model()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data.bert_data import get_data_loader_for_predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dl = get_data_loader_for_predict(data, df_path=data.valid_ds.config[\"df_path\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Predicting', max=109, style=ProgressStyle(description_width='…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "preds = learner.predict(dl)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn_crfsuite.metrics import flat_classification_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.utils import bert_labels2tokens, voting_choicer\n",
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pred_tokens, pred_labels = bert_labels2tokens(dl, preds)\n",
+    "true_tokens, true_labels = bert_labels2tokens(dl, [x.bert_labels for x in dl.dataset])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "assert pred_tokens == true_tokens\n",
+    "tokens_report = flat_classification_report(true_labels, pred_labels,\n",
+    "                                           labels=[\"I_ORG\", \"I_PER\", \"I_LOC\", \"B_ORG\", \"B_PER\", \"B_LOC\"], digits=4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "       I_ORG     0.9505    0.9243    0.9372       727\n",
+      "       I_PER     0.9914    0.9854    0.9884      1166\n",
+      "       I_LOC     0.9444    0.9261    0.9352       257\n",
+      "       B_ORG     0.9273    0.9280    0.9276      1278\n",
+      "       B_PER     0.9687    0.9744    0.9715      1682\n",
+      "       B_LOC     0.9621    0.9616    0.9619      1665\n",
+      "\n",
+      "   micro avg     0.9603    0.9572    0.9588      6775\n",
+      "   macro avg     0.9574    0.9500    0.9536      6775\n",
+      "weighted avg     0.9603    0.9572    0.9587      6775\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(tokens_report)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.plot_metrics import get_f1_score"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.9536399507613248"
+      ]
+     },
+     "execution_count": 28,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "get_f1_score(true_labels, pred_labels, labels=[\"I_ORG\", \"I_PER\", \"I_LOC\", \"B_ORG\", \"B_PER\", \"B_LOC\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report\n",
+    "from modules.analyze_utils.utils import voting_choicer, tokens2spans"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         LOC     0.9135    0.9075    0.9105      1664\n",
+      "        MISC     0.8922    0.8695    0.8807       904\n",
+      "           O     0.9907    0.9917    0.9912     41632\n",
+      "         ORG     0.8498    0.8412    0.8455      1278\n",
+      "         PER     0.9495    0.9507    0.9501      1682\n",
+      "\n",
+      "    accuracy                         0.9809     47160\n",
+      "   macro avg     0.9191    0.9121    0.9156     47160\n",
+      "weighted avg     0.9808    0.9809    0.9808     47160\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(get_bert_span_report(dl, preds, fn=voting_choicer))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/exps/conll2003 BERTBiLSTMCRF.ipynb b/exps/conll2003 BERTBiLSTMCRF.ipynb
new file mode 100644
index 0000000..14976dc
--- /dev/null
+++ b/exps/conll2003 BERTBiLSTMCRF.ipynb	
@@ -0,0 +1,1063 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "\n",
+    "import sys\n",
+    "import warnings\n",
+    "\n",
+    "\n",
+    "warnings.filterwarnings(\"ignore\")\n",
+    "sys.path.append(\"../\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data.conll2003.prc import conll2003_preprocess"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_dir = \"/home/eartemov/ae/work/conll2003/\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "f14d2a1ce44947ce98b9f430cf82caf1",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Process /home/eartemov/ae/work/conll2003/eng.train', max=2195…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "fa6fddbba84a4de78a48e7503af8d616",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Process /home/eartemov/ae/work/conll2003/eng.testa', max=5504…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "e3c3e6e2ebdb4e51ba4051a1499e53cf",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Process /home/eartemov/ae/work/conll2003/eng.testb', max=5035…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "conll2003_preprocess(data_dir)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## IO markup"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Train"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Creating labels vocabs', max=6973, style=ProgressStyle(descri…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=\"/home/eartemov/ae/work/conll2003/eng.train.train.csv\",\n",
+    "    valid_df_path=\"/home/eartemov/ae/work/conll2003/eng.testa.dev.csv\",\n",
+    "    idx2labels_path=\"/home/eartemov/ae/work/conll2003/idx2labels.txt\",\n",
+    "    clear_cache=True\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTBiLSTMCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = BERTBiLSTMCRF.create(\n",
+    "    len(data.train_ds.idx2label),\n",
+    "    lstm_dropout=0., crf_dropout=0.3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_epochs = 100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(\n",
+    "    model, data, \"/home/eartemov/ae/work/models/conll2003-BERTBiLSTMCRF-IO.cpt\", t_total=num_epochs * len(data.train_dl))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2235023"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "learner.fit(epochs=num_epochs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Eval"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Load model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:pytorch_pretrained_bert.tokenization:The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n",
+      "INFO:pytorch_pretrained_bert.tokenization:loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-vocab.txt from cache at /home/eartemov/.pytorch_pretrained_bert/96435fa287fbf7e469185f1062386e05a075cadbf6838b74da22bf64b080bc32.99bcd55fc66f4f3360bc49ba472b940b8dcf223ea6a345deb969d607ca900729\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Creating labels vocabs', max=6973, style=ProgressStyle(descri…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=\"/home/eartemov/ae/work/conll2003/eng.train.train.csv\",\n",
+    "    valid_df_path=\"/home/eartemov/ae/work/conll2003/eng.testa.dev.csv\",\n",
+    "    idx2labels_path=\"/home/eartemov/ae/work/conll2003/idx2labels.txt\",\n",
+    "    clear_cache=True\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTBiLSTMCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:pytorch_pretrained_bert.modeling:loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased.tar.gz from cache at /home/eartemov/.pytorch_pretrained_bert/731c19ddf94e294e00ec1ba9a930c69cc2a0fd489b25d3d691373fae4c0986bd.4e367b0d0155d801930846bb6ed98f8a7c23e0ded37888b29caa37009a40c7b9\n",
+      "INFO:pytorch_pretrained_bert.modeling:extracting archive file /home/eartemov/.pytorch_pretrained_bert/731c19ddf94e294e00ec1ba9a930c69cc2a0fd489b25d3d691373fae4c0986bd.4e367b0d0155d801930846bb6ed98f8a7c23e0ded37888b29caa37009a40c7b9 to temp dir /tmp/tmpcxh2myyl\n",
+      "INFO:pytorch_pretrained_bert.modeling:Model config {\n",
+      "  \"attention_probs_dropout_prob\": 0.1,\n",
+      "  \"directionality\": \"bidi\",\n",
+      "  \"hidden_act\": \"gelu\",\n",
+      "  \"hidden_dropout_prob\": 0.1,\n",
+      "  \"hidden_size\": 768,\n",
+      "  \"initializer_range\": 0.02,\n",
+      "  \"intermediate_size\": 3072,\n",
+      "  \"max_position_embeddings\": 512,\n",
+      "  \"num_attention_heads\": 12,\n",
+      "  \"num_hidden_layers\": 12,\n",
+      "  \"pooler_fc_size\": 768,\n",
+      "  \"pooler_num_attention_heads\": 12,\n",
+      "  \"pooler_num_fc_layers\": 3,\n",
+      "  \"pooler_size_per_head\": 128,\n",
+      "  \"pooler_type\": \"first_token_transform\",\n",
+      "  \"type_vocab_size\": 2,\n",
+      "  \"vocab_size\": 119547\n",
+      "}\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "model = BERTBiLSTMCRF.create(\n",
+    "    len(data.train_ds.idx2label),\n",
+    "    lstm_dropout=0., crf_dropout=0.3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(\n",
+    "    model, data, \"/home/eartemov/ae/work/models/conll2003-BERTBiLSTMCRF-IO.cpt\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2235023"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner.load_model()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data.bert_data import get_data_loader_for_predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dl = get_data_loader_for_predict(data, df_path=data.valid_ds.config[\"df_path\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Predicting', max=109, style=ProgressStyle(description_width='…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "preds = learner.predict(dl)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn_crfsuite.metrics import flat_classification_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.utils import bert_labels2tokens, voting_choicer\n",
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pred_tokens, pred_labels = bert_labels2tokens(dl, preds)\n",
+    "true_tokens, true_labels = bert_labels2tokens(dl, [x.bert_labels for x in dl.dataset])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "assert pred_tokens == true_tokens\n",
+    "tokens_report = flat_classification_report(true_labels, pred_labels, labels=data.train_ds.idx2label[4:], digits=4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "       I_ORG     0.9502    0.9474    0.9488      2016\n",
+      "         I_O     0.9971    0.9970    0.9971     41702\n",
+      "      I_MISC     0.9249    0.9109    0.9178      1257\n",
+      "       I_PER     0.9836    0.9853    0.9844      2856\n",
+      "       I_LOC     0.9715    0.9564    0.9639      1926\n",
+      "\n",
+      "   micro avg     0.9917    0.9906    0.9911     49757\n",
+      "   macro avg     0.9655    0.9594    0.9624     49757\n",
+      "weighted avg     0.9916    0.9906    0.9911     49757\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(tokens_report)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report\n",
+    "from modules.analyze_utils.utils import voting_choicer, tokens2spans"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         LOC     0.9365    0.9197    0.9280      1668\n",
+      "        MISC     0.8979    0.8840    0.8909       905\n",
+      "           O     0.9919    0.9931    0.9925     41683\n",
+      "         ORG     0.8701    0.8619    0.8660      1282\n",
+      "         PER     0.9585    0.9596    0.9591      1684\n",
+      "\n",
+      "    accuracy                         0.9837     47222\n",
+      "   macro avg     0.9310    0.9237    0.9273     47222\n",
+      "weighted avg     0.9836    0.9837    0.9836     47222\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(get_bert_span_report(dl, preds, fn=voting_choicer))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## BIO markup"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Train"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Creating labels vocabs', max=6973, style=ProgressStyle(descri…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=\"/home/eartemov/ae/work/conll2003/eng.train.train.csv\",\n",
+    "    valid_df_path=\"/home/eartemov/ae/work/conll2003/eng.testa.dev.csv\",\n",
+    "    idx2labels_path=\"/home/eartemov/ae/work/conll2003/idx2labels1.txt\",\n",
+    "    clear_cache=True,\n",
+    "    markup=\"BIO\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTBiLSTMCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = BERTBiLSTMCRF.create(\n",
+    "    len(data.train_ds.idx2label),\n",
+    "    lstm_dropout=0., crf_dropout=0.3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_epochs = 100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(\n",
+    "    model, data, \"/home/eartemov/ae/work/models/conll2003-BERTBiLSTMCRF-BIO.cpt\", t_total=num_epochs * len(data.train_dl))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2236155"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "learner.fit(epochs=num_epochs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Eval"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Load model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:pytorch_pretrained_bert.tokenization:The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n",
+      "INFO:pytorch_pretrained_bert.tokenization:loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-vocab.txt from cache at /home/eartemov/.pytorch_pretrained_bert/96435fa287fbf7e469185f1062386e05a075cadbf6838b74da22bf64b080bc32.99bcd55fc66f4f3360bc49ba472b940b8dcf223ea6a345deb969d607ca900729\n"
+     ]
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=\"/home/eartemov/ae/work/conll2003/eng.train.train.csv\",\n",
+    "    valid_df_path=\"/home/eartemov/ae/work/conll2003/eng.testa.dev.csv\",\n",
+    "    idx2labels_path=\"/home/eartemov/ae/work/conll2003/idx2labels1.txt\",\n",
+    "    clear_cache=False,\n",
+    "    markup=\"BIO\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTBiLSTMCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:pytorch_pretrained_bert.modeling:loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased.tar.gz from cache at /home/eartemov/.pytorch_pretrained_bert/731c19ddf94e294e00ec1ba9a930c69cc2a0fd489b25d3d691373fae4c0986bd.4e367b0d0155d801930846bb6ed98f8a7c23e0ded37888b29caa37009a40c7b9\n",
+      "INFO:pytorch_pretrained_bert.modeling:extracting archive file /home/eartemov/.pytorch_pretrained_bert/731c19ddf94e294e00ec1ba9a930c69cc2a0fd489b25d3d691373fae4c0986bd.4e367b0d0155d801930846bb6ed98f8a7c23e0ded37888b29caa37009a40c7b9 to temp dir /tmp/tmplmqxuayi\n",
+      "INFO:pytorch_pretrained_bert.modeling:Model config {\n",
+      "  \"attention_probs_dropout_prob\": 0.1,\n",
+      "  \"directionality\": \"bidi\",\n",
+      "  \"hidden_act\": \"gelu\",\n",
+      "  \"hidden_dropout_prob\": 0.1,\n",
+      "  \"hidden_size\": 768,\n",
+      "  \"initializer_range\": 0.02,\n",
+      "  \"intermediate_size\": 3072,\n",
+      "  \"max_position_embeddings\": 512,\n",
+      "  \"num_attention_heads\": 12,\n",
+      "  \"num_hidden_layers\": 12,\n",
+      "  \"pooler_fc_size\": 768,\n",
+      "  \"pooler_num_attention_heads\": 12,\n",
+      "  \"pooler_num_fc_layers\": 3,\n",
+      "  \"pooler_size_per_head\": 128,\n",
+      "  \"pooler_type\": \"first_token_transform\",\n",
+      "  \"type_vocab_size\": 2,\n",
+      "  \"vocab_size\": 119547\n",
+      "}\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "model = BERTBiLSTMCRF.create(\n",
+    "    len(data.train_ds.idx2label),\n",
+    "    lstm_dropout=0., crf_dropout=0.3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(\n",
+    "    model, data, \"/home/eartemov/ae/work/models/conll2003-BERTBiLSTMCRF-BIO.cpt\", t_total=num_epochs * len(data.train_dl))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2236155"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner.load_model()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data.bert_data import get_data_loader_for_predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dl = get_data_loader_for_predict(data, df_path=data.valid_ds.config[\"df_path\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Predicting', max=109, style=ProgressStyle(description_width='…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "preds = learner.predict(dl)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn_crfsuite.metrics import flat_classification_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.utils import bert_labels2tokens, voting_choicer\n",
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pred_tokens, pred_labels = bert_labels2tokens(dl, preds)\n",
+    "true_tokens, true_labels = bert_labels2tokens(dl, [x.bert_labels for x in dl.dataset])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "assert pred_tokens == true_tokens\n",
+    "tokens_report = flat_classification_report(\n",
+    "    true_labels, pred_labels,\n",
+    "    labels=data.train_ds.idx2label[4:], digits=4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "       B_ORG     0.9363    0.9509    0.9435      1282\n",
+      "         B_O     0.9969    0.9982    0.9976     41702\n",
+      "      B_MISC     0.9319    0.9072    0.9194       905\n",
+      "       B_PER     0.9792    0.9792    0.9792      1686\n",
+      "       I_PER     0.9922    0.9812    0.9867      1170\n",
+      "       B_LOC     0.9699    0.9658    0.9679      1669\n",
+      "       I_ORG     0.9574    0.9482    0.9528       734\n",
+      "      I_MISC     0.8980    0.8750    0.8863       352\n",
+      "       I_LOC     0.9751    0.9144    0.9438       257\n",
+      "\n",
+      "    accuracy                         0.9912     49757\n",
+      "   macro avg     0.9597    0.9467    0.9530     49757\n",
+      "weighted avg     0.9912    0.9912    0.9912     49757\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(tokens_report)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report\n",
+    "from modules.analyze_utils.utils import voting_choicer, tokens2spans"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         LOC     0.9270    0.9131    0.9200      1668\n",
+      "        MISC     0.8965    0.8707    0.8834       905\n",
+      "           O     0.9907    0.9921    0.9914     41681\n",
+      "         ORG     0.8769    0.8783    0.8776      1282\n",
+      "         PER     0.9487    0.9431    0.9459      1686\n",
+      "\n",
+      "    accuracy                         0.9821     47222\n",
+      "   macro avg     0.9279    0.9194    0.9236     47222\n",
+      "weighted avg     0.9820    0.9821    0.9821     47222\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(get_bert_span_report(dl, preds, fn=voting_choicer))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/exps/fre BERTAttnCRF.ipynb b/exps/fre BERTAttnCRF.ipynb
new file mode 100644
index 0000000..41cc0a5
--- /dev/null
+++ b/exps/fre BERTAttnCRF.ipynb	
@@ -0,0 +1,557 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "\n",
+    "import sys\n",
+    "import warnings\n",
+    "\n",
+    "\n",
+    "warnings.filterwarnings(\"ignore\")\n",
+    "sys.path.append(\"../\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## IO markup"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Train"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_df_path = \"/home/eartemov/ae/work/factRuEval-2016/dev.csv\"\n",
+    "valid_df_path = \"/home/eartemov/ae/work/factRuEval-2016/test.csv\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Creating labels vocabs', max=1519, style=ProgressStyle(descri…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=train_df_path,\n",
+    "    valid_df_path=valid_df_path,\n",
+    "    idx2labels_path=\"/home/eartemov/ae/work/factRuEval-2016/idx2labels.txt\",\n",
+    "    clear_cache=True\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTAttnCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = BERTAttnCRF.create(len(data.train_ds.idx2label), crf_dropout=0.3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_epochs = 100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(\n",
+    "    model, data, \"/home/eartemov/ae/work/models/fre-BERTAttnCRF-IO.cpt\", t_total=num_epochs * len(data.train_dl))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "890617"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:Resuming train... Current epoch 47.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 48, average train epoch loss=0.076331\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 42 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.975     0.988     0.981     48875\n",
+      "       I_LOC      0.883     0.787     0.832      1557\n",
+      "       I_PER      0.952     0.953     0.953      2112\n",
+      "       I_ORG      0.844     0.731     0.783      3865\n",
+      "\n",
+      "   micro avg      0.964     0.963     0.964     56409\n",
+      "   macro avg      0.913     0.865     0.887     56409\n",
+      "weighted avg      0.962     0.963     0.963     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "32379758c4f84234ab99e3d4af56981d",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "learner.fit(epochs=num_epochs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Eval"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Load model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n"
+     ]
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=train_df_path,\n",
+    "    valid_df_path=valid_df_path,\n",
+    "    idx2labels_path=\"/home/eartemov/ae/work/factRuEval-2016/idx2labels.txt\",\n",
+    "    clear_cache=False\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTAttnCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = BERTAttnCRF.create(len(data.train_ds.idx2label), crf_dropout=0.3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_epochs = 100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(\n",
+    "    model, data, \"/home/eartemov/ae/work/models/fre-BERTAttnCRF-IO.cpt\", t_total=num_epochs * len(data.train_dl))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "890617"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner.load_model()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data.bert_data import get_data_loader_for_predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dl = get_data_loader_for_predict(data, df_path=data.valid_ds.config[\"df_path\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Predicting', max=170, style=ProgressStyle(description_width='…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "preds = learner.predict(dl)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn_crfsuite.metrics import flat_classification_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.utils import bert_labels2tokens, voting_choicer\n",
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pred_tokens, pred_labels = bert_labels2tokens(dl, preds)\n",
+    "true_tokens, true_labels = bert_labels2tokens(dl, [x.bert_labels for x in dl.dataset])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "assert pred_tokens == true_tokens\n",
+    "tokens_report = flat_classification_report(true_labels, pred_labels, labels=[\"I_ORG\", \"I_PER\", \"I_LOC\"], digits=4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "       I_ORG     0.8357    0.7581    0.7950      3865\n",
+      "       I_PER     0.9342    0.9673    0.9505      2112\n",
+      "       I_LOC     0.8879    0.8035    0.8436      1557\n",
+      "\n",
+      "   micro avg     0.8764    0.8261    0.8505      7534\n",
+      "   macro avg     0.8859    0.8430    0.8630      7534\n",
+      "weighted avg     0.8741    0.8261    0.8486      7534\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(tokens_report)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report\n",
+    "from modules.analyze_utils.utils import voting_choicer, tokens2spans"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         LOC     0.7893    0.7108    0.7480      1307\n",
+      "           O     0.9734    0.9823    0.9779     48691\n",
+      "         ORG     0.6611    0.5510    0.6010      1873\n",
+      "         PER     0.8256    0.8237    0.8247      1299\n",
+      "\n",
+      "    accuracy                         0.9566     53170\n",
+      "   macro avg     0.8124    0.7670    0.7879     53170\n",
+      "weighted avg     0.9543    0.9566    0.9552     53170\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(get_bert_span_report(dl, preds, fn=voting_choicer))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/exps/fre BERTBiLSTMAttnCRF-fit_BERT.ipynb b/exps/fre BERTBiLSTMAttnCRF-fit_BERT.ipynb
new file mode 100644
index 0000000..cf78521
--- /dev/null
+++ b/exps/fre BERTBiLSTMAttnCRF-fit_BERT.ipynb	
@@ -0,0 +1,478 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "\n",
+    "import sys\n",
+    "import warnings\n",
+    "\n",
+    "\n",
+    "warnings.filterwarnings(\"ignore\")\n",
+    "sys.path.append(\"../\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## IO markup"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Train"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_df_path = \"/home/eartemov/ae/work/factRuEval-2016/dev.csv\"\n",
+    "valid_df_path = \"/home/eartemov/ae/work/factRuEval-2016/test.csv\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Creating labels vocabs', max=1519, style=ProgressStyle(descri…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=train_df_path,\n",
+    "    valid_df_path=valid_df_path,\n",
+    "    idx2labels_path=\"/home/eartemov/ae/work/factRuEval-2016/idx2labels.txt\",\n",
+    "    clear_cache=True,\n",
+    "    batch_size=8\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTBiLSTMAttnCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = BERTBiLSTMAttnCRF.create(len(data.train_ds.idx2label), crf_dropout=0.3, is_freeze=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_epochs = 100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(\n",
+    "    model, data, \"/home/eartemov/ae/work/models/fre-BERTBiLSTMAttnCRF-fit_BERT-IO.cpt\",\n",
+    "    t_total=num_epochs * len(data.train_dl))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "180482937"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "learner.fit(epochs=num_epochs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Eval"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Load model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n"
+     ]
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=train_df_path,\n",
+    "    valid_df_path=valid_df_path,\n",
+    "    idx2labels_path=\"/home/eartemov/ae/work/factRuEval-2016/idx2labels.txt\",\n",
+    "    clear_cache=False\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTBiLSTMAttnCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = BERTBiLSTMAttnCRF.create(len(data.train_ds.idx2label), crf_dropout=0.3, is_freeze=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_epochs = 100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(\n",
+    "    model, data, \"/home/eartemov/ae/work/models/fre-BERTBiLSTMAttnCRF-fit_BERT-IO.cpt\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "180482937"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner.load_model()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data.bert_data import get_data_loader_for_predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dl = get_data_loader_for_predict(data, df_path=data.valid_ds.config[\"df_path\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Predicting', max=170, style=ProgressStyle(description_width='…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "preds = learner.predict(dl)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn_crfsuite.metrics import flat_classification_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.utils import bert_labels2tokens, voting_choicer\n",
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pred_tokens, pred_labels = bert_labels2tokens(dl, preds)\n",
+    "true_tokens, true_labels = bert_labels2tokens(dl, [x.bert_labels for x in dl.dataset])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "assert pred_tokens == true_tokens\n",
+    "tokens_report = flat_classification_report(true_labels, pred_labels, labels=[\"I_ORG\", \"I_PER\", \"I_LOC\"], digits=4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "       I_ORG     0.7280    0.7977    0.7612      3865\n",
+      "       I_PER     0.9743    0.8447    0.9049      2112\n",
+      "       I_LOC     0.8222    0.8407    0.8314      1557\n",
+      "\n",
+      "   micro avg     0.8065    0.8198    0.8131      7534\n",
+      "   macro avg     0.8415    0.8277    0.8325      7534\n",
+      "weighted avg     0.8165    0.8198    0.8160      7534\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(tokens_report)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report\n",
+    "from modules.analyze_utils.utils import voting_choicer, tokens2spans"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         LOC     0.7537    0.7485    0.7511      1304\n",
+      "           O     0.9702    0.9809    0.9755     48370\n",
+      "         ORG     0.5860    0.4961    0.5374      1812\n",
+      "         PER     0.8207    0.6628    0.7333      1278\n",
+      "\n",
+      "    accuracy                         0.9508     52764\n",
+      "   macro avg     0.7827    0.7221    0.7493     52764\n",
+      "weighted avg     0.9480    0.9508    0.9490     52764\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(get_bert_span_report(dl, preds, fn=voting_choicer))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/exps/fre BERTBiLSTMAttnCRF.ipynb b/exps/fre BERTBiLSTMAttnCRF.ipynb
new file mode 100644
index 0000000..db3464b
--- /dev/null
+++ b/exps/fre BERTBiLSTMAttnCRF.ipynb	
@@ -0,0 +1,582 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "\n",
+    "import sys\n",
+    "import warnings\n",
+    "\n",
+    "\n",
+    "warnings.filterwarnings(\"ignore\")\n",
+    "sys.path.append(\"../\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## IO markup"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Train"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_df_path = \"/home/eartemov/ae/work/factRuEval-2016/dev.csv\"\n",
+    "valid_df_path = \"/home/eartemov/ae/work/factRuEval-2016/test.csv\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Creating labels vocabs', max=1519, style=ProgressStyle(descri…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=train_df_path,\n",
+    "    valid_df_path=valid_df_path,\n",
+    "    idx2labels_path=\"/home/eartemov/ae/work/factRuEval-2016/idx2labels.txt\",\n",
+    "    clear_cache=True\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTBiLSTMAttnCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = BERTBiLSTMAttnCRF.create(len(data.train_ds.idx2label), crf_dropout=0.3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_epochs = 100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(\n",
+    "    model, data, \"/home/eartemov/ae/work/models/fre-BERTBiLSTMAttnCRF-IO.cpt\", t_total=num_epochs * len(data.train_dl))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2629497"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:Resuming train... Current epoch 58.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 59, average train epoch loss=0.00029528\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 45 by max_f1: 0.91\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.981     0.987     0.984     48875\n",
+      "       I_LOC      0.890     0.834     0.861      1557\n",
+      "       I_PER      0.960     0.968     0.964      2112\n",
+      "       I_ORG      0.863     0.774     0.816      3865\n",
+      "\n",
+      "   micro avg      0.971     0.968     0.969     56409\n",
+      "   macro avg      0.923     0.891     0.906     56409\n",
+      "weighted avg      0.970     0.968     0.968     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "a985eb6882e7421094e5bfe76100f89b",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "learner.fit(epochs=num_epochs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Eval"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Load model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:pytorch_pretrained_bert.tokenization:The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n",
+      "INFO:pytorch_pretrained_bert.tokenization:loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-vocab.txt from cache at /home/eartemov/.pytorch_pretrained_bert/96435fa287fbf7e469185f1062386e05a075cadbf6838b74da22bf64b080bc32.99bcd55fc66f4f3360bc49ba472b940b8dcf223ea6a345deb969d607ca900729\n"
+     ]
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=train_df_path,\n",
+    "    valid_df_path=valid_df_path,\n",
+    "    idx2labels_path=\"/home/eartemov/ae/work/factRuEval-2016/idx2labels.txt\",\n",
+    "    clear_cache=False\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTBiLSTMAttnCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:pytorch_pretrained_bert.modeling:loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased.tar.gz from cache at /home/eartemov/.pytorch_pretrained_bert/731c19ddf94e294e00ec1ba9a930c69cc2a0fd489b25d3d691373fae4c0986bd.4e367b0d0155d801930846bb6ed98f8a7c23e0ded37888b29caa37009a40c7b9\n",
+      "INFO:pytorch_pretrained_bert.modeling:extracting archive file /home/eartemov/.pytorch_pretrained_bert/731c19ddf94e294e00ec1ba9a930c69cc2a0fd489b25d3d691373fae4c0986bd.4e367b0d0155d801930846bb6ed98f8a7c23e0ded37888b29caa37009a40c7b9 to temp dir /tmp/tmpa98t5m09\n",
+      "INFO:pytorch_pretrained_bert.modeling:Model config {\n",
+      "  \"attention_probs_dropout_prob\": 0.1,\n",
+      "  \"directionality\": \"bidi\",\n",
+      "  \"hidden_act\": \"gelu\",\n",
+      "  \"hidden_dropout_prob\": 0.1,\n",
+      "  \"hidden_size\": 768,\n",
+      "  \"initializer_range\": 0.02,\n",
+      "  \"intermediate_size\": 3072,\n",
+      "  \"max_position_embeddings\": 512,\n",
+      "  \"num_attention_heads\": 12,\n",
+      "  \"num_hidden_layers\": 12,\n",
+      "  \"pooler_fc_size\": 768,\n",
+      "  \"pooler_num_attention_heads\": 12,\n",
+      "  \"pooler_num_fc_layers\": 3,\n",
+      "  \"pooler_size_per_head\": 128,\n",
+      "  \"pooler_type\": \"first_token_transform\",\n",
+      "  \"type_vocab_size\": 2,\n",
+      "  \"vocab_size\": 119547\n",
+      "}\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "model = BERTBiLSTMAttnCRF.create(len(data.train_ds.idx2label), crf_dropout=0.3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_epochs = 100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(\n",
+    "    model, data, \"/home/eartemov/ae/work/models/fre-BERTBiLSTMAttnCRF-IO.cpt\", t_total=num_epochs * len(data.train_dl))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2629497"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner.load_model()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data.bert_data import get_data_loader_for_predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dl = get_data_loader_for_predict(data, df_path=data.valid_ds.config[\"df_path\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Predicting', max=170, style=ProgressStyle(description_width='…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "preds = learner.predict(dl)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn_crfsuite.metrics import flat_classification_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.utils import bert_labels2tokens, voting_choicer\n",
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pred_tokens, pred_labels = bert_labels2tokens(dl, preds)\n",
+    "true_tokens, true_labels = bert_labels2tokens(dl, [x.bert_labels for x in dl.dataset])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "assert pred_tokens == true_tokens\n",
+    "tokens_report = flat_classification_report(true_labels, pred_labels, labels=[\"I_ORG\", \"I_PER\", \"I_LOC\"], digits=4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "       I_ORG     0.8850    0.7622    0.8190      3865\n",
+      "       I_PER     0.9598    0.9725    0.9661      2112\n",
+      "       I_LOC     0.9318    0.8163    0.8702      1557\n",
+      "\n",
+      "   micro avg     0.9178    0.8324    0.8730      7534\n",
+      "   macro avg     0.9255    0.8504    0.8851      7534\n",
+      "weighted avg     0.9156    0.8324    0.8708      7534\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(tokens_report)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report\n",
+    "from modules.analyze_utils.utils import voting_choicer, tokens2spans"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         LOC     0.8368    0.7418    0.7864      1313\n",
+      "           O     0.9764    0.9877    0.9820     48753\n",
+      "         ORG     0.7478    0.5954    0.6629      1893\n",
+      "         PER     0.8763    0.8568    0.8664      1306\n",
+      "\n",
+      "    accuracy                         0.9644     53265\n",
+      "   macro avg     0.8593    0.7954    0.8244     53265\n",
+      "weighted avg     0.9623    0.9644    0.9630     53265\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(get_bert_span_report(dl, preds, fn=voting_choicer))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/exps/fre BERTBiLSTMAttnNCRF-fit_BERT.ipynb b/exps/fre BERTBiLSTMAttnNCRF-fit_BERT.ipynb
new file mode 100644
index 0000000..ddd73fa
--- /dev/null
+++ b/exps/fre BERTBiLSTMAttnNCRF-fit_BERT.ipynb	
@@ -0,0 +1,1778 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "\n",
+    "import sys\n",
+    "import warnings\n",
+    "\n",
+    "\n",
+    "warnings.filterwarnings(\"ignore\")\n",
+    "sys.path.append(\"../\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## IO markup"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Train"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_df_path = \"/home/eartemov/ae/work/factRuEval-2016/dev.csv\"\n",
+    "valid_df_path = \"/home/eartemov/ae/work/factRuEval-2016/test.csv\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Creating labels vocabs', max=1519, style=ProgressStyle(descri…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=train_df_path,\n",
+    "    valid_df_path=valid_df_path,\n",
+    "    idx2labels_path=\"/home/eartemov/ae/work/factRuEval-2016/idx2labels.txt\",\n",
+    "    clear_cache=True,\n",
+    "    batch_size=8, max_sequence_length=400\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTBiLSTMAttnNCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "8"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(data.train_ds.label2idx)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:pytorch_pretrained_bert.modeling:loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased.tar.gz from cache at /home/eartemov/.pytorch_pretrained_bert/731c19ddf94e294e00ec1ba9a930c69cc2a0fd489b25d3d691373fae4c0986bd.4e367b0d0155d801930846bb6ed98f8a7c23e0ded37888b29caa37009a40c7b9\n",
+      "INFO:pytorch_pretrained_bert.modeling:extracting archive file /home/eartemov/.pytorch_pretrained_bert/731c19ddf94e294e00ec1ba9a930c69cc2a0fd489b25d3d691373fae4c0986bd.4e367b0d0155d801930846bb6ed98f8a7c23e0ded37888b29caa37009a40c7b9 to temp dir /tmp/tmp8b6y244f\n",
+      "INFO:pytorch_pretrained_bert.modeling:Model config {\n",
+      "  \"attention_probs_dropout_prob\": 0.1,\n",
+      "  \"directionality\": \"bidi\",\n",
+      "  \"hidden_act\": \"gelu\",\n",
+      "  \"hidden_dropout_prob\": 0.1,\n",
+      "  \"hidden_size\": 768,\n",
+      "  \"initializer_range\": 0.02,\n",
+      "  \"intermediate_size\": 3072,\n",
+      "  \"max_position_embeddings\": 512,\n",
+      "  \"num_attention_heads\": 12,\n",
+      "  \"num_hidden_layers\": 12,\n",
+      "  \"pooler_fc_size\": 768,\n",
+      "  \"pooler_num_attention_heads\": 12,\n",
+      "  \"pooler_num_fc_layers\": 3,\n",
+      "  \"pooler_size_per_head\": 128,\n",
+      "  \"pooler_type\": \"first_token_transform\",\n",
+      "  \"type_vocab_size\": 2,\n",
+      "  \"vocab_size\": 119547\n",
+      "}\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "build CRF...\n"
+     ]
+    }
+   ],
+   "source": [
+    "model = BERTBiLSTMAttnNCRF.create(len(data.train_ds.idx2label), crf_dropout=0.3, nbest=8, is_freeze=False, hidden_dim=256)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_epochs = 100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(\n",
+    "    model, data, \"/home/eartemov/ae/work/models/fre-BERTBiLSTMAttnNCRF-fit_BERT-IO.cpt\",\n",
+    "    t_total=num_epochs * len(data.train_dl), lr=0.0001)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "179004667"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:Resuming train... Current epoch 0.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=190), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 1, average train epoch loss=40.742\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=340), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 0 by max_f1: 0.747\n",
+      "INFO:root:Saving new best model...\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.941     0.992     0.966     48875\n",
+      "       I_LOC      0.860     0.403     0.549      1557\n",
+      "       I_PER      0.914     0.891     0.902      2112\n",
+      "       I_ORG      0.815     0.438     0.570      3865\n",
+      "\n",
+      "   micro avg      0.934     0.934     0.934     56409\n",
+      "   macro avg      0.882     0.681     0.747     56409\n",
+      "weighted avg      0.929     0.934     0.925     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=190), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 2, average train epoch loss=3.4443\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=340), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 1 by max_f1: 0.816\n",
+      "INFO:root:Saving new best model...\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.956     0.991     0.973     48875\n",
+      "       I_LOC      0.757     0.821     0.788      1557\n",
+      "       I_PER      0.968     0.858     0.910      2112\n",
+      "       I_ORG      0.908     0.439     0.592      3865\n",
+      "\n",
+      "   micro avg      0.949     0.943     0.946     56409\n",
+      "   macro avg      0.897     0.777     0.816     56409\n",
+      "weighted avg      0.947     0.943     0.939     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=190), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 3, average train epoch loss=2.0083\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=340), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 2 by max_f1: 0.891\n",
+      "INFO:root:Saving new best model...\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.978     0.986     0.982     48875\n",
+      "       I_LOC      0.916     0.791     0.849      1557\n",
+      "       I_PER      0.957     0.920     0.938      2112\n",
+      "       I_ORG      0.823     0.770     0.796      3865\n",
+      "\n",
+      "   micro avg      0.966     0.964     0.965     56409\n",
+      "   macro avg      0.918     0.867     0.891     56409\n",
+      "weighted avg      0.965     0.964     0.964     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=190), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 4, average train epoch loss=1.1347\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=340), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 3 by max_f1: 0.893\n",
+      "INFO:root:Saving new best model...\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.981     0.982     0.981     48875\n",
+      "       I_LOC      0.932     0.789     0.854      1557\n",
+      "       I_PER      0.958     0.924     0.941      2112\n",
+      "       I_ORG      0.788     0.803     0.795      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.964     56409\n",
+      "   macro avg      0.915     0.874     0.893     56409\n",
+      "weighted avg      0.965     0.962     0.964     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=190), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 5, average train epoch loss=0.73484\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=340), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 3 by max_f1: 0.893\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.983     0.982     0.983     48875\n",
+      "       I_LOC      0.857     0.845     0.851      1557\n",
+      "       I_PER      0.900     0.955     0.927      2112\n",
+      "       I_ORG      0.804     0.805     0.805      3865\n",
+      "\n",
+      "   micro avg      0.964     0.965     0.965     56409\n",
+      "   macro avg      0.886     0.897     0.891     56409\n",
+      "weighted avg      0.965     0.965     0.965     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=190), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 6, average train epoch loss=0.40636\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=340), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 3 by max_f1: 0.893\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.983     0.983     0.983     48875\n",
+      "       I_LOC      0.825     0.861     0.843      1557\n",
+      "       I_PER      0.943     0.926     0.935      2112\n",
+      "       I_ORG      0.829     0.782     0.805      3865\n",
+      "\n",
+      "   micro avg      0.967     0.964     0.965     56409\n",
+      "   macro avg      0.895     0.888     0.891     56409\n",
+      "weighted avg      0.967     0.964     0.965     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=190), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 7, average train epoch loss=0.2691\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=340), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 3 by max_f1: 0.893\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.984     0.984     0.984     48875\n",
+      "       I_LOC      0.825     0.863     0.843      1557\n",
+      "       I_PER      0.926     0.930     0.928      2112\n",
+      "       I_ORG      0.842     0.785     0.812      3865\n",
+      "\n",
+      "   micro avg      0.968     0.965     0.967     56409\n",
+      "   macro avg      0.894     0.890     0.892     56409\n",
+      "weighted avg      0.968     0.965     0.966     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=190), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 8, average train epoch loss=0.19706\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=340), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 3 by max_f1: 0.893\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.977     0.987     0.982     48875\n",
+      "       I_LOC      0.855     0.845     0.850      1557\n",
+      "       I_PER      0.944     0.918     0.931      2112\n",
+      "       I_ORG      0.878     0.720     0.791      3865\n",
+      "\n",
+      "   micro avg      0.967     0.962     0.965     56409\n",
+      "   macro avg      0.914     0.868     0.889     56409\n",
+      "weighted avg      0.966     0.962     0.964     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=190), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 9, average train epoch loss=0.087387\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=340), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 8 by max_f1: 0.897\n",
+      "INFO:root:Saving new best model...\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.981     0.986     0.984     48875\n",
+      "       I_LOC      0.853     0.864     0.858      1557\n",
+      "       I_PER      0.927     0.950     0.938      2112\n",
+      "       I_ORG      0.867     0.757     0.808      3865\n",
+      "\n",
+      "   micro avg      0.969     0.966     0.967     56409\n",
+      "   macro avg      0.907     0.889     0.897     56409\n",
+      "weighted avg      0.968     0.966     0.966     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=190), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 10, average train epoch loss=0.048758\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=340), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 8 by max_f1: 0.897\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.980     0.986     0.983     48875\n",
+      "       I_LOC      0.872     0.843     0.857      1557\n",
+      "       I_PER      0.906     0.964     0.934      2112\n",
+      "       I_ORG      0.846     0.752     0.796      3865\n",
+      "\n",
+      "   micro avg      0.966     0.966     0.966     56409\n",
+      "   macro avg      0.901     0.886     0.893     56409\n",
+      "weighted avg      0.965     0.966     0.965     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=190), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 11, average train epoch loss=0.047667\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=340), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 8 by max_f1: 0.897\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.979     0.988     0.984     48875\n",
+      "       I_LOC      0.885     0.845     0.865      1557\n",
+      "       I_PER      0.915     0.953     0.933      2112\n",
+      "       I_ORG      0.891     0.734     0.804      3865\n",
+      "\n",
+      "   micro avg      0.969     0.966     0.967     56409\n",
+      "   macro avg      0.917     0.880     0.897     56409\n",
+      "weighted avg      0.968     0.966     0.966     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=190), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 12, average train epoch loss=0.028147\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=340), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 11 by max_f1: 0.899\n",
+      "INFO:root:Saving new best model...\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.982     0.986     0.984     48875\n",
+      "       I_LOC      0.870     0.844     0.857      1557\n",
+      "       I_PER      0.943     0.931     0.937      2112\n",
+      "       I_ORG      0.857     0.782     0.818      3865\n",
+      "\n",
+      "   micro avg      0.970     0.966     0.968     56409\n",
+      "   macro avg      0.913     0.886     0.899     56409\n",
+      "weighted avg      0.969     0.966     0.967     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=190), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 13, average train epoch loss=0.033275\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=340), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 12 by max_f1: 0.901\n",
+      "INFO:root:Saving new best model...\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.981     0.987     0.984     48875\n",
+      "       I_LOC      0.885     0.841     0.862      1557\n",
+      "       I_PER      0.940     0.934     0.937      2112\n",
+      "       I_ORG      0.874     0.773     0.820      3865\n",
+      "\n",
+      "   micro avg      0.970     0.966     0.968     56409\n",
+      "   macro avg      0.920     0.884     0.901     56409\n",
+      "weighted avg      0.970     0.966     0.968     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=190), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 14, average train epoch loss=0.0052319\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=340), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 12 by max_f1: 0.901\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.981     0.987     0.984     48875\n",
+      "       I_LOC      0.865     0.846     0.855      1557\n",
+      "       I_PER      0.945     0.929     0.937      2112\n",
+      "       I_ORG      0.873     0.768     0.817      3865\n",
+      "\n",
+      "   micro avg      0.970     0.966     0.968     56409\n",
+      "   macro avg      0.916     0.882     0.898     56409\n",
+      "weighted avg      0.969     0.966     0.967     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=190), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 15, average train epoch loss=0.015508\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=340), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 12 by max_f1: 0.901\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.985     0.983     0.984     48875\n",
+      "       I_LOC      0.865     0.841     0.853      1557\n",
+      "       I_PER      0.949     0.928     0.938      2112\n",
+      "       I_ORG      0.823     0.816     0.819      3865\n",
+      "\n",
+      "   micro avg      0.969     0.965     0.967     56409\n",
+      "   macro avg      0.905     0.892     0.899     56409\n",
+      "weighted avg      0.969     0.965     0.967     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=190), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 16, average train epoch loss=0.0090177\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=340), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 12 by max_f1: 0.901\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.982     0.985     0.984     48875\n",
+      "       I_LOC      0.874     0.838     0.855      1557\n",
+      "       I_PER      0.945     0.932     0.939      2112\n",
+      "       I_ORG      0.844     0.789     0.816      3865\n",
+      "\n",
+      "   micro avg      0.969     0.966     0.967     56409\n",
+      "   macro avg      0.911     0.886     0.898     56409\n",
+      "weighted avg      0.968     0.966     0.967     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=190), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 17, average train epoch loss=0.0071027\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=340), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 12 by max_f1: 0.901\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.982     0.986     0.984     48875\n",
+      "       I_LOC      0.864     0.845     0.854      1557\n",
+      "       I_PER      0.948     0.927     0.938      2112\n",
+      "       I_ORG      0.862     0.779     0.819      3865\n",
+      "\n",
+      "   micro avg      0.970     0.966     0.968     56409\n",
+      "   macro avg      0.914     0.884     0.899     56409\n",
+      "weighted avg      0.969     0.966     0.967     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=190), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 18, average train epoch loss=0.0042519\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=340), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 12 by max_f1: 0.901\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.982     0.986     0.984     48875\n",
+      "       I_LOC      0.869     0.845     0.857      1557\n",
+      "       I_PER      0.947     0.931     0.939      2112\n",
+      "       I_ORG      0.854     0.784     0.818      3865\n",
+      "\n",
+      "   micro avg      0.970     0.966     0.968     56409\n",
+      "   macro avg      0.913     0.886     0.899     56409\n",
+      "weighted avg      0.969     0.966     0.967     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=190), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 19, average train epoch loss=0.00098504\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=340), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 12 by max_f1: 0.901\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.982     0.985     0.984     48875\n",
+      "       I_LOC      0.868     0.842     0.855      1557\n",
+      "       I_PER      0.947     0.931     0.939      2112\n",
+      "       I_ORG      0.851     0.786     0.817      3865\n",
+      "\n",
+      "   micro avg      0.969     0.966     0.967     56409\n",
+      "   macro avg      0.912     0.886     0.899     56409\n",
+      "weighted avg      0.969     0.966     0.967     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=190), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 20, average train epoch loss=0.0013706\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=340), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 12 by max_f1: 0.901\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.982     0.985     0.984     48875\n",
+      "       I_LOC      0.869     0.842     0.855      1557\n",
+      "       I_PER      0.946     0.931     0.938      2112\n",
+      "       I_ORG      0.852     0.786     0.817      3865\n",
+      "\n",
+      "   micro avg      0.969     0.966     0.968     56409\n",
+      "   macro avg      0.912     0.886     0.899     56409\n",
+      "weighted avg      0.969     0.966     0.967     56409\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "learner.fit(epochs=num_epochs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Eval"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Load model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n"
+     ]
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=train_df_path,\n",
+    "    valid_df_path=valid_df_path,\n",
+    "    idx2labels_path=\"/home/eartemov/ae/work/factRuEval-2016/idx2labels.txt\",\n",
+    "    clear_cache=False\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTBiLSTMAttnNCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "build CRF...\n"
+     ]
+    }
+   ],
+   "source": [
+    "model = BERTBiLSTMAttnNCRF.create(len(data.train_ds.idx2label), crf_dropout=0.3, nbest=8, is_freeze=False, hidden_dim=256)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_epochs = 100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(\n",
+    "    model, data, \"/home/eartemov/ae/work/models/fre-BERTBiLSTMAttnNCRF-fit_BERT-IO.cpt\",\n",
+    "    t_total=num_epochs * len(data.train_dl), lr=0.0001)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "179004667"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner.load_model()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data.bert_data import get_data_loader_for_predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dl = get_data_loader_for_predict(data, df_path=data.valid_ds.config[\"df_path\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Predicting', max=170, style=ProgressStyle(description_width='…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "preds = learner.predict(dl)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn_crfsuite.metrics import flat_classification_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.utils import bert_labels2tokens, voting_choicer\n",
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pred_tokens, pred_labels = bert_labels2tokens(dl, preds)\n",
+    "true_tokens, true_labels = bert_labels2tokens(dl, [x.bert_labels for x in dl.dataset])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "assert pred_tokens == true_tokens\n",
+    "tokens_report = flat_classification_report(true_labels, pred_labels, labels=[\"I_ORG\", \"I_PER\", \"I_LOC\"], digits=4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "       I_ORG     0.8776    0.7721    0.8215      3865\n",
+      "       I_PER     0.9409    0.9342    0.9375      2112\n",
+      "       I_LOC     0.8857    0.8407    0.8626      1557\n",
+      "\n",
+      "   micro avg     0.8984    0.8317    0.8637      7534\n",
+      "   macro avg     0.9014    0.8490    0.8739      7534\n",
+      "weighted avg     0.8970    0.8317    0.8625      7534\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(tokens_report)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report\n",
+    "from modules.analyze_utils.utils import voting_choicer, tokens2spans"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         LOC     0.8135    0.7586    0.7851      1305\n",
+      "           O     0.9771    0.9871    0.9821     48734\n",
+      "         ORG     0.7667    0.6220    0.6868      1886\n",
+      "         PER     0.8442    0.8093    0.8264      1306\n",
+      "\n",
+      "    accuracy                         0.9642     53231\n",
+      "   macro avg     0.8504    0.7943    0.8201     53231\n",
+      "weighted avg     0.9624    0.9642    0.9630     53231\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(get_bert_span_report(dl, preds, fn=voting_choicer))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/exps/fre BERTBiLSTMAttnNCRF.ipynb b/exps/fre BERTBiLSTMAttnNCRF.ipynb
new file mode 100644
index 0000000..a7d0262
--- /dev/null
+++ b/exps/fre BERTBiLSTMAttnNCRF.ipynb	
@@ -0,0 +1,8071 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "\n",
+    "import sys\n",
+    "import warnings\n",
+    "\n",
+    "\n",
+    "warnings.filterwarnings(\"ignore\")\n",
+    "sys.path.append(\"../\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## IO markup"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Train"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_df_path = \"/home/eartemov/ae/work/factRuEval-2016/dev.csv\"\n",
+    "valid_df_path = \"/home/eartemov/ae/work/factRuEval-2016/test.csv\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Creating labels vocabs', max=1519, style=ProgressStyle(descri…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=train_df_path,\n",
+    "    valid_df_path=valid_df_path,\n",
+    "    idx2labels_path=\"/home/eartemov/ae/work/factRuEval-2016/idx2labels.txt\",\n",
+    "    clear_cache=True\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTBiLSTMAttnNCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "8"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(data.train_ds.label2idx)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "build CRF...\n"
+     ]
+    }
+   ],
+   "source": [
+    "model = BERTBiLSTMAttnNCRF.create(len(data.train_ds.idx2label), crf_dropout=0.3, nbest=8)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_epochs = 100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(\n",
+    "    model, data, \"/home/eartemov/ae/work/models/fre-BERTBiLSTMAttnNCRF-IO.cpt\", t_total=num_epochs * len(data.train_dl))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2630011"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:Resuming train... Current epoch 0.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 1, average train epoch loss=73.343\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 0 by max_f1: 0.157\n",
+      "INFO:root:Saving new best model...\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.483     0.873     0.622     48875\n",
+      "       I_LOC      0.000     0.000     0.000      1557\n",
+      "       I_PER      0.026     0.004     0.007      2112\n",
+      "       I_ORG      0.000     0.000     0.000      3865\n",
+      "\n",
+      "   micro avg      0.480     0.757     0.587     56409\n",
+      "   macro avg      0.127     0.219     0.157     56409\n",
+      "weighted avg      0.419     0.757     0.539     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 2, average train epoch loss=33.635\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 1 by max_f1: 0.237\n",
+      "INFO:root:Saving new best model...\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.858     0.968     0.910     48875\n",
+      "       I_LOC      0.000     0.000     0.000      1557\n",
+      "       I_PER      0.929     0.018     0.036      2112\n",
+      "       I_ORG      0.000     0.000     0.000      3865\n",
+      "\n",
+      "   micro avg      0.858     0.840     0.849     56409\n",
+      "   macro avg      0.447     0.247     0.237     56409\n",
+      "weighted avg      0.778     0.840     0.790     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 3, average train epoch loss=10.198\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 2 by max_f1: 0.73\n",
+      "INFO:root:Saving new best model...\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.935     0.987     0.960     48875\n",
+      "       I_LOC      0.826     0.553     0.663      1557\n",
+      "       I_PER      0.758     0.872     0.811      2112\n",
+      "       I_ORG      0.771     0.354     0.486      3865\n",
+      "\n",
+      "   micro avg      0.920     0.927     0.924     56409\n",
+      "   macro avg      0.823     0.692     0.730     56409\n",
+      "weighted avg      0.914     0.927     0.914     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 4, average train epoch loss=4.3081\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 3 by max_f1: 0.82\n",
+      "INFO:root:Saving new best model...\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.958     0.985     0.972     48875\n",
+      "       I_LOC      0.780     0.766     0.773      1557\n",
+      "       I_PER      0.869     0.908     0.888      2112\n",
+      "       I_ORG      0.827     0.533     0.648      3865\n",
+      "\n",
+      "   micro avg      0.944     0.945     0.945     56409\n",
+      "   macro avg      0.858     0.798     0.820     56409\n",
+      "weighted avg      0.941     0.945     0.941     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 5, average train epoch loss=2.7504\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 4 by max_f1: 0.846\n",
+      "INFO:root:Saving new best model...\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.973     0.974     0.974     48875\n",
+      "       I_LOC      0.745     0.824     0.782      1557\n",
+      "       I_PER      0.947     0.881     0.913      2112\n",
+      "       I_ORG      0.773     0.663     0.714      3865\n",
+      "\n",
+      "   micro avg      0.953     0.945     0.949     56409\n",
+      "   macro avg      0.859     0.836     0.846     56409\n",
+      "weighted avg      0.952     0.945     0.948     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 6, average train epoch loss=1.9541\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 5 by max_f1: 0.862\n",
+      "INFO:root:Saving new best model...\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.980     0.972     0.976     48875\n",
+      "       I_LOC      0.799     0.797     0.798      1557\n",
+      "       I_PER      0.944     0.926     0.935      2112\n",
+      "       I_ORG      0.713     0.766     0.739      3865\n",
+      "\n",
+      "   micro avg      0.954     0.952     0.953     56409\n",
+      "   macro avg      0.859     0.865     0.862     56409\n",
+      "weighted avg      0.955     0.952     0.953     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 7, average train epoch loss=1.4117\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 5 by max_f1: 0.862\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.964     0.988     0.976     48875\n",
+      "       I_LOC      0.893     0.726     0.801      1557\n",
+      "       I_PER      0.923     0.946     0.935      2112\n",
+      "       I_ORG      0.825     0.602     0.696      3865\n",
+      "\n",
+      "   micro avg      0.954     0.953     0.953     56409\n",
+      "   macro avg      0.901     0.816     0.852     56409\n",
+      "weighted avg      0.951     0.953     0.950     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 8, average train epoch loss=1.0423\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 7 by max_f1: 0.864\n",
+      "INFO:root:Saving new best model...\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.973     0.981     0.977     48875\n",
+      "       I_LOC      0.861     0.766     0.810      1557\n",
+      "       I_PER      0.910     0.959     0.934      2112\n",
+      "       I_ORG      0.789     0.684     0.733      3865\n",
+      "\n",
+      "   micro avg      0.957     0.954     0.955     56409\n",
+      "   macro avg      0.883     0.847     0.864     56409\n",
+      "weighted avg      0.955     0.954     0.954     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 9, average train epoch loss=0.89463\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 8 by max_f1: 0.867\n",
+      "INFO:root:Saving new best model...\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.971     0.986     0.979     48875\n",
+      "       I_LOC      0.832     0.818     0.825      1557\n",
+      "       I_PER      0.916     0.944     0.930      2112\n",
+      "       I_ORG      0.850     0.645     0.734      3865\n",
+      "\n",
+      "   micro avg      0.959     0.957     0.958     56409\n",
+      "   macro avg      0.892     0.848     0.867     56409\n",
+      "weighted avg      0.957     0.957     0.956     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 10, average train epoch loss=0.77347\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 8 by max_f1: 0.867\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.979     0.973     0.976     48875\n",
+      "       I_LOC      0.870     0.665     0.754      1557\n",
+      "       I_PER      0.966     0.856     0.908      2112\n",
+      "       I_ORG      0.680     0.796     0.733      3865\n",
+      "\n",
+      "   micro avg      0.952     0.948     0.950     56409\n",
+      "   macro avg      0.874     0.822     0.843     56409\n",
+      "weighted avg      0.955     0.948     0.951     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 11, average train epoch loss=0.50332\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 10 by max_f1: 0.872\n",
+      "INFO:root:Saving new best model...\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.979     0.978     0.978     48875\n",
+      "       I_LOC      0.849     0.787     0.817      1557\n",
+      "       I_PER      0.950     0.914     0.931      2112\n",
+      "       I_ORG      0.766     0.760     0.763      3865\n",
+      "\n",
+      "   micro avg      0.960     0.955     0.957     56409\n",
+      "   macro avg      0.886     0.860     0.872     56409\n",
+      "weighted avg      0.960     0.955     0.957     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 12, average train epoch loss=0.33252\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 10 by max_f1: 0.872\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.977     0.979     0.978     48875\n",
+      "       I_LOC      0.866     0.802     0.833      1557\n",
+      "       I_PER      0.883     0.972     0.925      2112\n",
+      "       I_ORG      0.753     0.750     0.752      3865\n",
+      "\n",
+      "   micro avg      0.955     0.958     0.956     56409\n",
+      "   macro avg      0.870     0.876     0.872     56409\n",
+      "weighted avg      0.955     0.958     0.956     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 13, average train epoch loss=0.25459\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 12 by max_f1: 0.873\n",
+      "INFO:root:Saving new best model...\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.977     0.981     0.979     48875\n",
+      "       I_LOC      0.868     0.759     0.810      1557\n",
+      "       I_PER      0.950     0.928     0.939      2112\n",
+      "       I_ORG      0.758     0.768     0.763      3865\n",
+      "\n",
+      "   micro avg      0.958     0.958     0.958     56409\n",
+      "   macro avg      0.888     0.859     0.873     56409\n",
+      "weighted avg      0.958     0.958     0.958     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 14, average train epoch loss=0.23457\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 12 by max_f1: 0.873\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.981     0.971     0.976     48875\n",
+      "       I_LOC      0.922     0.748     0.826      1557\n",
+      "       I_PER      0.951     0.929     0.940      2112\n",
+      "       I_ORG      0.684     0.805     0.739      3865\n",
+      "\n",
+      "   micro avg      0.954     0.952     0.953     56409\n",
+      "   macro avg      0.884     0.863     0.870     56409\n",
+      "weighted avg      0.958     0.952     0.954     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 15, average train epoch loss=0.23872\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 14 by max_f1: 0.879\n",
+      "INFO:root:Saving new best model...\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.978     0.979     0.978     48875\n",
+      "       I_LOC      0.891     0.797     0.841      1557\n",
+      "       I_PER      0.909     0.966     0.936      2112\n",
+      "       I_ORG      0.781     0.740     0.760      3865\n",
+      "\n",
+      "   micro avg      0.960     0.957     0.959     56409\n",
+      "   macro avg      0.890     0.871     0.879     56409\n",
+      "weighted avg      0.959     0.957     0.958     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 16, average train epoch loss=0.20621\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 14 by max_f1: 0.879\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.970     0.986     0.978     48875\n",
+      "       I_LOC      0.847     0.825     0.836      1557\n",
+      "       I_PER      0.950     0.905     0.927      2112\n",
+      "       I_ORG      0.833     0.678     0.747      3865\n",
+      "\n",
+      "   micro avg      0.958     0.957     0.958     56409\n",
+      "   macro avg      0.900     0.849     0.872     56409\n",
+      "weighted avg      0.957     0.957     0.956     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 17, average train epoch loss=0.12929\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 16 by max_f1: 0.883\n",
+      "INFO:root:Saving new best model...\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.976     0.985     0.981     48875\n",
+      "       I_LOC      0.841     0.837     0.839      1557\n",
+      "       I_PER      0.950     0.927     0.939      2112\n",
+      "       I_ORG      0.838     0.717     0.773      3865\n",
+      "\n",
+      "   micro avg      0.963     0.961     0.962     56409\n",
+      "   macro avg      0.901     0.867     0.883     56409\n",
+      "weighted avg      0.962     0.961     0.961     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 18, average train epoch loss=0.086214\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 16 by max_f1: 0.883\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.968     0.990     0.979     48875\n",
+      "       I_LOC      0.874     0.794     0.832      1557\n",
+      "       I_PER      0.921     0.955     0.938      2112\n",
+      "       I_ORG      0.877     0.647     0.744      3865\n",
+      "\n",
+      "   micro avg      0.959     0.960     0.959     56409\n",
+      "   macro avg      0.910     0.847     0.873     56409\n",
+      "weighted avg      0.957     0.960     0.957     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 19, average train epoch loss=0.087525\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 16 by max_f1: 0.883\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.970     0.990     0.980     48875\n",
+      "       I_LOC      0.821     0.838     0.829      1557\n",
+      "       I_PER      0.908     0.965     0.936      2112\n",
+      "       I_ORG      0.908     0.608     0.728      3865\n",
+      "\n",
+      "   micro avg      0.961     0.958     0.960     56409\n",
+      "   macro avg      0.902     0.850     0.868     56409\n",
+      "weighted avg      0.959     0.958     0.957     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 20, average train epoch loss=0.10373\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 16 by max_f1: 0.883\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.975     0.984     0.979     48875\n",
+      "       I_LOC      0.915     0.749     0.823      1557\n",
+      "       I_PER      0.940     0.941     0.941      2112\n",
+      "       I_ORG      0.801     0.748     0.774      3865\n",
+      "\n",
+      "   micro avg      0.962     0.959     0.960     56409\n",
+      "   macro avg      0.908     0.855     0.879     56409\n",
+      "weighted avg      0.961     0.959     0.960     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 21, average train epoch loss=0.06161\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 16 by max_f1: 0.883\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.977     0.982     0.980     48875\n",
+      "       I_LOC      0.845     0.798     0.821      1557\n",
+      "       I_PER      0.960     0.918     0.939      2112\n",
+      "       I_ORG      0.791     0.752     0.771      3865\n",
+      "\n",
+      "   micro avg      0.961     0.959     0.960     56409\n",
+      "   macro avg      0.894     0.863     0.878     56409\n",
+      "weighted avg      0.960     0.959     0.959     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 22, average train epoch loss=0.08982\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 21 by max_f1: 0.888\n",
+      "INFO:root:Saving new best model...\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.978     0.984     0.981     48875\n",
+      "       I_LOC      0.849     0.832     0.840      1557\n",
+      "       I_PER      0.947     0.947     0.947      2112\n",
+      "       I_ORG      0.838     0.739     0.785      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.963     56409\n",
+      "   macro avg      0.903     0.876     0.888     56409\n",
+      "weighted avg      0.964     0.962     0.963     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 23, average train epoch loss=0.070473\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 21 by max_f1: 0.888\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.975     0.985     0.980     48875\n",
+      "       I_LOC      0.851     0.816     0.833      1557\n",
+      "       I_PER      0.925     0.963     0.943      2112\n",
+      "       I_ORG      0.859     0.688     0.764      3865\n",
+      "\n",
+      "   micro avg      0.963     0.959     0.961     56409\n",
+      "   macro avg      0.902     0.863     0.880     56409\n",
+      "weighted avg      0.961     0.959     0.960     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 24, average train epoch loss=0.090512\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 21 by max_f1: 0.888\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.972     0.989     0.980     48875\n",
+      "       I_LOC      0.873     0.803     0.836      1557\n",
+      "       I_PER      0.947     0.934     0.940      2112\n",
+      "       I_ORG      0.874     0.674     0.761      3865\n",
+      "\n",
+      "   micro avg      0.963     0.960     0.961     56409\n",
+      "   macro avg      0.916     0.850     0.879     56409\n",
+      "weighted avg      0.961     0.960     0.960     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 25, average train epoch loss=0.044604\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 21 by max_f1: 0.888\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.976     0.985     0.980     48875\n",
+      "       I_LOC      0.857     0.820     0.838      1557\n",
+      "       I_PER      0.948     0.944     0.946      2112\n",
+      "       I_ORG      0.847     0.720     0.779      3865\n",
+      "\n",
+      "   micro avg      0.964     0.961     0.963     56409\n",
+      "   macro avg      0.907     0.867     0.886     56409\n",
+      "weighted avg      0.963     0.961     0.961     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 26, average train epoch loss=0.03871\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 21 by max_f1: 0.888\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.979     0.981     0.980     48875\n",
+      "       I_LOC      0.886     0.778     0.829      1557\n",
+      "       I_PER      0.946     0.951     0.948      2112\n",
+      "       I_ORG      0.790     0.771     0.780      3865\n",
+      "\n",
+      "   micro avg      0.963     0.960     0.961     56409\n",
+      "   macro avg      0.900     0.870     0.884     56409\n",
+      "weighted avg      0.962     0.960     0.961     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 27, average train epoch loss=0.056433\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 21 by max_f1: 0.888\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.978     0.983     0.980     48875\n",
+      "       I_LOC      0.888     0.807     0.846      1557\n",
+      "       I_PER      0.942     0.949     0.946      2112\n",
+      "       I_ORG      0.813     0.746     0.778      3865\n",
+      "\n",
+      "   micro avg      0.964     0.961     0.962     56409\n",
+      "   macro avg      0.905     0.871     0.887     56409\n",
+      "weighted avg      0.963     0.961     0.961     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 28, average train epoch loss=0.045758\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 21 by max_f1: 0.888\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.973     0.986     0.979     48875\n",
+      "       I_LOC      0.881     0.803     0.840      1557\n",
+      "       I_PER      0.938     0.952     0.945      2112\n",
+      "       I_ORG      0.858     0.684     0.761      3865\n",
+      "\n",
+      "   micro avg      0.963     0.959     0.961     56409\n",
+      "   macro avg      0.913     0.856     0.881     56409\n",
+      "weighted avg      0.961     0.959     0.959     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 29, average train epoch loss=0.0274\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 21 by max_f1: 0.888\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.977     0.985     0.981     48875\n",
+      "       I_LOC      0.841     0.830     0.835      1557\n",
+      "       I_PER      0.947     0.940     0.944      2112\n",
+      "       I_ORG      0.839     0.727     0.779      3865\n",
+      "\n",
+      "   micro avg      0.964     0.962     0.963     56409\n",
+      "   macro avg      0.901     0.871     0.885     56409\n",
+      "weighted avg      0.962     0.962     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 30, average train epoch loss=0.032021\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 21 by max_f1: 0.888\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.968     0.991     0.979     48875\n",
+      "       I_LOC      0.890     0.783     0.833      1557\n",
+      "       I_PER      0.944     0.926     0.935      2112\n",
+      "       I_ORG      0.900     0.650     0.755      3865\n",
+      "\n",
+      "   micro avg      0.962     0.959     0.961     56409\n",
+      "   macro avg      0.925     0.837     0.876     56409\n",
+      "weighted avg      0.960     0.959     0.958     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 31, average train epoch loss=0.034389\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 21 by max_f1: 0.888\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.973     0.987     0.980     48875\n",
+      "       I_LOC      0.864     0.815     0.839      1557\n",
+      "       I_PER      0.942     0.936     0.939      2112\n",
+      "       I_ORG      0.861     0.696     0.770      3865\n",
+      "\n",
+      "   micro avg      0.963     0.961     0.962     56409\n",
+      "   macro avg      0.910     0.859     0.882     56409\n",
+      "weighted avg      0.962     0.961     0.961     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 32, average train epoch loss=0.009842\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 21 by max_f1: 0.888\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.974     0.987     0.980     48875\n",
+      "       I_LOC      0.885     0.802     0.842      1557\n",
+      "       I_PER      0.929     0.950     0.940      2112\n",
+      "       I_ORG      0.857     0.687     0.763      3865\n",
+      "\n",
+      "   micro avg      0.963     0.960     0.962     56409\n",
+      "   macro avg      0.911     0.857     0.881     56409\n",
+      "weighted avg      0.961     0.960     0.960     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 33, average train epoch loss=0.020966\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 21 by max_f1: 0.888\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.977     0.983     0.980     48875\n",
+      "       I_LOC      0.874     0.804     0.837      1557\n",
+      "       I_PER      0.950     0.936     0.943      2112\n",
+      "       I_ORG      0.809     0.740     0.773      3865\n",
+      "\n",
+      "   micro avg      0.963     0.960     0.961     56409\n",
+      "   macro avg      0.902     0.866     0.883     56409\n",
+      "weighted avg      0.961     0.960     0.961     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 34, average train epoch loss=0.027037\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 21 by max_f1: 0.888\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.970     0.989     0.979     48875\n",
+      "       I_LOC      0.869     0.799     0.832      1557\n",
+      "       I_PER      0.940     0.942     0.941      2112\n",
+      "       I_ORG      0.885     0.650     0.750      3865\n",
+      "\n",
+      "   micro avg      0.962     0.958     0.960     56409\n",
+      "   macro avg      0.916     0.845     0.876     56409\n",
+      "weighted avg      0.961     0.958     0.958     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 35, average train epoch loss=0.020547\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 21 by max_f1: 0.888\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.971     0.989     0.980     48875\n",
+      "       I_LOC      0.871     0.798     0.833      1557\n",
+      "       I_PER      0.945     0.948     0.947      2112\n",
+      "       I_ORG      0.862     0.684     0.763      3865\n",
+      "\n",
+      "   micro avg      0.962     0.961     0.962     56409\n",
+      "   macro avg      0.912     0.855     0.881     56409\n",
+      "weighted avg      0.960     0.961     0.960     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 36, average train epoch loss=0.0097076\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 21 by max_f1: 0.888\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.974     0.987     0.980     48875\n",
+      "       I_LOC      0.906     0.777     0.837      1557\n",
+      "       I_PER      0.945     0.945     0.945      2112\n",
+      "       I_ORG      0.831     0.713     0.768      3865\n",
+      "\n",
+      "   micro avg      0.963     0.961     0.962     56409\n",
+      "   macro avg      0.914     0.855     0.882     56409\n",
+      "weighted avg      0.961     0.961     0.960     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 37, average train epoch loss=0.017548\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 21 by max_f1: 0.888\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.976     0.986     0.981     48875\n",
+      "       I_LOC      0.907     0.785     0.842      1557\n",
+      "       I_PER      0.942     0.953     0.947      2112\n",
+      "       I_ORG      0.830     0.738     0.781      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.963     56409\n",
+      "   macro avg      0.914     0.865     0.888     56409\n",
+      "weighted avg      0.963     0.962     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 38, average train epoch loss=0.013931\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 21 by max_f1: 0.888\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.974     0.987     0.981     48875\n",
+      "       I_LOC      0.901     0.800     0.848      1557\n",
+      "       I_PER      0.942     0.950     0.946      2112\n",
+      "       I_ORG      0.855     0.709     0.775      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.963     56409\n",
+      "   macro avg      0.918     0.862     0.887     56409\n",
+      "weighted avg      0.963     0.962     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 39, average train epoch loss=0.016594\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 21 by max_f1: 0.888\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.978     0.984     0.981     48875\n",
+      "       I_LOC      0.879     0.812     0.844      1557\n",
+      "       I_PER      0.940     0.954     0.947      2112\n",
+      "       I_ORG      0.821     0.741     0.779      3865\n",
+      "\n",
+      "   micro avg      0.964     0.961     0.963     56409\n",
+      "   macro avg      0.905     0.873     0.888     56409\n",
+      "weighted avg      0.963     0.961     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 40, average train epoch loss=0.0071077\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 39 by max_f1: 0.889\n",
+      "INFO:root:Saving new best model...\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.975     0.988     0.981     48875\n",
+      "       I_LOC      0.901     0.793     0.844      1557\n",
+      "       I_PER      0.941     0.956     0.949      2112\n",
+      "       I_ORG      0.855     0.721     0.782      3865\n",
+      "\n",
+      "   micro avg      0.965     0.963     0.964     56409\n",
+      "   macro avg      0.918     0.864     0.889     56409\n",
+      "weighted avg      0.964     0.963     0.963     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 41, average train epoch loss=0.0053882\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 39 by max_f1: 0.889\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.977     0.985     0.981     48875\n",
+      "       I_LOC      0.892     0.804     0.846      1557\n",
+      "       I_PER      0.942     0.947     0.945      2112\n",
+      "       I_ORG      0.838     0.735     0.783      3865\n",
+      "\n",
+      "   micro avg      0.965     0.961     0.963     56409\n",
+      "   macro avg      0.912     0.868     0.889     56409\n",
+      "weighted avg      0.964     0.961     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 42, average train epoch loss=0.0049295\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 39 by max_f1: 0.889\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.971     0.988     0.979     48875\n",
+      "       I_LOC      0.841     0.825     0.833      1557\n",
+      "       I_PER      0.942     0.938     0.940      2112\n",
+      "       I_ORG      0.893     0.642     0.747      3865\n",
+      "\n",
+      "   micro avg      0.962     0.958     0.960     56409\n",
+      "   macro avg      0.912     0.848     0.875     56409\n",
+      "weighted avg      0.961     0.958     0.958     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 43, average train epoch loss=0.0091113\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 39 by max_f1: 0.889\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.976     0.986     0.981     48875\n",
+      "       I_LOC      0.866     0.821     0.843      1557\n",
+      "       I_PER      0.940     0.952     0.946      2112\n",
+      "       I_ORG      0.857     0.711     0.777      3865\n",
+      "\n",
+      "   micro avg      0.965     0.961     0.963     56409\n",
+      "   macro avg      0.910     0.868     0.887     56409\n",
+      "weighted avg      0.963     0.961     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 44, average train epoch loss=0.010603\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 39 by max_f1: 0.889\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.973     0.988     0.980     48875\n",
+      "       I_LOC      0.864     0.816     0.839      1557\n",
+      "       I_PER      0.945     0.938     0.942      2112\n",
+      "       I_ORG      0.875     0.682     0.766      3865\n",
+      "\n",
+      "   micro avg      0.964     0.960     0.962     56409\n",
+      "   macro avg      0.914     0.856     0.882     56409\n",
+      "weighted avg      0.962     0.960     0.960     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 45, average train epoch loss=0.011899\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 39 by max_f1: 0.889\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.971     0.988     0.979     48875\n",
+      "       I_LOC      0.878     0.806     0.841      1557\n",
+      "       I_PER      0.951     0.927     0.939      2112\n",
+      "       I_ORG      0.883     0.670     0.762      3865\n",
+      "\n",
+      "   micro avg      0.963     0.959     0.961     56409\n",
+      "   macro avg      0.921     0.848     0.880     56409\n",
+      "weighted avg      0.962     0.959     0.959     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 46, average train epoch loss=0.010241\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 39 by max_f1: 0.889\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.971     0.988     0.980     48875\n",
+      "       I_LOC      0.894     0.789     0.838      1557\n",
+      "       I_PER      0.953     0.913     0.933      2112\n",
+      "       I_ORG      0.873     0.696     0.774      3865\n",
+      "\n",
+      "   micro avg      0.963     0.960     0.962     56409\n",
+      "   macro avg      0.923     0.846     0.881     56409\n",
+      "weighted avg      0.962     0.960     0.960     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 47, average train epoch loss=0.010324\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 39 by max_f1: 0.889\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.974     0.986     0.980     48875\n",
+      "       I_LOC      0.863     0.812     0.837      1557\n",
+      "       I_PER      0.965     0.907     0.935      2112\n",
+      "       I_ORG      0.852     0.715     0.777      3865\n",
+      "\n",
+      "   micro avg      0.964     0.959     0.962     56409\n",
+      "   macro avg      0.913     0.855     0.882     56409\n",
+      "weighted avg      0.962     0.959     0.960     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 48, average train epoch loss=0.0086854\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 39 by max_f1: 0.889\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.972     0.987     0.980     48875\n",
+      "       I_LOC      0.864     0.810     0.836      1557\n",
+      "       I_PER      0.950     0.923     0.937      2112\n",
+      "       I_ORG      0.865     0.685     0.765      3865\n",
+      "\n",
+      "   micro avg      0.963     0.959     0.961     56409\n",
+      "   macro avg      0.913     0.851     0.879     56409\n",
+      "weighted avg      0.961     0.959     0.959     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 49, average train epoch loss=0.0082937\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 39 by max_f1: 0.889\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.974     0.987     0.981     48875\n",
+      "       I_LOC      0.878     0.813     0.844      1557\n",
+      "       I_PER      0.939     0.953     0.946      2112\n",
+      "       I_ORG      0.862     0.695     0.770      3865\n",
+      "\n",
+      "   micro avg      0.964     0.961     0.963     56409\n",
+      "   macro avg      0.913     0.862     0.885     56409\n",
+      "weighted avg      0.963     0.961     0.961     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 50, average train epoch loss=0.0038096\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 39 by max_f1: 0.889\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.973     0.988     0.981     48875\n",
+      "       I_LOC      0.868     0.823     0.845      1557\n",
+      "       I_PER      0.945     0.942     0.943      2112\n",
+      "       I_ORG      0.877     0.682     0.767      3865\n",
+      "\n",
+      "   micro avg      0.964     0.961     0.963     56409\n",
+      "   macro avg      0.916     0.859     0.884     56409\n",
+      "weighted avg      0.963     0.961     0.961     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 51, average train epoch loss=0.0064582\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 39 by max_f1: 0.889\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.973     0.989     0.981     48875\n",
+      "       I_LOC      0.867     0.818     0.842      1557\n",
+      "       I_PER      0.945     0.944     0.944      2112\n",
+      "       I_ORG      0.877     0.684     0.769      3865\n",
+      "\n",
+      "   micro avg      0.964     0.961     0.963     56409\n",
+      "   macro avg      0.915     0.859     0.884     56409\n",
+      "weighted avg      0.962     0.961     0.961     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 52, average train epoch loss=0.00073816\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 39 by max_f1: 0.889\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.974     0.988     0.981     48875\n",
+      "       I_LOC      0.871     0.814     0.842      1557\n",
+      "       I_PER      0.955     0.932     0.943      2112\n",
+      "       I_ORG      0.865     0.703     0.776      3865\n",
+      "\n",
+      "   micro avg      0.964     0.961     0.963     56409\n",
+      "   macro avg      0.916     0.859     0.885     56409\n",
+      "weighted avg      0.963     0.961     0.961     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 53, average train epoch loss=0.0084374\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 39 by max_f1: 0.889\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.975     0.985     0.980     48875\n",
+      "       I_LOC      0.873     0.813     0.842      1557\n",
+      "       I_PER      0.960     0.919     0.939      2112\n",
+      "       I_ORG      0.857     0.719     0.782      3865\n",
+      "\n",
+      "   micro avg      0.965     0.960     0.962     56409\n",
+      "   macro avg      0.916     0.859     0.886     56409\n",
+      "weighted avg      0.964     0.960     0.961     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 54, average train epoch loss=0.0070119\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 39 by max_f1: 0.889\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.973     0.989     0.981     48875\n",
+      "       I_LOC      0.882     0.795     0.836      1557\n",
+      "       I_PER      0.932     0.954     0.943      2112\n",
+      "       I_ORG      0.879     0.681     0.767      3865\n",
+      "\n",
+      "   micro avg      0.964     0.961     0.962     56409\n",
+      "   macro avg      0.917     0.855     0.882     56409\n",
+      "weighted avg      0.962     0.961     0.961     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 55, average train epoch loss=0.0017449\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 39 by max_f1: 0.889\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.971     0.990     0.981     48875\n",
+      "       I_LOC      0.883     0.803     0.841      1557\n",
+      "       I_PER      0.927     0.957     0.942      2112\n",
+      "       I_ORG      0.899     0.657     0.759      3865\n",
+      "\n",
+      "   micro avg      0.964     0.961     0.962     56409\n",
+      "   macro avg      0.920     0.852     0.881     56409\n",
+      "weighted avg      0.962     0.961     0.960     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 56, average train epoch loss=0.0039374\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 39 by max_f1: 0.889\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.976     0.987     0.982     48875\n",
+      "       I_LOC      0.855     0.838     0.846      1557\n",
+      "       I_PER      0.928     0.964     0.946      2112\n",
+      "       I_ORG      0.874     0.703     0.780      3865\n",
+      "\n",
+      "   micro avg      0.966     0.963     0.964     56409\n",
+      "   macro avg      0.909     0.873     0.888     56409\n",
+      "weighted avg      0.964     0.963     0.963     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 57, average train epoch loss=0.00041438\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n",
+      "INFO:root:Saving new best model...\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.977     0.986     0.982     48875\n",
+      "       I_LOC      0.872     0.826     0.848      1557\n",
+      "       I_PER      0.938     0.958     0.948      2112\n",
+      "       I_ORG      0.848     0.731     0.785      3865\n",
+      "\n",
+      "   micro avg      0.965     0.963     0.964     56409\n",
+      "   macro avg      0.909     0.875     0.891     56409\n",
+      "weighted avg      0.964     0.963     0.963     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 58, average train epoch loss=0.0020322\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.975     0.987     0.981     48875\n",
+      "       I_LOC      0.877     0.813     0.844      1557\n",
+      "       I_PER      0.949     0.946     0.947      2112\n",
+      "       I_ORG      0.858     0.718     0.782      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.964     56409\n",
+      "   macro avg      0.915     0.866     0.889     56409\n",
+      "weighted avg      0.964     0.962     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 59, average train epoch loss=0.00073169\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.974     0.988     0.981     48875\n",
+      "       I_LOC      0.840     0.836     0.838      1557\n",
+      "       I_PER      0.952     0.934     0.943      2112\n",
+      "       I_ORG      0.880     0.684     0.769      3865\n",
+      "\n",
+      "   micro avg      0.964     0.961     0.963     56409\n",
+      "   macro avg      0.911     0.860     0.883     56409\n",
+      "weighted avg      0.963     0.961     0.961     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 60, average train epoch loss=0.0055515\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.976     0.987     0.981     48875\n",
+      "       I_LOC      0.867     0.822     0.844      1557\n",
+      "       I_PER      0.927     0.956     0.941      2112\n",
+      "       I_ORG      0.866     0.708     0.779      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.963     56409\n",
+      "   macro avg      0.909     0.868     0.886     56409\n",
+      "weighted avg      0.964     0.962     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 61, average train epoch loss=0.0016875\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.976     0.986     0.981     48875\n",
+      "       I_LOC      0.878     0.820     0.848      1557\n",
+      "       I_PER      0.940     0.953     0.946      2112\n",
+      "       I_ORG      0.854     0.720     0.781      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.964     56409\n",
+      "   macro avg      0.912     0.870     0.889     56409\n",
+      "weighted avg      0.964     0.962     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 62, average train epoch loss=6.8231e-05\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.972     0.989     0.980     48875\n",
+      "       I_LOC      0.889     0.801     0.843      1557\n",
+      "       I_PER      0.946     0.945     0.945      2112\n",
+      "       I_ORG      0.880     0.680     0.767      3865\n",
+      "\n",
+      "   micro avg      0.964     0.961     0.963     56409\n",
+      "   macro avg      0.922     0.854     0.884     56409\n",
+      "weighted avg      0.962     0.961     0.961     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 63, average train epoch loss=0.00028691\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.974     0.988     0.981     48875\n",
+      "       I_LOC      0.877     0.812     0.843      1557\n",
+      "       I_PER      0.946     0.944     0.945      2112\n",
+      "       I_ORG      0.874     0.699     0.776      3865\n",
+      "\n",
+      "   micro avg      0.965     0.961     0.963     56409\n",
+      "   macro avg      0.918     0.860     0.886     56409\n",
+      "weighted avg      0.963     0.961     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 64, average train epoch loss=0.00015938\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.975     0.987     0.981     48875\n",
+      "       I_LOC      0.889     0.804     0.845      1557\n",
+      "       I_PER      0.933     0.956     0.945      2112\n",
+      "       I_ORG      0.869     0.704     0.778      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.963     56409\n",
+      "   macro avg      0.917     0.863     0.887     56409\n",
+      "weighted avg      0.964     0.962     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 65, average train epoch loss=0.00010046\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.975     0.987     0.981     48875\n",
+      "       I_LOC      0.892     0.800     0.843      1557\n",
+      "       I_PER      0.936     0.954     0.945      2112\n",
+      "       I_ORG      0.863     0.707     0.778      3865\n",
+      "\n",
+      "   micro avg      0.965     0.961     0.963     56409\n",
+      "   macro avg      0.917     0.862     0.887     56409\n",
+      "weighted avg      0.963     0.961     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 66, average train epoch loss=8.7377e-05\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.972     0.988     0.980     48875\n",
+      "       I_LOC      0.884     0.801     0.841      1557\n",
+      "       I_PER      0.950     0.933     0.941      2112\n",
+      "       I_ORG      0.872     0.692     0.771      3865\n",
+      "\n",
+      "   micro avg      0.964     0.961     0.962     56409\n",
+      "   macro avg      0.920     0.853     0.883     56409\n",
+      "weighted avg      0.962     0.961     0.961     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 67, average train epoch loss=0.0022959\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.974     0.988     0.981     48875\n",
+      "       I_LOC      0.893     0.795     0.841      1557\n",
+      "       I_PER      0.942     0.943     0.943      2112\n",
+      "       I_ORG      0.867     0.703     0.776      3865\n",
+      "\n",
+      "   micro avg      0.965     0.961     0.963     56409\n",
+      "   macro avg      0.919     0.857     0.885     56409\n",
+      "weighted avg      0.963     0.961     0.961     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 68, average train epoch loss=0.0020286\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.974     0.988     0.981     48875\n",
+      "       I_LOC      0.875     0.817     0.845      1557\n",
+      "       I_PER      0.935     0.953     0.944      2112\n",
+      "       I_ORG      0.879     0.685     0.770      3865\n",
+      "\n",
+      "   micro avg      0.964     0.961     0.963     56409\n",
+      "   macro avg      0.916     0.861     0.885     56409\n",
+      "weighted avg      0.963     0.961     0.961     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 69, average train epoch loss=1.2014e-05\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.973     0.989     0.981     48875\n",
+      "       I_LOC      0.882     0.807     0.843      1557\n",
+      "       I_PER      0.939     0.947     0.943      2112\n",
+      "       I_ORG      0.880     0.683     0.769      3865\n",
+      "\n",
+      "   micro avg      0.964     0.961     0.963     56409\n",
+      "   macro avg      0.918     0.857     0.884     56409\n",
+      "weighted avg      0.962     0.961     0.961     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 70, average train epoch loss=0.00025534\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.976     0.987     0.981     48875\n",
+      "       I_LOC      0.869     0.827     0.847      1557\n",
+      "       I_PER      0.940     0.954     0.947      2112\n",
+      "       I_ORG      0.859     0.720     0.783      3865\n",
+      "\n",
+      "   micro avg      0.965     0.963     0.964     56409\n",
+      "   macro avg      0.911     0.872     0.890     56409\n",
+      "weighted avg      0.964     0.963     0.963     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 71, average train epoch loss=0.00048053\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.974     0.988     0.981     48875\n",
+      "       I_LOC      0.867     0.826     0.846      1557\n",
+      "       I_PER      0.940     0.951     0.945      2112\n",
+      "       I_ORG      0.876     0.688     0.771      3865\n",
+      "\n",
+      "   micro avg      0.964     0.962     0.963     56409\n",
+      "   macro avg      0.914     0.863     0.886     56409\n",
+      "weighted avg      0.963     0.962     0.961     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 72, average train epoch loss=-7.7097e-06\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.974     0.988     0.981     48875\n",
+      "       I_LOC      0.878     0.817     0.847      1557\n",
+      "       I_PER      0.935     0.955     0.945      2112\n",
+      "       I_ORG      0.877     0.687     0.771      3865\n",
+      "\n",
+      "   micro avg      0.964     0.962     0.963     56409\n",
+      "   macro avg      0.916     0.862     0.886     56409\n",
+      "weighted avg      0.963     0.962     0.961     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 73, average train epoch loss=2.8954e-05\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.974     0.988     0.981     48875\n",
+      "       I_LOC      0.874     0.821     0.846      1557\n",
+      "       I_PER      0.946     0.942     0.944      2112\n",
+      "       I_ORG      0.870     0.702     0.777      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.963     56409\n",
+      "   macro avg      0.916     0.863     0.887     56409\n",
+      "weighted avg      0.963     0.962     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 74, average train epoch loss=0.0002357\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.976     0.986     0.981     48875\n",
+      "       I_LOC      0.876     0.818     0.846      1557\n",
+      "       I_PER      0.939     0.954     0.947      2112\n",
+      "       I_ORG      0.859     0.717     0.782      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.964     56409\n",
+      "   macro avg      0.912     0.869     0.889     56409\n",
+      "weighted avg      0.964     0.962     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 75, average train epoch loss=2.0281e-05\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.974     0.988     0.981     48875\n",
+      "       I_LOC      0.877     0.820     0.847      1557\n",
+      "       I_PER      0.938     0.953     0.945      2112\n",
+      "       I_ORG      0.872     0.697     0.774      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.963     56409\n",
+      "   macro avg      0.915     0.864     0.887     56409\n",
+      "weighted avg      0.963     0.962     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 76, average train epoch loss=2.0838e-05\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.974     0.988     0.981     48875\n",
+      "       I_LOC      0.879     0.816     0.846      1557\n",
+      "       I_PER      0.937     0.954     0.945      2112\n",
+      "       I_ORG      0.879     0.689     0.773      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.963     56409\n",
+      "   macro avg      0.917     0.862     0.886     56409\n",
+      "weighted avg      0.963     0.962     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 77, average train epoch loss=-1.1993e-06\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.974     0.988     0.981     48875\n",
+      "       I_LOC      0.881     0.812     0.845      1557\n",
+      "       I_PER      0.938     0.952     0.945      2112\n",
+      "       I_ORG      0.878     0.690     0.773      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.963     56409\n",
+      "   macro avg      0.917     0.860     0.886     56409\n",
+      "weighted avg      0.963     0.962     0.961     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 78, average train epoch loss=0.0013875\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.973     0.988     0.981     48875\n",
+      "       I_LOC      0.884     0.804     0.842      1557\n",
+      "       I_PER      0.943     0.945     0.944      2112\n",
+      "       I_ORG      0.871     0.698     0.775      3865\n",
+      "\n",
+      "   micro avg      0.964     0.961     0.963     56409\n",
+      "   macro avg      0.918     0.859     0.885     56409\n",
+      "weighted avg      0.963     0.961     0.961     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 79, average train epoch loss=0.00036236\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.974     0.988     0.981     48875\n",
+      "       I_LOC      0.883     0.808     0.844      1557\n",
+      "       I_PER      0.947     0.942     0.944      2112\n",
+      "       I_ORG      0.871     0.703     0.778      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.963     56409\n",
+      "   macro avg      0.919     0.860     0.887     56409\n",
+      "weighted avg      0.963     0.962     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 80, average train epoch loss=0.0065931\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.973     0.988     0.981     48875\n",
+      "       I_LOC      0.880     0.811     0.844      1557\n",
+      "       I_PER      0.947     0.940     0.943      2112\n",
+      "       I_ORG      0.878     0.691     0.773      3865\n",
+      "\n",
+      "   micro avg      0.964     0.961     0.963     56409\n",
+      "   macro avg      0.919     0.858     0.885     56409\n",
+      "weighted avg      0.963     0.961     0.961     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 81, average train epoch loss=0.00010143\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.973     0.989     0.981     48875\n",
+      "       I_LOC      0.883     0.808     0.844      1557\n",
+      "       I_PER      0.940     0.946     0.943      2112\n",
+      "       I_ORG      0.881     0.685     0.771      3865\n",
+      "\n",
+      "   micro avg      0.964     0.961     0.963     56409\n",
+      "   macro avg      0.919     0.857     0.885     56409\n",
+      "weighted avg      0.963     0.961     0.961     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 82, average train epoch loss=0.00024607\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.974     0.988     0.981     48875\n",
+      "       I_LOC      0.883     0.807     0.843      1557\n",
+      "       I_PER      0.943     0.946     0.944      2112\n",
+      "       I_ORG      0.868     0.705     0.778      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.963     56409\n",
+      "   macro avg      0.917     0.861     0.887     56409\n",
+      "weighted avg      0.963     0.962     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 83, average train epoch loss=1.6383e-05\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.974     0.988     0.981     48875\n",
+      "       I_LOC      0.880     0.811     0.844      1557\n",
+      "       I_PER      0.944     0.943     0.944      2112\n",
+      "       I_ORG      0.868     0.707     0.779      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.963     56409\n",
+      "   macro avg      0.917     0.862     0.887     56409\n",
+      "weighted avg      0.963     0.962     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 84, average train epoch loss=0.0026055\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.973     0.988     0.981     48875\n",
+      "       I_LOC      0.881     0.811     0.844      1557\n",
+      "       I_PER      0.945     0.942     0.943      2112\n",
+      "       I_ORG      0.874     0.696     0.775      3865\n",
+      "\n",
+      "   micro avg      0.965     0.961     0.963     56409\n",
+      "   macro avg      0.918     0.859     0.886     56409\n",
+      "weighted avg      0.963     0.961     0.961     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 85, average train epoch loss=1.0644e-05\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.975     0.987     0.981     48875\n",
+      "       I_LOC      0.877     0.812     0.844      1557\n",
+      "       I_PER      0.944     0.943     0.944      2112\n",
+      "       I_ORG      0.864     0.710     0.780      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.963     56409\n",
+      "   macro avg      0.915     0.863     0.887     56409\n",
+      "weighted avg      0.963     0.962     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 86, average train epoch loss=3.8549e-06\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.975     0.987     0.981     48875\n",
+      "       I_LOC      0.879     0.812     0.844      1557\n",
+      "       I_PER      0.943     0.946     0.944      2112\n",
+      "       I_ORG      0.859     0.717     0.782      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.964     56409\n",
+      "   macro avg      0.914     0.866     0.888     56409\n",
+      "weighted avg      0.964     0.962     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 87, average train epoch loss=-2.8911e-06\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.976     0.987     0.981     48875\n",
+      "       I_LOC      0.879     0.812     0.844      1557\n",
+      "       I_PER      0.943     0.947     0.945      2112\n",
+      "       I_ORG      0.858     0.721     0.784      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.964     56409\n",
+      "   macro avg      0.914     0.867     0.889     56409\n",
+      "weighted avg      0.964     0.962     0.963     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 88, average train epoch loss=-4.4973e-06\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.976     0.987     0.981     48875\n",
+      "       I_LOC      0.877     0.815     0.845      1557\n",
+      "       I_PER      0.943     0.946     0.945      2112\n",
+      "       I_ORG      0.859     0.717     0.782      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.964     56409\n",
+      "   macro avg      0.914     0.866     0.888     56409\n",
+      "weighted avg      0.964     0.962     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 89, average train epoch loss=3.3366e-05\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.975     0.987     0.981     48875\n",
+      "       I_LOC      0.878     0.814     0.845      1557\n",
+      "       I_PER      0.943     0.948     0.945      2112\n",
+      "       I_ORG      0.860     0.716     0.781      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.964     56409\n",
+      "   macro avg      0.914     0.866     0.888     56409\n",
+      "weighted avg      0.964     0.962     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 90, average train epoch loss=3.9598e-05\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.975     0.987     0.981     48875\n",
+      "       I_LOC      0.875     0.816     0.845      1557\n",
+      "       I_PER      0.942     0.947     0.945      2112\n",
+      "       I_ORG      0.864     0.708     0.778      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.963     56409\n",
+      "   macro avg      0.914     0.865     0.887     56409\n",
+      "weighted avg      0.963     0.962     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 91, average train epoch loss=5.975e-05\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.975     0.987     0.981     48875\n",
+      "       I_LOC      0.874     0.817     0.845      1557\n",
+      "       I_PER      0.941     0.947     0.944      2112\n",
+      "       I_ORG      0.865     0.706     0.777      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.963     56409\n",
+      "   macro avg      0.914     0.864     0.887     56409\n",
+      "weighted avg      0.963     0.962     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 92, average train epoch loss=6.7032e-06\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.975     0.987     0.981     48875\n",
+      "       I_LOC      0.874     0.817     0.845      1557\n",
+      "       I_PER      0.941     0.947     0.944      2112\n",
+      "       I_ORG      0.866     0.705     0.777      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.963     56409\n",
+      "   macro avg      0.914     0.864     0.887     56409\n",
+      "weighted avg      0.963     0.962     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 93, average train epoch loss=2.4093e-05\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.975     0.987     0.981     48875\n",
+      "       I_LOC      0.872     0.818     0.844      1557\n",
+      "       I_PER      0.941     0.947     0.944      2112\n",
+      "       I_ORG      0.867     0.702     0.776      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.963     56409\n",
+      "   macro avg      0.914     0.864     0.886     56409\n",
+      "weighted avg      0.963     0.962     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 94, average train epoch loss=6.7781e-05\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.975     0.987     0.981     48875\n",
+      "       I_LOC      0.872     0.818     0.844      1557\n",
+      "       I_PER      0.941     0.947     0.944      2112\n",
+      "       I_ORG      0.867     0.702     0.776      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.963     56409\n",
+      "   macro avg      0.914     0.864     0.886     56409\n",
+      "weighted avg      0.963     0.962     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 95, average train epoch loss=3.8463e-05\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.975     0.987     0.981     48875\n",
+      "       I_LOC      0.872     0.818     0.844      1557\n",
+      "       I_PER      0.941     0.947     0.944      2112\n",
+      "       I_ORG      0.867     0.702     0.776      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.963     56409\n",
+      "   macro avg      0.914     0.864     0.886     56409\n",
+      "weighted avg      0.963     0.962     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 96, average train epoch loss=1.482e-05\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.975     0.987     0.981     48875\n",
+      "       I_LOC      0.872     0.818     0.844      1557\n",
+      "       I_PER      0.941     0.947     0.944      2112\n",
+      "       I_ORG      0.867     0.701     0.776      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.963     56409\n",
+      "   macro avg      0.914     0.863     0.886     56409\n",
+      "weighted avg      0.963     0.962     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 97, average train epoch loss=0.0067129\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.975     0.987     0.981     48875\n",
+      "       I_LOC      0.872     0.817     0.844      1557\n",
+      "       I_PER      0.941     0.947     0.944      2112\n",
+      "       I_ORG      0.867     0.701     0.775      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.963     56409\n",
+      "   macro avg      0.914     0.863     0.886     56409\n",
+      "weighted avg      0.963     0.962     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 98, average train epoch loss=-5.1398e-06\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.975     0.987     0.981     48875\n",
+      "       I_LOC      0.872     0.817     0.844      1557\n",
+      "       I_PER      0.941     0.948     0.944      2112\n",
+      "       I_ORG      0.867     0.701     0.775      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.963     56409\n",
+      "   macro avg      0.914     0.863     0.886     56409\n",
+      "weighted avg      0.963     0.962     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 99, average train epoch loss=1.6662e-05\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.975     0.987     0.981     48875\n",
+      "       I_LOC      0.872     0.817     0.844      1557\n",
+      "       I_PER      0.941     0.948     0.944      2112\n",
+      "       I_ORG      0.867     0.701     0.775      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.963     56409\n",
+      "   macro avg      0.914     0.863     0.886     56409\n",
+      "weighted avg      0.963     0.962     0.962     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 100, average train epoch loss=-2.0516e-05\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 56 by max_f1: 0.891\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.975     0.987     0.981     48875\n",
+      "       I_LOC      0.872     0.817     0.844      1557\n",
+      "       I_PER      0.941     0.948     0.944      2112\n",
+      "       I_ORG      0.867     0.701     0.775      3865\n",
+      "\n",
+      "   micro avg      0.965     0.962     0.963     56409\n",
+      "   macro avg      0.914     0.863     0.886     56409\n",
+      "weighted avg      0.963     0.962     0.962     56409\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "learner.fit(epochs=num_epochs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Eval"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Load model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:pytorch_pretrained_bert.tokenization:The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n",
+      "INFO:pytorch_pretrained_bert.tokenization:loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-vocab.txt from cache at /home/eartemov/.pytorch_pretrained_bert/96435fa287fbf7e469185f1062386e05a075cadbf6838b74da22bf64b080bc32.99bcd55fc66f4f3360bc49ba472b940b8dcf223ea6a345deb969d607ca900729\n"
+     ]
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=train_df_path,\n",
+    "    valid_df_path=valid_df_path,\n",
+    "    idx2labels_path=\"/home/eartemov/ae/work/factRuEval-2016/idx2labels.txt\",\n",
+    "    clear_cache=False\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTBiLSTMAttnNCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:pytorch_pretrained_bert.modeling:loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased.tar.gz from cache at /home/eartemov/.pytorch_pretrained_bert/731c19ddf94e294e00ec1ba9a930c69cc2a0fd489b25d3d691373fae4c0986bd.4e367b0d0155d801930846bb6ed98f8a7c23e0ded37888b29caa37009a40c7b9\n",
+      "INFO:pytorch_pretrained_bert.modeling:extracting archive file /home/eartemov/.pytorch_pretrained_bert/731c19ddf94e294e00ec1ba9a930c69cc2a0fd489b25d3d691373fae4c0986bd.4e367b0d0155d801930846bb6ed98f8a7c23e0ded37888b29caa37009a40c7b9 to temp dir /tmp/tmph8zbmrar\n",
+      "INFO:pytorch_pretrained_bert.modeling:Model config {\n",
+      "  \"attention_probs_dropout_prob\": 0.1,\n",
+      "  \"directionality\": \"bidi\",\n",
+      "  \"hidden_act\": \"gelu\",\n",
+      "  \"hidden_dropout_prob\": 0.1,\n",
+      "  \"hidden_size\": 768,\n",
+      "  \"initializer_range\": 0.02,\n",
+      "  \"intermediate_size\": 3072,\n",
+      "  \"max_position_embeddings\": 512,\n",
+      "  \"num_attention_heads\": 12,\n",
+      "  \"num_hidden_layers\": 12,\n",
+      "  \"pooler_fc_size\": 768,\n",
+      "  \"pooler_num_attention_heads\": 12,\n",
+      "  \"pooler_num_fc_layers\": 3,\n",
+      "  \"pooler_size_per_head\": 128,\n",
+      "  \"pooler_type\": \"first_token_transform\",\n",
+      "  \"type_vocab_size\": 2,\n",
+      "  \"vocab_size\": 119547\n",
+      "}\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "build CRF...\n"
+     ]
+    }
+   ],
+   "source": [
+    "model = BERTBiLSTMAttnNCRF.create(len(data.train_ds.idx2label), crf_dropout=0.3, nbest=8)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_epochs = 100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(\n",
+    "    model, data, \"/home/eartemov/ae/work/models/fre-BERTBiLSTMAttnNCRF-IO.cpt\", t_total=num_epochs * len(data.train_dl))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2630011"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner.load_model()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data.bert_data import get_data_loader_for_predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dl = get_data_loader_for_predict(data, df_path=data.valid_ds.config[\"df_path\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Predicting', max=170, style=ProgressStyle(description_width='…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "preds = learner.predict(dl)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn_crfsuite.metrics import flat_classification_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.utils import bert_labels2tokens, voting_choicer\n",
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pred_tokens, pred_labels = bert_labels2tokens(dl, preds)\n",
+    "true_tokens, true_labels = bert_labels2tokens(dl, [x.bert_labels for x in dl.dataset])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "assert pred_tokens == true_tokens\n",
+    "tokens_report = flat_classification_report(true_labels, pred_labels, labels=[\"I_ORG\", \"I_PER\", \"I_LOC\"], digits=4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "       I_ORG     0.8513    0.7288    0.7853      3865\n",
+      "       I_PER     0.9388    0.9583    0.9485      2112\n",
+      "       I_LOC     0.8730    0.8259    0.8488      1557\n",
+      "\n",
+      "   micro avg     0.8831    0.8132    0.8467      7534\n",
+      "   macro avg     0.8877    0.8377    0.8609      7534\n",
+      "weighted avg     0.8803    0.8132    0.8442      7534\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(tokens_report)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report\n",
+    "from modules.analyze_utils.utils import voting_choicer, tokens2spans"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         LOC     0.7741    0.7261    0.7493      1307\n",
+      "           O     0.9728    0.9831    0.9779     48723\n",
+      "         ORG     0.6741    0.5305    0.5937      1887\n",
+      "         PER     0.8379    0.8154    0.8265      1300\n",
+      "\n",
+      "    accuracy                         0.9566     53217\n",
+      "   macro avg     0.8147    0.7638    0.7869     53217\n",
+      "weighted avg     0.9540    0.9566    0.9550     53217\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(get_bert_span_report(dl, preds, fn=voting_choicer))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/exps/fre BERTBiLSTMCRF.ipynb b/exps/fre BERTBiLSTMCRF.ipynb
new file mode 100644
index 0000000..76db888
--- /dev/null
+++ b/exps/fre BERTBiLSTMCRF.ipynb	
@@ -0,0 +1,1137 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "\n",
+    "import sys\n",
+    "import warnings\n",
+    "\n",
+    "\n",
+    "warnings.filterwarnings(\"ignore\")\n",
+    "sys.path.append(\"../\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## IO markup"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Train"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Creating labels vocabs', max=1519, style=ProgressStyle(descri…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=\"/home/ubuntu/factRuEval-2016/dev.csv\",\n",
+    "    valid_df_path=\"/home/ubuntu/factRuEval-2016/test.csv\",\n",
+    "    idx2labels_path=\"/home/ubuntu/factRuEval-2016/idx2labels.txt\",\n",
+    "    clear_cache=True\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTBiLSTMCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = BERTBiLSTMCRF.create(len(data.train_ds.idx2label), lstm_dropout=0., crf_dropout=0.3, hidden_dim=256)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_epochs = 100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(\n",
+    "    model, data, \"/home/ubuntu/models/fre-BERTBiLSTMCRF-IO.cpt\", t_total=num_epochs * len(data.train_dl))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "953593"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:Resuming train... Current epoch 72.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 73, average train epoch loss=4.9766e-05\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 67 by max_f1: 0.904\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.979     0.987     0.983     48875\n",
+      "       I_LOC      0.905     0.818     0.859      1557\n",
+      "       I_ORG      0.857     0.766     0.809      3865\n",
+      "       I_PER      0.957     0.962     0.959      2112\n",
+      "\n",
+      "   micro avg      0.969     0.967     0.968     56409\n",
+      "   macro avg      0.925     0.883     0.903     56409\n",
+      "weighted avg      0.968     0.967     0.967     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "529cd0b4534d42b99a073e8d7654de11",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "learner.fit(epochs=num_epochs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Eval"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Load model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:pytorch_pretrained_bert.tokenization:The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n",
+      "INFO:pytorch_pretrained_bert.tokenization:loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-vocab.txt from cache at /home/ubuntu/.pytorch_pretrained_bert/96435fa287fbf7e469185f1062386e05a075cadbf6838b74da22bf64b080bc32.99bcd55fc66f4f3360bc49ba472b940b8dcf223ea6a345deb969d607ca900729\n"
+     ]
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=\"/home/ubuntu/factRuEval-2016/dev.csv\",\n",
+    "    valid_df_path=\"/home/ubuntu/factRuEval-2016/test.csv\",\n",
+    "    idx2labels_path=\"/home/ubuntu/factRuEval-2016/idx2labels.txt\",\n",
+    "    clear_cache=False\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTBiLSTMCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:pytorch_pretrained_bert.modeling:loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased.tar.gz from cache at /home/ubuntu/.pytorch_pretrained_bert/731c19ddf94e294e00ec1ba9a930c69cc2a0fd489b25d3d691373fae4c0986bd.4e367b0d0155d801930846bb6ed98f8a7c23e0ded37888b29caa37009a40c7b9\n",
+      "INFO:pytorch_pretrained_bert.modeling:extracting archive file /home/ubuntu/.pytorch_pretrained_bert/731c19ddf94e294e00ec1ba9a930c69cc2a0fd489b25d3d691373fae4c0986bd.4e367b0d0155d801930846bb6ed98f8a7c23e0ded37888b29caa37009a40c7b9 to temp dir /tmp/tmpea90pt0v\n",
+      "INFO:pytorch_pretrained_bert.modeling:Model config {\n",
+      "  \"attention_probs_dropout_prob\": 0.1,\n",
+      "  \"directionality\": \"bidi\",\n",
+      "  \"hidden_act\": \"gelu\",\n",
+      "  \"hidden_dropout_prob\": 0.1,\n",
+      "  \"hidden_size\": 768,\n",
+      "  \"initializer_range\": 0.02,\n",
+      "  \"intermediate_size\": 3072,\n",
+      "  \"max_position_embeddings\": 512,\n",
+      "  \"num_attention_heads\": 12,\n",
+      "  \"num_hidden_layers\": 12,\n",
+      "  \"pooler_fc_size\": 768,\n",
+      "  \"pooler_num_attention_heads\": 12,\n",
+      "  \"pooler_num_fc_layers\": 3,\n",
+      "  \"pooler_size_per_head\": 128,\n",
+      "  \"pooler_type\": \"first_token_transform\",\n",
+      "  \"type_vocab_size\": 2,\n",
+      "  \"vocab_size\": 119547\n",
+      "}\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "model = BERTBiLSTMCRF.create(len(data.train_ds.idx2label), lstm_dropout=0., crf_dropout=0.3, hidden_dim=256)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(model, data, \"/home/ubuntu/models/fre-BERTBiLSTMCRF-IO.cpt\", t_total=num_epochs * len(data.train_dl))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "953593"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner.load_model()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data.bert_data import get_data_loader_for_predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dl = get_data_loader_for_predict(data, df_path=data.valid_ds.config[\"df_path\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Predicting', max=170, style=ProgressStyle(description_width='…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "preds = learner.predict(dl)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn_crfsuite.metrics import flat_classification_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.utils import bert_labels2tokens, voting_choicer\n",
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pred_tokens, pred_labels = bert_labels2tokens(dl, preds)\n",
+    "true_tokens, true_labels = bert_labels2tokens(dl, [x.bert_labels for x in dl.dataset])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "assert pred_tokens == true_tokens\n",
+    "tokens_report = flat_classification_report(true_labels, pred_labels, labels=[\"I_ORG\", \"I_PER\", \"I_LOC\"], digits=4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "       I_ORG     0.8490    0.7840    0.8152      3865\n",
+      "       I_PER     0.9562    0.9602    0.9582      2112\n",
+      "       I_LOC     0.8992    0.8253    0.8607      1557\n",
+      "\n",
+      "   micro avg     0.8910    0.8419    0.8658      7534\n",
+      "   macro avg     0.9015    0.8565    0.8780      7534\n",
+      "weighted avg     0.8894    0.8419    0.8647      7534\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(tokens_report)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 62,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report\n",
+    "from modules.analyze_utils.utils import voting_choicer, tokens2spans"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 63,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         LOC     0.8002    0.7391    0.7684      1311\n",
+      "           O     0.9755    0.9852    0.9803     48686\n",
+      "         ORG     0.7244    0.5951    0.6534      1877\n",
+      "         PER     0.8570    0.8254    0.8409      1300\n",
+      "\n",
+      "    accuracy                         0.9614     53174\n",
+      "   macro avg     0.8393    0.7862    0.8108     53174\n",
+      "weighted avg     0.9594    0.9614    0.9601     53174\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(get_bert_span_report(dl, preds, fn=voting_choicer))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## BIO markup"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Train"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 64,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 65,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:pytorch_pretrained_bert.tokenization:The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n",
+      "INFO:pytorch_pretrained_bert.tokenization:loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-vocab.txt from cache at /home/ubuntu/.pytorch_pretrained_bert/96435fa287fbf7e469185f1062386e05a075cadbf6838b74da22bf64b080bc32.99bcd55fc66f4f3360bc49ba472b940b8dcf223ea6a345deb969d607ca900729\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Creating labels vocabs', max=1519, style=ProgressStyle(descri…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=\"/home/ubuntu/factRuEval-2016/dev.csv\",\n",
+    "    valid_df_path=\"/home/ubuntu/factRuEval-2016/test.csv\",\n",
+    "    idx2labels_path=\"/home/ubuntu/factRuEval-2016/idx2labels_BIO.txt\",\n",
+    "    clear_cache=True,\n",
+    "    markup=\"BIO\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 66,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTBiLSTMCRF, BERTBiLSTMNCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 68,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:pytorch_pretrained_bert.modeling:loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased.tar.gz from cache at /home/ubuntu/.pytorch_pretrained_bert/731c19ddf94e294e00ec1ba9a930c69cc2a0fd489b25d3d691373fae4c0986bd.4e367b0d0155d801930846bb6ed98f8a7c23e0ded37888b29caa37009a40c7b9\n",
+      "INFO:pytorch_pretrained_bert.modeling:extracting archive file /home/ubuntu/.pytorch_pretrained_bert/731c19ddf94e294e00ec1ba9a930c69cc2a0fd489b25d3d691373fae4c0986bd.4e367b0d0155d801930846bb6ed98f8a7c23e0ded37888b29caa37009a40c7b9 to temp dir /tmp/tmpktkqtzsx\n",
+      "INFO:pytorch_pretrained_bert.modeling:Model config {\n",
+      "  \"attention_probs_dropout_prob\": 0.1,\n",
+      "  \"directionality\": \"bidi\",\n",
+      "  \"hidden_act\": \"gelu\",\n",
+      "  \"hidden_dropout_prob\": 0.1,\n",
+      "  \"hidden_size\": 768,\n",
+      "  \"initializer_range\": 0.02,\n",
+      "  \"intermediate_size\": 3072,\n",
+      "  \"max_position_embeddings\": 512,\n",
+      "  \"num_attention_heads\": 12,\n",
+      "  \"num_hidden_layers\": 12,\n",
+      "  \"pooler_fc_size\": 768,\n",
+      "  \"pooler_num_attention_heads\": 12,\n",
+      "  \"pooler_num_fc_layers\": 3,\n",
+      "  \"pooler_size_per_head\": 128,\n",
+      "  \"pooler_type\": \"first_token_transform\",\n",
+      "  \"type_vocab_size\": 2,\n",
+      "  \"vocab_size\": 119547\n",
+      "}\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "model = BERTBiLSTMCRF.create(len(data.train_ds.idx2label), lstm_dropout=0., crf_dropout=0.3, hidden_dim=512)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 69,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 72,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_epochs = 100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 73,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(model, data, \"/home/ubuntu/models/fre-BERTBiLSTMCRF-BIO.cpt\", t_total=num_epochs * len(data.train_dl))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 74,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2235585"
+      ]
+     },
+     "execution_count": 74,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 76,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:Resuming train... Current epoch 44.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 45, average train epoch loss=0.008046\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 33 by max_f1: 0.849\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         B_O      0.982     0.985     0.984     48875\n",
+      "       B_LOC      0.893     0.820     0.855      1320\n",
+      "       B_ORG      0.835     0.757     0.794      1910\n",
+      "       I_ORG      0.766     0.785     0.775      1955\n",
+      "       B_PER      0.923     0.941     0.932      1319\n",
+      "       I_PER      0.959     0.951     0.955       793\n",
+      "       I_LOC      0.805     0.540     0.646       237\n",
+      "\n",
+      "   micro avg      0.966     0.963     0.965     56409\n",
+      "   macro avg      0.881     0.825     0.849     56409\n",
+      "weighted avg      0.965     0.963     0.964     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "325658d5e66446c683f7699537193e9f",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "learner.fit(epochs=num_epochs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Eval"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Load model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 79,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 103,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:pytorch_pretrained_bert.tokenization:The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n",
+      "INFO:pytorch_pretrained_bert.tokenization:loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-vocab.txt from cache at /home/ubuntu/.pytorch_pretrained_bert/96435fa287fbf7e469185f1062386e05a075cadbf6838b74da22bf64b080bc32.99bcd55fc66f4f3360bc49ba472b940b8dcf223ea6a345deb969d607ca900729\n"
+     ]
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=\"/home/ubuntu/factRuEval-2016/dev.csv\",\n",
+    "    valid_df_path=\"/home/ubuntu/factRuEval-2016/test.csv\",\n",
+    "    idx2labels_path=\"/home/ubuntu/factRuEval-2016/idx2labels_BIO.txt\",\n",
+    "    clear_cache=False,\n",
+    "    markup=\"BIO\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 104,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTBiLSTMCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 105,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:pytorch_pretrained_bert.modeling:loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased.tar.gz from cache at /home/ubuntu/.pytorch_pretrained_bert/731c19ddf94e294e00ec1ba9a930c69cc2a0fd489b25d3d691373fae4c0986bd.4e367b0d0155d801930846bb6ed98f8a7c23e0ded37888b29caa37009a40c7b9\n",
+      "INFO:pytorch_pretrained_bert.modeling:extracting archive file /home/ubuntu/.pytorch_pretrained_bert/731c19ddf94e294e00ec1ba9a930c69cc2a0fd489b25d3d691373fae4c0986bd.4e367b0d0155d801930846bb6ed98f8a7c23e0ded37888b29caa37009a40c7b9 to temp dir /tmp/tmppbrjc9tb\n",
+      "INFO:pytorch_pretrained_bert.modeling:Model config {\n",
+      "  \"attention_probs_dropout_prob\": 0.1,\n",
+      "  \"directionality\": \"bidi\",\n",
+      "  \"hidden_act\": \"gelu\",\n",
+      "  \"hidden_dropout_prob\": 0.1,\n",
+      "  \"hidden_size\": 768,\n",
+      "  \"initializer_range\": 0.02,\n",
+      "  \"intermediate_size\": 3072,\n",
+      "  \"max_position_embeddings\": 512,\n",
+      "  \"num_attention_heads\": 12,\n",
+      "  \"num_hidden_layers\": 12,\n",
+      "  \"pooler_fc_size\": 768,\n",
+      "  \"pooler_num_attention_heads\": 12,\n",
+      "  \"pooler_num_fc_layers\": 3,\n",
+      "  \"pooler_size_per_head\": 128,\n",
+      "  \"pooler_type\": \"first_token_transform\",\n",
+      "  \"type_vocab_size\": 2,\n",
+      "  \"vocab_size\": 119547\n",
+      "}\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "model = BERTBiLSTMCRF.create(len(data.train_ds.idx2label), lstm_dropout=0., crf_dropout=0.3, hidden_dim=512)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 106,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 107,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(model, data, \"/home/ubuntu/models/fre-BERTBiLSTMCRF-BIO.cpt\", t_total=num_epochs * len(data.train_dl))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 108,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2235585"
+      ]
+     },
+     "execution_count": 108,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 109,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner.load_model()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 110,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data.bert_data import get_data_loader_for_predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 111,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dl = get_data_loader_for_predict(data, df_path=data.valid_ds.config[\"df_path\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 113,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Predicting', max=170, style=ProgressStyle(description_width='…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "preds = learner.predict(dl)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 114,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn_crfsuite.metrics import flat_classification_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 115,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.utils import bert_labels2tokens, voting_choicer\n",
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 116,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pred_tokens, pred_labels = bert_labels2tokens(dl, preds)\n",
+    "true_tokens, true_labels = bert_labels2tokens(dl, [x.bert_labels for x in dl.dataset])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 123,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "assert pred_tokens == true_tokens\n",
+    "tokens_report = flat_classification_report(true_labels, pred_labels,\n",
+    "                                           labels=[\"I_ORG\", \"I_PER\", \"I_LOC\", \"B_ORG\", \"B_PER\", \"B_LOC\"], digits=4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 124,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "       I_ORG     0.8183    0.7488    0.7821      1955\n",
+      "       I_PER     0.9619    0.9546    0.9582       793\n",
+      "       I_LOC     0.8065    0.5274    0.6378       237\n",
+      "       B_ORG     0.8297    0.7424    0.7836      1910\n",
+      "       B_PER     0.9357    0.9371    0.9364      1319\n",
+      "       B_LOC     0.8814    0.8386    0.8595      1320\n",
+      "\n",
+      "   micro avg     0.8703    0.8106    0.8394      7534\n",
+      "   macro avg     0.8722    0.7915    0.8263      7534\n",
+      "weighted avg     0.8675    0.8106    0.8370      7534\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(tokens_report)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 119,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.plot_metrics import get_f1_score"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 120,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.8262519680452184"
+      ]
+     },
+     "execution_count": 120,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "get_f1_score(true_labels, pred_labels, labels=[\"I_ORG\", \"I_PER\", \"I_LOC\", \"B_ORG\", \"B_PER\", \"B_LOC\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 121,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report\n",
+    "from modules.analyze_utils.utils import voting_choicer, tokens2spans"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 122,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         LOC     0.7840    0.7165    0.7487      1312\n",
+      "           O     0.9749    0.9851    0.9800     48711\n",
+      "         ORG     0.7326    0.5948    0.6565      1888\n",
+      "         PER     0.8504    0.8204    0.8352      1303\n",
+      "\n",
+      "    accuracy                         0.9606     53214\n",
+      "   macro avg     0.8355    0.7792    0.8051     53214\n",
+      "weighted avg     0.9585    0.9606    0.9592     53214\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(get_bert_span_report(dl, preds, fn=voting_choicer))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/exps/fre BERTBiLSTMNCRF.ipynb b/exps/fre BERTBiLSTMNCRF.ipynb
new file mode 100644
index 0000000..5da0ef6
--- /dev/null
+++ b/exps/fre BERTBiLSTMNCRF.ipynb	
@@ -0,0 +1,604 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "\n",
+    "import sys\n",
+    "import warnings\n",
+    "\n",
+    "\n",
+    "warnings.filterwarnings(\"ignore\")\n",
+    "sys.path.append(\"../\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## IO markup"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Train"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Creating labels vocabs', max=1519, style=ProgressStyle(descri…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=\"/home/eartemov/ae/work/factRuEval-2016/dev.csv\",\n",
+    "    valid_df_path=\"/home/eartemov/ae/work/factRuEval-2016/test.csv\",\n",
+    "    idx2labels_path=\"/home/eartemov/ae/work/factRuEval-2016/idx2labels.txt\",\n",
+    "    clear_cache=True\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTBiLSTMNCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "build CRF...\n"
+     ]
+    }
+   ],
+   "source": [
+    "model = BERTBiLSTMNCRF.create(len(data.train_ds.idx2label), lstm_dropout=0., crf_dropout=0.3, nbest=7)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_epochs = 100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(\n",
+    "    model, data, \"/home/eartemov/ae/work/models/fre-BERTBiLSTMNCRF-IO.cpt\", t_total=num_epochs * len(data.train_dl))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2235259"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:Resuming train... Current epoch 50.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:\n",
+      "epoch 51, average train epoch loss=0.0015935\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=170), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:on epoch 38 by max_f1: 0.89\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         I_O      0.973     0.989     0.981     48875\n",
+      "       I_LOC      0.884     0.788     0.833      1557\n",
+      "       I_PER      0.955     0.925     0.939      2112\n",
+      "       I_ORG      0.874     0.695     0.775      3865\n",
+      "\n",
+      "   micro avg      0.964     0.961     0.963     56409\n",
+      "   macro avg      0.921     0.849     0.882     56409\n",
+      "weighted avg      0.963     0.961     0.961     56409\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "7720e298bd584cd2abc0331ac0431e52",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=95), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "learner.fit(epochs=num_epochs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Eval"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Load model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:pytorch_pretrained_bert.tokenization:The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n",
+      "INFO:pytorch_pretrained_bert.tokenization:loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-vocab.txt from cache at /home/eartemov/.pytorch_pretrained_bert/96435fa287fbf7e469185f1062386e05a075cadbf6838b74da22bf64b080bc32.99bcd55fc66f4f3360bc49ba472b940b8dcf223ea6a345deb969d607ca900729\n"
+     ]
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=\"/home/eartemov/ae/work/factRuEval-2016/dev.csv\",\n",
+    "    valid_df_path=\"/home/eartemov/ae/work/factRuEval-2016/test.csv\",\n",
+    "    idx2labels_path=\"/home/eartemov/ae/work/factRuEval-2016/idx2labels.txt\",\n",
+    "    clear_cache=False\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTBiLSTMNCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:pytorch_pretrained_bert.modeling:loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased.tar.gz from cache at /home/eartemov/.pytorch_pretrained_bert/731c19ddf94e294e00ec1ba9a930c69cc2a0fd489b25d3d691373fae4c0986bd.4e367b0d0155d801930846bb6ed98f8a7c23e0ded37888b29caa37009a40c7b9\n",
+      "INFO:pytorch_pretrained_bert.modeling:extracting archive file /home/eartemov/.pytorch_pretrained_bert/731c19ddf94e294e00ec1ba9a930c69cc2a0fd489b25d3d691373fae4c0986bd.4e367b0d0155d801930846bb6ed98f8a7c23e0ded37888b29caa37009a40c7b9 to temp dir /tmp/tmpxxd4ym1l\n",
+      "INFO:pytorch_pretrained_bert.modeling:Model config {\n",
+      "  \"attention_probs_dropout_prob\": 0.1,\n",
+      "  \"directionality\": \"bidi\",\n",
+      "  \"hidden_act\": \"gelu\",\n",
+      "  \"hidden_dropout_prob\": 0.1,\n",
+      "  \"hidden_size\": 768,\n",
+      "  \"initializer_range\": 0.02,\n",
+      "  \"intermediate_size\": 3072,\n",
+      "  \"max_position_embeddings\": 512,\n",
+      "  \"num_attention_heads\": 12,\n",
+      "  \"num_hidden_layers\": 12,\n",
+      "  \"pooler_fc_size\": 768,\n",
+      "  \"pooler_num_attention_heads\": 12,\n",
+      "  \"pooler_num_fc_layers\": 3,\n",
+      "  \"pooler_size_per_head\": 128,\n",
+      "  \"pooler_type\": \"first_token_transform\",\n",
+      "  \"type_vocab_size\": 2,\n",
+      "  \"vocab_size\": 119547\n",
+      "}\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "build CRF...\n"
+     ]
+    }
+   ],
+   "source": [
+    "model = BERTBiLSTMNCRF.create(len(data.train_ds.idx2label), lstm_dropout=0., crf_dropout=0.3, nbest=7)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(model, data, \"/home/eartemov/ae/work/models/fre-BERTBiLSTMNCRF-IO.cpt\",\n",
+    "                     t_total=num_epochs * len(data.train_dl))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2235259"
+      ]
+     },
+     "execution_count": 26,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner.load_model()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data.bert_data import get_data_loader_for_predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dl = get_data_loader_for_predict(data, df_path=data.valid_ds.config[\"df_path\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Predicting', max=170, style=ProgressStyle(description_width='…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "preds = learner.predict(dl)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn_crfsuite.metrics import flat_classification_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.utils import bert_labels2tokens, voting_choicer\n",
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pred_tokens, pred_labels = bert_labels2tokens(dl, preds)\n",
+    "true_tokens, true_labels = bert_labels2tokens(dl, [x.bert_labels for x in dl.dataset])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "assert pred_tokens == true_tokens\n",
+    "tokens_report = flat_classification_report(true_labels, pred_labels, labels=[\"I_ORG\", \"I_PER\", \"I_LOC\"], digits=4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "       I_ORG     0.8358    0.7545    0.7930      3865\n",
+      "       I_PER     0.9432    0.9508    0.9469      2112\n",
+      "       I_LOC     0.8908    0.7913    0.8381      1557\n",
+      "\n",
+      "   micro avg     0.8793    0.8171    0.8471      7534\n",
+      "   macro avg     0.8899    0.8322    0.8594      7534\n",
+      "weighted avg     0.8773    0.8171    0.8455      7534\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(tokens_report)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report\n",
+    "from modules.analyze_utils.utils import voting_choicer, tokens2spans"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         LOC     0.7730    0.6955    0.7322      1307\n",
+      "           O     0.9727    0.9836    0.9782     48682\n",
+      "         ORG     0.6786    0.5466    0.6055      1877\n",
+      "         PER     0.8376    0.8048    0.8209      1301\n",
+      "\n",
+      "    accuracy                         0.9568     53167\n",
+      "   macro avg     0.8155    0.7576    0.7842     53167\n",
+      "weighted avg     0.9541    0.9568    0.9551     53167\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(get_bert_span_report(dl, preds, fn=voting_choicer))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/exps/fre BERTCRF.ipynb b/exps/fre BERTCRF.ipynb
new file mode 100644
index 0000000..4dd9c20
--- /dev/null
+++ b/exps/fre BERTCRF.ipynb	
@@ -0,0 +1,475 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "\n",
+    "import sys\n",
+    "import warnings\n",
+    "\n",
+    "\n",
+    "warnings.filterwarnings(\"ignore\")\n",
+    "sys.path.append(\"../\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## IO markup"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Train"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_df_path = \"/home/eartemov/ae/work/factRuEval-2016/dev.csv\"\n",
+    "valid_df_path = \"/home/eartemov/ae/work/factRuEval-2016/test.csv\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Creating labels vocabs', max=1519, style=ProgressStyle(descri…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=train_df_path,\n",
+    "    valid_df_path=valid_df_path,\n",
+    "    idx2labels_path=\"/home/eartemov/ae/work/factRuEval-2016/idx2labels3.txt\",\n",
+    "    clear_cache=True\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = BERTCRF.create(len(data.train_ds.idx2label), crf_dropout=0.3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_epochs = 100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(\n",
+    "    model, data, \"/home/eartemov/ae/work/models/fre-BERTCRF-IO.cpt\", t_total=num_epochs * len(data.train_dl))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "298489"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "learner.fit(epochs=num_epochs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Eval"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Load model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:pytorch_pretrained_bert.tokenization:The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n",
+      "INFO:pytorch_pretrained_bert.tokenization:loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-vocab.txt from cache at /home/eartemov/.pytorch_pretrained_bert/96435fa287fbf7e469185f1062386e05a075cadbf6838b74da22bf64b080bc32.99bcd55fc66f4f3360bc49ba472b940b8dcf223ea6a345deb969d607ca900729\n"
+     ]
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=train_df_path,\n",
+    "    valid_df_path=valid_df_path,\n",
+    "    idx2labels_path=\"/home/eartemov/ae/work/factRuEval-2016/idx2labels3.txt\",\n",
+    "    clear_cache=False\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = BERTCRF.create(len(data.train_ds.idx2label), crf_dropout=0.3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_epochs = 100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(\n",
+    "    model, data, \"/home/eartemov/ae/work/models/fre-BERTCRF-IO.cpt\", t_total=num_epochs * len(data.train_dl))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "298489"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner.load_model()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data.bert_data import get_data_loader_for_predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dl = get_data_loader_for_predict(data, df_path=data.valid_ds.config[\"df_path\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Predicting', max=170, style=ProgressStyle(description_width='…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "preds = learner.predict(dl)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn_crfsuite.metrics import flat_classification_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.utils import bert_labels2tokens, voting_choicer\n",
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pred_tokens, pred_labels = bert_labels2tokens(dl, preds)\n",
+    "true_tokens, true_labels = bert_labels2tokens(dl, [x.bert_labels for x in dl.dataset])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "assert pred_tokens == true_tokens\n",
+    "tokens_report = flat_classification_report(true_labels, pred_labels, labels=data.train_ds.idx2label[5:], digits=4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "       I_LOC     0.8761    0.7996    0.8361      1557\n",
+      "       I_PER     0.9543    0.9683    0.9612      2112\n",
+      "       I_ORG     0.7944    0.7700    0.7820      3865\n",
+      "\n",
+      "   micro avg     0.8572    0.8317    0.8442      7534\n",
+      "   macro avg     0.8750    0.8460    0.8598      7534\n",
+      "weighted avg     0.8561    0.8317    0.8434      7534\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(tokens_report)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report\n",
+    "from modules.analyze_utils.utils import voting_choicer, tokens2spans"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         LOC     0.7639    0.6869    0.7234      1300\n",
+      "           O     0.9709    0.9799    0.9754     48614\n",
+      "         ORG     0.5976    0.5078    0.5490      1863\n",
+      "         PER     0.8359    0.8094    0.8224      1296\n",
+      "\n",
+      "    accuracy                         0.9520     53073\n",
+      "   macro avg     0.7921    0.7460    0.7676     53073\n",
+      "weighted avg     0.9494    0.9520    0.9505     53073\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(get_bert_span_report(dl, preds, fn=voting_choicer))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/exps/fre BERTNCRF.ipynb b/exps/fre BERTNCRF.ipynb
new file mode 100644
index 0000000..57de8e6
--- /dev/null
+++ b/exps/fre BERTNCRF.ipynb	
@@ -0,0 +1,519 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "\n",
+    "import sys\n",
+    "import warnings\n",
+    "\n",
+    "\n",
+    "warnings.filterwarnings(\"ignore\")\n",
+    "sys.path.append(\"../\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## IO markup"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Train"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_df_path = \"/home/eartemov/ae/work/factRuEval-2016/dev.csv\"\n",
+    "valid_df_path = \"/home/eartemov/ae/work/factRuEval-2016/test.csv\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Creating labels vocabs', max=1519, style=ProgressStyle(descri…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=train_df_path,\n",
+    "    valid_df_path=valid_df_path,\n",
+    "    idx2labels_path=\"/home/eartemov/ae/work/factRuEval-2016/idx2labels4.txt\",\n",
+    "    clear_cache=True\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTNCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "build CRF...\n"
+     ]
+    }
+   ],
+   "source": [
+    "model = BERTNCRF.create(len(data.train_ds.idx2label), crf_dropout=0.3, nbest=len(data.train_ds.idx2label)-1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_epochs = 100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(\n",
+    "    model, data, \"/home/eartemov/ae/work/models/fre-BERTNCRF-IO.cpt\", t_total=num_epochs * len(data.train_dl))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "299259"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "learner.fit(epochs=num_epochs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Eval"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Load model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data import bert_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:pytorch_pretrained_bert.tokenization:The pre-trained model you are loading is a cased model but you have not set `do_lower_case` to False. We are setting `do_lower_case=False` for you but you may want to check this behavior.\n",
+      "INFO:pytorch_pretrained_bert.tokenization:loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-vocab.txt from cache at /home/eartemov/.pytorch_pretrained_bert/96435fa287fbf7e469185f1062386e05a075cadbf6838b74da22bf64b080bc32.99bcd55fc66f4f3360bc49ba472b940b8dcf223ea6a345deb969d607ca900729\n"
+     ]
+    }
+   ],
+   "source": [
+    "data = bert_data.LearnData.create(\n",
+    "    train_df_path=train_df_path,\n",
+    "    valid_df_path=valid_df_path,\n",
+    "    idx2labels_path=\"/home/eartemov/ae/work/factRuEval-2016/idx2labels3.txt\",\n",
+    "    clear_cache=False\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.models.bert_models import BERTNCRF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:pytorch_pretrained_bert.modeling:loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased.tar.gz from cache at /home/eartemov/.pytorch_pretrained_bert/731c19ddf94e294e00ec1ba9a930c69cc2a0fd489b25d3d691373fae4c0986bd.4e367b0d0155d801930846bb6ed98f8a7c23e0ded37888b29caa37009a40c7b9\n",
+      "INFO:pytorch_pretrained_bert.modeling:extracting archive file /home/eartemov/.pytorch_pretrained_bert/731c19ddf94e294e00ec1ba9a930c69cc2a0fd489b25d3d691373fae4c0986bd.4e367b0d0155d801930846bb6ed98f8a7c23e0ded37888b29caa37009a40c7b9 to temp dir /tmp/tmpclku_2kv\n",
+      "INFO:pytorch_pretrained_bert.modeling:Model config {\n",
+      "  \"attention_probs_dropout_prob\": 0.1,\n",
+      "  \"directionality\": \"bidi\",\n",
+      "  \"hidden_act\": \"gelu\",\n",
+      "  \"hidden_dropout_prob\": 0.1,\n",
+      "  \"hidden_size\": 768,\n",
+      "  \"initializer_range\": 0.02,\n",
+      "  \"intermediate_size\": 3072,\n",
+      "  \"max_position_embeddings\": 512,\n",
+      "  \"num_attention_heads\": 12,\n",
+      "  \"num_hidden_layers\": 12,\n",
+      "  \"pooler_fc_size\": 768,\n",
+      "  \"pooler_num_attention_heads\": 12,\n",
+      "  \"pooler_num_fc_layers\": 3,\n",
+      "  \"pooler_size_per_head\": 128,\n",
+      "  \"pooler_type\": \"first_token_transform\",\n",
+      "  \"type_vocab_size\": 2,\n",
+      "  \"vocab_size\": 119547\n",
+      "}\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "build CRF...\n"
+     ]
+    }
+   ],
+   "source": [
+    "model = BERTNCRF.create(len(data.train_ds.idx2label), crf_dropout=0.3, nbest=len(data.train_ds.idx2label)-1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.train.train import NerLearner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_epochs = 100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner = NerLearner(\n",
+    "    model, data, \"/home/eartemov/ae/work/models/fre-BERTNCRF-IO.cpt\", t_total=num_epochs * len(data.train_dl))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "299259"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.get_n_trainable_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "learner.load_model()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data.bert_data import get_data_loader_for_predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dl = get_data_loader_for_predict(data, df_path=data.valid_ds.config[\"df_path\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Predicting', max=170, style=ProgressStyle(description_width='…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r"
+     ]
+    }
+   ],
+   "source": [
+    "preds = learner.predict(dl)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn_crfsuite.metrics import flat_classification_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.utils import bert_labels2tokens, voting_choicer\n",
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pred_tokens, pred_labels = bert_labels2tokens(dl, preds)\n",
+    "true_tokens, true_labels = bert_labels2tokens(dl, [x.bert_labels for x in dl.dataset])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "assert pred_tokens == true_tokens\n",
+    "tokens_report = flat_classification_report(true_labels, pred_labels, labels=data.train_ds.idx2label[5:], digits=4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "       I_LOC     0.8939    0.7848    0.8358      1557\n",
+      "       I_PER     0.9545    0.9645    0.9595      2112\n",
+      "       I_ORG     0.7930    0.7780    0.7854      3865\n",
+      "\n",
+      "   micro avg     0.8592    0.8317    0.8452      7534\n",
+      "   macro avg     0.8805    0.8424    0.8603      7534\n",
+      "weighted avg     0.8591    0.8317    0.8446      7534\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(tokens_report)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.analyze_utils.plot_metrics import get_bert_span_report\n",
+    "from modules.analyze_utils.utils import voting_choicer, tokens2spans"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         LOC     0.7760    0.6938    0.7326      1303\n",
+      "           O     0.9726    0.9814    0.9770     48636\n",
+      "         ORG     0.6089    0.5300    0.5667      1868\n",
+      "         PER     0.8567    0.8185    0.8371      1300\n",
+      "\n",
+      "    accuracy                         0.9545     53107\n",
+      "   macro avg     0.8035    0.7559    0.7783     53107\n",
+      "weighted avg     0.9522    0.9545    0.9531     53107\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(get_bert_span_report(dl, preds, fn=voting_choicer))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/exps/prc fre.ipynb b/exps/prc fre.ipynb
new file mode 100644
index 0000000..a324e91
--- /dev/null
+++ b/exps/prc fre.ipynb	
@@ -0,0 +1,220 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### FactRuEval-2016 preprocess\n",
+    "More info about dataset: https://github.com/dialogue-evaluation/factRuEval-2016"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "import warnings\n",
+    "\n",
+    "\n",
+    "warnings.filterwarnings(\"ignore\")\n",
+    "sys.path.append(\"../\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modules.data.fre import fact_ru_eval_preprocess"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dev_dir = \"/home/eartemov/ae/work/factRuEval-2016/devset/\"\n",
+    "test_dir = \"/home/eartemov/ae/work/factRuEval-2016/testset/\"\n",
+    "dev_df_path = \"/home/eartemov/ae/work/factRuEval-2016/dev.csv\"\n",
+    "test_df_path = \"/home/eartemov/ae/work/factRuEval-2016/test.csv\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "43de7e40d1784421bb55921e2d0058f3",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Process FactRuEval2016 dev set.', max=1519, style=ProgressSty…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "27617d0f776d4b37b6f893bcac517f24",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, description='Process FactRuEval2016 test set.', max=2715, style=ProgressSt…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "fact_ru_eval_preprocess(dev_dir, test_dir, dev_df_path, test_df_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>labels</th>\n",
+       "      <th>text</th>\n",
+       "      <th>cls</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>O O B_LOC O O O O O B_PER I_PER O O O O O</td>\n",
+       "      <td>Сегодня в Москве на 40-й день после смерти Его...</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>O B_LOC I_LOC O O O O B_ORG O B_PER I_PER O O ...</td>\n",
+       "      <td>К Кронштадтскому бульвару , где болельщика « С...</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>O O O O O O O O O O</td>\n",
+       "      <td>И тишина ... Все прошло мирно и не столь массово</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>O O O O O O O O O O O O O</td>\n",
+       "      <td>Правда , были задержания , но , как пояснили в...</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>O O O O O O O O O O</td>\n",
+       "      <td>Одним словом , очередной « Русский марш » не с...</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                              labels  \\\n",
+       "0          O O B_LOC O O O O O B_PER I_PER O O O O O   \n",
+       "1  O B_LOC I_LOC O O O O B_ORG O B_PER I_PER O O ...   \n",
+       "2                                O O O O O O O O O O   \n",
+       "3                          O O O O O O O O O O O O O   \n",
+       "4                                O O O O O O O O O O   \n",
+       "\n",
+       "                                                text    cls  \n",
+       "0  Сегодня в Москве на 40-й день после смерти Его...  False  \n",
+       "1  К Кронштадтскому бульвару , где болельщика « С...  False  \n",
+       "2   И тишина ... Все прошло мирно и не столь массово   True  \n",
+       "3  Правда , были задержания , но , как пояснили в...   True  \n",
+       "4  Одним словом , очередной « Русский марш » не с...   True  "
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.read_csv(dev_df_path, sep=\"\\t\").head()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/modules/__init__.py b/modules/__init__.py
index ee9c206..b118112 100644
--- a/modules/__init__.py
+++ b/modules/__init__.py
@@ -1,6 +1,7 @@
-from .train.train import NerLearner
-from .data.bert_data import BertNerData
-from .models.bert_models import BertBiLSTMCRF
+from .utils import get_tqdm
 
 
-__all__ = ["NerLearner", "BertNerData", "BertBiLSTMCRF"]
+tqdm = get_tqdm()
+
+
+__all__ = ["tqdm"]
diff --git a/modules/utils/__init__.py b/modules/analyze_utils/__init__.py
similarity index 100%
rename from modules/utils/__init__.py
rename to modules/analyze_utils/__init__.py
diff --git a/modules/analyze_utils/main_metrics.py b/modules/analyze_utils/main_metrics.py
new file mode 100644
index 0000000..f74a1b6
--- /dev/null
+++ b/modules/analyze_utils/main_metrics.py
@@ -0,0 +1,214 @@
+# This code is reused from https://github.com/deepmipt/DeepPavlov/blob/master/deeppavlov/metrics/fmeasure.py
+import itertools
+from collections import OrderedDict
+
+
+def chunk_finder(current_token, previous_token, tag):
+    """Decide whether a chunk of entity type `tag` opens and/or closes at the current token.
+
+    Labels are expected in the underscore scheme used throughout this module
+    ('B_<tag>', 'I_<tag>', 'O'). Earlier code split labels on '-' and partly
+    compared against 'I-'/'B-', so an 'I_' token never continued a chunk.
+
+    Returns a `(create_chunk, pop_out)` pair of booleans: `create_chunk` means a
+    new chunk starts at the current token, `pop_out` means the chunk that was
+    open at the previous token ends there.
+    """
+    begin, inside = 'B_' + tag, 'I_' + tag
+    # Any label whose entity part differs from `tag` counts as outside ('O').
+    current_is_tag = current_token.split('_', 1)[-1] == tag
+    previous_is_tag = previous_token.split('_', 1)[-1] == tag
+    # Open on every 'B_', and on an 'I_' that does not continue a chunk.
+    create_chunk = (current_token == begin and
+                    (not previous_is_tag or previous_token in (begin, inside))) or \
+                   (not previous_is_tag and current_token == inside)
+    # Close the open chunk when a new one begins or the tag run stops.
+    pop_out = previous_token in (begin, inside) and \
+              (current_token == begin or not current_is_tag)
+    return create_chunk, pop_out
+
+
+def _global_stats_f1(results):
+    """Aggregate per-tag chunk statistics into corpus-level totals.
+
+    Every key of `results` other than '__total__' is read as a mapping with
+    'n_pred', 'n_true', 'tp', 'precision', 'recall' and 'f1' entries.
+
+    Returns `(total_res, accuracy, total_true_entities,
+    total_predicted_entities, total_correct)`, where `total_res` is a dict with
+    the entity counts and the precision, recall and f1 computed below.
+    """
+    n_true_all = n_pred_all = n_correct = 0
+    precision_sum = recall_sum = f1_sum = 0
+    for name, stats in results.items():
+        if name == '__total__':
+            continue
+        n_pred, n_true = stats['n_pred'], stats['n_true']
+        n_correct += stats['tp']
+        n_true_all += n_true
+        n_pred_all += n_pred
+        # Weight each per-tag score by the number of entities behind it.
+        precision_sum += stats['precision'] * n_pred
+        recall_sum += stats['recall'] * n_true
+        f1_sum += stats['f1'] * n_true  # accumulated only; not used below
+
+    # Counts of zero leave the corresponding score at 0 instead of dividing.
+    accuracy = n_correct / n_true_all * 100 if n_true_all > 0 else 0
+    recall = recall_sum / n_true_all if n_true_all > 0 else 0
+    precision = precision_sum / n_pred_all if n_pred_all > 0 else 0
+    if precision + recall > 0:
+        f1 = 2 * precision * recall / (precision + recall)
+    else:
+        f1 = 0
+
+    total_res = {
+        'n_predicted_entities': n_pred_all,
+        'n_true_entities': n_true_all,
+        'precision': precision,
+        'recall': recall,
+        'f1': f1,
+    }
+    return total_res, accuracy, n_true_all, n_pred_all, n_correct
+
+
+def precision_recall_f1(y_true, y_pred, print_results=True, short_report=False, entity_of_interest=None):
+    """Compute chunk-level precision, recall and F1 for every entity type.
+
+    `y_true` and `y_pred` may each be a flat sequence of labels or a sequence of
+    per-sentence label sequences; nested input is flattened first. The original
+    built flattened copies but never used them, so nested input raised
+    `TypeError` while the tags were being collected.
+
+    Labels are converted with `str`. 'O', 'I_O' and 'B_O' are not entities; the
+    entity type of any other label is the text after its first two characters.
+    A predicted chunk counts as correct only when both of its boundaries equal
+    those of a true chunk of the same type.
+
+    Returns an `OrderedDict` keyed by the sorted entity types plus '__total__'.
+    Per-type entries hold 'precision', 'recall' and 'f1' (as percentages) and
+    the counts 'n_pred', 'n_true', 'tp', 'fn' and 'fp'. '__total__' holds the
+    output of `_global_stats_f1` plus 'n_pred', 'n_true' and 'n_tokens'. When
+    `print_results` is true the report is printed via `_print_conll_report`.
+    """
+    def flatten(labels):
+        # Expand sentences (non-string iterables); keep plain labels as they are.
+        flat = []
+        for item in labels:
+            if isinstance(item, (str, bytes)) or not hasattr(item, '__iter__'):
+                flat.append(str(item))
+            else:
+                flat.extend(str(label) for label in item)
+        return flat
+
+    y_true, y_pred = flatten(y_true), flatten(y_pred)
+    n_tokens = len(y_true)
+    # Find all tags
+    tags = sorted({label[2:] for label in itertools.chain(y_true, y_pred)
+                   if label not in ("O", "I_O", "B_O")})
+
+    results = OrderedDict((tag, OrderedDict()) for tag in tags)
+    results['__total__'] = OrderedDict()
+
+    for tag in tags:
+        # Firstly we find all chunks in the ground truth and prediction
+        # For each chunk we write starting and ending indices
+        true_chunk, pred_chunk = [], []
+        prev_tag_true = prev_tag_pred = 'O'
+        for count in range(n_tokens):
+            yt, yp = y_true[count], y_pred[count]
+            for chunks, token, prev_token in ((true_chunk, yt, prev_tag_true),
+                                              (pred_chunk, yp, prev_tag_pred)):
+                create_chunk, pop_out = chunk_finder(token, prev_token, tag)
+                if pop_out:
+                    # Replace the open start index with a (start, end) pair.
+                    chunks[-1] = (chunks[-1], count - 1)
+                if create_chunk:
+                    chunks.append(count)
+            prev_tag_true, prev_tag_pred = yt, yp
+
+        # A chunk still open after the last token ends on that token.
+        for chunks in (true_chunk, pred_chunk):
+            if chunks and not isinstance(chunks[-1], tuple):
+                chunks[-1] = (chunks[-1], n_tokens - 1)
+
+        # Then we find all correctly classified intervals
+        # True positive results
+        tp = len(set(pred_chunk).intersection(true_chunk))
+        # And then just calculate errors of the first and second kind
+        # False negative
+        fn = len(true_chunk) - tp
+        # False positive
+        fp = len(pred_chunk) - tp
+        # Scores are percentages; an empty denominator yields 0.
+        precision = tp / (tp + fp) * 100 if tp + fp > 0 else 0
+        recall = tp / (tp + fn) * 100 if tp + fn > 0 else 0
+        if precision + recall > 0:
+            f1 = 2 * precision * recall / (precision + recall)
+        else:
+            f1 = 0
+        results[tag]['precision'] = precision
+        results[tag]['recall'] = recall
+        results[tag]['f1'] = f1
+        results[tag]['n_pred'] = len(pred_chunk)
+        results[tag]['n_true'] = len(true_chunk)
+        results[tag]['tp'] = tp
+        results[tag]['fn'] = fn
+        results[tag]['fp'] = fp
+
+    results['__total__'], _, total_true_entities, total_predicted_entities, _ = _global_stats_f1(results)
+    results['__total__']['n_pred'] = total_predicted_entities
+    results['__total__']['n_true'] = total_true_entities
+    results['__total__']["n_tokens"] = n_tokens
+    if print_results:
+        _print_conll_report(results, short_report, entity_of_interest)
+    return results
+
+
+def _print_conll_report(results, short_report=False, entity_of_interest=None):
+    """Print a CoNLL-style summary of the scores stored in `results`.
+
+    `results` maps entity types to their statistics and must contain a
+    '__total__' entry with 'n_tokens', 'precision', 'recall' and 'f1'.
+
+    The overall counts and scores always come first. Unless `short_report` is
+    set, one line per entity type follows; `entity_of_interest` then keeps only
+    the entries whose key contains it. With `short_report` set, a given
+    `entity_of_interest` is looked up by exact key and gets the only extra line.
+    """
+    def tag_line(name):
+        # One report line: the scores of `name`, then its predicted chunk count.
+        stats = results[name]
+        scores = ': precision:  {tot_prec:.2f}%; ' \
+                 'recall:  {tot_recall:.2f}%; ' \
+                 'F1:  {tot_f1:.2f} ' \
+                 '{tot_predicted}\n\n'.format(tot_prec=stats['precision'],
+                                              tot_recall=stats['recall'],
+                                              tot_f1=stats['f1'],
+                                              tot_predicted=stats['n_pred'])
+        return '\t' + name + scores
+
+    _, _, total_true_entities, total_predicted_entities, total_correct = _global_stats_f1(results)
+    totals = results['__total__']
+    report = [
+        'processed {len} tokens '
+        'with {tot_true} phrases; '
+        'found: {tot_pred} phrases;'
+        ' correct: {tot_cor}.\n\n'.format(len=totals["n_tokens"],
+                                          tot_true=total_true_entities,
+                                          tot_pred=total_predicted_entities,
+                                          tot_cor=total_correct),
+        'precision:  {tot_prec:.2f}%; '
+        'recall:  {tot_recall:.2f}%; '
+        'FB1:  {tot_f1:.2f}\n\n'.format(tot_prec=totals['precision'],
+                                        tot_recall=totals['recall'],
+                                        tot_f1=totals['f1']),
+    ]
+    # Append the per-entity lines selected by the two options.
+    if short_report:
+        if entity_of_interest is not None:
+            report.append(tag_line(entity_of_interest))
+    elif entity_of_interest is not None:
+        report.extend(tag_line(name) for name in results if entity_of_interest in name)
+    else:
+        report.extend(tag_line(name) for name in results if name != '__total__')
+    print(''.join(report))
diff --git a/modules/utils/plot_metrics.py b/modules/analyze_utils/plot_metrics.py
similarity index 54%
rename from modules/utils/plot_metrics.py
rename to modules/analyze_utils/plot_metrics.py
index 10248b8..1987a6a 100644
--- a/modules/utils/plot_metrics.py
+++ b/modules/analyze_utils/plot_metrics.py
@@ -3,6 +3,7 @@
 from matplotlib import pyplot as plt
 from .utils import tokens2spans, bert_labels2tokens, voting_choicer, first_choicer
 from sklearn_crfsuite.metrics import flat_classification_report
+from sklearn.metrics import f1_score
 
 
 def plot_by_class_curve(history, metric_, sup_labels):
@@ -63,36 +64,22 @@ def get_mean_max_metric(history, metric_="f1", return_idx=False):
     return res
 
 
-def get_bert_span_report(dl, preds, ignore_labels=["O"], fn=first_choicer):
-    tokens, labels = bert_labels2tokens(dl, preds, fn)
-    spans_pred = tokens2spans(tokens, labels)
-    tokens, labels = bert_labels2tokens(dl, [x.labels for x in dl.dataset], fn)
-    spans_true = tokens2spans(tokens, labels)
-    set_labels = set()
-    for idx in range(len(spans_pred)):
-        while len(spans_pred[idx]) < len(spans_true[idx]):
-            spans_pred[idx].append(("", "O"))
-        while len(spans_pred[idx]) > len(spans_true[idx]):
-            spans_true[idx].append(("O", "O"))
-        set_labels.update([y for x, y in spans_true[idx]])
-    set_labels -= set(ignore_labels)
-    return flat_classification_report([[y[1] for y in x] for x in spans_true], [[y[1] for y in x] for x in spans_pred], labels=list(set_labels), digits=3)
-
-
-def get_elmo_span_report(dl, preds, ignore_labels=["O"]):
-    tokens, labels = [x.tokens[1:-1] for x in dl.dataset], [p[1:-1] for p in preds]
-    spans_pred = tokens2spans(tokens, labels)
-    labels = [x.labels[1:-1] for x in dl.dataset]
-    spans_true = tokens2spans(tokens, labels)
-    set_labels = set()
-    for idx in range(len(spans_pred)):
-        while len(spans_pred[idx]) < len(spans_true[idx]):
-            spans_pred[idx].append(("", "O"))
-        while len(spans_pred[idx]) > len(spans_true[idx]):
-            spans_true[idx].append(("O", "O"))
-        set_labels.update([y for x, y in spans_true[idx]])
-    set_labels -= set(ignore_labels)
-    return flat_classification_report([[y[1] for y in x] for x in spans_true], [[y[1] for y in x] for x in spans_pred], labels=list(set_labels), digits=3)
+def get_bert_span_report(dl, preds, labels=None, fn=voting_choicer):
+    """Build a span-level classification report of predicted vs. reference labels.
+
+    `fn` is the label-choosing callback forwarded to `bert_labels2tokens` for both sides."""
+    pred_tokens, pred_labels = bert_labels2tokens(dl, preds, fn=fn)
+    true_tokens, true_labels = bert_labels2tokens(dl, [x.bert_labels for x in dl.dataset], fn=fn)
+    spans_pred = tokens2spans(pred_tokens, pred_labels)
+    spans_true = tokens2spans(true_tokens, true_labels)
+    res_t, res_p = [], []
+    for pred_span, true_span in zip(spans_pred, spans_true):
+        pred_texts = {text for text, _ in pred_span}
+        for (_, pl), (tt, tl) in zip(pred_span, true_span):
+            res_t.append(tl)
+            # A reference span whose text never occurs among the predicted spans counts as "O".
+            res_p.append(pl if tt in pred_texts else "O")
+    return flat_classification_report([res_t], [res_p], labels=labels, digits=4)
 
 
 def analyze_bert_errors(dl, labels, fn=voting_choicer):
@@ -107,18 +94,29 @@ def analyze_bert_errors(dl, labels, fn=voting_choicer):
         errors_ = []
         # if len(label_r) > 1:
         # assert len(label_r) == len(f.tokens) - 1
-        for idx, (l, rl, t) in enumerate(zip(label, label_r, f.tokens)):
-            if l != rl:
-                errors_.append({"token: ": t,
-                               "real_label": rl,
-                               "pred_label": l,
-                               "bert_token": f.bert_tokens[prev_idx:f.tok_map[idx]],
-                               "real_bert_label": f.labels[prev_idx:f.tok_map[idx]],
-                               "pred_bert_label": l_[prev_idx:f.tok_map[idx]], 
-                               "text_example": " ".join(f.tokens[1:-1]),
-                                "labels": " ".join(label_r[1:])})
+        for idx, (lbl, rl, t) in enumerate(zip(label, label_r, f.tokens)):
+            if lbl != rl:
+                errors_.append(
+                    {"token: ": t,
+                     "real_label": rl,
+                     "pred_label": lbl,
+                     "bert_token": f.bert_tokens[prev_idx:f.tok_map[idx]],
+                     "real_bert_label": f.labels[prev_idx:f.tok_map[idx]],
+                     "pred_bert_label": l_[prev_idx:f.tok_map[idx]],
+                     "text_example": " ".join(f.tokens[1:-1]),
+                     "labels": " ".join(label_r[1:])})
             prev_idx = f.tok_map[idx]
         errors.append(errors_)
         res_tokens.append(f.tokens[1:-1])
         res_labels.append(label[1:])
     return res_tokens, res_labels, errors
+
+
+def get_f1_score(y_true, y_pred, labels):
+    """Return the macro-averaged F1 over the flattened, pairwise-zipped label sequences."""
+    flat_true, flat_pred = [], []
+    for true_seq, pred_seq in zip(y_true, y_pred):
+        aligned = list(zip(true_seq, pred_seq))
+        flat_true.extend(t for t, _ in aligned)
+        flat_pred.extend(p for _, p in aligned)
+    return f1_score(flat_true, flat_pred, average="macro", labels=labels)
diff --git a/modules/utils/utils.py b/modules/analyze_utils/utils.py
similarity index 86%
rename from modules/utils/utils.py
rename to modules/analyze_utils/utils.py
index 88a80ad..c365ccf 100644
--- a/modules/utils/utils.py
+++ b/modules/analyze_utils/utils.py
@@ -8,10 +8,13 @@ def voting_choicer(tok_map, labels):
     label = []
     prev_idx = 0
     for origin_idx in tok_map:
-
-        vote_labels = Counter(
-            # if l not in ["[SEP]", "[CLS]"] else "I_O"
-            ["I_" + l.split("_")[1] if l not in ["[CLS]"] else "I_O" for l in labels[prev_idx:origin_idx]])
+        votes = []
+        for l in labels[prev_idx:origin_idx]:
+            if l != "X":
+                votes.append(l)
+        vote_labels = Counter(votes)
+        if not len(vote_labels):
+            vote_labels = {"B_O": 1}
         # vote_labels = Counter(c)
         lb = sorted(list(vote_labels), key=lambda x: vote_labels[x])
         if len(lb):
@@ -19,8 +22,7 @@ def voting_choicer(tok_map, labels):
         prev_idx = origin_idx
         if origin_idx < 0:
             break
-    assert "[SEP]" not in label
-    
+
     return label
 
 
@@ -29,13 +31,18 @@ def first_choicer(tok_map, labels):
     prev_idx = 0
     for origin_idx in tok_map:
         l = labels[prev_idx]
-        if l in ["[CLS]"]:
-            l = "I_O"
+        if l in ["X"]:
+            l = "B_O"
+        if l == "B_O":
+            for ll in labels[prev_idx + 1:origin_idx]:
+                if ll not in ["B_O", "I_O", "X"]:
+                    l = ll
+                    break
         label.append(l)
         prev_idx = origin_idx
         if origin_idx < 0:
             break
-    assert "[SEP]" not in label
+    # assert "[SEP]" not in label
     return label
 
 
@@ -43,7 +50,7 @@ def bert_labels2tokens(dl, labels, fn=voting_choicer):
     res_tokens = []
     res_labels = []
     for f, l in zip(dl.dataset, labels):
-        label = fn(f.tok_map, l)
+        label = fn(f.tok_map, l[1:])
 
         res_tokens.append(f.tokens[1:-1])
         res_labels.append(label[1:])
diff --git a/modules/data/bert_data.py b/modules/data/bert_data.py
index af15d7f..4aa74ee 100644
--- a/modules/data/bert_data.py
+++ b/modules/data/bert_data.py
@@ -1,34 +1,31 @@
 from torch.utils.data import DataLoader
-from modules.data import tokenization
 import torch
+from pytorch_pretrained_bert import BertTokenizer
+from modules.utils import read_config, if_none
+from modules import tqdm
 import pandas as pd
-import numpy as np
-from tqdm import tqdm
-from modules.utils import read_json, save_json
-import logging
-import os
+from copy import deepcopy
 
 
-class InputFeatures(object):
+class InputFeature(object):
     """A single set of features of data."""
 
     def __init__(
             self,
             # Bert data
             bert_tokens, input_ids, input_mask, input_type_ids,
+            # Ner data
+            bert_labels, labels_ids, labels,
             # Origin data
-            tokens, labels, labels_ids, labels_mask, tok_map, cls=None, cls_idx=None,
-            meta_tokens=None,
-            meta=None):
+            tokens, tok_map,
+            # Cls data
+            cls=None, id_cls=None):
         """
         Data has the following structure.
         data[0]: list, tokens ids
         data[1]: list, tokens mask
         data[2]: list, tokens type ids (for bert)
-        data[3]: list, tokens meta info (if meta is not None)
-        ...
-        data[-2]: list, labels mask
-        data[-1]: list, labels ids
+        data[3]: list, bert labels ids
         """
         self.data = []
         # Bert data
@@ -39,437 +36,357 @@ def __init__(
         self.data.append(input_mask)
         self.input_type_ids = input_type_ids
         self.data.append(input_type_ids)
-        # Meta data
-        self.meta = meta
-        self.meta_tokens = meta_tokens
-        if meta is not None:
-            self.data.append(meta)
-        # Origin data
-        self.tokens = tokens
-        self.labels = labels
-        # Used for joint model
-        self.cls = cls
-        self.cls_idx = cls_idx
-        if cls is not None:
-            self.data.append(cls_idx)
-        # Labels data
-        self.labels_mask = labels_mask
-        self.data.append(labels_mask)
+        # Ner data
+        self.bert_labels = bert_labels
         self.labels_ids = labels_ids
         self.data.append(labels_ids)
+        # Classification data
+        self.cls = cls
+        self.id_cls = id_cls
+        # Origin data
+        self.tokens = tokens
         self.tok_map = tok_map
+        self.labels = labels
 
+    def __iter__(self):
+        return iter(self.data)
 
-class DataLoaderForTrain(DataLoader):
 
-    def __init__(self, data_set, shuffle, cuda, **kwargs):
-        super(DataLoaderForTrain, self).__init__(
+class TextDataLoader(DataLoader):
+    def __init__(self, data_set, shuffle=False, device="cuda", batch_size=16):
+        super(TextDataLoader, self).__init__(
             dataset=data_set,
             collate_fn=self.collate_fn,
             shuffle=shuffle,
-            **kwargs
+            batch_size=batch_size
         )
-        self.cuda = cuda
+        self.device = device
 
     def collate_fn(self, data):
         res = []
         token_ml = max(map(lambda x_: sum(x_.data[1]), data))
-        label_ml = max(map(lambda x_: sum(x_.data[-2]), data))
-        sorted_idx = np.argsort(list(map(lambda x_: sum(x_.data[1]), data)))[::-1]
-        for idx in sorted_idx:
-            f = data[idx]
+        for sample in data:
             example = []
-            for idx_, x in enumerate(f.data[:-2]):
+            for x in sample:
                 if isinstance(x, list):
                     x = x[:token_ml]
                 example.append(x)
-            example.append(f.data[-2][:label_ml])
-            example.append(f.data[-1][:label_ml])
             res.append(example)
         res_ = []
-        for idx, x in enumerate(zip(*res)):
-            if data[0].meta is not None and idx == 3:
-                res_.append(torch.FloatTensor(x))
-            else:
-                res_.append(torch.LongTensor(x))
-        if self.cuda:
-            res_ = [t.cuda() for t in res_]
-        return res_
+        for x in zip(*res):
+            res_.append(torch.LongTensor(x))
+        return [t.to(self.device) for t in res_]
 
 
-class DataLoaderForPredict(DataLoader):
+class TextDataSet(object):
 
-    def __init__(self, data_set, cuda, **kwargs):
-        super(DataLoaderForPredict, self).__init__(
-            dataset=data_set,
-            collate_fn=self.collate_fn,
-            **kwargs
-        )
-        self.cuda = cuda
+    @classmethod
+    def from_config(cls, config, clear_cache=False, df=None):
+        return cls.create(**read_config(config), clear_cache=clear_cache, df=df)
 
-    def collate_fn(self, data):
-        res = []
-        token_ml = max(map(lambda x_: sum(x_.data[1]), data))
-        label_ml = max(map(lambda x_: sum(x_.data[-2]), data))
-        sorted_idx = np.argsort(list(map(lambda x_: sum(x_.data[1]), data)))[::-1]
-        for idx in sorted_idx:
-            f = data[idx]
-            example = []
-            for x in f.data[:-2]:
-                if isinstance(x, list):
-                    x = x[:token_ml]
-                example.append(x)
-            example.append(f.data[-2][:label_ml])
-            example.append(f.data[-1][:label_ml])
-            res.append(example)
-        res_ = []
-        for idx, x in enumerate(zip(*res)):
-            if data[0].meta is not None and idx == 3:
-                res_.append(torch.FloatTensor(x))
-            else:
-                res_.append(torch.LongTensor(x))
-        sorted_idx = torch.LongTensor(list(sorted_idx))
-        if self.cuda:
-            res_ = [t.cuda() for t in res_]
-            sorted_idx = sorted_idx.cuda()
-        return res_, sorted_idx
-
-
-def get_data(
-        df, tokenizer, label2idx=None, cls2idx=None, meta2idx=None, is_cls=False, is_meta=False,
-        max_seq_len=424, pad="<pad>"):
-    if label2idx is None:
-        label2idx = {pad: 0, '[CLS]': 1}
-    features = []
-    all_args = [df["1"].tolist(), df["0"].tolist()]
-    if is_cls:
-        # Use joint model
-        if cls2idx is None:
-            cls2idx = dict()
-        all_args.append(df["2"].tolist())
-
-    if is_meta:
-        # TODO: add multiply meta info
-        if meta2idx is None:
-            meta2idx = {pad: 0, '[CLS]': 1}
-        all_args.append(df["3"].tolist())
-    # TODO: add chunks
-    total = len(df["0"].tolist())
-    cls = None
-    meta = None
-    for args in tqdm(enumerate(zip(*all_args)), total=total, leave=False):
-        if is_cls:
-            if is_meta:
-                idx, (text, labels, cls, meta) = args
+    @classmethod
+    def create(cls, idx2labels_path, df_path=None, idx2labels=None,
+               idx2cls=None, idx2cls_path=None, min_char_len=1,
+               model_name="bert-base-multilingual-cased",
+               max_sequence_length=424, pad_idx=0, clear_cache=False,
+               is_cls=False, markup="IO", df=None, tokenizer=None):
+        """
+        Build a data set: resolve the tokenizer and data frame, then load the vocabularies.
+
+        The frame is `df` if given, else read from `df_path` (tab-separated), else
+        an empty frame. When `clear_cache` is true the vocab files are rewritten
+        from the frame by `create_vocabs` before `load()` reads them back.
+        """
+        # Without an explicit tokenizer, fetch the pretrained one named by model_name.
+        if tokenizer is None:
+            tokenizer = BertTokenizer.from_pretrained(model_name)
+        config = {
+            "min_char_len": min_char_len,
+            "model_name": model_name,
+            "max_sequence_length": max_sequence_length,
+            "clear_cache": clear_cache,
+            "df_path": df_path,
+            "pad_idx": pad_idx,
+            "is_cls": is_cls,
+            "idx2labels_path": idx2labels_path,
+            "idx2cls_path": idx2cls_path,
+            "markup": markup
+        }
+        if df is None and df_path is not None:
+            df = pd.read_csv(df_path, sep='\t')
+        elif df is None:
+            if is_cls:
+                df = pd.DataFrame(columns=["labels", "text", "clf"])
             else:
-                idx, (text, labels, cls) = args
+                df = pd.DataFrame(columns=["labels", "text"])
+        if clear_cache:
+            _ = cls.create_vocabs(
+                df, tokenizer, idx2labels_path, markup, idx2cls_path, pad_idx, is_cls, idx2labels, idx2cls)
+        # Forward is_cls: create_feature reads self.is_cls, which otherwise stays False.
+        self = cls(tokenizer, df=df, config=config, is_cls=is_cls)
+        self.load()
+        return self
+
+    @staticmethod
+    def create_vocabs(
+            df, tokenizer, idx2labels_path, markup="IO",
+            idx2cls_path=None, pad_idx=0, is_cls=False, idx2labels=None, idx2cls=None):
+        if idx2labels is None:
+            label2idx = {"[PAD]": pad_idx, '[CLS]': 1, '[SEP]': 2, "X": 3}
+            idx2label = ["[PAD]", '[CLS]', '[SEP]', "X"]
         else:
-            if is_meta:
-                idx, (text, labels, meta) = args
-            else:
-                idx, (text, labels) = args
+            label2idx = {label: idx for idx, label in enumerate(idx2labels)}
+            idx2label = idx2labels
+        idx2cls = idx2cls
+        cls2idx = None
+        if is_cls:
+            idx2cls = []
+            cls2idx = {label: idx for idx, label in enumerate(idx2cls)}
+        for _, row in tqdm(df.iterrows(), total=len(df), leave=False, desc="Creating labels vocabs"):
+            labels = row.labels.split()
+            origin_tokens = row.text.split()
+            if is_cls and row.cls not in cls2idx:
+                cls2idx[row.cls] = len(cls2idx)
+                idx2cls.append(row.cls)
+            prev_label = ""
+            for origin_token, label in zip(origin_tokens, labels):
+                if markup == "BIO":
+                    prefix = "B_"
+                else:
+                    prefix = "I_"
+                if label != "O":
+                    label = label.split("_")[1]
+                    if label == prev_label:
+                        prefix = "I_"
+                    prev_label = label
+                else:
+                    prev_label = label
+                cur_tokens = tokenizer.tokenize(origin_token)
+                bert_label = [prefix + label] + ["X"] * (len(cur_tokens) - 1)
+                for label_ in bert_label:
+                    if label_ not in label2idx:
+                        label2idx[label_] = len(label2idx)
+                        idx2label.append(label_)
+        with open(idx2labels_path, "w", encoding="utf-8") as f:
+            for label in idx2label:
+                f.write("{}\n".format(label))
+
+        if is_cls:
+            with open(idx2cls_path, "w", encoding="utf-8") as f:
+                for label in idx2cls:
+                    f.write("{}\n".format(label))
+
+        return label2idx, idx2label, cls2idx, idx2cls
+
+    def load(self, df_path=None):
+        """
+        Load the data frame and the label (and, if is_cls, class) vocabularies.
+
+        Assumes `if_none(a, b)` yields `b` when `a` is None, so the path falls back to
+        config["df_path"]. Vocab files hold one entry per line; line number = index.
+        """
+        df_path = if_none(df_path, self.config["df_path"])
+        # No path at all: keep the in-memory frame, pd.read_csv(None) would raise.
+        if df_path is not None:
+            self.df = pd.read_csv(df_path, sep='\t')
+        with open(self.config["idx2labels_path"], "r", encoding="utf-8") as f:
+            self.idx2label = [label.strip() for label in f]
+        self.label2idx = {label: idx for idx, label in enumerate(self.idx2label)}
+
+        if self.config["is_cls"]:
+            with open(self.config["idx2cls_path"], "r", encoding="utf-8") as f:
+                self.idx2cls = [label.strip() for label in f]
+            self.cls2idx = {label: idx for idx, label in enumerate(self.idx2cls)}
+
+    def create_feature(self, row):
         bert_tokens = []
         bert_labels = []
-        bert_tokens.append("[CLS]")
-        bert_labels.append("[CLS]")
-        orig_tokens = str(text).split()
-        labels = str(labels).split()
-        pad_idx = label2idx[pad]
-        assert len(orig_tokens) == len(labels)
-        args = [orig_tokens, labels]
+        orig_tokens = row.text.split()
+        origin_labels = row.labels.split()
         tok_map = []
-        meta_tokens = None
-        if is_meta:
-            meta_tokens = ["[CLS]"]
-            meta = str(meta).split()
-            args.append(meta)
-        for idx_, ars in enumerate(zip(*args)):
-            orig_token, label = ars[:2]
-            m = pad
-            if is_meta:
-                m = ars[2]
-            # BIO to IO as BERT proposed https://arxiv.org/pdf/1810.04805.pdf
-            prefix = "I_"
+        prev_label = ""
+        for orig_token, label in zip(orig_tokens, origin_labels):
+            cur_tokens = self.tokenizer.tokenize(orig_token)
+            if self.config["max_sequence_length"] - 2 < len(bert_tokens) + len(cur_tokens):
+                break
+            if self.config["markup"] == "BIO":
+                prefix = "B_"
+            else:
+                prefix = "I_"
             if label != "O":
                 label = label.split("_")[1]
-
-            cur_tokens = tokenizer.tokenize(orig_token)
-            if max_seq_len - 1 < len(bert_tokens) + len(cur_tokens):
-                break
+                if label == prev_label:
+                    prefix = "I_"
+                prev_label = label
+            else:
+                prev_label = label
+            cur_tokens = self.tokenizer.tokenize(orig_token)
+            bert_label = [prefix + label] + ["X"] * (len(cur_tokens) - 1)
             tok_map.append(len(bert_tokens))
-            if is_meta:
-                meta_tokens.extend([m] + ["X"] * (len(cur_tokens) - 1))
             bert_tokens.extend(cur_tokens)
-            bert_label = [prefix + label] + ["X"] * (len(cur_tokens) - 1)
             bert_labels.extend(bert_label)
 
-        orig_tokens = ["[CLS]"] + orig_tokens
-
-        input_ids = tokenizer.convert_tokens_to_ids(bert_tokens)
-        labels = bert_labels
-        for l in labels:
-            if l not in label2idx:
-                label2idx[l] = len(label2idx)
-        labels_ids = [label2idx[l] for l in labels]
-        meta_ids = None
-        if is_meta:
-            for l in meta_tokens:
-                if l not in meta2idx:
-                    meta2idx[l] = len(meta2idx)
-            meta_ids = [meta2idx[l] for l in meta_tokens]
-
+        orig_tokens = ["[CLS]"] + orig_tokens + ["[SEP]"]
+        bert_labels = ["[CLS]"] + bert_labels + ["[SEP]"]
+        if self.config["markup"] == "BIO":
+            O_label = self.label2idx.get("B_O")
+        else:
+            O_label = self.label2idx.get("I_O")
+        input_ids = self.tokenizer.convert_tokens_to_ids(['[CLS]'] + bert_tokens + ['[SEP]'])
+        labels_ids = [self.label2idx.get(l, O_label) for l in bert_labels]
         # The mask has 1 for real tokens and 0 for padding tokens. Only real
         # tokens are attended to.
         input_mask = [1] * len(input_ids)
-        labels_mask = [1] * len(labels_ids)
         # Zero-pad up to the sequence length.
-        while len(input_ids) < max_seq_len:
-            input_ids.append(0)
+        while len(input_ids) < self.config["max_sequence_length"]:
+            input_ids.append(self.config["pad_idx"])
+            labels_ids.append(self.config["pad_idx"])
             input_mask.append(0)
-            labels_ids.append(pad_idx)
-            labels_mask.append(0)
-            if is_meta:
-                meta_ids.append(meta2idx[pad])
             tok_map.append(-1)
-            if is_meta:
-                meta_tokens.append([0] * len(meta[0]))
         input_type_ids = [0] * len(input_ids)
-        # For joint model
-        cls_idx = None
-        if is_cls:
-            if cls not in cls2idx:
-                cls2idx[cls] = len(cls2idx)
-            cls_idx = cls2idx[cls]
-
-        features.append(InputFeatures(
+        cls = None
+        id_cls = None
+        if self.is_cls:
+            cls = row.cls
+            id_cls = self.cls2idx[cls]
+        return InputFeature(
             # Bert data
             bert_tokens=bert_tokens,
             input_ids=input_ids,
             input_mask=input_mask,
             input_type_ids=input_type_ids,
+            bert_labels=bert_labels, labels_ids=labels_ids, labels=origin_labels,
             # Origin data
             tokens=orig_tokens,
-            labels=labels,
-            labels_ids=labels_ids,
-            labels_mask=labels_mask,
             tok_map=tok_map,
-            # Joint data
-            cls=cls,
-            cls_idx=cls_idx,
-            # Meta data
-            meta_tokens=meta_tokens,
-            meta=meta_ids
-        ))
-        assert len(input_ids) == len(input_mask)
-        assert len(input_ids) == len(input_type_ids)
-        assert len(input_ids) == len(labels_ids)
-        assert len(input_ids) == len(labels_mask)
-    return features, label2idx, cls2idx, meta2idx
-
-
-def get_bert_data_loader_for_predict(path, learner):
-    df = pd.read_csv(path)
-
-    f, label2idx, cls2idx, meta2idx = get_data(
-        df, learner.data.tokenizer,
-        learner.data.label2idx, learner.data.cls2idx,
-        learner.data.meta2idx,
-        learner.data.is_cls,
-        learner.data.is_meta,
-        learner.data.max_seq_len,
-        learner.data.pad)
-    dl = DataLoaderForPredict(
-        f, batch_size=learner.data.batch_size, shuffle=False,
-        cuda=learner.data.use_cuda)
-    return dl
-
-
-class BertNerData(object):
-
-    def get_config(self):
-        config = {
-            "train_path": self.train_path,
-            "valid_path": self.valid_path,
-            "bert_vocab_file": self.bert_vocab_file,
-            "bert_model_type": self.bert_model_type,
-            "idx2label_path": self.idx2label_path,
-            "idx2cls_path": self.idx2cls_path,
-            "idx2meta_path": self.idx2meta_path,
-            "max_seq_len": self.max_seq_len,
-            "batch_size": self.batch_size,
-            "is_cls": self.is_cls,
-            "is_meta": self.is_meta,
-            "pad": "<pad>",
-            "use_cuda": self.use_cuda,
-            "config_path": self.config_path
-        }
-        return config
-
-    def __init__(self, bert_vocab_file, idx2label, config_path=None, train_path=None, valid_path=None,
-                 train_dl=None, valid_dl=None, tokenizer=None,
-                 bert_model_type="bert_cased", idx2cls=None, idx2meta=None, max_seq_len=424,
-                 batch_size=16, is_meta=False, is_cls=False,
-                 idx2label_path=None, idx2cls_path=None, idx2meta_path=None, pad="<pad>", use_cuda=True):
-        """Store attributes in one cls. For more doc see BertNerData.create"""
-        self.train_path = train_path
-        self.valid_path = valid_path
-        self.config_path = config_path
-        self.bert_model_type = bert_model_type
-        self.bert_vocab_file = bert_vocab_file
-        self.train_dl = train_dl
-        self.valid_dl = valid_dl
-        self.tokenizer = tokenizer
-        self.max_seq_len = max_seq_len
-        self.batch_size = batch_size
-
-        self.idx2label = idx2label
-        self.label2idx = {label: idx for idx, label in enumerate(idx2label)}
-
-        self.idx2meta = idx2meta
-        self.is_meta = is_meta
-        if is_meta:
-            self.meta2idx = {label: idx for idx, label in enumerate(idx2meta)}
-        self.idx2cls = idx2cls
-        self.is_cls = is_cls
-        if is_cls:
-            self.cls2idx = {label: idx for idx, label in enumerate(idx2cls)}
-
-        self.use_cuda = use_cuda
+            # Cls
+            cls=cls, id_cls=id_cls
+        )
 
-        self.pad = pad
+    def __getitem__(self, item):
+        if self.config["df_path"] is None and self.df is None:
+            raise ValueError("Should setup df_path or df.")
+        if self.df is None:
+            self.load()
 
-        self.idx2label_path = idx2label_path
-        self.idx2cls_path = idx2cls_path
-        self.idx2meta_path = idx2meta_path
+        return self.create_feature(self.df.iloc[item])
 
-        if is_cls and not idx2cls:
-            raise ValueError("Must set idx2cls if run on classification mode.")
-        if is_meta and not idx2meta:
-            raise ValueError("Must set idx2meta if run on meta info using mode.")
+    def __len__(self):
+        return len(self.df) if self.df is not None else 0
 
-    # TODO: write docs
-    @classmethod
-    def from_config(cls, config, clear_cache=True):
-        """Read config and call create. For more docs, see BertNerData.create"""
-        config = read_json(config)
-        config["clear_cache"] = clear_cache
-        return cls.create(**config)
+    def save(self, df_path=None):
+        df_path = if_none(df_path, self.config["df_path"])
+        self.df.to_csv(df_path, sep='\t', index=False)
 
-    @classmethod
-    def create(cls,
-               bert_vocab_file, config_path=None, train_path=None, valid_path=None,
-               idx2label=None, bert_model_type="bert_cased", idx2cls=None, idx2meta=None,
-               max_seq_len=424,
-               batch_size=16, is_meta=False, is_cls=False,
-               idx2label_path=None, idx2cls_path=None, idx2meta_path=None, pad="<pad>", use_cuda=True,
-               clear_cache=True):
-        """
-        Create or skip data loaders, load or create vocabs.
-
-        Parameters
-        ----------
-        bert_vocab_file : str
-            Path of vocabulary for BERT tokenizer.
-        config_path : str, or None, optional (default=None)
-            Path of config of BertNerData.
-        train_path : str or None, optional (default=None)
-            Path of train data frame. If not None update idx2label, idx2cls, idx2meta.
-        valid_path : str or None, optional (default=None)
-            Path of valid data frame. If not None update idx2label, idx2cls, idx2meta.
-        idx2label : list or None, optional (default=None)
-            Map form index to label.
-        bert_model_type : str, optional (default="bert_cased")
-            Mode of BERT model (CASED or UNCASED).
-        idx2cls : list or None, optional (default=None)
-            Map form index to cls.
-        idx2meta : list or None, optional (default=None)
-            Map form index to meta.
-        max_seq_len : int, optional (default=424)
-            Max sequence length.
-        batch_size : int, optional (default=16)
-            Batch size.
-        is_meta : bool, optional (default=False)
-            Use meta info or not.
-        is_cls : bool, optional (default=False)
-            Use joint model or single.
-        idx2label_path : str or None, optional (default=None)
-            Path to idx2label map. If not None and idx2label is None load idx2label.
-        idx2cls_path : str or None, optional (default=None)
-            Path to idx2cls map. If not None and idx2cls is None load idx2cls.
-        idx2meta_path : str or None, optional (default=None)
-            Path to idx2meta map. If not None and idx2meta is None load idx2meta.
-        pad : str, optional (default="<pad>")
-            Padding token.
-        use_cuda : bool, optional (default=True)
-            Run model on gpu or cpu. If gpu pin tensors in data loaders to gpu.
-        clear_cache :
-            If True, rewrite all vocabs and BertNerData config.
-
-        Returns
-        ----------
-        data : BertNerData
-            Created object of BertNerData.
-        """
-        if idx2label is None and idx2label_path is None:
-            raise ValueError("Must set idx2label_path.")
+    def __init__(
+            self, tokenizer,
+            df=None,
+            config=None,
+            idx2label=None,
+            idx2cls=None,
+            is_cls=False):
+        self.df = df
+        self.tokenizer = tokenizer
+        self.config = config
+        self.idx2label = idx2label
+        self.label2idx = None
+        if idx2label is not None:
+            self.label2idx = {label: idx for idx, label in enumerate(idx2label)}
 
-        if bert_model_type == "bert_cased":
-            do_lower_case = False
-        elif bert_model_type == "bert_uncased":
-            do_lower_case = True
-        else:
-            raise NotImplementedError("No requested mode :(.")
+        self.idx2cls = idx2cls
 
-        if is_meta and idx2meta is None and idx2meta_path is None:
-            raise ValueError("Must idx2meta or idx2meta_path.")
+        if idx2cls is not None:
+            self.cls2idx = {label: idx for idx, label in enumerate(idx2cls)}
+        self.is_cls = is_cls
 
-        tokenizer = tokenization.FullTokenizer(vocab_file=bert_vocab_file, do_lower_case=do_lower_case)
 
-        meta2idx = None
-        cls2idx = None
-        label2idx = None
-
-        if idx2label is None and os.path.exists(str(idx2label_path)):
-            idx2label = read_json(idx2label_path)
-            label2idx = {label: idx for idx, label in enumerate(idx2label)}
-        if is_meta and idx2meta is None and os.path.exists(str(idx2meta_path)):
-            idx2meta = read_json(idx2meta_path)
-            meta2idx = {label: idx for idx, label in enumerate(idx2meta)}
-        if is_cls and idx2cls is None and os.path.exists(str(idx2cls_path)):
-            idx2cls = read_json(idx2cls_path)
-            cls2idx = {label: idx for idx, label in enumerate(idx2cls)}
+class LearnData(object):
+    """Holder for the train/validation datasets and their data loaders."""
+    def __init__(self, train_ds=None, train_dl=None, valid_ds=None, valid_dl=None):
+        self.train_ds = train_ds
+        self.train_dl = train_dl
+        self.valid_ds = valid_ds
+        self.valid_dl = valid_dl
 
+    @classmethod
+    def create(cls,
+               # DataSet params
+               train_df_path, valid_df_path, idx2labels_path,
+               idx2labels=None, idx2cls=None, idx2cls_path=None,
+               min_char_len=1, model_name="bert-base-multilingual-cased",
+               max_sequence_length=424, pad_idx=0, clear_cache=False,
+               is_cls=False, markup="IO", train_df=None, valid_df=None,
+               # DataLoader params
+               device="cuda", batch_size=16):
+        """Create the train/validation ``TextDataSet`` objects and their loaders.
+
+        The train set is built only when ``idx2labels_path`` is given, and it gets
+        a shuffling loader only when it is non-empty. The validation set is built
+        when ``valid_df_path`` is given; it reuses the train set's vocabularies and
+        tokenizer when a train set exists and otherwise falls back to the
+        ``idx2labels`` / ``idx2cls`` arguments rather than dereferencing ``None``.
+
+        Returns a ``LearnData`` holding whichever parts were created.
+        """
+        # Keyword arguments forwarded unchanged to both TextDataSet.create calls.
+        shared = dict(
+            idx2cls_path=idx2cls_path,
+            min_char_len=min_char_len,
+            model_name=model_name,
+            max_sequence_length=max_sequence_length,
+            pad_idx=pad_idx,
+            is_cls=is_cls,
+            markup=markup)
+        train_ds = None
         train_dl = None
-        if train_path:
-            train_df = pd.read_csv(train_path)
-
-            train_f, label2idx, cls2idx, meta2idx = get_data(
-                train_df, tokenizer, label2idx, cls2idx, meta2idx, is_cls, is_meta, max_seq_len, pad)
-            train_dl = DataLoaderForTrain(
-                train_f, batch_size=batch_size, shuffle=True, cuda=use_cuda)
+        valid_ds = None
         valid_dl = None
-        if valid_path:
-            valid_df = pd.read_csv(valid_path)
-            valid_f, label2idx, cls2idx, meta2idx = get_data(
-                valid_df, tokenizer, label2idx, cls2idx, meta2idx, is_cls, is_meta, max_seq_len)
-            valid_dl = DataLoaderForTrain(
-                valid_f, batch_size=batch_size, cuda=use_cuda, shuffle=False)
-
-        idx2label = sorted(label2idx, key=lambda x: label2idx[x])
-        if meta2idx:
-            idx2meta = sorted(idx2meta, key=lambda x: idx2meta[x])
-        if cls2idx:
-            idx2cls = sorted(idx2cls, key=lambda x: idx2cls[x])
-        data = cls(bert_vocab_file, idx2label, config_path, train_path, valid_path,
-                   train_dl, valid_dl, tokenizer,
-                   bert_model_type, idx2cls, idx2meta, max_seq_len,
-                   batch_size, is_meta, is_cls,
-                   idx2label_path, idx2cls_path, idx2meta_path, pad, use_cuda)
-        if clear_cache:
-            logging.info("Saving vocabs...")
-            save_json(idx2label, idx2label_path)
-            save_json(idx2cls, idx2cls_path)
-            save_json(idx2meta, idx2meta_path)
-            save_json(data.get_config(), config_path)
-
-        return data
+        if idx2labels_path is not None:
+            train_ds = TextDataSet.create(
+                idx2labels_path,
+                train_df_path,
+                idx2labels=idx2labels,
+                idx2cls=idx2cls,
+                clear_cache=clear_cache, df=train_df, **shared)
+            if len(train_ds):
+                train_dl = TextDataLoader(train_ds, device=device, shuffle=True, batch_size=batch_size)
+        if valid_df_path is not None:
+            if train_ds is not None:
+                # Reuse the train set's vocabularies and tokenizer.
+                valid_kwargs = dict(
+                    shared, idx2labels=train_ds.idx2label, idx2cls=train_ds.idx2cls,
+                    tokenizer=train_ds.tokenizer)
+            else:
+                # No train set was built: fall back to the caller-supplied vocabularies.
+                valid_kwargs = dict(shared, idx2labels=idx2labels, idx2cls=idx2cls)
+            # The validation set never clears the cache.
+            valid_ds = TextDataSet.create(
+                idx2labels_path, valid_df_path,
+                clear_cache=False, df=valid_df, **valid_kwargs)
+            valid_dl = TextDataLoader(valid_ds, device=device, batch_size=batch_size)
+
+        return cls(train_ds, train_dl, valid_ds, valid_dl)
+
+    def load(self):
+        """Call ``load()`` on every dataset that is set."""
+        for ds in (self.train_ds, self.valid_ds):
+            if ds is not None:
+                ds.load()
+
+    def save(self):
+        """Call ``save()`` on every dataset that is set."""
+        for ds in (self.train_ds, self.valid_ds):
+            if ds is not None:
+                ds.save()
+
+
+def get_data_loader_for_predict(data, df_path=None, df=None):
+    """Build a non-shuffling loader over ``df``/``df_path`` that reuses the train set's setup."""
+    train_ds, train_dl = data.train_ds, data.train_dl
+    # Work on a copy so the train dataset's own config is left untouched.
+    config = deepcopy(train_ds.config)
+    config.update(df_path=df_path, clear_cache=False)
+    ds = TextDataSet.create(
+        idx2labels=train_ds.idx2label, idx2cls=train_ds.idx2cls,
+        df=df, tokenizer=train_ds.tokenizer, **config)
+    return TextDataLoader(ds, device=train_dl.device, batch_size=train_dl.batch_size, shuffle=False)
diff --git a/modules/data/bert_word_data.py b/modules/data/bert_word_data.py
deleted file mode 100644
index bc29807..0000000
--- a/modules/data/bert_word_data.py
+++ /dev/null
@@ -1,211 +0,0 @@
-from modules.data import tokenization
-from modules.utils.utils import ipython_info
-import pandas as pd
-from tqdm import tqdm
-import json
-from .bert_data import InputFeatures, DataLoaderForTrain, DataLoaderForPredict
-
-
-def get_data(
-        df, tokenizer, label2idx=None, max_seq_len=424, pad="<pad>", cls2idx=None,
-        is_cls=False, is_meta=False):
-    if label2idx is None:
-        label2idx = {pad: 0, '[CLS]': 1, '[SEP]': 2}
-    features = []
-    all_args = []
-    if is_cls:
-        # Use joint model
-        if cls2idx is None:
-            cls2idx = dict()
-        all_args.extend([df["1"].tolist(), df["0"].tolist(), df["2"].tolist()])
-    else:
-        all_args.extend([df["1"].tolist(), df["0"].tolist()])
-    if is_meta:
-        all_args.append(df["3"].tolist())
-    total = len(df["0"].tolist())
-    cls = None
-    meta = None
-    for args in tqdm_notebook(enumerate(zip(*all_args)), total=total, leave=False):
-        if is_cls:
-            if is_meta:
-                idx, (text, labels, cls, meta) = args
-            else:
-                idx, (text, labels, cls) = args
-        else:
-            if is_meta:
-                idx, (text, labels, meta) = args
-            else:
-                idx, (text, labels) = args
-
-        tok_map = []
-        meta_tokens = []
-        if is_meta:
-            meta = json.loads(meta)
-            meta_tokens.append([0] * len(meta[0]))
-        bert_tokens = []
-        bert_labels = []
-        bert_tokens.append("[CLS]")
-        bert_labels.append("[CLS]")
-        orig_tokens = []
-        orig_tokens.extend(text.split())
-        labels = labels.split()
-        pad_idx = label2idx[pad]
-        assert len(orig_tokens) == len(labels)
-        prev_label = ""
-        for idx_, (orig_token, label) in enumerate(zip(orig_tokens, labels)):
-            prefix = "B_"
-            if label != "O":
-                label = label.split("_")[1]
-                if label == prev_label:
-                    prefix = "I_"
-                prev_label = label
-            else:
-                prev_label = label
-            tok_map.append(len(bert_tokens))
-            cur_tokens = tokenizer.tokenize(orig_token)
-            if max_seq_len - 1 < len(bert_tokens) + len(cur_tokens):
-                break
-
-            if is_meta:
-                meta_tokens.extend([meta[idx_]] * len(cur_tokens))
-            bert_tokens.extend(cur_tokens)
-            bert_label = [prefix + label] + ["I_" + label] * (len(cur_tokens) - 1)
-            bert_labels.extend(bert_label)
-        bert_tokens.append("[SEP]")
-        bert_labels.append("[SEP]")
-        if is_meta:
-            meta_tokens.append([0] * len(meta[0]))
-        orig_tokens = ["[CLS]"] + orig_tokens + ["[SEP]"]
-
-        input_ids = tokenizer.convert_tokens_to_ids(bert_tokens)
-        # labels = bert_labels
-        for l in labels:
-            if l not in label2idx:
-                label2idx[l] = len(label2idx)
-        labels_ids = [label2idx[l] for l in labels]
-
-        # The mask has 1 for real tokens and 0 for padding tokens. Only real
-        # tokens are attended to.
-        input_mask = [1] * len(input_ids)
-        labels_mask = [1] * len(labels_ids)
-        # Zero-pad up to the sequence length.
-        while len(input_ids) < max_seq_len:
-            input_ids.append(0)
-            input_mask.append(0)
-            labels_ids.append(pad_idx)
-            labels_mask.append(0)
-            tok_map.append(-1)
-            if is_meta:
-                meta_tokens.append([0] * len(meta[0]))
-        # assert len(input_ids) == len(bert_labels_ids)
-        input_type_ids = [0] * len(input_ids)
-        # For joint model
-        cls_idx = None
-        if is_cls:
-            if cls not in cls2idx:
-                cls2idx[cls] = len(cls2idx)
-            cls_idx = cls2idx[cls]
-        if is_meta:
-            meta = meta_tokens
-        features.append(InputFeatures(
-            # Bert data
-            bert_tokens=bert_tokens,
-            input_ids=input_ids,
-            input_mask=input_mask,
-            input_type_ids=input_type_ids,
-            # Origin data
-            tokens=orig_tokens,
-            labels=labels,
-            labels_ids=labels_ids,
-            labels_mask=labels_mask,
-            tok_map=tok_map,
-            # Joint data
-            cls=cls,
-            cls_idx=cls_idx,
-            # Meta data
-            meta=meta
-        ))
-        assert len(input_ids) == len(input_mask)
-        assert len(input_ids) == len(input_type_ids)
-        assert len(input_ids) == len(labels_ids)
-        assert len(input_ids) == len(labels_mask)
-    if is_cls:
-        
-        return features, (label2idx, cls2idx)
-    return features, label2idx
-
-
-def get_bert_data_loaders(train, valid, vocab_file, batch_size=16, cuda=True, is_cls=False,
-                          do_lower_case=False, max_seq_len=424, is_meta=False):
-    train = pd.read_csv(train)
-    valid = pd.read_csv(valid)
-
-    cls2idx = None
-
-    tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file, do_lower_case=do_lower_case)
-    train_f, label2idx = get_data(train, tokenizer, is_cls=is_cls, max_seq_len=max_seq_len, is_meta=is_meta)
-    if is_cls:
-        label2idx, cls2idx = label2idx
-    train_dl = DataLoaderForTrain(
-        train_f, batch_size=batch_size, shuffle=True, cuda=cuda)
-    valid_f, label2idx = get_data(
-        valid, tokenizer, label2idx, cls2idx=cls2idx, is_cls=is_cls, max_seq_len=max_seq_len, is_meta=is_meta)
-    if is_cls:
-        label2idx, cls2idx = label2idx
-    valid_dl = DataLoaderForTrain(
-        valid_f, batch_size=batch_size, cuda=cuda, shuffle=False)
-    if is_cls:
-        return train_dl, valid_dl, tokenizer, label2idx, max_seq_len, cls2idx
-    return train_dl, valid_dl, tokenizer, label2idx, max_seq_len
-
-
-def get_bert_data_loader_for_predict(path, learner):
-    df = pd.read_csv(path)
-    f, _ = get_data(df, tokenizer=learner.data.tokenizer,
-                    label2idx=learner.data.label2idx, cls2idx=learner.data.cls2idx,
-                    is_cls=learner.data.is_cls,
-                    max_seq_len=learner.data.max_seq_len, is_meta=learner.data.is_meta)
-    dl = DataLoaderForPredict(
-        f, batch_size=learner.data.batch_size, shuffle=False,
-        cuda=True)
-
-    return dl
-
-
-class BertNerData(object):
-
-    def __init__(self, train_dl, valid_dl, tokenizer, label2idx, max_seq_len=424,
-                 cls2idx=None, batch_size=16, cuda=True, is_meta=False):
-        self.train_dl = train_dl
-        self.valid_dl = valid_dl
-        self.tokenizer = tokenizer
-        self.label2idx = label2idx
-        self.cls2idx = cls2idx
-        self.batch_size = batch_size
-        self.is_meta = is_meta
-        self.cuda = cuda
-        self.id2label = sorted(label2idx.keys(), key=lambda x: label2idx[x])
-        self.is_cls = False
-        self.max_seq_len = max_seq_len
-        if cls2idx is not None:
-            self.is_cls = True
-            self.id2cls = sorted(cls2idx.keys(), key=lambda x: cls2idx[x])
-
-    @classmethod
-    def create(cls,
-               train_path, valid_path, vocab_file, batch_size=16, cuda=True, is_cls=False,
-               data_type="bert_cased", max_seq_len=424, is_meta=False):
-        if ipython_info():
-            global tqdm_notebook
-            tqdm_notebook = tqdm
-        if data_type == "bert_cased":
-            do_lower_case = False
-            fn = get_bert_data_loaders
-        elif data_type == "bert_uncased":
-            do_lower_case = True
-            fn = get_bert_data_loaders
-        else:
-            raise NotImplementedError("No requested mode :(.")
-        return cls(*fn(
-            train_path, valid_path, vocab_file, batch_size, cuda, is_cls, do_lower_case, max_seq_len, is_meta),
-                   batch_size=batch_size, cuda=cuda, is_meta=is_meta)
diff --git a/modules/data/conll2003/__init__.py b/modules/data/conll2003/__init__.py
new file mode 100644
index 0000000..ce3f34a
--- /dev/null
+++ b/modules/data/conll2003/__init__.py
@@ -0,0 +1,4 @@
+from .prc import conll2003_preprocess
+
+
+__all__ = ["conll2003_preprocess"]
diff --git a/modules/data/conll2003/prc.py b/modules/data/conll2003/prc.py
new file mode 100644
index 0000000..7065064
--- /dev/null
+++ b/modules/data/conll2003/prc.py
@@ -0,0 +1,67 @@
+import pandas as pd
+from modules import tqdm
+import argparse
+import codecs
+import os
+
+
+def conll2003_preprocess(
+        data_dir, train_name="eng.train", dev_name="eng.testa", test_name="eng.testb"):
+    """Convert the CoNLL-2003 train/dev/test files in ``data_dir`` to tab-separated CSV files.
+
+    Every output has the columns ``labels``, ``text`` and ``clf``; ``clf`` is True when the
+    part before the first "_" of every label in the row is "O". Outputs go into ``data_dir``.
+    """
+    # (input file name, output name template); the test split also uses the ".dev.csv" suffix.
+    splits = [(train_name, "{}.train.csv"), (dev_name, "{}.dev.csv"), (test_name, "{}.dev.csv")]
+    # All three inputs are parsed before the first output is written.
+    parsed = [read_data(os.path.join(data_dir, name)) for name, _ in splits]
+
+    for (name, template), rows in zip(splits, parsed):
+        # Each parsed row is a [labels, text] pair.
+        frame = pd.DataFrame({"labels": [row[0] for row in rows], "text": [row[1] for row in rows]})
+        frame["clf"] = frame["labels"].apply(lambda x: all([y.split("_")[0] == "O" for y in x.split()]))
+        frame.to_csv(os.path.join(data_dir, template.format(name)), index=False, sep="\t")
+
+
+def read_data(input_file):
+    """Read a CoNLL-style file into a list of ``[labels, text]`` string pairs.
+
+    The first space-separated field of a line is the token and the last one its
+    label ("-" becomes "_"). A sentence is closed only by a blank line directly
+    after a "." token. Tokens still pending at end of file are emitted as a
+    final pair instead of being silently dropped.
+    """
+    lines, words, labels = [], [], []
+    with codecs.open(input_file, "r", encoding="utf-8") as f:
+        f_lines = f.readlines()
+    for line in tqdm(f_lines, total=len(f_lines), desc="Process {}".format(input_file)):
+        contends = line.strip()
+        if contends.startswith("-DOCSTART-"):
+            # Empty placeholder (dropped when joining); it makes words[-1] differ from ".".
+            words.append('')
+            continue
+        if not contends and words and words[-1] == '.':
+            lines.append([' '.join(filter(None, labels)), ' '.join(filter(None, words))])
+            words, labels = [], []
+            continue
+        fields = contends.split(' ')
+        words.append(fields[0])
+        labels.append(fields[-1].replace("-", "_"))
+    if any(words):
+        # Flush the trailing sentence that was not closed by a "." and a blank line.
+        lines.append([' '.join(filter(None, labels)), ' '.join(filter(None, words))])
+    return lines
+
+
+def parse_args():
+    """Parse the command line into keyword arguments for ``conll2003_preprocess``."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--data_dir', type=str)
+    for flag, default in (('--train_name', "eng.train"), ('--dev_name', "eng.testa"), ('--test_name', "eng.testb")):
+        parser.add_argument(flag, type=str, default=default)
+    return vars(parser.parse_args())
+
+
+if __name__ == "__main__":
+    conll2003_preprocess(**parse_args())  # run the conversion with the parsed command-line options
diff --git a/modules/data/elmo_data.py b/modules/data/elmo_data.py
deleted file mode 100644
index dc808ff..0000000
--- a/modules/data/elmo_data.py
+++ /dev/null
@@ -1,444 +0,0 @@
-import numpy as np
-import codecs
-import json
-from torch.utils.data import DataLoader
-import os
-import torch
-import pandas as pd
-from modules.utils.utils import ipython_info
-from tqdm._tqdm_notebook import tqdm_notebook
-from tqdm import tqdm
-
-
-def read_list(sents, max_chars=None):
-    """
-    read raw text file. The format of the input is like, one sentence per line
-    words are separated by '\t'
-    :param sents:
-    :param max_chars: int, the number of maximum characters in a word, this
-      parameter is used when the model is configured with CNN word encoder.
-    :return:
-    """
-    dataset = []
-    textset = []
-    for sent in sents:
-        data = ['<bos>']
-        text = []
-        for token in sent:
-            text.append(token)
-            if max_chars is not None and len(token) + 2 > max_chars:
-                token = token[:max_chars - 2]
-            data.append(token)
-        data.append('<eos>')
-        dataset.append(data)
-        textset.append(text)
-    return dataset, textset
-
-
-class InputFeatures(object):
-    """A single set of features of data."""
-
-    def __init__(
-            self,
-            # Elmo data
-            input_ids, char_ids,
-            # Origin data
-            tokens, labels, labels_ids, cls=None, cls_idx=None):
-        """
-        Data has the following structure.
-        data[0]: list, tokens ids
-        ...
-        data[-1]: list, labels ids
-        """
-        self.data = []
-        # Elmo data
-        self.input_ids = input_ids
-        self.data.append(input_ids)
-
-        # Origin data
-        self.tokens = tokens
-        self.labels = labels
-        self.char_ids = char_ids
-        if char_ids is not None:
-            self.data.append(char_ids)
-        # Used for joint model
-        self.cls = cls
-        self.cls_idx = cls_idx
-        if cls is not None:
-            self.data.append(cls_idx)
-        # Labels data
-
-        self.labels_ids = labels_ids
-        self.data.append(labels_ids)
-
-
-class DataLoaderForTrain(DataLoader):
-
-    def __init__(self, data_set, w_pad_id, c_pad_id, max_chars, cuda, **kwargs):
-        super(DataLoaderForTrain, self).__init__(
-            dataset=data_set,
-            collate_fn=self.collate_fn,
-            **kwargs
-        )
-        self.w_pad_id = w_pad_id
-        self.c_pad_id = c_pad_id
-        self.l_pad_id = 0
-        self.max_chars = max_chars
-        self.cuda = cuda
-
-    def collate_fn(self, data):
-        batch_size = len(data)
-        lens = [len(x.labels) for x in data]
-        max_len = max(lens)
-        sorted_idx = np.argsort(lens)[::-1]
-        # Words prc
-        batch_w = None
-        if data[0].input_ids is not None:
-            batch_w = torch.LongTensor(batch_size, max_len).fill_(self.w_pad_id)
-            for i, idx in enumerate(sorted_idx):
-                x_i = data[idx].input_ids
-                for j, x_ij in enumerate(x_i):
-                    batch_w[i][j] = x_ij
-            if self.cuda:
-                batch_w = batch_w.cuda()
-        # Chars prc
-        batch_c = None
-        if data[0].char_ids is not None:
-            batch_c = torch.LongTensor(batch_size, max_len, self.max_chars).fill_(self.c_pad_id)
-            for i, idx in enumerate(sorted_idx):
-                x_i = data[idx].char_ids
-                for j, x_ij in enumerate(x_i):
-                    for k, c in enumerate(x_ij):
-                        batch_c[i][j][k] = c
-            if self.cuda:
-                batch_c = batch_c.cuda()
-
-        # Masks prc
-        masks = [torch.LongTensor(batch_size, max_len).fill_(0), [], []]
-
-        for i, idx in enumerate(sorted_idx):
-            x_i = data[idx].input_ids
-            for j in range(len(x_i)):
-                masks[0][i][j] = 1
-                if j + 1 < len(x_i):
-                    masks[1].append(i * max_len + j)
-                if j > 0:
-                    masks[2].append(i * max_len + j)
-
-        assert len(masks[1]) <= batch_size * max_len
-        assert len(masks[2]) <= batch_size * max_len
-
-        masks[1] = torch.LongTensor(masks[1])
-        masks[2] = torch.LongTensor(masks[2])
-        if self.cuda:
-            masks[0] = masks[0].cuda()
-            masks[1] = masks[1].cuda()
-            masks[2] = masks[2].cuda()
-
-        # Labels prc
-        batch_l = torch.LongTensor(batch_size, max_len).fill_(self.l_pad_id)
-        for i, idx in enumerate(sorted_idx):
-            x_i = data[idx].labels_ids
-            for j, x_ij in enumerate(x_i):
-                batch_l[i][j] = x_ij
-        if self.cuda:
-            batch_l = batch_l.cuda()
-
-        if data[0].cls_idx is not None:
-            batch_cls = torch.LongTensor([data[idx].cls_idx for idx in sorted_idx])
-            if self.cuda:
-                batch_cls = batch_cls.cuda()
-            return batch_w, batch_c, masks, batch_cls, masks[0], batch_l
-        return batch_w, batch_c, masks, masks[0], batch_l
-
-
-def get_data(df, config, label2idx=None, oov='<oov>', pad='<pad>', cls2idx=None, is_cls=False,
-             word_lexicon=None, char_lexicon=None, max_seq_len=424):
-    if label2idx is None:
-        label2idx = {pad: 0, '<bos>': 1, '<eos>': 2}
-    features = []
-    if is_cls:
-        # Use joint model
-        if cls2idx is None:
-            cls2idx = dict()
-        zip_args = zip(df["1"].tolist(), df["0"].tolist(), df["2"].tolist())
-    else:
-        zip_args = zip(df["1"].tolist(), df["0"].tolist())
-    cls = None
-    total = len(df["0"].tolist())
-    for args in tqdm_notebook(enumerate(zip_args), total=total, leave=False):
-        if is_cls:
-            idx, (text, labels, cls) = args
-        else:
-            idx, (text, labels) = args
-        text = text.split()
-        text = text[:max_seq_len - 2]
-        labels = labels.split()[:max_seq_len - 2]
-        labels = ['<bos>'] + labels + ['<eos>']
-        if config['token_embedder']['name'].lower() == 'cnn':
-            tokens, text = read_list([text], config['token_embedder']['max_characters_per_token'])
-        else:
-            tokens, text = read_list([text])
-        tokens, text = tokens[0], text[0]
-        input_ids = None
-        if word_lexicon is not None:
-            oov_id, pad_id = word_lexicon.get(oov, None), word_lexicon.get(pad, None)
-            assert oov_id is not None and pad_id is not None
-            input_ids = [word_lexicon.get(x, oov_id) for x in tokens]
-        char_ids = None
-        # get a batch of character id whose size is (batch x max_len x max_chars)
-        if char_lexicon is not None:
-            char_ids = []
-            bow_id, eow_id, oov_id, pad_id = [char_lexicon.get(key, None) for key in ('<eow>', '<bow>', oov, pad)]
-
-            assert bow_id is not None and eow_id is not None and oov_id is not None and pad_id is not None
-
-            if config['token_embedder']['name'].lower() == 'cnn':
-                max_chars = config['token_embedder']['max_characters_per_token']
-                assert max([len(w) for w in tokens]) + 2 <= max_chars
-            elif config['token_embedder']['name'].lower() == 'lstm':
-                # counting the <bow> and <eow>
-                pass
-            else:
-                raise ValueError('Unknown token_embedder: {0}'.format(config['token_embedder']['name']))
-            for token in tokens:
-                chars = [bow_id]
-                if token == '<bos>' or token == '<eos>':
-                    chars.append(char_lexicon.get(token))
-                    chars.append(eow_id)
-                else:
-                    for c in token:
-                        chars.append(char_lexicon.get(c, oov_id))
-                    chars.append(eow_id)
-                char_ids.append(chars)
-
-        for l in labels:
-            if l not in label2idx:
-                label2idx[l] = len(label2idx)
-        labels_ids = [label2idx[l] for l in labels]
-        # For joint model
-        cls_idx = None
-        if is_cls:
-            if cls not in cls2idx:
-                cls2idx[cls] = len(cls2idx)
-            cls_idx = cls2idx[cls]
-        features.append(InputFeatures(input_ids, char_ids, tokens, labels, labels_ids, cls=cls, cls_idx=cls_idx))
-    if is_cls:
-        return features, (label2idx, cls2idx)
-    return features, label2idx
-
-
-def get_elmo_data_loaders(train, valid, model_dir, config_name, batch_size, cuda, is_cls,
-                          oov='<oov>', pad='<pad>'):
-    train = pd.read_csv(train)
-    valid = pd.read_csv(valid)
-    with open(os.path.join(model_dir, config_name), 'r') as fin:
-        config = json.load(fin)
-    c_pad_id = None
-    char_lexicon = None
-    # For the model trained with character-based word encoder.
-    if config['token_embedder']['char_dim'] > 0:
-        char_lexicon = {}
-        with codecs.open(os.path.join(model_dir, 'char.dic'), 'r', encoding='utf-8') as fpi:
-            for line in fpi:
-                tokens = line.strip().split('\t')
-                if len(tokens) == 1:
-                    tokens.insert(0, '\u3000')
-                token, i = tokens
-                char_lexicon[token] = int(i)
-        c_pad_id = char_lexicon.get(pad)
-    w_pad_id = None
-    word_lexicon = None
-    # For the model trained with word form word encoder.
-    if config['token_embedder']['word_dim'] > 0:
-        word_lexicon = {}
-        with codecs.open(os.path.join(model_dir, 'word.dic'), 'r', encoding='utf-8') as fpi:
-            for line in fpi:
-                tokens = line.strip().split('\t')
-                if len(tokens) == 1:
-                    tokens.insert(0, '\u3000')
-                token, i = tokens
-                word_lexicon[token] = int(i)
-        w_pad_id = word_lexicon.get(pad)
-
-    max_chars = None
-    if config['token_embedder']['name'].lower() == 'cnn':
-        max_chars = config['token_embedder']['max_characters_per_token']
-    elif config['token_embedder']['name'].lower() == 'lstm':
-        # counting the <bow> and <eow>
-        pass
-    else:
-        raise ValueError('Unknown token_embedder: {0}'.format(config['token_embedder']['name']))
-
-    # Get train dataset
-    train_f, label2idx = get_data(
-        train, config, oov=oov, pad=pad, is_cls=is_cls, word_lexicon=word_lexicon, char_lexicon=char_lexicon)
-    cls2idx = None
-    if is_cls:
-        label2idx, cls2idx = label2idx
-    # Get train dataloader
-    train_dl = DataLoaderForTrain(
-        train_f, w_pad_id, c_pad_id, max_chars, batch_size=batch_size, shuffle=True, cuda=cuda)
-
-    # Get valid dataset
-    valid_f, label2idx = get_data(
-        valid, config, oov=oov, pad=pad, is_cls=is_cls, cls2idx=cls2idx,
-        word_lexicon=word_lexicon, char_lexicon=char_lexicon)
-    cls2idx = None
-    if is_cls:
-        label2idx, cls2idx = label2idx
-    # Get valid dataloader
-    valid_dl = DataLoaderForTrain(
-        valid_f, w_pad_id, c_pad_id, max_chars, batch_size=batch_size, shuffle=False, cuda=cuda)
-    if is_cls:
-        return train_dl, valid_dl, label2idx, word_lexicon, char_lexicon, cls2idx
-    return train_dl, valid_dl, label2idx, word_lexicon, char_lexicon
-
-
-class DataLoaderForPredict(DataLoader):
-
-    def __init__(self, data_set, w_pad_id, c_pad_id, max_chars, cuda, **kwargs):
-        super(DataLoaderForPredict, self).__init__(
-            dataset=data_set,
-            collate_fn=self.collate_fn,
-            **kwargs
-        )
-        self.w_pad_id = w_pad_id
-        self.c_pad_id = c_pad_id
-        self.l_pad_id = 0
-        self.max_chars = max_chars
-        self.cuda = cuda
-
-    def collate_fn(self, data):
-        batch_size = len(data)
-        lens = [len(x.labels) for x in data]
-        max_len = max(lens)
-        sorted_idx = np.argsort(lens)[::-1]
-        # Words prc
-        batch_w = None
-        if data[0].input_ids is not None:
-            batch_w = torch.LongTensor(batch_size, max_len).fill_(self.w_pad_id)
-            for i, idx in enumerate(sorted_idx):
-                x_i = data[idx].input_ids
-                for j, x_ij in enumerate(x_i):
-                    batch_w[i][j] = x_ij
-            if self.cuda:
-                batch_w = batch_w.cuda()
-        # Chars prc
-        batch_c = None
-        if data[0].char_ids is not None:
-            batch_c = torch.LongTensor(batch_size, max_len, self.max_chars).fill_(self.c_pad_id)
-            for i, idx in enumerate(sorted_idx):
-                x_i = data[idx].char_ids
-                for j, x_ij in enumerate(x_i):
-                    for k, c in enumerate(x_ij):
-                        batch_c[i][j][k] = c
-            if self.cuda:
-                batch_c = batch_c.cuda()
-
-        # Masks prc
-        masks = [torch.LongTensor(batch_size, max_len).fill_(0), [], []]
-
-        for i, idx in enumerate(sorted_idx):
-            x_i = data[idx].input_ids
-            for j in range(len(x_i)):
-                masks[0][i][j] = 1
-                if j + 1 < len(x_i):
-                    masks[1].append(i * max_len + j)
-                if j > 0:
-                    masks[2].append(i * max_len + j)
-
-        assert len(masks[1]) <= batch_size * max_len
-        assert len(masks[2]) <= batch_size * max_len
-
-        masks[1] = torch.LongTensor(masks[1])
-        masks[2] = torch.LongTensor(masks[2])
-        if self.cuda:
-            masks[0] = masks[0].cuda()
-            masks[1] = masks[1].cuda()
-            masks[2] = masks[2].cuda()
-
-        # Labels prc
-        batch_l = torch.LongTensor(batch_size, max_len).fill_(self.l_pad_id)
-        for i, idx in enumerate(sorted_idx):
-            x_i = data[idx].labels_ids
-            for j, x_ij in enumerate(x_i):
-                batch_l[i][j] = x_ij
-        sorted_idx = torch.LongTensor(list(sorted_idx))
-        if self.cuda:
-            batch_l = batch_l.cuda()
-            sorted_idx = sorted_idx.cuda()
-
-        if data[0].cls_idx is not None:
-            batch_cls = torch.LongTensor([data[idx].cls_idx for idx in sorted_idx])
-            if self.cuda:
-                batch_cls = batch_cls.cuda()
-            return batch_w, batch_c, masks, batch_cls, masks[0], batch_l
-        return (batch_w, batch_c, masks, masks[0], batch_l), sorted_idx
-
-
-def get_elmo_data_loader_for_predict(
-        valid, learner, oov='<oov>', pad='<pad>'):
-    valid = pd.read_csv(valid)
-    c_pad_id = None
-    char_lexicon = learner.data.char2idx
-    # For the model trained with character-based word encoder.
-    if char_lexicon is not None:
-        c_pad_id = char_lexicon.get(pad)
-    w_pad_id = None
-    word_lexicon = learner.data.word2idx
-    # For the model trained with word form word encoder.
-    if word_lexicon is not None:
-        w_pad_id = word_lexicon.get(pad)
-
-    max_chars = learner.data.train_dl.max_chars
-    cls2idx = learner.data.cls2idx
-    config = learner.model.encoder.embeddings.config
-    is_cls = learner.data.is_cls
-    cuda = learner.data.cuda
-    batch_size = learner.data.batch_size
-
-    # Get valid dataset
-    valid_f, label2idx = get_data(
-        valid, config, oov=oov, pad=pad, is_cls=is_cls, cls2idx=cls2idx,
-        word_lexicon=word_lexicon, char_lexicon=char_lexicon)
-    # Get valid dataloader
-    valid_dl = DataLoaderForPredict(
-        valid_f, w_pad_id, c_pad_id, max_chars, batch_size=batch_size, shuffle=False, cuda=cuda)
-    return valid_dl
-
-
-class ElmoNerData(object):
-
-    def __init__(self, train_dl, valid_dl, label2idx,
-                 word2idx=None, char2idx=None,
-                 cls2idx=None, batch_size=16, cuda=True):
-        self.train_dl = train_dl
-        self.valid_dl = valid_dl
-        self.label2idx = label2idx
-        self.word2idx = word2idx
-        self.char2idx = char2idx
-        self.cls2idx = cls2idx
-        self.batch_size = batch_size
-        self.cuda = cuda
-        self.id2label = sorted(label2idx.keys(), key=lambda x: label2idx[x])
-        if word2idx is not None:
-            self.idx2word = sorted(word2idx.keys(), key=lambda x: word2idx[x])
-        if char2idx is not None:
-            self.idx2char = sorted(char2idx.keys(), key=lambda x: char2idx[x])
-        self.is_cls = False
-        if cls2idx is not None:
-            self.is_cls = True
-            self.id2cls = sorted(cls2idx.keys(), key=lambda x: cls2idx[x])
-
-    @classmethod
-    def create(cls,
-               train_path, valid_path, model_dir, config_name, batch_size=16, cuda=True, is_cls=False,
-               oov='<oov>', pad='<pad>'):
-        if ipython_info():
-            global tqdm_notebook
-            tqdm_notebook = tqdm
-        fn = get_elmo_data_loaders
-        return cls(*fn(
-            train_path, valid_path, model_dir, config_name, batch_size, cuda, is_cls, oov, pad),
-                   batch_size=batch_size, cuda=cuda)
diff --git a/modules/data/fre/__init__.py b/modules/data/fre/__init__.py
new file mode 100644
index 0000000..3226aa2
--- /dev/null
+++ b/modules/data/fre/__init__.py
@@ -0,0 +1,5 @@
+# -*- coding: utf-8 -*-
+from .reader import Reader as FREReader
+from .prc import fact_ru_eval_preprocess
+
+__all__ = ["FREReader", "fact_ru_eval_preprocess"]
diff --git a/modules/data/fre/bilou/__init__.py b/modules/data/fre/bilou/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/modules/data/fre/bilou/__init__.py
@@ -0,0 +1 @@
+
diff --git a/modules/data/fre/bilou/from_bilou.py b/modules/data/fre/bilou/from_bilou.py
new file mode 100644
index 0000000..876ea9a
--- /dev/null
+++ b/modules/data/fre/bilou/from_bilou.py
@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+
+
+def untag(list_of_tags, list_of_tokens):
+    """
+    Decode a BILOU tag sequence into entity records.
+
+    :param list_of_tags: tag strings, one per token; each must start with B, I, L, O or U
+    :param list_of_tokens: token objects aligned with list_of_tags
+    :return: list of [tag, position, length] entries produced by __to_output_format
+    :raises ValueError: if the lengths differ or a tag has no BILOU prefix
+    """
+    if len(list_of_tags) != len(list_of_tokens):
+        raise ValueError('lengths are not equal')
+
+    # Decoder state: finished entities, words of the pending entity, and its tag.
+    dict_of_final_ne, ne_words, ne_tag = {}, [], None
+
+    for current_tag, current_token in zip(list_of_tags, list_of_tokens):
+        # The pending tag and the pending words must be set (or unset) together.
+        if (ne_tag is not None) ^ (ne_words != []):
+            if ne_tag is None:
+                raise Exception('Somehow ne_tag is None and ne_words is not None')
+            raise Exception('Somehow ne_words is None and ne_tag is not None')
+
+        if current_tag.startswith(('B', 'I')):
+            dict_of_final_ne, ne_words, ne_tag = __check_bi(
+                dict_of_final_ne, ne_words, ne_tag, current_tag, current_token)
+        elif current_tag.startswith('L'):
+            dict_of_final_ne, ne_words, ne_tag = __check_l(
+                dict_of_final_ne, ne_words, ne_tag, current_tag, current_token)
+        elif current_tag.startswith('O'):
+            dict_of_final_ne, ne_words, ne_tag = __finish_ne_if_required(
+                dict_of_final_ne, ne_words, ne_tag)
+        elif current_tag.startswith('U'):
+            dict_of_final_ne, ne_words, ne_tag = __check_u(
+                dict_of_final_ne, ne_words, ne_tag, current_tag, current_token)
+        else:
+            raise ValueError("tag contains no BILOU tags")
+
+    # Flush an entity that is still open at the end of the sequence.
+    dict_of_final_ne, ne_words, ne_tag = __finish_ne_if_required(dict_of_final_ne, ne_words, ne_tag)
+    return __to_output_format(dict_of_final_ne)
+
+
+def __check_bi(dict_of_final_ne, ne_words, ne_tag, current_tag, current_token):
+    """Handle a B or I tag: open a new entity or extend the pending one."""
+    tag_body = current_tag[1:]
+    if ne_tag is None and ne_words == []:
+        # Nothing is pending, so this token opens an entity.
+        return dict_of_final_ne, [current_token], tag_body
+    if current_tag.startswith('I') and ne_tag == tag_body:
+        ne_words.append(current_token)
+        return dict_of_final_ne, ne_words, ne_tag
+    # A B tag, or an I tag of another type: close the pending entity and start anew.
+    return __replace_by_new(dict_of_final_ne, ne_words, ne_tag, current_tag, current_token)
+
+
+def __check_l(dict_of_final_ne, ne_words, ne_tag, current_tag, current_token):
+    """Handle an L tag: close the pending entity; a type mismatch makes the token its own entity."""
+    tag_body = current_tag[1:]
+    if ne_tag == tag_body:
+        return __finish_ne_if_required(dict_of_final_ne, ne_words + [current_token], ne_tag)
+    dict_of_final_ne = __finish_ne_if_required(dict_of_final_ne, ne_words, ne_tag)[0]
+    return __finish_ne_if_required(dict_of_final_ne, [current_token], tag_body)
+
+
+def __check_u(dict_of_final_ne, ne_words, ne_tag, current_tag, current_token):
+    dict_of_final_ne, ne_words, ne_tag = __finish_ne_if_required(dict_of_final_ne, ne_words, ne_tag)  # flush pending
+    return __finish_ne_if_required(dict_of_final_ne, [current_token], current_tag[1:])  # U token is stored alone
+
+
+def __replace_by_new(dict_of_final_ne, ne_words, ne_tag, current_tag, current_token):
+    """Close the pending entity (if complete) and start a new one at the current token."""
+    dict_of_final_ne = __finish_ne_if_required(dict_of_final_ne, ne_words, ne_tag)[0]
+    tag_body = current_tag[1:]
+    return dict_of_final_ne, [current_token], tag_body
+
+
+def __finish_ne_if_required(dict_of_final_ne, ne_words, ne_tag):
+    """Store the pending entity in the dict (keyed by its token tuple) and reset the state."""
+    if ne_tag is None or ne_words == []:
+        return dict_of_final_ne, ne_words, ne_tag  # nothing complete to store: state unchanged
+    dict_of_final_ne[tuple(ne_words)] = ne_tag
+    return dict_of_final_ne, [], None
+
+
+def __to_output_format(dict_nes):
+    """
+    Turn the entity dict into [tag, position, length] records.
+
+    :param dict_nes: mapping from a tuple of tokens to the entity tag; every token must
+        provide get_position() and get_length()
+    :return: one [tag, start, length] list per entity; length runs to the end of the last token
+    """
+    def record(tokens_tuple, tag):
+        start = int(tokens_tuple[0].get_position())
+        end = int(tokens_tuple[-1].get_position()) + int(tokens_tuple[-1].get_length())
+        return [tag, start, end - start]
+    return [record(tokens_tuple, tag) for tokens_tuple, tag in dict_nes.items()]
diff --git a/modules/data/fre/bilou/to_bilou.py b/modules/data/fre/bilou/to_bilou.py
new file mode 100644
index 0000000..762bca2
--- /dev/null
+++ b/modules/data/fre/bilou/to_bilou.py
@@ -0,0 +1,49 @@
+# -*- coding: utf-8 -*-
+from ..entity.taggedtoken import TaggedToken
+
+
+def get_tagged_tokens_from(dict_of_nes, token_list):
+    """Tag every token "O", then overwrite the tokens that belong to an entity."""
+    list_of_tagged_tokens = [TaggedToken('O', token) for token in token_list]
+    position_by_token_id = {token.id: pos for pos, token in enumerate(token_list)}
+    for ne in dict_of_nes.values():
+        for tokenid in ne['tokens_list']:
+            try:
+                tag = format_tag(tokenid, ne)
+            except ValueError:
+                # format_tag could not build a tag (e.g. unmapped entity type): use "O".
+                tag = "O"
+            pos = position_by_token_id[tokenid]
+            list_of_tagged_tokens[pos] = TaggedToken(tag, token_list[pos])
+    return list_of_tagged_tokens
+
+
+def format_tag(tokenid, ne):
+    """Return "<prefix>_<type>" for token `tokenid` of entity `ne`, e.g. "B_PER"."""
+    parts = (__choose_bilou_tag_for(tokenid, ne['tokens_list']), __tag_to_fact_ru_eval_format(ne['tag']))
+    return "{}_{}".format(*parts)
+
+
+def __choose_bilou_tag_for(token_id, token_list):
+    """Return 'B' for the first token of the entity and 'I' for any later one.
+
+    Only B and I are produced here (no L or U); an empty `token_list` yields None.
+    """
+    if not token_list:
+        return None
+    return 'B' if len(token_list) == 1 or token_list.index(token_id) == 0 else 'I'
+
+
+def __tag_to_fact_ru_eval_format(tag):
+    """Map an annotation object type to its PER/ORG/LOC label.
+
+    :raises ValueError: if `tag` is not one of the known object types
+    """
+    known = {
+        'Person': 'PER',
+        'Org': 'ORG', 'Project': 'ORG',
+        'Location': 'LOC', 'LocOrg': 'LOC',
+    }
+    if tag in known:
+        return known[tag]
+    raise ValueError('tag ' + tag + " is not the right tag")
diff --git a/modules/data/fre/entity/__init__.py b/modules/data/fre/entity/__init__.py
new file mode 100644
index 0000000..40a96af
--- /dev/null
+++ b/modules/data/fre/entity/__init__.py
@@ -0,0 +1 @@
+# -*- coding: utf-8 -*-
diff --git a/modules/data/fre/entity/document.py b/modules/data/fre/entity/document.py
new file mode 100644
index 0000000..441fbfa
--- /dev/null
+++ b/modules/data/fre/entity/document.py
@@ -0,0 +1,164 @@
+import codecs
+from .token import Token
+from .taggedtoken import TaggedToken
+from collections import defaultdict
+from ..bilou import to_bilou
+
+
+class Document(object):  # one document read from <path>.tokens and, when tagged, <path>.spans / <path>.objects
+    def __init__(self, path, tagged=True, encoding="utf-8"):
+        self.path = path  # common prefix of the document files; extensions are appended on read
+        self.tagged = tagged
+        self.encoding = encoding
+        self.tokens = []
+        self.tagged_tokens = []
+        self.load()  # the files are read as soon as the object is constructed
+
+    def to_text_tokens(self):
+        return [token.text for token in self.tokens]
+
+    def get_tags(self):
+        return [token.get_tag() for token in self.tagged_tokens]
+
+    def load(self):  # (re)read the tokens and derive their tags; returns self
+        self.tokens = self.__get_tokens_from_file()
+        if self.tagged:
+            self.tagged_tokens = self.__get_tagged_tokens_from()
+        else:
+            self.tagged_tokens = [TaggedToken(None, token) for token in self.tokens]  # untagged: tag is None
+        return self
+
+    def parse_file(self, path):  # returns one list of whitespace-separated fields per non-empty row
+        with codecs.open(path, 'r', encoding=self.encoding, errors="ignore") as file:  # undecodable bytes are dropped
+            rows = file.read().split('\n')
+        return [row.split(' # ')[0].split() for row in rows if len(row) != 0]  # text after " # " is ignored
+
+    def __get_tokens_from_file(self):  # one Token per row of <path>.tokens
+        rows = self.parse_file(self.path + '.tokens')
+        tokens = []
+        for token_str in rows:
+            tokens.append(Token().from_sting(token_str))
+        return tokens
+
+    def __get_tagged_tokens_from(self):  # spans + objects -> merged entities -> one TaggedToken per token
+        span_dict = self.__span_id2token_ids(self.path + '.spans', [token.id for token in self.tokens])
+        object_dict = self.__to_dict_of_objects(self.path + '.objects')
+        dict_of_nes = self.__merge(object_dict, span_dict, self.tokens)
+        return to_bilou.get_tagged_tokens_from(dict_of_nes, self.tokens)
+
+    def __span_id2token_ids(self, span_file, token_ids):  # maps span id -> ids of the tokens it covers
+        span_list = self.parse_file(span_file)
+        dict_of_spans = {}
+        for span in span_list:
+            span_id = span[0]  # field 0: span id
+            span_start = span[4]  # field 4: id of the first token of the span
+            span_length_in_tokens = int(span[5])  # field 5: number of tokens in the span
+            list_of_token_of_spans = self.__find_tokens_for(span_start, span_length_in_tokens, token_ids)
+            dict_of_spans[span_id] = list_of_token_of_spans
+        return dict_of_spans
+
+    @staticmethod
+    def __find_tokens_for(start, length, token_ids):  # `length` consecutive ids beginning at id `start`
+        list_of_tokens = []
+        index = token_ids.index(start)  # raises ValueError when the start id is not a known token
+        for i in range(length):
+            list_of_tokens.append(token_ids[index + i])
+        return list_of_tokens
+
+    def __to_dict_of_objects(self, object_file):  # maps object id -> {'tag': ..., 'spans': [...]}
+        object_list = self.parse_file(object_file)
+        dict_of_objects = {}
+        for obj in object_list:
+            object_id = obj[0]  # field 0: object id
+            object_tag = obj[1]  # field 1: object type
+            object_spans = obj[2:]  # remaining fields: ids of the spans forming the object
+            dict_of_objects[object_id] = {'tag': object_tag, 'spans': object_spans}
+        return dict_of_objects
+
+    def __merge(self, object_dict, span_dict, tokens):
+        ne_dict = self.__get_dict_of_nes(object_dict, span_dict)
+        return self.__clean(ne_dict, tokens)
+
+    @staticmethod
+    def __get_dict_of_nes(object_dict, span_dict):  # (object id, tag) -> sorted unique int token ids
+        ne_dict = defaultdict(set)
+        for obj_id, obj_values in object_dict.items():
+            for span in obj_values['spans']:
+                ne_dict[(obj_id, obj_values['tag'])].update(span_dict[span])
+        for ne in ne_dict:
+            ne_dict[ne] = sorted(list(set([int(i) for i in ne_dict[ne]])))
+        return ne_dict
+
+    def __clean(self, ne_dict, tokens):  # merges entities sharing tokens; returns object id -> {'tokens_list', 'tag'}
+        sorted_nes = sorted(ne_dict.items(), key=self.__sort_by_tokens)
+        dict_of_tokens_by_id = {}
+        for i in range(len(tokens)):
+            dict_of_tokens_by_id[tokens[i].id] = i
+        result_nes = {}
+        if len(sorted_nes) != 0:
+            start_ne = sorted_nes[0]
+            for ne in sorted_nes:
+                if self.__not_intersect(start_ne[1], ne[1]):  # disjoint: emit the accumulated entity, start a new one
+                    result_nes[start_ne[0][0]] = {
+                        'tokens_list': self.__check_order(start_ne[1], dict_of_tokens_by_id, tokens),
+                        'tag': start_ne[0][1]}
+                    start_ne = ne
+                else:  # overlap: absorb into the accumulated entity, which keeps its own id and tag
+                    result_tokens_list = self.__check_normal_form(start_ne[1], ne[1])
+                    start_ne = (start_ne[0], result_tokens_list)
+            result_nes[start_ne[0][0]] = {  # emit the last accumulated entity
+                'tokens_list': self.__check_order(start_ne[1], dict_of_tokens_by_id, tokens),
+                'tag': start_ne[0][1]}
+        return result_nes
+
+    @staticmethod
+    def __sort_by_tokens(tokens):  # sort key: lowest first id first; on ties the entity reaching furthest first
+        ids_as_int = [int(token_id) for token_id in tokens[1]]
+        return min(ids_as_int), -max(ids_as_int)
+
+    @staticmethod
+    def __not_intersect(start_ne, current_ne):
+        intersection = set.intersection(set(start_ne), set(current_ne))
+        return intersection == set()
+
+    def __check_normal_form(self, start_ne, ne):
+        all_tokens = set.union(set(start_ne), set(ne))
+        return self.__find_all_range_of_tokens(all_tokens)
+
+    @staticmethod
+    def __find_all_range_of_tokens(tokens):
+        tokens = sorted(tokens)
+        if (tokens[-1] - tokens[0] - len(tokens)) < 5:  # few ids missing: use the full contiguous id range
+            return list(range(tokens[0], tokens[-1] + 1))
+        else:
+            return tokens
+
+    def __check_order(self, list_of_tokens, dict_of_tokens_by_id, tokens):  # known ids only, in document order
+        list_of_tokens = [str(i) for i in self.__find_all_range_of_tokens(list_of_tokens)]
+        result = []
+        for token in list_of_tokens:
+            if token in dict_of_tokens_by_id:
+                result.append((token, dict_of_tokens_by_id[token]))
+        result = sorted(result, key=self.__sort_by_position)
+        result = self.__add_quotation_marks(result, tokens)
+        return [r[0] for r in result]
+
+    @staticmethod
+    def __sort_by_position(result_tuple):
+        return result_tuple[1]
+
+    @staticmethod
+    def __add_quotation_marks(result, tokens):  # adds a neighbouring « or » when only its partner is inside
+        result_tokens_texts = [tokens[token[1]].text for token in result]
+        prev_pos = result[0][1] - 1
+        next_pos = result[-1][1] + 1
+
+        if prev_pos >= 0 and tokens[prev_pos].text == '«' \
+                and '»' in result_tokens_texts and '«' not in result_tokens_texts:
+            result = [(tokens[prev_pos].id, prev_pos)] + result
+
+        if next_pos < len(tokens) and tokens[next_pos].text == '»' \
+                and '«' in result_tokens_texts and '»' not in result_tokens_texts:
+            result = result + [(tokens[next_pos].id, next_pos)]
+
+        return result
diff --git a/modules/data/fre/entity/taggedtoken.py b/modules/data/fre/entity/taggedtoken.py
new file mode 100644
index 0000000..af64c6b
--- /dev/null
+++ b/modules/data/fre/entity/taggedtoken.py
@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+
+
+class TaggedToken(object):
+    """Pair a token object with the tag assigned to it."""
+
+    def __init__(self, tag, token):
+        self.__tag = tag
+        self.__token = token
+
+    @property
+    def text(self):
+        return self.__token.text
+
+    def get_token(self):
+        return self.__token
+
+    def get_tag(self):
+        return self.__tag
+
+    def __repr__(self):
+        # A falsy tag (None or "") is rendered as the literal "None".
+        shown_tag = self.__tag if self.__tag else "None"
+        return "<" + shown_tag + "_" + str(self.__token) + ">"
diff --git a/modules/data/fre/entity/token.py b/modules/data/fre/entity/token.py
new file mode 100644
index 0000000..fb59212
--- /dev/null
+++ b/modules/data/fre/entity/token.py
@@ -0,0 +1,52 @@
+# -*- coding: utf-8 -*-
+
+
+class Token(object):
+    """A token with id, position, length and text; ids are auto-numbered when omitted."""
+    __token_id__ = 0
+
+    @property
+    def length(self):
+        return self.__length
+
+    @property
+    def position(self):
+        return self.__position
+
+    @property
+    def id(self):
+        return self.__id
+
+    @property
+    def text(self):
+        return self.__text
+
+    @property
+    def all(self):
+        return self.__id, self.__position, self.__length, self.__text
+
+    @property
+    def tag(self):
+        return self.__tag  # was `self.tag`, which recursed until RecursionError
+
+    def __init__(self, token_id=None, position=None, length=None, text=None):
+        self.__id = token_id
+        if token_id is None:
+            self.__id = Token.__token_id__
+            Token.__token_id__ += 1
+        self.__position, self.__length = position, length
+        self.__text = text
+        self.__tag = None
+
+    def from_sting(self, string):  # (sic) name kept for callers; `string` is a 4-item sequence
+        self.__id, self.__position, self.__length, self.__text = string
+        return self
+
+    def __len__(self):
+        return int(self.__length)  # the length may be a numeric string; len() requires an int
+
+    def __str__(self):
+        return self.__text
+
+    def __repr__(self):
+        return "<<" + str(self.__id) + "_" + str(self.__text) + ">>"  # str(): auto ids are ints
diff --git a/modules/data/fre/prc.py b/modules/data/fre/prc.py
new file mode 100644
index 0000000..f94b514
--- /dev/null
+++ b/modules/data/fre/prc.py
@@ -0,0 +1,45 @@
+from modules.data.fre.reader import Reader
+import pandas as pd
+from modules import tqdm
+import argparse
+
+
+def fact_ru_eval_preprocess(dev_dir, test_dir, dev_df_path, test_df_path):
+    """Convert the FactRuEval2016 dev and test directories into tab-separated files.
+
+    Each non-empty sentence becomes one row with space-joined "labels" and "text";
+    "clf" is True when the part before the first "_" of every label equals "O".
+    """
+    jobs = (
+        (dev_dir, dev_df_path, "Process FactRuEval2016 dev set."),
+        (test_dir, test_df_path, "Process FactRuEval2016 test set."),
+    )
+    # The dev set is read and written completely before the test set is touched.
+    for src_dir, out_path, desc in jobs:
+        reader = Reader(src_dir)
+        reader.read_dir()
+        texts, tags = reader.split()
+        res_tags = []
+        res_tokens = []
+        for tag, tokens in tqdm(zip(tags, texts), total=len(tags), desc=desc):
+            # Sentences without any token are skipped.
+            if len(tag):
+                res_tags.append(tag)
+                res_tokens.append(tokens)
+        # Columns end up in the order: labels, text, clf.
+        frame = pd.DataFrame({"labels": list(map(" ".join, res_tags)), "text": list(map(" ".join, res_tokens))})
+        frame["clf"] = frame["labels"].apply(lambda x: all([y.split("_")[0] == "O" for y in x.split()]))
+        frame.to_csv(out_path, index=False, sep="\t")
+
+
+def parse_args():
+    """Parse the four path options from the command line and return them as a dict."""
+    parser = argparse.ArgumentParser()
+    options = (('-dd', '--dev_dir'), ('-td', '--test_dir'), ('-ddp', '--dev_df_path'), ('-tdp', '--test_df_path'))
+    for short_flag, long_flag in options:
+        parser.add_argument(short_flag, long_flag, type=str)
+    return vars(parser.parse_args())
+
+
+if __name__ == "__main__":
+    fact_ru_eval_preprocess(**parse_args())
diff --git a/modules/data/fre/reader.py b/modules/data/fre/reader.py
new file mode 100644
index 0000000..e2cb893
--- /dev/null
+++ b/modules/data/fre/reader.py
@@ -0,0 +1,68 @@
+# -*- coding: utf-8 -*-
+import pandas as pd
+from .utils import get_file_names
+from .entity.document import Document
+
+
+class Reader(object):
+    """Collect the documents found under a directory and expose their tokens and tags."""
+
+    def __init__(self, dir_path,
+                 document_creator=Document,
+                 get_file_names_=get_file_names,
+                 tagged=True):
+        self.path = dir_path
+        self.tagged = tagged
+        self.documents = []
+        self.document_creator = document_creator
+        self.get_file_names = get_file_names_
+
+    def split(self, use_morph=False):
+        """Cut every loaded document into sentences at each "." token tagged "O".
+
+        The delimiter is not stored and tokens after a document's last delimiter are dropped.
+        Returns two parallel lists of tuples (tokens, tags); `use_morph` has no effect.
+        """
+        res_texts, res_tags = [], []
+        for doc in self.documents:
+            sent_tokens, sent_tags = [], []
+            for token in doc.tagged_tokens:
+                if token.get_tag() == "O" and token.text == ".":
+                    # Close the current sentence and start collecting the next one.
+                    res_texts.append(tuple(sent_tokens))
+                    res_tags.append(tuple(sent_tags))
+                    sent_tokens, sent_tags = [], []
+                else:
+                    sent_tokens.append(token.text)
+                    sent_tags.append(token.get_tag())
+        return res_texts, res_tags
+
+    def to_data_frame(self, split=False):
+        """Return a DataFrame with space-joined "texts" and "tags" columns.
+
+        One row per sentence when `split` is true, otherwise one row per document.
+        """
+        if split:
+            # split() returns (texts, tags); pair them up sentence by sentence.
+            docs = [list(zip(s_tokens, s_tags)) for s_tokens, s_tags in zip(*self.split())]
+        else:
+            docs = [[(token.text, token.get_tag()) for token in doc.tagged_tokens]
+                    for doc in self.documents]
+        texts = []
+        tags = []
+        for sent in docs:
+            sample_text = [text for text, _ in sent]
+            sample_tag = [tag for _, tag in sent]
+            texts.append(" ".join(sample_text))
+            tags.append(" ".join(sample_tag))
+        return pd.DataFrame({"texts": texts, "tags": tags}, columns=["texts", "tags"])
+
+    def read_dir(self):
+        for path in self.get_file_names(self.path):
+            self.documents.append(self.document_creator(path, self.tagged))
+
+    def get_text_tokens(self):
+        return [doc.to_text_tokens() for doc in self.documents]
+
+    def get_text_tags(self):
+        return [doc.get_tags() for doc in self.documents]
diff --git a/modules/data/fre/utils.py b/modules/data/fre/utils.py
new file mode 100644
index 0000000..029ccf9
--- /dev/null
+++ b/modules/data/fre/utils.py
@@ -0,0 +1,10 @@
+import os
+
+
+def get_file_names(path):
+    """Return the extension-less paths of every ".tokens" file found under `path`."""
+    found = []
+    for root, _dirs, names in os.walk(path):
+        stems = [os.path.splitext(name)[0] for name in names if name.endswith('.tokens')]
+        found.extend(os.path.join(root, stem) for stem in stems)
+    return found
diff --git a/modules/data/tokenization.py b/modules/data/tokenization.py
deleted file mode 100644
index f1a37b4..0000000
--- a/modules/data/tokenization.py
+++ /dev/null
@@ -1,291 +0,0 @@
-# coding=utf-8
-# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Tokenization classes."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import collections
-import unicodedata
-import six
-
-
-def convert_to_unicode(text):
-    """Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
-    if six.PY3:
-        if isinstance(text, str):
-            return text
-        elif isinstance(text, bytes):
-            return text.decode("utf-8", "ignore")
-        else:
-            raise ValueError("Unsupported string type: %s" % (type(text)))
-    elif six.PY2:
-        if isinstance(text, str):
-            return text.decode("utf-8", "ignore")
-        elif isinstance(text, unicode):
-            return text
-        else:
-            raise ValueError("Unsupported string type: %s" % (type(text)))
-    else:
-        raise ValueError("Not running on Python2 or Python 3?")
-
-
-def printable_text(text):
-    """Returns text encoded in a way suitable for print or `tf.logging`."""
-
-    # These functions want `str` for both Python2 and Python3, but in one case
-    # it's a Unicode string and in the other it's a byte string.
-    if six.PY3:
-        if isinstance(text, str):
-            return text
-        elif isinstance(text, bytes):
-            return text.decode("utf-8", "ignore")
-        else:
-            raise ValueError("Unsupported string type: %s" % (type(text)))
-    elif six.PY2:
-        if isinstance(text, str):
-            return text
-        elif isinstance(text, unicode):
-            return text.encode("utf-8")
-        else:
-            raise ValueError("Unsupported string type: %s" % (type(text)))
-    else:
-        raise ValueError("Not running on Python2 or Python 3?")
-
-
-def load_vocab(vocab_file):
-    """Loads a vocabulary file into a dictionary."""
-    vocab = collections.OrderedDict()
-    index = 0
-    with open(vocab_file, "r") as reader:
-        while True:
-            token = convert_to_unicode(reader.readline())
-            if not token:
-                break
-            token = token.strip()
-            vocab[token] = index
-            index += 1
-    return vocab
-
-
-def convert_tokens_to_ids(vocab, tokens):
-    """Converts a sequence of tokens into ids using the vocab."""
-    ids = []
-    for token in tokens:
-        ids.append(vocab[token])
-    return ids
-
-
-def whitespace_tokenize(text):
-    """Runs basic whitespace cleaning and splitting on a peice of text."""
-    text = text.strip()
-    if not text:
-        return []
-    tokens = text.split()
-    return tokens
-
-
-class FullTokenizer(object):
-    """Runs end-to-end tokenziation."""
-
-    def __init__(self, vocab_file, do_lower_case=True):
-        self.vocab = load_vocab(vocab_file)
-        self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case)
-        self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab)
-
-    def tokenize(self, text):
-        split_tokens = []
-        for token in self.basic_tokenizer.tokenize(text):
-            for sub_token in self.wordpiece_tokenizer.tokenize(token):
-                split_tokens.append(sub_token)
-
-        return split_tokens
-
-    def convert_tokens_to_ids(self, tokens):
-        return convert_tokens_to_ids(self.vocab, tokens)
-
-
-class BasicTokenizer(object):
-    """Runs basic tokenization (punctuation splitting, lower casing, etc.)."""
-
-    def __init__(self, do_lower_case=True):
-        """Constructs a BasicTokenizer.
-
-        Args:
-          do_lower_case: Whether to lower case the input.
-        """
-        self.do_lower_case = do_lower_case
-
-    def tokenize(self, text):
-        """Tokenizes a piece of text."""
-        text = convert_to_unicode(text)
-        text = self._clean_text(text)
-        orig_tokens = whitespace_tokenize(text)
-        split_tokens = []
-        for token in orig_tokens:
-            if self.do_lower_case:
-                token = token.lower()
-                token = self._run_strip_accents(token)
-            split_tokens.extend(self._run_split_on_punc(token))
-
-        output_tokens = whitespace_tokenize(" ".join(split_tokens))
-        return output_tokens
-
-    def _run_strip_accents(self, text):
-        """Strips accents from a piece of text."""
-        text = unicodedata.normalize("NFD", text)
-        output = []
-        for char in text:
-            cat = unicodedata.category(char)
-            if cat == "Mn":
-                continue
-            output.append(char)
-        return "".join(output)
-
-    def _run_split_on_punc(self, text):
-        """Splits punctuation on a piece of text."""
-        chars = list(text)
-        i = 0
-        start_new_word = True
-        output = []
-        while i < len(chars):
-            char = chars[i]
-            if _is_punctuation(char):
-                output.append([char])
-                start_new_word = True
-            else:
-                if start_new_word:
-                    output.append([])
-                start_new_word = False
-                output[-1].append(char)
-            i += 1
-
-        return ["".join(x) for x in output]
-
-    def _clean_text(self, text):
-        """Performs invalid character removal and whitespace cleanup on text."""
-        output = []
-        for char in text:
-            cp = ord(char)
-            if cp == 0 or cp == 0xfffd or _is_control(char):
-                continue
-            if _is_whitespace(char):
-                output.append(" ")
-            else:
-                output.append(char)
-        return "".join(output)
-
-
-class WordpieceTokenizer(object):
-    """Runs WordPiece tokenization."""
-
-    def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=100):
-        self.vocab = vocab
-        self.unk_token = unk_token
-        self.max_input_chars_per_word = max_input_chars_per_word
-
-    def tokenize(self, text):
-        """Tokenizes a piece of text into its word pieces.
-
-        This uses a greedy longest-match-first algorithm to perform tokenization
-        using the given vocabulary.
-
-        For example:
-          input = "unaffable"
-          output = ["un", "##aff", "##able"]
-
-        Args:
-          text: A single token or whitespace separated tokens. This should have
-            already been passed through `BasicTokenizer.
-
-        Returns:
-          A list of wordpiece tokens.
-        """
-
-        text = convert_to_unicode(text)
-
-        output_tokens = []
-        for token in whitespace_tokenize(text):
-            chars = list(token)
-            if len(chars) > self.max_input_chars_per_word:
-                output_tokens.append(self.unk_token)
-                continue
-
-            is_bad = False
-            start = 0
-            sub_tokens = []
-            while start < len(chars):
-                end = len(chars)
-                cur_substr = None
-                while start < end:
-                    substr = "".join(chars[start:end])
-                    if start > 0:
-                        substr = "##" + substr
-                    if substr in self.vocab:
-                        cur_substr = substr
-                        break
-                    end -= 1
-                if cur_substr is None:
-                    is_bad = True
-                    break
-                sub_tokens.append(cur_substr)
-                start = end
-
-            if is_bad:
-                output_tokens.append(self.unk_token)
-            else:
-                output_tokens.extend(sub_tokens)
-        return output_tokens
-
-
-def _is_whitespace(char):
-    """Checks whether `chars` is a whitespace character."""
-    # \t, \n, and \r are technically contorl characters but we treat them
-    # as whitespace since they are generally considered as such.
-    if char == " " or char == "\t" or char == "\n" or char == "\r":
-        return True
-    cat = unicodedata.category(char)
-    if cat == "Zs":
-        return True
-    return False
-
-
-def _is_control(char):
-    """Checks whether `chars` is a control character."""
-    # These are technically control characters but we count them as whitespace
-    # characters.
-    if char == "\t" or char == "\n" or char == "\r":
-        return False
-    cat = unicodedata.category(char)
-    if cat.startswith("C"):
-        return True
-    return False
-
-
-def _is_punctuation(char):
-    """Checks whether `chars` is a punctuation character."""
-    cp = ord(char)
-    # We treat all non-letter/number ASCII as punctuation.
-    # Characters such as "^", "$", and "`" are not in the Unicode
-    # Punctuation class but we treat them as punctuation anyways, for
-    # consistency.
-    if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or
-            (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)):
-        return True
-    cat = unicodedata.category(char)
-    if cat.startswith("P"):
-        return True
-    return False
diff --git a/modules/layers/bert_modeling.py b/modules/layers/bert_modeling.py
deleted file mode 100644
index 9c6fa38..0000000
--- a/modules/layers/bert_modeling.py
+++ /dev/null
@@ -1,474 +0,0 @@
-# coding=utf-8
-# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""PyTorch BERT model."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import copy
-import json
-import math
-import six
-import torch
-import torch.nn as nn
-from torch.nn import CrossEntropyLoss
-
-def gelu(x):
-    """Implementation of the gelu activation function.
-        For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
-        0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
-    """
-    return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0)))
-
-
-class BertConfig(object):
-    """Configuration class to store the configuration of a `BertModel`.
-    """
-    def __init__(self,
-                vocab_size,
-                hidden_size=768,
-                num_hidden_layers=12,
-                num_attention_heads=12,
-                intermediate_size=3072,
-                hidden_act="gelu",
-                hidden_dropout_prob=0.1,
-                attention_probs_dropout_prob=0.1,
-                max_position_embeddings=512,
-                type_vocab_size=16,
-                initializer_range=0.02):
-        """Constructs BertConfig.
-
-        Args:
-            vocab_size: Vocabulary size of `inputs_ids` in `BertModel`.
-            hidden_size: Size of the encoder layers and the pooler layer.
-            num_hidden_layers: Number of hidden layers in the Transformer encoder.
-            num_attention_heads: Number of attention heads for each attention layer in
-                the Transformer encoder.
-            intermediate_size: The size of the "intermediate" (i.e., feed-forward)
-                layer in the Transformer encoder.
-            hidden_act: The non-linear activation function (function or string) in the
-                encoder and pooler.
-            hidden_dropout_prob: The dropout probabilitiy for all fully connected
-                layers in the embeddings, encoder, and pooler.
-            attention_probs_dropout_prob: The dropout ratio for the attention
-                probabilities.
-            max_position_embeddings: The maximum sequence length that this model might
-                ever be used with. Typically set this to something large just in case
-                (e.g., 512 or 1024 or 2048).
-            type_vocab_size: The vocabulary size of the `token_type_ids` passed into
-                `BertModel`.
-            initializer_range: The sttdev of the truncated_normal_initializer for
-                initializing all weight matrices.
-        """
-        self.vocab_size = vocab_size
-        self.hidden_size = hidden_size
-        self.num_hidden_layers = num_hidden_layers
-        self.num_attention_heads = num_attention_heads
-        self.hidden_act = hidden_act
-        self.intermediate_size = intermediate_size
-        self.hidden_dropout_prob = hidden_dropout_prob
-        self.attention_probs_dropout_prob = attention_probs_dropout_prob
-        self.max_position_embeddings = max_position_embeddings
-        self.type_vocab_size = type_vocab_size
-        self.initializer_range = initializer_range
-
-    @classmethod
-    def from_dict(cls, json_object):
-        """Constructs a `BertConfig` from a Python dictionary of parameters."""
-        config = BertConfig(vocab_size=None)
-        for (key, value) in six.iteritems(json_object):
-            config.__dict__[key] = value
-        return config
-
-    @classmethod
-    def from_json_file(cls, json_file):
-        """Constructs a `BertConfig` from a json file of parameters."""
-        with open(json_file, "r") as reader:
-            text = reader.read()
-        return cls.from_dict(json.loads(text))
-
-    def to_dict(self):
-        """Serializes this instance to a Python dictionary."""
-        output = copy.deepcopy(self.__dict__)
-        return output
-
-    def to_json_string(self):
-        """Serializes this instance to a JSON string."""
-        return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"
-
-
-class BERTLayerNorm(nn.Module):
-    def __init__(self, config, variance_epsilon=1e-12):
-        """Construct a layernorm module in the TF style (epsilon inside the square root).
-        """
-        super(BERTLayerNorm, self).__init__()
-        self.gamma = nn.Parameter(torch.ones(config.hidden_size))
-        self.beta = nn.Parameter(torch.zeros(config.hidden_size))
-        self.variance_epsilon = variance_epsilon
-
-    def forward(self, x):
-        u = x.mean(-1, keepdim=True)
-        s = (x - u).pow(2).mean(-1, keepdim=True)
-        x = (x - u) / torch.sqrt(s + self.variance_epsilon)
-        return self.gamma * x + self.beta
-
-class BERTEmbeddings(nn.Module):
-    def __init__(self, config):
-        super(BERTEmbeddings, self).__init__()
-        """Construct the embedding module from word, position and token_type embeddings.
-        """
-        self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size)
-        self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
-        self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)
-
-        # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
-        # any TensorFlow checkpoint file
-        self.LayerNorm = BERTLayerNorm(config)
-        self.dropout = nn.Dropout(config.hidden_dropout_prob)
-
-    def forward(self, input_ids, token_type_ids=None):
-        seq_length = input_ids.size(1)
-        position_ids = torch.arange(seq_length, dtype=torch.long, device=input_ids.device)
-        position_ids = position_ids.unsqueeze(0).expand_as(input_ids)
-        if token_type_ids is None:
-            token_type_ids = torch.zeros_like(input_ids)
-
-        words_embeddings = self.word_embeddings(input_ids)
-        position_embeddings = self.position_embeddings(position_ids)
-        token_type_embeddings = self.token_type_embeddings(token_type_ids)
-
-        embeddings = words_embeddings + position_embeddings + token_type_embeddings
-        embeddings = self.LayerNorm(embeddings)
-        embeddings = self.dropout(embeddings)
-        return embeddings
-
-
-class BERTSelfAttention(nn.Module):
-    def __init__(self, config):
-        super(BERTSelfAttention, self).__init__()
-        if config.hidden_size % config.num_attention_heads != 0:
-            raise ValueError(
-                "The hidden size (%d) is not a multiple of the number of attention "
-                "heads (%d)" % (config.hidden_size, config.num_attention_heads))
-        self.num_attention_heads = config.num_attention_heads
-        self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
-        self.all_head_size = self.num_attention_heads * self.attention_head_size
-
-        self.query = nn.Linear(config.hidden_size, self.all_head_size)
-        self.key = nn.Linear(config.hidden_size, self.all_head_size)
-        self.value = nn.Linear(config.hidden_size, self.all_head_size)
-
-        self.dropout = nn.Dropout(config.attention_probs_dropout_prob)
-
-    def transpose_for_scores(self, x):
-        new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
-        x = x.view(*new_x_shape)
-        return x.permute(0, 2, 1, 3)
-
-    def forward(self, hidden_states, attention_mask):
-        mixed_query_layer = self.query(hidden_states)
-        mixed_key_layer = self.key(hidden_states)
-        mixed_value_layer = self.value(hidden_states)
-
-        query_layer = self.transpose_for_scores(mixed_query_layer)
-        key_layer = self.transpose_for_scores(mixed_key_layer)
-        value_layer = self.transpose_for_scores(mixed_value_layer)
-
-        # Take the dot product between "query" and "key" to get the raw attention scores.
-        attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
-        attention_scores = attention_scores / math.sqrt(self.attention_head_size)
-        # Apply the attention mask is (precomputed for all layers in BertModel forward() function)
-        attention_scores = attention_scores + attention_mask
-
-        # Normalize the attention scores to probabilities.
-        attention_probs = nn.Softmax(dim=-1)(attention_scores)
-
-        # This is actually dropping out entire tokens to attend to, which might
-        # seem a bit unusual, but is taken from the original Transformer paper.
-        attention_probs = self.dropout(attention_probs)
-
-        context_layer = torch.matmul(attention_probs, value_layer)
-        context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
-        new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
-        context_layer = context_layer.view(*new_context_layer_shape)
-        return context_layer
-
-
-class BERTSelfOutput(nn.Module):
-    def __init__(self, config):
-        super(BERTSelfOutput, self).__init__()
-        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
-        self.LayerNorm = BERTLayerNorm(config)
-        self.dropout = nn.Dropout(config.hidden_dropout_prob)
-
-    def forward(self, hidden_states, input_tensor):
-        hidden_states = self.dense(hidden_states)
-        hidden_states = self.dropout(hidden_states)
-        hidden_states = self.LayerNorm(hidden_states + input_tensor)
-        return hidden_states
-
-
-class BERTAttention(nn.Module):
-    def __init__(self, config):
-        super(BERTAttention, self).__init__()
-        self.self = BERTSelfAttention(config)
-        self.output = BERTSelfOutput(config)
-
-    def forward(self, input_tensor, attention_mask):
-        self_output = self.self(input_tensor, attention_mask)
-        attention_output = self.output(self_output, input_tensor)
-        return attention_output
-
-
-class BERTIntermediate(nn.Module):
-    def __init__(self, config):
-        super(BERTIntermediate, self).__init__()
-        self.dense = nn.Linear(config.hidden_size, config.intermediate_size)
-        self.intermediate_act_fn = gelu
-
-    def forward(self, hidden_states):
-        hidden_states = self.dense(hidden_states)
-        hidden_states = self.intermediate_act_fn(hidden_states)
-        return hidden_states
-
-
-class BERTOutput(nn.Module):
-    def __init__(self, config):
-        super(BERTOutput, self).__init__()
-        self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
-        self.LayerNorm = BERTLayerNorm(config)
-        self.dropout = nn.Dropout(config.hidden_dropout_prob)
-
-    def forward(self, hidden_states, input_tensor):
-        hidden_states = self.dense(hidden_states)
-        hidden_states = self.dropout(hidden_states)
-        hidden_states = self.LayerNorm(hidden_states + input_tensor)
-        return hidden_states
-
-
-class BERTLayer(nn.Module):
-    def __init__(self, config):
-        super(BERTLayer, self).__init__()
-        self.attention = BERTAttention(config)
-        self.intermediate = BERTIntermediate(config)
-        self.output = BERTOutput(config)
-
-    def forward(self, hidden_states, attention_mask):
-        attention_output = self.attention(hidden_states, attention_mask)
-        intermediate_output = self.intermediate(attention_output)
-        layer_output = self.output(intermediate_output, attention_output)
-        return layer_output
-
-
-class BERTEncoder(nn.Module):
-    def __init__(self, config):
-        super(BERTEncoder, self).__init__()
-        layer = BERTLayer(config)
-        self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(config.num_hidden_layers)])    
-
-    def forward(self, hidden_states, attention_mask):
-        all_encoder_layers = []
-        for layer_module in self.layer:
-            hidden_states = layer_module(hidden_states, attention_mask)
-            all_encoder_layers.append(hidden_states)
-        return all_encoder_layers
-
-
-class BERTPooler(nn.Module):
-    def __init__(self, config):
-        super(BERTPooler, self).__init__()
-        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
-        self.activation = nn.Tanh()
-
-    def forward(self, hidden_states):
-        # We "pool" the model by simply taking the hidden state corresponding
-        # to the first token.
-        first_token_tensor = hidden_states[:, 0]
-        pooled_output = self.dense(first_token_tensor)
-        pooled_output = self.activation(pooled_output)
-        return pooled_output
-
-
-class BertModel(nn.Module):
-    """BERT model ("Bidirectional Embedding Representations from a Transformer").
-
-    Example usage:
-    ```python
-    # Already been converted into WordPiece token ids
-    input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]])
-    input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]])
-    token_type_ids = torch.LongTensor([[0, 0, 1], [0, 2, 0]])
-
-    config = modeling.BertConfig(vocab_size=32000, hidden_size=512,
-        num_hidden_layers=8, num_attention_heads=6, intermediate_size=1024)
-
-    model = modeling.BertModel(config=config)
-    all_encoder_layers, pooled_output = model(input_ids, token_type_ids, input_mask)
-    ```
-    """
-    def __init__(self, config: BertConfig):
-        """Constructor for BertModel.
-
-        Args:
-            config: `BertConfig` instance.
-        """
-        super(BertModel, self).__init__()
-        self.embeddings = BERTEmbeddings(config)
-        self.encoder = BERTEncoder(config)
-        self.pooler = BERTPooler(config)
-
-    def forward(self, input_ids, token_type_ids=None, attention_mask=None):
-        if attention_mask is None:
-            attention_mask = torch.ones_like(input_ids)
-        if token_type_ids is None:
-            token_type_ids = torch.zeros_like(input_ids)
-
-        # We create a 3D attention mask from a 2D tensor mask.
-        # Sizes are [batch_size, 1, 1, to_seq_length]
-        # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length]
-        # this attention mask is more simple than the triangular masking of causal attention
-        # used in OpenAI GPT, we just need to prepare the broadcast dimension here.
-        extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2)
-
-        # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
-        # masked positions, this operation will create a tensor which is 0.0 for
-        # positions we want to attend and -10000.0 for masked positions.
-        # Since we are adding it to the raw scores before the softmax, this is
-        # effectively the same as removing these entirely.
-        extended_attention_mask = extended_attention_mask.float()
-        extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0
-
-        embedding_output = self.embeddings(input_ids, token_type_ids)
-        all_encoder_layers = self.encoder(embedding_output, extended_attention_mask)
-        sequence_output = all_encoder_layers[-1]
-        pooled_output = self.pooler(sequence_output)
-        return all_encoder_layers, pooled_output
-
-class BertForSequenceClassification(nn.Module):
-    """BERT model for classification.
-    This module is composed of the BERT model with a linear layer on top of
-    the pooled output.
-
-    Example usage:
-    ```python
-    # Already been converted into WordPiece token ids
-    input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]])
-    input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]])
-    token_type_ids = torch.LongTensor([[0, 0, 1], [0, 2, 0]])
-
-    config = BertConfig(vocab_size=32000, hidden_size=512,
-        num_hidden_layers=8, num_attention_heads=6, intermediate_size=1024)
-
-    num_labels = 2
-
-    model = BertForSequenceClassification(config, num_labels)
-    logits = model(input_ids, token_type_ids, input_mask)
-    ```
-    """
-    def __init__(self, config, num_labels):
-        super(BertForSequenceClassification, self).__init__()
-        self.bert = BertModel(config)
-        self.dropout = nn.Dropout(config.hidden_dropout_prob)
-        self.classifier = nn.Linear(config.hidden_size, num_labels)
-
-        def init_weights(module):
-            if isinstance(module, (nn.Linear, nn.Embedding)):
-                # Slightly different from the TF version which uses truncated_normal for initialization
-                # cf https://github.com/pytorch/pytorch/pull/5617
-                module.weight.data.normal_(mean=0.0, std=config.initializer_range)
-            elif isinstance(module, BERTLayerNorm):
-                module.beta.data.normal_(mean=0.0, std=config.initializer_range)
-                module.gamma.data.normal_(mean=0.0, std=config.initializer_range)
-            if isinstance(module, nn.Linear):
-                module.bias.data.zero_()
-        self.apply(init_weights)
-
-    def forward(self, input_ids, token_type_ids, attention_mask, labels=None):
-        _, pooled_output = self.bert(input_ids, token_type_ids, attention_mask)
-        pooled_output = self.dropout(pooled_output)
-        logits = self.classifier(pooled_output)
-
-        if labels is not None:
-            loss_fct = CrossEntropyLoss()
-            loss = loss_fct(logits, labels)
-            return loss, logits
-        else:
-            return logits
-
-class BertForQuestionAnswering(nn.Module):
-    """BERT model for Question Answering (span extraction).
-    This module is composed of the BERT model with a linear layer on top of
-    the sequence output that computes start_logits and end_logits
-
-    Example usage:
-    ```python
-    # Already been converted into WordPiece token ids
-    input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]])
-    input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]])
-    token_type_ids = torch.LongTensor([[0, 0, 1], [0, 2, 0]])
-
-    config = BertConfig(vocab_size=32000, hidden_size=512,
-        num_hidden_layers=8, num_attention_heads=6, intermediate_size=1024)
-
-    model = BertForQuestionAnswering(config)
-    start_logits, end_logits = model(input_ids, token_type_ids, input_mask)
-    ```
-    """
-    def __init__(self, config):
-        super(BertForQuestionAnswering, self).__init__()
-        self.bert = BertModel(config)
-        # TODO check with Google if it's normal there is no dropout on the token classifier of SQuAD in the TF version
-        # self.dropout = nn.Dropout(config.hidden_dropout_prob)
-        self.qa_outputs = nn.Linear(config.hidden_size, 2)
-
-        def init_weights(module):
-            if isinstance(module, (nn.Linear, nn.Embedding)):
-                # Slightly different from the TF version which uses truncated_normal for initialization
-                # cf https://github.com/pytorch/pytorch/pull/5617
-                module.weight.data.normal_(mean=0.0, std=config.initializer_range)
-            elif isinstance(module, BERTLayerNorm):
-                module.beta.data.normal_(mean=0.0, std=config.initializer_range)
-                module.gamma.data.normal_(mean=0.0, std=config.initializer_range)
-            if isinstance(module, nn.Linear):
-                module.bias.data.zero_()
-        self.apply(init_weights)
-
-    def forward(self, input_ids, token_type_ids, attention_mask, start_positions=None, end_positions=None):
-        all_encoder_layers, _ = self.bert(input_ids, token_type_ids, attention_mask)
-        sequence_output = all_encoder_layers[-1]
-        logits = self.qa_outputs(sequence_output)
-        start_logits, end_logits = logits.split(1, dim=-1)
-        start_logits = start_logits.squeeze(-1)
-        end_logits = end_logits.squeeze(-1)
-
-        if start_positions is not None and end_positions is not None:
-            # If we are on multi-GPU, split add a dimension
-            if len(start_positions.size()) > 1:
-                start_positions = start_positions.squeeze(-1)
-            if len(end_positions.size()) > 1:
-                end_positions = end_positions.squeeze(-1)
-            # sometimes the start/end positions are outside our model inputs, we ignore these terms
-            ignored_index = start_logits.size(1)
-            start_positions.clamp_(0, ignored_index)
-            end_positions.clamp_(0, ignored_index)
-
-            loss_fct = CrossEntropyLoss(ignore_index=ignored_index)
-            start_loss = loss_fct(start_logits, start_positions)
-            end_loss = loss_fct(end_logits, end_positions)
-            total_loss = (start_loss + end_loss) / 2
-            return total_loss
-        else:
-            return start_logits, end_logits
diff --git a/modules/layers/crf.py b/modules/layers/crf.py
index b9b824d..f241098 100644
--- a/modules/layers/crf.py
+++ b/modules/layers/crf.py
@@ -29,6 +29,9 @@ def sequence_mask(lens, max_len=None):
 
 
 class CRF(nn.Module):
+    def forward(self, *input_):
+        return self.viterbi_decode(*input_)
+
     def __init__(self, label_size):
         super(CRF, self).__init__()
 
@@ -43,7 +46,8 @@ def initialize(self):
         self.transition.data[:, self.end] = -100.0
         self.transition.data[self.start, :] = -100.0
 
-    def pad_logits(self, logits):
+    @staticmethod
+    def pad_logits(logits):
         # lens = lens.data
         batch_size, seq_len, label_num = logits.size()
         # pads = Variable(logits.data.new(batch_size, seq_len, 2).fill_(-1000.0),
@@ -83,7 +87,8 @@ def calc_binary_score(self, labels, lens):
 
         return score
 
-    def calc_unary_score(self, logits, labels, lens):
+    @staticmethod
+    def calc_unary_score(logits, labels, lens):
         labels_exp = labels.unsqueeze(-1)
         scores = torch.gather(logits, 2, labels_exp).squeeze(-1)
         mask = sequence_mask(lens).float()
diff --git a/modules/layers/decoders.py b/modules/layers/decoders.py
index f020e6f..452abed 100644
--- a/modules/layers/decoders.py
+++ b/modules/layers/decoders.py
@@ -2,7 +2,7 @@
 from torch.nn import functional
 from torch.autograd import Variable
 from torch import nn
-from .layers import Linears, MultiHeadAttention
+from .layers import Linears
 from .crf import CRF
 from .ncrf import NCRF
 
@@ -28,12 +28,10 @@ def forward_model(self, inputs):
         return output
 
     def forward(self, inputs, labels_mask):
-        self.eval()
         lens = labels_mask.sum(-1)
         logits = self.forward_model(inputs)
         logits = self.crf.pad_logits(logits)
         scores, preds = self.crf.viterbi_decode(logits, lens)
-        self.train()
         return preds
 
     def score(self, inputs, labels_mask, labels):
@@ -47,61 +45,14 @@ def score(self, inputs, labels_mask, labels):
 
     @classmethod
     def create(cls, label_size, input_dim, input_dropout=0.5):
-        return cls(CRF(label_size+2), label_size, input_dim, input_dropout)
-
-
-class AttnCRFDecoder(nn.Module):
-    def __init__(self,
-                 crf, label_size, input_dim, input_dropout=0.5,
-                 key_dim=64, val_dim=64, num_heads=3):
-        super(AttnCRFDecoder, self).__init__()
-        self.input_dim = input_dim
-        self.attn = MultiHeadAttention(key_dim, val_dim, input_dim, num_heads, input_dropout)
-        self.linear = Linears(in_features=input_dim,
-                              out_features=label_size,
-                              hiddens=[input_dim // 2])
-        self.crf = crf
-        self.label_size = label_size
-
-    def forward_model(self, inputs, labels_mask=None):
-        batch_size, seq_len, input_dim = inputs.size()
-        inputs, _ = self.attn(inputs, inputs, inputs, labels_mask)
-        
-        output = inputs.contiguous().view(-1, self.input_dim)
-        # Fully-connected layer
-        output = self.linear.forward(output)
-        output = output.view(batch_size, seq_len, self.label_size)
-        return output
-
-    def forward(self, inputs, labels_mask):
-        self.eval()
-        lens = labels_mask.sum(-1)
-        logits = self.forward_model(inputs)
-        logits = self.crf.pad_logits(logits)
-        scores, preds = self.crf.viterbi_decode(logits, lens)
-        self.train()
-        return preds
-
-    def score(self, inputs, labels_mask, labels):
-        lens = labels_mask.sum(-1)
-        logits = self.forward_model(inputs)
-        logits = self.crf.pad_logits(logits)
-        norm_score = self.crf.calc_norm_score(logits, lens)
-        gold_score = self.crf.calc_gold_score(logits, labels, lens)
-        loglik = gold_score - norm_score
-        return -loglik.mean()
-
-    @classmethod
-    def create(cls, label_size, input_dim, input_dropout=0.5, key_dim=64, val_dim=64, num_heads=3):
-        return cls(CRF(label_size+2), label_size, input_dim, input_dropout,
-                   key_dim, val_dim, num_heads)
+        return cls(CRF(label_size + 2), label_size, input_dim, input_dropout)
 
 
 class NMTDecoder(nn.Module):
     def __init__(self,
                  label_size,
                  embedding_dim=64, hidden_dim=256, rnn_layers=1,
-                 dropout_p=0.1, pad_idx=0, use_cuda=True):
+                 dropout_p=0.1, pad_idx=0):
         super(NMTDecoder, self).__init__()
         self.slot_size = label_size
         self.pad_idx = pad_idx
@@ -118,11 +69,6 @@ def __init__(self,
 
         self.loss = nn.CrossEntropyLoss(ignore_index=pad_idx)
 
-        self.use_cuda = use_cuda
-
-        if use_cuda:
-            self.cuda()
-
         self.init_weights()
 
     def init_weights(self):
@@ -185,9 +131,9 @@ def forward_model(self, encoder_outputs, input_mask):
             aligned = aligns[i].unsqueeze(1)
             # input, context, aligned encoder hidden, hidden
             _, hidden = self.lstm(torch.cat((embedded, context, aligned), 2))
-            
+
             # print(hidden[0].shape, context.transpose(0, 1).shape)
-            
+
             concated = torch.cat((hidden[0], context.transpose(0, 1)), 2)
             score = self.slot_out(concated.squeeze(0))
             softmaxed = functional.log_softmax(score)
@@ -212,144 +158,17 @@ def score(self, encoder_outputs, input_mask, labels_ids):
 
     @classmethod
     def create(cls, label_size,
-               embedding_dim=64, hidden_dim=256, rnn_layers=1, dropout_p=0.1, pad_idx=0, use_cuda=True):
+               embedding_dim=64, hidden_dim=256, rnn_layers=1, dropout_p=0.1, pad_idx=0):
         return cls(label_size=label_size,
                    embedding_dim=embedding_dim, hidden_dim=hidden_dim,
-                   rnn_layers=rnn_layers, dropout_p=dropout_p, pad_idx=pad_idx, use_cuda=use_cuda)
-
-
-class NMTCRFDecoder(nn.Module):
-    def __init__(self,
-                 label_size, crf,
-                 embedding_dim=64, hidden_dim=256, rnn_layers=1,
-                 dropout_p=0.1, pad_idx=0, use_cuda=True):
-        super(NMTCRFDecoder, self).__init__()
-        self.slot_size = label_size
-        self.pad_idx = pad_idx
-        self.embedding_dim = embedding_dim
-        self.hidden_dim = hidden_dim
-        self.rnn_layers = rnn_layers
-        self.dropout_p = dropout_p
-        self.embedding = nn.Embedding(self.slot_size, self.embedding_dim)
-        self.lstm = nn.LSTM(self.embedding_dim + self.hidden_dim * 2,
-                            self.hidden_dim, self.rnn_layers,
-                            batch_first=True)
-        self.attn = nn.Linear(self.hidden_dim, self.hidden_dim)
-        self.slot_out = nn.Linear(self.hidden_dim * 2, self.slot_size)
-
-        self.loss = nn.CrossEntropyLoss(ignore_index=pad_idx)
-        self.crf = crf
-
-        self.use_cuda = use_cuda
-
-        if use_cuda:
-            self.cuda()
-
-        self.init_weights()
-
-    def init_weights(self):
-        nn.init.xavier_normal(self.embedding.weight)
-        nn.init.xavier_normal(self.attn.weight)
-        nn.init.xavier_normal(self.slot_out.weight)
-
-    def attention(self, hidden, encoder_outputs, input_mask):
-        """
-        hidden : 1,B,D
-        encoder_outputs : B,T,D
-        input_mask : B,T # ByteTensor
-        """
-        input_mask = input_mask == 0
-        hidden = hidden.squeeze(0).unsqueeze(2)
-
-        # B
-        batch_size = encoder_outputs.size(0)
-        # T
-        max_len = encoder_outputs.size(1)
-        # B*T,D -> B*T,D
-        energies = self.attn(encoder_outputs.contiguous().view(batch_size * max_len, -1))
-        energies = energies.view(batch_size, max_len, -1)
-        # B,T,D * B,D,1 --> B,1,T
-        attn_energies = energies.bmm(hidden).transpose(1, 2)
-        # PAD masking
-        attn_energies = attn_energies.squeeze(1).masked_fill(input_mask, -1e12)
-
-        # B,T
-        alpha = functional.softmax(attn_energies)
-        # B,1,T
-        alpha = alpha.unsqueeze(1)
-        # B,1,T * B,T,D => B,1,D
-        context = alpha.bmm(encoder_outputs)
-        # B,1,D
-        return context
-
-    def forward_model(self, encoder_outputs, input_mask):
-        real_context = []
-
-        for idx, o in enumerate(encoder_outputs):
-            real_length = input_mask[idx].sum().cpu().data.tolist()
-            real_context.append(o[real_length - 1])
-        context = torch.cat(real_context).view(encoder_outputs.size(0), -1).unsqueeze(1)
-
-        batch_size = encoder_outputs.size(0)
-
-        input_mask = input_mask == 0
-        # Get the embedding of the current input word
-
-        embedded = Variable(torch.zeros(batch_size, self.embedding_dim))
-        if self.use_cuda:
-            embedded = embedded.cuda()
-        embedded = embedded.unsqueeze(1)
-        decode = []
-        aligns = encoder_outputs.transpose(0, 1)
-        length = encoder_outputs.size(1)
-        for i in range(length):
-            # B,1,D
-            aligned = aligns[i].unsqueeze(1)
-            # input, context, aligned encoder hidden, hidden
-            # print(embedded.shape, context.shape, aligned.shape)
-            _, hidden = self.lstm(torch.cat((embedded, context, aligned), 2))
-
-            # print(hidden[0].shape, context.transpose(0, 1).shape)
-
-            concated = torch.cat((hidden[0], context.transpose(0, 1)), 2)
-            score = self.slot_out(concated.squeeze(0))
-            softmaxed = functional.log_softmax(score)
-            decode.append(softmaxed)
-            _, input = torch.max(softmaxed, 1)
-            embedded = self.embedding(input.unsqueeze(1))
-
-            context = self.attention(hidden[0], encoder_outputs, input_mask)
-        slot_scores = torch.cat(decode, 1)
-
-        # return slot_scores.view(batch_size * length, -1)
-        return slot_scores.view(batch_size, length, -1)
-
-    def forward(self, encoder_outputs, input_mask):
-        scores = self.forward_model(encoder_outputs, input_mask)
-
-        return self.crf.forward(scores, input_mask)
-
-    def score(self, encoder_outputs, input_mask, labels_ids):
-        scores = self.forward_model(encoder_outputs, input_mask)
-        crf_score = self.crf.score(scores, input_mask, labels_ids)
-        batch_size = encoder_outputs.shape[0]
-        len_ = encoder_outputs.shape[1]
-        return self.loss(scores.view(batch_size * len_, -1), labels_ids.view(-1)) + crf_score
-
-    @classmethod
-    def create(cls, label_size,
-               embedding_dim=64, hidden_dim=256, rnn_layers=1, dropout_p=0.1, pad_idx=0, use_cuda=True):
-        crf = CRFDecoder.create(label_size, label_size, input_dropout=dropout_p)
-        return cls(label_size=label_size, crf=crf,
-                   embedding_dim=embedding_dim, hidden_dim=hidden_dim,
-                   rnn_layers=rnn_layers, dropout_p=dropout_p, pad_idx=pad_idx, use_cuda=use_cuda)
+                   rnn_layers=rnn_layers, dropout_p=dropout_p, pad_idx=pad_idx)
 
 
 class PoolingLinearClassifier(nn.Module):
     """Create a linear classifier with pooling."""
 
     def __init__(self, input_dim, intent_size, input_dropout=0.5):
-        super().__init__()
+        super(PoolingLinearClassifier, self).__init__()
         self.input_dim = input_dim
         self.intent_size = intent_size
         self.input_dropout = input_dropout
@@ -371,61 +190,11 @@ def forward(self, output):
         return self.linear(x)
 
 
-class AttnCRFJointDecoder(nn.Module):
-    def __init__(self,
-                 crf, label_size, input_dim, intent_size, input_dropout=0.5,
-                 key_dim=64, val_dim=64, num_heads=3):
-        super(AttnCRFJointDecoder, self).__init__()
-        self.input_dim = input_dim
-        self.attn = MultiHeadAttention(key_dim, val_dim, input_dim, num_heads, input_dropout)
-        self.linear = Linears(in_features=input_dim,
-                              out_features=label_size,
-                              hiddens=[input_dim // 2])
-        self.crf = crf
-        self.label_size = label_size
-        self.intent_size = intent_size
-        self.intent_out = PoolingLinearClassifier(input_dim, intent_size, input_dropout)
-        self.intent_loss = nn.CrossEntropyLoss()
-
-    def forward_model(self, inputs, labels_mask=None):
-        batch_size, seq_len, input_dim = inputs.size()
-        inputs, hidden = self.attn(inputs, inputs, inputs, labels_mask)
-        intent_output = self.intent_out(inputs)
-        output = inputs.contiguous().view(-1, self.input_dim)
-        # Fully-connected layer
-        output = self.linear.forward(output)
-        output = output.view(batch_size, seq_len, self.label_size)
-        return output, intent_output
-
-    def forward(self, inputs, labels_mask):
-        self.eval()
-        lens = labels_mask.sum(-1)
-        logits, intent_output = self.forward_model(inputs)
-        logits = self.crf.pad_logits(logits)
-        scores, preds = self.crf.viterbi_decode(logits, lens)
-        self.train()
-        return preds, intent_output.argmax(-1)
-
-    def score(self, inputs, labels_mask, labels, cls_ids):
-        lens = labels_mask.sum(-1)
-        logits, intent_output = self.forward_model(inputs)
-        logits = self.crf.pad_logits(logits)
-        norm_score = self.crf.calc_norm_score(logits, lens)
-        gold_score = self.crf.calc_gold_score(logits, labels, lens)
-        loglik = gold_score - norm_score
-        return -loglik.mean() + self.intent_loss(intent_output, cls_ids)
-
-    @classmethod
-    def create(cls, label_size, input_dim, intent_size, input_dropout=0.5, key_dim=64, val_dim=64, num_heads=3):
-        return cls(CRF(label_size + 2), label_size, input_dim, intent_size, input_dropout,
-                   key_dim, val_dim, num_heads)
-
-
 class NMTJointDecoder(nn.Module):
     def __init__(self,
                  label_size, intent_size,
                  embedding_dim=64, hidden_dim=256, rnn_layers=1,
-                 dropout_p=0.1, pad_idx=0, use_cuda=True):
+                 dropout_p=0.1, pad_idx=0):
         super(NMTJointDecoder, self).__init__()
         self.slot_size = label_size
         self.intent_size = intent_size
@@ -450,11 +219,6 @@ def __init__(self,
             hiddens=[hidden_dim // 2],
             activation="relu")
 
-        self.use_cuda = use_cuda
-
-        if use_cuda:
-            self.cuda()
-
         self.init_weights()
 
     def init_weights(self):
@@ -553,127 +317,14 @@ def score(self, encoder_outputs, input_mask, labels_ids, cls_ids):
 
     @classmethod
     def create(cls, label_size, intent_size,
-               embedding_dim=64, hidden_dim=256, rnn_layers=1, dropout_p=0.1, pad_idx=0, use_cuda=True):
+               embedding_dim=64, hidden_dim=256, rnn_layers=1, dropout_p=0.1, pad_idx=0):
         return cls(label_size=label_size, intent_size=intent_size,
                    embedding_dim=embedding_dim, hidden_dim=hidden_dim,
-                   rnn_layers=rnn_layers, dropout_p=dropout_p, pad_idx=pad_idx, use_cuda=use_cuda)
+                   rnn_layers=rnn_layers, dropout_p=dropout_p, pad_idx=pad_idx)
 
 
-class AttnNCRFJointDecoder(nn.Module):
-    def __init__(self,
-                 crf, label_size, input_dim, intent_size, input_dropout=0.5,
-                 key_dim=64, val_dim=64, num_heads=3, nbest=8):
-        super(AttnNCRFJointDecoder, self).__init__()
-        self.input_dim = input_dim
-        self.attn = MultiHeadAttention(key_dim, val_dim, input_dim, num_heads, input_dropout)
-        self.linear = Linears(in_features=input_dim,
-                              out_features=label_size,
-                              hiddens=[input_dim // 2])
-        self.crf = crf
-        self.label_size = label_size
-        self.intent_size = intent_size
-        self.intent_out = PoolingLinearClassifier(input_dim, intent_size, input_dropout)
-        self.intent_loss = nn.CrossEntropyLoss()
-        self.nbest = nbest
-
-    def forward_model(self, inputs, labels_mask=None):
-        batch_size, seq_len, input_dim = inputs.size()
-        inputs, hidden = self.attn(inputs, inputs, inputs, labels_mask)
-        intent_output = self.intent_out(inputs)
-        output = inputs.contiguous().view(-1, self.input_dim)
-        # Fully-connected layer
-        output = self.linear.forward(output)
-        output = output.view(batch_size, seq_len, self.label_size)
-        return output, intent_output
-
-    def forward(self, inputs, labels_mask):
-        self.eval()
-        logits, intent_output = self.forward_model(inputs)
-        _, preds = self.crf._viterbi_decode_nbest(logits, labels_mask, self.nbest)
-        # print(preds.shape)
-        preds = preds[:, :, 0]
-        """for idx in range(len(preds)):
-            for idx_ in range(len(preds[0])):
-                if preds[idx][idx_] > 0:
-                    preds[idx][idx_] -= 1
-                else:
-                    raise"""
-        # print(preds)
-        self.train()
-        return preds, intent_output.argmax(-1)
-
-    def score(self, inputs, labels_mask, labels, cls_ids):
-        logits, intent_output = self.forward_model(inputs)
-        crf_score = self.crf.neg_log_likelihood_loss(logits, labels_mask, labels) / logits.size(0)
-        return crf_score + self.intent_loss(intent_output, cls_ids)
-
-    @classmethod
-    def create(cls, label_size, input_dim, intent_size, input_dropout=0.5, key_dim=64,
-               val_dim=64, num_heads=3, use_cuda=True, nbest=8):
-        return cls(NCRF(label_size, use_cuda), label_size + 2, input_dim, intent_size, input_dropout,
-                   key_dim, val_dim, num_heads, nbest)
-
-
-class AttnNCRFDecoder(nn.Module):
-    def __init__(self,
-                 crf, label_size, input_dim, input_dropout=0.5,
-                 key_dim=64, val_dim=64, num_heads=3, nbest=8):
-        super(AttnNCRFDecoder, self).__init__()
-        self.input_dim = input_dim
-        self.attn = MultiHeadAttention(key_dim, val_dim, input_dim, num_heads, input_dropout)
-        self.linear = Linears(in_features=input_dim,
-                              out_features=label_size,
-                              hiddens=[input_dim // 2])
-        self.nbest = nbest
-        self.crf = crf
-        self.label_size = label_size
-
-    def forward_model(self, inputs, labels_mask=None):
-        batch_size, seq_len, input_dim = inputs.size()
-        inputs, _ = self.attn(inputs, inputs, inputs, labels_mask)
-
-        output = inputs.contiguous().view(-1, self.input_dim)
-        # Fully-connected layer
-        output = self.linear.forward(output)
-        output = output.view(batch_size, seq_len, self.label_size)
-        return output
-
-    def forward(self, inputs, labels_mask):
-        self.eval()
-        logits = self.forward_model(inputs)
-        _, preds = self.crf._viterbi_decode_nbest(logits, labels_mask, self.nbest)
-        # print(preds.shape)
-        preds = preds[:, :, 0]
-        self.train()
-        return preds
-
-    def score(self, inputs, labels_mask, labels):
-        logits = self.forward_model(inputs)
-        crf_score = self.crf.neg_log_likelihood_loss(logits, labels_mask, labels) / logits.size(0)
-        return crf_score
-
-    @classmethod
-    def create(cls, label_size, input_dim, input_dropout=0.5, key_dim=64,
-               val_dim=64, num_heads=3, use_cuda=True, nbest=8):
-        return cls(NCRF(label_size, use_cuda), label_size + 2, input_dim, input_dropout,
-                   key_dim, val_dim, num_heads, nbest)
-
-    
 class NCRFDecoder(nn.Module):
 
-    # TODO: TRY TO FIX THIS SHIT (get attribute error)
-    def get_config(self):
-        config = {
-            "name": "NCRFDecoder",
-            "params": {
-                "label_size": self.label_size,
-                "input_dim": self.input_dim,
-                "input_dropout": self.dropout.p,
-                "nbest": self.nbest
-            }
-        }
-        return config
-
     def __init__(self,
                  crf, label_size, input_dim, input_dropout=0.5, nbest=8):
         super(NCRFDecoder, self).__init__()
@@ -686,7 +337,7 @@ def __init__(self,
         self.crf = crf
         self.label_size = label_size
 
-    def forward_model(self, inputs, labels_mask=None):
+    def forward_model(self, inputs):
         batch_size, seq_len, input_dim = inputs.size()
         inputs = self.dropout(inputs)
 
@@ -697,12 +348,9 @@ def forward_model(self, inputs, labels_mask=None):
         return output
 
     def forward(self, inputs, labels_mask):
-        self.eval()
         logits = self.forward_model(inputs)
         _, preds = self.crf._viterbi_decode_nbest(logits, labels_mask, self.nbest)
-        # print(preds.shape)
         preds = preds[:, :, 0]
-        self.train()
         return preds
 
     def score(self, inputs, labels_mask, labels):
@@ -715,5 +363,22 @@ def from_config(cls, config):
         return cls.create(**config)
 
     @classmethod
-    def create(cls, label_size, input_dim, input_dropout=0.5, use_cuda=True, nbest=8):
-        return cls(NCRF(label_size, use_cuda), label_size + 2, input_dim, input_dropout, nbest)
+    def create(cls, label_size, input_dim, input_dropout=0.5, nbest=8, device="cuda"):
+        return cls(NCRF(label_size, device), label_size + 2, input_dim, input_dropout, nbest)
+
+
+class ClassDecoder(nn.Module):  # classification head: PoolingLinearClassifier scores + cross-entropy loss
+
+    def __init__(self, intent_size, input_dim, input_dropout=0.3):
+        super(ClassDecoder, self).__init__()
+        self.intent_loss = nn.CrossEntropyLoss()
+        self.intent_size = intent_size
+        self.input_dropout = input_dropout
+        self.input_dim = input_dim
+        self.intent_out = PoolingLinearClassifier(input_dim, intent_size, input_dropout)
+
+    def forward(self, inputs):  # prediction: index of the highest classifier score along the last axis
+        return self.intent_out(inputs).argmax(-1)
+
+    def score(self, inputs, cls_ids):  # training loss: cross-entropy of the classifier scores against cls_ids
+        return self.intent_loss(self.intent_out(inputs), cls_ids)
diff --git a/modules/layers/embedders.py b/modules/layers/embedders.py
index 72af61f..99c530c 100644
--- a/modules/layers/embedders.py
+++ b/modules/layers/embedders.py
@@ -1,94 +1,63 @@
-from modules.layers import bert_modeling
+from pytorch_pretrained_bert import BertModel
 import torch
-from torch import nn
 
 
-class BertEmbedder(nn.Module):
-
-    # @property
-    def get_config(self):
-        config = {
-            "name": "BertEmbedder",
-            "params": {
-                "bert_config_file": self.bert_config_file,
-                "init_checkpoint_pt": self.init_checkpoint_pt,
-                "freeze": self.is_freeze,
-                "embedding_dim": self.embedding_dim,
-                "use_cuda": self.use_cuda,
-                "bert_mode": self.bert_mode
-            }
-        }
-        return config
-
-    def __init__(self, model, bert_config_file, init_checkpoint_pt,
-                 freeze=True, embedding_dim=768, use_cuda=True, bert_mode="weighted",):
-        super(BertEmbedder, self).__init__()
-        self.bert_config_file = bert_config_file
-        self.init_checkpoint_pt = init_checkpoint_pt
-        self.is_freeze = freeze
-        self.embedding_dim = embedding_dim
+class BERTEmbedder(torch.nn.Module):
+    def __init__(self, model, config):
+        super(BERTEmbedder, self).__init__()
+        self.config = config
         self.model = model
-        self.use_cuda = use_cuda
-        self.bert_mode = bert_mode
-        if self.bert_mode == "weighted":
-            self.bert_weights = nn.Parameter(torch.FloatTensor(12, 1))
-            self.bert_gamma = nn.Parameter(torch.FloatTensor(1, 1))
+        if self.config["mode"] == "weighted":
+            self.bert_weights = torch.nn.Parameter(torch.FloatTensor(12, 1))
+            self.bert_gamma = torch.nn.Parameter(torch.FloatTensor(1, 1))
+        self.init_weights()
 
-        if use_cuda:
-            self.cuda()
+    def init_weights(self):  # only "weighted" mode owns bert_gamma / bert_weights, so other modes are a no-op
+        if self.config["mode"] == "weighted":
+            torch.nn.init.xavier_normal_(self.bert_gamma)  # in-place API; plain xavier_normal is deprecated
+            torch.nn.init.xavier_normal_(self.bert_weights)
 
-        self.init_weights()
+    @classmethod
+    def create(  # build the wrapper around a pretrained BertModel, optionally with the BERT weights frozen
+            cls, model_name='bert-base-multilingual-cased',
+            device="cuda", mode="weighted",
+            is_freeze=True):
+        config = {  # kept on the instance as self.config; from_config() passes such a dict back as kwargs
+            "model_name": model_name,
+            "device": device,
+            "mode": mode,
+            "is_freeze": is_freeze
+        }
+        model = BertModel.from_pretrained(model_name)
+        model.to(device)
+        model.train()
+        self = cls(model, config)  # NOTE(review): only `model` went to `device`; wrapper params are not moved here
+        if is_freeze:
+            self.freeze()  # freeze() switches off requires_grad for every parameter of self.model
+        return self
 
     @classmethod
     def from_config(cls, config):
         return cls.create(**config)
 
-    def init_weights(self):
-        if self.bert_mode == "weighted":
-            nn.init.xavier_normal(self.bert_gamma)
-            nn.init.xavier_normal(self.bert_weights)
-
-    def forward(self, *batch):
-        input_ids, input_mask, input_type_ids = batch[:3]
-        all_encoder_layers, _ = self.model(input_ids, token_type_ids=input_type_ids, attention_mask=input_mask)
-        if self.bert_mode == "last":
-            return all_encoder_layers[-1]
-        elif self.bert_mode == "weighted":
-            all_encoder_layers = torch.stack([a * b for a, b in zip(all_encoder_layers, self.bert_weights)])
-            return self.bert_gamma * torch.sum(all_encoder_layers, dim=0)
+    def forward(self, batch):
+        """
+        `batch` is indexed positionally (only entries 0-2 are read here):
+            batch[0]: list, tokens ids
+            batch[1]: list, tokens mask
+            batch[2]: list, tokens type ids (for bert)
+            batch[3]: list, bert labels ids
+        """
+        encoded_layers, _ = self.model(
+            input_ids=batch[0],
+            token_type_ids=batch[2],
+            attention_mask=batch[1],
+            output_all_encoded_layers=self.config["mode"] == "weighted")  # all layers only when they get mixed
+        if self.config["mode"] == "weighted":  # learned mix: bert_gamma * sum_i(bert_weights[i] * layer_i)
+            encoded_layers = torch.stack([a * b for a, b in zip(encoded_layers, self.bert_weights)])
+            return self.bert_gamma * torch.sum(encoded_layers, dim=0)
+        return encoded_layers  # any other mode: the model's first output, returned unchanged
 
     def freeze(self):
-        self.model.eval()
-
-    def unfreeze(self):
-        self.model.train()
-
-    def get_n_trainable_params(self):
-        pp = 0
-        for p in list(self.parameters()):
-            if p.requires_grad:
-                num = 1
-                for s in list(p.size()):
-                    num = num * s
-                pp += num
-        return pp
-
-    @classmethod
-    def create(cls,
-               bert_config_file, init_checkpoint_pt, embedding_dim=768, use_cuda=True,
-               bert_mode="weighted", freeze=True):
-        bert_config = bert_modeling.BertConfig.from_json_file(bert_config_file)
-        model = bert_modeling.BertModel(bert_config)
-        if use_cuda:
-            device = torch.device("cuda")
-            map_location = "cuda"
-        else:
-            map_location = "cpu"
-            device = torch.device("cpu")
-        model.load_state_dict(torch.load(init_checkpoint_pt, map_location=map_location))
-        model = model.to(device)
-        model = cls(model=model, embedding_dim=embedding_dim, use_cuda=use_cuda, bert_mode=bert_mode,
-                    bert_config_file=bert_config_file, init_checkpoint_pt=init_checkpoint_pt, freeze=freeze)
-        if freeze:
-            model.freeze()
-        return model
+        for param in self.model.parameters():
+            param.requires_grad = False
diff --git a/modules/layers/encoders.py b/modules/layers/encoders.py
deleted file mode 100644
index 19b82b0..0000000
--- a/modules/layers/encoders.py
+++ /dev/null
@@ -1,65 +0,0 @@
-from torch import nn
-import torch
-
-
-class BertMetaBiLSTMEncoder(nn.Module):
-
-    def __init__(self, embeddings, meta_embeddings=None,
-                 hidden_dim=128, rnn_layers=1, dropout=0.5, use_cuda=True):
-        super(BertMetaBiLSTMEncoder, self).__init__()
-        self.embeddings = embeddings
-        self.meta_embeddings = meta_embeddings
-        self.hidden_dim = hidden_dim
-        self.rnn_layers = rnn_layers
-        self.use_cuda = use_cuda
-        self.dropout = nn.Dropout(dropout)
-        meta_dim = 0
-        if self.meta_embeddings:
-            meta_dim = meta_embeddings.embedding_dim
-        self.lstm = nn.LSTM(
-            self.embeddings.embedding_dim + meta_dim,
-            hidden_dim // 2,
-            rnn_layers, batch_first=True, bidirectional=True)
-        if use_cuda:
-            self.cuda()
-        self.init_weights()
-        self.output_dim = hidden_dim
-        self.hidden = None
-
-    def init_weights(self):
-        for param in self.parameters():
-            if len(param.shape) >= 2:
-                nn.init.orthogonal_(param.data)
-            else:
-                nn.init.normal_(param.data)
-
-    def forward(self, batch):
-        input_mask = batch[1]
-        output = self.embeddings(*batch)
-        if self.meta_embeddings:
-            output = torch.cat((output, self.meta_embeddings(*batch)), dim=-1)
-        output = self.dropout(output)
-        lens = input_mask.sum(-1)
-        output = nn.utils.rnn.pack_padded_sequence(
-            output, lens.tolist(), batch_first=True)
-        output, self.hidden = self.lstm(output)
-        output, _ = nn.utils.rnn.pad_packed_sequence(output, batch_first=True)
-        output = torch.cat((output, batch[3]), dim=-1)
-        return output, self.hidden
-
-    def get_n_trainable_params(self):
-        pp = 0
-        for p in list(self.parameters()):
-            if p.requires_grad:
-                num = 1
-                for s in list(p.size()):
-                    num = num * s
-                pp += num
-        return pp
-
-    @classmethod
-    def create(cls, embeddings, meta_embeddings=None,
-               hidden_dim=128, rnn_layers=1, dropout=0.5, use_cuda=True):
-        model = cls(
-            embeddings, meta_embeddings, hidden_dim, rnn_layers, dropout, use_cuda=use_cuda)
-        return model
diff --git a/modules/layers/layers.py b/modules/layers/layers.py
index 6058def..d3c020e 100644
--- a/modules/layers/layers.py
+++ b/modules/layers/layers.py
@@ -3,9 +3,38 @@
 import torch
 from torch import nn
 from torch.nn import init
+from torch.nn.utils import rnn as rnn_utils
 import math
 
 
+class BiLSTM(nn.Module):
+    """Bidirectional LSTM over padded batches; each direction gets hidden_dim // 2 units."""
+
+    def __init__(self, embedding_size=768, hidden_dim=512, rnn_layers=1, dropout=0.5):
+        super(BiLSTM, self).__init__()
+        self.embedding_size = embedding_size
+        self.hidden_dim = hidden_dim
+        self.rnn_layers = rnn_layers
+        self.dropout = nn.Dropout(dropout)  # NOTE(review): created but never applied in forward() - confirm intent
+        self.lstm = nn.LSTM(embedding_size, hidden_dim // 2, rnn_layers, batch_first=True, bidirectional=True)
+
+    def forward(self, input_, input_mask):
+        """Encode `input_`; return (outputs, final hidden state), both restored to the caller's batch order."""
+        length = input_mask.sum(-1)  # per-row sequence length, read off the mask
+        sorted_lengths, sorted_idx = torch.sort(length, descending=True)  # packing below gets longest rows first
+        input_ = input_[sorted_idx]
+        packed_input = rnn_utils.pack_padded_sequence(input_, sorted_lengths.data.tolist(), batch_first=True)
+        output, (hidden, _) = self.lstm(packed_input)
+        # total_length pins the time axis to input_'s, even if every row is shorter than the padded width
+        padded_outputs = rnn_utils.pad_packed_sequence(output, batch_first=True, total_length=input_.size(1))[0]
+        _, reversed_idx = torch.sort(sorted_idx)
+        return padded_outputs[reversed_idx], hidden[:, reversed_idx]
+
+    @classmethod
+    def create(cls, *args, **kwargs):
+        return cls(*args, **kwargs)
+
+
 class Linear(nn.Linear):
     def __init__(self,
                  in_features: int,
@@ -57,6 +86,7 @@ def forward(self, q, k, v, attn_mask=None):
         # v: [b_size x len_v x d_v] note: (len_k == len_v)
         attn = torch.bmm(q, k.transpose(1, 2)) / self.scale_factor  # attn: [b_size x len_q x len_k]
         if attn_mask is not None:
+            # attn_mask must match attn element-for-element: [b_size x len_q x len_k]
             assert attn_mask.size() == attn.size()
             attn.data.masked_fill_(attn_mask, -float('inf'))
 
diff --git a/modules/layers/ncrf.py b/modules/layers/ncrf.py
index cdf490d..912f99c 100644
--- a/modules/layers/ncrf.py
+++ b/modules/layers/ncrf.py
@@ -16,6 +16,7 @@
 import torch.autograd as autograd
 import torch.nn as nn
 import torch.nn.functional as F
+
 START_TAG = -2
 STOP_TAG = -1
 
@@ -31,27 +32,29 @@ def log_sum_exp(vec, m_size):
         batch_size, hidden_dim
     """
     _, idx = torch.max(vec, 1)  # B * 1 * M
-    max_score = torch.gather(vec, 1, idx.view(-1, 1, m_size)).view(-1, 1, m_size)  # B * M
-    return max_score.view(-1, m_size) + torch.log(torch.sum(torch.exp(vec - max_score.expand_as(vec)), 1)).view(-1, m_size)  # B * M
+    max_score = torch.gather(vec, 1, idx.view(-1, 1, m_size)).view(-1, 1, m_size)
+    # B * M
+    return max_score.view(-1, m_size) + torch.log(torch.sum(torch.exp(vec - max_score.expand_as(vec)), 1)).view(-1,
+                                                                                                                m_size)
 
 
 class NCRF(nn.Module):
 
-    def __init__(self, tagset_size, gpu):
+    def __init__(self, tagset_size, device):
         super(NCRF, self).__init__()
         print("build CRF...")
-        self.gpu = gpu
+        self.device = device
         # Matrix of transition parameters.  Entry i,j is the score of transitioning *to* i *from* j.
         self.tagset_size = tagset_size
         # # We add 2 here, because of START_TAG and STOP_TAG
         # # transitions (f_tag_size, t_tag_size), transition value from f_tag to t_tag
-        init_transitions = torch.zeros(self.tagset_size+2, self.tagset_size+2)
-        init_transitions[:,START_TAG] = -10000.0
-        init_transitions[STOP_TAG,:] = -10000.0
-        init_transitions[:,0] = -10000.0
-        init_transitions[0,:] = -10000.0
-        if self.gpu:
-            init_transitions = init_transitions.cuda()
+        init_transitions = torch.zeros(self.tagset_size + 2, self.tagset_size + 2)
+        init_transitions[:, START_TAG] = -10000.0
+        init_transitions[STOP_TAG, :] = -10000.0
+        init_transitions[:, 0] = -10000.0
+        init_transitions[0, :] = -10000.0
+        if self.device:
+            init_transitions = init_transitions.to(device)
         self.transitions = nn.Parameter(init_transitions)
 
         # self.transitions = nn.Parameter(torch.Tensor(self.tagset_size+2, self.tagset_size+2))
@@ -67,13 +70,13 @@ def _calculate_PZ(self, feats, mask):
         seq_len = feats.size(1)
         tag_size = feats.size(2)
         # print feats.view(seq_len, tag_size)
-        assert(tag_size == self.tagset_size+2)
-        mask = mask.transpose(1,0).contiguous()
+        assert (tag_size == self.tagset_size + 2)
+        mask = mask.transpose(1, 0).contiguous()
         ins_num = seq_len * batch_size
-        ## be careful the view shape, it is .view(ins_num, 1, tag_size) but not .view(ins_num, tag_size, 1)
-        feats = feats.transpose(1,0).contiguous().view(ins_num,1, tag_size).expand(ins_num, tag_size, tag_size)
-        ## need to consider start
-        scores = feats + self.transitions.view(1,tag_size,tag_size).expand(ins_num, tag_size, tag_size)
+        # be careful the view shape, it is .view(ins_num, 1, tag_size) but not .view(ins_num, tag_size, 1)
+        feats = feats.transpose(1, 0).contiguous().view(ins_num, 1, tag_size).expand(ins_num, tag_size, tag_size)
+        # need to consider start
+        scores = feats + self.transitions.view(1, tag_size, tag_size).expand(ins_num, tag_size, tag_size)
         scores = scores.view(seq_len, batch_size, tag_size, tag_size)
         # build iter
         seq_iter = enumerate(scores)
@@ -81,7 +84,7 @@ def _calculate_PZ(self, feats, mask):
         # only need start from start_tag
         partition = inivalues[:, START_TAG, :].clone().view(batch_size, tag_size, 1)  # bat_size * to_target_size
 
-        ## add start score (from start to all tag, duplicate to batch_size)
+        # add start score (from start to all tag, duplicate to batch_size)
         # partition = partition + self.transitions[START_TAG,:].view(1, tag_size, 1).expand(batch_size, tag_size, 1)
         # iter over last scores
         for idx, cur_values in seq_iter:
@@ -89,24 +92,27 @@ def _calculate_PZ(self, feats, mask):
             # partition: previous results log(exp(from_target)), #(batch_size * from_target)
             # cur_values: bat_size * from_target * to_target
 
-            cur_values = cur_values + partition.contiguous().view(batch_size, tag_size, 1).expand(batch_size, tag_size, tag_size)
+            cur_values = cur_values + partition.contiguous().view(batch_size, tag_size, 1).expand(batch_size, tag_size,
+                                                                                                  tag_size)
             cur_partition = log_sum_exp(cur_values, tag_size)
             # print cur_partition.data
 
-                # (bat_size * from_target * to_target) -> (bat_size * to_target)
-            # partition = utils.switch(partition, cur_partition, mask[idx].view(bat_size, 1).expand(bat_size, self.tagset_size)).view(bat_size, -1)
+            # (bat_size * from_target * to_target) -> (bat_size * to_target)
             mask_idx = mask[idx, :].view(batch_size, 1).expand(batch_size, tag_size)
 
-            ## effective updated partition part, only keep the partition value of mask value = 1
+            # effective updated partition part, only keep the partition value of mask value = 1
             mask_idx = mask_idx.byte()
             masked_cur_partition = cur_partition.masked_select(mask_idx)
-            ## let mask_idx broadcastable, to disable warning
+            # let mask_idx broadcastable, to disable warning
             mask_idx = mask_idx.contiguous().view(batch_size, tag_size, 1)
 
-            ## replace the partition where the maskvalue=1, other partition value keeps the same
+            # replace the partition where the maskvalue=1, other partition value keeps the same
             partition.masked_scatter_(mask_idx, masked_cur_partition)
-        # until the last state, add transition score for all partition (and do log_sum_exp) then select the value in STOP_TAG
-        cur_values = self.transitions.view(1,tag_size, tag_size).expand(batch_size, tag_size, tag_size) + partition.contiguous().view(batch_size, tag_size, 1).expand(batch_size, tag_size, tag_size)
+        # until the last state, add transition score
+        # for all partition (and do log_sum_exp) then select the value in STOP_TAG
+        cur_values = self.transitions.view(1, tag_size, tag_size).expand(batch_size, tag_size,
+                                                                         tag_size) + partition.contiguous().view(
+            batch_size, tag_size, 1).expand(batch_size, tag_size, tag_size)
         cur_partition = log_sum_exp(cur_values, tag_size)
         final_partition = cur_partition[:, STOP_TAG]
         return final_partition.sum(), scores
@@ -123,26 +129,26 @@ def _viterbi_decode(self, feats, mask):
         batch_size = feats.size(0)
         seq_len = feats.size(1)
         tag_size = feats.size(2)
-        assert(tag_size == self.tagset_size+2)
-        ## calculate sentence length for each sentence
-        length_mask = torch.sum(mask.long(), dim = 1).view(batch_size,1).long()
-        ## mask to (seq_len, batch_size)
-        mask = mask.transpose(1,0).contiguous()
+        assert (tag_size == self.tagset_size + 2)
+        # calculate sentence length for each sentence
+        length_mask = torch.sum(mask.long(), dim=1).view(batch_size, 1).long()
+        # mask to (seq_len, batch_size)
+        mask = mask.transpose(1, 0).contiguous()
         ins_num = seq_len * batch_size
-        ## be careful the view shape, it is .view(ins_num, 1, tag_size) but not .view(ins_num, tag_size, 1)
-        feats = feats.transpose(1,0).contiguous().view(ins_num, 1, tag_size).expand(ins_num, tag_size, tag_size)
-        ## need to consider start
-        scores = feats + self.transitions.view(1,tag_size,tag_size).expand(ins_num, tag_size, tag_size)
+        # be careful the view shape, it is .view(ins_num, 1, tag_size) but not .view(ins_num, tag_size, 1)
+        feats = feats.transpose(1, 0).contiguous().view(ins_num, 1, tag_size).expand(ins_num, tag_size, tag_size)
+        # need to consider start
+        scores = feats + self.transitions.view(1, tag_size, tag_size).expand(ins_num, tag_size, tag_size)
         scores = scores.view(seq_len, batch_size, tag_size, tag_size)
 
         # build iter
         seq_iter = enumerate(scores)
-        ## record the position of best score
+        # record the position of best score
         back_points = list()
         partition_history = list()
-        ##  reverse mask (bug for mask = 1- mask, use this as alternative choice)
+        #  reverse mask (bug for mask = 1- mask, use this as alternative choice)
         # mask = 1 + (-1)*mask
-        mask =  (1 - mask.long()).byte()
+        mask = (1 - mask.long()).byte()
         _, inivalues = next(seq_iter)  # bat_size * from_target_size * to_target_size
         # only need start from start_tag
         partition = inivalues[:, START_TAG, :].clone().view(batch_size, tag_size)  # bat_size * to_target_size
@@ -153,8 +159,9 @@ def _viterbi_decode(self, feats, mask):
             # previous to_target is current from_target
             # partition: previous results log(exp(from_target)), #(batch_size * from_target)
             # cur_values: batch_size * from_target * to_target
-            cur_values = cur_values + partition.contiguous().view(batch_size, tag_size, 1).expand(batch_size, tag_size, tag_size)
-            ## forscores, cur_bp = torch.max(cur_values[:,:-2,:], 1) # do not consider START_TAG/STOP_TAG
+            cur_values = cur_values + partition.contiguous().view(batch_size, tag_size, 1).expand(batch_size, tag_size,
+                                                                                                  tag_size)
+            # forscores, cur_bp = torch.max(cur_values[:,:-2,:], 1) # do not consider START_TAG/STOP_TAG
             # print "cur value:", cur_values.size()
             partition, cur_bp = torch.max(cur_values, 1)
             # print "partsize:",partition.size()
@@ -163,46 +170,50 @@ def _viterbi_decode(self, feats, mask):
             # print cur_bp
             # print "one best, ",idx
             partition_history.append(partition)
-            ## cur_bp: (batch_size, tag_size) max source score position in current tag
-            ## set padded label as 0, which will be filtered in post processing
+            # cur_bp: (batch_size, tag_size) max source score position in current tag
+            # set padded label as 0, which will be filtered in post processing
             cur_bp.masked_fill_(mask[idx].view(batch_size, 1).expand(batch_size, tag_size), 0)
             back_points.append(cur_bp)
         # exit(0)
-        ### add score to final STOP_TAG
-        partition_history = torch.cat(partition_history, 0).view(seq_len, batch_size, -1).transpose(1,0).contiguous() ## (batch_size, seq_len. tag_size)
-        ### get the last position for each setences, and select the last partitions using gather()
-        last_position = length_mask.view(batch_size,1,1).expand(batch_size, 1, tag_size) -1
-        last_partition = torch.gather(partition_history, 1, last_position).view(batch_size,tag_size,1)
-        ### calculate the score from last partition to end state (and then select the STOP_TAG from it)
-        last_values = last_partition.expand(batch_size, tag_size, tag_size) + self.transitions.view(1,tag_size, tag_size).expand(batch_size, tag_size, tag_size)
+        # add score to final STOP_TAG
+        partition_history = torch.cat(partition_history, 0).view(seq_len, batch_size, -1).transpose(1,
+                                                                                                    0).contiguous()
+        # (batch_size, seq_len, tag_size)
+        # get the last position for each sentence, and select the last partitions using gather()
+        last_position = length_mask.view(batch_size, 1, 1).expand(batch_size, 1, tag_size) - 1
+        last_partition = torch.gather(partition_history, 1, last_position).view(batch_size, tag_size, 1)
+        # calculate the score from last partition to end state (and then select the STOP_TAG from it)
+        last_values = last_partition.expand(batch_size, tag_size, tag_size) + self.transitions.view(1, tag_size,
+                                                                                                    tag_size).expand(
+            batch_size, tag_size, tag_size)
         _, last_bp = torch.max(last_values, 1)
         pad_zero = autograd.Variable(torch.zeros(batch_size, tag_size)).long()
-        if self.gpu:
-            pad_zero = pad_zero.cuda()
+        if self.device:
+            pad_zero = pad_zero.to(self.device)
         back_points.append(pad_zero)
-        back_points  =  torch.cat(back_points).view(seq_len, batch_size, tag_size)
+        back_points = torch.cat(back_points).view(seq_len, batch_size, tag_size)
 
-        ## select end ids in STOP_TAG
+        # select end ids in STOP_TAG
         pointer = last_bp[:, STOP_TAG]
-        insert_last = pointer.contiguous().view(batch_size,1,1).expand(batch_size,1, tag_size)
-        back_points = back_points.transpose(1,0).contiguous()
-        ## move the end ids(expand to tag_size) to the corresponding position of back_points to replace the 0 values
+        insert_last = pointer.contiguous().view(batch_size, 1, 1).expand(batch_size, 1, tag_size)
+        back_points = back_points.transpose(1, 0).contiguous()
+        # move the end ids(expand to tag_size) to the corresponding position of back_points to replace the 0 values
         # print "lp:",last_position
         # print "il:",insert_last
         back_points.scatter_(1, last_position, insert_last)
         # print "bp:",back_points
         # exit(0)
-        back_points = back_points.transpose(1,0).contiguous()
-        ## decode from the end, padded position ids are 0, which will be filtered if following evaluation
+        back_points = back_points.transpose(1, 0).contiguous()
+        # decode from the end, padded position ids are 0, which will be filtered in following evaluation
         decode_idx = autograd.Variable(torch.LongTensor(seq_len, batch_size))
-        if self.gpu:
-            decode_idx = decode_idx.cuda()
+        if self.device:
+            decode_idx = decode_idx.to(self.device)
         decode_idx[-1] = pointer.data
-        for idx in range(len(back_points)-2, -1, -1):
+        for idx in range(len(back_points) - 2, -1, -1):
             pointer = torch.gather(back_points[idx], 1, pointer.contiguous().view(batch_size, 1))
             decode_idx[idx] = pointer.data
         path_score = None
-        decode_idx = decode_idx.transpose(1,0)
+        decode_idx = decode_idx.transpose(1, 0)
         return path_score, decode_idx
 
     def forward(self, feats):
@@ -222,56 +233,53 @@ def _score_sentence(self, scores, mask, tags):
         batch_size = scores.size(1)
         seq_len = scores.size(0)
         tag_size = scores.size(2)
-        ## convert tag value into a new format, recorded label bigram information to index
+        # convert tag value into a new format, recorded label bigram information to index
         new_tags = autograd.Variable(torch.LongTensor(batch_size, seq_len))
-        if self.gpu:
-            new_tags = new_tags.cuda()
+        if self.device:
+            new_tags = new_tags.to(self.device)
         for idx in range(seq_len):
             if idx == 0:
-                ## start -> first score
-                new_tags[:,0] =  (tag_size - 2)*tag_size + tags[:,0]
+                # start -> first score
+                new_tags[:, 0] = (tag_size - 2) * tag_size + tags[:, 0]
 
             else:
-                new_tags[:,idx] =  tags[:,idx-1]*tag_size + tags[:,idx]
+                new_tags[:, idx] = tags[:, idx - 1] * tag_size + tags[:, idx]
 
-        ## transition for label to STOP_TAG
-        end_transition = self.transitions[:,STOP_TAG].contiguous().view(1, tag_size).expand(batch_size, tag_size)
-        ## length for batch,  last word position = length - 1
-        length_mask = torch.sum(mask.long(), dim = 1).view(batch_size,1).long()
-        ## index the label id of last word
+        # transition for label to STOP_TAG
+        end_transition = self.transitions[:, STOP_TAG].contiguous().view(1, tag_size).expand(batch_size, tag_size)
+        # length for batch,  last word position = length - 1
+        length_mask = torch.sum(mask.long(), dim=1).view(batch_size, 1).long()
+        # index the label id of last word
         end_ids = torch.gather(tags, 1, length_mask - 1)
 
-        ## index the transition score for end_id to STOP_TAG
+        # index the transition score for end_id to STOP_TAG
         end_energy = torch.gather(end_transition, 1, end_ids)
 
-        ## convert tag as (seq_len, batch_size, 1)
-        new_tags = new_tags.transpose(1,0).contiguous().view(seq_len, batch_size, 1)
-        ### need convert tags id to search from 400 positions of scores
-        tg_energy = torch.gather(scores.view(seq_len, batch_size, -1), 2, new_tags).view(seq_len, batch_size)  # seq_len * bat_size
-        ## mask transpose to (seq_len, batch_size)
+        # convert tag as (seq_len, batch_size, 1)
+        new_tags = new_tags.transpose(1, 0).contiguous().view(seq_len, batch_size, 1)
+        # need to convert tag ids to index into the flattened tag_size * tag_size positions of scores
+        tg_energy = torch.gather(scores.view(seq_len, batch_size, -1), 2, new_tags).view(seq_len,
+                                                                                         batch_size)
+        # seq_len * bat_size
+        # mask transpose to (seq_len, batch_size)
         mask = mask.byte()
-        tg_energy = tg_energy.masked_select(mask.transpose(1,0))
+        tg_energy = tg_energy.masked_select(mask.transpose(1, 0))
 
-        # ## calculate the score from START_TAG to first label
+        # # calculate the score from START_TAG to first label
         # start_transition = self.transitions[START_TAG,:].view(1, tag_size).expand(batch_size, tag_size)
         # start_energy = torch.gather(start_transition, 1, tags[0,:])
 
-        ## add all score together
+        # add all score together
         # gold_score = start_energy.sum() + tg_energy.sum() + end_energy.sum()
         gold_score = tg_energy.sum() + end_energy.sum()
         return gold_score
 
     def neg_log_likelihood_loss(self, feats, mask, tags):
         # nonegative log likelihood
-        batch_size = feats.size(0)
         forward_score, scores = self._calculate_PZ(feats, mask)
         gold_score = self._score_sentence(scores, mask, tags)
-        # print "batch, f:", forward_score.data[0], " g:", gold_score.data[0], " dis:", forward_score.data[0] - gold_score.data[0]
-        # exit(0)
         return forward_score - gold_score
 
-
-
     def _viterbi_decode_nbest(self, feats, mask, nbest):
         """
             input:
@@ -285,95 +293,105 @@ def _viterbi_decode_nbest(self, feats, mask, nbest):
         batch_size = feats.size(0)
         seq_len = feats.size(1)
         tag_size = feats.size(2)
-        assert(tag_size == self.tagset_size+2)
-        ## calculate sentence length for each sentence
-        length_mask = torch.sum(mask.long(), dim = 1).view(batch_size,1).long()
-        ## mask to (seq_len, batch_size)
-        mask = mask.transpose(1,0).contiguous()
+        assert (tag_size == self.tagset_size + 2)
+        # calculate sentence length for each sentence
+        length_mask = torch.sum(mask.long(), dim=1).view(batch_size, 1).long()
+        # mask to (seq_len, batch_size)
+        mask = mask.transpose(1, 0).contiguous()
         ins_num = seq_len * batch_size
-        ## be careful the view shape, it is .view(ins_num, 1, tag_size) but not .view(ins_num, tag_size, 1)
-        feats = feats.transpose(1,0).contiguous().view(ins_num, 1, tag_size).expand(ins_num, tag_size, tag_size)
-        ## need to consider start
-        scores = feats + self.transitions.view(1,tag_size,tag_size).expand(ins_num, tag_size, tag_size)
+        # be careful the view shape, it is .view(ins_num, 1, tag_size) but not .view(ins_num, tag_size, 1)
+        feats = feats.transpose(1, 0).contiguous().view(ins_num, 1, tag_size).expand(ins_num, tag_size, tag_size)
+        # need to consider start
+        scores = feats + self.transitions.view(1, tag_size, tag_size).expand(ins_num, tag_size, tag_size)
         scores = scores.view(seq_len, batch_size, tag_size, tag_size)
 
         # build iter
         seq_iter = enumerate(scores)
-        ## record the position of best score
+        # record the position of best score
         back_points = list()
         partition_history = list()
-        ##  reverse mask (bug for mask = 1- mask, use this as alternative choice)
+        # reverse mask (mask = 1 - mask is buggy, so use this as an alternative)
         # mask = 1 + (-1)*mask
-        mask =  (1 - mask.long()).byte()
+        mask = (1 - mask.long()).byte()
         _, inivalues = next(seq_iter)  # bat_size * from_target_size * to_target_size
         # only need start from start_tag
         partition = inivalues[:, START_TAG, :].clone()  # bat_size * to_target_size
-        ## initial partition [batch_size, tag_size]
+        # initial partition [batch_size, tag_size]
         partition_history.append(partition.view(batch_size, tag_size, 1).expand(batch_size, tag_size, nbest))
         # iter over last scores
         for idx, cur_values in seq_iter:
             if idx == 1:
-                cur_values = cur_values.view(batch_size, tag_size, tag_size) + partition.contiguous().view(batch_size, tag_size, 1).expand(batch_size, tag_size, tag_size)
+                cur_values = cur_values.view(batch_size, tag_size, tag_size) + partition.contiguous().view(batch_size,
+                                                                                                           tag_size,
+                                                                                                           1).expand(
+                    batch_size, tag_size, tag_size)
             else:
                 # previous to_target is current from_target
                 # partition: previous results log(exp(from_target)), #(batch_size * nbest * from_target)
                 # cur_values: batch_size * from_target * to_target
-                cur_values = cur_values.view(batch_size, tag_size, 1, tag_size).expand(batch_size, tag_size, nbest, tag_size) + partition.contiguous().view(batch_size, tag_size, nbest, 1).expand(batch_size, tag_size, nbest, tag_size)
-                ## compare all nbest and all from target
-                cur_values = cur_values.view(batch_size, tag_size*nbest, tag_size)
+                cur_values = cur_values.view(batch_size, tag_size, 1, tag_size).expand(
+                    batch_size, tag_size, nbest, tag_size) + partition.contiguous().view(
+                    batch_size, tag_size, nbest, 1).expand(batch_size, tag_size, nbest, tag_size)
+                # compare all nbest and all from target
+                cur_values = cur_values.view(batch_size, tag_size * nbest, tag_size)
                 # print "cur size:",cur_values.size()
             partition, cur_bp = torch.topk(cur_values, nbest, 1)
-            ## cur_bp/partition: [batch_size, nbest, tag_size], id should be normize through nbest in following backtrace step
+            # cur_bp/partition: [batch_size, nbest, tag_size],
+            # ids should be normalized by nbest in the following backtrace step
             # print partition[:,0,:]
             # print cur_bp[:,0,:]
             # print "nbest, ",idx
             if idx == 1:
-                cur_bp = cur_bp*nbest
-            partition = partition.transpose(2,1)
-            cur_bp = cur_bp.transpose(2,1)
+                cur_bp = cur_bp * nbest
+            partition = partition.transpose(2, 1)
+            cur_bp = cur_bp.transpose(2, 1)
 
             # print partition
             # exit(0)
-            #partition: (batch_size * to_target * nbest)
-            #cur_bp: (batch_size * to_target * nbest) Notice the cur_bp number is the whole position of tag_size*nbest, need to convert when decode
+            # partition: (batch_size * to_target * nbest)
+            # cur_bp: (batch_size * to_target * nbest)
+            # Note: cur_bp values index the whole tag_size*nbest range and need to be converted when decoding
             partition_history.append(partition)
-            ## cur_bp: (batch_size,nbest, tag_size) topn source score position in current tag
-            ## set padded label as 0, which will be filtered in post processing
-            ## mask[idx] ? mask[idx-1]
+            # cur_bp: (batch_size, tag_size, nbest) topn source score position in current tag
+            # set padded label as 0, which will be filtered in post processing
+            # mask[idx] ? mask[idx-1]
             cur_bp.masked_fill_(mask[idx].view(batch_size, 1, 1).expand(batch_size, tag_size, nbest), 0)
             # print cur_bp[0]
             back_points.append(cur_bp)
-        ### add score to final STOP_TAG
-        partition_history = torch.cat(partition_history,0).view(seq_len, batch_size, tag_size, nbest).transpose(1,0).contiguous() ## (batch_size, seq_len, nbest, tag_size)
-        ### get the last position for each setences, and select the last partitions using gather()
-        last_position = length_mask.view(batch_size,1,1,1).expand(batch_size, 1, tag_size, nbest) - 1
+        # add score to final STOP_TAG
+        partition_history = torch.cat(partition_history, 0).view(
+            seq_len, batch_size, tag_size, nbest).transpose(1, 0).contiguous()
+        # (batch_size, seq_len, tag_size, nbest)
+        # get the last position for each sentence, and select the last partitions using gather()
+        last_position = length_mask.view(batch_size, 1, 1, 1).expand(batch_size, 1, tag_size, nbest) - 1
         last_partition = torch.gather(partition_history, 1, last_position).view(batch_size, tag_size, nbest, 1)
-        ### calculate the score from last partition to end state (and then select the STOP_TAG from it)
-        last_values = last_partition.expand(batch_size, tag_size, nbest, tag_size) + self.transitions.view(1, tag_size, 1, tag_size).expand(batch_size, tag_size, nbest, tag_size)
-        last_values = last_values.view(batch_size, tag_size*nbest, tag_size)
+        # calculate the score from last partition to end state (and then select the STOP_TAG from it)
+        last_values = last_partition.expand(batch_size, tag_size, nbest, tag_size) + self.transitions.view(
+            1, tag_size, 1, tag_size).expand(batch_size, tag_size, nbest, tag_size)
+        last_values = last_values.view(batch_size, tag_size * nbest, tag_size)
         end_partition, end_bp = torch.topk(last_values, nbest, 1)
-        ## end_partition: (batch, nbest, tag_size)
-        end_bp = end_bp.transpose(2,1)
+        # end_partition: (batch, nbest, tag_size)
+        end_bp = end_bp.transpose(2, 1)
         # end_bp: (batch, tag_size, nbest)
         pad_zero = autograd.Variable(torch.zeros(batch_size, tag_size, nbest)).long()
-        if self.gpu:
-            pad_zero = pad_zero.cuda()
+        if self.device:
+            pad_zero = pad_zero.to(self.device)
         back_points.append(pad_zero)
         back_points = torch.cat(back_points).view(seq_len, batch_size, tag_size, nbest)
 
-        ## select end ids in STOP_TAG
-        pointer = end_bp[:, STOP_TAG, :] ## (batch_size, nbest)
+        # select end ids in STOP_TAG
+        pointer = end_bp[:, STOP_TAG, :]  # (batch_size, nbest)
         insert_last = pointer.contiguous().view(batch_size, 1, 1, nbest).expand(batch_size, 1, tag_size, nbest)
-        back_points = back_points.transpose(1,0).contiguous()
-        ## move the end ids(expand to tag_size) to the corresponding position of back_points to replace the 0 values
+        back_points = back_points.transpose(1, 0).contiguous()
+        # move the end ids(expand to tag_size) to the corresponding position of back_points to replace the 0 values
         # print "lp:",last_position
         # print "il:",insert_last[0]
         # exit(0)
-        ## copy the ids of last position:insert_last to back_points, though the last_position index
-        ## last_position includes the length of batch sentences
+        # copy the ids of last position:insert_last to back_points, through the last_position index
+        # last_position includes the length of batch sentences
         # print "old:", back_points[9,0,:,:]
         back_points.scatter_(1, last_position, insert_last)
-        ## back_points: [batch_size, seq_length, tag_size, nbest]
+        # back_points: [batch_size, seq_length, tag_size, nbest]
         # print "new:", back_points[9,0,:,:]
         # exit(0)
         # print pointer[2]
@@ -384,42 +402,36 @@ def _viterbi_decode_nbest(self, feats, mask, nbest):
         x,x,6,0,0,0,0,0,0,0
         '''
 
-        back_points = back_points.transpose(1,0).contiguous()
+        back_points = back_points.transpose(1, 0).contiguous()
         # print back_points[0]
-        ## back_points: (seq_len, batch, tag_size, nbest)
-        ## decode from the end, padded position ids are 0, which will be filtered in following evaluation
+        # back_points: (seq_len, batch, tag_size, nbest)
+        # decode from the end, padded position ids are 0, which will be filtered in following evaluation
         decode_idx = autograd.Variable(torch.LongTensor(seq_len, batch_size, nbest))
-        if self.gpu:
-            decode_idx = decode_idx.cuda()
-        decode_idx[-1] = pointer.data/nbest
+        if self.device:
+            decode_idx = decode_idx.to(self.device)
+        decode_idx[-1] = pointer.data / nbest
         # print "pointer-1:",pointer[2]
         # exit(0)
         # use old mask, let 0 means has token
-        for idx in range(len(back_points)-2, -1, -1):
+        for idx in range(len(back_points) - 2, -1, -1):
             # print "pointer: ",idx,  pointer[3]
             # print "back:",back_points[idx][3]
             # print "mask:",mask[idx+1,3]
-            new_pointer = torch.gather(back_points[idx].view(batch_size, tag_size*nbest), 1, pointer.contiguous().view(batch_size,nbest))
-            decode_idx[idx] = new_pointer.data/nbest
+            new_pointer = torch.gather(back_points[idx].view(batch_size, tag_size * nbest), 1,
+                                       pointer.contiguous().view(batch_size, nbest))
+            decode_idx[idx] = new_pointer.data / nbest
             # # use new pointer to remember the last end nbest ids for non longest
-            pointer = new_pointer + pointer.contiguous().view(batch_size,nbest)*mask[idx].view(batch_size,1).expand(batch_size, nbest).long()
+            pointer = new_pointer + pointer.contiguous().view(batch_size, nbest) * mask[idx].view(batch_size, 1).expand(
+                batch_size, nbest).long()
 
-        # exit(0)
-        path_score = None
-        decode_idx = decode_idx.transpose(1,0)
-        ## decode_idx: [batch, seq_len, nbest]
-        # print decode_idx[:,:,0]
-        # print "nbest:",nbest
-        # print "diff:", decode_idx[:,:,0]- decode_idx[:,:,4]
-        # print decode_idx[:,0,:]
-        # exit(0)
+        decode_idx = decode_idx.transpose(1, 0)
 
-        ### calculate probability for each sequence
+        # calculate probability for each sequence
         scores = end_partition[:, :, STOP_TAG]
-        ## scores: [batch_size, nbest]
-        max_scores,_ = torch.max(scores, 1)
-        minus_scores = scores - max_scores.view(batch_size,1).expand(batch_size, nbest)
+        # scores: [batch_size, nbest]
+        max_scores, _ = torch.max(scores, 1)
+        minus_scores = scores - max_scores.view(batch_size, 1).expand(batch_size, nbest)
         path_score = F.softmax(minus_scores, 1)
-        ## path_score: [batch_size, nbest]
+        # path_score: [batch_size, nbest]
         # exit(0)
         return path_score, decode_idx
diff --git a/modules/models/__init__.py b/modules/models/__init__.py
index 1345775..e69de29 100644
--- a/modules/models/__init__.py
+++ b/modules/models/__init__.py
@@ -1,4 +0,0 @@
-from .bert_models import BertBiLSTMCRF, BertBiLSTMAttnCRF
-
-
-__all__ = ["BertBiLSTMCRF", "BertBiLSTMAttnCRF"]
diff --git a/modules/models/bert_models.py b/modules/models/bert_models.py
index 019f3eb..d52d82e 100644
--- a/modules/models/bert_models.py
+++ b/modules/models/bert_models.py
@@ -1,34 +1,22 @@
-from modules.layers.encoders import *
 from modules.layers.decoders import *
 from modules.layers.embedders import *
+from modules.layers.layers import BiLSTM, MultiHeadAttention
 import abc
-import sys
-from .released_models import released_models
 
 
-class NerModel(nn.Module, metaclass=abc.ABCMeta):
-
-    """Base class for all Models"""
-    def __init__(self, encoder, decoder, use_cuda=True):
-        super(NerModel, self).__init__()
-        self.encoder = encoder
-        self.decoder = decoder
-        self.use_cuda = use_cuda
-        if use_cuda:
-            self.cuda()
+class BERTNerModel(nn.Module, metaclass=abc.ABCMeta):
+    """Base class for all BERT Models"""
 
     @abc.abstractmethod
-    def forward(self, *batch):
-        # return self.decoder(self.encoder(batch))
+    def forward(self, batch):
         raise NotImplementedError("abstract method forward must be implemented")
 
     @abc.abstractmethod
-    def score(self, *batch):
-        # return self.decoder.score(self.encoder(batch))
+    def score(self, batch):
         raise NotImplementedError("abstract method score must be implemented")
 
     @abc.abstractmethod
-    def create(self, *args):
+    def create(self, *args, **kwargs):
         raise NotImplementedError("abstract method create must be implemented")
 
     def get_n_trainable_params(self):
@@ -41,390 +29,595 @@ def get_n_trainable_params(self):
                 pp += num
         return pp
 
-    def get_config(self):
-        try:
-            config = {
-                "name": self.__class__.__name__,
-                "params": {
-                    "encoder": self.encoder.get_config(),
-                    "decoder": self.decoder.get_config(),
-                    "use_cuda": self.use_cuda
-                }
-            }
-        except AttributeError:
-            config = {}
-            print("config is empty :(. Maybe for this model from_config has not implemented yet.", file=sys.stderr)
-        except NotImplemented:
-            config = {}
-            print("config is empty :(. Maybe for this model from_config has not implemented yet.", file=sys.stderr)
-        return config
+
+class BERTBiLSTMCRF(BERTNerModel):
+
+    def __init__(self, embeddings, lstm, crf, device="cuda"):
+        super(BERTBiLSTMCRF, self).__init__()
+        self.embeddings = embeddings
+        self.lstm = lstm
+        self.crf = crf
+        self.to(device)
+
+    def forward(self, batch):
+        input_, labels_mask, input_type_ids = batch[:3]
+        input_embeddings = self.embeddings(batch)
+        output, _ = self.lstm.forward(input_embeddings, labels_mask)
+        return self.crf.forward(output, labels_mask)
+
+    def score(self, batch):
+        input_, labels_mask, input_type_ids, labels = batch
+        input_embeddings = self.embeddings(batch)
+        output, _ = self.lstm.forward(input_embeddings, labels_mask)
+        return self.crf.score(output, labels_mask, labels)
 
     @classmethod
-    def from_config(cls, config):
-        encoder = released_models["encoder"].from_config(**config["encoder"]["params"])
-        decoder = released_models["decoder"].from_config(**config["decoder"]["params"])
-        return cls(encoder, decoder, config["use_cuda"])
+    def create(cls,
+               label_size,
+               # BertEmbedder params
+               model_name='bert-base-multilingual-cased', mode="weighted", is_freeze=True,
+               # BiLSTM params
+               embedding_size=768, hidden_dim=512, rnn_layers=1, lstm_dropout=0.3,
+               # CRFDecoder params
+               crf_dropout=0.5,
+               # Global params
+               device="cuda"):
+        embeddings = BERTEmbedder.create(model_name=model_name, device=device, mode=mode, is_freeze=is_freeze)
+        lstm = BiLSTM.create(
+                embedding_size=embedding_size, hidden_dim=hidden_dim, rnn_layers=rnn_layers, dropout=lstm_dropout)
+        crf = CRFDecoder.create(label_size, hidden_dim, crf_dropout)
+        return cls(embeddings, lstm, crf, device)
 
 
-class BertBiLSTMCRF(NerModel):
+class BERTBiLSTMNCRF(BERTNerModel):
+
+    def __init__(self, embeddings, lstm, crf, device="cuda"):
+        super(BERTBiLSTMNCRF, self).__init__()
+        self.embeddings = embeddings
+        self.lstm = lstm
+        self.crf = crf
+        self.to(device)
 
     def forward(self, batch):
-        output, _ = self.encoder(batch)
-        return self.decoder(output, batch[-2])
+        input_, labels_mask, input_type_ids = batch[:3]
+        input_embeddings = self.embeddings(batch)
+        output, _ = self.lstm.forward(input_embeddings, labels_mask)
+        return self.crf.forward(output, labels_mask)
 
     def score(self, batch):
-        output, _ = self.encoder(batch)
-        return self.decoder.score(output, batch[-2], batch[-1])
+        input_, labels_mask, input_type_ids, labels = batch
+        input_embeddings = self.embeddings(batch)
+        output, _ = self.lstm.forward(input_embeddings, labels_mask)
+        return self.crf.score(output, labels_mask, labels)
 
     @classmethod
     def create(cls,
                label_size,
                # BertEmbedder params
-               bert_config_file, init_checkpoint_pt, embedding_dim=768, bert_mode="weighted",
-               freeze=True,
-               # BertBiLSTMEncoder params
-               enc_hidden_dim=128, rnn_layers=1,
-               # CRFDecoder params
-               input_dropout=0.5,
+               model_name='bert-base-multilingual-cased', mode="weighted", is_freeze=True,
+               # BiLSTM params
+               embedding_size=768, hidden_dim=512, rnn_layers=1, lstm_dropout=0.3,
+               # NCRFDecoder params
+               crf_dropout=0.5, nbest=1,
                # Global params
-               use_cuda=True,
-               # Meta
-               meta_dim=None):
-        embedder = BertEmbedder.create(
-            bert_config_file, init_checkpoint_pt, embedding_dim, use_cuda, bert_mode, freeze)
-        if meta_dim is None:
-            encoder = BertBiLSTMEncoder.create(embedder, enc_hidden_dim, rnn_layers, use_cuda)
-        else:
-            encoder = BertMetaBiLSTMEncoder.create(embedder, meta_dim, enc_hidden_dim, rnn_layers, use_cuda)
-        decoder = CRFDecoder.create(label_size, encoder.output_dim, input_dropout)
-        return cls(encoder, decoder, use_cuda)
+               device="cuda"):
+        embeddings = BERTEmbedder.create(model_name=model_name, device=device, mode=mode, is_freeze=is_freeze)
+        lstm = BiLSTM.create(
+                embedding_size=embedding_size, hidden_dim=hidden_dim, rnn_layers=rnn_layers, dropout=lstm_dropout)
+        crf = NCRFDecoder.create(
+            label_size, hidden_dim, crf_dropout, nbest, device=device)
+        return cls(embeddings, lstm, crf, device)
 
 
-class BertBiLSTMAttnCRF(NerModel):
+class BERTAttnCRF(BERTNerModel):
+
+    def __init__(self, embeddings, attn, crf, device="cuda"):
+        super(BERTAttnCRF, self).__init__()
+        self.embeddings = embeddings
+        self.attn = attn
+        self.crf = crf
+        self.to(device)
 
     def forward(self, batch):
-        output, _ = self.encoder(batch)
-        return self.decoder(output, batch[-2])
+        input_, labels_mask, input_type_ids = batch[:3]
+        input_embeddings = self.embeddings(batch)
+        output, _ = self.attn(input_embeddings, input_embeddings, input_embeddings, None)
+        return self.crf.forward(output, labels_mask)
 
     def score(self, batch):
-        output, _ = self.encoder(batch)
-        return self.decoder.score(output, batch[-2], batch[-1])
+        input_, labels_mask, input_type_ids, labels = batch
+        input_embeddings = self.embeddings(batch)
+        output, _ = self.attn(input_embeddings, input_embeddings, input_embeddings, None)
+        return self.crf.score(output, labels_mask, labels)
 
     @classmethod
     def create(cls,
                label_size,
                # BertEmbedder params
-               bert_config_file, init_checkpoint_pt, embedding_dim=768, bert_mode="weighted",
-               freeze=True,
-               # BertBiLSTMEncoder params
-               enc_hidden_dim=128, rnn_layers=1,
-               # AttnCRFDecoder params
-               key_dim=64, val_dim=64, num_heads=3,
-               input_dropout=0.5,
+               model_name='bert-base-multilingual-cased', mode="weighted", is_freeze=True,
+               # Attn params
+               embedding_size=768, key_dim=64, val_dim=64, num_heads=3, attn_dropout=0.3,
+               # CRFDecoder params
+               crf_dropout=0.5,
                # Global params
-               use_cuda=True,
-               # Meta
-               meta_dim=None):
-        embedder = BertEmbedder.create(
-            bert_config_file, init_checkpoint_pt, embedding_dim, use_cuda, bert_mode, freeze)
-        if meta_dim is None:
-            encoder = BertBiLSTMEncoder.create(embedder, enc_hidden_dim, rnn_layers, use_cuda)
-        else:
-            encoder = BertMetaBiLSTMEncoder.create(embedder, meta_dim, enc_hidden_dim, rnn_layers, use_cuda)
-        decoder = AttnCRFDecoder.create(
-            label_size, encoder.output_dim, input_dropout, key_dim, val_dim, num_heads)
-        return cls(encoder, decoder, use_cuda)
-
-
-class BertAttnCRF(NerModel):
+               device="cuda"):
+        embeddings = BERTEmbedder.create(model_name=model_name, device=device, mode=mode, is_freeze=is_freeze)
+        attn = MultiHeadAttention(key_dim, val_dim, embedding_size, num_heads, attn_dropout)
+        crf = CRFDecoder.create(
+            label_size, embedding_size, crf_dropout)
+        return cls(embeddings, attn, crf, device)
+
+
+class BERTAttnNCRF(BERTNerModel):
+
+    def __init__(self, embeddings, attn, crf, device="cuda"):
+        super(BERTAttnNCRF, self).__init__()
+        self.embeddings = embeddings
+        self.attn = attn
+        self.crf = crf
+        self.to(device)
 
     def forward(self, batch):
-        output, _ = self.encoder(*batch)
-        return self.decoder(output, batch[-2])
+        input_, labels_mask, input_type_ids = batch[:3]
+        input_embeddings = self.embeddings(batch)
+        output, _ = self.attn(input_embeddings, input_embeddings, input_embeddings, None)
+        return self.crf.forward(output, labels_mask)
 
     def score(self, batch):
-        output, _ = self.encoder(batch)
-        return self.decoder.score(output, batch[-2], batch[-1])
+        input_, labels_mask, input_type_ids, labels = batch
+        input_embeddings = self.embeddings(batch)
+        output, _ = self.attn(input_embeddings, input_embeddings, input_embeddings, None)
+        return self.crf.score(output, labels_mask, labels)
 
     @classmethod
     def create(cls,
                label_size,
                # BertEmbedder params
-               bert_config_file, init_checkpoint_pt, embedding_dim=768, bert_mode="weighted",
-               freeze=True,
-               # AttnCRFDecoder params
-               key_dim=64, val_dim=64, num_heads=3,
-               input_dropout=0.5,
+               model_name='bert-base-multilingual-cased', mode="weighted", is_freeze=True,
+               # Attn params
+               embedding_size=768, key_dim=64, val_dim=64, num_heads=3, attn_dropout=0.3,
+               # NCRFDecoder params
+               crf_dropout=0.5, nbest=1,
                # Global params
-               use_cuda=True):
-        encoder = BertEmbedder.create(
-            bert_config_file, init_checkpoint_pt, embedding_dim, use_cuda, bert_mode, freeze)
-        decoder = AttnCRFDecoder.create(
-            label_size, embedding_dim, input_dropout, key_dim, val_dim, num_heads)
-        return cls(encoder, decoder, use_cuda)
+               device="cuda"):
+        embeddings = BERTEmbedder.create(model_name=model_name, device=device, mode=mode, is_freeze=is_freeze)
+        attn = MultiHeadAttention(key_dim, val_dim, embedding_size, num_heads, attn_dropout)
+        crf = NCRFDecoder.create(
+            label_size, embedding_size, crf_dropout, nbest=nbest, device=device)
+        return cls(embeddings, attn, crf, device)
 
 
-class BertBiLSTMAttnNMT(NerModel):
-    """Reused from https://github.com/DSKSD/RNN-for-Joint-NLU"""
+class BERTBiLSTMAttnCRF(BERTNerModel):
+
+    def __init__(self, embeddings, lstm, attn, crf, device="cuda"):
+        super(BERTBiLSTMAttnCRF, self).__init__()
+        self.embeddings = embeddings
+        self.lstm = lstm
+        self.attn = attn
+        self.crf = crf
+        self.to(device)
 
     def forward(self, batch):
-        output, _ = self.encoder(batch)
-        return self.decoder(output, batch[-2])
+        input_, labels_mask, input_type_ids = batch[:3]
+        input_embeddings = self.embeddings(batch)
+        output, _ = self.lstm.forward(input_embeddings, labels_mask)
+        output, _ = self.attn(output, output, output, None)
+        return self.crf.forward(output, labels_mask)
 
     def score(self, batch):
-        output, _ = self.encoder(batch)
-        return self.decoder.score(output, batch[-2], batch[-1])
+        input_, labels_mask, input_type_ids, labels = batch
+        input_embeddings = self.embeddings(batch)
+        output, _ = self.lstm.forward(input_embeddings, labels_mask)
+        output, _ = self.attn(output, output, output, None)
+        return self.crf.score(output, labels_mask, labels)
 
     @classmethod
     def create(cls,
                label_size,
                # BertEmbedder params
-               bert_config_file, init_checkpoint_pt, embedding_dim=768, bert_mode="weighted",
-               freeze=True,
-               # BertBiLSTMEncoder params
-               enc_hidden_dim=128, rnn_layers=1,
-               # NMTDecoder params
-               dec_embedding_dim=64, dec_hidden_dim=256, dec_rnn_layers=1,
-               input_dropout=0.5, pad_idx=0,
+               model_name='bert-base-multilingual-cased', mode="weighted", is_freeze=True,
+               # BiLSTM
+               hidden_dim=512, rnn_layers=1, lstm_dropout=0.3,
+               # Attn params
+               embedding_size=768, key_dim=64, val_dim=64, num_heads=3, attn_dropout=0.3,
+               # CRFDecoder params
+               crf_dropout=0.5,
                # Global params
-               use_cuda=True,
-               # Meta
-               meta_dim=None):
-        embedder = BertEmbedder.create(
-            bert_config_file, init_checkpoint_pt, embedding_dim, use_cuda, bert_mode, freeze)
-        if meta_dim is None:
-            encoder = BertBiLSTMEncoder.create(embedder, enc_hidden_dim, rnn_layers, use_cuda)
-        else:
-            encoder = BertMetaBiLSTMEncoder.create(embedder, meta_dim, enc_hidden_dim, rnn_layers, use_cuda)
-        decoder = NMTDecoder.create(
-            label_size, dec_embedding_dim, dec_hidden_dim,
-            dec_rnn_layers, input_dropout, pad_idx, use_cuda)
-        return cls(encoder, decoder, use_cuda)
-
-
-class BertBiLSTMAttnNMTCRF(NerModel):
+               device="cuda"):
+        embeddings = BERTEmbedder.create(model_name=model_name, device=device, mode=mode, is_freeze=is_freeze)
+        lstm = BiLSTM.create(
+            embedding_size=embedding_size, hidden_dim=hidden_dim, rnn_layers=rnn_layers, dropout=lstm_dropout)
+        attn = MultiHeadAttention(key_dim, val_dim, hidden_dim, num_heads, attn_dropout)
+        crf = CRFDecoder.create(
+            label_size, hidden_dim, crf_dropout)
+        return cls(embeddings, lstm, attn, crf, device)
+
+
+class BERTBiLSTMAttnNCRF(BERTNerModel):
+
+    def __init__(self, embeddings, lstm, attn, crf, device="cuda"):
+        super(BERTBiLSTMAttnNCRF, self).__init__()
+        self.embeddings = embeddings
+        self.lstm = lstm
+        self.attn = attn
+        self.crf = crf
+        self.to(device)
 
     def forward(self, batch):
-        output, _ = self.encoder(batch)
-        return self.decoder(output, batch[-2])
+        input_, labels_mask, input_type_ids = batch[:3]
+        input_embeddings = self.embeddings(batch)
+        output, _ = self.lstm.forward(input_embeddings, labels_mask)
+        output, _ = self.attn(output, output, output, None)
+        return self.crf.forward(output, labels_mask)
 
     def score(self, batch):
-        output, _ = self.encoder(batch)
-        return self.decoder.score(output, batch[-2], batch[-1])
+        input_, labels_mask, input_type_ids, labels = batch
+        input_embeddings = self.embeddings(batch)
+        output, _ = self.lstm.forward(input_embeddings, labels_mask)
+        output, _ = self.attn(output, output, output, None)
+        return self.crf.score(output, labels_mask, labels)
 
     @classmethod
     def create(cls,
                label_size,
                # BertEmbedder params
-               bert_config_file, init_checkpoint_pt, embedding_dim=768, bert_mode="weighted",
-               freeze=True,
-               # BertBiLSTMEncoder params
-               enc_hidden_dim=128, rnn_layers=1,
-               # NMTDecoder params
-               dec_embedding_dim=64, dec_hidden_dim=256, dec_rnn_layers=1,
-               input_dropout=0.5, pad_idx=0,
+               model_name='bert-base-multilingual-cased', mode="weighted", is_freeze=True,
+               # BiLSTM
+               hidden_dim=512, rnn_layers=1, lstm_dropout=0.3,
+               # Attn params
+               embedding_size=768, key_dim=64, val_dim=64, num_heads=3, attn_dropout=0.3,
+               # NCRFDecoder params
+               crf_dropout=0.5, nbest=1,
+               # Global params
+               device="cuda"):
+        embeddings = BERTEmbedder.create(model_name=model_name, device=device, mode=mode, is_freeze=is_freeze)
+        lstm = BiLSTM.create(
+            embedding_size=embedding_size, hidden_dim=hidden_dim, rnn_layers=rnn_layers, dropout=lstm_dropout)
+        attn = MultiHeadAttention(key_dim, val_dim, hidden_dim, num_heads, attn_dropout)
+        crf = NCRFDecoder.create(
+            label_size, hidden_dim, crf_dropout, nbest=nbest, device=device)
+        return cls(embeddings, lstm, attn, crf, device)
+
+
+class BERTBiLSTMAttnNCRFJoint(BERTNerModel):
+
+    def __init__(self, embeddings, lstm, attn, crf, clf, device="cuda"):
+        super(BERTBiLSTMAttnNCRFJoint, self).__init__()
+        self.embeddings = embeddings
+        self.lstm = lstm
+        self.attn = attn
+        self.crf = crf
+        self.clf = clf
+        self.to(device)
+
+    def forward(self, batch):
+        input_, labels_mask, input_type_ids = batch[:3]
+        input_embeddings = self.embeddings(batch)
+        output, _ = self.lstm.forward(input_embeddings, labels_mask)
+        output, _ = self.attn(output, output, output, None)
+        return self.crf.forward(output, labels_mask), self.clf(output)
+
+    def score(self, batch):
+        input_, labels_mask, input_type_ids, labels, cls_ids = batch
+        input_embeddings = self.embeddings(batch)
+        output, _ = self.lstm.forward(input_embeddings, labels_mask)
+        output, _ = self.attn(output, output, output, None)
+        return self.crf.score(output, labels_mask, labels) + self.clf.score(output, cls_ids)
+
+    @classmethod
+    def create(cls,
+               label_size, intent_size,
+               # BertEmbedder params
+               model_name='bert-base-multilingual-cased', mode="weighted", is_freeze=True,
+               # BiLSTM
+               hidden_dim=512, rnn_layers=1, lstm_dropout=0.3,
+               # Attn params
+               embedding_size=768, key_dim=64, val_dim=64, num_heads=3, attn_dropout=0.3,
+               # NCRFDecoder params
+               crf_dropout=0.5, nbest=1,
+               # Clf params
+               clf_dropout=0.3,
+               # Global params
+               device="cuda"):
+        embeddings = BERTEmbedder.create(model_name=model_name, device=device, mode=mode, is_freeze=is_freeze)
+        lstm = BiLSTM.create(
+            embedding_size=embedding_size, hidden_dim=hidden_dim, rnn_layers=rnn_layers, dropout=lstm_dropout)
+        attn = MultiHeadAttention(key_dim, val_dim, hidden_dim, num_heads, attn_dropout)
+        crf = NCRFDecoder.create(
+            label_size, hidden_dim, crf_dropout, nbest=nbest, device=device)
+        clf = ClassDecoder(intent_size, hidden_dim, clf_dropout)
+        return cls(embeddings, lstm, attn, crf, clf, device)
+
+
+class BERTBiLSTMAttnCRFJoint(BERTNerModel):
+
+    def __init__(self, embeddings, lstm, attn, crf, clf, device="cuda"):
+        super(BERTBiLSTMAttnCRFJoint, self).__init__()
+        self.embeddings = embeddings
+        self.lstm = lstm
+        self.attn = attn
+        self.crf = crf
+        self.clf = clf
+        self.to(device)
+
+    def forward(self, batch):
+        input_, labels_mask, input_type_ids = batch[:3]
+        input_embeddings = self.embeddings(batch)
+        output, _ = self.lstm.forward(input_embeddings, labels_mask)
+        output, _ = self.attn(output, output, output, None)
+        return self.crf.forward(output, labels_mask), self.clf(output)
+
+    def score(self, batch):
+        input_, labels_mask, input_type_ids, labels, cls_ids = batch
+        input_embeddings = self.embeddings(batch)
+        output, _ = self.lstm.forward(input_embeddings, labels_mask)
+        output, _ = self.attn(output, output, output, None)
+        return self.crf.score(output, labels_mask, labels) + self.clf.score(output, cls_ids)
+
+    @classmethod
+    def create(cls,
+               label_size, intent_size,
+               # BertEmbedder params
+               model_name='bert-base-multilingual-cased', mode="weighted", is_freeze=True,
+               # BiLSTM
+               hidden_dim=512, rnn_layers=1, lstm_dropout=0.3,
+               # Attn params
+               embedding_size=768, key_dim=64, val_dim=64, num_heads=3, attn_dropout=0.3,
+               # CRFDecoder params
+               crf_dropout=0.5,
+               # Clf params
+               clf_dropout=0.3,
+               # Global params
+               device="cuda"):
+        embeddings = BERTEmbedder.create(model_name=model_name, device=device, mode=mode, is_freeze=is_freeze)
+        lstm = BiLSTM.create(
+            embedding_size=embedding_size, hidden_dim=hidden_dim, rnn_layers=rnn_layers, dropout=lstm_dropout)
+        attn = MultiHeadAttention(key_dim, val_dim, hidden_dim, num_heads, attn_dropout)
+        crf = CRFDecoder.create(
+            label_size, hidden_dim, crf_dropout)
+        clf = ClassDecoder(intent_size, hidden_dim, clf_dropout)
+        return cls(embeddings, lstm, attn, crf, clf, device)
+
+
+class BERTBiLSTMCRFJoint(BERTNerModel):
+
+    def __init__(self, embeddings, lstm, crf, clf, device="cuda"):
+        super(BERTBiLSTMCRFJoint, self).__init__()
+        self.embeddings = embeddings
+        self.lstm = lstm
+        self.crf = crf
+        self.clf = clf
+        self.to(device)
+
+    def forward(self, batch):
+        input_, labels_mask, input_type_ids = batch[:3]
+        input_embeddings = self.embeddings(batch)
+        output, _ = self.lstm.forward(input_embeddings, labels_mask)
+        return self.crf.forward(output, labels_mask), self.clf(output)
+
+    def score(self, batch):
+        input_, labels_mask, input_type_ids, labels, cls_ids = batch
+        input_embeddings = self.embeddings(batch)
+        output, _ = self.lstm.forward(input_embeddings, labels_mask)
+        return self.crf.score(output, labels_mask, labels) + self.clf.score(output, cls_ids)
+
+    @classmethod
+    def create(cls,
+               label_size, intent_size,
+               # BertEmbedder params
+               model_name='bert-base-multilingual-cased', mode="weighted", is_freeze=True,
+               # BiLSTM params
+               embedding_size=768, hidden_dim=512, rnn_layers=1, lstm_dropout=0.3,
+               # CRFDecoder params
+               crf_dropout=0.5,
+               # Clf params
+               clf_dropout=0.3,
                # Global params
-               use_cuda=True,
-               # Meta
-               meta_dim=None):
-        embedder = BertEmbedder.create(
-            bert_config_file, init_checkpoint_pt, embedding_dim, use_cuda, bert_mode, freeze)
-        if meta_dim is None:
-            encoder = BertBiLSTMEncoder.create(embedder, enc_hidden_dim, rnn_layers, use_cuda)
-        else:
-            encoder = BertMetaBiLSTMEncoder.create(embedder, meta_dim, enc_hidden_dim, rnn_layers, use_cuda)
-        decoder = NMTCRFDecoder.create(
-            label_size, dec_embedding_dim, dec_hidden_dim,
-            dec_rnn_layers, input_dropout, pad_idx, use_cuda)
-        return cls(encoder, decoder, use_cuda)
-
-
-class BertBiLSTMAttnCRFJoint(NerModel):
+               device="cuda"):
+        embeddings = BERTEmbedder.create(model_name=model_name, device=device, mode=mode, is_freeze=is_freeze)
+        lstm = BiLSTM.create(
+                embedding_size=embedding_size, hidden_dim=hidden_dim, rnn_layers=rnn_layers, dropout=lstm_dropout)
+        crf = CRFDecoder.create(label_size, hidden_dim, crf_dropout)
+        clf = ClassDecoder(intent_size, hidden_dim, clf_dropout)
+        return cls(embeddings, lstm, crf, clf, device)
+
+
+class BERTBiLSTMNCRFJoint(BERTNerModel):
+
+    def __init__(self, embeddings, lstm, crf, clf, device="cuda"):
+        super(BERTBiLSTMNCRFJoint, self).__init__()
+        self.embeddings = embeddings
+        self.lstm = lstm
+        self.crf = crf
+        self.clf = clf
+        self.to(device)
 
     def forward(self, batch):
-        output, _ = self.encoder(batch)
-        return self.decoder(output, batch[-2])
+        input_, labels_mask, input_type_ids = batch[:3]
+        input_embeddings = self.embeddings(batch)
+        output, _ = self.lstm.forward(input_embeddings, labels_mask)
+        return self.crf.forward(output, labels_mask), self.clf(output)
 
     def score(self, batch):
-        output, _ = self.encoder(batch)
-        return self.decoder.score(output, batch[-2], batch[-1], batch[-3])
+        input_, labels_mask, input_type_ids, labels, cls_ids = batch
+        input_embeddings = self.embeddings(batch)
+        output, _ = self.lstm.forward(input_embeddings, labels_mask)
+        return self.crf.score(output, labels_mask, labels) + self.clf.score(output, cls_ids)
 
     @classmethod
     def create(cls,
                label_size, intent_size,
                # BertEmbedder params
-               bert_config_file, init_checkpoint_pt, embedding_dim=768, bert_mode="weighted",
-               freeze=True,
-               # BertBiLSTMEncoder params
-               enc_hidden_dim=128, rnn_layers=1,
-               # AttnCRFDecoder params
-               key_dim=64, val_dim=64, num_heads=3,
-               input_dropout=0.5,
+               model_name='bert-base-multilingual-cased', mode="weighted", is_freeze=True,
+               # BiLSTM params
+               embedding_size=768, hidden_dim=512, rnn_layers=1, lstm_dropout=0.3,
+               # NCRFDecoder params
+               crf_dropout=0.5, nbest=1,
+               # Clf params
+               clf_dropout=0.3,
                # Global params
-               use_cuda=True,
-               # Meta
-               meta_dim=None):
-        embedder = BertEmbedder.create(
-            bert_config_file, init_checkpoint_pt, embedding_dim, use_cuda, bert_mode, freeze)
-        if meta_dim is None:
-            encoder = BertBiLSTMEncoder.create(embedder, enc_hidden_dim, rnn_layers, use_cuda)
-        else:
-            encoder = BertMetaBiLSTMEncoder.create(embedder, meta_dim, enc_hidden_dim, rnn_layers, use_cuda)
-        decoder = AttnCRFJointDecoder.create(
-            label_size, encoder.output_dim, intent_size, input_dropout, key_dim, val_dim, num_heads)
-        return cls(encoder, decoder, use_cuda)
-
-
-class BertBiLSTMAttnNMTJoint(NerModel):
-    """Reused from https://github.com/DSKSD/RNN-for-Joint-NLU"""
+               device="cuda"):
+        embeddings = BERTEmbedder.create(model_name=model_name, device=device, mode=mode, is_freeze=is_freeze)
+        lstm = BiLSTM.create(
+                embedding_size=embedding_size, hidden_dim=hidden_dim, rnn_layers=rnn_layers, dropout=lstm_dropout)
+        crf = NCRFDecoder.create(label_size, hidden_dim, crf_dropout, nbest=nbest, device=device)
+        clf = ClassDecoder(intent_size, hidden_dim, clf_dropout)
+        return cls(embeddings, lstm, crf, clf, device)
+
+
+class BERTAttnCRFJoint(BERTNerModel):
+
+    def __init__(self, embeddings, attn, crf, clf, device="cuda"):
+        super(BERTAttnCRFJoint, self).__init__()
+        self.embeddings = embeddings
+        self.attn = attn
+        self.crf = crf
+        self.clf = clf
+        self.to(device)
 
     def forward(self, batch):
-        output, _ = self.encoder(batch)
-        return self.decoder(output, batch[-2])
+        input_, labels_mask, input_type_ids = batch[:3]
+        input_embeddings = self.embeddings(batch)
+        output, _ = self.attn(input_embeddings, input_embeddings, input_embeddings, None)
+        return self.crf.forward(output, labels_mask), self.clf(output)
 
     def score(self, batch):
-        output, _ = self.encoder(batch)
-        return self.decoder.score(output, batch[-2], batch[-1], batch[-3])
+        input_, labels_mask, input_type_ids, labels, cls_ids = batch
+        input_embeddings = self.embeddings(batch)
+        output, _ = self.attn(input_embeddings, input_embeddings, input_embeddings, None)
+        return self.crf.score(output, labels_mask, labels) + self.clf.score(output, cls_ids)
 
     @classmethod
     def create(cls,
                label_size, intent_size,
                # BertEmbedder params
-               bert_config_file, init_checkpoint_pt, embedding_dim=768, bert_mode="weighted",
-               freeze=True,
-               # BertBiLSTMEncoder params
-               enc_hidden_dim=128, rnn_layers=1,
-               # NMTDecoder params
-               dec_embedding_dim=64, dec_hidden_dim=256, dec_rnn_layers=1,
-               input_dropout=0.5, pad_idx=0,
+               model_name='bert-base-multilingual-cased', mode="weighted", is_freeze=True,
+               # BiLSTM params: unused (this model has no BiLSTM); kept for call compatibility
+               hidden_dim=512, rnn_layers=1, lstm_dropout=0.3,
+               # Attn params (embedding_size = width of the BERT embeddings fed to attn/crf/clf)
+               embedding_size=768, key_dim=64, val_dim=64, num_heads=3, attn_dropout=0.3,
+               # CRFDecoder params
+               crf_dropout=0.5,
+               # Clf params
+               clf_dropout=0.3,
                # Global params
-               use_cuda=True,
-               # Meta
-               meta_dim=None):
-        embedder = BertEmbedder.create(
-            bert_config_file, init_checkpoint_pt, embedding_dim, use_cuda, bert_mode, freeze)
-        if meta_dim is None:
-            encoder = BertBiLSTMEncoder.create(embedder, enc_hidden_dim, rnn_layers, use_cuda)
-        else:
-            encoder = BertMetaBiLSTMEncoder.create(embedder, meta_dim, enc_hidden_dim, rnn_layers, use_cuda)
-        decoder = NMTJointDecoder.create(
-            label_size, intent_size, dec_embedding_dim, dec_hidden_dim,
-            dec_rnn_layers, input_dropout, pad_idx, use_cuda)
-        return cls(encoder, decoder, use_cuda)
-
-
-class BertBiLSTMAttnNMTJoint(NerModel):
+               device="cuda"):
+        embeddings = BERTEmbedder.create(model_name=model_name, device=device, mode=mode, is_freeze=is_freeze)
+        attn = MultiHeadAttention(key_dim, val_dim, embedding_size, num_heads, attn_dropout)
+        crf = CRFDecoder.create(
+            label_size, embedding_size, crf_dropout)
+        clf = ClassDecoder(intent_size, embedding_size, clf_dropout)
+        return cls(embeddings, attn, crf, clf, device)
+
+
+class BERTAttnNCRFJoint(BERTNerModel):
+
+    def __init__(self, embeddings, attn, crf, clf, device="cuda"):
+        super(BERTAttnNCRFJoint, self).__init__()
+        self.embeddings = embeddings
+        self.attn = attn
+        self.crf = crf
+        self.clf = clf
+        self.to(device)
 
     def forward(self, batch):
-        output, _ = self.encoder(batch)
-        return self.decoder(output, batch[-2])
+        input_, labels_mask, input_type_ids = batch[:3]
+        input_embeddings = self.embeddings(batch)
+        output, _ = self.attn(input_embeddings, input_embeddings, input_embeddings, None)
+        return self.crf.forward(output, labels_mask), self.clf(output)
 
     def score(self, batch):
-        output, _ = self.encoder(batch)
-        return self.decoder.score(output, batch[-2], batch[-1], batch[-3])
+        input_, labels_mask, input_type_ids, labels, cls_ids = batch
+        input_embeddings = self.embeddings(batch)
+        output, _ = self.attn(input_embeddings, input_embeddings, input_embeddings, None)
+        return self.crf.score(output, labels_mask, labels) + self.clf.score(output, cls_ids)
 
     @classmethod
     def create(cls,
                label_size, intent_size,
                # BertEmbedder params
-               bert_config_file, init_checkpoint_pt, embedding_dim=768, bert_mode="weighted",
-               freeze=True,
-               # BertBiLSTMEncoder params
-               enc_hidden_dim=128, rnn_layers=1,
-               # AttnNCRFDecoder params
-               key_dim=64, val_dim=64, num_heads=3,
-               input_dropout=0.5,
+               model_name='bert-base-multilingual-cased', mode="weighted", is_freeze=True,
+               # BiLSTM params: unused (this model has no BiLSTM); kept for call compatibility
+               hidden_dim=512, rnn_layers=1, lstm_dropout=0.3,
+               # Attn params (embedding_size = width of the BERT embeddings fed to attn/crf/clf)
+               embedding_size=768, key_dim=64, val_dim=64, num_heads=3, attn_dropout=0.3,
+               # NCRFDecoder params
+               crf_dropout=0.5, nbest=1,
+               # Clf params
+               clf_dropout=0.3,
                # Global params
-               use_cuda=True,
-               # Meta
-               meta_dim=None,
-               # NCRFpp
-               nbest=8):
-        embedder = BertEmbedder.create(
-            bert_config_file, init_checkpoint_pt, embedding_dim, use_cuda, bert_mode, freeze)
-        if meta_dim is None:
-            encoder = BertBiLSTMEncoder.create(embedder, enc_hidden_dim, rnn_layers, use_cuda)
-        else:
-            encoder = BertMetaBiLSTMEncoder.create(embedder, meta_dim, enc_hidden_dim, rnn_layers, use_cuda)
-        decoder = AttnNCRFJointDecoder.create(
-            label_size, encoder.output_dim, intent_size, input_dropout, key_dim, val_dim, num_heads, use_cuda,
-            nbest=nbest)
-        return cls(encoder, decoder, use_cuda)
-
-
-class BertBiLSTMAttnNCRF(NerModel):
+               device="cuda"):
+        embeddings = BERTEmbedder.create(model_name=model_name, device=device, mode=mode, is_freeze=is_freeze)
+        attn = MultiHeadAttention(key_dim, val_dim, embedding_size, num_heads, attn_dropout)
+        crf = NCRFDecoder.create(
+            label_size, embedding_size, crf_dropout, nbest=nbest, device=device)
+        clf = ClassDecoder(intent_size, embedding_size, clf_dropout)
+        return cls(embeddings, attn, crf, clf, device)
+
+
+class BERTNCRF(BERTNerModel):
+    """Tagger that feeds the embedder output straight into the `crf` decoder (built as an NCRFDecoder by `create`)."""
+    def __init__(self, embeddings, crf, device="cuda"):
+        super(BERTNCRF, self).__init__()
+        self.embeddings = embeddings
+        self.crf = crf
+        self.to(device)
 
     def forward(self, batch):
-        output, _ = self.encoder(batch)
-        return self.decoder(output, batch[-2])
+        input_, labels_mask, input_type_ids = batch[:3]  # only labels_mask is used below
+        input_embeddings = self.embeddings(batch)  # the embedder receives the whole batch
+        return self.crf.forward(input_embeddings, labels_mask)
 
     def score(self, batch):
-        output, _ = self.encoder(batch)
-        return self.decoder.score(output, batch[-2], batch[-1])
+        input_, labels_mask, input_type_ids, labels = batch  # batch must hold exactly 4 items
+        input_embeddings = self.embeddings(batch)
+        return self.crf.score(input_embeddings, labels_mask, labels)
 
     @classmethod
     def create(cls,
                label_size,
                # BertEmbedder params
-               bert_config_file, init_checkpoint_pt, embedding_dim=768, bert_mode="weighted",
-               freeze=True,
-               # BertBiLSTMEncoder params
-               enc_hidden_dim=128, rnn_layers=1,
-               # AttnNCRFDecoder params
-               key_dim=64, val_dim=64, num_heads=3,
-               input_dropout=0.5,
+               model_name='bert-base-multilingual-cased', mode="weighted", is_freeze=True,
+               embedding_size=768,  # passed to NCRFDecoder.create as its second argument
+               # NCRFDecoder params
+               crf_dropout=0.5, nbest=1,
                # Global params
-               use_cuda=True,
-               # Meta
-               meta_dim=None,
-               # NCRFpp
-               nbest=8):
-        embedder = BertEmbedder.create(
-            bert_config_file, init_checkpoint_pt, embedding_dim, use_cuda, bert_mode, freeze)
-        if meta_dim is None:
-            encoder = BertBiLSTMEncoder.create(embedder, enc_hidden_dim, rnn_layers, use_cuda)
-        else:
-            encoder = BertMetaBiLSTMEncoder.create(embedder, meta_dim, enc_hidden_dim, rnn_layers, use_cuda)
-        decoder = AttnNCRFDecoder.create(
-            label_size, encoder.output_dim, input_dropout, key_dim, val_dim, num_heads, nbest)
-        return cls(encoder, decoder, use_cuda)
-
-
-class BertBiLSTMNCRF(NerModel):
+               device="cuda"):
+        embeddings = BERTEmbedder.create(model_name=model_name, device=device, mode=mode, is_freeze=is_freeze)
+        crf = NCRFDecoder.create(
+            label_size, embedding_size, crf_dropout, nbest=nbest, device=device)
+        return cls(embeddings, crf, device)
+
+
+class BERTCRF(BERTNerModel):
+    """Tagger that feeds the embedder output straight into the `crf` decoder (built as a CRFDecoder by `create`)."""
+    def __init__(self, embeddings, crf, device="cuda"):
+        super(BERTCRF, self).__init__()
+        self.embeddings = embeddings
+        self.crf = crf
+        self.to(device)
 
     def forward(self, batch):
-        output, _ = self.encoder(batch)
-        return self.decoder(output, batch[-2])
+        input_, labels_mask, input_type_ids = batch[:3]  # only labels_mask is used below
+        input_embeddings = self.embeddings(batch)  # the embedder receives the whole batch
+        return self.crf.forward(input_embeddings, labels_mask)
 
     def score(self, batch):
-        output, _ = self.encoder(batch)
-        return self.decoder.score(output, batch[-2], batch[-1])
+        input_, labels_mask, input_type_ids, labels = batch  # batch must hold exactly 4 items
+        input_embeddings = self.embeddings(batch)
+        return self.crf.score(input_embeddings, labels_mask, labels)
 
     @classmethod
     def create(cls,
                label_size,
                # BertEmbedder params
-               bert_config_file, init_checkpoint_pt, embedding_dim=768, bert_mode="weighted",
-               freeze=True,
-               # BertBiLSTMEncoder params
-               enc_hidden_dim=128, rnn_layers=1,
-               input_dropout=0.5,
-               output_dropout=0.4,
+               model_name='bert-base-multilingual-cased', mode="weighted", is_freeze=True,
+               embedding_size=768,  # passed to CRFDecoder.create as its second argument
+               # CRFDecoder params
+               crf_dropout=0.5,
                # Global params
-               use_cuda=True,
-               # Meta
-               meta_dim=None,
-               vocab_meta_dim=None,
-               # NCRFpp
-               nbest=8):
-        embeddings = BertEmbedder.create(
-            bert_config_file, init_checkpoint_pt, embedding_dim, use_cuda,
-            bert_mode, freeze)
-        meta_embeddings = None
-        if meta_dim is not None and vocab_meta_dim is not None:
-            meta_embeddings = nn.Embedding(vocab_meta_dim, meta_dim, padding_idx=0)
-        encoder = BertMetaBiLSTMEncoder.create(
-            embeddings, meta_embeddings, enc_hidden_dim, rnn_layers, input_dropout, use_cuda)
-        decoder = NCRFDecoder.create(
-            label_size, encoder.output_dim, output_dropout, nbest)
-        return cls(encoder, decoder, use_cuda)
+               device="cuda"):
+        embeddings = BERTEmbedder.create(model_name=model_name, device=device, mode=mode, is_freeze=is_freeze)
+        crf = CRFDecoder.create(
+            label_size, embedding_size, crf_dropout)
+        return cls(embeddings, crf, device)
diff --git a/modules/models/elmo_models.py b/modules/models/elmo_models.py
deleted file mode 100644
index 2a38d06..0000000
--- a/modules/models/elmo_models.py
+++ /dev/null
@@ -1,227 +0,0 @@
-from modules.layers.encoders import *
-from modules.layers.decoders import *
-from modules.layers.embedders import *
-import abc
-
-
-class NerModel(nn.Module, metaclass=abc.ABCMeta):
-    """Base class for all Models"""
-    def __init__(self, encoder, decoder, use_cuda=True):
-        super(NerModel, self).__init__()
-        self.encoder = encoder
-        self.decoder = decoder
-        if use_cuda:
-            self.cuda()
-
-    @abc.abstractmethod
-    def forward(self, *batch):
-        # return self.decoder(self.encoder(batch))
-        raise NotImplementedError("abstract method forward must be implemented")
-
-    @abc.abstractmethod
-    def score(self, *batch):
-        # return self.decoder.score(self.encoder(batch))
-        raise NotImplementedError("abstract method score must be implemented")
-
-    @abc.abstractmethod
-    def create(self, *args):
-        raise NotImplementedError("abstract method create must be implemented")
-
-    def get_n_trainable_params(self):
-        pp = 0
-        for p in list(self.parameters()):
-            if p.requires_grad:
-                num = 1
-                for s in list(p.size()):
-                    num = num * s
-                pp += num
-        return pp
-
-
-class ElmoBiLSTMCRF(NerModel):
-
-    def forward(self, batch):
-        output, _ = self.encoder(batch)
-        return self.decoder(output, batch[-2])
-
-    def score(self, batch):
-        output, _ = self.encoder(batch)
-        return self.decoder.score(output, batch[-2], batch[-1])
-
-    @classmethod
-    def create(cls,
-               label_size,
-               # ElmoEmbedder params
-               model_dir, config_name, embedding_dim=1024, elmo_mode="avg",
-               freeze=True,
-               # ElmoBiLSTMEncoder params
-               enc_hidden_dim=128, rnn_layers=1,
-               # CRFDecoder params
-               input_dropout=0.5,
-               # Global params
-               use_cuda=True):
-        embedder = ElmoEmbedder.create(
-            model_dir, config_name, embedding_dim, use_cuda, elmo_mode, freeze)
-        encoder = ElmoBiLSTMEncoder.create(embedder, enc_hidden_dim, rnn_layers, use_cuda)
-        decoder = CRFDecoder.create(label_size, encoder.output_dim, input_dropout)
-        return cls(encoder, decoder, use_cuda)
-
-
-class ElmoBiLSTMAttnCRF(NerModel):
-
-    def forward(self, batch):
-        output, _ = self.encoder(batch)
-        return self.decoder(output, batch[-2])
-
-    def score(self, batch):
-        output, _ = self.encoder(batch)
-        return self.decoder.score(output, batch[-2], batch[-1])
-
-    @classmethod
-    def create(cls,
-               label_size,
-               # ElmoEmbedder params
-               model_dir, config_name, embedding_dim=1024, elmo_mode="avg",
-               freeze=True,
-               # ElmoBiLSTMEncoder params
-               enc_hidden_dim=128, rnn_layers=1,
-               # AttnCRFDecoder params
-               key_dim=64, val_dim=64, num_heads=3,
-               input_dropout=0.5,
-               # Global params
-               use_cuda=True):
-        embedder = ElmoEmbedder.create(
-            model_dir, config_name, embedding_dim, use_cuda, elmo_mode, freeze)
-        encoder = ElmoBiLSTMEncoder.create(embedder, enc_hidden_dim, rnn_layers, use_cuda)
-        decoder = AttnCRFDecoder.create(
-            label_size, encoder.output_dim, input_dropout, key_dim, val_dim, num_heads)
-        return cls(encoder, decoder, use_cuda)
-
-
-class ElmoBiLSTMAttnNMT(NerModel):
-    """Reused from https://github.com/DSKSD/RNN-for-Joint-NLU"""
-
-    def forward(self, batch):
-        output, _ = self.encoder(batch)
-        return self.decoder(output, batch[-2])
-
-    def score(self, batch):
-        output, _ = self.encoder(batch)
-        return self.decoder.score(output, batch[-2], batch[-1])
-
-    @classmethod
-    def create(cls,
-               label_size,
-               # ElmoEmbedder params
-               model_dir, config_name, embedding_dim=1024, elmo_mode="avg",
-               freeze=True,
-               # ElmoBiLSTMEncoder params
-               enc_hidden_dim=128, rnn_layers=1,
-               # NMTDecoder params
-               dec_embedding_dim=64, dec_hidden_dim=256, dec_rnn_layers=1,
-               input_dropout=0.5, pad_idx=0,
-               # Global params
-               use_cuda=True):
-        embedder = ElmoEmbedder.create(
-            model_dir, config_name, embedding_dim, use_cuda, elmo_mode, freeze)
-        encoder = ElmoBiLSTMEncoder.create(embedder, enc_hidden_dim, rnn_layers, use_cuda)
-        decoder = NMTDecoder.create(
-            label_size, dec_embedding_dim, dec_hidden_dim,
-            dec_rnn_layers, input_dropout, pad_idx, use_cuda)
-        return cls(encoder, decoder, use_cuda)
-
-
-class ElmoBiLSTMAttnNMTCRF(NerModel):
-
-    def forward(self, batch):
-        output, _ = self.encoder(batch)
-        return self.decoder(output, batch[-2])
-
-    def score(self, batch):
-        output, _ = self.encoder(batch)
-        return self.decoder.score(output, batch[-2], batch[-1])
-
-    @classmethod
-    def create(cls,
-               label_size,
-               # ElmoEmbedder params
-               model_dir, config_name, embedding_dim=1024, elmo_mode="avg",
-               freeze=True,
-               # ElmoBiLSTMEncoder params
-               enc_hidden_dim=128, rnn_layers=1,
-               # NMTDecoder params
-               dec_embedding_dim=64, dec_hidden_dim=256, dec_rnn_layers=1,
-               input_dropout=0.5, pad_idx=0,
-               # Global params
-               use_cuda=True):
-        embedder = ElmoEmbedder.create(
-            model_dir, config_name, embedding_dim, use_cuda, elmo_mode, freeze)
-        encoder = ElmoBiLSTMEncoder.create(embedder, enc_hidden_dim, rnn_layers, use_cuda)
-        decoder = NMTCRFDecoder.create(
-            label_size, dec_embedding_dim, dec_hidden_dim,
-            dec_rnn_layers, input_dropout, pad_idx, use_cuda)
-        return cls(encoder, decoder, use_cuda)
-
-
-class ElmoBiLSTMAttnCRFJoint(NerModel):
-
-    def forward(self, batch):
-        output, _ = self.encoder(batch)
-        return self.decoder(output, batch[-2])
-
-    def score(self, batch):
-        output, _ = self.encoder(batch)
-        return self.decoder.score(output, batch[-2], batch[-1], batch[-3])
-
-    @classmethod
-    def create(cls,
-               label_size, intent_size,
-               # ElmoEmbedder params
-               model_dir, config_name, embedding_dim=1024, elmo_mode="avg",
-               freeze=True,
-               # ElmoBiLSTMEncoder params
-               enc_hidden_dim=128, rnn_layers=1,
-               # AttnCRFDecoder params
-               key_dim=64, val_dim=64, num_heads=3,
-               input_dropout=0.5,
-               # Global params
-               use_cuda=True):
-        embedder = ElmoEmbedder.create(
-            model_dir, config_name, embedding_dim, use_cuda, elmo_mode, freeze)
-        encoder = ElmoBiLSTMEncoder.create(embedder, enc_hidden_dim, rnn_layers, use_cuda)
-        decoder = AttnCRFJointDecoder.create(
-            label_size, encoder.output_dim, intent_size, input_dropout, key_dim, val_dim, num_heads)
-        return cls(encoder, decoder, use_cuda)
-
-
-class ElmoBiLSTMAttnNMTJoint(NerModel):
-    """Reused from https://github.com/DSKSD/RNN-for-Joint-NLU"""
-
-    def forward(self, batch):
-        output, _ = self.encoder(batch)
-        return self.decoder(output, batch[-2])
-
-    def score(self, batch):
-        output, _ = self.encoder(batch)
-        return self.decoder.score(output, batch[-2], batch[-1], batch[-3])
-
-    @classmethod
-    def create(cls,
-               label_size, intent_size,
-               # ElmoEmbedder params
-               model_dir, config_name, embedding_dim=1024, elmo_mode="avg",
-               freeze=True,
-               # ElmoBiLSTMEncoder params
-               enc_hidden_dim=128, rnn_layers=1,
-               # NMTDecoder params
-               dec_embedding_dim=64, dec_hidden_dim=256, dec_rnn_layers=1,
-               input_dropout=0.5, pad_idx=0,
-               # Global params
-               use_cuda=True):
-        embedder = ElmoEmbedder.create(
-            model_dir, config_name, embedding_dim, use_cuda, elmo_mode, freeze)
-        encoder = ElmoBiLSTMEncoder.create(embedder, enc_hidden_dim, rnn_layers, use_cuda)
-        decoder = NMTJointDecoder.create(
-            label_size, intent_size, dec_embedding_dim, dec_hidden_dim,
-            dec_rnn_layers, input_dropout, pad_idx, use_cuda)
-        return cls(encoder, decoder, use_cuda)
diff --git a/modules/models/released_models.py b/modules/models/released_models.py
deleted file mode 100644
index c8bc0a6..0000000
--- a/modules/models/released_models.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# from modules.layers.encoders import *
-# from modules.layers.decoders import *
-
-
-# released_models = {
-#    "BertBiLSTMNCRF": {
-#        "encoder": BertBiLSTMEncoder,
-#        "decoder": NCRFDecoder
-#    }
-# }
diff --git a/modules/train/clr.py b/modules/train/clr.py
deleted file mode 100644
index a1e4109..0000000
--- a/modules/train/clr.py
+++ /dev/null
@@ -1,160 +0,0 @@
-# Reused from https://github.com/anandsaha/pytorch.cyclic.learning.rate/blob/master/cls.py
-import numpy as np
-
-
-class CyclicLR(object):
-    """Sets the learning rate of each parameter group according to
-    cyclical learning rate policy (CLR). The policy cycles the learning
-    rate between two boundaries with a constant frequency, as detailed in
-    the paper `Cyclical Learning Rates for Training Neural Networks`_.
-    The distance between the two boundaries can be scaled on a per-iteration
-    or per-cycle basis.
-    Cyclical learning rate policy changes the learning rate after every batch.
-    `batch_step` should be called after a batch has been used for training.
-    To resume training, save `last_batch_iteration` and use it to instantiate `CycleLR`.
-    This class has three built-in policies, as put forth in the paper:
-    "triangular":
-        A basic triangular cycle w/ no amplitude scaling.
-    "triangular2":
-        A basic triangular cycle that scales initial amplitude by half each cycle.
-    "exp_range":
-        A cycle that scales initial amplitude by gamma**(cycle iterations) at each
-        cycle iteration.
-    This implementation was adapted from the github repo: `bckenstler/CLR`_
-    Args:
-        optimizer (Optimizer): Wrapped optimizer.
-        base_lr (float or list): Initial learning rate which is the
-            lower boundary in the cycle for eachparam groups.
-            Default: 0.001
-        max_lr (float or list): Upper boundaries in the cycle for
-            each parameter group. Functionally,
-            it defines the cycle amplitude (max_lr - base_lr).
-            The lr at any cycle is the sum of base_lr
-            and some scaling of the amplitude; therefore
-            max_lr may not actually be reached depending on
-            scaling function. Default: 0.006
-        step_size (int): Number of training iterations per
-            half cycle. Authors suggest setting step_size
-            2-8 x training iterations in epoch. Default: 2000
-        mode (str): One of {triangular, triangular2, exp_range}.
-            Values correspond to policies detailed above.
-            If scale_fn is not None, this argument is ignored.
-            Default: 'triangular'
-        gamma (float): Constant in 'exp_range' scaling function:
-            gamma**(cycle iterations)
-            Default: 1.0
-        scale_fn (function): Custom scaling policy defined by a single
-            argument lambda function, where
-            0 <= scale_fn(x) <= 1 for all x >= 0.
-            mode paramater is ignored
-            Default: None
-        scale_mode (str): {'cycle', 'iterations'}.
-            Defines whether scale_fn is evaluated on
-            cycle number or cycle iterations (training
-            iterations since start of cycle).
-            Default: 'cycle'
-        last_batch_iteration (int): The index of the last batch. Default: -1
-    Example:
-        >>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
-        >>> scheduler = torch.optim.CyclicLR(optimizer)
-        >>> data_loader = torch.utils.data.DataLoader(...)
-        >>> for epoch in range(10):
-        >>>     for batch in data_loader:
-        >>>         scheduler.batch_step()
-        >>>         train_batch(...)
-    .. _Cyclical Learning Rates for Training Neural Networks: https://arxiv.org/abs/1506.01186
-    .. _bckenstler/CLR: https://github.com/bckenstler/CLR
-    """
-
-    def __init__(self, optimizer, base_lr=1e-3, max_lr=6e-3,
-                 step_size=2000, mode='triangular', gamma=1.,
-                 scale_fn=None, scale_mode='cycle', last_batch_iteration=-1,
-                 decay_rate=0.9, decay_step=10000):
-
-        self.optimizer = optimizer
-
-        if isinstance(base_lr, list) or isinstance(base_lr, tuple):
-            if len(base_lr) != len(optimizer.param_groups):
-                raise ValueError("expected {} base_lr, got {}".format(
-                    len(optimizer.param_groups), len(base_lr)))
-            self.base_lrs = list(base_lr)
-        else:
-            self.base_lrs = [base_lr] * len(optimizer.param_groups)
-
-        if isinstance(max_lr, list) or isinstance(max_lr, tuple):
-            if len(max_lr) != len(optimizer.param_groups):
-                raise ValueError("expected {} max_lr, got {}".format(
-                    len(optimizer.param_groups), len(max_lr)))
-            self.max_lrs = list(max_lr)
-        else:
-            self.max_lrs = [max_lr] * len(optimizer.param_groups)
-        
-        self.base_lrs = self.max_lrs
-        self.step_size = step_size
-
-        if mode not in ['triangular', 'triangular2', 'exp_range'] \
-                and scale_fn is None:
-            raise ValueError('mode is invalid and scale_fn is None')
-
-        self.mode = mode
-        self.gamma = gamma
-
-        if scale_fn is None:
-            if self.mode == 'triangular':
-                self.scale_fn = self._triangular_scale_fn
-                self.scale_mode = 'cycle'
-            elif self.mode == 'triangular2':
-                self.scale_fn = self._triangular2_scale_fn
-                self.scale_mode = 'cycle'
-            elif self.mode == 'exp_range':
-                self.scale_fn = self._exp_range_scale_fn
-                self.scale_mode = 'iterations'
-        else:
-            self.scale_fn = scale_fn
-            self.scale_mode = scale_mode
-        self.decay_rate = decay_rate
-        self.decay_step = decay_step
-        self.step(last_batch_iteration + 1)
-        self.last_batch_iteration = last_batch_iteration
-
-    def step(self, batch_iteration=None):
-        if batch_iteration is None:
-            batch_iteration = self.last_batch_iteration + 1
-        self.last_batch_iteration = batch_iteration
-        for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
-            param_group['lr'] = lr
-        self.update_max_lr()
-
-    def update_max_lr(self):
-        # learning rate decay
-        self.max_lrs = [
-            lr * self.decay_rate ** (self.last_batch_iteration / self.decay_step) for lr in self.base_lrs]
-
-    def _triangular_scale_fn(self, x):
-        return 1.
-
-    def _triangular2_scale_fn(self, x):
-        return 1 / (2. ** (x - 1))
-
-    def _exp_range_scale_fn(self, x):
-        return self.gamma**(x)
-
-    def get_lr(self):
-        step_size = float(self.step_size)
-        cycle = np.floor(1 + self.last_batch_iteration / (2 * step_size))
-        x = np.abs(self.last_batch_iteration / step_size - 2 * cycle + 1)
-
-        lrs = []
-        param_lrs = zip(self.optimizer.param_groups, self.base_lrs, self.max_lrs)
-        for param_group, base_lr, max_lr in param_lrs:
-            base_height = (max_lr - base_lr) * np.maximum(0, (1 - x))
-            if self.scale_mode == 'cycle':
-                lr = base_lr + base_height * self.scale_fn(cycle)
-            else:
-                lr = base_lr + base_height * self.scale_fn(self.last_batch_iteration)
-            lrs.append(lr)
-        return lrs
-    
-    @property
-    def lr(self):
-        return self.get_lr()[-1]
diff --git a/modules/train/train.py b/modules/train/train.py
index 97f62c4..6481ebc 100644
--- a/modules/train/train.py
+++ b/modules/train/train.py
@@ -1,18 +1,13 @@
-from tqdm import tqdm
+from modules import tqdm
 from sklearn_crfsuite.metrics import flat_classification_report
 import logging
 import torch
-from modules.utils.plot_metrics import get_mean_max_metric
 from .optimization import BertAdam
-import json
-from modules.data.bert_data import BertNerData
-from modules.models.released_models import released_models
+from modules.analyze_utils.plot_metrics import get_mean_max_metric
+from modules.data.bert_data import get_data_loader_for_predict
 
 
-logging.basicConfig(level=logging.INFO)
-
-
-def train_step(dl, model, optimizer, lr_scheduler=None, clip=None, num_epoch=1):
+def train_step(dl, model, optimizer, num_epoch=1):
     model.train()
     epoch_loss = 0
     idx = 0
@@ -22,18 +17,12 @@ def train_step(dl, model, optimizer, lr_scheduler=None, clip=None, num_epoch=1):
         model.zero_grad()
         loss = model.score(batch)
         loss.backward()
-        if clip is not None:
-            _ = torch.nn.utils.clip_grad_norm(model.parameters(), clip)
         optimizer.step()
         optimizer.zero_grad()
         loss = loss.data.cpu().tolist()
         epoch_loss += loss
         pr.set_description("train loss: {}".format(epoch_loss / idx))
-        if lr_scheduler is not None:
-            lr_scheduler.step()
-        # torch.cuda.empty_cache()
-    if lr_scheduler is not None:
-        logging.info("\nlr after epoch: {}".format(lr_scheduler.lr))
+        torch.cuda.empty_cache()
     logging.info("\nepoch {}, average train epoch loss={:.5}\n".format(
         num_epoch, epoch_loss / idx))
 
@@ -89,11 +78,11 @@ def validate_step(dl, model, id2label, sup_labels, id2cls=None):
     preds_cpu_cls, targets_cpu_cls = [], []
     for batch in tqdm(dl, total=len(dl), leave=False):
         idx += 1
-        labels_mask, labels_ids = batch[-2:]
+        labels_mask, labels_ids = batch[1], batch[3]
         preds = model.forward(batch)
         if id2cls is not None:
             preds, preds_cls = preds
-            preds_cpu_, targets_cpu_ = transformed_result_cls([preds_cls], [batch[-3]], id2cls)
+            preds_cpu_, targets_cpu_ = transformed_result_cls([preds_cls], [batch[-1]], id2cls)
             preds_cpu_cls.extend(preds_cpu_)
             targets_cpu_cls.extend(targets_cpu_)
         preds_cpu_, targets_cpu_ = transformed_result([preds], [labels_mask], id2label, [labels_ids])
@@ -111,22 +100,16 @@ def predict(dl, model, id2label, id2cls=None):
     idx = 0
     preds_cpu = []
     preds_cpu_cls = []
-    for batch, sorted_idx in tqdm(dl, total=len(dl), leave=False):
+    for batch in tqdm(dl, total=len(dl), leave=False, desc="Predicting"):
         idx += 1
-        labels_mask, labels_ids = batch[-2:]
+        labels_mask, labels_ids = batch[1], batch[3]
         preds = model.forward(batch)
         if id2cls is not None:
             preds, preds_cls = preds
             preds_cpu_ = transformed_result_cls([preds_cls], [preds_cls], id2cls, False)
             preds_cpu_cls.extend(preds_cpu_)
-        bs = batch[0].shape[0]
-        unsorted_mask = [0] * bs
-        unsorted_pred = [0] * bs
-        for idx, sidx in enumerate(sorted_idx):
-            unsorted_pred[sidx] = preds[idx]
-            unsorted_mask[sidx] = labels_mask[idx]
-        
-        preds_cpu_ = transformed_result([unsorted_pred], [unsorted_mask], id2label)
+
+        preds_cpu_ = transformed_result([preds], [labels_mask], id2label)
         preds_cpu.extend(preds_cpu_)
     if id2cls is not None:
         return preds_cpu, preds_cpu_cls
@@ -135,36 +118,15 @@ def predict(dl, model, id2label, id2cls=None):
 
 class NerLearner(object):
 
-    @property
-    def config(self):
-        config = {
-            "data": self.data.config,
-            "model": self.model.config,
-            "learner": {
-                "best_model_path": self.best_model_path,
-                "lr": self.lr,
-                "betas": self.betas,
-                "clip": self.clip,
-                "verbose": self.verbose,
-                "sup_labels": self.sup_labels,
-                "t_total": self.t_total,
-                "warmup": self.warmup,
-                "weight_decay": self.weight_decay,
-                "validate_every": self.validate_every,
-                "schedule": self.schedule,
-                "e": self.e
-            }
-        }
-        return config
-
-    def __init__(self, model, data, best_model_path, lr=0.001, betas=[0.8, 0.9], clip=5,
+    def __init__(self, model, data, best_model_path, lr=0.001, betas=[0.8, 0.9], clip=1.0,
                  verbose=True, sup_labels=None, t_total=-1, warmup=0.1, weight_decay=0.01,
                  validate_every=1, schedule="warmup_linear", e=1e-6):
+        logging.basicConfig(level=logging.INFO)
         self.model = model
         self.optimizer = BertAdam(model, lr, t_total=t_total, b1=betas[0], b2=betas[1], max_grad_norm=clip)
         self.optimizer_defaults = dict(
-            model=model, lr=lr, warmup=warmup, t_total=t_total, schedule="warmup_linear",
-            b1=betas[0], b2=betas[1], e=1e-6, weight_decay=weight_decay,
+            model=model, lr=lr, warmup=warmup, t_total=t_total, schedule=schedule,
+            b1=betas[0], b2=betas[1], e=e, weight_decay=weight_decay,
             max_grad_norm=clip)
 
         self.lr = lr
@@ -179,32 +141,14 @@ def __init__(self, model, data, best_model_path, lr=0.001, betas=[0.8, 0.9], cli
         self.data = data
         self.e = e
         if sup_labels is None:
-            sup_labels = data.id2label[1:]
+            sup_labels = data.train_ds.idx2label[4:]
         self.sup_labels = sup_labels
         self.best_model_path = best_model_path
         self.verbose = verbose
         self.history = []
         self.cls_history = []
         self.epoch = 0
-        self.clip = clip
         self.best_target_metric = 0.
-        self.lr_scheduler = None
-
-    def save_config(self, path):
-        with open(path, "w") as file:
-            json.dump(self.config, file)
-
-    @classmethod
-    def from_config(cls, path, for_train=True):
-        with open(path, "r") as file:
-            config = json.load(file)
-        data = BertNerData.from_config(config["data"], for_train)
-        name = config["model"]["name"]
-        # TODO: release all models (now only for BertBiLSTMNCRF)
-        if name not in released_models:
-            raise NotImplemented("from_config is implemented only for {} model :(".format(config["name"]))
-        model = released_models[name].from_config(**config["model"]["params"])
-        return cls(data, model, **config["learner"])
 
     def fit(self, epochs=100, resume_history=True, target_metric="f1"):
         if not resume_history:
@@ -224,20 +168,22 @@ def fit(self, epochs=100, resume_history=True, target_metric="f1"):
             pass
 
     def fit_one_cycle(self, epoch, target_metric="f1"):
-        train_step(self.data.train_dl, self.model, self.optimizer, self.lr_scheduler, self.clip, epoch)
+        train_step(self.data.train_dl, self.model, self.optimizer, epoch)
         if epoch % self.validate_every == 0:
-            if self.data.is_cls:
-                rep, rep_cls = validate_step(self.data.valid_dl, self.model, self.data.id2label, self.sup_labels,
-                                             self.data.id2cls)
+            if self.data.train_ds.is_cls:
+                rep, rep_cls = validate_step(
+                    self.data.valid_dl, self.model, self.data.train_ds.idx2label, self.sup_labels,
+                    self.data.train_ds.idx2cls)
                 self.cls_history.append(rep_cls)
             else:
-                rep = validate_step(self.data.valid_dl, self.model, self.data.id2label, self.sup_labels)
+                rep = validate_step(
+                    self.data.valid_dl, self.model, self.data.train_ds.idx2label, self.sup_labels)
             self.history.append(rep)
         idx, metric = get_mean_max_metric(self.history, target_metric, True)
         if self.verbose:
             logging.info("on epoch {} by max_{}: {}".format(idx, target_metric, metric))
             print(self.history[-1])
-            if self.data.is_cls:
+            if self.data.train_ds.is_cls:
                 logging.info("on epoch {} classification report:")
                 print(self.cls_history[-1])
         # Store best model
@@ -247,10 +193,12 @@ def fit_one_cycle(self, epoch, target_metric="f1"):
                 logging.info("Saving new best model...")
             self.save_model()
 
-    def predict(self, dl):
-        if self.data.is_cls:
-            return predict(dl, self.model, self.data.id2label, self.data.id2cls)
-        return predict(dl, self.model, self.data.id2label)
+    def predict(self, dl=None, df_path=None, df=None):  # dl takes precedence; df_path/df are only used to build one
+        if dl is None:
+            dl = get_data_loader_for_predict(self.data, df_path, df)  # no loader given: build it from self.data
+        if self.data.train_ds.is_cls:
+            return predict(dl, self.model, self.data.train_ds.idx2label, self.data.train_ds.idx2cls)
+        return predict(dl, self.model, self.data.train_ds.idx2label)  # module-level predict(), not this method
     
     def save_model(self, path=None):
         path = path if path else self.best_model_path
diff --git a/modules/utils.py b/modules/utils.py
new file mode 100644
index 0000000..744f853
--- /dev/null
+++ b/modules/utils.py
@@ -0,0 +1,69 @@
+import os
+import json
+import numpy
+import bson
+import sys
+
+
+def ipython_info():
+    """Return 'notebook', 'terminal' or False, judged by which IPython modules are loaded."""
+    if 'ipykernel' in sys.modules:
+        return 'notebook'
+    if 'IPython' in sys.modules:
+        return 'terminal'
+    return False
+
+
+def get_tqdm():
+    """Return tqdm's notebook bar inside a Jupyter kernel, otherwise the console bar."""
+    from tqdm import tqdm
+    # Plain Python and the IPython terminal cannot render the widget-based bar.
+    if ipython_info() != "notebook":
+        return tqdm
+    try:
+        from tqdm import tqdm_notebook
+    except ImportError:  # this tqdm build ships no notebook front-end
+        return tqdm
+    return tqdm_notebook
+
+
+class JsonEncoder(json.JSONEncoder):
+    """JSON encoder that also accepts numpy scalars, numpy arrays and bson ObjectIds."""
+    def default(self, obj):
+        if isinstance(obj, numpy.integer):
+            return int(obj)
+        if isinstance(obj, numpy.floating):
+            return float(obj)
+        if isinstance(obj, numpy.ndarray):
+            return obj.tolist()
+        if isinstance(obj, bson.ObjectId):
+            return str(obj)
+        return super(JsonEncoder, self).default(obj)  # unknown type: let the base class raise TypeError
+
+
+def jsonify(data):  # serialize `data` to a JSON string using JsonEncoder with default options
+    return JsonEncoder().encode(data)
+
+
+def read_config(config):  # a str is a path to a UTF-8 JSON file; anything else is returned untouched
+    if not isinstance(config, str):
+        return config
+    with open(config, "r", encoding="utf-8") as src:
+        return json.load(src)
+
+
+def save_config(config, path):  # dump `config` as JSON to `path`; UTF-8 to mirror read_config
+    with open(path, "w", encoding="utf-8") as file:
+        json.dump(config, file, cls=JsonEncoder)
+
+
+def if_none(origin, other):  # `origin`, unless it is None, in which case `other`
+    return origin if origin is not None else other
+
+
+def get_files_path_from_dir(path):
+    """Recursively list every file under `path`, each joined to its containing directory."""
+    # os.path.join avoids "dir//file" when `path` ends with a slash and uses the OS separator.
+    return [
+        os.path.join(dir_path, f_name)
+        for dir_path, _, filenames in os.walk(path) for f_name in filenames]
diff --git a/requirements.txt b/requirements.txt
index b7127e3..ca20303 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,10 @@
-torch
-tqdm
-tensorflow
+bson
 pandas
+scikit-learn
+sklearn-crfsuite
+tqdm
+rusenttokenize
 numpy
-six
-gensim
-elmoformanylangs
\ No newline at end of file
+nltk
+torch
+matplotlib
\ No newline at end of file