In [1]:
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
from neuroner import neuromodel
from shutil import copyfile, copy
import random
import os

Using TensorFlow backend.


In [2]:
# Session options: device-placement logging is requested directly in the constructor.
config = tf.ConfigProto(log_device_placement=True)  # log on which device each operation ran
config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU

# Create the session and register it as the default session for Keras.
sess = tf.Session(config=config)
set_session(sess)

### Functions that split the data according to a given percentage

In [5]:
def make_dev_split(data, devPercent):
    """Randomly partition ``data`` into a main part and a held-out part.

    A slice copy of ``data`` is shuffled with the module-level ``random``
    generator, so the caller's sequence keeps its original order.

    Args:
        data: Sequence to split; it is copied with ``data[:]`` and the copy
            is shuffled in place (e.g. a list).
        devPercent: Fraction of ``data`` to hold out, e.g. ``0.3``.

    Returns:
        A ``(rest, held_out)`` tuple. ``held_out`` contains
        ``int(round(devPercent * len(data)))`` items and ``rest`` contains
        the remaining ones.
    """
    cut = int(round(devPercent * len(data)))
    permuted = data[:]
    random.shuffle(permuted)
    held_out = permuted[:cut]
    rest = permuted[cut:]
    return rest, held_out

In [6]:
def make_train_valid_files(data_path, valid_percent):
    """Copy a random train/validation split of ``.ann``/``.txt`` pairs.

    Every document in ``data_path`` that has both a ``.ann`` and a ``.txt``
    file is assigned to either the training or the validation set, and both
    of its files are copied to ``data/train/`` or ``data/valid/`` under the
    current working directory.

    Args:
        data_path: Folder containing the ``.ann``/``.txt`` pairs to split.
        valid_percent: Fraction of the documents to place in the validation
            set, e.g. ``0.3``.

    Raises:
        OSError: If ``data_path`` cannot be listed or a file cannot be copied.
    """
    extensions = (".ann", ".txt")
    source_dir = os.path.abspath(data_path)
    train_dir = os.path.join(os.getcwd(), "data/train/")
    valid_dir = os.path.join(os.getcwd(), "data/valid/")

    # Keep only stems for which BOTH files exist. This skips sub-directories
    # (e.g. data/train, data/valid when data_path is ./data/), checkpoints and
    # unpaired files, which would otherwise make copyfile() fail.
    stems = {os.path.splitext(entry)[0] for entry in os.listdir(source_dir)}
    # Sorting gives the shuffle a stable starting order, so seeding ``random``
    # makes the split reproducible (set iteration order is not stable).
    file_names = sorted(
        os.path.join(source_dir, stem)
        for stem in stems
        if all(os.path.isfile(os.path.join(source_dir, stem + ext)) for ext in extensions)
    )

    train, valid = make_dev_split(file_names, valid_percent)
    for target_dir, paths in ((train_dir, train), (valid_dir, valid)):
        # The destination folders may not exist yet on a fresh checkout.
        os.makedirs(target_dir, exist_ok=True)
        for path in paths:
            for ext in extensions:
                copyfile(path + ext, target_dir + os.path.basename(path) + ext)

### Function in charge of postprocessing the files resulting from the prediction of the NeuroNER model.

In [None]:
def postprocess(input_file, output_file, clear_ending_points=False, clear_prefixes = False, prefixes = []):
    """Clean the tokens of a six-column, whitespace-separated prediction file.

    Each non-blank input line must have six fields:
    ``token doc start end e0 e1``. Blank lines are preserved as blank lines.
    The last two columns and ``doc`` are written back unchanged.

    Args:
        input_file: Path of the UTF-8 file to read.
        output_file: Path of the UTF-8 file to write. It may be the same path
            as ``input_file`` because the input is fully read first.
        clear_ending_points: If true, a trailing ``.`` is removed from tokens
            longer than one character and ``end`` is decreased by one.
        clear_prefixes: If true, any of ``prefixes`` found at the START of a
            token (that does not end with ``.``) is removed and ``start`` is
            increased by the prefix length.
        prefixes: Prefix strings to remove; only read, never modified.

    Raises:
        ValueError: If a non-blank line does not have exactly six fields, or
            if an offset that must be adjusted is not an integer.
    """
    # Read with an explicit encoding: the output is written as UTF-8, so the
    # input must not depend on the platform default.
    with open(input_file, encoding='utf8') as source:
        lines = source.readlines()

    # Build the whole result before touching the output file, so a malformed
    # line cannot leave a truncated, half-written output behind.
    processed = []
    for line_number, line in enumerate(lines, start=1):
        if not line.strip():
            processed.append("\n")
            continue

        fields = line.split()
        if len(fields) != 6:
            raise ValueError(
                "%s, line %d: expected 6 fields, found %d"
                % (input_file, line_number, len(fields))
            )
        token, doc, start, end, e0, e1 = fields

        if clear_ending_points and len(token) > 1 and token[-1] == '.':
            token = token[:-1]
            end = str(int(end) - 1)

        if clear_prefixes and len(token) > 1 and token[-1] != '.':
            for prefix in prefixes:
                # Only strip a real prefix (a match elsewhere in the token
                # must not cut characters from the front), and never reduce
                # the token to an empty string, which would drop a column.
                if prefix and token.startswith(prefix) and len(token) > len(prefix):
                    token = token[len(prefix):]
                    start = str(int(start) + len(prefix))

        processed.append(" ".join((token, doc, start, end, e0, e1)) + "\n")

    with open(output_file, mode='w', encoding='utf8') as target:
        target.writelines(processed)

## Split the datasets

Run the cell below only if you need to split the dataset into a training set and a validation set.

In [6]:
# data_path = './data/'
# make_train_valid_files(data_path, 0.3)

## Instantiating and training the NeuroNER model
All parameters are taken from the file "parameters.ini". These parameters can be modified when a NeuroNER model is instantiated.

In [3]:
# Instantiate the NeuroNER model; the parameter dictionary printed by this cell shows the settings in effect.
nn = neuromodel.NeuroNER()

{'train_model': 1, 'use_pretrained_model': 0, 'pretrained_model_folder': '', 'output_scores': 0, 'dataset_text_folder': './data/', 'main_evaluation_mode': 'token', 'output_folder': './output', 'use_character_lstm': 1, 'character_embedding_dimension': 25, 'character_lstm_hidden_state_dimension': 25, 'token_pretrained_embedding_filepath': '', 'token_embedding_dimension': 300, 'token_lstm_hidden_state_dimension': 200, 'use_crf': 1, 'patience': 100, 'maximum_number_of_epochs': 100, 'optimizer': 'adam', 'learning_rate': 0.001, 'gradient_clipping_value': 5.0, 'dropout_rate': 0.5, 'number_of_cpu_threads': 8, 'number_of_gpus': 1, 'experiment_name': 'test', 'tagging_format': 'bioes', 'tokenizer': 'spacy', 'spacylanguage': 'es', 'remap_unknown_tokens_to_unk': 1, 'load_only_pretrained_token_embeddings': 0, 'check_for_lowercase': 1, 'check_for_digits_replaced_with_zeros': 1, 'freeze_token_embeddings': 0, 'debug': 0, 'verbose': 0, 'plot_format': 'png', 'reload_character_embeddings': 1, 'reload_char

In [None]:
# Train the model. NOTE(review): long-running cell; the captured log reports roughly 700-3800 seconds for most epochs.
nn.fit()


Starting epoch 0
Training completed in 0.00 seconds
Evaluate model on the train set
                                  precision    recall  f1-score   support

                           CALLE     0.0000    0.0000    0.0000      5545
                    CENTRO_SALUD     0.0004    0.1379    0.0008        29
              CORREO_ELECTRONICO     0.0091    0.6414    0.0180       474
          EDAD_SUJETO_ASISTENCIA     0.0000    0.0000    0.0000      2069
    FAMILIARES_SUJETO_ASISTENCIA     0.0018    0.0931    0.0035       376
                          FECHAS     0.0007    0.0020    0.0010      1514
                        HOSPITAL     0.0045    0.0455    0.0083      1100
                ID_ASEGURAMIENTO     0.0114    0.6224    0.0224      1160
         ID_CONTACTO_ASISTENCIAL     0.0054    0.9870    0.0107        77
    ID_EMPLEO_PERSONAL_SANITARIO     0.0000    0.0000    0.0000         0
            ID_SUJETO_ASISTENCIA     0.0000    0.0000    0.0000       600
ID_TITULACION_PERSONAL_SAN

Generating plots for the valid set
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/001_train.txt > ./output/_2019-05-15_14-01-31-113906/001_train.txt_conll_evaluation.txt
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/001_valid.txt > ./output/_2019-05-15_14-01-31-113906/001_valid.txt_conll_evaluation.txt
Formatting 001_train set from CONLL to BRAT... Done.
Formatting 001_valid set from CONLL to BRAT... Done.
The last 0 epochs have not shown improvements on the validation set.

Starting epoch 2
Training completed in 1177.56 seconds
Evaluate model on the train set
                                  precision    recall  f1-score   support

                           CALLE     0.9896    0.9933    0.9914      5545
                    CENTRO_SALUD     1.0000    

Generating plots for the valid set
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/003_train.txt > ./output/_2019-05-15_14-01-31-113906/003_train.txt_conll_evaluation.txt
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/003_valid.txt > ./output/_2019-05-15_14-01-31-113906/003_valid.txt_conll_evaluation.txt
Formatting 003_train set from CONLL to BRAT... Done.
Formatting 003_valid set from CONLL to BRAT... Done.
The last 0 epochs have not shown improvements on the validation set.

Starting epoch 4
Training completed in 1089.71 seconds
Evaluate model on the train set
                                  precision    recall  f1-score   support

                           CALLE     0.9975    0.9973    0.9974      5545
                    CENTRO_SALUD     1.0000    

Generating plots for the valid set
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/005_train.txt > ./output/_2019-05-15_14-01-31-113906/005_train.txt_conll_evaluation.txt
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/005_valid.txt > ./output/_2019-05-15_14-01-31-113906/005_valid.txt_conll_evaluation.txt
Formatting 005_train set from CONLL to BRAT... Done.
Formatting 005_valid set from CONLL to BRAT... Done.
The last 0 epochs have not shown improvements on the validation set.

Starting epoch 6
Training completed in 714.43 seconds
Evaluate model on the train set
                                  precision    recall  f1-score   support

                           CALLE     0.9960    0.9984    0.9972      5545
                    CENTRO_SALUD     0.7436    1

Generating plots for the valid set
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/007_train.txt > ./output/_2019-05-15_14-01-31-113906/007_train.txt_conll_evaluation.txt
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/007_valid.txt > ./output/_2019-05-15_14-01-31-113906/007_valid.txt_conll_evaluation.txt
The last 2 epochs have not shown improvements on the validation set.

Starting epoch 8
Training completed in 714.60 seconds
Evaluate model on the train set
                                  precision    recall  f1-score   support

                           CALLE     0.9937    0.9982    0.9960      5545
                    CENTRO_SALUD     1.0000    0.9655    0.9825        29
              CORREO_ELECTRONICO     0.9958    0.9958    0.9958       474
      

Generating plots for the valid set
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/009_train.txt > ./output/_2019-05-15_14-01-31-113906/009_train.txt_conll_evaluation.txt
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/009_valid.txt > ./output/_2019-05-15_14-01-31-113906/009_valid.txt_conll_evaluation.txt
The last 4 epochs have not shown improvements on the validation set.

Starting epoch 10
Training completed in 829.10 seconds
Evaluate model on the train set
                                  precision    recall  f1-score   support

                           CALLE     0.9978    0.9973    0.9976      5545
                    CENTRO_SALUD     1.0000    1.0000    1.0000        29
              CORREO_ELECTRONICO     0.9958    0.9937    0.9947       474
     

Generating plots for the valid set
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/011_train.txt > ./output/_2019-05-15_14-01-31-113906/011_train.txt_conll_evaluation.txt
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/011_valid.txt > ./output/_2019-05-15_14-01-31-113906/011_valid.txt_conll_evaluation.txt
The last 6 epochs have not shown improvements on the validation set.

Starting epoch 12
Training completed in 2936.40 seconds
Evaluate model on the train set
                                  precision    recall  f1-score   support

                           CALLE     0.9973    0.9966    0.9969      5545
                    CENTRO_SALUD     1.0000    1.0000    1.0000        29
              CORREO_ELECTRONICO     0.9979    0.9916    0.9947       474
    

Generating plots for the valid set
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/013_train.txt > ./output/_2019-05-15_14-01-31-113906/013_train.txt_conll_evaluation.txt
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/013_valid.txt > ./output/_2019-05-15_14-01-31-113906/013_valid.txt_conll_evaluation.txt
Formatting 013_train set from CONLL to BRAT... Done.
Formatting 013_valid set from CONLL to BRAT... Done.
The last 0 epochs have not shown improvements on the validation set.

Starting epoch 14
Training completed in 2760.09 seconds
Evaluate model on the train set
                                  precision    recall  f1-score   support

                           CALLE     0.9984    0.9980    0.9982      5545
                    CENTRO_SALUD     1.0000   

Generating plots for the valid set
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/015_train.txt > ./output/_2019-05-15_14-01-31-113906/015_train.txt_conll_evaluation.txt
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/015_valid.txt > ./output/_2019-05-15_14-01-31-113906/015_valid.txt_conll_evaluation.txt
The last 2 epochs have not shown improvements on the validation set.

Starting epoch 16
Training completed in 2520.01 seconds
Evaluate model on the train set
                                  precision    recall  f1-score   support

                           CALLE     0.9993    0.9975    0.9984      5545
                    CENTRO_SALUD     1.0000    1.0000    1.0000        29
              CORREO_ELECTRONICO     1.0000    0.9958    0.9979       474
    

Generating plots for the valid set
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/017_train.txt > ./output/_2019-05-15_14-01-31-113906/017_train.txt_conll_evaluation.txt
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/017_valid.txt > ./output/_2019-05-15_14-01-31-113906/017_valid.txt_conll_evaluation.txt
The last 4 epochs have not shown improvements on the validation set.

Starting epoch 18
Training completed in 2857.33 seconds
Evaluate model on the train set
                                  precision    recall  f1-score   support

                           CALLE     0.9953    0.9989    0.9971      5545
                    CENTRO_SALUD     1.0000    1.0000    1.0000        29
              CORREO_ELECTRONICO     0.9979    0.9916    0.9947       474
    

Generating plots for the valid set
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/019_train.txt > ./output/_2019-05-15_14-01-31-113906/019_train.txt_conll_evaluation.txt
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/019_valid.txt > ./output/_2019-05-15_14-01-31-113906/019_valid.txt_conll_evaluation.txt
The last 6 epochs have not shown improvements on the validation set.

Starting epoch 20
Training completed in 2435.62 seconds
Evaluate model on the train set
                                  precision    recall  f1-score   support

                           CALLE     0.9982    0.9995    0.9988      5545
                    CENTRO_SALUD     1.0000    1.0000    1.0000        29
              CORREO_ELECTRONICO     1.0000    0.9916    0.9958       474
    

Generating plots for the valid set
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/021_train.txt > ./output/_2019-05-15_14-01-31-113906/021_train.txt_conll_evaluation.txt
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/021_valid.txt > ./output/_2019-05-15_14-01-31-113906/021_valid.txt_conll_evaluation.txt
The last 8 epochs have not shown improvements on the validation set.

Starting epoch 22
Training completed in 2607.56 seconds
Evaluate model on the train set
                                  precision    recall  f1-score   support

                           CALLE     0.9980    0.9993    0.9986      5545
                    CENTRO_SALUD     1.0000    1.0000    1.0000        29
              CORREO_ELECTRONICO     0.9937    0.9958    0.9947       474
    

Generating plots for the valid set
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/023_train.txt > ./output/_2019-05-15_14-01-31-113906/023_train.txt_conll_evaluation.txt
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/023_valid.txt > ./output/_2019-05-15_14-01-31-113906/023_valid.txt_conll_evaluation.txt
The last 10 epochs have not shown improvements on the validation set.

Starting epoch 24
Training completed in 2462.39 seconds
Evaluate model on the train set
                                  precision    recall  f1-score   support

                           CALLE     0.9987    0.9980    0.9984      5545
                    CENTRO_SALUD     1.0000    1.0000    1.0000        29
              CORREO_ELECTRONICO     0.9958    0.9958    0.9958       474
   

Generating plots for the valid set
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/025_train.txt > ./output/_2019-05-15_14-01-31-113906/025_train.txt_conll_evaluation.txt
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/025_valid.txt > ./output/_2019-05-15_14-01-31-113906/025_valid.txt_conll_evaluation.txt
The last 12 epochs have not shown improvements on the validation set.

Starting epoch 26
Training completed in 2327.76 seconds
Evaluate model on the train set
                                  precision    recall  f1-score   support

                           CALLE     0.9959    0.9993    0.9976      5545
                    CENTRO_SALUD     1.0000    1.0000    1.0000        29
              CORREO_ELECTRONICO     0.9958    1.0000    0.9979       474
   

Generating plots for the valid set
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/027_train.txt > ./output/_2019-05-15_14-01-31-113906/027_train.txt_conll_evaluation.txt
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/027_valid.txt > ./output/_2019-05-15_14-01-31-113906/027_valid.txt_conll_evaluation.txt
The last 14 epochs have not shown improvements on the validation set.

Starting epoch 28
Training completed in 3375.14 seconds
Evaluate model on the train set
                                  precision    recall  f1-score   support

                           CALLE     0.9955    0.9995    0.9975      5545
                    CENTRO_SALUD     1.0000    1.0000    1.0000        29
              CORREO_ELECTRONICO     0.9937    1.0000    0.9968       474
   

Generating plots for the valid set
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/029_train.txt > ./output/_2019-05-15_14-01-31-113906/029_train.txt_conll_evaluation.txt
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/029_valid.txt > ./output/_2019-05-15_14-01-31-113906/029_valid.txt_conll_evaluation.txt
The last 16 epochs have not shown improvements on the validation set.

Starting epoch 30
Training completed in 3838.71 seconds
Evaluate model on the train set
                                  precision    recall  f1-score   support

                           CALLE     0.9948    0.9991    0.9969      5545
                    CENTRO_SALUD     1.0000    1.0000    1.0000        29
              CORREO_ELECTRONICO     0.9979    0.9937    0.9958       474
   

Generating plots for the valid set
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/031_train.txt > ./output/_2019-05-15_14-01-31-113906/031_train.txt_conll_evaluation.txt
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/031_valid.txt > ./output/_2019-05-15_14-01-31-113906/031_valid.txt_conll_evaluation.txt
The last 18 epochs have not shown improvements on the validation set.

Starting epoch 32
Training completed in 3637.59 seconds
Evaluate model on the train set
                                  precision    recall  f1-score   support

                           CALLE     0.9964    0.9962    0.9963      5545
                    CENTRO_SALUD     1.0000    1.0000    1.0000        29
              CORREO_ELECTRONICO     0.9916    1.0000    0.9958       474
   

Generating plots for the valid set
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/033_train.txt > ./output/_2019-05-15_14-01-31-113906/033_train.txt_conll_evaluation.txt
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/033_valid.txt > ./output/_2019-05-15_14-01-31-113906/033_valid.txt_conll_evaluation.txt
The last 20 epochs have not shown improvements on the validation set.

Starting epoch 34
Training completed in 715.00 seconds
Evaluate model on the train set
                                  precision    recall  f1-score   support

                           CALLE     0.9993    0.9973    0.9983      5545
                    CENTRO_SALUD     0.9667    1.0000    0.9831        29
              CORREO_ELECTRONICO     0.9937    0.9958    0.9947       474
    

Generating plots for the valid set
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/035_train.txt > ./output/_2019-05-15_14-01-31-113906/035_train.txt_conll_evaluation.txt
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/035_valid.txt > ./output/_2019-05-15_14-01-31-113906/035_valid.txt_conll_evaluation.txt
The last 22 epochs have not shown improvements on the validation set.

Starting epoch 36
Training completed in 715.26 seconds
Evaluate model on the train set
                                  precision    recall  f1-score   support

                           CALLE     0.9998    0.9975    0.9986      5545
                    CENTRO_SALUD     1.0000    1.0000    1.0000        29
              CORREO_ELECTRONICO     0.9895    0.9937    0.9916       474
    

Generating plots for the valid set
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/037_train.txt > ./output/_2019-05-15_14-01-31-113906/037_train.txt_conll_evaluation.txt
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/037_valid.txt > ./output/_2019-05-15_14-01-31-113906/037_valid.txt_conll_evaluation.txt
The last 24 epochs have not shown improvements on the validation set.

Starting epoch 38
Training completed in 715.37 seconds
Evaluate model on the train set
                                  precision    recall  f1-score   support

                           CALLE     0.9969    0.9978    0.9974      5545
                    CENTRO_SALUD     1.0000    1.0000    1.0000        29
              CORREO_ELECTRONICO     0.9875    1.0000    0.9937       474
    

Generating plots for the valid set
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/039_train.txt > ./output/_2019-05-15_14-01-31-113906/039_train.txt_conll_evaluation.txt
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/039_valid.txt > ./output/_2019-05-15_14-01-31-113906/039_valid.txt_conll_evaluation.txt
The last 26 epochs have not shown improvements on the validation set.

Starting epoch 40
Training completed in 715.50 seconds
Evaluate model on the train set
                                  precision    recall  f1-score   support

                           CALLE     0.9986    0.9986    0.9986      5545
                    CENTRO_SALUD     1.0000    1.0000    1.0000        29
              CORREO_ELECTRONICO     0.9958    0.9958    0.9958       474
    

Generating plots for the valid set
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/041_train.txt > ./output/_2019-05-15_14-01-31-113906/041_train.txt_conll_evaluation.txt
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/041_valid.txt > ./output/_2019-05-15_14-01-31-113906/041_valid.txt_conll_evaluation.txt
The last 28 epochs have not shown improvements on the validation set.

Starting epoch 42
Training completed in 975.75 seconds
Evaluate model on the train set
                                  precision    recall  f1-score   support

                           CALLE     0.9984    0.9973    0.9978      5545
                    CENTRO_SALUD     1.0000    1.0000    1.0000        29
              CORREO_ELECTRONICO     0.9979    0.9979    0.9979       474
    

Generating plots for the valid set
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/043_train.txt > ./output/_2019-05-15_14-01-31-113906/043_train.txt_conll_evaluation.txt
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/043_valid.txt > ./output/_2019-05-15_14-01-31-113906/043_valid.txt_conll_evaluation.txt
The last 30 epochs have not shown improvements on the validation set.

Starting epoch 44
Training completed in 1489.95 seconds
Evaluate model on the train set
                                  precision    recall  f1-score   support

                           CALLE     0.9978    0.9991    0.9985      5545
                    CENTRO_SALUD     1.0000    1.0000    1.0000        29
              CORREO_ELECTRONICO     0.9937    0.9958    0.9947       474
   

Generating plots for the valid set
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/045_train.txt > ./output/_2019-05-15_14-01-31-113906/045_train.txt_conll_evaluation.txt
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/045_valid.txt > ./output/_2019-05-15_14-01-31-113906/045_valid.txt_conll_evaluation.txt
The last 32 epochs have not shown improvements on the validation set.

Starting epoch 46
Training completed in 1519.08 seconds
Evaluate model on the train set
                                  precision    recall  f1-score   support

                           CALLE     0.9984    0.9960    0.9972      5545
                    CENTRO_SALUD     1.0000    1.0000    1.0000        29
              CORREO_ELECTRONICO     0.9979    0.9916    0.9947       474
   

Generating plots for the valid set
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/047_train.txt > ./output/_2019-05-15_14-01-31-113906/047_train.txt_conll_evaluation.txt
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/047_valid.txt > ./output/_2019-05-15_14-01-31-113906/047_valid.txt_conll_evaluation.txt
The last 34 epochs have not shown improvements on the validation set.

Starting epoch 48
Training completed in 927.34 seconds
Evaluate model on the train set
                                  precision    recall  f1-score   support

                           CALLE     0.9971    0.9980    0.9976      5545
                    CENTRO_SALUD     1.0000    1.0000    1.0000        29
              CORREO_ELECTRONICO     0.9958    0.9937    0.9947       474
    

Generating plots for the valid set
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/049_train.txt > ./output/_2019-05-15_14-01-31-113906/049_train.txt_conll_evaluation.txt
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/049_valid.txt > ./output/_2019-05-15_14-01-31-113906/049_valid.txt_conll_evaluation.txt
The last 36 epochs have not shown improvements on the validation set.

Starting epoch 50
Training completed in 966.79 seconds
Evaluate model on the train set
                                  precision    recall  f1-score   support

                           CALLE     0.9975    0.9980    0.9977      5545
                    CENTRO_SALUD     1.0000    1.0000    1.0000        29
              CORREO_ELECTRONICO     0.9937    0.9979    0.9958       474
    

Generating plots for the valid set
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/051_train.txt > ./output/_2019-05-15_14-01-31-113906/051_train.txt_conll_evaluation.txt
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/051_valid.txt > ./output/_2019-05-15_14-01-31-113906/051_valid.txt_conll_evaluation.txt
The last 38 epochs have not shown improvements on the validation set.

Starting epoch 52
Training completed in 962.60 seconds
Evaluate model on the train set
                                  precision    recall  f1-score   support

                           CALLE     0.9977    0.9959    0.9968      5545
                    CENTRO_SALUD     1.0000    1.0000    1.0000        29
              CORREO_ELECTRONICO     0.9854    1.0000    0.9927       474
    

Generating plots for the valid set
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/053_train.txt > ./output/_2019-05-15_14-01-31-113906/053_train.txt_conll_evaluation.txt
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/053_valid.txt > ./output/_2019-05-15_14-01-31-113906/053_valid.txt_conll_evaluation.txt
The last 40 epochs have not shown improvements on the validation set.

Starting epoch 54
Training completed in 924.89 seconds
Evaluate model on the train set
                                  precision    recall  f1-score   support

                           CALLE     0.9980    0.9978    0.9979      5545
                    CENTRO_SALUD     1.0000    1.0000    1.0000        29
              CORREO_ELECTRONICO     0.9916    0.9916    0.9916       474
    

Generating plots for the valid set
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/055_train.txt > ./output/_2019-05-15_14-01-31-113906/055_train.txt_conll_evaluation.txt
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/055_valid.txt > ./output/_2019-05-15_14-01-31-113906/055_valid.txt_conll_evaluation.txt
The last 42 epochs have not shown improvements on the validation set.

Starting epoch 56
Training completed in 963.76 seconds
Evaluate model on the train set
                                  precision    recall  f1-score   support

                           CALLE     0.9980    0.9980    0.9980      5545
                    CENTRO_SALUD     1.0000    1.0000    1.0000        29
              CORREO_ELECTRONICO     0.9957    0.9810    0.9883       474
    

Generating plots for the valid set
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/057_train.txt > ./output/_2019-05-15_14-01-31-113906/057_train.txt_conll_evaluation.txt
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/057_valid.txt > ./output/_2019-05-15_14-01-31-113906/057_valid.txt_conll_evaluation.txt
The last 44 epochs have not shown improvements on the validation set.

Starting epoch 58
Training completed in 963.42 seconds
Evaluate model on the train set
                                  precision    recall  f1-score   support

                           CALLE     0.9969    0.9975    0.9972      5545
                    CENTRO_SALUD     1.0000    1.0000    1.0000        29
              CORREO_ELECTRONICO     0.9958    0.9916    0.9937       474
    

Generating plots for the valid set
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/059_train.txt > ./output/_2019-05-15_14-01-31-113906/059_train.txt_conll_evaluation.txt
/usr/local/lib/python3.6/dist-packages/neuroner
shell_command: perl /usr/local/lib/python3.6/dist-packages/neuroner/conlleval < ./output/_2019-05-15_14-01-31-113906/059_valid.txt > ./output/_2019-05-15_14-01-31-113906/059_valid.txt_conll_evaluation.txt
The last 46 epochs have not shown improvements on the validation set.

Starting epoch 60
Training completed in 924.94 seconds
Evaluate model on the train set
                                  precision    recall  f1-score   support

                           CALLE     0.9964    0.9982    0.9973      5545
                    CENTRO_SALUD     1.0000    1.0000    1.0000        29
              CORREO_ELECTRONICO     0.9958    0.9937    0.9947       474
    

## Postprocess task

In [None]:
# Paths and options used by the post-processing and BRAT-conversion cells below.
run = "" # folder under ./output/ that holds the NeuroNER output; empty here, presumably to be set to the run to process
original_brat = "./data/deploy/"  # folder where the test dataset is located
path_to_process = "./output/"+run+"/000_deploy.txt" # NeuroNER output file containing the predicted entities
path_to_save = "./output/"+run+"/000_deploy_postprocessed.txt" # file where the post-processed data is saved
path_to_brat = "./data/deploy/" # folder where the processed file is saved in BRAT format
prefixes = ['nhc/', 'NHC/','nhc:', 'NHC:', 'nhc-', 'NHC-', 'cp:', 'CP:', 'cp-', 'CP-', 'nacimiento:'] # prefixes to be removed from tokens

In [None]:
# Remove trailing periods and the configured prefixes from the predicted tokens; the result is written to path_to_save.
postprocess(path_to_process, path_to_save, clear_ending_points=True, clear_prefixes=True, prefixes=prefixes)

In [None]:
# Convert the post-processed file to BRAT format, using original_brat as the source dataset and path_to_brat as the destination.
# NOTE(review): path_to_save is passed as both the first and the second argument; confirm this matches conll_to_brat's signature.
neuromodel.conll_to_brat.conll_to_brat(path_to_save, path_to_save, original_brat, path_to_brat, overwrite=True)

Once the execution has finished, the final output should be placed in the folder indicated by "path_to_brat"