In [1]:
import json
from pathlib import Path
import numpy as np
from copy import deepcopy
import pandas as pd

from deeppavlov.core.commands.train import read_data_by_config, train_evaluate_model_from_config
from deeppavlov.core.commands.infer import interact_model, build_model_from_config
from deeppavlov.core.commands.utils import expand_path
from deeppavlov.core.common.params import from_params
from deeppavlov.core.common.errors import ConfigError

In [2]:
# read unlabelled data for label propagation
def read_unlabelled_data(UNLABELLED_DATA_PATH):
    """Load unlabelled samples from a text file, one sample per line.

    Args:
        UNLABELLED_DATA_PATH: path to the text file to read.

    Returns:
        List of the file's lines (split with ``str.splitlines``) with
        empty lines dropped. Whitespace-only lines are kept.
    """
    with open(UNLABELLED_DATA_PATH, "r") as source:
        raw_lines = source.read().splitlines()
    # An empty string is the only falsy str, so this drops exactly the empty lines.
    return [line for line in raw_lines if line]

In [3]:
def make_pl_config(CONFIG_PATH):
    """Derive a pseudo-labelling config from an existing JSON config.

    The derived config is a deep copy of the original in which
    ``dataset_reader["train"]`` is replaced by ``<stem>_pl.csv``, where
    ``<stem>`` is the stem of the original train file name ("train.csv" is
    assumed when the key is absent). The derived config is written next to
    the original as ``<config stem>_pl.json``.

    Args:
        CONFIG_PATH: path to the original JSON config file.

    Returns:
        Tuple ``(config, config_pl)``: the original config as loaded and
        the derived pseudo-labelling config.
    """
    source_path = Path(CONFIG_PATH)

    with open(CONFIG_PATH, "r") as src:
        config = json.load(src)

    # Work on a copy so the original config is returned untouched.
    config_pl = deepcopy(config)
    reader_params = config_pl["dataset_reader"]
    train_stem = Path(reader_params.get("train", "train.csv")).stem
    reader_params["train"] = train_stem + "_pl.csv"

    target_path = source_path.parent / (source_path.stem + "_pl.json")
    with open(target_path, "w") as dst:
        json.dump(config_pl, dst, indent=2)

    return config, config_pl

In [4]:
def save_extended_data(config, samples, labels, new_config=None):
    """Append pseudo-labelled samples to the train set and write it to disk.

    The current train set is read with ``read_data_by_config(config)``,
    extended with ``(samples[i], labels[i])`` pairs and saved as a two-column
    table whose column names are ``dataset_reader["x"]`` and
    ``dataset_reader["y"]`` of ``config``. Each label entry is an iterable of
    class names that is joined into a single string with
    ``dataset_reader["class_sep"]`` (default ``","``) of ``config``.

    Args:
        config: config used to read the existing train data and to name the
            columns.
        samples: new samples to append.
        labels: labels for ``samples``; ``labels[i]`` belongs to
            ``samples[i]``.
        new_config: optional config that defines where and in which format
            the extended data is written. When ``None``, ``config`` is used.

    Raises:
        ConfigError: if ``dataset_reader["format"]`` of the target config is
            neither ``"csv"`` nor ``"json"``.
    """
    source_reader = config["dataset_reader"]
    x_column, y_column = source_reader["x"], source_reader["y"]
    class_sep = source_reader.get("class_sep", ",")

    train_data = read_data_by_config(deepcopy(config))
    train_data["train"].extend((samples[i], labels[i]) for i in range(len(samples)))

    df = pd.DataFrame(train_data["train"], columns=[x_column, y_column])
    df[y_column] = df[y_column].apply(lambda classes: class_sep.join(classes))

    # From here on only the target config matters (output path and format).
    target_reader = (config if new_config is None else new_config)["dataset_reader"]
    file = expand_path(Path(target_reader["data_path"]) / Path(target_reader["train"]))

    data_format = target_reader.get("format", "csv")
    if data_format == "csv":
        df.to_csv(file,
                  index=False,
                  sep=target_reader.get("sep", ","))
    elif data_format == "json":
        orient = target_reader.get("orient", None)
        json_kwargs = {"orient": orient,
                       "lines": target_reader.get("lines", False)}
        # pandas raises ValueError for index=False with the default orient
        # (and with "index"/"columns"), so request it only for the orients
        # that accept it in every pandas version.
        if orient in ("split", "table"):
            json_kwargs["index"] = False
        df.to_json(file, **json_kwargs)
    else:
        raise ConfigError("Can not work with current data format")

In [5]:
# Manually given parameters for pseudo-labeling.
# Everything a reader may want to tune lives in this cell.

# path to the JSON config file of the classifier; the pseudo-labelling copy
# of the config is written next to it
CONFIG_PATH = "../deeppavlov/configs/classifiers/yahoo_answers_L31_fulltext.json"
# path to the text file with unlabelled data (one sample per line, empty lines are skipped)
UNLABELLED_DATA_PATH = "../download/YahooAnswers/yahoo_answers_data/question_L6.txt"
# number of unlabelled samples drawn and shown to the model during one iteration of label propagation
ONE_ITERATION_PORTION = 2000
# number of train -> label -> extend-train-set iterations
N_ITERATIONS = 10
# index, inside config["chainer"]["pipe"], of the component whose keys are used as the class names
CLASSES_VOCAB_ID_IN_PIPE = 0
# a class is assigned to a sample only if its predicted value is strictly greater than this threshold
CONFIDENT_PROBA = 0.9

In [6]:
# Read the unlabelled dataset: a list of non-empty lines of the file.
unlabelled_data = read_unlabelled_data(UNLABELLED_DATA_PATH)
# Load the original config and build its pseudo-labelling copy (`config_pl`),
# which is also written to disk next to the original config.
config, config_pl = make_pl_config(CONFIG_PATH)
# Save the initial train set, with no extra samples yet, to the train file
# that `config_pl` points to.
save_extended_data(config, [], [], new_config=config_pl)

In [None]:
# Ids (indices into `unlabelled_data`) of samples not yet shown to the model.
available_unlabelled_ids = np.arange(len(unlabelled_data))

np.random.seed(42)

for i in range(N_ITERATIONS):
    if len(available_unlabelled_ids) == 0:
        # Nothing left to draw from; sampling from an empty pool would raise.
        print("Iteration {}: no unlabelled samples left, stopping".format(i))
        break

    samples = []
    labels = []

    # Draw *positions* inside the pool without replacement, so that no sample
    # is labelled twice within one iteration, and never more than is available.
    chosen_positions = np.random.choice(
        len(available_unlabelled_ids),
        size=min(ONE_ITERATION_PORTION, len(available_unlabelled_ids)),
        replace=False)
    ids_to_label = available_unlabelled_ids[chosen_positions]
    # np.delete works on positions, not on stored values: remove exactly the
    # drawn entries so they are not drawn again in later iterations.
    available_unlabelled_ids = np.delete(available_unlabelled_ids, chosen_positions)

    # Train on the current extended train set, then load the trained model.
    train_evaluate_model_from_config(deepcopy(config_pl))
    model = build_model_from_config(deepcopy(config_pl))
    # Class names are the keys of the pipe component at CLASSES_VOCAB_ID_IN_PIPE.
    classes = np.array(list(from_params(
        deepcopy(config_pl["chainer"]["pipe"][CLASSES_VOCAB_ID_IN_PIPE])).keys()))

    for sample_id in ids_to_label:
        prediction = model([unlabelled_data[sample_id]])[0]
        confident_class_ids = np.where(np.array(prediction) > CONFIDENT_PROBA)[0]
        # Keep the sample only if at least one class passes the threshold;
        # every class above the threshold becomes one of its labels.
        if len(confident_class_ids):
            samples.append(unlabelled_data[sample_id])
            labels.append(classes[confident_class_ids])

    print("Iteration {}: add {} samples to train dataset".format(i, len(samples)))
    save_extended_data(config_pl, samples, labels)

2018-11-09 11:09:18.792 INFO in 'deeppavlov.core.data.simple_vocab'['simple_vocab'] at line 89: [saving vocabulary to /home/dilyara/Documents/GitHub/DeepPavlov/download/YahooAnswers/models/model_v8/yahoo_answers_classes.dict]
[nltk_data] Downloading package punkt to /home/dilyara/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /home/dilyara/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package perluniprops to
[nltk_data]     /home/dilyara/nltk_data...
[nltk_data]   Package perluniprops is already up-to-date!
[nltk_data] Downloading package nonbreaking_prefixes to
[nltk_data]     /home/dilyara/nltk_data...
[nltk_data]   Package nonbreaking_prefixes is already up-to-date!
Using TensorFlow backend.
2018-11-09 11:09:20.16 INFO in 'tensorflow'['tf_logging'] at line 159: Using /tmp/tfhub_modules to cache modules.
2018-11-09 11:09:20.476 DEBUG in 'tensorflow'['tf_logging'

2018-11-09 11:09:20.578 DEBUG in 'tensorflow'['tf_logging'] at line 100: Initialize variable module/bilm/RNN_0/RNN/MultiRNNCell/Cell1/rnn/lstm_cell/bias:0 from checkpoint b'/tmp/tfhub_modules/9bb74bc86f9caffc8c47dd7b33ec4bb354d9602d/variables/variables' with bilm/RNN_0/RNN/MultiRNNCell/Cell1/rnn/lstm_cell/bias
2018-11-09 11:09:20.581 DEBUG in 'tensorflow'['tf_logging'] at line 100: Initialize variable module/bilm/RNN_0/RNN/MultiRNNCell/Cell1/rnn/lstm_cell/kernel:0 from checkpoint b'/tmp/tfhub_modules/9bb74bc86f9caffc8c47dd7b33ec4bb354d9602d/variables/variables' with bilm/RNN_0/RNN/MultiRNNCell/Cell1/rnn/lstm_cell/kernel
2018-11-09 11:09:20.585 DEBUG in 'tensorflow'['tf_logging'] at line 100: Initialize variable module/bilm/RNN_0/RNN/MultiRNNCell/Cell1/rnn/lstm_cell/projection/kernel:0 from checkpoint b'/tmp/tfhub_modules/9bb74bc86f9caffc8c47dd7b33ec4bb354d9602d/variables/variables' with bilm/RNN_0/RNN/MultiRNNCell/Cell1/rnn/lstm_cell/projection/kernel
2018-11-09 11:09:20.588 DEBUG in '

{"valid": {"eval_examples_count": 403, "metrics": {"roc_auc": 0.61, "sets_accuracy": 0.5012, "f1_macro": 0.4685}, "time_spent": "0:00:20", "epochs_done": 0, "batches_seen": 0, "train_examples_seen": 0, "impatience": 0, "patience_limit": 5}}
{"train": {"epochs_done": 1, "batches_seen": 4, "train_examples_seen": 3613, "metrics": {"roc_auc": 0.5523, "sets_accuracy": 0.5408, "f1_macro": 0.5266}, "time_spent": "0:06:41", "loss": 2.6326469630002975}}


  'precision', 'predicted', average, warn_for)
2018-11-09 11:16:19.815 INFO in 'deeppavlov.core.commands.train'['train'] at line 525: Did not improve on the roc_auc of 0.61


{"valid": {"eval_examples_count": 403, "metrics": {"roc_auc": 0.5954, "sets_accuracy": 0.4442, "f1_macro": 0.3076}, "time_spent": "0:06:58", "epochs_done": 1, "batches_seen": 4, "train_examples_seen": 3613, "impatience": 1, "patience_limit": 5}}
{"train": {"epochs_done": 2, "batches_seen": 8, "train_examples_seen": 7226, "metrics": {"roc_auc": 0.478, "sets_accuracy": 0.4791, "f1_macro": 0.4776}, "time_spent": "0:13:08", "loss": 0.7315232902765274}}


2018-11-09 11:22:46.678 INFO in 'deeppavlov.core.commands.train'['train'] at line 525: Did not improve on the roc_auc of 0.61


{"valid": {"eval_examples_count": 403, "metrics": {"roc_auc": 0.5555, "sets_accuracy": 0.603, "f1_macro": 0.4783}, "time_spent": "0:13:25", "epochs_done": 2, "batches_seen": 8, "train_examples_seen": 7226, "impatience": 2, "patience_limit": 5}}
{"train": {"epochs_done": 3, "batches_seen": 12, "train_examples_seen": 10839, "metrics": {"roc_auc": 0.6096, "sets_accuracy": 0.6308, "f1_macro": 0.5706}, "time_spent": "0:19:45", "loss": 0.7229214012622833}}


2018-11-09 11:29:23.750 INFO in 'deeppavlov.core.commands.train'['train'] at line 525: Did not improve on the roc_auc of 0.61


{"valid": {"eval_examples_count": 403, "metrics": {"roc_auc": 0.5433, "sets_accuracy": 0.5732, "f1_macro": 0.4921}, "time_spent": "0:20:02", "epochs_done": 3, "batches_seen": 12, "train_examples_seen": 10839, "impatience": 3, "patience_limit": 5}}
{"train": {"epochs_done": 4, "batches_seen": 16, "train_examples_seen": 14452, "metrics": {"roc_auc": 0.605, "sets_accuracy": 0.6371, "f1_macro": 0.5758}, "time_spent": "0:26:09", "loss": 0.7257255017757416}}


2018-11-09 11:35:46.633 INFO in 'deeppavlov.core.commands.train'['train'] at line 525: Did not improve on the roc_auc of 0.61


{"valid": {"eval_examples_count": 403, "metrics": {"roc_auc": 0.5526, "sets_accuracy": 0.5881, "f1_macro": 0.4869}, "time_spent": "0:26:25", "epochs_done": 4, "batches_seen": 16, "train_examples_seen": 14452, "impatience": 4, "patience_limit": 5}}
{"train": {"epochs_done": 5, "batches_seen": 20, "train_examples_seen": 18065, "metrics": {"roc_auc": 0.6174, "sets_accuracy": 0.6402, "f1_macro": 0.5877}, "time_spent": "0:32:31", "loss": 0.7147586047649384}}


2018-11-09 11:42:08.549 INFO in 'deeppavlov.core.commands.train'['train'] at line 525: Did not improve on the roc_auc of 0.61
2018-11-09 11:42:08.550 INFO in 'deeppavlov.core.commands.train'['train'] at line 536: Ran out of patience
2018-11-09 11:42:08.552 INFO in 'deeppavlov.core.data.simple_vocab'['simple_vocab'] at line 100: [loading vocabulary from /home/dilyara/Documents/GitHub/DeepPavlov/download/YahooAnswers/models/model_v8/yahoo_answers_classes.dict]


{"valid": {"eval_examples_count": 403, "metrics": {"roc_auc": 0.5578, "sets_accuracy": 0.5931, "f1_macro": 0.5037}, "time_spent": "0:32:47", "epochs_done": 5, "batches_seen": 20, "train_examples_seen": 18065, "impatience": 5, "patience_limit": 5}}


2018-11-09 11:42:08.792 DEBUG in 'tensorflow'['tf_logging'] at line 100: Initialize variable module/aggregation/scaling:0 from checkpoint b'/tmp/tfhub_modules/9bb74bc86f9caffc8c47dd7b33ec4bb354d9602d/variables/variables' with aggregation/scaling
2018-11-09 11:42:08.795 DEBUG in 'tensorflow'['tf_logging'] at line 100: Initialize variable module/aggregation/weights:0 from checkpoint b'/tmp/tfhub_modules/9bb74bc86f9caffc8c47dd7b33ec4bb354d9602d/variables/variables' with aggregation/weights
2018-11-09 11:42:08.798 DEBUG in 'tensorflow'['tf_logging'] at line 100: Initialize variable module/bilm/CNN/W_cnn_0:0 from checkpoint b'/tmp/tfhub_modules/9bb74bc86f9caffc8c47dd7b33ec4bb354d9602d/variables/variables' with bilm/CNN/W_cnn_0
2018-11-09 11:42:08.801 DEBUG in 'tensorflow'['tf_logging'] at line 100: Initialize variable module/bilm/CNN/W_cnn_1:0 from checkpoint b'/tmp/tfhub_modules/9bb74bc86f9caffc8c47dd7b33ec4bb354d9602d/variables/variables' with bilm/CNN/W_cnn_1
2018-11-09 11:42:08.803 DEBU

2018-11-09 11:42:09.27 DEBUG in 'tensorflow'['tf_logging'] at line 100: Initialize variable module/bilm/RNN_1/RNN/MultiRNNCell/Cell0/rnn/lstm_cell/bias:0 from checkpoint b'/tmp/tfhub_modules/9bb74bc86f9caffc8c47dd7b33ec4bb354d9602d/variables/variables' with bilm/RNN_1/RNN/MultiRNNCell/Cell0/rnn/lstm_cell/bias
2018-11-09 11:42:09.29 DEBUG in 'tensorflow'['tf_logging'] at line 100: Initialize variable module/bilm/RNN_1/RNN/MultiRNNCell/Cell0/rnn/lstm_cell/kernel:0 from checkpoint b'/tmp/tfhub_modules/9bb74bc86f9caffc8c47dd7b33ec4bb354d9602d/variables/variables' with bilm/RNN_1/RNN/MultiRNNCell/Cell0/rnn/lstm_cell/kernel
2018-11-09 11:42:09.32 DEBUG in 'tensorflow'['tf_logging'] at line 100: Initialize variable module/bilm/RNN_1/RNN/MultiRNNCell/Cell0/rnn/lstm_cell/projection/kernel:0 from checkpoint b'/tmp/tfhub_modules/9bb74bc86f9caffc8c47dd7b33ec4bb354d9602d/variables/variables' with bilm/RNN_1/RNN/MultiRNNCell/Cell0/rnn/lstm_cell/projection/kernel
2018-11-09 11:42:09.34 DEBUG in 'tens

{"valid": {"eval_examples_count": 403, "metrics": {"roc_auc": 0.61, "sets_accuracy": 0.5012, "f1_macro": 0.4685}, "time_spent": "0:00:17"}}


2018-11-09 11:42:28.352 INFO in 'deeppavlov.core.data.simple_vocab'['simple_vocab'] at line 100: [loading vocabulary from /home/dilyara/Documents/GitHub/DeepPavlov/download/YahooAnswers/models/model_v8/yahoo_answers_classes.dict]


{"test": {"eval_examples_count": 100, "metrics": {"roc_auc": 0.6341, "sets_accuracy": 0.53, "f1_macro": 0.5296}, "time_spent": "0:00:02"}}


2018-11-09 11:42:28.597 DEBUG in 'tensorflow'['tf_logging'] at line 100: Initialize variable module/aggregation/scaling:0 from checkpoint b'/tmp/tfhub_modules/9bb74bc86f9caffc8c47dd7b33ec4bb354d9602d/variables/variables' with aggregation/scaling
2018-11-09 11:42:28.599 DEBUG in 'tensorflow'['tf_logging'] at line 100: Initialize variable module/aggregation/weights:0 from checkpoint b'/tmp/tfhub_modules/9bb74bc86f9caffc8c47dd7b33ec4bb354d9602d/variables/variables' with aggregation/weights
2018-11-09 11:42:28.602 DEBUG in 'tensorflow'['tf_logging'] at line 100: Initialize variable module/bilm/CNN/W_cnn_0:0 from checkpoint b'/tmp/tfhub_modules/9bb74bc86f9caffc8c47dd7b33ec4bb354d9602d/variables/variables' with bilm/CNN/W_cnn_0
2018-11-09 11:42:28.605 DEBUG in 'tensorflow'['tf_logging'] at line 100: Initialize variable module/bilm/CNN/W_cnn_1:0 from checkpoint b'/tmp/tfhub_modules/9bb74bc86f9caffc8c47dd7b33ec4bb354d9602d/variables/variables' with bilm/CNN/W_cnn_1
2018-11-09 11:42:28.608 DEBU

2018-11-09 11:42:28.678 DEBUG in 'tensorflow'['tf_logging'] at line 100: Initialize variable module/bilm/RNN_1/RNN/MultiRNNCell/Cell0/rnn/lstm_cell/bias:0 from checkpoint b'/tmp/tfhub_modules/9bb74bc86f9caffc8c47dd7b33ec4bb354d9602d/variables/variables' with bilm/RNN_1/RNN/MultiRNNCell/Cell0/rnn/lstm_cell/bias
2018-11-09 11:42:28.681 DEBUG in 'tensorflow'['tf_logging'] at line 100: Initialize variable module/bilm/RNN_1/RNN/MultiRNNCell/Cell0/rnn/lstm_cell/kernel:0 from checkpoint b'/tmp/tfhub_modules/9bb74bc86f9caffc8c47dd7b33ec4bb354d9602d/variables/variables' with bilm/RNN_1/RNN/MultiRNNCell/Cell0/rnn/lstm_cell/kernel
2018-11-09 11:42:28.684 DEBUG in 'tensorflow'['tf_logging'] at line 100: Initialize variable module/bilm/RNN_1/RNN/MultiRNNCell/Cell0/rnn/lstm_cell/projection/kernel:0 from checkpoint b'/tmp/tfhub_modules/9bb74bc86f9caffc8c47dd7b33ec4bb354d9602d/variables/variables' with bilm/RNN_1/RNN/MultiRNNCell/Cell0/rnn/lstm_cell/projection/kernel
2018-11-09 11:42:28.687 DEBUG in '

Iteration 0: add 0 samples to train dataset


2018-11-09 11:47:05.647 INFO in 'deeppavlov.core.data.simple_vocab'['simple_vocab'] at line 100: [loading vocabulary from /home/dilyara/Documents/GitHub/DeepPavlov/download/YahooAnswers/models/model_v8/yahoo_answers_classes.dict]
2018-11-09 11:47:05.652 INFO in 'deeppavlov.core.data.simple_vocab'['simple_vocab'] at line 89: [saving vocabulary to /home/dilyara/Documents/GitHub/DeepPavlov/download/YahooAnswers/models/model_v8/yahoo_answers_classes.dict]
2018-11-09 11:47:05.910 DEBUG in 'tensorflow'['tf_logging'] at line 100: Initialize variable module/aggregation/scaling:0 from checkpoint b'/tmp/tfhub_modules/9bb74bc86f9caffc8c47dd7b33ec4bb354d9602d/variables/variables' with aggregation/scaling
2018-11-09 11:47:05.913 DEBUG in 'tensorflow'['tf_logging'] at line 100: Initialize variable module/aggregation/weights:0 from checkpoint b'/tmp/tfhub_modules/9bb74bc86f9caffc8c47dd7b33ec4bb354d9602d/variables/variables' with aggregation/weights
2018-11-09 11:47:05.915 DEBUG in 'tensorflow'['tf_lo

2018-11-09 11:47:06.4 DEBUG in 'tensorflow'['tf_logging'] at line 100: Initialize variable module/bilm/RNN_0/RNN/MultiRNNCell/Cell1/rnn/lstm_cell/projection/kernel:0 from checkpoint b'/tmp/tfhub_modules/9bb74bc86f9caffc8c47dd7b33ec4bb354d9602d/variables/variables' with bilm/RNN_0/RNN/MultiRNNCell/Cell1/rnn/lstm_cell/projection/kernel
2018-11-09 11:47:06.7 DEBUG in 'tensorflow'['tf_logging'] at line 100: Initialize variable module/bilm/RNN_1/RNN/MultiRNNCell/Cell0/rnn/lstm_cell/bias:0 from checkpoint b'/tmp/tfhub_modules/9bb74bc86f9caffc8c47dd7b33ec4bb354d9602d/variables/variables' with bilm/RNN_1/RNN/MultiRNNCell/Cell0/rnn/lstm_cell/bias
2018-11-09 11:47:06.9 DEBUG in 'tensorflow'['tf_logging'] at line 100: Initialize variable module/bilm/RNN_1/RNN/MultiRNNCell/Cell0/rnn/lstm_cell/kernel:0 from checkpoint b'/tmp/tfhub_modules/9bb74bc86f9caffc8c47dd7b33ec4bb354d9602d/variables/variables' with bilm/RNN_1/RNN/MultiRNNCell/Cell0/rnn/lstm_cell/kernel
2018-11-09 11:47:06.14 DEBUG in 'tensorf

{"valid": {"eval_examples_count": 403, "metrics": {"roc_auc": 0.61, "sets_accuracy": 0.5012, "f1_macro": 0.4685}, "time_spent": "0:00:18", "epochs_done": 0, "batches_seen": 0, "train_examples_seen": 0, "impatience": 0, "patience_limit": 5}}
{"train": {"epochs_done": 1, "batches_seen": 4, "train_examples_seen": 3613, "metrics": {"roc_auc": 0.5414, "sets_accuracy": 0.5267, "f1_macro": 0.5082}, "time_spent": "0:06:37", "loss": 3.4515210390090942}}


2018-11-09 11:54:00.268 INFO in 'deeppavlov.core.commands.train'['train'] at line 525: Did not improve on the roc_auc of 0.61


{"valid": {"eval_examples_count": 403, "metrics": {"roc_auc": 0.3837, "sets_accuracy": 0.469, "f1_macro": 0.3651}, "time_spent": "0:06:53", "epochs_done": 1, "batches_seen": 4, "train_examples_seen": 3613, "impatience": 1, "patience_limit": 5}}
{"train": {"epochs_done": 2, "batches_seen": 8, "train_examples_seen": 7226, "metrics": {"roc_auc": 0.558, "sets_accuracy": 0.571, "f1_macro": 0.5622}, "time_spent": "0:12:53", "loss": 0.7258788794279099}}


2018-11-09 12:00:16.56 INFO in 'deeppavlov.core.commands.train'['train'] at line 518: New best roc_auc of 0.6118
2018-11-09 12:00:16.56 INFO in 'deeppavlov.core.commands.train'['train'] at line 520: Saving model
2018-11-09 12:00:16.57 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 375: [saving model to /home/dilyara/Documents/GitHub/DeepPavlov/download/YahooAnswers/models/model_v8/model_opt.json]


{"valid": {"eval_examples_count": 403, "metrics": {"roc_auc": 0.6118, "sets_accuracy": 0.5633, "f1_macro": 0.3803}, "time_spent": "0:13:09", "epochs_done": 2, "batches_seen": 8, "train_examples_seen": 7226, "impatience": 0, "patience_limit": 5}}
{"train": {"epochs_done": 3, "batches_seen": 12, "train_examples_seen": 10839, "metrics": {"roc_auc": 0.6298, "sets_accuracy": 0.605, "f1_macro": 0.5519}, "time_spent": "0:19:19", "loss": 0.730575680732727}}


2018-11-09 12:06:42.35 INFO in 'deeppavlov.core.commands.train'['train'] at line 518: New best roc_auc of 0.7126
2018-11-09 12:06:42.35 INFO in 'deeppavlov.core.commands.train'['train'] at line 520: Saving model
2018-11-09 12:06:42.36 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 375: [saving model to /home/dilyara/Documents/GitHub/DeepPavlov/download/YahooAnswers/models/model_v8/model_opt.json]


{"valid": {"eval_examples_count": 403, "metrics": {"roc_auc": 0.7126, "sets_accuracy": 0.603, "f1_macro": 0.488}, "time_spent": "0:19:35", "epochs_done": 3, "batches_seen": 12, "train_examples_seen": 10839, "impatience": 0, "patience_limit": 5}}
{"train": {"epochs_done": 4, "batches_seen": 16, "train_examples_seen": 14452, "metrics": {"roc_auc": 0.7447, "sets_accuracy": 0.6579, "f1_macro": 0.5972}, "time_spent": "0:25:44", "loss": 0.6782405525445938}}


2018-11-09 12:13:06.277 INFO in 'deeppavlov.core.commands.train'['train'] at line 518: New best roc_auc of 0.764
2018-11-09 12:13:06.278 INFO in 'deeppavlov.core.commands.train'['train'] at line 520: Saving model
2018-11-09 12:13:06.278 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 375: [saving model to /home/dilyara/Documents/GitHub/DeepPavlov/download/YahooAnswers/models/model_v8/model_opt.json]


{"valid": {"eval_examples_count": 403, "metrics": {"roc_auc": 0.764, "sets_accuracy": 0.5806, "f1_macro": 0.4998}, "time_spent": "0:25:59", "epochs_done": 4, "batches_seen": 16, "train_examples_seen": 14452, "impatience": 0, "patience_limit": 5}}
{"train": {"epochs_done": 5, "batches_seen": 20, "train_examples_seen": 18065, "metrics": {"roc_auc": 0.7854, "sets_accuracy": 0.6565, "f1_macro": 0.5992}, "time_spent": "0:31:59", "loss": 0.670505702495575}}


2018-11-09 12:19:21.762 INFO in 'deeppavlov.core.commands.train'['train'] at line 518: New best roc_auc of 0.8102
2018-11-09 12:19:21.763 INFO in 'deeppavlov.core.commands.train'['train'] at line 520: Saving model
2018-11-09 12:19:21.763 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 375: [saving model to /home/dilyara/Documents/GitHub/DeepPavlov/download/YahooAnswers/models/model_v8/model_opt.json]


{"valid": {"eval_examples_count": 403, "metrics": {"roc_auc": 0.8102, "sets_accuracy": 0.598, "f1_macro": 0.4992}, "time_spent": "0:32:15", "epochs_done": 5, "batches_seen": 20, "train_examples_seen": 18065, "impatience": 0, "patience_limit": 5}}
{"train": {"epochs_done": 6, "batches_seen": 24, "train_examples_seen": 21678, "metrics": {"roc_auc": 0.8337, "sets_accuracy": 0.6789, "f1_macro": 0.6303}, "time_spent": "0:38:13", "loss": 0.6476360112428665}}


2018-11-09 12:25:35.581 INFO in 'deeppavlov.core.commands.train'['train'] at line 518: New best roc_auc of 0.8341
2018-11-09 12:25:35.581 INFO in 'deeppavlov.core.commands.train'['train'] at line 520: Saving model
2018-11-09 12:25:35.582 INFO in 'deeppavlov.models.classifiers.keras_classification_model'['keras_classification_model'] at line 375: [saving model to /home/dilyara/Documents/GitHub/DeepPavlov/download/YahooAnswers/models/model_v8/model_opt.json]


{"valid": {"eval_examples_count": 403, "metrics": {"roc_auc": 0.8341, "sets_accuracy": 0.7221, "f1_macro": 0.6935}, "time_spent": "0:38:29", "epochs_done": 6, "batches_seen": 24, "train_examples_seen": 21678, "impatience": 0, "patience_limit": 5}}
