## You can also run the notebook in [COLAB](https://colab.research.google.com/github/deepmipt/DeepPavlov/blob/master/examples/gobot_tutorial.ipynb).

In [None]:
!pip install deeppavlov

## Data Preparation

In [1]:
from deeppavlov.dataset_readers.dstc2_reader import DSTC2DatasetReader

# Read the dialogs stored under ./my_data; the log output of this cell shows the
# dstc2-trn / dstc2-val / dstc2-tst .jsonlist files being loaded from there.
dstc2_reader = DSTC2DatasetReader()
data = dstc2_reader.read('my_data')

2019-08-06 14:38:20.324 INFO in 'deeppavlov.dataset_readers.dstc2_reader'['dstc2_reader'] at line 112: [loading dialogs from my_data/dstc2-trn.jsonlist]
2019-08-06 14:38:20.480 INFO in 'deeppavlov.dataset_readers.dstc2_reader'['dstc2_reader'] at line 112: [loading dialogs from my_data/dstc2-val.jsonlist]
2019-08-06 14:38:20.590 INFO in 'deeppavlov.dataset_readers.dstc2_reader'['dstc2_reader'] at line 112: [loading dialogs from my_data/dstc2-tst.jsonlist]


In [2]:
# Peek at one raw training sample: a (user turn, system response) pair of dicts.
train_samples = data['train']
train_samples[1]

({'text': 'cheap restaurant',
  'intents': [{'slots': [['pricerange', 'cheap']], 'act': 'inform'}]},
 {'text': 'What kind of food would you like?', 'act': 'request_food'})

In [3]:
from deeppavlov.dataset_iterators.dialog_iterator import DialogDatasetIterator

# Wrap the reader output in a dialog iterator. The cells below use `iterator.train`
# (whole dialogs rather than single turn pairs) and `iterator.gen_batches(...)`.
iterator = DialogDatasetIterator(data)

In [4]:
# Take the first training dialog and split it into its two halves; x_dialog is the
# list of user-turn dicts shown below (y_dialog presumably holds the matching
# system responses - not displayed here).
first_train_dialog = iterator.train[0]
x_dialog, y_dialog = first_train_dialog
x_dialog

[{'text': '', 'intents': [], 'prev_resp_act': None},
 {'text': 'cheap restaurant',
  'intents': [{'slots': [['pricerange', 'cheap']], 'act': 'inform'}],
  'prev_resp_act': 'welcomemsg'},
 {'text': 'any',
  'intents': [{'slots': [['this', 'dontcare']], 'act': 'inform'}],
  'prev_resp_act': 'request_food'},
 {'text': 'south',
  'intents': [{'slots': [['area', 'south']], 'act': 'inform'}],
  'prev_resp_act': 'request_area'},
 {'text': 'south',
  'intents': [{'slots': [['area', 'south']], 'act': 'inform'}],
  'db_result': {'food': 'chinese',
   'pricerange': 'cheap',
   'area': 'south',
   'addr': 'cambridge leisure park clifton way cherry hinton',
   'phone': '01223 244277',
   'postcode': 'c.b 1, 7 d.y',
   'name': 'the lucky star'},
  'prev_resp_act': 'api_call'},
 {'text': 'address',
  'intents': [{'slots': [['slot', 'addr']], 'act': 'request'}],
  'prev_resp_act': 'inform_area+inform_food+offer_name'},
 {'text': 'phone number',
  'intents': [{'slots': [['slot', 'phone']], 'act': 'requ

## 1. Build database of items

In [5]:
from deeppavlov.core.data.sqlite_database import Sqlite3Database

# Columns of the restaurant table; "name" doubles as the primary key.
db_columns = ["name", "food", "pricerange", "area", "addr", "phone", "postcode"]

# The SQLite file lives next to the other bot artifacts in ./my_bot.
database = Sqlite3Database(
    table_name="mytable",
    primary_keys=["name"],
    keys=db_columns,
    save_path="my_bot/db.sqlite",
)

2019-08-06 14:38:23.370 INFO in 'deeppavlov.core.data.sqlite_database'['sqlite_database'] at line 63: Loading database from /home/vimary/code-projects/Pilot/examples/my_bot/db.sqlite.


In [6]:
# Harvest every database record attached to a dialog turn (data_type='all').
db_results = []
for batch_x, _batch_y in iterator.gen_batches(batch_size=1, data_type='all'):
    # batch_size=1, so the batch holds exactly one dialog: a list of turn dicts.
    single_dialog = batch_x[0]
    for turn in single_dialog:
        record = turn.get('db_result')
        if record:
            db_results.append(record)

print(f"Adding {len(db_results)} items.")
# Only touch the database when at least one record was collected.
if len(db_results) > 0:
    database.fit(db_results)

Adding 3016 items.


In [7]:
# Query the database with a partial set of slot values; the call takes a batch
# (list) of queries and returns one list of matching rows per query.
query = {'pricerange': 'cheap', 'area': 'south'}
database([query])

[[{'name': 'nandos',
   'food': 'portuguese',
   'pricerange': 'cheap',
   'area': 'south',
   'addr': 'cambridge leisure park clifton way',
   'phone': '01223 327908',
   'postcode': 'c.b 1, 7 d.y'},
  {'name': 'the lucky star',
   'food': 'chinese',
   'pricerange': 'cheap',
   'area': 'south',
   'addr': 'cambridge leisure park clifton way cherry hinton',
   'phone': '01223 244277',
   'postcode': 'c.b 1, 7 d.y'}]]

In [8]:
# List the bot directory to confirm that the SQLite file was written.
!ls my_bot

db.sqlite


## 2. Build Slot Filler

A slot filler is a component that takes text as input and outputs a dictionary of slot names and their values:

    slot_filler(['I would like some chineese food'])
    >> [{'food': 'chinese'}]

To implement a slot filler you need to provide
    
 - **slot types**
 - all possible **slot values**
 - optionally (but recommended), examples of mentions for every value of a slot
 
The data should be provided in `slot_vals.json` file with the following format:

    {
        'food': {
            'chinese': ['chinese', 'chineese', 'chines'],
            'french': ['french', 'freench'],
            'dontcare': ['any food', '
                

There are two possible models for a slot filler:

   1. A non-trainable 

In [9]:
from deeppavlov import configs
from deeppavlov.core.common.file import read_json

# Load the stock DSTC2 NER config as a plain dict so it can be patched below.
ner_config_path = configs.ner.ner_dstc2
ner_config = read_json(ner_config_path)

In [10]:
# Switch off the iterator's `download` option.
ner_config['dataset_iterator']['download'] = False

# Point the config variables at the local folders used throughout this notebook.
ner_variables = ner_config['metadata']['variables']
ner_variables['DATA_PATH'] = 'my_data'
ner_variables['MODEL_PATH'] = 'my_bot'

In [11]:
from deeppavlov import train_model

# Train the NER model from the patched config (download=False presumably skips
# fetching remote files - confirm against the DeepPavlov docs). The trailing `;`
# keeps the return value from being echoed under the already long training log.
train_model(ner_config, download=False);

2019-08-06 14:38:27.897 INFO in 'deeppavlov.dataset_readers.dstc2_reader'['dstc2_reader'] at line 112: [loading dialogs from /home/vimary/code-projects/Pilot/examples/my_data/dstc2-trn.jsonlist]
2019-08-06 14:38:28.82 INFO in 'deeppavlov.dataset_readers.dstc2_reader'['dstc2_reader'] at line 112: [loading dialogs from /home/vimary/code-projects/Pilot/examples/my_data/dstc2-val.jsonlist]
2019-08-06 14:38:28.226 INFO in 'deeppavlov.dataset_readers.dstc2_reader'['dstc2_reader'] at line 112: [loading dialogs from /home/vimary/code-projects/Pilot/examples/my_data/dstc2-tst.jsonlist]
[nltk_data] Downloading package punkt to /home/vimary/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /home/vimary/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package perluniprops to
[nltk_data]     /home/vimary/nltk_data...
[nltk_data]   Package perluniprops is already up-to-date!
[nltk_data] Downloading 

{"valid": {"eval_examples_count": 1444, "metrics": {"ner_f1": 4.6911, "per_token_accuracy": 0.2234}, "time_spent": "0:00:01", "epochs_done": 0, "batches_seen": 0, "train_examples_seen": 0, "impatience": 0, "patience_limit": 5}}


2019-08-06 14:38:33.741 DEBUG in 'deeppavlov.metrics.fmeasure'['fmeasure'] at line 390: processed 457 tokens with 7 phrases; found: 7 phrases; correct: 0.

precision:  100.00%; recall:  100.00%; FB1:  100.00

	pricerange: precision:  100.00%; recall:  100.00%; F1:  100.00 7


I0806 14:38:33.741576 139782432462464 fmeasure.py:390] processed 457 tokens with 7 phrases; found: 7 phrases; correct: 0.

precision:  100.00%; recall:  100.00%; FB1:  100.00

	pricerange: precision:  100.00%; recall:  100.00%; F1:  100.00 7




{"train": {"eval_examples_count": 64, "metrics": {"ner_f1": 100.0, "per_token_accuracy": 1.0}, "time_spent": "0:00:02", "epochs_done": 3, "batches_seen": 100, "train_examples_seen": 6247, "learning_rate": 0.01, "momentum": null, "loss": 0.009858857964118214}}


2019-08-06 14:38:34.44 DEBUG in 'deeppavlov.metrics.fmeasure'['fmeasure'] at line 390: processed 8424 tokens with 191 phrases; found: 194 phrases; correct: 0.

precision:  98.45%; recall:  100.00%; FB1:  99.22

	pricerange: precision:  98.45%; recall:  100.00%; F1:  99.22 194


I0806 14:38:34.044127 139782432462464 fmeasure.py:390] processed 8424 tokens with 191 phrases; found: 194 phrases; correct: 0.

precision:  98.45%; recall:  100.00%; FB1:  99.22

	pricerange: precision:  98.45%; recall:  100.00%; F1:  99.22 194


2019-08-06 14:38:34.45 INFO in 'deeppavlov.core.trainers.nn_trainer'['nn_trainer'] at line 164: New best ner_f1 of 99.2208
I0806 14:38:34.045793 139782432462464 nn_trainer.py:164] New best ner_f1 of 99.2208
2019-08-06 14:38:34.46 INFO in 'deeppavlov.core.trainers.nn_trainer'['nn_trainer'] at line 166: Saving model
I0806 14:38:34.046424 139782432462464 nn_trainer.py:166] Saving model
2019-08-06 14:38:34.47 INFO in 'deeppavlov.core.models.tf_model'['tf_model'] at line 76:

{"valid": {"eval_examples_count": 1444, "metrics": {"ner_f1": 99.2208, "per_token_accuracy": 0.9996}, "time_spent": "0:00:03", "epochs_done": 5, "batches_seen": 155, "train_examples_seen": 9665, "impatience": 0, "patience_limit": 5}}


2019-08-06 14:38:34.361 DEBUG in 'deeppavlov.metrics.fmeasure'['fmeasure'] at line 390: processed 361 tokens with 13 phrases; found: 13 phrases; correct: 0.

precision:  100.00%; recall:  100.00%; FB1:  100.00

	pricerange: precision:  100.00%; recall:  100.00%; F1:  100.00 13


I0806 14:38:34.361689 139782432462464 fmeasure.py:390] processed 361 tokens with 13 phrases; found: 13 phrases; correct: 0.

precision:  100.00%; recall:  100.00%; FB1:  100.00

	pricerange: precision:  100.00%; recall:  100.00%; F1:  100.00 13




{"train": {"eval_examples_count": 64, "metrics": {"ner_f1": 100.0, "per_token_accuracy": 1.0}, "time_spent": "0:00:03", "epochs_done": 6, "batches_seen": 200, "train_examples_seen": 12494, "learning_rate": 0.01, "momentum": null, "loss": 0.00013358064141812066}}


2019-08-06 14:38:34.827 DEBUG in 'deeppavlov.metrics.fmeasure'['fmeasure'] at line 390: processed 385 tokens with 6 phrases; found: 6 phrases; correct: 0.

precision:  100.00%; recall:  100.00%; FB1:  100.00

	pricerange: precision:  100.00%; recall:  100.00%; F1:  100.00 6


I0806 14:38:34.827815 139782432462464 fmeasure.py:390] processed 385 tokens with 6 phrases; found: 6 phrases; correct: 0.

precision:  100.00%; recall:  100.00%; FB1:  100.00

	pricerange: precision:  100.00%; recall:  100.00%; F1:  100.00 6


2019-08-06 14:38:34.931 DEBUG in 'deeppavlov.metrics.fmeasure'['fmeasure'] at line 390: processed 8424 tokens with 191 phrases; found: 194 phrases; correct: 0.

precision:  98.45%; recall:  100.00%; FB1:  99.22

	pricerange: precision:  98.45%; recall:  100.00%; F1:  99.22 194


I0806 14:38:34.931679 139782432462464 fmeasure.py:390] processed 8424 tokens with 191 phrases; found: 194 phrases; correct: 0.

precision:  98.45%; recall:  100.00%; FB1:  99.22

	pricerange: precisi

{"train": {"eval_examples_count": 64, "metrics": {"ner_f1": 100.0, "per_token_accuracy": 1.0}, "time_spent": "0:00:03", "epochs_done": 9, "batches_seen": 300, "train_examples_seen": 18741, "learning_rate": 0.01, "momentum": null, "loss": 4.4304569955784244e-05}}
{"valid": {"eval_examples_count": 1444, "metrics": {"ner_f1": 99.2208, "per_token_accuracy": 0.9996}, "time_spent": "0:00:03", "epochs_done": 10, "batches_seen": 310, "train_examples_seen": 19330, "impatience": 1, "patience_limit": 5}}


2019-08-06 14:38:35.364 DEBUG in 'deeppavlov.metrics.fmeasure'['fmeasure'] at line 390: processed 418 tokens with 10 phrases; found: 10 phrases; correct: 0.

precision:  100.00%; recall:  100.00%; FB1:  100.00

	pricerange: precision:  100.00%; recall:  100.00%; F1:  100.00 10


I0806 14:38:35.364089 139782432462464 fmeasure.py:390] processed 418 tokens with 10 phrases; found: 10 phrases; correct: 0.

precision:  100.00%; recall:  100.00%; FB1:  100.00

	pricerange: precision:  100.00%; recall:  100.00%; F1:  100.00 10




{"train": {"eval_examples_count": 64, "metrics": {"ner_f1": 100.0, "per_token_accuracy": 1.0}, "time_spent": "0:00:04", "epochs_done": 12, "batches_seen": 400, "train_examples_seen": 24988, "learning_rate": 0.01, "momentum": null, "loss": 2.482367179140965e-05}}


2019-08-06 14:38:35.732 DEBUG in 'deeppavlov.metrics.fmeasure'['fmeasure'] at line 390: processed 8424 tokens with 191 phrases; found: 193 phrases; correct: 0.

precision:  98.96%; recall:  100.00%; FB1:  99.48

	pricerange: precision:  98.96%; recall:  100.00%; F1:  99.48 193


I0806 14:38:35.732610 139782432462464 fmeasure.py:390] processed 8424 tokens with 191 phrases; found: 193 phrases; correct: 0.

precision:  98.96%; recall:  100.00%; FB1:  99.48

	pricerange: precision:  98.96%; recall:  100.00%; F1:  99.48 193


2019-08-06 14:38:35.734 INFO in 'deeppavlov.core.trainers.nn_trainer'['nn_trainer'] at line 164: New best ner_f1 of 99.4792
I0806 14:38:35.734609 139782432462464 nn_trainer.py:164] New best ner_f1 of 99.4792
2019-08-06 14:38:35.735 INFO in 'deeppavlov.core.trainers.nn_trainer'['nn_trainer'] at line 166: Saving model
I0806 14:38:35.735409 139782432462464 nn_trainer.py:166] Saving model
2019-08-06 14:38:35.736 INFO in 'deeppavlov.core.models.tf_model'['tf_model'] at line

{"valid": {"eval_examples_count": 1444, "metrics": {"ner_f1": 99.4792, "per_token_accuracy": 0.9998}, "time_spent": "0:00:04", "epochs_done": 15, "batches_seen": 465, "train_examples_seen": 28995, "impatience": 0, "patience_limit": 5}}
{"train": {"eval_examples_count": 64, "metrics": {"ner_f1": 100.0, "per_token_accuracy": 1.0}, "time_spent": "0:00:04", "epochs_done": 16, "batches_seen": 500, "train_examples_seen": 31184, "learning_rate": 0.01, "momentum": null, "loss": 1.9916872644643036e-05}}


2019-08-06 14:38:36.495 DEBUG in 'deeppavlov.metrics.fmeasure'['fmeasure'] at line 390: processed 419 tokens with 8 phrases; found: 8 phrases; correct: 0.

precision:  100.00%; recall:  100.00%; FB1:  100.00

	pricerange: precision:  100.00%; recall:  100.00%; F1:  100.00 8


I0806 14:38:36.495473 139782432462464 fmeasure.py:390] processed 419 tokens with 8 phrases; found: 8 phrases; correct: 0.

precision:  100.00%; recall:  100.00%; FB1:  100.00

	pricerange: precision:  100.00%; recall:  100.00%; F1:  100.00 8


2019-08-06 14:38:36.640 DEBUG in 'deeppavlov.metrics.fmeasure'['fmeasure'] at line 390: processed 8424 tokens with 191 phrases; found: 194 phrases; correct: 0.

precision:  98.45%; recall:  100.00%; FB1:  99.22

	pricerange: precision:  98.45%; recall:  100.00%; F1:  99.22 194


I0806 14:38:36.640517 139782432462464 fmeasure.py:390] processed 8424 tokens with 191 phrases; found: 194 phrases; correct: 0.

precision:  98.45%; recall:  100.00%; FB1:  99.22

	pricerange: precisi

{"train": {"eval_examples_count": 64, "metrics": {"ner_f1": 100.0, "per_token_accuracy": 1.0}, "time_spent": "0:00:05", "epochs_done": 19, "batches_seen": 600, "train_examples_seen": 37431, "learning_rate": 0.01, "momentum": null, "loss": 1.20993732841157e-05}}
{"valid": {"eval_examples_count": 1444, "metrics": {"ner_f1": 99.2208, "per_token_accuracy": 0.9996}, "time_spent": "0:00:05", "epochs_done": 20, "batches_seen": 620, "train_examples_seen": 38660, "impatience": 1, "patience_limit": 5}}


2019-08-06 14:38:37.40 DEBUG in 'deeppavlov.metrics.fmeasure'['fmeasure'] at line 390: processed 350 tokens with 6 phrases; found: 6 phrases; correct: 0.

precision:  100.00%; recall:  100.00%; FB1:  100.00

	pricerange: precision:  100.00%; recall:  100.00%; F1:  100.00 6


I0806 14:38:37.040794 139782432462464 fmeasure.py:390] processed 350 tokens with 6 phrases; found: 6 phrases; correct: 0.

precision:  100.00%; recall:  100.00%; FB1:  100.00

	pricerange: precision:  100.00%; recall:  100.00%; F1:  100.00 6




{"train": {"eval_examples_count": 64, "metrics": {"ner_f1": 100.0, "per_token_accuracy": 1.0}, "time_spent": "0:00:06", "epochs_done": 22, "batches_seen": 700, "train_examples_seen": 43678, "learning_rate": 0.01, "momentum": null, "loss": 9.790402919236384e-06}}


2019-08-06 14:38:37.453 DEBUG in 'deeppavlov.metrics.fmeasure'['fmeasure'] at line 390: processed 8424 tokens with 191 phrases; found: 194 phrases; correct: 0.

precision:  98.45%; recall:  100.00%; FB1:  99.22

	pricerange: precision:  98.45%; recall:  100.00%; F1:  99.22 194


I0806 14:38:37.453161 139782432462464 fmeasure.py:390] processed 8424 tokens with 191 phrases; found: 194 phrases; correct: 0.

precision:  98.45%; recall:  100.00%; FB1:  99.22

	pricerange: precision:  98.45%; recall:  100.00%; F1:  99.22 194


2019-08-06 14:38:37.454 INFO in 'deeppavlov.core.trainers.nn_trainer'['nn_trainer'] at line 170: Did not improve on the ner_f1 of 99.4792
I0806 14:38:37.454668 139782432462464 nn_trainer.py:170] Did not improve on the ner_f1 of 99.4792
2019-08-06 14:38:37.589 DEBUG in 'deeppavlov.metrics.fmeasure'['fmeasure'] at line 390: processed 375 tokens with 7 phrases; found: 7 phrases; correct: 0.

precision:  100.00%; recall:  100.00%; FB1:  100.00

	pricerange: precision:  100

{"valid": {"eval_examples_count": 1444, "metrics": {"ner_f1": 99.2208, "per_token_accuracy": 0.9996}, "time_spent": "0:00:06", "epochs_done": 25, "batches_seen": 775, "train_examples_seen": 48325, "impatience": 2, "patience_limit": 5}}
{"train": {"eval_examples_count": 64, "metrics": {"ner_f1": 100.0, "per_token_accuracy": 1.0}, "time_spent": "0:00:06", "epochs_done": 25, "batches_seen": 800, "train_examples_seen": 49925, "learning_rate": 0.01, "momentum": null, "loss": 1.0596876233961438e-05}}


2019-08-06 14:38:38.107 DEBUG in 'deeppavlov.metrics.fmeasure'['fmeasure'] at line 390: processed 407 tokens with 10 phrases; found: 10 phrases; correct: 0.

precision:  100.00%; recall:  100.00%; FB1:  100.00

	pricerange: precision:  100.00%; recall:  100.00%; F1:  100.00 10


I0806 14:38:38.107974 139782432462464 fmeasure.py:390] processed 407 tokens with 10 phrases; found: 10 phrases; correct: 0.

precision:  100.00%; recall:  100.00%; FB1:  100.00

	pricerange: precision:  100.00%; recall:  100.00%; F1:  100.00 10


2019-08-06 14:38:38.294 DEBUG in 'deeppavlov.metrics.fmeasure'['fmeasure'] at line 390: processed 8424 tokens with 191 phrases; found: 194 phrases; correct: 0.

precision:  98.45%; recall:  100.00%; FB1:  99.22

	pricerange: precision:  98.45%; recall:  100.00%; F1:  99.22 194


I0806 14:38:38.294604 139782432462464 fmeasure.py:390] processed 8424 tokens with 191 phrases; found: 194 phrases; correct: 0.

precision:  98.45%; recall:  100.00%; FB1:  99.22

	pricerange: p

{"train": {"eval_examples_count": 64, "metrics": {"ner_f1": 100.0, "per_token_accuracy": 1.0}, "time_spent": "0:00:07", "epochs_done": 29, "batches_seen": 900, "train_examples_seen": 56121, "learning_rate": 0.01, "momentum": null, "loss": 6.457263488357512e-06}}
{"valid": {"eval_examples_count": 1444, "metrics": {"ner_f1": 99.2208, "per_token_accuracy": 0.9996}, "time_spent": "0:00:07", "epochs_done": 30, "batches_seen": 930, "train_examples_seen": 57990, "impatience": 3, "patience_limit": 5}}


2019-08-06 14:38:38.665 DEBUG in 'deeppavlov.metrics.fmeasure'['fmeasure'] at line 390: processed 350 tokens with 4 phrases; found: 4 phrases; correct: 0.

precision:  100.00%; recall:  100.00%; FB1:  100.00

	pricerange: precision:  100.00%; recall:  100.00%; F1:  100.00 4


I0806 14:38:38.665331 139782432462464 fmeasure.py:390] processed 350 tokens with 4 phrases; found: 4 phrases; correct: 0.

precision:  100.00%; recall:  100.00%; FB1:  100.00

	pricerange: precision:  100.00%; recall:  100.00%; F1:  100.00 4




{"train": {"eval_examples_count": 64, "metrics": {"ner_f1": 100.0, "per_token_accuracy": 1.0}, "time_spent": "0:00:07", "epochs_done": 32, "batches_seen": 1000, "train_examples_seen": 62368, "learning_rate": 0.01, "momentum": null, "loss": 5.799743025960425e-06}}


2019-08-06 14:38:39.126 DEBUG in 'deeppavlov.metrics.fmeasure'['fmeasure'] at line 390: processed 8424 tokens with 191 phrases; found: 194 phrases; correct: 0.

precision:  98.45%; recall:  100.00%; FB1:  99.22

	pricerange: precision:  98.45%; recall:  100.00%; F1:  99.22 194


I0806 14:38:39.126231 139782432462464 fmeasure.py:390] processed 8424 tokens with 191 phrases; found: 194 phrases; correct: 0.

precision:  98.45%; recall:  100.00%; FB1:  99.22

	pricerange: precision:  98.45%; recall:  100.00%; F1:  99.22 194


2019-08-06 14:38:39.127 INFO in 'deeppavlov.core.trainers.nn_trainer'['nn_trainer'] at line 170: Did not improve on the ner_f1 of 99.4792
I0806 14:38:39.127966 139782432462464 nn_trainer.py:170] Did not improve on the ner_f1 of 99.4792
2019-08-06 14:38:39.220 DEBUG in 'deeppavlov.metrics.fmeasure'['fmeasure'] at line 390: processed 409 tokens with 9 phrases; found: 9 phrases; correct: 0.

precision:  100.00%; recall:  100.00%; FB1:  100.00

	pricerange: precision:  100

{"valid": {"eval_examples_count": 1444, "metrics": {"ner_f1": 99.2208, "per_token_accuracy": 0.9996}, "time_spent": "0:00:08", "epochs_done": 35, "batches_seen": 1085, "train_examples_seen": 67655, "impatience": 4, "patience_limit": 5}}
{"train": {"eval_examples_count": 64, "metrics": {"ner_f1": 100.0, "per_token_accuracy": 1.0}, "time_spent": "0:00:08", "epochs_done": 35, "batches_seen": 1100, "train_examples_seen": 68615, "learning_rate": 0.01, "momentum": null, "loss": 5.171824187542029e-06}}


2019-08-06 14:38:39.697 DEBUG in 'deeppavlov.metrics.fmeasure'['fmeasure'] at line 390: processed 405 tokens with 14 phrases; found: 14 phrases; correct: 0.

precision:  100.00%; recall:  100.00%; FB1:  100.00

	pricerange: precision:  100.00%; recall:  100.00%; F1:  100.00 14


I0806 14:38:39.697935 139782432462464 fmeasure.py:390] processed 405 tokens with 14 phrases; found: 14 phrases; correct: 0.

precision:  100.00%; recall:  100.00%; FB1:  100.00

	pricerange: precision:  100.00%; recall:  100.00%; F1:  100.00 14




{"train": {"eval_examples_count": 64, "metrics": {"ner_f1": 100.0, "per_token_accuracy": 1.0}, "time_spent": "0:00:08", "epochs_done": 38, "batches_seen": 1200, "train_examples_seen": 74862, "learning_rate": 0.01, "momentum": null, "loss": 3.835173593529362e-06}}


2019-08-06 14:38:39.948 DEBUG in 'deeppavlov.metrics.fmeasure'['fmeasure'] at line 390: processed 8424 tokens with 191 phrases; found: 194 phrases; correct: 0.

precision:  98.45%; recall:  100.00%; FB1:  99.22

	pricerange: precision:  98.45%; recall:  100.00%; F1:  99.22 194


I0806 14:38:39.948005 139782432462464 fmeasure.py:390] processed 8424 tokens with 191 phrases; found: 194 phrases; correct: 0.

precision:  98.45%; recall:  100.00%; FB1:  99.22

	pricerange: precision:  98.45%; recall:  100.00%; F1:  99.22 194


2019-08-06 14:38:39.949 INFO in 'deeppavlov.core.trainers.nn_trainer'['nn_trainer'] at line 170: Did not improve on the ner_f1 of 99.4792
I0806 14:38:39.949785 139782432462464 nn_trainer.py:170] Did not improve on the ner_f1 of 99.4792
2019-08-06 14:38:39.980 INFO in 'deeppavlov.core.models.lr_scheduled_model'['lr_scheduled_model'] at line 430: New learning rate dividor = 10.0
I0806 14:38:39.980058 139782432462464 lr_scheduled_model.py:430] New learning rate dividor = 

{"valid": {"eval_examples_count": 1444, "metrics": {"ner_f1": 99.2208, "per_token_accuracy": 0.9996}, "time_spent": "0:00:08", "epochs_done": 40, "batches_seen": 1240, "train_examples_seen": 77320, "impatience": 5, "patience_limit": 5}}


2019-08-06 14:38:40.695 INFO in 'deeppavlov.core.models.tf_model'['tf_model'] at line 52: [loading model from /home/vimary/code-projects/Pilot/examples/my_bot/model]
I0806 14:38:40.695365 139782432462464 tf_model.py:52] [loading model from /home/vimary/code-projects/Pilot/examples/my_bot/model]
2019-08-06 14:38:40.841 DEBUG in 'deeppavlov.metrics.fmeasure'['fmeasure'] at line 390: processed 8424 tokens with 191 phrases; found: 193 phrases; correct: 0.

precision:  98.96%; recall:  100.00%; FB1:  99.48

	pricerange: precision:  98.96%; recall:  100.00%; F1:  99.48 193


I0806 14:38:40.841041 139782432462464 fmeasure.py:390] processed 8424 tokens with 191 phrases; found: 193 phrases; correct: 0.

precision:  98.96%; recall:  100.00%; FB1:  99.48

	pricerange: precision:  98.96%; recall:  100.00%; F1:  99.48 193


2019-08-06 14:38:40.911 DEBUG in 'deeppavlov.metrics.fmeasure'['fmeasure'] at line 390: processed 7797 tokens with 177 phrases; found: 178 phrases; correct: 0.

precision:  99.4

{"valid": {"eval_examples_count": 1444, "metrics": {"ner_f1": 99.4792, "per_token_accuracy": 0.9998}, "time_spent": "0:00:01"}}
{"test": {"eval_examples_count": 1386, "metrics": {"ner_f1": 99.7183, "per_token_accuracy": 0.9999}, "time_spent": "0:00:01"}}


2019-08-06 14:38:41.451 INFO in 'deeppavlov.core.models.tf_model'['tf_model'] at line 52: [loading model from /home/vimary/code-projects/Pilot/examples/my_bot/model]
I0806 14:38:41.451829 139782432462464 tf_model.py:52] [loading model from /home/vimary/code-projects/Pilot/examples/my_bot/model]


Tags (& counts) found in the data are:

In [12]:
# Print the tag vocabulary saved during training: one tag and its count per line.
!cat my_bot/tag.dict

O	11573
B-pricerange	268


In [13]:
from deeppavlov import build_model

# Build the pipeline from the same config; the log below shows the vocabularies
# and the model being loaded from my_bot/.
ner = build_model(ner_config, download=False)

2019-08-06 14:39:27.415 INFO in 'deeppavlov.core.data.simple_vocab'['simple_vocab'] at line 112: [loading vocabulary from /home/vimary/code-projects/Pilot/examples/my_bot/word.dict]
I0806 14:39:27.415698 139782432462464 simple_vocab.py:112] [loading vocabulary from /home/vimary/code-projects/Pilot/examples/my_bot/word.dict]
2019-08-06 14:39:27.419 INFO in 'deeppavlov.core.data.simple_vocab'['simple_vocab'] at line 112: [loading vocabulary from /home/vimary/code-projects/Pilot/examples/my_bot/tag.dict]
I0806 14:39:27.419156 139782432462464 simple_vocab.py:112] [loading vocabulary from /home/vimary/code-projects/Pilot/examples/my_bot/tag.dict]
2019-08-06 14:39:28.147 INFO in 'deeppavlov.core.models.tf_model'['tf_model'] at line 52: [loading model from /home/vimary/code-projects/Pilot/examples/my_bot/model]
I0806 14:39:28.147900 139782432462464 tf_model.py:52] [loading model from /home/vimary/code-projects/Pilot/examples/my_bot/model]


In [14]:
# Run the tagger on a batch with one utterance; the result pairs the token lists
# with their predicted tag lists.
sample_utterances = ['hi i want some cheap food']
ner(sample_utterances)

[[['hi', 'i', 'want', 'some', 'cheap', 'food']],
 [['O', 'O', 'O', 'O', 'B-pricerange', 'O']]]

## 3. Train bot

In [None]:
# Full go-bot pipeline configuration.
# Every "{NAME}" placeholder must be declared under metadata.variables
# (DEEPPAVLOV_PATH is expected to be supplied by DeepPavlov itself).
config = {
  "dataset_reader": {
    "class_name": "dstc2_reader",
    "data_path": "{DOWNLOADS_PATH}"
  },
  "dataset_iterator": {
    "class_name": "dialog_iterator"
  },
  "chainer": {
    "in": ["x"],
    "in_y": ["y"],
    "out": ["y_predicted"],
    "pipe": [
      {
        # Tokenize user utterances so the word vocabulary can be fitted on them.
        "class_name": "deeppavlov.models.go_bot.wrapper:DialogComponentWrapper",
        "component": {
            "class_name": "split_tokenizer"
        },
        "in": ["x"],
        "out": ["x_tokens"]
      },
      {
        "id": "word_vocab",
        "class_name": "simple_vocab",
        "fit_on": ["x_tokens"],
        "save_path": "{BOT_PATH}/word.dict",
        "load_path": "{BOT_PATH}/word.dict"
      },
      {
        # Same table / primary key as the database built in step 1, which was
        # saved to my_bot/db.sqlite - point at that file rather than at the
        # data folder, where no database was created.
        "id": "restaurant_database",
        "class_name": "sqlite_database",
        "table_name": "mytable",
        "primary_keys": ["name"],
        "save_path": "{ROOT_PATH}/my_bot/db.sqlite"
      },
      {
        "class_name": "go_bot",
        "load_path": "{BOT_PATH}/model",
        "save_path": "{BOT_PATH}/model",
        "in": ["x"],
        "in_y": ["y"],
        "out": ["y_predicted"],
        "main": True,
        "debug": False,
        "learning_rate": 0.003,
        "learning_rate_drop_patience": 5,
        "learning_rate_drop_div": 10.0,
        "momentum": 0.95,
        "optimizer": "tensorflow.train:AdamOptimizer",
        "clip_norm": 2.0,
        "dropout_rate": 0.4,
        "l2_reg_coef": 3e-4,
        "hidden_size": 128,
        "dense_size": 160,
        "word_vocab": "#word_vocab",
        "template_path": "{DOWNLOADS_PATH}/templates.txt",
        "template_type": "DualTemplate",
        "database": "#restaurant_database",
        "api_call_action": "api_call",
        "use_action_mask": False,
        "slot_filler": {
          "config_path": "{CONFIGS_PATH}/ner/slotfill_dstc2.json"
        },
        "intent_classifier": None,
        "embedder": {
          "class_name": "glove",
          # Must match the folder the "download" entry below unpacks into.
          "load_path": "{DOWNLOADS_PATH}/embeddings/glove.6B.100d.txt"
        },
        "bow_embedder": {
          "class_name": "bow",
          "depth": "#word_vocab.__len__()",
          "with_counts": True
        },
        "tokenizer": {
          "class_name": "stream_spacy_tokenizer",
          "lowercase": False
        },
        "tracker": {
          "class_name": "featurized_tracker",
          "slot_names": ["pricerange", "this", "area", "food", "name"]
        }
      }
    ]
  },
  "train": {
    "epochs": 200,
    "batch_size": 8,

    "metrics": ["per_item_dialog_accuracy"],
    "validation_patience": 10,
    "val_every_n_batches": 15,

    "log_every_n_batches": 15,
    "show_examples": False,
    "evaluation_targets": [
      "valid",
      "test"
    ],
    "class_name": "nn_trainer"
  },
  "metadata": {
    "variables": {
      "ROOT_PATH": ".",
      "DOWNLOADS_PATH": "{ROOT_PATH}/my_data",
      "MODEL_PATH": "{ROOT_PATH}/my_model",
      # The pipeline above refers to {BOT_PATH}; declare it (as an alias of
      # MODEL_PATH) so the placeholder can be resolved. Override it here to
      # store the bot's vocabulary and model elsewhere.
      "BOT_PATH": "{MODEL_PATH}",
      "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs"
    },
    "requirements": [
      "{DEEPPAVLOV_PATH}/requirements/tf.txt",
      "{DEEPPAVLOV_PATH}/requirements/gensim.txt",
      "{DEEPPAVLOV_PATH}/requirements/spacy.txt",
      "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt"
    ],
    "labels": {
      "telegram_utils": "GoalOrientedBot",
      "server_utils": "GoalOrientedBot"
    },
    "download": [
      {
        "url": "http://files.deeppavlov.ai/embeddings/glove.6B.100d.txt",
        "subdir": "{DOWNLOADS_PATH}/embeddings"
      }
    ]
  }
}

In [None]:
from deeppavlov import configs, parse_json

