## You can also run the notebook in [COLAB](https://colab.research.google.com/github/deepmipt/DeepPavlov/blob/master/examples/gobot_tutorial.ipynb).

In [None]:
!pip install deeppavlov

## Data Preparation

In [5]:
from deeppavlov.dataset_readers.dstc2_reader import DSTC2DatasetReader

# Read the DSTC2 dialogs from the 'my_data' folder; per the log output of this
# cell, the archive is downloaded and extracted there before loading.
dstc2_reader = DSTC2DatasetReader()
data = dstc2_reader.read('my_data')

2019-08-01 17:49:21.26 INFO in 'deeppavlov.dataset_readers.dstc2_reader'['dstc2_reader'] at line 95: [downloading data from http://files.deeppavlov.ai/datasets/dstc2_v2.tar.gz to my_data]
2019-08-01 17:49:21.551 INFO in 'deeppavlov.core.data.utils'['utils'] at line 63: Downloading from http://files.deeppavlov.ai/datasets/dstc2_v2.tar.gz to my_data/dstc2_v2.tar.gz
100%|██████████| 506k/506k [00:00<00:00, 51.4MB/s]
2019-08-01 17:49:21.566 INFO in 'deeppavlov.core.data.utils'['utils'] at line 201: Extracting my_data/dstc2_v2.tar.gz archive into my_data
2019-08-01 17:49:21.588 INFO in 'deeppavlov.dataset_readers.dstc2_reader'['dstc2_reader'] at line 112: [loading dialogs from my_data/dstc2-trn.jsonlist]
2019-08-01 17:49:21.761 INFO in 'deeppavlov.dataset_readers.dstc2_reader'['dstc2_reader'] at line 112: [loading dialogs from my_data/dstc2-val.jsonlist]
2019-08-01 17:49:21.902 INFO in 'deeppavlov.dataset_readers.dstc2_reader'['dstc2_reader'] at line 112: [loading dialogs from my_data/dstc2

In [6]:
# Peek at one raw training sample. The output is a pair of dicts: the first
# carries 'text' and 'intents', the second carries 'text' and 'act'.
data['train'][1]

({'text': 'cheap restaurant',
  'intents': [{'slots': [['pricerange', 'cheap']], 'act': 'inform'}]},
 {'text': 'What kind of food would you like?', 'act': 'request_food'})

In [7]:
from deeppavlov.dataset_iterators.dialog_iterator import DialogDatasetIterator

# Wrap the data read above in a dialog iterator; the following cells access it
# through `iterator.train` and `iterator.gen_batches(...)`.
iterator = DialogDatasetIterator(data)

In [8]:
# Take the first training dialog, unpack its two sides, and display the x side.
first_train_dialog = iterator.train[0]
x_dialog, y_dialog = first_train_dialog
x_dialog

[{'text': '', 'intents': [], 'prev_resp_act': None},
 {'text': 'cheap restaurant',
  'intents': [{'slots': [['pricerange', 'cheap']], 'act': 'inform'}],
  'prev_resp_act': 'welcomemsg'},
 {'text': 'any',
  'intents': [{'slots': [['this', 'dontcare']], 'act': 'inform'}],
  'prev_resp_act': 'request_food'},
 {'text': 'south',
  'intents': [{'slots': [['area', 'south']], 'act': 'inform'}],
  'prev_resp_act': 'request_area'},
 {'text': 'south',
  'intents': [{'slots': [['area', 'south']], 'act': 'inform'}],
  'db_result': {'food': 'chinese',
   'pricerange': 'cheap',
   'area': 'south',
   'addr': 'cambridge leisure park clifton way cherry hinton',
   'phone': '01223 244277',
   'postcode': 'c.b 1, 7 d.y',
   'name': 'the lucky star'},
  'prev_resp_act': 'api_call'},
 {'text': 'address',
  'intents': [{'slots': [['slot', 'addr']], 'act': 'request'}],
  'prev_resp_act': 'inform_area+inform_food+offer_name'},
 {'text': 'phone number',
  'intents': [{'slots': [['slot', 'phone']], 'act': 'requ

## Create database of items

In [27]:
from deeppavlov.core.data.sqlite_database import Sqlite3Database

# Database wrapper for table "mytable", keyed by "name", stored in
# my_data/db.sqlite.
db_kwargs = {
    "table_name": "mytable",
    "primary_keys": ["name"],
    "save_path": "my_data/db.sqlite",
}
database = Sqlite3Database(**db_kwargs)

2019-08-01 18:03:06.262 INFO in 'deeppavlov.core.data.sqlite_database'['sqlite_database'] at line 63: Loading database from /home/vimary/code-projects/Pilot/examples/my_data/db.sqlite.


In [38]:
# Populate the database with every `db_result` found in the dataset.
#
# As the `x_dialog` output above shows, `db_result` is attached to the x-side
# (user) turns, not to the y-side responses, so the x side is scanned here.
# NOTE(review): `gen_batches` is expected to yield a pair
# (batch of x dialogs, batch of y dialogs), each dialog being a list of turn
# dicts -- confirm against DataLearningIterator.gen_batches.
for dialog in iterator.gen_batches(batch_size=1, data_type='all'):
    turns_x, turns_y = dialog
    db_results = [
        turn['db_result']
        for dialog_turns in turns_x   # every dialog in the batch
        for turn in dialog_turns      # every user turn of that dialog
        if turn.get('db_result')      # skip turns with no (or empty) result
    ]
    if db_results:
        database.fit(db_results)

In [41]:
# Look up items matching one set of constraints; the database is called with a
# list of queries, here containing a single one.
query = {'pricerange': 'cheap', 'area': 'south'}
database([query])

[[{'pricerange': 'cheap',
   'food': 'chinese',
   'name': 'the lucky star',
   'phone': '01223 244277',
   'addr': 'cambridge leisure park clifton way cherry hinton',
   'postcode': 'c.b 1, 7 d.y',
   'area': 'south'},
  {'pricerange': 'cheap',
   'food': 'portuguese',
   'name': 'nandos',
   'phone': '01223 327908',
   'addr': 'cambridge leisure park clifton way',
   'postcode': 'c.b 1, 7 d.y',
   'area': 'south'}]]

In [42]:
# List the files currently present in the data folder.
!ls my_data

db.sqlite	     dstc2-trn.jsonlist  dstc2-val.jsonlist
dstc2-templates.txt  dstc2-tst.jsonlist  resto.sqlite


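## Train NER

_TODO: this section is still empty. The bot config below points its `slot_filler` at `{CONFIGS_PATH}/ner/slotfill_dstc2.json`._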

## Train bot

In [None]:
# Go-bot training configuration, written as a Python dict.
#
# `{NAME}` placeholders inside strings refer to entries of
# config["metadata"]["variables"] (plus DEEPPAVLOV_PATH, which is not defined
# here and is presumably supplied by the library -- confirm).
# Strings starting with "#" (e.g. "#word_vocab") refer to a pipe component by
# its "id".
config = {
  "dataset_reader": {
    "class_name": "dstc2_reader",
    "data_path": "{DOWNLOADS_PATH}"
  },
  "dataset_iterator": {
    "class_name": "dialog_iterator"
  },
  "chainer": {
    "in": ["x"],
    "in_y": ["y"],
    "out": ["y_predicted"],
    "pipe": [
      {
        "class_name": "deeppavlov.models.go_bot.wrapper:DialogComponentWrapper",
        "component": {
            "class_name": "split_tokenizer"
        },
        "in": ["x"],
        "out": ["x_tokens"]
      },
      {
        "id": "word_vocab",
        "class_name": "simple_vocab",
        "fit_on": ["x_tokens"],
        # MODEL_PATH is the variable actually defined under
        # metadata.variables (the previous BOT_PATH was never defined).
        "save_path": "{MODEL_PATH}/word.dict",
        "load_path": "{MODEL_PATH}/word.dict"
      },
      {
        # Same table / key / file as the database filled earlier in the notebook.
        "id": "restaurant_database",
        "class_name": "sqlite_database",
        "table_name": "mytable",
        "primary_keys": ["name"],
        "save_path": "{DOWNLOADS_PATH}/db.sqlite"
      },
      {
        "class_name": "go_bot",
        "load_path": "{MODEL_PATH}/model",
        "save_path": "{MODEL_PATH}/model",
        "in": ["x"],
        "in_y": ["y"],
        "out": ["y_predicted"],
        "main": True,
        "debug": False,
        "learning_rate": 0.003,
        "learning_rate_drop_patience": 5,
        "learning_rate_drop_div": 10.0,
        "momentum": 0.95,
        "optimizer": "tensorflow.train:AdamOptimizer",
        "clip_norm": 2.0,
        "dropout_rate": 0.4,
        "l2_reg_coef": 3e-4,
        "hidden_size": 128,
        "dense_size": 160,
        "word_vocab": "#word_vocab",
        # The downloaded dataset names the file dstc2-templates.txt
        # (see the `ls my_data` listing above), not templates.txt.
        "template_path": "{DOWNLOADS_PATH}/dstc2-templates.txt",
        "template_type": "DualTemplate",
        "database": "#restaurant_database",
        "api_call_action": "api_call",
        "use_action_mask": False,
        "slot_filler": {
          "config_path": "{CONFIGS_PATH}/ner/slotfill_dstc2.json"
        },
        "intent_classifier": None,
        "embedder": {
          "class_name": "glove",
          # Must match metadata.download below, which places the embeddings
          # under {DOWNLOADS_PATH}/embeddings.
          "load_path": "{DOWNLOADS_PATH}/embeddings/glove.6B.100d.txt"
        },
        "bow_embedder": {
          "class_name": "bow",
          "depth": "#word_vocab.__len__()",
          "with_counts": True
        },
        "tokenizer": {
          "class_name": "stream_spacy_tokenizer",
          "lowercase": False
        },
        "tracker": {
          "class_name": "featurized_tracker",
          "slot_names": ["pricerange", "this", "area", "food", "name"]
        }
      }
    ]
  },
  "train": {
    "epochs": 200,
    "batch_size": 8,

    "metrics": ["per_item_dialog_accuracy"],
    "validation_patience": 10,
    "val_every_n_batches": 15,

    "log_every_n_batches": 15,
    "show_examples": False,
    "evaluation_targets": [
      "valid",
      "test"
    ],
    "class_name": "nn_trainer"
  },
  "metadata": {
    "variables": {
      "ROOT_PATH": ".",
      "DOWNLOADS_PATH": "{ROOT_PATH}/my_data",
      "MODEL_PATH": "{ROOT_PATH}/my_model",
      "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs"
    },
    "requirements": [
      "{DEEPPAVLOV_PATH}/requirements/tf.txt",
      "{DEEPPAVLOV_PATH}/requirements/gensim.txt",
      "{DEEPPAVLOV_PATH}/requirements/spacy.txt",
      "{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt"
    ],
    "labels": {
      "telegram_utils": "GoalOrientedBot",
      "server_utils": "GoalOrientedBot"
    },
    "download": [
      {
        "url": "http://files.deeppavlov.ai/embeddings/glove.6B.100d.txt",
        "subdir": "{DOWNLOADS_PATH}/embeddings"
      }
    ]
  }
}