<a href="https://colab.research.google.com/github/peterbussch/russian-nlp/blob/main/deeppavlov_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install deeppavlov
!python -m deeppavlov install gobot_dstc2_minimal

In [3]:
from deeppavlov.dataset_readers.dstc2_reader import SimpleDSTC2DatasetReader


class AssistantDatasetReader(SimpleDSTC2DatasetReader):
    """Reader for the small "assistant" tutorial dataset.

    Only the download location and the per-split file naming are overridden
    here; everything else is inherited from ``SimpleDSTC2DatasetReader``.
    """

    # Archive that unpacks to assistant-{trn,val,tst}.json plus a templates file.
    url = "http://files.deeppavlov.ai/datasets/tutor_assistant_data.tar.gz"

    @staticmethod
    def _data_fname(datatype):
        """Return the JSON file name that holds the given split.

        Args:
            datatype: split identifier; one of 'val', 'trn' or 'tst'.

        Returns:
            A file name of the form ``assistant-<datatype>.json``.

        Raises:
            AssertionError: if ``datatype`` is not one of the known splits.
        """
        known_splits = ('val', 'trn', 'tst')
        assert datatype in known_splits, "wrong datatype name"
        return "assistant-{}.json".format(datatype)

In [4]:
# Instantiate the reader and load the dialogs from ./assistant_data. Per the
# log output of this cell, the dataset archive is downloaded and extracted
# into that directory before the dialogs are loaded.
data = AssistantDatasetReader().read('assistant_data')

2022-04-11 17:23:26.212 INFO in 'deeppavlov.dataset_readers.dstc2_reader'['dstc2_reader'] at line 269: [PosixPath('assistant_data/assistant-val.json'), PosixPath('assistant_data/assistant-tst.json')]]
2022-04-11 17:23:26.214 INFO in 'deeppavlov.dataset_readers.dstc2_reader'['dstc2_reader'] at line 270: [downloading data from http://files.deeppavlov.ai/datasets/tutor_assistant_data.tar.gz to assistant_data]
2022-04-11 17:23:26.216 INFO in 'deeppavlov.core.data.utils'['utils'] at line 95: Downloading from http://files.deeppavlov.ai/datasets/tutor_assistant_data.tar.gz to assistant_data/tutor_assistant_data.tar.gz
  utils.DeprecatedIn35,
100%|██████████| 838/838 [00:00<00:00, 295kB/s]
2022-04-11 17:23:26.895 INFO in 'deeppavlov.core.data.utils'['utils'] at line 272: Extracting assistant_data/tutor_assistant_data.tar.gz archive into assistant_data
2022-04-11 17:23:26.904 INFO in 'deeppavlov.dataset_readers.dstc2_reader'['dstc2_reader'] at line 290: [loading dialogs from assistant_data/assi

In [5]:
# List the files that ended up in the dataset directory.
!ls assistant_data

assistant-templates.txt  assistant-tst.json
assistant-trn.json	 assistant-val.json


In [6]:
# Print the first 310 lines of the training split to inspect the raw dialog JSON.
!head -n 310 assistant_data/assistant-trn.json

[
  [
    {
      "speaker": 1,
      "text": "hi"
    },
    {
      "speaker": 2,
      "text": "Hello, what is the weather today?",
      "act": "welcome_msg"
    },
    {
      "speaker": 1,
      "text": "Quite sunny outside"
    },
    {
      "speaker": 2,
      "text": "Then you should cycle!",
      "act": "suggest_cycling"
    },
    {
      "speaker": 1,
      "text": "Thanks! Great idea"
    },
    {
      "speaker": 2,
      "text": "You are welcome! Bye!",
      "act": "good_bye"
    }
  ],
  [
    {
      "speaker": 1,
      "text": "hey, bot"
    },
    {
      "speaker": 2,
      "text": "Hello, what is the weather today?",
      "act": "welcome_msg"
    },
    {
      "speaker": 1,
      "text": "raining a lot"
    },
    {
      "speaker": 2,
      "text": "Then you should try hot chinese tea!",
      "act": "suggest_tea"
    },
    {
      "speaker": 1,
      "text": "nice. thank you"
    },
    {
      "speaker": 2,
      "text": "You are welcome! Bye!",
      "act

In [7]:
from deeppavlov.dataset_iterators.dialog_iterator import DialogDatasetIterator

# Wrap the loaded data in a dialog iterator; it is used below to draw batches
# (gen_batches) and to fetch whole splits (get_instances).
iterator = DialogDatasetIterator(data)

In [8]:
from pprint import pprint

# Peek at the first training batch only, then stop iterating.
for dialog in iterator.gen_batches(batch_size=1, data_type='train'):
    turns_x, turns_y = dialog

    # Each section is a heading followed by the first dialog's turns.
    sections = (
        ("User utterances:\n----------------\n", turns_x),
        ("\nSystem responses:\n-----------------\n", turns_y),
    )
    for heading, turns in sections:
        print(heading)
        pprint(turns[0], indent=4)

    break

# Report how many dialogs each split contains.
print("\n-----------------")
for split in ('train', 'valid', 'test'):
    print(f"{len(iterator.get_instances(split)[0])} dialog(s) in {split}.")

User utterances:
----------------

[   {'prev_resp_act': None, 'text': 'good evening'},
    {'prev_resp_act': 'welcome_msg', 'text': 'mainly cloudy and gray'},
    {'prev_resp_act': 'suggest_tea', 'text': 'that sounds good'}]

System responses:
-----------------

[   {'act': 'welcome_msg', 'text': 'Hello, what is the weather today?'},
    {'act': 'suggest_tea', 'text': 'Then you should try hot chinese tea!'},
    {'act': 'good_bye', 'text': 'You are welcome! Bye!'}]

-----------------
8 dialog(s) in train.
1 dialog(s) in valid.
1 dialog(s) in test.


In [9]:
# Print the first 31 lines of the training split: exactly the first dialog
# plus the opening bracket of the second one.
!head -n 31 assistant_data/assistant-trn.json

[
  [
    {
      "speaker": 1,
      "text": "hi"
    },
    {
      "speaker": 2,
      "text": "Hello, what is the weather today?",
      "act": "welcome_msg"
    },
    {
      "speaker": 1,
      "text": "Quite sunny outside"
    },
    {
      "speaker": 2,
      "text": "Then you should cycle!",
      "act": "suggest_cycling"
    },
    {
      "speaker": 1,
      "text": "Thanks! Great idea"
    },
    {
      "speaker": 2,
      "text": "You are welcome! Bye!",
      "act": "good_bye"
    }
  ],
  [


In [10]:
from deeppavlov import configs
from deeppavlov.core.common.file import read_json

# Load the stock gobot_dstc2_minimal config into memory; the following cells
# edit this object in place before training.
gobot_config = read_json(configs.go_bot.gobot_dstc2_minimal)

In [11]:
from deeppavlov.download import download_resource

# Fetch the glove.6B.100d.txt embeddings file into ./assistant_bot/
# (about 347 MB according to the progress output of this cell).
download_resource(url="http://files.deeppavlov.ai/embeddings/glove.6B.100d.txt",
                  dest_paths=['assistant_bot/'])

  utils.DeprecatedIn35,
2022-04-11 17:26:39.218 INFO in 'deeppavlov.core.data.utils'['utils'] at line 95: Downloading from http://files.deeppavlov.ai/embeddings/glove.6B.100d.txt to assistant_bot/glove.6B.100d.txt
347MB [00:06, 55.9MB/s]


In [12]:
# Point the embedder of the last pipeline component at the downloaded GloVe file.
gobot_config['chainer']['pipe'][-1]['embedder'] = dict(
    class_name="glove",
    load_path="assistant_bot/glove.6B.100d.txt",
)

In [13]:
# Use the assistant dataset's response templates and set no API-call action
# on the last pipeline component's NLG manager.
gobot_config['chainer']['pipe'][-1]['nlg_manager'].update(
    template_path='assistant_data/assistant-templates.txt',
    api_call_action=None,
)

In [14]:
# Read data with the reader class defined in this notebook (hence '__main__:').
gobot_config['dataset_reader'].update(class_name='__main__:AssistantDatasetReader')

# Directories for the dataset and for the trained model files.
gobot_config['metadata']['variables'].update(
    DATA_PATH='assistant_data',
    MODEL_PATH='assistant_bot',
)

In [None]:
from deeppavlov import train_model

# Short training run; all intervals below are counted in batches.
gobot_config['train'].update({
    'batch_size': 4,             # batch size
    'max_batches': 30,           # maximum number of training batches
    'val_every_n_batches': 30,   # evaluate on the 'valid' split every 30 batches
    'log_every_n_batches': 5,    # report on the 'train' split every 5 batches
})

# The trailing ';' keeps the returned object from being displayed.
train_model(gobot_config);

In [16]:
from deeppavlov import build_model

# Build the pipeline from the same edited config. The log output of this cell
# shows the vocabulary, the GloVe embeddings and the policy checkpoint under
# assistant_bot/ being loaded.
bot = build_model(gobot_config)

2022-04-11 17:30:37.514 INFO in 'deeppavlov.core.data.simple_vocab'['simple_vocab'] at line 115: [loading vocabulary from /content/assistant_bot/word.dict]
2022-04-11 17:30:37.519 INFO in 'deeppavlov.models.embedders.glove_embedder'['glove_embedder'] at line 52: [loading GloVe embeddings from `/content/assistant_bot/glove.6B.100d.txt`]
2022-04-11 17:31:25.100 INFO in 'deeppavlov.models.go_bot.policy.policy_network'['policy_network'] at line 86: INSIDE PolicyNetwork init(). Initializing PolicyNetwork from checkpoint.
2022-04-11 17:31:25.105 INFO in 'deeppavlov.core.models.tf_model'['tf_model'] at line 51: [loading model from /content/assistant_bot/model/policy]


INFO:tensorflow:Restoring parameters from /content/assistant_bot/model/policy


In [17]:
# Send one user utterance to the bot; the cell displays the reply.
# The nested lists look like batch -> dialog -> utterance -- TODO confirm.
bot([[{"text": "good evening, bot"}]])

[['Hello, what is the weather today?']]

In [18]:
# Describe good weather to the bot and display its reply.
bot([[{"text": "the weather is pretty good. how is your weather?"}]])

[['Then you should cycle!']]

In [19]:
# Send an utterance unlike the training dialogs shown earlier and display the reply.
bot([[{"text": "can I borrow your bicycle?"}]])

[['Hello, what is the weather today?']]

In [20]:
# Try an informal greeting and display the reply.
bot([[{"text": "howdy"}]])

[['Hello, what is the weather today?']]

In [21]:
# Describe poor weather to the bot and display its reply.
bot([[{"text": "the weather is poor"}]])

[['Then you should cycle!']]

In [22]:
# Send a short objection and display the reply.
bot([[{"text": "i disagree"}]])

[['Hello, what is the weather today?']]

In [None]:
# Same greeting as the first bot call above; this cell has no execution count
# or recorded output in the saved notebook.
bot([[{"text": "good evening, bot"}]])