In [1]:
from pathlib import Path
from time import perf_counter, time

import torch

from parlai.agents.emely.emely import EmelyAgent
from parlai.core.agents import create_agent
from parlai.core.opt import Opt
from parlai.utils.io import PathManager

# Directory holding the blender_90M model files, resolved against the current
# working directory (so the notebook must be started from the right folder).
model_path = Path.cwd().joinpath('data', 'models', 'blender', 'blender_90M')
assert model_path.is_dir()

# Start from the options file stored alongside the model.
opt_path = model_path.joinpath('model.opt')
opt = Opt.load(opt_path)

# Overrides applied on top of the stored options.
opt['skip_generation'] = False
opt['init_model'] = model_path.joinpath('model').as_posix()
opt['no_cuda'] = True  # Cloud run doesn't offer gpu support

# Decoding settings used at inference time.
opt['inference'] = 'beam'
opt['beam_size'] = 10

emely_agent = EmelyAgent(opt)

# Load the two saved TorchScript files that the later cells time against
# the agent built above.
with PathManager.open("../../saved_models/emely_scripted_test.pt", "rb") as scripted_file:
    scripted_module = torch.jit.load(scripted_file)

with PathManager.open("../../saved_models/emely_scripted_test_quantized.pt", "rb") as quantized_file:
    scripted_quant_module = torch.jit.load(quantized_file)

12:21:23 | loading dictionary from /home/ckjellson/code/emely-models/ParlAI/data/models/blender/blender_90M/model.dict
12:21:24 | num words = 54944
12:21:24 | [33mDEPRECATED: XLM should only be used for backwards compatibility, as it involves a less-stable layernorm operation.[0m
12:21:25 | Total parameters: 87,508,992 (87,508,992 trainable)
12:21:25 | Loading existing model params from /home/ckjellson/code/emely-models/ParlAI/data/models/blender/blender_90M/model




In [2]:
# Benchmark parameters read by the timing cells.
nruns = 20  # number of replies requested from each model per timing run

# Dialogue sent to every model: three turns separated by newlines, passed as
# one string. The spelling is left exactly as-is because it is model input.
text = (
    "Hi Emely, how are you?\n"
    "I'm good thanks! What do you do for work?\n"
    "I write code and I drink coffe."
)

In [3]:
# Test emely agent: total time for `nruns` replies to `text`.
reply = None  # keeps print(reply) valid even when nruns == 0
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time() follows the system clock, which can be adjusted mid-run.
t1 = perf_counter()
for _ in range(nruns):
    reply = emely_agent.observe_and_act(text)
testtime = perf_counter() - t1
print(testtime)  # elapsed seconds for all runs together
print(reply)     # reply produced by the last run

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /pytorch/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)


15.012229204177856
that ' s cool ! what kind of code do you write ? i ' ve never written code .


In [3]:
# Test scripted emely agent (if the above cell has been run, restart kernel before running this cell)
# Measures the total time for `nruns` replies to `text`.
reply = None  # keeps print(reply) valid even when nruns == 0
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time() follows the system clock, which can be adjusted mid-run.
t1 = perf_counter()
for _ in range(nruns):
    reply = scripted_module(text)
testtime = perf_counter() - t1
print(testtime)  # elapsed seconds for all runs together
print(reply)     # reply produced by the last run

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /pytorch/aten/src/ATen/native/BinaryOps.cpp:467.)
  return forward_call(*input, **kwargs)


18.755329370498657
that ' s cool ! what kind of code do you write ? i ' ve never written code .


In [3]:
# Test quantized scripted emely agent (if the above cell has been run, restart kernel before running this cell)
# Measures the total time for `nruns` replies to `text`.
reply = None  # keeps print(reply) valid even when nruns == 0
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time() follows the system clock, which can be adjusted mid-run.
t1 = perf_counter()
for _ in range(nruns):
    reply = scripted_quant_module(text)
testtime = perf_counter() - t1
print(testtime)  # elapsed seconds for all runs together
print(reply)     # reply produced by the last run

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /pytorch/aten/src/ATen/native/BinaryOps.cpp:467.)
  return forward_call(*input, **kwargs)


14.046203851699829
that ' s great ! i drink coffee too , but i don ' t like to drink it .
