Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: chainer profiling #1552

Open
wants to merge 1 commit into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 3 additions & 6 deletions deeppavlov/core/commands/infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,9 @@

def build_model(config: Union[str, Path, dict], mode: str = 'infer',
load_trained: bool = False, download: bool = False,
serialized: Optional[bytes] = None) -> Chainer:
serialized: Optional[bytes] = None, buckets: Optional[list] = None) -> Chainer:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

revert changes

"""Build and return the model described in corresponding configuration file."""
config = parse_config(config)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

return line

if serialized:
serialized: list = pickle.loads(serialized)

Expand All @@ -43,9 +42,7 @@ def build_model(config: Union[str, Path, dict], mode: str = 'infer',
import_packages(config.get('metadata', {}).get('imports', []))

model_config = config['chainer']

model = Chainer(model_config['in'], model_config['out'], model_config.get('in_y'))

model = Chainer(model_config['in'], model_config['out'], model_config.get('in_y'), model_config.get('buckets'))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

get buckets and histogram name from model_config['metadata']['profiling']
If there is no such field, don't initialize histogram, if there is such dict with parameters, initialize histogram here.
Initialized histogram should be passed to Chainer instead of buckets.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so statistics will be valid in inference mode only

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

initialize hist in case "profiling" exist without "buckets" ?

for component_config in model_config['pipe']:
if load_trained and ('fit_on' in component_config or 'in_y' in component_config):
try:
Expand Down Expand Up @@ -106,7 +103,7 @@ def predict_on_stream(config: Union[str, Path, dict],
else:
f = open(file_path, encoding='utf8')

model: Chainer = build_model(config)
model: Chainer = build_model(config, hist_name = "predict")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

revert changes


args_count = len(model.in_x)
while True:
Expand Down
33 changes: 29 additions & 4 deletions deeppavlov/core/common/chainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import time
import pickle
import random
import string
from itertools import islice
from logging import getLogger
from types import FunctionType
from typing import Union, Tuple, List, Optional, Hashable, Reversible
from prometheus_client import Histogram

from deeppavlov.core.common.errors import ConfigError
from deeppavlov.core.models.component import Component
Expand Down Expand Up @@ -48,7 +52,7 @@ class Chainer(Component):
"""

def __init__(self, in_x: Union[str, list] = None, out_params: Union[str, list] = None,
in_y: Union[str, list] = None, *args, **kwargs) -> None:
in_y: Union[str, list] = None, buckets: Optional[list] = None, *args, **kwargs) -> None:
self.pipe: List[Tuple[Tuple[List[str], List[str]], List[str], Component]] = []
self.train_pipe = []
if isinstance(in_x, str):
Expand All @@ -68,6 +72,23 @@ def __init__(self, in_x: Union[str, list] = None, out_params: Union[str, list] =

self.main = None

self.hist_name = ''.join(random.choice(string.ascii_uppercase) for _ in range(5))
if buckets is not None:
self.buckets = buckets
else:
self.buckets = [.005, .01, .025, .05, .075, .1, .25, .5, .75, 1.0, 2.5, 5.0, 7.5, 10.0, 20, 50, 100, 200, 500]
self.hist = Histogram(self.hist_name, "response latency (seconds)", ["component"], buckets = self.buckets)
def print_hist():
intervals = [str(i) for i in self.hist._upper_bounds]
components = [k[0]for k in list(self.hist._metrics.keys())]
components_ = [i.split('.')[-1].split(' ')[0] for i in components]
values = [[str(self.hist.labels(component=i)._buckets[j].get()) for j in range(len(self.hist._upper_bounds))] for i in components]
format = "{:>45} "+"{:>10}" *len(intervals)
print(format.format("", *intervals))
for component, value in zip(components_, values):
print(format.format(component, *value))
self.print_hist = print_hist

def __getitem__(self, item):
if isinstance(item, int):
in_params, out_params, component = self.train_pipe[item]
Expand Down Expand Up @@ -201,13 +222,13 @@ def compute(self, x, y=None, targets=None):
args += list(zip(*y))
in_params += self.in_y

return self._compute(*args, pipe=pipe, param_names=in_params, targets=targets)
return self._compute(*args, pipe=pipe, param_names=in_params, targets=targets, hist = self.hist)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

revert after removing @staticmethod from _compute


def __call__(self, *args):
return self._compute(*args, param_names=self.in_x, pipe=self.pipe, targets=self.out_params)
return self._compute(*args, param_names=self.in_x, pipe=self.pipe, targets=self.out_params, hist = self.hist)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

revert after removing @staticmethod from _compute


@staticmethod
def _compute(*args, param_names, pipe, targets):
def _compute(*args, param_names, pipe, targets, hist: Optional[Histogram] = None):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove @staticmethod from _compute. After that there will be no need to pass hist as argument, it can be used as self.hist

expected = set(targets)
final_pipe = []
for (in_keys, in_params), out_params, component in reversed(pipe):
Expand All @@ -224,10 +245,14 @@ def _compute(*args, param_names, pipe, targets):

for (in_keys, in_params), out_params, component in pipe:
x = [mem[k] for k in in_params]
start_time = time.perf_counter()
if in_keys:
res = component.__call__(**dict(zip(in_keys, x)))
else:
res = component.__call__(*x)
duration = time.perf_counter() - start_time
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

move duration to if hist is not None

if hist is not None:
hist.labels(component = component).observe(duration)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

replace whitespaces around = and use component.__class__.__name__ instead of component as value

if len(out_params) == 1:
mem[out_params[0]] = res
else:
Expand Down
8 changes: 5 additions & 3 deletions deeppavlov/core/trainers/fit_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ def __init__(self, chainer_config: dict, *, batch_size: int = -1,
if kwargs:
log.info(f'{self.__class__.__name__} got additional init parameters {list(kwargs)} that will be ignored:')
self.chainer_config = chainer_config
self._chainer = Chainer(chainer_config['in'], chainer_config['out'], chainer_config.get('in_y'))
self.buckets = chainer_config.get('buckets')
self._chainer = Chainer(chainer_config['in'], chainer_config['out'], chainer_config.get('in_y'), self.buckets)
Comment on lines +70 to +71
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

revert changes

self.batch_size = batch_size
self.metrics = parse_metrics(metrics, self._chainer.in_y, self._chainer.out_params)
self.evaluation_targets = tuple(evaluation_targets)
Expand Down Expand Up @@ -155,7 +156,7 @@ def fit_chainer(self, iterator: Union[DataFittingIterator, DataLearningIterator]
def _load(self) -> None:
if not self._loaded:
self._chainer.destroy()
self._chainer = build_model({'chainer': self.chainer_config}, load_trained=self._saved)
self._chainer = build_model({'chainer': self.chainer_config}, load_trained=self._saved, buckets = self.buckets)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

revert changes

self._loaded = True

def get_chainer(self) -> Chainer:
Expand Down Expand Up @@ -265,5 +266,6 @@ def evaluate(self, iterator: DataLearningIterator, evaluation_targets: Optional[
res[data_type] = report
if print_reports:
print(json.dumps({data_type: report}, ensure_ascii=False, cls=NumpyArrayEncoder))

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

return line

if self._chainer.hist_name is not None:
self._chainer.print_hist()
Comment on lines +269 to +270
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove

return res
4 changes: 4 additions & 0 deletions deeppavlov/core/trainers/nn_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from deeppavlov.core.common.errors import ConfigError
from deeppavlov.core.common.registry import register
from deeppavlov.core.data.data_learning_iterator import DataLearningIterator
from deeppavlov.core.models import component
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove this import

from deeppavlov.core.trainers.fit_trainer import FitTrainer
from deeppavlov.core.trainers.utils import parse_metrics, NumpyArrayEncoder

Expand Down Expand Up @@ -293,6 +294,9 @@ def train_on_batches(self, iterator: DataLearningIterator) -> None:

if self.log_every_n_batches > 0 and self.train_batches_seen % self.log_every_n_batches == 0:
self._log(iterator, tensorboard_tag='every_n_batches', tensorboard_index=self.train_batches_seen)
if self._chainer.hist_name is not None:
self._chainer.print_hist()

Comment on lines +297 to +299
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added this lines in case the user wants to see the statistics, otherwise how can we use this addition?


if self.val_every_n_batches > 0 and self.train_batches_seen % self.val_every_n_batches == 0:
self._validate(iterator,
Expand Down