-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: chainer profiling #1552
base: dev
Are you sure you want to change the base?
feat: chainer profiling #1552
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -30,10 +30,9 @@ | |
|
||
def build_model(config: Union[str, Path, dict], mode: str = 'infer', | ||
load_trained: bool = False, download: bool = False, | ||
serialized: Optional[bytes] = None) -> Chainer: | ||
serialized: Optional[bytes] = None, buckets: Optional[list] = None) -> Chainer: | ||
"""Build and return the model described in corresponding configuration file.""" | ||
config = parse_config(config) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. return line |
||
if serialized: | ||
serialized: list = pickle.loads(serialized) | ||
|
||
|
@@ -43,9 +42,7 @@ def build_model(config: Union[str, Path, dict], mode: str = 'infer', | |
import_packages(config.get('metadata', {}).get('imports', [])) | ||
|
||
model_config = config['chainer'] | ||
|
||
model = Chainer(model_config['in'], model_config['out'], model_config.get('in_y')) | ||
|
||
model = Chainer(model_config['in'], model_config['out'], model_config.get('in_y'), model_config.get('buckets')) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. get buckets and histogram name from model_config['metadata']['profiling'] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. so statistics will be valid in inference mode only There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. initialize hist in case "profiling" exists without "buckets"? |
||
for component_config in model_config['pipe']: | ||
if load_trained and ('fit_on' in component_config or 'in_y' in component_config): | ||
try: | ||
|
@@ -106,7 +103,7 @@ def predict_on_stream(config: Union[str, Path, dict], | |
else: | ||
f = open(file_path, encoding='utf8') | ||
|
||
model: Chainer = build_model(config) | ||
model: Chainer = build_model(config, hist_name = "predict") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. revert changes |
||
|
||
args_count = len(model.in_x) | ||
while True: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,11 +12,15 @@ | |
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import time | ||
import pickle | ||
import random | ||
import string | ||
from itertools import islice | ||
from logging import getLogger | ||
from types import FunctionType | ||
from typing import Union, Tuple, List, Optional, Hashable, Reversible | ||
from prometheus_client import Histogram | ||
|
||
from deeppavlov.core.common.errors import ConfigError | ||
from deeppavlov.core.models.component import Component | ||
|
@@ -48,7 +52,7 @@ class Chainer(Component): | |
""" | ||
|
||
def __init__(self, in_x: Union[str, list] = None, out_params: Union[str, list] = None, | ||
in_y: Union[str, list] = None, *args, **kwargs) -> None: | ||
in_y: Union[str, list] = None, buckets: Optional[list] = None, *args, **kwargs) -> None: | ||
self.pipe: List[Tuple[Tuple[List[str], List[str]], List[str], Component]] = [] | ||
self.train_pipe = [] | ||
if isinstance(in_x, str): | ||
|
@@ -68,6 +72,23 @@ def __init__(self, in_x: Union[str, list] = None, out_params: Union[str, list] = | |
|
||
self.main = None | ||
|
||
self.hist_name = ''.join(random.choice(string.ascii_uppercase) for _ in range(5)) | ||
if buckets is not None: | ||
self.buckets = buckets | ||
else: | ||
self.buckets = [.005, .01, .025, .05, .075, .1, .25, .5, .75, 1.0, 2.5, 5.0, 7.5, 10.0, 20, 50, 100, 200, 500] | ||
self.hist = Histogram(self.hist_name, "response latency (seconds)", ["component"], buckets = self.buckets) | ||
def print_hist(): | ||
intervals = [str(i) for i in self.hist._upper_bounds] | ||
components = [k[0]for k in list(self.hist._metrics.keys())] | ||
components_ = [i.split('.')[-1].split(' ')[0] for i in components] | ||
values = [[str(self.hist.labels(component=i)._buckets[j].get()) for j in range(len(self.hist._upper_bounds))] for i in components] | ||
format = "{:>45} "+"{:>10}" *len(intervals) | ||
print(format.format("", *intervals)) | ||
for component, value in zip(components_, values): | ||
print(format.format(component, *value)) | ||
self.print_hist = print_hist | ||
|
||
def __getitem__(self, item): | ||
if isinstance(item, int): | ||
in_params, out_params, component = self.train_pipe[item] | ||
|
@@ -201,13 +222,13 @@ def compute(self, x, y=None, targets=None): | |
args += list(zip(*y)) | ||
in_params += self.in_y | ||
|
||
return self._compute(*args, pipe=pipe, param_names=in_params, targets=targets) | ||
return self._compute(*args, pipe=pipe, param_names=in_params, targets=targets, hist = self.hist) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. revert after removing @staticmethod from _compute |
||
|
||
def __call__(self, *args): | ||
return self._compute(*args, param_names=self.in_x, pipe=self.pipe, targets=self.out_params) | ||
return self._compute(*args, param_names=self.in_x, pipe=self.pipe, targets=self.out_params, hist = self.hist) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. revert after removing @staticmethod from _compute |
||
|
||
@staticmethod | ||
def _compute(*args, param_names, pipe, targets): | ||
def _compute(*args, param_names, pipe, targets, hist: Optional[Histogram] = None): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. remove |
||
expected = set(targets) | ||
final_pipe = [] | ||
for (in_keys, in_params), out_params, component in reversed(pipe): | ||
|
@@ -224,10 +245,14 @@ def _compute(*args, param_names, pipe, targets): | |
|
||
for (in_keys, in_params), out_params, component in pipe: | ||
x = [mem[k] for k in in_params] | ||
start_time = time.perf_counter() | ||
if in_keys: | ||
res = component.__call__(**dict(zip(in_keys, x))) | ||
else: | ||
res = component.__call__(*x) | ||
duration = time.perf_counter() - start_time | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. move duration to |
||
if hist is not None: | ||
hist.labels(component = component).observe(duration) | ||
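There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. replace whitespaces around [inline code lost in extraction; presumably the `=` in the keyword argument `component = component` on the line above] |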
||
if len(out_params) == 1: | ||
mem[out_params[0]] = res | ||
else: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -67,7 +67,8 @@ def __init__(self, chainer_config: dict, *, batch_size: int = -1, | |
if kwargs: | ||
log.info(f'{self.__class__.__name__} got additional init parameters {list(kwargs)} that will be ignored:') | ||
self.chainer_config = chainer_config | ||
self._chainer = Chainer(chainer_config['in'], chainer_config['out'], chainer_config.get('in_y')) | ||
self.buckets = chainer_config.get('buckets') | ||
self._chainer = Chainer(chainer_config['in'], chainer_config['out'], chainer_config.get('in_y'), self.buckets) | ||
Comment on lines +70 to +71
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. revert changes |
||
self.batch_size = batch_size | ||
self.metrics = parse_metrics(metrics, self._chainer.in_y, self._chainer.out_params) | ||
self.evaluation_targets = tuple(evaluation_targets) | ||
|
@@ -155,7 +156,7 @@ def fit_chainer(self, iterator: Union[DataFittingIterator, DataLearningIterator] | |
def _load(self) -> None: | ||
if not self._loaded: | ||
self._chainer.destroy() | ||
self._chainer = build_model({'chainer': self.chainer_config}, load_trained=self._saved) | ||
self._chainer = build_model({'chainer': self.chainer_config}, load_trained=self._saved, buckets = self.buckets) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. revert changes |
||
self._loaded = True | ||
|
||
def get_chainer(self) -> Chainer: | ||
|
@@ -265,5 +266,6 @@ def evaluate(self, iterator: DataLearningIterator, evaluation_targets: Optional[ | |
res[data_type] = report | ||
if print_reports: | ||
print(json.dumps({data_type: report}, ensure_ascii=False, cls=NumpyArrayEncoder)) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. return line |
||
if self._chainer.hist_name is not None: | ||
self._chainer.print_hist() | ||
Comment on lines +269 to +270
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. remove |
||
return res |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,6 +23,7 @@ | |
from deeppavlov.core.common.errors import ConfigError | ||
from deeppavlov.core.common.registry import register | ||
from deeppavlov.core.data.data_learning_iterator import DataLearningIterator | ||
from deeppavlov.core.models import component | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. remove this import |
||
from deeppavlov.core.trainers.fit_trainer import FitTrainer | ||
from deeppavlov.core.trainers.utils import parse_metrics, NumpyArrayEncoder | ||
|
||
|
@@ -293,6 +294,9 @@ def train_on_batches(self, iterator: DataLearningIterator) -> None: | |
|
||
if self.log_every_n_batches > 0 and self.train_batches_seen % self.log_every_n_batches == 0: | ||
self._log(iterator, tensorboard_tag='every_n_batches', tensorboard_index=self.train_batches_seen) | ||
if self._chainer.hist_name is not None: | ||
self._chainer.print_hist() | ||
|
||
Comment on lines +297 to +299
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. remove There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I added these lines in case the user wants to see the statistics; otherwise, how can we use this addition? |
||
|
||
if self.val_every_n_batches > 0 and self.train_batches_seen % self.val_every_n_batches == 0: | ||
self._validate(iterator, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
revert changes