refactor: info replaced with debug messages, nltk download muted (#1601)

deeppavlov · Nov 21, 2022 · commit 7fc95e9 · 1 parent 6887604

Showing 23 changed files with 59 additions and 59 deletions.
diff --git a/deeppavlov/core/data/simple_vocab.py b/deeppavlov/core/data/simple_vocab.py
@@ -109,7 +109,7 @@ def load(self):
         self.reset()
         if self.load_path:
             if self.load_path.is_file():
-                log.info("[loading vocabulary from {}]".format(self.load_path))
+                log.debug("[loading vocabulary from {}]".format(self.load_path))
                 tokens, counts = [], []
                 for ln in self.load_path.open('r', encoding='utf8'):
                     token, cnt = self.load_line(ln)

diff --git a/deeppavlov/core/models/torch_model.py b/deeppavlov/core/models/torch_model.py
@@ -99,7 +99,7 @@ def __init__(self, device: str = "gpu",
         # we need to switch to eval mode here because by default it's in `train` mode.
         # But in case of `interact/build_model` usage, we need to have model in eval mode.
         self.model.eval()
-        log.info(f"Model was successfully initialized! Model summary:\n {self.model}")
+        log.debug(f"Model was successfully initialized! Model summary:\n {self.model}")
 
     def init_from_opt(self, model_func: str) -> None:
         """Initialize from scratch `self.model` with the architecture built in  `model_func` method of this class
@@ -150,22 +150,22 @@ def load(self, fname: Optional[str] = None, *args, **kwargs) -> None:
         model_func = getattr(self, self.opt.get("model_name", ""), None)
 
         if self.load_path:
-            log.info(f"Load path {self.load_path} is given.")
+            log.debug(f"Load path {self.load_path} is given.")
             if isinstance(self.load_path, Path) and not self.load_path.parent.is_dir():
                 raise ConfigError("Provided load path is incorrect!")
 
             weights_path = Path(self.load_path.resolve())
             weights_path = weights_path.with_suffix(f".pth.tar")
             if weights_path.exists():
-                log.info(f"Load path {weights_path} exists.")
-                log.info(f"Initializing `{self.__class__.__name__}` from saved.")
+                log.debug(f"Load path {weights_path} exists.")
+                log.debug(f"Initializing `{self.__class__.__name__}` from saved.")
 
                 # firstly, initialize with random weights and previously saved parameters
                 if model_func:
                     self.init_from_opt(model_func)
 
                 # now load the weights, optimizer from saved
-                log.info(f"Loading weights from {weights_path}.")
+                log.debug(f"Loading weights from {weights_path}.")
                 checkpoint = torch.load(weights_path, map_location=self.device)
                 model_state = checkpoint["model_state_dict"]
                 optimizer_state = checkpoint["optimizer_state_dict"]
@@ -181,10 +181,10 @@ def load(self, fname: Optional[str] = None, *args, **kwargs) -> None:
                 self.optimizer.load_state_dict(optimizer_state)
                 self.epochs_done = checkpoint.get("epochs_done", 0)
             elif model_func:
-                log.info(f"Init from scratch. Load path {weights_path} does not exist.")
+                log.debug(f"Init from scratch. Load path {weights_path} does not exist.")
                 self.init_from_opt(model_func)
         elif model_func:
-            log.info(f"Init from scratch. Load path {self.load_path} is not provided.")
+            log.debug(f"Init from scratch. Load path {self.load_path} is not provided.")
             self.init_from_opt(model_func)
 
     @overrides

diff --git a/deeppavlov/core/trainers/fit_trainer.py b/deeppavlov/core/trainers/fit_trainer.py
@@ -63,7 +63,7 @@ def __init__(self, chainer_config: dict, *, batch_size: int = -1,
                  max_test_batches: int = -1,
                  **kwargs) -> None:
         if kwargs:
-            log.info(f'{self.__class__.__name__} got additional init parameters {list(kwargs)} that will be ignored:')
+            log.warning(f'{self.__class__.__name__} got additional init parameters {list(kwargs)} that will be ignored:')
         self.chainer_config = chainer_config
         self._chainer = Chainer(chainer_config['in'], chainer_config['out'], chainer_config.get('in_y'))
         self.batch_size = batch_size

diff --git a/deeppavlov/models/__init__.py b/deeppavlov/models/__init__.py
@@ -20,7 +20,7 @@
 
 if not os.environ.get('DP_SKIP_NLTK_DOWNLOAD'):
     with RedirectedPrints():
-        nltk.download('punkt')
-        nltk.download('stopwords')
-        nltk.download('perluniprops')
-        nltk.download('nonbreaking_prefixes')
+        nltk.download('punkt', quiet=True)
+        nltk.download('stopwords', quiet=True)
+        nltk.download('perluniprops', quiet=True)
+        nltk.download('nonbreaking_prefixes', quiet=True)
diff --git a/deeppavlov/models/classifiers/cos_sim_classifier.py b/deeppavlov/models/classifiers/cos_sim_classifier.py
@@ -130,5 +130,5 @@ def save(self) -> None:
 
     def load(self) -> None:
         """Load classifier parameters"""
-        logger.info("Loading faq_model from {}".format(self.load_path))
+        logger.debug("Loading faq_model from {}".format(self.load_path))
         self.x_train_features, self.y_train = load_pickle(self.load_path)
diff --git a/deeppavlov/models/classifiers/re_bert.py b/deeppavlov/models/classifiers/re_bert.py
@@ -156,7 +156,7 @@ def get_hrt(self, sequence_output: Tensor, attention: Tensor, entity_pos: List)
 
     def load(self) -> None:
         if self.pretrained_bert:
-            log.info(f"From pretrained {self.pretrained_bert}.")
+            log.debug(f"From pretrained {self.pretrained_bert}.")
             self.config = AutoConfig.from_pretrained(
                 self.pretrained_bert, num_labels=self.n_classes, output_attentions=True, output_hidden_states=True
             )

diff --git a/deeppavlov/models/doc_retrieval/pop_ranker.py b/deeppavlov/models/doc_retrieval/pop_ranker.py
@@ -57,11 +57,11 @@ class PopRanker(Component):
     def __init__(self, pop_dict_path: str, load_path: str, top_n: int = 3, active: bool = True,
                  **kwargs) -> None:
         pop_dict_path = expand_path(pop_dict_path)
-        logger.info(f"Reading popularity dictionary from {pop_dict_path}")
+        logger.debug(f"Reading popularity dictionary from {pop_dict_path}")
         self.pop_dict = read_json(pop_dict_path)
         self.mean_pop = np.mean(list(self.pop_dict.values()))
         load_path = expand_path(load_path)
-        logger.info(f"Loading popularity ranker from {load_path}")
+        logger.debug(f"Loading popularity ranker from {load_path}")
         self.clf = joblib.load(load_path)
         self.top_n = top_n
         self.active = active

diff --git a/deeppavlov/models/embedders/fasttext_embedder.py b/deeppavlov/models/embedders/fasttext_embedder.py
@@ -50,7 +50,7 @@ def load(self) -> None:
         """
         Load fastText binary model from self.load_path
         """
-        log.info(f"[loading fastText embeddings from `{self.load_path}`]")
+        log.debug(f"[loading fastText embeddings from `{self.load_path}`]")
         self.model = fasttext.load_model(str(self.load_path))
         self.dim = self.model.get_dimension()
 

diff --git a/deeppavlov/models/kbqa/query_generator_base.py b/deeppavlov/models/kbqa/query_generator_base.py
@@ -164,7 +164,7 @@ def get_entity_ids(self, entities: List[str], tags: List[str], question: str) ->
         try:
             el_output = self.entity_linker([entities], [tags], [[question]], [None], [None])
         except json.decoder.JSONDecodeError:
-            log.info("not received output from entity linking")
+            log.warning("not received output from entity linking")
         if el_output:
             if self.use_el_api_requester:
                 el_output = el_output[0]
@@ -262,7 +262,7 @@ def find_top_rels(self, question: str, entity_ids: List[List[str]], triplet_info
             try:
                 ex_rels = self.wiki_parser(parser_info_list, queries_list)
             except json.decoder.JSONDecodeError:
-                log.info("find_top_rels, not received output from wiki parser")
+                log.warning("find_top_rels, not received output from wiki parser")
             if self.use_wp_api_requester and ex_rels:
                 ex_rels = [rel[0] for rel in ex_rels]
             ex_rels = list(set(ex_rels))

diff --git a/deeppavlov/models/kbqa/rel_ranking_infer.py b/deeppavlov/models/kbqa/rel_ranking_infer.py
@@ -170,7 +170,7 @@ def __call__(self, questions_list: List[str],
                     try:
                         answer = sentence_answer(question, answer, entities, template_answer)
                     except:
-                        log.info("Error in sentence answer")
+                        log.warning("Error in sentence answer")
                 confidence = answers_with_scores[0][2]
             if self.return_confidences:
                 answers.append((answer, confidence))

diff --git a/deeppavlov/models/kbqa/wiki_parser.py b/deeppavlov/models/kbqa/wiki_parser.py
@@ -90,42 +90,42 @@ def execute_queries_list(self, parser_info_list: List[str], queries_list: List[A
                     candidate_output = self.execute(what_return, query_seq, filter_info, order_info,
                                                     query_answer_types, rel_types)
                 except:
-                    log.info("Wrong arguments are passed to wiki_parser")
+                    log.warning("Wrong arguments are passed to wiki_parser")
                 wiki_parser_output.append(candidate_output)
             elif parser_info == "find_rels":
                 rels = []
                 try:
                     rels = self.find_rels(*query)
                 except:
-                    log.info("Wrong arguments are passed to wiki_parser")
+                    log.warning("Wrong arguments are passed to wiki_parser")
                 wiki_parser_output += rels
             elif parser_info == "find_object":
                 objects = []
                 try:
                     objects = self.find_object(*query)
                 except:
-                    log.info("Wrong arguments are passed to wiki_parser")
+                    log.warning("Wrong arguments are passed to wiki_parser")
                 wiki_parser_output.append(objects)
             elif parser_info == "check_triplet":
                 check_res = False
                 try:
                     check_res = self.check_triplet(*query)
                 except:
-                    log.info("Wrong arguments are passed to wiki_parser")
+                    log.warning("Wrong arguments are passed to wiki_parser")
                 wiki_parser_output.append(check_res)
             elif parser_info == "find_label":
                 label = ""
                 try:
                     label = self.find_label(*query)
                 except:
-                    log.info("Wrong arguments are passed to wiki_parser")
+                    log.warning("Wrong arguments are passed to wiki_parser")
                 wiki_parser_output.append(label)
             elif parser_info == "find_types":
                 types = []
                 try:
                     types = self.find_types(query)
                 except:
-                    log.info("Wrong arguments are passed to wiki_parser")
+                    log.warning("Wrong arguments are passed to wiki_parser")
                 wiki_parser_output.append(types)
             elif parser_info == "find_triplets":
                 if self.file_format == "hdt":
@@ -138,14 +138,14 @@ def execute_queries_list(self, parser_info_list: List[str], queries_list: List[A
                         triplets.extend([triplet for triplet in triplets_backw
                                          if not triplet[0].startswith(self.prefixes["statement"])])
                     except:
-                        log.info("Wrong arguments are passed to wiki_parser")
+                        log.warning("Wrong arguments are passed to wiki_parser")
                     wiki_parser_output.append(list(triplets))
                 else:
                     triplets = {}
                     try:
                         triplets = self.document.get(query, {})
                     except:
-                        log.info("Wrong arguments are passed to wiki_parser")
+                        log.warning("Wrong arguments are passed to wiki_parser")
                     uncompressed_triplets = {}
                     if triplets:
                         if "forw" in triplets:
@@ -159,7 +159,7 @@ def execute_queries_list(self, parser_info_list: List[str], queries_list: List[A
                     found_triplets, c = \
                         self.document.search_triples("", f"{self.prefixes['rels']['direct']}/{query}", "")
                 except:
-                    log.info("Wrong arguments are passed to wiki_parser")
+                    log.warning("Wrong arguments are passed to wiki_parser")
                 wiki_parser_output.append(list(found_triplets))
             elif parser_info == "parse_triplets" and self.file_format == "pickle":
                 for entity in query:

diff --git a/deeppavlov/models/preprocessors/ner_preprocessor.py b/deeppavlov/models/preprocessors/ner_preprocessor.py
@@ -117,7 +117,7 @@ def __call__(self, sents, **kwargs):
         return padded_sents
 
     def load(self, *args, **kwargs):
-        log.info("[loading vocabulary from {}]".format(self.load_path))
+        log.debug("[loading vocabulary from {}]".format(self.load_path))
         if self.load_path is not None:
             self.load_from_file(self.load_path)
 

diff --git a/deeppavlov/models/preprocessors/str_utf8_encoder.py b/deeppavlov/models/preprocessors/str_utf8_encoder.py
@@ -134,7 +134,7 @@ def __call__(self, batch: Union[List[str], Tuple[str]]) -> StrUTF8EncoderInfo:
     def load(self) -> None:
         if self.load_path:
             if self.load_path.is_file():
-                log.info(f"[loading vocabulary from {self.load_path}]")
+                log.debug(f"[loading vocabulary from {self.load_path}]")
                 self.tokens = []
                 for ln in self.load_path.open('r', encoding='utf8'):
                     token = ln.strip().split()[0]

diff --git a/deeppavlov/models/sklearn/sklearn_component.py b/deeppavlov/models/sklearn/sklearn_component.py
@@ -135,10 +135,10 @@ def __call__(self, *args):
             predictions = self.infer_method(x_features)
         except TypeError or ValueError:
             if issparse(x_features):
-                log.info("Converting input for model {} to dense array".format(self.model_class))
+                log.debug("Converting input for model {} to dense array".format(self.model_class))
                 predictions = self.infer_method(x_features.todense())
             else:
-                log.info("Converting input for model {} to sparse array".format(self.model_class))
+                log.debug("Converting input for model {} to sparse array".format(self.model_class))
                 predictions = self.infer_method(csr_matrix(x_features))
 
         if isinstance(predictions, list):
@@ -163,7 +163,7 @@ def init_from_scratch(self) -> None:
         Returns:
             None
         """
-        log.info("Initializing model {} from scratch".format(self.model_class))
+        log.debug("Initializing model {} from scratch".format(self.model_class))
         model_function = cls_from_str(self.model_class)
 
         if model_function is None:
@@ -200,18 +200,18 @@ def load(self, fname: str = None) -> None:
         fname = Path(fname).with_suffix('.pkl')
 
         if fname.exists():
-            log.info("Loading model {} from {}".format(self.model_class, str(fname)))
+            log.debug("Loading model {} from {}".format(self.model_class, str(fname)))
             with open(fname, "rb") as f:
                 self.model = pickle.load(f)
 
             warm_start = self.model_params.get("warm_start", None)
             self.model_params = {param: getattr(self.model, param) for param in self.get_class_attributes(self.model)}
             self.model_class = self.model.__module__ + self.model.__class__.__name__
-            log.info("Model {} loaded  with parameters".format(self.model_class))
+            log.debug("Model {} loaded  with parameters".format(self.model_class))
 
             if warm_start and "warm_start" in self.model_params.keys():
                 self.model_params["warm_start"] = True
-                log.info("Fitting of loaded model can be continued because `warm_start` is set to True")
+                log.debug("Fitting of loaded model can be continued because `warm_start` is set to True")
             else:
                 log.warning("Fitting of loaded model can not be continued. Model can be fitted from scratch."
                             "If one needs to continue fitting, please, look at `warm_start` parameter")

diff --git a/deeppavlov/models/spelling_correction/brillmoore/error_model.py b/deeppavlov/models/spelling_correction/brillmoore/error_model.py
@@ -233,7 +233,7 @@ def load(self):
         """
         if self.load_path:
             if self.load_path.is_file():
-                logger.info("loading error_model from `{}`".format(self.load_path))
+                logger.debug("loading error_model from `{}`".format(self.load_path))
                 with open(self.load_path, 'r', newline='', encoding='utf8') as tsv_file:
                     reader = csv.reader(tsv_file, delimiter='\t')
                     for w, s, p in reader:
@@ -242,4 +242,4 @@ def load(self):
                 raise ConfigError("Provided `load_path` for {} doesn't exist!".format(
                     self.__class__.__name__))
         else:
-            logger.info('No load_path provided, initializing error model from scratch')
+            logger.warning('No load_path provided, initializing error model from scratch')
diff --git a/deeppavlov/models/torch_bert/torch_bert_ranker.py b/deeppavlov/models/torch_bert/torch_bert_ranker.py
@@ -161,7 +161,7 @@ def load(self, fname=None):
             self.load_path = fname
 
         if self.pretrained_bert:
-            log.info(f"From pretrained {self.pretrained_bert}.")
+            log.debug(f"From pretrained {self.pretrained_bert}.")
             if Path(expand_path(self.pretrained_bert)).exists():
                 self.pretrained_bert = str(expand_path(self.pretrained_bert))
             config = AutoConfig.from_pretrained(self.pretrained_bert,

diff --git a/deeppavlov/models/torch_bert/torch_transformers_classifier.py b/deeppavlov/models/torch_bert/torch_transformers_classifier.py
@@ -202,7 +202,7 @@ def load(self, fname=None):
             self.load_path = fname
 
         if self.pretrained_bert:
-            log.info(f"From pretrained {self.pretrained_bert}.")
+            log.debug(f"From pretrained {self.pretrained_bert}.")
             config = AutoConfig.from_pretrained(self.pretrained_bert,
                                                 # num_labels=self.n_classes,
                                                 output_attentions=False,

diff --git a/deeppavlov/models/torch_bert/torch_transformers_el_ranker.py b/deeppavlov/models/torch_bert/torch_transformers_el_ranker.py
@@ -235,7 +235,7 @@ def forward(self,
 
     def load(self) -> None:
         if self.pretrained_bert:
-            log.info(f"From pretrained {self.pretrained_bert}.")
+            log.debug(f"From pretrained {self.pretrained_bert}.")
             self.config = AutoConfig.from_pretrained(
                 self.pretrained_bert, output_hidden_states=True
             )

diff --git a/deeppavlov/models/torch_bert/torch_transformers_multiplechoice.py b/deeppavlov/models/torch_bert/torch_transformers_multiplechoice.py
@@ -164,7 +164,7 @@ def load(self, fname = None):
             self.load_path = fname
 
         if self.pretrained_bert:
-            log.info(f"From pretrained {self.pretrained_bert}.")
+            log.debug(f"From pretrained {self.pretrained_bert}.")
             config = AutoConfig.from_pretrained(self.pretrained_bert, num_labels=self.n_classes, 
                                                 output_attentions=False, output_hidden_states=False)
 
@@ -189,21 +189,21 @@ def load(self, fname = None):
                 self.optimizer, **self.lr_scheduler_parameters)
 
         if self.load_path:
-            log.info(f"Load path {self.load_path} is given.")
+            log.debug(f"Load path {self.load_path} is given.")
             if isinstance(self.load_path, Path) and not self.load_path.parent.is_dir():
                 raise ConfigError("Provided load path is incorrect!")
 
             weights_path = Path(self.load_path.resolve())
             weights_path = weights_path.with_suffix(f".pth.tar")
             if weights_path.exists():
-                log.info(f"Load path {weights_path} exists.")
-                log.info(f"Initializing `{self.__class__.__name__}` from saved.")
+                log.debug(f"Load path {weights_path} exists.")
+                log.debug(f"Initializing `{self.__class__.__name__}` from saved.")
 
                 # now load the weights, optimizer from saved
-                log.info(f"Loading weights from {weights_path}.")
+                log.debug(f"Loading weights from {weights_path}.")
                 checkpoint = torch.load(weights_path, map_location=self.device)
                 self.model.load_state_dict(checkpoint["model_state_dict"])
                 self.optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
                 self.epochs_done = checkpoint.get("epochs_done", 0)
             else:
-                log.info(f"Init from scratch. Load path {weights_path} does not exist.")
+                log.warning(f"Init from scratch. Load path {weights_path} does not exist.")
diff --git a/deeppavlov/models/torch_bert/torch_transformers_sequence_tagger.py b/deeppavlov/models/torch_bert/torch_transformers_sequence_tagger.py
@@ -309,7 +309,7 @@ def load(self, fname=None):
                     checkpoint = torch.load(weights_path_crf, map_location=self.device)
                     self.crf.load_state_dict(checkpoint["model_state_dict"], strict=False)
                 else:
-                    log.info(f"Init from scratch. Load path {weights_path_crf} does not exist.")
+                    log.warning(f"Init from scratch. Load path {weights_path_crf} does not exist.")
 
     @overrides
     def save(self, fname: Optional[str] = None, *args, **kwargs) -> None: