Improved statistics and CLI.

* Added a CLI command to set the logger mode. * Improved CLI messages. * More statistics are printed. * Added functions for easy access to the package conf, data and app directories.
erikdab · Jun 23, 2017 · e1e1fbc · e1e1fbc
1 parent ce5a54a
commit e1e1fbc
Show file tree

Hide file tree

Showing 7 changed files with 101 additions and 107 deletions.
diff --git a/conf/logging.ini b/conf/logging.ini
@@ -8,12 +8,12 @@ keys=stream_handler
 keys=formatter
 
 [logger_root]
-level=DEBUG
+level=ERROR
 handlers=stream_handler
 
 [handler_stream_handler]
 class=StreamHandler
-level=DEBUG
+level=ERROR
 formatter=formatter
 args=(sys.stderr,)
 

diff --git a/pyrelaxmapper/cli.py b/pyrelaxmapper/cli.py
@@ -1,8 +1,12 @@
 # -*- coding: utf-8 -*-
 """Application cli interface."""
+import os
+import shutil
+from configparser import ConfigParser
+
 import click
 
-from pyrelaxmapper import __version__, commands, conf
+from pyrelaxmapper import __version__, commands, conf, relax
 
 
 @click.group()
@@ -12,41 +16,55 @@ def main():
     pass
 
 
-# TODO: allow make to show help if no action is passed.
 @main.command()
 @click.argument('actions', nargs=-1, required=False)
-@click.option('--cache/--no-cache', default=True, help='Use caches.')
-# ENV FILE
+@click.option('--clean', default=False, help='Use caches.')
 @click.option('--configf', '-c', help='Specify configuration file.')
-def make(actions, cache, configf):
+def make(actions, clean, configf):
     """Make target ACTIONS in correct order. Chainable.
 
     \b
     ACTIONS:
       all      Make all actions.
-      dicts    Make translation dicts.
-      extract  Extract plWordNet data from DB.
-      map      Perform the mapping actions.
-      mono     Map monosemous words (without RL).
-      poly     Map polysemous words (with RL)."""
+      map      Setup and run relaxation labeling.
+      setup    Setup relaxation labeling.
+      relax    Run relaxation labeling.
+      """
     if not actions:
         return
-    parser = conf.load_conf()  # Allow specifying in interface
-    config = conf.Config(parser)
 
-    if not cache and any(action in ['mono', 'poly', 'all'] for action in actions):
-        commands.make_clean(config)
+    click.secho('Loading application configuration.', fg='blue')
+    parser = ConfigParser(configf) if configf else conf.load_conf()
+    config = conf.Config(parser, clean)
+
     if any(action in ['map', 'setup', 'relax', 'all'] for action in actions):
-        relaxer = config.cache(config.file_relaxer(), commands.make_setup, [config])
-        if all(action not in ['setup'] for action in actions):
-            commands.make_relax(relaxer)
+        click.secho('Loading relaxation labeling setup.', fg='blue')
+        relaxer = config.cache('Relaxer', relax.Relaxer, [config], group=config.mapping_group())
+
+        # If not only setup
+        if actions != ['setup']:
+            click.secho('Running relaxation labeling.', fg='blue')
+            relaxer.relax()
 
 
-@main.command('list config')
+@main.command('list-config')
 def list_config():
     """List configuration information."""
     commands.list_config()
 
 
+@main.command('set-logger')
+@click.option('--release/--debug', default=True, help='Logger mode.')
+def set_logger(release):
+    """Copy logger config template to app dir."""
+    if release:
+        click.secho('Set logger mode to RELEASE', color='red')
+    else:
+        click.secho('Set logger mode to DEBUG', color='blue')
+    logging_conf = 'logging.ini' if release else 'logging-debug.ini'
+    shutil.copyfile(os.path.join(conf.dir_pkg_conf(), logging_conf),
+                    conf.file_in_dir(conf.dir_app_data(), 'logging.ini'))
+
+
 if __name__ == "__main__":
     main()
diff --git a/pyrelaxmapper/commands.py b/pyrelaxmapper/commands.py
@@ -23,47 +23,3 @@ def list_config():
             if section == 'path' and os.path.exists(os.path.expanduser(value)):
                 exists = ': (exists)'
             click.echo(''.join(['\t', key, ': ', value, exists]))
-
-
-def make_clean(config):
-    """Cleans all files under the cache directory.
-
-    Parameters
-    ----------
-    config : conf.Config
-    """
-    config.clean_cache()
-
-
-def make_setup(config):
-    """Setup relaxation labeling wordnet mapping problem.
-
-    Parameters
-    ----------
-    config : conf.Config
-
-    Returns
-    -------
-    pyrelaxmapper.relax.Relaxer
-    """
-    click.secho('Running relaxation labeling setup.', fg='blue')
-    relaxer = relax.Relaxer(config)
-    click.secho('Done setting up relaxation labeling.', fg='blue')
-    return relaxer
-
-
-def make_relax(relaxer):
-    """Perform relaxation labeling wordnet mapping algorithm.
-
-    Parameters
-    ----------
-    relaxer : pyrelaxmapper.relax.Relaxer
-
-    Returns
-    -------
-    relaxer : pyrelaxmapper.relax.Relaxer
-    """
-    click.secho('Running relaxation labeling.', fg='blue')
-    relaxer.relax()
-    click.secho('Done relaxation labeling.', fg='blue')
-    return relaxer
diff --git a/pyrelaxmapper/conf.py b/pyrelaxmapper/conf.py
@@ -25,18 +25,22 @@
 # I/O utilities.
 
 
+def dir_pkg_conf():
+    return os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'conf'))
+
+
+def dir_pkg_data():
+    return os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'data'))
+
+
+def dir_app_data():
+    return click.get_app_dir(APPLICATION)
+
+
 def search_paths():
     """Returns search paths for program configuration files."""
-    return [os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'conf')),
-            os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'data')),
-            click.get_app_dir(APPLICATION)]
+    return [dir_pkg_conf(), dir_pkg_data(), dir_app_data()]
 
-
-def ensure_dir(directory):
-    """File in directory."""
-    if not os.path.exists(directory):
-        os.makedirs(directory)
-    return directory
 
 def ensure_dir(directory):
     """File in directory."""
@@ -226,9 +230,6 @@ def load_properties(file):
     config.seek(0, os.SEEK_SET)
     return config
 
-
-class Config:
-    """Application Configuration.
 
 class Config:
     """Application Configuration.
@@ -249,7 +250,11 @@ class Config:
     # WordNet source builtin types
     WORDNETS = [PLWordNet, PWordNet]
 
-    def __init__(self, parser, source_wn=None, target_wn=None, constrainer=None, translater=None):
+    def __init__(self, parser, clean=False, fast=None, source_wn=None, target_wn=None, constrainer=None,
+                 translater=None):
+        if clean:
+            self.clean_cache()
+
         self._pos = None
         self._dataset_split = 0.8
 
@@ -269,6 +274,8 @@ def __init__(self, parser, source_wn=None, target_wn=None, constrainer=None, tra
         self._data_dir = ''
         self._results_dir = ''
 
+        self._fast = fast
+
         if parser:
             self.load(parser)
 
@@ -314,9 +321,9 @@ def load(self, parser):
 
         for constraint in self._constraints:
             option = 'weights_{}'.format(constraint)
-            if not parser.has_option(section, option):
-                raise KeyError('Constraint weight missing: [{}][{}]'.format(constraint, option))
-            self._constr_weights[constraint] = parser[section][option]
+            # if not parser.has_option(section, option):
+            #     raise KeyError('Constraint weight missing: [{}][{}]'.format(constraint, option))
+            # self._constr_weights[constraint] = parser[section][option]
 
         if self._source_wn is None:
             self._source_wn = self._wnsource_loader(parser, 'source')
@@ -329,7 +336,7 @@ def load(self, parser):
             self._constrainer = Constrainer(self._source_wn, self._target_wn,
                                             self._constraints, self._constr_weights)
 
-        # logging.config.fileConfig(last_in_paths('logging.ini'))
+            # logging.config.fileConfig(last_in_paths('logging.ini'))
 
     def _parse_dirs(self, parser, section, options):
         """Parse directory configuration option.

diff --git a/pyrelaxmapper/relax.py b/pyrelaxmapper/relax.py
@@ -1,9 +1,11 @@
 # -*- coding: utf-8 -*-
 import logging
+import os
 
 import click
 import numpy as np
 
+from pyrelaxmapper import conf
 from pyrelaxmapper.status import Status
 from pyrelaxmapper.stats import Stats
 
@@ -27,12 +29,17 @@ def relax(self):
         """Run relaxation labeling"""
         iteration = self.status.iteration()
 
-        self.stats.wordnet_info()
         # Stopping Condition
         while iteration.index() <= 1 or self.status.iterations[-2].has_changes():
             self._iteration_relax()
             self.stats.stat_iteration(iteration)
             iteration = self.status.push_iteration()
+        stats = self.stats.stat_wordnets()
+        stats.update(self.stats.stat_wn_coverage())
+        stats.update(self.stats.stat_translation())
+        stats.update(self.stats.stat_mapping())
+        with open(os.path.join(self.config.cache_dir(), 'stats.csv'), 'w') as file:
+            file.write('\n'.join('{} : {}'.format(key, value) for key, value in stats.items()))
 
     def _iteration_relax(self):
         remaining = self.status.remaining
@@ -43,8 +50,10 @@ def _iteration_relax(self):
         with click.progressbar(remaining.values(), label='Relaxing nodes') as nodes:
             for node in nodes:
                 self.constrainer.apply(self.status.mappings, node)
+        # for node in remaining.values():
+        #     self.constrainer.apply(self.status.mappings, node)
 
-        logger.info('Normalizing weights.')
+        # logger.info('Normalizing weights.')
         # for node in iteration.remaining.values():
         # for node in temp:
         #     node.weights = utils.normalized(node.weights)

diff --git a/pyrelaxmapper/stats.py b/pyrelaxmapper/stats.py
@@ -17,17 +17,16 @@ class Stats:
     def __init__(self, status):
         self.status = status
 
-    def stat_mapping(self):
+    def stat_mapping(self, print_stats=True):
         cand = list(self.status.candidates.values())
-        counts = np.array([len(node.labels) for node in cand])
-        n = 10
-        counts_max = counts.argsort()[-n][::-1]
+        counts = np.array([len(node) for node in cand])
+        counts_max = counts.argsort()[-10:][::-1]
         no_translations = [synset.uid() for synset in self.status.source_wn.all_synsets()
                            if synset.uid() not in self.status.candidates]
         stats = {
             'n_nodes': len(self.status.candidates),
             'n_labels': sum(counts),
-            'most_ambiguous': cand[counts_max],
+            # 'most_ambiguous': cand[counts_max],
             'labels_max': 0,
             'labels_min': 0,
             'labels_avg': 0,
@@ -38,9 +37,11 @@ def stat_mapping(self):
             'n_polysemous': 0,
             'n_no_translations': no_translations,
         }
+        if print_stats:
+            self.print_stats(stats)
         return stats
 
-    def stat_wn_coverage(self):
+    def stat_wn_coverage(self, print_stats=True):
         """Statistics about coverage of the source vs target wordnet.
 
         Returns
@@ -55,6 +56,8 @@ def stat_wn_coverage(self):
             'monosemous': len(self.status.monosemous),
             'polysemous': len(self.status.polysemous),
         }
+        if print_stats:
+            self.print_stats(stats)
         return stats
 
     def stat_translation(self, print_stats=True):
@@ -78,9 +81,11 @@ def stat_translation(self, print_stats=True):
             'candidates_min': min_,
             'candidates_max': max_,
         }
+        if print_stats:
+            self.print_stats(stats)
         return stats
 
-    def stat_loading(self):
+    def stat_loading(self, print_stats=True):
         """Statistics about speed of starting up from cached vs not."""
         stats = {
             'cache': 0,
@@ -95,15 +100,14 @@ def stat_wordnets(self, print_stats=True):
         stats = {}
         for key, wordnet in wordnets.items():
             stats.update({
-                key+'synsets': 'Translations, count: {}'.format(wordnet.count_synsets()),
-                key+'lunits': 'Translations, count: {}'.format(wordnet.count_lunits())
+                key: wordnet.name_full(),
+                key+' type': wordnet.uid(),
+                key+' synsets': 'Synsets, count: {}'.format(wordnet.count_synsets()),
+                key+' lunits': 'Lunits, count: {}'.format(wordnet.count_lunits())
                 # Hiper/hypo relations
             })
-            logger.info('Translations, count: {}'.format(len(wordnet)))
-            logger.info(
-                'Source synsets, count: {}'.format(len(self.status.source_wn.all_synsets())))
-            logger.info(
-                'Target synsets, count: {}'.format(len(self.status.target_wn.all_synsets())))
+        if print_stats:
+            self.print_stats(stats)
         return stats
 
     def stat_iterations(self, print_stats=True):
@@ -137,4 +141,4 @@ def stat_iteration(self, iteration=None, print_stats=True):
         return stats_dict
 
     def print_stats(self, stats_dict):
-        print('\n'.join(stats_dict.values()))
+        print('\n'.join('{} : {}'.format(key, value) for key, value in stats_dict.items()))
diff --git a/pyrelaxmapper/status.py b/pyrelaxmapper/status.py
@@ -167,15 +167,15 @@ def load_cache(self):
 
         self.monosemous = {source_id: target_ids[0] for source_id, target_ids in
                            self.candidates.items() if len(target_ids) == 1}
-        # self.polysemous = {source_id: Node(source_id, target_ids) for source_id, target_ids in
-        #                    enumerate(self._candidates.items()) if len(target_ids) > 1}
-        idx = 0
-        for source_id, target_ids in self.candidates.items():
-            if idx == 3000:
-                break
-            if len(target_ids) > 1:
-                self.polysemous[source_id] = Node(source_id, target_ids)
-                idx += 1
+        self.polysemous = {source_id: Node(source_id, target_ids) for source_id, target_ids in
+                           self.candidates.items() if len(target_ids) > 1}
+        # idx = 0
+        # for source_id, target_ids in self.candidates.items():
+        #     if idx == 3000:
+        #         break
+        #     if len(target_ids) > 1:
+        #         self.polysemous[source_id] = Node(source_id, target_ids)
+        #         idx += 1
 
     def push_iteration(self):
         """Save current iteration's results and create a new iteration.