Skip to content

Commit

Permalink
Improved statistics and cli interface.
Browse files Browse the repository at this point in the history
* Set logger mode cli command.
* Improved cli messages.
* More statistics printed.
* Functions for easy access to pkg conf, data and
    app dir.
  • Loading branch information
erikdab committed Jun 23, 2017
1 parent ce5a54a commit e1e1fbc
Show file tree
Hide file tree
Showing 7 changed files with 101 additions and 107 deletions.
4 changes: 2 additions & 2 deletions conf/logging.ini
Expand Up @@ -8,12 +8,12 @@ keys=stream_handler
keys=formatter

[logger_root]
level=DEBUG
level=ERROR
handlers=stream_handler

[handler_stream_handler]
class=StreamHandler
level=DEBUG
level=ERROR
formatter=formatter
args=(sys.stderr,)

Expand Down
54 changes: 36 additions & 18 deletions pyrelaxmapper/cli.py
@@ -1,8 +1,12 @@
# -*- coding: utf-8 -*-
"""Application cli interface."""
import os
import shutil
from configparser import ConfigParser

import click

from pyrelaxmapper import __version__, commands, conf
from pyrelaxmapper import __version__, commands, conf, relax


@click.group()
Expand All @@ -12,41 +16,55 @@ def main():
pass


# TODO: allow make to show help if no action is passed.
@main.command()
@click.argument('actions', nargs=-1, required=False)
@click.option('--cache/--no-cache', default=True, help='Use caches.')
# ENV FILE
@click.option('--clean', default=False, help='Use caches.')
@click.option('--configf', '-c', help='Specify configuration file.')
def make(actions, cache, configf):
def make(actions, clean, configf):
"""Make target ACTIONS in correct order. Chainable.
\b
ACTIONS:
all Make all actions.
dicts Make translation dicts.
extract Extract plWordNet data from DB.
map Perform the mapping actions.
mono Map monosemous words (without RL).
poly Map polysemous words (with RL)."""
map Setup and run relaxation labeling.
setup Setup relaxation labeling.
relax Run relaxation labeling.
"""
if not actions:
return
parser = conf.load_conf() # Allow specifying in interface
config = conf.Config(parser)

if not cache and any(action in ['mono', 'poly', 'all'] for action in actions):
commands.make_clean(config)
click.secho('Loading application configuration.', fg='blue')
parser = ConfigParser(configf) if configf else conf.load_conf()
config = conf.Config(parser, clean)

if any(action in ['map', 'setup', 'relax', 'all'] for action in actions):
relaxer = config.cache(config.file_relaxer(), commands.make_setup, [config])
if all(action not in ['setup'] for action in actions):
commands.make_relax(relaxer)
click.secho('Loading relaxation labeling setup.', fg='blue')
relaxer = config.cache('Relaxer', relax.Relaxer, [config], group=config.mapping_group())

# If not only setup
if actions != ['setup']:
click.secho('Running relaxation labeling.', fg='blue')
relaxer.relax()


@main.command('list config')
@main.command('list-config')
def list_config():
"""List configuration information."""
commands.list_config()


@main.command('set-logger')
@click.option('--release/--debug', default=True, help='Logger mode.')
def set_logger(release):
    """Copy logger config template to app dir."""
    # NOTE: the docstring above is shown by click as the command help text,
    # so it is intentionally kept short and unchanged.
    #
    # ``fg`` selects the text colour. The ``color`` keyword only switches
    # ANSI output on or off, so passing a colour name there left the message
    # unstyled.
    if release:
        click.secho('Set logger mode to RELEASE', fg='red')
    else:
        click.secho('Set logger mode to DEBUG', fg='blue')

    # Pick the packaged template matching the requested mode and copy it to
    # the 'logging.ini' path that conf.file_in_dir() yields for the app dir.
    logging_conf = 'logging.ini' if release else 'logging-debug.ini'
    shutil.copyfile(os.path.join(conf.dir_pkg_conf(), logging_conf),
                    conf.file_in_dir(conf.dir_app_data(), 'logging.ini'))


if __name__ == "__main__":
main()
44 changes: 0 additions & 44 deletions pyrelaxmapper/commands.py
Expand Up @@ -23,47 +23,3 @@ def list_config():
if section == 'path' and os.path.exists(os.path.expanduser(value)):
exists = ': (exists)'
click.echo(''.join(['\t', key, ': ', value, exists]))


def make_clean(config):
    """Clean the cache by delegating to ``config.clean_cache()``.

    Parameters
    ----------
    config : conf.Config
        Configuration object whose cache is cleaned.
    """
    config.clean_cache()


def make_setup(config):
    """Build the relaxation labeling setup for the wordnet mapping problem.

    Prints a progress message before and after constructing the relaxer.

    Parameters
    ----------
    config : conf.Config
        Configuration passed to the ``Relaxer`` constructor.

    Returns
    -------
    pyrelaxmapper.relax.Relaxer
        The newly constructed relaxer.
    """
    click.secho('Running relaxation labeling setup.', fg='blue')
    problem = relax.Relaxer(config)
    click.secho('Done setting up relaxation labeling.', fg='blue')
    return problem


def make_relax(relaxer):
    """Run the relaxation labeling wordnet mapping algorithm.

    Prints a progress message before and after calling ``relaxer.relax()``.

    Parameters
    ----------
    relaxer : pyrelaxmapper.relax.Relaxer
        Prepared relaxer to run.

    Returns
    -------
    pyrelaxmapper.relax.Relaxer
        The same ``relaxer`` object that was passed in.
    """
    click.secho('Running relaxation labeling.', fg='blue')
    relaxer.relax()
    click.secho('Done relaxation labeling.', fg='blue')
    return relaxer
41 changes: 24 additions & 17 deletions pyrelaxmapper/conf.py
Expand Up @@ -25,18 +25,22 @@
# I/O utilities.


def dir_pkg_conf():
    """Return the absolute path of the ``conf`` directory.

    The directory is resolved one level above the directory containing
    this module.
    """
    module_dir = os.path.dirname(__file__)
    conf_dir = os.path.join(module_dir, '..', 'conf')
    return os.path.abspath(conf_dir)


def dir_pkg_data():
    """Return the absolute path of the ``data`` directory.

    The directory is resolved one level above the directory containing
    this module.
    """
    module_dir = os.path.dirname(__file__)
    data_dir = os.path.join(module_dir, '..', 'data')
    return os.path.abspath(data_dir)


def dir_app_data():
    """Return the application directory click reports for ``APPLICATION``."""
    app_dir = click.get_app_dir(APPLICATION)
    return app_dir


def search_paths():
"""Returns search paths for program configuration files."""
return [os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'conf')),
os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'data')),
click.get_app_dir(APPLICATION)]
return [dir_pkg_conf(), dir_pkg_data(), dir_app_data()]


def ensure_dir(directory):
    """Create ``directory`` (including missing parents) if needed.

    Parameters
    ----------
    directory : str
        Path of the directory that must exist.

    Returns
    -------
    str
        The ``directory`` argument, unchanged.

    Raises
    ------
    FileExistsError
        If ``directory`` exists but is not a directory.
    """
    # exist_ok=True replaces the separate os.path.exists() check, which could
    # race with another process creating the directory in between.
    os.makedirs(directory, exist_ok=True)
    return directory

def ensure_dir(directory):
"""File in directory."""
Expand Down Expand Up @@ -226,9 +230,6 @@ def load_properties(file):
config.seek(0, os.SEEK_SET)
return config


class Config:
"""Application Configuration.

class Config:
"""Application Configuration.
Expand All @@ -249,7 +250,11 @@ class Config:
# WordNet source builtin types
WORDNETS = [PLWordNet, PWordNet]

def __init__(self, parser, source_wn=None, target_wn=None, constrainer=None, translater=None):
def __init__(self, parser, clean=False, fast=None, source_wn=None, target_wn=None, constrainer=None,
translater=None):
if clean:
self.clean_cache()

self._pos = None
self._dataset_split = 0.8

Expand All @@ -269,6 +274,8 @@ def __init__(self, parser, source_wn=None, target_wn=None, constrainer=None, tra
self._data_dir = ''
self._results_dir = ''

self._fast = fast

if parser:
self.load(parser)

Expand Down Expand Up @@ -314,9 +321,9 @@ def load(self, parser):

for constraint in self._constraints:
option = 'weights_{}'.format(constraint)
if not parser.has_option(section, option):
raise KeyError('Constraint weight missing: [{}][{}]'.format(constraint, option))
self._constr_weights[constraint] = parser[section][option]
# if not parser.has_option(section, option):
# raise KeyError('Constraint weight missing: [{}][{}]'.format(constraint, option))
# self._constr_weights[constraint] = parser[section][option]

if self._source_wn is None:
self._source_wn = self._wnsource_loader(parser, 'source')
Expand All @@ -329,7 +336,7 @@ def load(self, parser):
self._constrainer = Constrainer(self._source_wn, self._target_wn,
self._constraints, self._constr_weights)

# logging.config.fileConfig(last_in_paths('logging.ini'))
# logging.config.fileConfig(last_in_paths('logging.ini'))

def _parse_dirs(self, parser, section, options):
"""Parse directory configuration option.
Expand Down
13 changes: 11 additions & 2 deletions pyrelaxmapper/relax.py
@@ -1,9 +1,11 @@
# -*- coding: utf-8 -*-
import logging
import os

import click
import numpy as np

from pyrelaxmapper import conf
from pyrelaxmapper.status import Status
from pyrelaxmapper.stats import Stats

Expand All @@ -27,12 +29,17 @@ def relax(self):
"""Run relaxation labeling"""
iteration = self.status.iteration()

self.stats.wordnet_info()
# Stopping Condition
while iteration.index() <= 1 or self.status.iterations[-2].has_changes():
self._iteration_relax()
self.stats.stat_iteration(iteration)
iteration = self.status.push_iteration()
stats = self.stats.stat_wordnets()
stats.update(self.stats.stat_wn_coverage())
stats.update(self.stats.stat_translation())
stats.update(self.stats.stat_mapping())
with open(os.path.join(self.config.cache_dir(), 'stats.csv'), 'w') as file:
file.write('\n'.join('{} : {}'.format(key, value) for key, value in stats.items()))

def _iteration_relax(self):
remaining = self.status.remaining
Expand All @@ -43,8 +50,10 @@ def _iteration_relax(self):
with click.progressbar(remaining.values(), label='Relaxing nodes') as nodes:
for node in nodes:
self.constrainer.apply(self.status.mappings, node)
# for node in remaining.values():
# self.constrainer.apply(self.status.mappings, node)

logger.info('Normalizing weights.')
# logger.info('Normalizing weights.')
# for node in iteration.remaining.values():
# for node in temp:
# node.weights = utils.normalized(node.weights)
Expand Down
34 changes: 19 additions & 15 deletions pyrelaxmapper/stats.py
Expand Up @@ -17,17 +17,16 @@ class Stats:
def __init__(self, status):
self.status = status

def stat_mapping(self):
def stat_mapping(self, print_stats=True):
cand = list(self.status.candidates.values())
counts = np.array([len(node.labels) for node in cand])
n = 10
counts_max = counts.argsort()[-n][::-1]
counts = np.array([len(node) for node in cand])
counts_max = counts.argsort()[-10:][::-1]
no_translations = [synset.uid() for synset in self.status.source_wn.all_synsets()
if synset.uid() not in self.status.candidates]
stats = {
'n_nodes': len(self.status.candidates),
'n_labels': sum(counts),
'most_ambiguous': cand[counts_max],
# 'most_ambiguous': cand[counts_max],
'labels_max': 0,
'labels_min': 0,
'labels_avg': 0,
Expand All @@ -38,9 +37,11 @@ def stat_mapping(self):
'n_polysemous': 0,
'n_no_translations': no_translations,
}
if print_stats:
self.print_stats(stats)
return stats

def stat_wn_coverage(self):
def stat_wn_coverage(self, print_stats=True):
"""Statistics about coverage of the source vs target wordnet.
Returns
Expand All @@ -55,6 +56,8 @@ def stat_wn_coverage(self):
'monosemous': len(self.status.monosemous),
'polysemous': len(self.status.polysemous),
}
if print_stats:
self.print_stats(stats)
return stats

def stat_translation(self, print_stats=True):
Expand All @@ -78,9 +81,11 @@ def stat_translation(self, print_stats=True):
'candidates_min': min_,
'candidates_max': max_,
}
if print_stats:
self.print_stats(stats)
return stats

def stat_loading(self):
def stat_loading(self, print_stats=True):
"""Statistics about speed of starting up from cached vs not."""
stats = {
'cache': 0,
Expand All @@ -95,15 +100,14 @@ def stat_wordnets(self, print_stats=True):
stats = {}
for key, wordnet in wordnets.items():
stats.update({
key+'synsets': 'Translations, count: {}'.format(wordnet.count_synsets()),
key+'lunits': 'Translations, count: {}'.format(wordnet.count_lunits())
key: wordnet.name_full(),
key+' type': wordnet.uid(),
key+' synsets': 'Synsets, count: {}'.format(wordnet.count_synsets()),
key+' lunits': 'Lunits, count: {}'.format(wordnet.count_lunits())
# Hyper/hypo relations
})
logger.info('Translations, count: {}'.format(len(wordnet)))
logger.info(
'Source synsets, count: {}'.format(len(self.status.source_wn.all_synsets())))
logger.info(
'Target synsets, count: {}'.format(len(self.status.target_wn.all_synsets())))
if print_stats:
self.print_stats(stats)
return stats

def stat_iterations(self, print_stats=True):
Expand Down Expand Up @@ -137,4 +141,4 @@ def stat_iteration(self, iteration=None, print_stats=True):
return stats_dict

def print_stats(self, stats_dict):
print('\n'.join(stats_dict.values()))
print('\n'.join('{} : {}'.format(key, value) for key, value in stats_dict.items()))
18 changes: 9 additions & 9 deletions pyrelaxmapper/status.py
Expand Up @@ -167,15 +167,15 @@ def load_cache(self):

self.monosemous = {source_id: target_ids[0] for source_id, target_ids in
self.candidates.items() if len(target_ids) == 1}
# self.polysemous = {source_id: Node(source_id, target_ids) for source_id, target_ids in
# enumerate(self._candidates.items()) if len(target_ids) > 1}
idx = 0
for source_id, target_ids in self.candidates.items():
if idx == 3000:
break
if len(target_ids) > 1:
self.polysemous[source_id] = Node(source_id, target_ids)
idx += 1
self.polysemous = {source_id: Node(source_id, target_ids) for source_id, target_ids in
self.candidates.items() if len(target_ids) > 1}
# idx = 0
# for source_id, target_ids in self.candidates.items():
# if idx == 3000:
# break
# if len(target_ids) > 1:
# self.polysemous[source_id] = Node(source_id, target_ids)
# idx += 1

def push_iteration(self):
"""Save current iteration's results and create a new iteration.
Expand Down

0 comments on commit e1e1fbc

Please sign in to comment.