Skip to content

Commit

Permalink
Improvements in results, stats, caching.
Browse files Browse the repository at this point in the history
* Relaxer:
  - Status is now cached for future analysis!
* Stats:
  - Requires only status, can create reports.
* DirManager:
  - Added r(...) and w(...) for simple pickling
    of objects, without acting like a cache.
* CLI(config list): now shows diff between merged
    and default configuration for comparison
* Config does not save large data to cache, only
    the class for future loading. This greatly
    decreases cache sizes.
  • Loading branch information
erikdab committed Jun 25, 2017
1 parent 8abfe9a commit 2fd1c06
Show file tree
Hide file tree
Showing 10 changed files with 147 additions and 70 deletions.
2 changes: 1 addition & 1 deletion pyrelaxmapper/__init__.py
Expand Up @@ -3,7 +3,7 @@

from pyrelaxmapper import fileutils

logging.config.fileConfig(fileutils.last_in_paths('logging.ini'))
logging.config.fileConfig(fileutils.in_lowest_path('logging.ini'))
logging.getLogger(__name__).addHandler(logging.NullHandler())

__author__ = """Erik David Burnell"""
Expand Down
14 changes: 7 additions & 7 deletions pyrelaxmapper/cli.py
Expand Up @@ -50,7 +50,7 @@ def make(actions, conf_file):
\b
ACTIONS:
clean Clean all caches.
relax Run RL algorithm.
relax Run RL algorithm and save results.
stats Create statistics report.
"""
if not actions:
Expand All @@ -64,15 +64,15 @@ def make(actions, conf_file):
if Action.Clean in actions:
commands.config_clean(config)

relaxer = commands.relaxer_load(config)

logger.info('Preloaded data.')

status = None
if Action.Relax in actions:
commands.relaxer_relax(relaxer)
# could merge
relaxer = commands.relaxer_load(config)
status = commands.relaxer_relax(relaxer)

# Maybe should be able to pass status file!
if Action.Stats in actions:
commands.relaxer_stats(relaxer, config)
commands.relaxer_stats(status, config)

logger.info('End.')

Expand Down
62 changes: 49 additions & 13 deletions pyrelaxmapper/commands.py
Expand Up @@ -4,8 +4,10 @@
from enum import Enum

import click
import sys

from pyrelaxmapper import conf, fileutils, utils, relax
from pyrelaxmapper.stats import Stats

# Click Colors
# TODO: turn into enum?
Expand All @@ -26,13 +28,34 @@ def relaxer_relax(relaxer):
"""Run RL Algorithm."""
click.secho('Running RL algorithm.', fg=CInfo)
relaxer.relax()
config = relaxer.config
stat_mru = config.results.path(fileutils.add_timestamp('Status.pkl'))
click.secho('Writing results.', fg=CInfo)
config.results.w(stat_mru, relaxer.status)
return relaxer.status


def relaxer_stats(relaxer, config):
# Create result selector?
def relaxer_stats(status, config):
"""Save statistics for RL Algorithm."""
click.secho('Preparing statistics.', fg=CInfo)
with open(config.results.path(fileutils.add_timestamp('stats.csv')), 'w') as file:
relaxer.save_stats(file)
result_n = 'stats.csv'
status_n = 'Status.pkl'

try:
stat_mru = fileutils.newest(config.results.dir(), status_n)
except ValueError as e:
click.secho('No results found, run \'make relax\' to generate results.', fg=CWarn)
sys.exit(1)

if not status:
click.secho('Reading most recently saved results.')
status = config.results.r(stat_mru)
results = '{}{}'.format(stat_mru[:stat_mru.find(status_n)], result_n)

click.secho('Creating statistics report.', fg=CInfo)
with open(results, 'w') as file:
stats = Stats(status)
stats.create_report(file)


#######################################################################
Expand Down Expand Up @@ -73,14 +96,27 @@ def config_list():
click.echo(''.join([path, ': (exists)' if os.path.exists(path) else '']))

click.secho('Merged configuration:', fg=CInfo)
parser = fileutils.conf_merge()
for section in parser.sections():
default = fileutils.conf_merge([fileutils.dir_pkg_conf()])
merged = fileutils.conf_merge()
sections = list(default.keys())
for section in sections:
click.echo(section)
for key in parser[section].keys():
value = parser[section][key]
value = os.path.expanduser(value)
exists = ': (exists)' if os.path.exists(value) else ''
click.echo(''.join(['\t', key, ': ', value, exists]))
keys = list(default[section].keys())
keys.extend(key for key in merged[section].keys() if key not in keys)
for key in keys:
v_merged = merged.get(section, key, fallback='')
v_default = default.get(section, key, fallback='')
diff = ' '
if v_merged and not v_default:
diff = '+'
elif not v_merged and v_default:
diff = '-'
elif v_merged != v_default:
diff = '~'
diff += ' '
value = os.path.expanduser(v_merged)
exists = ' <-(exists)' if os.path.exists(value) else ''
click.echo(''.join(['\t', diff, key, ': ', value, exists]))


def config_exists():
Expand Down Expand Up @@ -108,7 +144,7 @@ def logger_list(debug=True):
with open(logger_file(), 'r') as file:
for line in file:
if 'level=' in line:
level = line[line.find('=')+1:-1]
level = line[line.find('=') + 1:-1]
break
color = CWarn if level == 'ERROR' else CWarn
click.secho('Logger level: {}'.format(level), fg=color)
Expand All @@ -132,7 +168,7 @@ def logger_file():

def logger_edit():
"""Edit logger config file."""
click.edit(filename=fileutils.last_in_paths('logging.ini'))
click.edit(filename=fileutils.in_lowest_path('logging.ini'))


#######################################################################
Expand Down
14 changes: 14 additions & 0 deletions pyrelaxmapper/conf.py
Expand Up @@ -127,6 +127,20 @@ def __init__(self, parser, wn_classes=None, constrainer=None, translater=None):
self.cleaner = clean
self.translater = translater if translater else Translater()

def __getstate__(self):
    """Build the tuple of attributes that gets pickled for this object.

    The source/target wordnet entries are not taken from ``self._source_wn``
    and ``self._target_wn``; they are re-derived by calling
    ``_select_wordnets`` for the ``source`` and ``target`` sections.
    NOTE(review): per the original author's comment this is meant to store
    only the wordnet classes rather than the loaded data -- confirm against
    ``_select_wordnets``.

    Returns
    -------
    tuple
        Twelve items, in the exact order ``__setstate__`` unpacks them.
    """
    source_wn, target_wn = _select_wordnets(
        self._parser, ['source', 'target'], self._wn_classes)
    return (
        self._parser,
        self._wn_classes,
        source_wn,
        target_wn,
        self.data,
        self.results,
        self.cache,
        self.pos,
        self.constraints,
        self.constr_weights,
        self.cleaner,
        self.translater,
    )

def __setstate__(self, state):
    """Restore this object's attributes from a pickled state tuple.

    Parameters
    ----------
    state : tuple
        Twelve items in the order produced by ``__getstate__``. A sequence
        of any other length raises ``ValueError`` before any attribute is
        assigned.
    """
    # Unpack everything first so a malformed state leaves the object untouched.
    (parser, wn_classes, source_wn, target_wn, data, results,
     cache, pos, constraints, constr_weights, cleaner, translater) = state
    self._parser = parser
    self._wn_classes = wn_classes
    self._source_wn = source_wn
    self._target_wn = target_wn
    self.data = data
    self.results = results
    self.cache = cache
    self.pos = pos
    self.constraints = constraints
    self.constr_weights = constr_weights
    self.cleaner = cleaner
    self.translater = translater

def map_name(self):
"""Mapping name for folder organization."""
return '{} -> {}'.format(self._source_wn.name(), self._target_wn.name())
Expand Down
2 changes: 2 additions & 0 deletions pyrelaxmapper/constraints/hh.py
Expand Up @@ -13,11 +13,13 @@


# TODO: Init from config file.
# TODO: Use only HHType to speed up hashing!
class HHConstraint(Constraint):
"""Constraints which utilize hyper/hyponym connections."""

def __init__(self, orig, dest, weights):
super().__init__(orig, dest, weights)
# codes = ['ii', 'aa', 'ai', 'ia']
codes = ['ii']
# Anything better?
self.rel_weight = {HHDirection.hyper: 1.0, HHDirection.hypo: 0.93}
Expand Down
56 changes: 29 additions & 27 deletions pyrelaxmapper/dirmanager.py
Expand Up @@ -11,6 +11,7 @@


# TODO: Better extension handling
# TODO: Default name is type name
class DirManager:
"""Directory Manager, simplifying many common tasks.
Expand Down Expand Up @@ -45,12 +46,32 @@ def r(self, name, main_group=False, group=None):
File group (folder)
If main_group is set, will be placed inside it.
"""
path = self.path(name, main_group, group)
path = ensure_ext(self.path(name, main_group, group), self._extension)
try:
return self._load_obj(path)
except FileNotFoundError as e:
logger.debug('Reading error "{}". File: {}.'.format(e, path))

def w(self, name, obj, main_group=False, group=None):
"""Write object to file inside managed directory.
Parameters
----------
name : str
Name
obj : any
Object to save.
main_group : bool
If should use main context group.
group : str
File group (folder)
If main_group is set, will be placed inside it.
"""
path = ensure_ext(self.path(name, main_group, group), self._extension)
try:
return self._load_file(self.path(name, main_group, group))
self._save_obj(obj, path)
except FileNotFoundError as e:
logger.debug('Loading error "{}". File: {}.'.format(e, path))
raise e
logger.debug('Writing error "{}". File: {}.'.format(e, path))

def rw(self, name, obj, main_group=False, group=None, force=False):
"""Load or write object instance inside managed directory.
Expand Down Expand Up @@ -109,7 +130,7 @@ def rw_lazy(self, name, call, args=None, main_group=False, group=None, force=Fal
try:
data = self._load_obj(path)
except FileNotFoundError as e:
logger.debug('Loading error "{}". File: {}.'.format(e, path))
logger.debug('Reading error "{}". File: {}.'.format(e, path))
if not data:
data = call(*args)
self._save_obj(data, path)
Expand Down Expand Up @@ -157,6 +178,7 @@ def remove_all(self):
shutil.rmtree(self._directory)
os.makedirs(self._directory)

# Extension
def path(self, filename='', main_group=False, group=None, ensure=True):
"""Path to file inside managed directory."""
directory = self._directory
Expand All @@ -170,28 +192,8 @@ def path(self, filename='', main_group=False, group=None, ensure=True):
else:
return os.path.join(directory, filename)

@staticmethod
def _save_file(lines, filename):
"""Pickle object to filename.
Parameters
----------
obj : any
filename : str
"""
with open(filename, 'w') as f:
f.write(lines)

@staticmethod
def _load_file(filename):
"""Loads pickled object from filename.
Parameters
----------
filename : str
"""
with open(filename, 'w') as f:
return f.read()
def dir(self):
    """Return the managed directory path (``self._directory``)."""
    return self._directory

@staticmethod
def _save_obj(obj, filename):
Expand Down
11 changes: 9 additions & 2 deletions pyrelaxmapper/fileutils.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
"""Configuration utilities."""
import datetime
import glob
import logging
import os
import shutil
Expand Down Expand Up @@ -50,7 +51,7 @@ def conf_merge(paths=search_paths()):
ConfigParser
Single config parser with merged settings.
"""
file_paths = last_in_paths('conf.ini')
file_paths = in_lowest_path('conf.ini', paths)
parser = ConfigParser()
parser.read(file_paths)
return parser
Expand Down Expand Up @@ -79,7 +80,7 @@ def cp_data_app_data(source_name, target_name=None):
cp(dir_pkg_data(), source_name, dir_app_data(), target_name)


def last_in_paths(filename, paths=search_paths()):
def in_lowest_path(filename, paths=search_paths()):
"""Find last file with pattern in paths.
Parameters
Expand All @@ -98,6 +99,12 @@ def last_in_paths(filename, paths=search_paths()):
return file_paths[-1] if file_paths else file_paths


def newest(directory, extension=''):
    """Return the path in ``directory`` with the greatest ctime.

    Parameters
    ----------
    directory : str
        Directory to search (not recursive).
    extension : str
        Suffix the file name must end with. The default empty string
        leaves the glob pattern as ``*``.

    Returns
    -------
    str
        Matching path (``directory`` joined with the file name) for which
        ``os.path.getctime`` is largest.

    Raises
    ------
    ValueError
        If no path matches, because ``max`` receives an empty iterable.
    """
    suffix_glob = '*{}'.format(extension)
    candidates = glob.iglob(os.path.join(directory, suffix_glob))
    return max(candidates, key=os.path.getctime)


def find_in_paths(filename, paths=search_paths()):
"""Find last file with pattern in paths.
Expand Down
3 changes: 2 additions & 1 deletion pyrelaxmapper/relax.py
Expand Up @@ -60,7 +60,8 @@ def _relax_loop(self, constrainer):
writer.writerows(self.stats.stat_total().items())

def _relax(self, constrainer):
with click.progressbar(self.status.remaining.values(), label='Constraining') as nodes:
todo = self.status.remaining.values()
with click.progressbar(todo, label='Constraining nodes:') as nodes:
# for node in nodes:
for idx, node in enumerate(nodes):
# if idx > 0 and idx % 50 == 0:
Expand Down

0 comments on commit 2fd1c06

Please sign in to comment.