Skip to content
This repository has been archived by the owner on Apr 22, 2021. It is now read-only.

Commit

Permalink
Merge pull request #37 from ajm188/remove-multiprocessing-and-dir-traversal
Browse files Browse the repository at this point in the history

Remove multiprocessing and dir traversal
  • Loading branch information
paiweilai committed Aug 29, 2016
2 parents 9e8afeb + e4762e9 commit 98b7ed2
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 122 deletions.
16 changes: 5 additions & 11 deletions docs/source/cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,26 +19,20 @@ Read it
.. code-block:: bash
$ undebt --help
usage: undebt [-h] --pattern PATH [--verbose] [--dry-run]
              [FILE [FILE...]]
positional arguments:
  FILE [FILE...]        files to be modified (if not passed uses stdin)
optional arguments:
  -h, --help            show this help message and exit
  --pattern PATH, -p PATH
                        paths to pattern definition files
  --verbose, -v         verbose logging for troubleshooting
  --dry-run, -d         only print to stdout; do not overwrite files
Try it out
Expand Down
7 changes: 0 additions & 7 deletions tests/cmd/main_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,6 @@ def test_single_file():
assert _read_input_file() == method_to_function_output_contents == _read_output_file()


def test_directory():
args = ["undebt", "-p", method_to_function_path, "-e", "txt", tests_inputs_directory, "--verbose"]
with mock.patch("sys.argv", args):
main()
assert _read_input_file() == method_to_function_output_contents == _read_output_file()


def test_dry_run(capsys):
args = ["undebt", "-p", method_to_function_path, "--dry-run", method_to_function_input_path, "--verbose"]
with mock.patch("sys.argv", args):
Expand Down
118 changes: 14 additions & 104 deletions undebt/cmd/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,8 @@
from __future__ import print_function

import argparse
import multiprocessing
import os
import sys
import time
import traceback

from undebt.cmd import logger
Expand Down Expand Up @@ -57,69 +55,24 @@ def _write_result_text(result_text, path, dry_run):
def _handle_arguments():
parser = argparse.ArgumentParser(prog='undebt')
parser.add_argument(
'paths', nargs='*', metavar='PATH',
help='paths to files or directories (searches for extension recursively) to be modified; '
'uses stdin if not passed')
'files', nargs='*', metavar='FILE',
help='files to be modified; uses stdin if not passed',
)
parser.add_argument(
'--pattern', '-p', metavar='PATH', action='append', required=True,
help='paths to pattern definition files')
parser.add_argument(
'--extension', '-e', metavar='EXT', action='append',
help='extensions of files to be modified when searching a directory')
parser.add_argument(
'--jobs', '-j', metavar='INTEGER', type=int, default=16,
help='number of processes to run in parallel (default is 16)')
help='paths to pattern definition files',
)
parser.add_argument(
'--verbose', '-v', action='store_true', default=False,
help='verbose logging for troubleshooting')
help='verbose logging for troubleshooting',
)
parser.add_argument(
'--dry-run', '-d', action='store_true', default=False,
help='only print to stdout; do not overwrite files')
help='only print to stdout; do not overwrite files',
)
return parser.parse_args()


@_exit_fail_upon_error
def _fix_exts(extensions):
    """Normalize file extensions by stripping a single leading ".".

    Args:
        extensions: list of extension strings (e.g. [".py", "txt"]),
            or None.

    Returns:
        None when *extensions* is None; otherwise a new list where each
        entry has at most one leading dot removed (".py" -> "py",
        "txt" -> "txt").
    """
    if extensions is None:
        return None
    return [ext[1:] if ext.startswith(".") else ext for ext in extensions]


@_exit_fail_upon_error
def _find_files(paths, extensions):
    """Yield the files to be processed.

    Args:
        paths: list of file or directory paths, or None (yields nothing).
        extensions: extensions (without the leading dot) that select files
            when a directory is walked; may be None only if every path is
            a plain file.

    Yields:
        str: paths to individual files.

    Exits:
        With status 1 if a directory is supplied but *extensions* is None.
    """
    if paths is None:
        return

    for path in paths:

        if os.path.isfile(path):
            yield path
            continue

        # Hoisted out of the walk: previously this was checked once per
        # file, so an empty directory silently produced no error at all.
        if extensions is None:
            # Fixed message: there is no --input option; directories come
            # from the positional path arguments.
            log.error('must pass --extension when a directory path is given')
            sys.exit(1)

        for root, dirs, files in os.walk(path):

            for f in files:
                ext = os.path.splitext(f)[-1].lstrip('.')
                if ext in extensions:
                    yield os.path.join(root, f)

            # Prune hidden directories (".git", ".tox", ...) in place so
            # os.walk skips them; "." / ".." style all-dot names are kept.
            for d in dirs[:]:
                if d != "." * len(d) and d.startswith("."):  # ignore .*
                    dirs.remove(d)


def _process_file(patterns, text_file, dry_run):
log.info('undebting {}'.format(text_file))

Expand Down Expand Up @@ -157,57 +110,14 @@ def main():
args = _handle_arguments()
logger.setup(args.verbose) # Reset logging level

if args.jobs <= 0:
log.error('number of processes must be > 0')
sys.exit(1)

processor = _file_processor(args.pattern, args.dry_run)
files = list(_find_files(args.paths, _fix_exts(args.extension)))

if bool(files) != bool(args.paths):
log.error('could not find any files for the given paths and extension')
sys.exit(1)
files = args.files

if not files: # Single process mode if stdin
log.info('running in stdin/stdout mode')
processor(None)
return

elif len(files) == 1 or args.jobs == 1: # Single process if only one file or only one process
log.info('running across {} file(s) using a single process'
.format(len(files)))
processor(files[0])

else:
process_pool = multiprocessing.Pool(args.jobs)
try:

result = process_pool.map_async(
processor,
files,
)
process_pool.close()

log.info('running across {} file(s) using {} processes'
.format(len(files), args.jobs))

# Cannot do process_pool.wait() because it prevents KeyboardInterrupt from being sent
# See http://stackoverflow.com/questions/1408356/keyboard-interrupts-with-pythons-multiprocessing-pool
while not result.ready():
time.sleep(0.01)

if not result.successful():
log.error('multiprocessing failed (are your replace functions pickleable?)')
sys.exit(1)

result = result.get()
assert len(result) == len(files)
if not all(result):
log.error('failed to process {} files'
.format(len(result) - sum(result)))
sys.exit(1)

except:
process_pool.terminate()
raise
finally:
process_pool.join()
log.info('running across {} file(s)'.format(len(files)))
for f in files:
processor(f)

0 comments on commit 98b7ed2

Please sign in to comment.