From 052080a742180c15240e3663d2fcf1e22607ad32 Mon Sep 17 00:00:00 2001
From: Erik Rose
Date: Wed, 1 Aug 2012 12:25:24 -0700
Subject: [PATCH] Let chardetect take data on stdin.

This comes in handy for sending it segments of very large compressed files.
---
Review notes (commentary only; not part of the commit message or the diff):
  * Line breaks, blank lines and indentation were restored from a copy of this
    patch that had been collapsed onto one line. The layout agrees with the
    hunk headers and the diffstat below, but exact whitespace is inferred and
    should be confirmed against the original commit.
  * description_of() now receives an iterable file object plus a display name
    (default 'stdin') instead of a path, and feeds every line it yields to
    UniversalDetector.
  * main() reads from stdin when no path arguments are given; otherwise it
    opens each path in 'rb' mode and passes the handle to description_of().
    Those handles are never explicitly closed in this patch.
  * The new parameter name `file` shadows the Python 2 builtin of that name.

 bin/chardetect.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/bin/chardetect.py b/bin/chardetect.py
index 727b5cdb..0a795c0a 100755
--- a/bin/chardetect.py
+++ b/bin/chardetect.py
@@ -1,6 +1,5 @@
 #!/usr/bin/env python
-"""
-Script which takes one or more file paths and reports on their detected
+"""Script which takes one or more file paths and reports on their detected
 encodings
 
 Example::
@@ -9,30 +8,35 @@
     somefile: windows-1252 with confidence 0.5
     someotherfile: ascii with confidence 1.0
 
+If no paths are provided, it takes its input from stdin.
+
 """
-from sys import argv
+from sys import argv, stdin
 
 from chardet.universaldetector import UniversalDetector
 
 
-def description_of(path):
+def description_of(file, name='stdin'):
     """Return a string describing the probable encoding of a file."""
     u = UniversalDetector()
-    for line in open(path, 'rb'):
+    for line in file:
         u.feed(line)
     u.close()
     result = u.result
     if result['encoding']:
-        return '%s: %s with confidence %s' % (path,
+        return '%s: %s with confidence %s' % (name,
                                               result['encoding'],
                                               result['confidence'])
     else:
-        return '%s: no result' % path
+        return '%s: no result' % name
 
 
 def main():
-    for path in argv[1:]:
-        print description_of(path)
+    if len(argv) <= 1:
+        print description_of(stdin)
+    else:
+        for path in argv[1:]:
+            print description_of(open(path, 'rb'), path)
 
 
 if __name__ == '__main__':