Permalink
Browse files

Let chardetect take data on stdin.

This comes in handy for sending it segments of very large compressed files.
  • Loading branch information...
1 parent 73ab963; commit 052080a742180c15240e3663d2fcf1e22607ad32; erikrose (@erikrose) committed Aug 1, 2012
Showing with 13 additions and 9 deletions.
  1. +13 −9 bin/chardetect.py
View
@@ -1,6 +1,5 @@
#!/usr/bin/env python
-"""
-Script which takes one or more file paths and reports on their detected
+"""Script which takes one or more file paths and reports on their detected
encodings
Example::
@@ -9,30 +8,35 @@
somefile: windows-1252 with confidence 0.5
someotherfile: ascii with confidence 1.0
+If no paths are provided, it takes its input from stdin.
+
"""
-from sys import argv
+from sys import argv, stdin
from chardet.universaldetector import UniversalDetector
-def description_of(path):
+def description_of(file, name='stdin'):
"""Return a string describing the probable encoding of a file."""
u = UniversalDetector()
- for line in open(path, 'rb'):
+ for line in file:
u.feed(line)
u.close()
result = u.result
if result['encoding']:
- return '%s: %s with confidence %s' % (path,
+ return '%s: %s with confidence %s' % (name,
result['encoding'],
result['confidence'])
else:
- return '%s: no result' % path
+ return '%s: no result' % name
def main():
- for path in argv[1:]:
- print description_of(path)
+ if len(argv) <= 1:
+ print description_of(stdin)
+ else:
+ for path in argv[1:]:
+ print description_of(open(path, 'rb'), path)
if __name__ == '__main__':

0 comments on commit 052080a

Please sign in to comment.