Skip to content

Commit

Permalink
Let chardetect take data on stdin.
Browse files Browse the repository at this point in the history
This comes in handy for sending it segments of very large compressed files.
  • Loading branch information
erikrose committed Aug 1, 2012
1 parent 73ab963 commit 052080a
Showing 1 changed file with 13 additions and 9 deletions.
22 changes: 13 additions & 9 deletions bin/chardetect.py
@@ -1,6 +1,5 @@
#!/usr/bin/env python
"""
Script which takes one or more file paths and reports on their detected
encodings
Example::
Expand All @@ -9,30 +8,35 @@
somefile: windows-1252 with confidence 0.5
someotherfile: ascii with confidence 1.0
If no paths are provided, it takes its input from stdin.
"""
from sys import argv
from sys import argv, stdin

from chardet.universaldetector import UniversalDetector


def description_of(file, name='stdin'):
    """Return a string describing the probable encoding of a file.

    :param file: An iterable yielding chunks of raw data to examine, e.g. a
        file object opened in binary mode (iterated line by line).
    :param name: Label used for the data in the returned description;
        defaults to ``'stdin'``.
    :returns: ``'<name>: <encoding> with confidence <confidence>'`` when the
        detector reports an encoding, otherwise ``'<name>: no result'``.
    """
    detector = UniversalDetector()
    for line in file:
        detector.feed(line)
        # The detector sets ``done`` once it has reached a conclusion;
        # reading the rest of a (possibly very large) input is wasted work.
        if detector.done:
            break
    # close() finalizes the detection and populates ``result``.
    detector.close()
    result = detector.result
    if result['encoding']:
        return '%s: %s with confidence %s' % (name,
                                              result['encoding'],
                                              result['confidence'])
    return '%s: no result' % name


def main():
    """Print the detected encoding of each path given on the command line.

    If no paths are provided, the data to examine is read from stdin.
    """
    if len(argv) <= 1:
        # On Python 3, sys.stdin yields text; the raw bytes live on its
        # ``buffer`` attribute. Python 2's stdin has no such attribute and
        # already yields bytes, so fall back to stdin itself.
        print(description_of(getattr(stdin, 'buffer', stdin)))
    else:
        for path in argv[1:]:
            # Use a context manager so each file is closed as soon as it has
            # been examined, rather than leaking the handle.
            with open(path, 'rb') as f:
                print(description_of(f, path))


if __name__ == '__main__':
Expand Down

0 comments on commit 052080a

Please sign in to comment.