Skip to content
Browse files

Remove broken import and cygwin-specific hack

  • Loading branch information...
1 parent 0e7982a commit 9a68e8c7ed562cc6e9e0f234e905dc122915ed53 @google-feinberg google-feinberg committed Jul 8, 2010
Showing with 11 additions and 14 deletions.
  1. +11 −14 html2text.py
View
25 html2text.py
@@ -3,26 +3,26 @@
import re
"""
-To the extent possible under law, Jonathan Feinberg has waived all copyright
-and related or neighboring rights to html2text. This work is published from United States.
+To the extent possible under law, Jonathan Feinberg has waived all copyright
+and related or neighboring rights to html2text. This work is published from United States.
"""
class TextFormatter:
def __init__(self, process=None, lynx='/usr/bin/lynx'):
self.lynx = lynx
self.process = process if process else lambda t:t
-
+
def html2text(self, unicode_html_source):
"Expects unicode; returns unicode"
- text = Popen([self.lynx,
- '-assume-charset=UTF-8',
- '-display-charset=UTF-8',
- '-dump',
- '-stdin'],
- stdin=PIPE,
+ text = Popen([self.lynx,
+ '-assume-charset=UTF-8',
+ '-display-charset=UTF-8',
+ '-dump',
+ '-stdin'],
+ stdin=PIPE,
stdout=PIPE).communicate(input=unicode_html_source.encode('utf-8'))[0].decode('utf-8')
return self.process(text)
-
+
def htmlfile2text(self, path, encoding='utf-8'):
"Returns unicode. Attempts to decode bytes in given file as utf-8 by default."
with open(path, "r") as f:
@@ -37,7 +37,4 @@ def htmlfile2text(self, path, encoding='utf-8'):
text = sys.stdin.read()
encoding = sys.argv[2] if len(sys.argv) > 2 else 'utf-8'
unicode_html = text.decode(encoding)
-
- import converter
- print TextFormatter(lynx="c:/cygwin/bin/lynx.exe").html2text(unicode_html).encode('utf-8')
-
+ print TextFormatter().html2text(unicode_html).encode('utf-8')

0 comments on commit 9a68e8c

Please sign in to comment.
Something went wrong with that request. Please try again.