Skip to content

Commit

Permalink
utils.unicode_from_html
Browse files Browse the repository at this point in the history
  • Loading branch information
Kenneth Reitz committed Aug 24, 2011
1 parent a94541d commit 636e61d
Showing 1 changed file with 19 additions and 0 deletions.
19 changes: 19 additions & 0 deletions requests/utils.py
Expand Up @@ -110,6 +110,25 @@ def get_encoding_from_headers(headers):
return params['charset'].strip("'\"")


def unicode_from_html(content):
    """Attempts to decode an HTML string into unicode.

    Each encoding reported by ``get_encodings_from_content`` is tried in
    order, and the result of the first successful decode is returned.

    :param content: the HTML content to decode.
    :returns: the decoded unicode string, or ``content`` unchanged if none
        of the candidate encodings could be used to decode it.
    """

    encodings = get_encodings_from_content(content)

    for encoding in encodings:
        try:
            return unicode(content, encoding)
        except (UnicodeError, LookupError, TypeError):
            # UnicodeError: the content is not valid in this encoding.
            # LookupError: the declared encoding name is not a known codec
            #   (e.g. a misspelled or made-up charset in the page).
            # TypeError: the content cannot be decoded by ``unicode()``.
            # In every case, fall through and try the next candidate.
            continue

    return content




def get_unicode_from_response(r):
"""Returns the requested content back in unicode.
Expand Down

0 comments on commit 636e61d

Please sign in to comment.