Permalink
Browse files

remove lxml-dependent code

  • Loading branch information...
1 parent a7ba2f7 commit 293b79af9120c3fc056db60492f88e21a5610ab6 @mhils mhils committed Dec 10, 2016
View
@@ -27,7 +27,7 @@ test_script:
- ps: |
$Env:VERSION = $(python mitmproxy/version.py)
$Env:SKIP_MITMPROXY = "python -c `"print('skip mitmproxy')`""
- tox -e wheel -- https://snapshots.mitmproxy.org/misc/lxml-3.6.0-cp35-cp35m-win32.whl
+ tox -e wheel
tox -e rtool -- bdist
deploy_script:
@@ -11,7 +11,7 @@ def __init__(self, iframe_url):
def response(self, flow):
if flow.request.host in self.iframe_url:
return
- html = BeautifulSoup(flow.response.content, "lxml")
+ html = BeautifulSoup(flow.response.content)
if html.body:
iframe = html.new_tag(
"iframe",
@@ -22,7 +22,7 @@
from mitmproxy.net import http
from mitmproxy.utils import strutils
from . import (
- auto, raw, hex, json, xml, wbxml, html, javascript, css,
+ auto, raw, hex, json, html_outline, wbxml, javascript, css,
urlencoded, multipart, image, query, protobuf
)
from .base import View, VIEW_CUTOFF, KEY_MAX, format_text, format_dict
@@ -163,10 +163,8 @@ def get_content_view(viewmode: View, data: bytes, **metadata):
add(raw.ViewRaw())
add(hex.ViewHex())
add(json.ViewJSON())
-add(xml.ViewXML())
add(wbxml.ViewWBXML())
-add(html.ViewHTML())
-add(html.ViewHTMLOutline())
+add(html_outline.ViewHTMLOutline())
add(javascript.ViewJavaScript())
add(css.ViewCSS())
add(urlencoded.ViewURLEncoded())
@@ -1,42 +0,0 @@
-import html2text
-import lxml.etree
-import lxml.html
-
-from mitmproxy.contentviews.base import View, format_text
-from mitmproxy.utils import strutils
-
-
-class ViewHTML(View):  # content view that pretty-prints text/html bodies with lxml
- name = "HTML"
- prompt = ("html", "h")
- content_types = ["text/html"]
-
- def __call__(self, data, **metadata):
- if strutils.is_xml(data):  # only data accepted by strutils.is_xml is parsed and rendered
- parser = lxml.etree.HTMLParser(
- strip_cdata=True,
- remove_blank_text=True
- )
- d = lxml.html.fromstring(data, parser=parser)
- docinfo = d.getroottree().docinfo
- s = lxml.etree.tostring(
- d,
- pretty_print=True,
- doctype=docinfo.doctype,  # re-emit the doctype read from the parsed input
- encoding='utf8'
- )
- return "HTML", format_text(s)  # (view label, formatted pretty-printed markup)
-
-
-class ViewHTMLOutline(View):  # content view that renders text/html through html2text
- name = "HTML Outline"
- prompt = ("html outline", "o")
- content_types = ["text/html"]
-
- def __call__(self, data, **metadata):
- data = data.decode("utf-8", "replace")  # undecodable bytes are replaced instead of raising
- h = html2text.HTML2Text(baseurl="")
- h.ignore_images = True
- h.body_width = 0  # presumably 0 turns off html2text line wrapping -- confirm against html2text docs
- outline = h.handle(data)
- return "HTML Outline", format_text(outline)  # (view label, formatted outline text)
@@ -0,0 +1,17 @@
+import html2text
+
+from mitmproxy.contentviews import base
+
+
+class ViewHTMLOutline(base.View):  # content view that renders text/html through html2text
+ name = "HTML Outline"
+ prompt = ("html outline", "o")
+ content_types = ["text/html"]
+
+ def __call__(self, data, **metadata):
+ data = data.decode("utf-8", "replace")  # undecodable bytes are replaced instead of raising
+ h = html2text.HTML2Text(baseurl="")
+ h.ignore_images = True
+ h.body_width = 0  # presumably 0 turns off html2text line wrapping -- confirm against html2text docs
+ outline = h.handle(data)
+ return "HTML Outline", base.format_text(outline)  # (view label, formatted outline text)
@@ -1,45 +0,0 @@
-import lxml.etree
-
-from . import base
-
-
-class ViewXML(base.View):  # content view that re-serializes text/xml bodies, pretty-printed, with lxml
- name = "XML"
- prompt = ("xml", "x")
- content_types = ["text/xml"]
-
- def __call__(self, data, **metadata):
- parser = lxml.etree.XMLParser(
- remove_blank_text=True,
- resolve_entities=False,
- strip_cdata=False,
- recover=False
- )
- try:
- document = lxml.etree.fromstring(data, parser)
- except lxml.etree.XMLSyntaxError:
- return None  # input that fails to parse is not rendered by this view
- docinfo = document.getroottree().docinfo
-
- prev = []  # serialized siblings that precede the root element, kept in document order
- p = document.getroottree().getroot().getprevious()
- while p is not None:
- prev.insert(
- 0,
- lxml.etree.tostring(p)
- )
- p = p.getprevious()  # keep walking backwards until there is no earlier sibling
- doctype = docinfo.doctype
- if prev:
- doctype += "\n".join(p.decode() for p in prev).strip()  # appended directly after the doctype text
- doctype = doctype.strip()
-
- s = lxml.etree.tostring(
- document,
- pretty_print=True,
- xml_declaration=True,
- doctype=doctype or None,  # an empty doctype string is passed as None
- encoding=docinfo.encoding
- )
-
- return "XML-like data", base.format_text(s)  # (view label, formatted serialized document)
View
@@ -1,2 +1 @@
-https://snapshots.mitmproxy.org/misc/lxml-3.6.0-cp35-cp35m-win32.whl; sys_platform == 'win32' and python_version == '3.5'
-e .[dev,examples,contentviews]
View
@@ -70,7 +70,6 @@
"html2text>=2016.1.8, <=2016.9.19",
"hyperframe>=4.0.1, <5",
"jsbeautifier>=1.6.3, <1.7",
- "lxml>=3.5.0, <=3.6.0", # no wheels for 3.6.1 yet.
"Pillow>=3.2, <3.5",
"passlib>=1.6.5, <1.8",
"pyasn1>=0.1.9, <0.2",
@@ -1,18 +0,0 @@
-from mitmproxy.contentviews import html
-from . import full_eval
-
-
-def test_view_html():  # ViewHTML gives a truthy result for markup and a falsy one for plain text
- v = full_eval(html.ViewHTML())
- s = b"<html><br><br></br><p>one</p></html>"
- assert v(s)
-
- s = b"gobbledygook"
- assert not v(s)  # input without markup is expected to produce a falsy result
-
-
-def test_view_html_outline():  # ViewHTMLOutline gives a truthy result for markup and for undecodable bytes
- v = full_eval(html.ViewHTMLOutline())
- s = b"<html><br><br></br><p>one</p></html>"
- assert v(s)
- assert v(b'\xfe')  # 0xFE is not valid UTF-8; the view must still produce output
@@ -0,0 +1,9 @@
+from mitmproxy.contentviews import html_outline
+from test.mitmproxy.contentviews import full_eval
+
+
+def test_view_html_outline():  # ViewHTMLOutline gives a truthy result for markup and for undecodable bytes
+ v = full_eval(html_outline.ViewHTMLOutline())
+ s = b"<html><br><br></br><p>one</p></html>"
+ assert v(s)
+ assert v(b'\xfe')  # 0xFE is not valid UTF-8; the view must still produce output
@@ -1,17 +0,0 @@
-from mitmproxy.contentviews import xml
-from . import full_eval
-
-
-def test_view_xml():  # ViewXML gives a truthy result for well-formed XML and a falsy one for malformed input
- v = full_eval(xml.ViewXML())
- assert v(b"<foo></foo>")
- assert not v(b"<foo>")  # unclosed element: expected to produce a falsy result
- s = b"""<?xml version="1.0" encoding="UTF-8"?>
- <?xml-stylesheet title="XSL_formatting"?>
- <rss
- xmlns:media="http://search.yahoo.com/mrss/"
- xmlns:atom="http://www.w3.org/2005/Atom"
- version="2.0">
- </rss>
- """
- assert v(s)  # document with a processing instruction ahead of the root element

0 comments on commit 293b79a

Please sign in to comment.