Permalink
Browse files

* Added huge HTML file generator

* Added test to measure memory usage with memory_profiler module #8614
  • Loading branch information...
1 parent 43ffc7e commit 2af252f0a63ab1ddbb58184cb7e5e7e53e9b1f44 @andresriancho committed May 21, 2015
View
@@ -92,3 +92,5 @@ debian/w3af.substvars
debian/w3af/
docker/kali-debootstrap
*.deb
+
+w3af/core/data/parsers/tests/data/huge.html
@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+import sys
+
+SOME_TEXT = 'This is placeholder text'  # filler text reused throughout the generated HTML
+OUTPUT_FILE = 'w3af/core/data/parsers/tests/data/huge.html'  # relative path: resolved against the current working directory
+
+
+def main():
+    """
+    Generate a huge HTML file which is useful for testing parser performance.
+
+    This is not really real-life data, but it forces the parser to use a lot
+    of memory if it loads the whole thing right away or keeps the tree in
+    memory.
+
+    OUTPUT_FILE is a relative path, so the file lands relative to the
+    directory the script is run from.
+
+    :return: None, we write the file to OUTPUT_FILE
+    """
+    # The context manager flushes and closes the file even when a write
+    # fails; the previous version never closed the handle.
+    with open(OUTPUT_FILE, 'w') as output:
+
+        def write(s):
+            # Every fragment goes on its own line
+            output.write('%s\n' % s)
+
+        write('<html>')
+        write('<title>%s</title>' % SOME_TEXT)
+
+        write('<body>')
+
+        #
+        # Long: many sibling paragraphs, links and forms
+        #
+        for i in xrange(5000):
+            write('<p>')
+            write(SOME_TEXT)
+            write('</p>')
+
+            write('<p>')
+            write(SOME_TEXT)
+            write('<a href="/%s">%s</a>' % (i, SOME_TEXT))
+            write('</p>')
+
+            write('<div>')
+            write('<a href="/%s">%s</a>' % (i, SOME_TEXT))
+            write(SOME_TEXT)
+            write('<form action="/%s" method="POST">' % i)
+            write('<input type="text" name="abc-%s">' % i)
+            write('</form>')
+            write('</div>')
+
+        #
+        # Long II: many sibling divs with an image and a mailto link
+        #
+        for i in xrange(5000):
+            write('<div>')
+            write('<img src="/img-%s" />' % i)
+            write('<a href="mailto:andres%s@test.com">%s</a>' % (i, SOME_TEXT))
+            write('</div>')
+
+        #
+        # Deep: open 5000 divs without closing them, so each one nests
+        # inside the previous one...
+        #
+        for i in xrange(5000):
+            write('<div id="id-%s">' % i)
+            write('<a href="/deep-div-%s">%s</a>' % (i, SOME_TEXT))
+
+        # ...and then close them one by one, adding a paragraph at each level
+        for i in xrange(5000):
+            write('<p>')
+            write(SOME_TEXT)
+            write('</p>')
+            write('</div>')
+
+        write('</body>')
+        write('</html>')
+
+
+if __name__ == '__main__':  # only generate the file when run as a script
+ sys.exit(main())  # main() has no return statement, so sys.exit(None) exits with status 0
@@ -26,12 +26,14 @@
import os
from nose.plugins.attrib import attr
+from memory_profiler import profile
from w3af import ROOT_PATH
from w3af.core.data.url.HTTPResponse import HTTPResponse
from w3af.core.data.dc.headers import Headers
from w3af.core.data.parsers.html import HTMLParser
from w3af.core.data.parsers.url import URL
+from w3af.core.data.parsers.tests.generate_html_file import OUTPUT_FILE
class TestHTMLParserPerformance(unittest.TestCase):
@@ -87,4 +89,27 @@ def measure_memory(self, _id):
import psutil
self_pid = psutil.Process()
# pylint: disable=E1101
- print self_pid.memory_info()
+ print self_pid.memory_info()
+
+
+@profile
+def test():
+    """
+    Parse the generated huge HTML file under the memory_profiler decorator.
+
+    Run using:
+        python -m memory_profiler w3af/core/data/parsers/tests/test_htmlparser_performance.py
+
+    That will activate the profiler.
+
+    OUTPUT_FILE is a relative path, so the file is looked up relative to the
+    directory this is run from.
+
+    :return: None
+    """
+    # Read the whole document and close the handle right away instead of
+    # leaking it for the rest of the run
+    with open(OUTPUT_FILE) as html_file:
+        body = html_file.read()
+
+    url = URL('http://www.clarin.com.ar/')
+    headers = Headers()
+    headers['content-type'] = 'text/html'
+    response = HTTPResponse(200, body, headers, url, url, charset='utf-8')
+
+    # Bound to a local name so the parser object stays referenced until the
+    # function returns (presumably so the profiler report shows its cost)
+    p = HTMLParser(response)
+    print('The')
+    print('End')
+
+
+if __name__ == '__main__':  # run the profiled function when executed as a script
+ test()

0 comments on commit 2af252f

Please sign in to comment.