Skip to content

Commit

Permalink
* Added huge HTML file generator
Browse files Browse the repository at this point in the history
* Added test to measure memory usage with memory_profiler module #8614
  • Loading branch information
andresriancho committed May 21, 2015
1 parent 43ffc7e commit 2af252f
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 1 deletion.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -92,3 +92,5 @@ debian/w3af.substvars
debian/w3af/ debian/w3af/
docker/kali-debootstrap docker/kali-debootstrap
*.deb *.deb

w3af/core/data/parsers/tests/data/huge.html
72 changes: 72 additions & 0 deletions w3af/core/data/parsers/tests/generate_html_file.py
Original file line number Original file line Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/usr/bin/env python
import sys

# Filler text repeated in the title, paragraphs and link labels of the
# generated document.
SOME_TEXT = 'This is placeholder text'
# Relative path of the HTML file written by main().
OUTPUT_FILE = 'w3af/core/data/parsers/tests/data/huge.html'


def main():
    """
    Generate a huge HTML file which is useful for testing parser performance.

    The content is not really real-life data, but it forces the parser to use
    a lot of memory if it loads the whole thing right away / keeps the tree
    in memory. The document is made of two "long" sections (many sibling
    tags) followed by a "deep" section (many nested <div> tags).

    :return: None, we write the file to OUTPUT_FILE (data/huge.html)
    """
    # The with-block makes sure the data is flushed and the handle is closed
    # before returning, even if one of the writes fails.
    with open(OUTPUT_FILE, 'w') as output:

        def write(s):
            # Every fragment goes on its own line
            output.write('%s\n' % s)

        write('<html>')
        write('<title>%s</title>' % SOME_TEXT)

        write('<body>')

        #
        #   Long
        #
        for i in range(5000):
            write('<p>')
            write(SOME_TEXT)
            write('</p>')

            write('<p>')
            write(SOME_TEXT)
            write('<a href="/%s">%s</a>' % (i, SOME_TEXT))
            write('</p>')

            write('<div>')
            write('<a href="/%s">%s</a>' % (i, SOME_TEXT))
            write(SOME_TEXT)
            write('<form action="/%s" method="POST">' % i)
            write('<input type="text" name="abc-%s">' % i)
            write('</form>')
            write('</div>')

        #
        #   Long II
        #
        for i in range(5000):
            write('<div>')
            write('<img src="/img-%s" />' % i)
            write('<a href="mailto:andres%s@test.com">%s</a>' % (i, SOME_TEXT))
            write('</div>')

        #
        #   Deep
        #
        # Open the nested <div> tags without closing them...
        for i in range(5000):
            write('<div id="id-%s">' % i)
            write('<a href="/deep-div-%s">%s</a>' % (i, SOME_TEXT))

        # ...and close one per iteration, after adding a paragraph to it.
        for i in range(5000):
            write('<p>')
            write(SOME_TEXT)
            write('</p>')
            write('</div>')

        write('</body>')
        write('</html>')


if __name__ == '__main__':
    # Script entry point: generate the file and hand main()'s result to
    # sys.exit() as the process exit status.
    exit_status = main()
    sys.exit(exit_status)
27 changes: 26 additions & 1 deletion w3af/core/data/parsers/tests/test_htmlparser_performance.py
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -26,12 +26,14 @@
import os import os


from nose.plugins.attrib import attr from nose.plugins.attrib import attr
from memory_profiler import profile


from w3af import ROOT_PATH from w3af import ROOT_PATH
from w3af.core.data.url.HTTPResponse import HTTPResponse from w3af.core.data.url.HTTPResponse import HTTPResponse
from w3af.core.data.dc.headers import Headers from w3af.core.data.dc.headers import Headers
from w3af.core.data.parsers.html import HTMLParser from w3af.core.data.parsers.html import HTMLParser
from w3af.core.data.parsers.url import URL from w3af.core.data.parsers.url import URL
from w3af.core.data.parsers.tests.generate_html_file import OUTPUT_FILE




class TestHTMLParserPerformance(unittest.TestCase): class TestHTMLParserPerformance(unittest.TestCase):
Expand Down Expand Up @@ -87,4 +89,27 @@ def measure_memory(self, _id):
import psutil import psutil
self_pid = psutil.Process() self_pid = psutil.Process()
# pylint: disable=E1101 # pylint: disable=E1101
print self_pid.memory_info() print self_pid.memory_info()


@profile
def test():
    """
    Parse the generated huge HTML file so that memory usage can be profiled.

    Run using:
        python -m memory_profiler w3af/core/data/parsers/tests/test_htmlparser_performance.py

    That will activate the profiler.
    """
    # Read the whole document through a context manager so the file handle
    # is closed as soon as the body is in memory.
    with open(OUTPUT_FILE) as html_file:
        body = html_file.read()

    url = URL('http://www.clarin.com.ar/')
    headers = Headers()
    headers['content-type'] = 'text/html'
    response = HTTPResponse(200, body, headers, url, url, charset='utf-8')

    # NOTE(review): `p` is never read; it looks like it is kept, together
    # with the two prints below, so the profiler reports memory usage after
    # the parser was created -- confirm before removing.
    p = HTMLParser(response)
    print('The')
    print('End')


# Script entry point: call the profiled test() once when this module is
# executed directly.
if __name__ == '__main__':
    test()

0 comments on commit 2af252f

Please sign in to comment.