
Commit

Merge pull request #3 from mrname/combined-parser
Combined parser
mrname committed Jul 5, 2015
2 parents 312f18a + ca9bab3 commit 8b6544a
Showing 13 changed files with 318 additions and 695 deletions.
695 changes: 21 additions & 674 deletions LICENSE.txt

Large diffs are not rendered by default.

66 changes: 55 additions & 11 deletions README.rst
@@ -64,7 +64,7 @@ a ``HarParser`` with `har_parser=parser`, or a ``dict`` representing the JSON of
file (see example above) with `har_data=har_data`::

    import json
    from haralyzer import HarPage

    with open('har_data.har', 'r') as f:
        har_page = HarPage('page_3', har_data=json.loads(f.read()))
@@ -75,28 +75,72 @@ file (see example above) with `har_data=har_data`::
    har_page.image_load_time
    # prints 713
    # We could do this with 'css', 'js', 'html', 'audio', or 'video'

    ### WORK WITH SIZES (all sizes are in bytes) ###

    # Get the total page size (with all assets)
    har_page.page_size
    # prints 2423765

    # Get the total image size
    har_page.image_size
    # prints 733488
    # We could do this with 'css', 'js', 'html', 'audio', or 'video'
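    # For example, the same calls work for CSS (a sketch; the printed
    # values depend entirely on your own HAR data):
    har_page.css_load_time
    har_page.css_size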


MultiHarParser
++++++++++++++

The ``MultiHarParser`` takes a ``list`` of ``dict`` objects, each of which represents
the JSON of a full HAR file. The concept here is that you can provide multiple HAR
files of the same page (representing multiple test runs) and the ``MultiHarParser``
will provide aggregate results for load times::

    import json
    from haralyzer import MultiHarParser

    test_runs = []
    with open('har_data1.har', 'r') as f1:
        test_runs.append(json.loads(f1.read()))
    with open('har_data2.har', 'r') as f2:
        test_runs.append(json.loads(f2.read()))

    multi_har_parser = MultiHarParser(har_data=test_runs)

    # Get the mean for the time to first byte of all runs in ms
    print multi_har_parser.time_to_first_byte
    # 70

    # Get the total page load time mean for all runs in ms
    print multi_har_parser.page_load_time
    # 150

    # Get the javascript load time mean for all runs in ms
    print multi_har_parser.js_load_time
    # 50

    # You can get the standard deviation for any of these as well
    # Let's get the standard deviation for javascript load time
    print multi_har_parser.get_stdev('js')
    # 5
    # We can also do that with 'page' or 'ttfb' (time to first byte)
    print multi_har_parser.get_stdev('page')
    # 11
    print multi_har_parser.get_stdev('ttfb')
    # 10
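    # Asking for an asset type the parser does not know about raises a
    # ValueError (a sketch of get_stdev's validation; 'flash' is just an
    # example of an unsupported type)
    multi_har_parser.get_stdev('flash')
    # ValueError: asset_type must be one of: ...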

    ### DECIMAL PRECISION ###

    # You will notice that all of the results above are whole numbers.
    # That is because the default decimal precision for the multi parser
    # is 0. However, you can pass whatever precision you want into the
    # constructor to control this.

    multi_har_parser = MultiHarParser(har_data=test_runs, decimal_precision=2)
    print multi_har_parser.time_to_first_byte
    # 70.15
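    # If a HAR file contains more than one page, you can also target a
    # specific page by its ID (a sketch; 'page_3' is the page ID from the
    # HarPage example above)
    multi_har_parser = MultiHarParser(har_data=test_runs, page_id='page_3')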


Advanced Usage
==============

4 changes: 2 additions & 2 deletions docs/conf.py
@@ -60,9 +60,9 @@
# built documents.
#
# The short X.Y version.
version = '1.2'
# The full version, including alpha/beta/rc tags.
release = '1.2.1'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
2 changes: 1 addition & 1 deletion haralyzer/__init__.py
@@ -1,4 +1,4 @@
"""
Module for analyzing web pages using HAR files
"""
from .assets import HarParser, MultiHarParser, HarPage
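
With the package root now exporting the new class, downstream code can import it directly; a quick sketch of the new import surface:

    # MultiHarParser now ships alongside HarParser and HarPage
    from haralyzer import HarParser, MultiHarParser, HarPage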
161 changes: 159 additions & 2 deletions haralyzer/assets.py
@@ -9,6 +9,10 @@
from dateutil import parser
assert parser
import re
import statistics


DECIMAL_PRECISION = 0


class HarParser(object):
@@ -19,8 +23,8 @@ class HarParser(object):

    def __init__(self, har_data=None):
        """
        :param har_data: a ``dict`` representing the JSON of a HAR file
            (i.e. you need to parse the HAR data from a string with
            json.loads, or requests.json() if you are pulling the data
            via HTTP).
        """
        if not har_data or not isinstance(har_data, dict):
@@ -150,6 +154,159 @@ def creator(self):
        return self.har_data['creator']


class MultiHarParser(object):
    """
    An object that represents multiple HAR files OF THE SAME CONTENT.
    It is used to gather overall statistical data in situations where you have
    multiple runs against the same web asset, which is common in performance
    testing.
    """

    def __init__(self, har_data, page_id=None,
                 decimal_precision=DECIMAL_PRECISION):
        """
        :param har_data: A ``list`` of ``dict`` representing the JSON
        of a HAR file. See the docstring of HarParser.__init__ for more detail.
        :param page_id: If a ``str`` of the page ID is provided, the
        multi parser will return aggregate results for this specific page. If
        not, it will assume that there is only one page in the run (this was
        written specifically for that use case).
        :param decimal_precision: ``int`` representing the precision of the
        return values for the means and standard deviations provided by this
        class.
        """
        self.har_data = har_data
        self.page_id = page_id
        self.decimal_precision = decimal_precision

    def get_load_times(self, asset_type):
        """
        Just a ``list`` of the load times of a certain asset type for each page.
        :param asset_type: ``str`` of the asset type to return load times for.
        """
        load_times = []
        search_str = '{0}_load_time'.format(asset_type)
        for har_page in self.pages:
            val = getattr(har_page, search_str, None)
            load_times.append(val)
        return load_times

    def get_stdev(self, asset_type):
        """
        Returns the standard deviation for a set of a certain asset type.
        :param asset_type: ``str`` of the asset type to calculate standard
        deviation for.
        :returns: An ``int`` or ``float`` of standard deviation, depending on
        self.decimal_precision.
        """
        load_times = []
        # Handle edge cases like TTFB
        if asset_type == 'ttfb':
            for page in self.pages:
                load_times.append(page.time_to_first_byte)
        elif asset_type not in self.asset_types and asset_type != 'page':
            raise ValueError('asset_type must be one of:\nttfb\n{0}'.format(
                '\n'.join(self.asset_types)))
        else:
            load_times = self.get_load_times(asset_type)

        return round(statistics.stdev(load_times), self.decimal_precision)

    @property
    def pages(self):
        """
        The aggregate pages of all the parser objects.
        """
        pages = []
        for har_dict in self.har_data:
            har_parser = HarParser(har_data=har_dict)
            if self.page_id:
                for page in har_parser.pages:
                    if page.page_id == self.page_id:
                        pages.append(page)
            else:
                pages.append(har_parser.pages[0])
        return pages

    @cached_property
    def asset_types(self):
        """
        Mimic the asset types stored in HarPage.
        """
        return self.pages[0].asset_types

    @cached_property
    def time_to_first_byte(self):
        """
        The aggregate time to first byte for all pages.
        """
        ttfb = []
        for page in self.pages:
            ttfb.append(page.time_to_first_byte)
        return round(statistics.mean(ttfb), self.decimal_precision)

    @cached_property
    def page_load_time(self):
        """
        The average total load time for all runs (not weighted).
        """
        load_times = self.get_load_times('page')
        return round(statistics.mean(load_times), self.decimal_precision)

    @cached_property
    def js_load_time(self):
        """
        Returns aggregate javascript load time for all pages.
        """
        load_times = self.get_load_times('js')
        return round(statistics.mean(load_times), self.decimal_precision)

    @cached_property
    def css_load_time(self):
        """
        Returns aggregate css load time for all pages.
        """
        load_times = self.get_load_times('css')
        return round(statistics.mean(load_times), self.decimal_precision)

    @cached_property
    def image_load_time(self):
        """
        Returns aggregate image load time for all pages.
        """
        load_times = self.get_load_times('image')
        return round(statistics.mean(load_times), self.decimal_precision)

    @cached_property
    def html_load_time(self):
        """
        Returns aggregate html load time for all pages.
        """
        load_times = self.get_load_times('html')
        return round(statistics.mean(load_times), self.decimal_precision)

    @cached_property
    def audio_load_time(self):
        """
        Returns aggregate audio load time for all pages.
        """
        load_times = self.get_load_times('audio')
        return round(statistics.mean(load_times), self.decimal_precision)

    @cached_property
    def video_load_time(self):
        """
        Returns aggregate video load time for all pages.
        """
        load_times = self.get_load_times('video')
        return round(statistics.mean(load_times), self.decimal_precision)


class HarPage(object):
    """
    An object representing one page of a HAR resource
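To see the new class end to end, here is a minimal usage sketch in the style of the README examples (hypothetical file names; ``statistics.stdev`` needs at least two runs to work with):

    import json
    from haralyzer import MultiHarParser

    # Load several runs of the same page (hypothetical file names)
    runs = []
    for name in ('multi_test_1.har', 'multi_test_2.har'):
        with open(name, 'r') as f:
            runs.append(json.loads(f.read()))

    parser = MultiHarParser(har_data=runs, decimal_precision=1)
    print parser.time_to_first_byte  # mean TTFB across runs, in ms
    print parser.get_stdev('ttfb')   # sample standard deviation of TTFB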
1 change: 1 addition & 0 deletions requirements.txt
@@ -1,3 +1,4 @@
statistics
python-dateutil
pytest-cov
python-coveralls
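The new ``statistics`` requirement is the PyPI backport of the Python 3.4 standard-library module of the same name (an assumption based on the package name; on Python 3.4+ the import resolves to the stdlib). The parser only relies on its ``mean`` and ``stdev``:

    import statistics  # stdlib on Python 3.4+, PyPI backport on Python 2

    load_times = [150, 145, 155]
    print statistics.mean(load_times)   # 150
    print statistics.stdev(load_times)  # sample standard deviation: 5.0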
7 changes: 4 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
@@ -6,14 +6,15 @@
from distutils.core import setup


install_reqs = ['cached-property',
                'python-dateutil',
                'statistics', ]

readme = open('README.rst').read()

setup(
    name='haralyzer',
    version='1.2.1',
    description='A python framework for getting useful stuff out of HAR files',
    long_description=readme,
    author='Justin Crown',
1 change: 1 addition & 0 deletions tests/data/multi_test_1.har
@@ -0,0 +1 @@
{"log": {"pages": [{"id": "page_3", "startedDateTime": "2015-03-11T16:35:13.159-07:00", "pageTimings": {"onLoad": 527, "onContentLoad": 298}, "title": "http://humanssuck.net/"}], "browser": {"version": "25.0.1", "name": "Firefox"}, "entries": [{"serverIPAddress": "216.70.110.121", "cache": {}, "startedDateTime": "2015-03-11T16:35:13.159-07:00", "pageref": "page_3", "request": {"cookies": [], "url": "http://humanssuck.net/", "queryString": [], "headers": [{"name": "Host", "value": "humanssuck.net"}, {"name": "User-Agent", "value": "Mozilla/5.0 (X11; Linux i686 on x86_64; rv:25.0) Gecko/20100101 Firefox/25.0"}, {"name": "Accept", "value": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"}, {"name": "Accept-Language", "value": "en-US,en;q=0.5"}, {"name": "Accept-Encoding", "value": "gzip, deflate"}, {"name": "Connection", "value": "keep-alive"}], "headersSize": 292, "bodySize": -1, "method": "GET", "httpVersion": "HTTP/1.1"}, "timings": {"receive": 0, "send": 0, "connect": 0, "dns": 0, "wait": 36, "blocked": 40}, "connection": "80", "time": 76, "response": {"status": 200, "cookies": [], "statusText": "OK", "content": {"mimeType": "text/html", "text": "<!DOCTYPE HTML>\n<html>\r\n<head>humanssuck.net\n<link rel=\"stylesheet\" type=\"text/css\" href=\"bootstrap.css\"></head>\r\n<body>\r\n<img src=\"test1.jpg\">\n<script src=\"jquery-1.7.1.min.js\"></script>\n</video>\n</body>\r\n</html>\r\n\n", "size": 216}, "headers": [{"name": "Server", "value": "nginx"}, {"name": "Date", "value": "Wed, 11 Mar 2015 23:35:13 GMT"}, {"name": "Content-Type", "value": "text/html; charset=UTF-8"}, {"name": "Transfer-Encoding", "value": "chunked"}, {"name": "Connection", "value": "keep-alive"}, {"name": "Vary", "value": "Accept-Encoding"}, {"name": "X-Accel-Version", "value": "0.01"}, {"name": "Last-Modified", "value": "Wed, 11 Mar 2015 22:39:28 GMT"}, {"name": "Etag", "value": "\"3e20f0c-d8-5110af0ace06d\""}, {"name": "X-Powered-By", "value": "PleskLin"}, {"name": "Content-Encoding", "value": "gzip"}], "headersSize": 337, "redirectURL": "", "bodySize": 186, "httpVersion": "HTTP/1.1"}}, {"serverIPAddress": "216.70.110.121", "cache": {}, "startedDateTime": "2015-03-11T16:35:13.272-07:00", "pageref": "page_3", "request": {"cookies": [], "url": "http://humanssuck.net/bootstrap.css", "queryString": [], "headers": [{"name": "Host", "value": "humanssuck.net"}, {"name": "User-Agent", "value": "Mozilla/5.0 (X11; Linux i686 on x86_64; rv:25.0) Gecko/20100101 Firefox/25.0"}, {"name": "Accept", "value": "text/css,*/*;q=0.1"}, {"name": "Accept-Language", "value": "en-US,en;q=0.5"}, {"name": "Accept-Encoding", "value": "gzip, deflate"}, {"name": "Referer", "value": "http://humanssuck.net/"}, {"name": "Connection", "value": "keep-alive"}], "headersSize": 293, "bodySize": -1, "method": "GET", "httpVersion": "HTTP/1.1"}, "timings": {"receive": 36, "send": 0, "connect": 0, "dns": 0, "wait": 41, "blocked": 0}, "connection": "80", "time": 77, "response": {"status": 200, "cookies": [], "statusText": "OK", "content": {"mimeType": "text/css", "text": "/*!\n * Bootstrap v3.3.2 (http://getbootstrap.com)\n * Copyright 2011-2015 Twitter, Inc.\n * Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE)\n */\n\n/*! 
normalize.css v3.0.2 | MIT License | git.io/normalize */\nhtml {\n font-family: sans-serif;\n -webkit-text-size-adjust: 100%;\n -ms-text-size-adjust: 100%;\n}\nbody {\n margin: 0;\n}\narticle,\naside,\ndetails,\nfigcaption,\nfigure,\nfooter,\nheader,\nhgroup,\nmain,\nmenu,\nnav,\nsection,\nsummary {\n display: block;\n}\naudio,\ncanvas,\nprogress,\nvideo {\n display: inline-block;\n vertical-align: baseline;\n}\naudio:not([controls]) {\n display: none;\n height: 0;\n}\n[hidden],\ntemplate {\n display: none;\n}\na {\n background-color: transparent;\n}\na:active,\na:hover {\n outline: 0;\n}\nabbr[title] {\n border-bottom: 1px dotted;\n}\nb,\nstrong {\n font-weight: bold;\n}\ndfn {\n font-style: italic;\n}\nh1 {\n margin: .67em 0;\n font-size: 2em;\n}\nmark {\n color: #000;\n background: #ff0;\n}\nsmall {\n font-size: 80%;\n}\nsub,\nsup {\n position: relative;\n font-size: 75%;\n line... [truncated to save space (140390 more bytes)]", "size": 141414}, "headers": [{"name": "Server", "value": "nginx"}, {"name": "Date", "value": "Wed, 11 Mar 2015 23:35:13 GMT"}, {"name": "Content-Type", "value": "text/css"}, {"name": "Last-Modified", "value": "Wed, 11 Mar 2015 22:25:21 GMT"}, {"name": "Transfer-Encoding", "value": "chunked"}, {"name": "Connection", "value": "keep-alive"}, {"name": "Vary", "value": "Accept-Encoding"}, {"name": "Cache-Control", "value": "max-age=2592000"}, {"name": "Expires", "value": "Fri, 10 Apr 2015 23:35:13 GMT"}, {"name": "X-Powered-By", "value": "PleskLin"}, {"name": "Content-Encoding", "value": "gzip"}], "headersSize": 336, "redirectURL": "", "bodySize": 27495, "httpVersion": "HTTP/1.1"}}, {"serverIPAddress": "216.70.110.121", "cache": {}, "startedDateTime": "2015-03-11T16:35:13.272-07:00", "pageref": "page_3", "request": {"cookies": [], "url": "http://humanssuck.net/test1.jpg", "queryString": [], "headers": [{"name": "Host", "value": "humanssuck.net"}, {"name": "User-Agent", "value": "Mozilla/5.0 (X11; Linux i686 on x86_64; rv:25.0) Gecko/20100101 Firefox/25.0"}, {"name": "Accept", "value": "image/png,image/*;q=0.8,*/*;q=0.5"}, {"name": "Accept-Language", "value": "en-US,en;q=0.5"}, {"name": "Accept-Encoding", "value": "gzip, deflate"}, {"name": "Referer", "value": "http://humanssuck.net/"}, {"name": "Connection", "value": "keep-alive"}], "headersSize": 304, "bodySize": -1, "method": "GET", "httpVersion": "HTTP/1.1"}, "timings": {"receive": 230, "send": 0, "connect": 32, "dns": 10, "wait": 34, "blocked": 77}, "connection": "80", "time": 383, "response": {"status": 200, "cookies": [], "statusText": "OK", "content": {"mimeType": "image/jpeg", "size": 446967}, "headers": [{"name": "Server", "value": "nginx"}, {"name": "Date", "value": "Wed, 11 Mar 2015 23:35:13 GMT"}, {"name": "Content-Type", "value": "image/jpeg"}, {"name": "Content-Length", "value": "446967"}, {"name": "Last-Modified", "value": "Sun, 03 Oct 2010 21:04:45 GMT"}, {"name": "Connection", "value": "keep-alive"}, {"name": "Cache-Control", "value": "max-age=31536000"}, {"name": "Expires", "value": "Thu, 10 Mar 2016 23:35:13 GMT"}, {"name": "X-Powered-By", "value": "PleskLin"}, {"name": "Accept-Ranges", "value": "bytes"}], "headersSize": 310, "redirectURL": "", "bodySize": 446967, "httpVersion": "HTTP/1.1"}}, {"serverIPAddress": "216.70.110.121", "cache": {}, "startedDateTime": "2015-03-11T16:35:13.272-07:00", "pageref": "page_3", "request": {"cookies": [], "url": "http://humanssuck.net/jquery-1.7.1.min.js", "queryString": [], "headers": [{"name": "Host", "value": "humanssuck.net"}, {"name": "User-Agent", "value": 
"Mozilla/5.0 (X11; Linux i686 on x86_64; rv:25.0) Gecko/20100101 Firefox/25.0"}, {"name": "Accept", "value": "*/*"}, {"name": "Accept-Language", "value": "en-US,en;q=0.5"}, {"name": "Accept-Encoding", "value": "gzip, deflate"}, {"name": "Referer", "value": "http://humanssuck.net/"}, {"name": "Connection", "value": "keep-alive"}], "headersSize": 284, "bodySize": -1, "method": "GET", "httpVersion": "HTTP/1.1"}, "timings": {"receive": 36, "send": 0, "connect": 0, "dns": 10, "wait": 31, "blocked": 77}, "connection": "80", "time": 154, "response": {"status": 200, "cookies": [], "statusText": "OK", "content": {"mimeType": "application/x-javascript", "text": "/*! jQuery v1.7.1 jquery.com | jquery.org/license */\n(function(a,b){function cy(a){return f.isWindow(a)?a:a.nodeType===9?a.defaultView||a.parentWindow:!1}function cv(a){if(!ck[a]){var b=c.body,d=f(\"<\"+a+\">\").appendTo(b),e=d.css(\"display\");d.remove();if(e===\"none\"||e===\"\"){cl||(cl=c.createElement(\"iframe\"),cl.frameBorder=cl.width=cl.height=0),b.appendChild(cl);if(!cm||!cl.createElement)cm=(cl.contentWindow||cl.contentDocument).document,cm.write((c.compatMode===\"CSS1Compat\"?\"<!doctype html>\":\"\")+\"<html><body>\"),cm.close();d=cm.createElement(a),cm.body.appendChild(d),e=f.css(d,\"display\"),b.removeChild(cl)}ck[a]=e}return ck[a]}function cu(a,b){var c={};f.each(cq.concat.apply([],cq.slice(0,b)),function(){c[this]=a});return c}function ct(){cr=b}function cs(){setTimeout(ct,0);return cr=f.now()}function cj(){try{return new a.ActiveXObject(\"Microsoft.XMLHTTP\")}catch(b){}}function ci(){try{return new a.XMLHttpRequest}catch(b){}}function cc(a,c){a.dataFilter&&(c=a.dataFilter(c,a.dataType));var d=a.dataTypes,e={},g,h,i=d... [truncated to save space (92843 more bytes)]", "size": 93867}, "headers": [{"name": "Server", "value": "nginx"}, {"name": "Date", "value": "Wed, 11 Mar 2015 23:35:13 GMT"}, {"name": "Content-Type", "value": "application/x-javascript"}, {"name": "Last-Modified", "value": "Mon, 23 Feb 2015 03:16:00 GMT"}, {"name": "Transfer-Encoding", "value": "chunked"}, {"name": "Connection", "value": "keep-alive"}, {"name": "Vary", "value": "Accept-Encoding"}, {"name": "Cache-Control", "value": "max-age=2592000"}, {"name": "Expires", "value": "Fri, 10 Apr 2015 23:35:13 GMT"}, {"name": "X-Powered-By", "value": "PleskLin"}, {"name": "Content-Encoding", "value": "gzip"}], "headersSize": 352, "redirectURL": "", "bodySize": 38367, "httpVersion": "HTTP/1.1"}}], "version": "1.1", "creator": {"version": "1.12", "name": "Firebug"}}}
