
Commit

Merge pull request #3 from mrname/combined-parser
Combined parser
mrname committed Jul 5, 2015
2 parents 312f18a + ca9bab3 commit 8b6544a
Showing 13 changed files with 318 additions and 695 deletions.
695 changes: 21 additions & 674 deletions LICENSE.txt

Large diffs are not rendered by default.

66 changes: 55 additions & 11 deletions README.rst
@@ -64,7 +64,7 @@ a ``HarParser`` with `har_parser=parser`, or a ``dict`` representing the JSON of
file (see example above) with `har_data=har_data`::

    import json
    from haralyzer import HarPage

    with open('har_data.har', 'r') as f:
        har_page = HarPage('page_3', har_data=json.loads(f.read()))
@@ -75,28 +75,72 @@ file (see example above) with `har_data=har_data`::
    har_page.image_load_time
    # prints 713
    # We could do this with 'css', 'js', 'html', 'audio', or 'video'

    ### WORK WITH SIZES (all sizes are in bytes) ###

    # Get the total page size (with all assets)
    har_page.page_size
    # prints 2423765

    # Get the total image size
    har_page.image_size
    # prints 733488
    # We could do this with 'css', 'js', 'html', 'audio', or 'video'
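    # For example, the same calls work for CSS (a sketch; the printed
    # values depend entirely on your own HAR data):
    har_page.css_load_time
    har_page.css_size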


MultiHarParser
++++++++++++++

The ``MultiHarParser`` takes a ``list`` of ``dict`` objects, each of which represents
the JSON of a full HAR file. The concept here is that you can provide multiple HAR
files of the same page (representing multiple test runs) and the ``MultiHarParser``
will provide aggregate results for load times::

    import json
    from haralyzer import MultiHarParser

    test_runs = []
    with open('har_data1.har', 'r') as f1:
        test_runs.append(json.loads(f1.read()))
    with open('har_data2.har', 'r') as f2:
        test_runs.append(json.loads(f2.read()))

    multi_har_parser = MultiHarParser(har_data=test_runs)

    # Get the mean for the time to first byte of all runs in ms
    print multi_har_parser.time_to_first_byte
    # 70

    # Get the total page load time mean for all runs in ms
    print multi_har_parser.page_load_time
    # 150

    # Get the javascript load time mean for all runs in ms
    print multi_har_parser.js_load_time
    # 50

    # You can get the standard deviation for any of these as well
    # Let's get the standard deviation for javascript load time
    print multi_har_parser.get_stdev('js')
    # 5
    # We can also do that with 'page' or 'ttfb' (time to first byte)
    print multi_har_parser.get_stdev('page')
    # 11
    print multi_har_parser.get_stdev('ttfb')
    # 10
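    # Asking for an asset type the parser does not know about raises a
    # ValueError (a sketch of get_stdev's validation; 'flash' is just an
    # example of an unsupported type)
    multi_har_parser.get_stdev('flash')
    # ValueError: asset_type must be one of: ...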

    ### DECIMAL PRECISION ###

    # You will notice that all of the results above are whole numbers.
    # That is because the default decimal precision for the multi parser
    # is 0. However, you can pass whatever precision you want into the
    # constructor to control this.

    multi_har_parser = MultiHarParser(har_data=test_runs, decimal_precision=2)
    print multi_har_parser.time_to_first_byte
    # 70.15
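    # If a HAR file contains more than one page, you can also target a
    # specific page by its ID (a sketch; 'page_3' is the page ID from the
    # HarPage example above)
    multi_har_parser = MultiHarParser(har_data=test_runs, page_id='page_3')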


Advanced Usage
==============

4 changes: 2 additions & 2 deletions docs/conf.py
@@ -60,9 +60,9 @@
# built documents.
#
# The short X.Y version.
version = '1.2'
# The full version, including alpha/beta/rc tags.
release = '1.2.1'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
2 changes: 1 addition & 1 deletion haralyzer/__init__.py
@@ -1,4 +1,4 @@
"""
Module for analyzing web pages using HAR files
"""
from .assets import HarParser, MultiHarParser, HarPage
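
With the package root now exporting the new class, downstream code can import it directly; a quick sketch of the new import surface:

    # MultiHarParser now ships alongside HarParser and HarPage
    from haralyzer import HarParser, MultiHarParser, HarPage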
161 changes: 159 additions & 2 deletions haralyzer/assets.py
@@ -9,6 +9,10 @@
from dateutil import parser
assert parser
import re
import statistics


DECIMAL_PRECISION = 0


class HarParser(object):
@@ -19,8 +23,8 @@ class HarParser(object):

    def __init__(self, har_data=None):
        """
        :param har_data: a ``dict`` representing the JSON of a HAR file
            (i.e. you need to parse the HAR data from a string with
            json.loads, or requests.json() if you are pulling the data
            via HTTP).
        """
        if not har_data or not isinstance(har_data, dict):
@@ -150,6 +154,159 @@ def creator(self):
        return self.har_data['creator']


class MultiHarParser(object):
    """
    An object that represents multiple HAR files OF THE SAME CONTENT.
    It is used to gather overall statistical data in situations where you have
    multiple runs against the same web asset, which is common in performance
    testing.
    """

    def __init__(self, har_data, page_id=None,
                 decimal_precision=DECIMAL_PRECISION):
        """
        :param har_data: A ``list`` of ``dict`` representing the JSON
        of a HAR file. See the docstring of HarParser.__init__ for more detail.
        :param page_id: If a ``str`` of the page ID is provided, the
        multi parser will return aggregate results for this specific page. If
        not, it will assume that there is only one page in the run (this was
        written specifically for that use case).
        :param decimal_precision: ``int`` representing the precision of the
        return values for the means and standard deviations provided by this
        class.
        """
        self.har_data = har_data
        self.page_id = page_id
        self.decimal_precision = decimal_precision

    def get_load_times(self, asset_type):
        """
        Just a ``list`` of the load times of a certain asset type for each page.
        :param asset_type: ``str`` of the asset type to return load times for.
        """
        load_times = []
        search_str = '{0}_load_time'.format(asset_type)
        for har_page in self.pages:
            val = getattr(har_page, search_str, None)
            load_times.append(val)
        return load_times

    def get_stdev(self, asset_type):
        """
        Returns the standard deviation for a set of a certain asset type.
        :param asset_type: ``str`` of the asset type to calculate standard
        deviation for.
        :returns: An ``int`` or ``float`` of standard deviation, depending on
        self.decimal_precision.
        """
        load_times = []
        # Handle edge cases like TTFB
        if asset_type == 'ttfb':
            for page in self.pages:
                load_times.append(page.time_to_first_byte)
        elif asset_type not in self.asset_types and asset_type != 'page':
            raise ValueError('asset_type must be one of:\nttfb\n{0}'.format(
                '\n'.join(self.asset_types)))
        else:
            load_times = self.get_load_times(asset_type)

        return round(statistics.stdev(load_times), self.decimal_precision)

    @property
    def pages(self):
        """
        The aggregate pages of all the parser objects.
        """
        pages = []
        for har_dict in self.har_data:
            har_parser = HarParser(har_data=har_dict)
            if self.page_id:
                for page in har_parser.pages:
                    if page.page_id == self.page_id:
                        pages.append(page)
            else:
                pages.append(har_parser.pages[0])
        return pages

    @cached_property
    def asset_types(self):
        """
        Mimic the asset types stored in HarPage.
        """
        return self.pages[0].asset_types

    @cached_property
    def time_to_first_byte(self):
        """
        The aggregate time to first byte for all pages.
        """
        ttfb = []
        for page in self.pages:
            ttfb.append(page.time_to_first_byte)
        return round(statistics.mean(ttfb), self.decimal_precision)

    @cached_property
    def page_load_time(self):
        """
        The average total load time for all runs (not weighted).
        """
        load_times = self.get_load_times('page')
        return round(statistics.mean(load_times), self.decimal_precision)

    @cached_property
    def js_load_time(self):
        """
        Returns aggregate javascript load time for all pages.
        """
        load_times = self.get_load_times('js')
        return round(statistics.mean(load_times), self.decimal_precision)

    @cached_property
    def css_load_time(self):
        """
        Returns aggregate css load time for all pages.
        """
        load_times = self.get_load_times('css')
        return round(statistics.mean(load_times), self.decimal_precision)

    @cached_property
    def image_load_time(self):
        """
        Returns aggregate image load time for all pages.
        """
        load_times = self.get_load_times('image')
        return round(statistics.mean(load_times), self.decimal_precision)

    @cached_property
    def html_load_time(self):
        """
        Returns aggregate html load time for all pages.
        """
        load_times = self.get_load_times('html')
        return round(statistics.mean(load_times), self.decimal_precision)

    @cached_property
    def audio_load_time(self):
        """
        Returns aggregate audio load time for all pages.
        """
        load_times = self.get_load_times('audio')
        return round(statistics.mean(load_times), self.decimal_precision)

    @cached_property
    def video_load_time(self):
        """
        Returns aggregate video load time for all pages.
        """
        load_times = self.get_load_times('video')
        return round(statistics.mean(load_times), self.decimal_precision)


class HarPage(object):
    """
    An object representing one page of a HAR resource
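To see the new class end to end, here is a minimal usage sketch in the style of the README examples (hypothetical file names; ``statistics.stdev`` needs at least two runs to work with):

    import json
    from haralyzer import MultiHarParser

    # Load several runs of the same page (hypothetical file names)
    runs = []
    for name in ('multi_test_1.har', 'multi_test_2.har'):
        with open(name, 'r') as f:
            runs.append(json.loads(f.read()))

    parser = MultiHarParser(har_data=runs, decimal_precision=1)
    print parser.time_to_first_byte  # mean TTFB across runs, in ms
    print parser.get_stdev('ttfb')   # sample standard deviation of TTFB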
1 change: 1 addition & 0 deletions requirements.txt
@@ -1,3 +1,4 @@
statistics
python-dateutil
pytest-cov
python-coveralls
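The new ``statistics`` requirement is the PyPI backport of the Python 3.4 standard-library module of the same name (an assumption based on the package name; on Python 3.4+ the import resolves to the stdlib). The parser only relies on its ``mean`` and ``stdev``:

    import statistics  # stdlib on Python 3.4+, PyPI backport on Python 2

    load_times = [150, 145, 155]
    print statistics.mean(load_times)   # 150
    print statistics.stdev(load_times)  # sample standard deviation: 5.0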
7 changes: 4 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
@@ -6,14 +6,15 @@
from distutils.core import setup


install_reqs = ['cached-property',
                'python-dateutil',
                'statistics', ]

readme = open('README.rst').read()

setup(
    name='haralyzer',
    version='1.2.1',
    description='A python framework for getting useful stuff out of HAR files',
    long_description=readme,
    author='Justin Crown',
1 change: 1 addition & 0 deletions tests/data/multi_test_1.har
@@ -0,0 +1 @@
{"log": {"pages": [{"id": "page_3", "startedDateTime": "2015-03-11T16:35:13.159-07:00", "pageTimings": {"onLoad": 527, "onContentLoad": 298}, "title": "http://humanssuck.net/"}], "browser": {"version": "25.0.1", "name": "Firefox"}, "entries": [{"serverIPAddress": "216.70.110.121", "cache": {}, "startedDateTime": "2015-03-11T16:35:13.159-07:00", "pageref": "page_3", "request": {"cookies": [], "url": "http://humanssuck.net/", "queryString": [], "headers": [{"name": "Host", "value": "humanssuck.net"}, {"name": "User-Agent", "value": "Mozilla/5.0 (X11; Linux i686 on x86_64; rv:25.0) Gecko/20100101 Firefox/25.0"}, {"name": "Accept", "value": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"}, {"name": "Accept-Language", "value": "en-US,en;q=0.5"}, {"name": "Accept-Encoding", "value": "gzip, deflate"}, {"name": "Connection", "value": "keep-alive"}], "headersSize": 292, "bodySize": -1, "method": "GET", "httpVersion": "HTTP/1.1"}, "timings": {"receive": 0, "send": 0, "connect": 0, "dns": 0, "wait": 36, "blocked": 40}, "connection": "80", "time": 76, "response": {"status": 200, "cookies": [], "statusText": "OK", "content": {"mimeType": "text/html", "text": "<!DOCTYPE HTML>\n<html>\r\n<head>humanssuck.net\n<link rel=\"stylesheet\" type=\"text/css\" href=\"bootstrap.css\"></head>\r\n<body>\r\n<img src=\"test1.jpg\">\n<script src=\"jquery-1.7.1.min.js\"></script>\n</video>\n</body>\r\n</html>\r\n\n", "size": 216}, "headers": [{"name": "Server", "value": "nginx"}, {"name": "Date", "value": "Wed, 11 Mar 2015 23:35:13 GMT"}, {"name": "Content-Type", "value": "text/html; charset=UTF-8"}, {"name": "Transfer-Encoding", "value": "chunked"}, {"name": "Connection", "value": "keep-alive"}, {"name": "Vary", "value": "Accept-Encoding"}, {"name": "X-Accel-Version", "value": "0.01"}, {"name": "Last-Modified", "value": "Wed, 11 Mar 2015 22:39:28 GMT"}, {"name": "Etag", "value": "\"3e20f0c-d8-5110af0ace06d\""}, {"name": "X-Powered-By", "value": "PleskLin"}, {"name": "Content-Encoding", "value": "gzip"}], "headersSize": 337, "redirectURL": "", "bodySize": 186, "httpVersion": "HTTP/1.1"}}, {"serverIPAddress": "216.70.110.121", "cache": {}, "startedDateTime": "2015-03-11T16:35:13.272-07:00", "pageref": "page_3", "request": {"cookies": [], "url": "http://humanssuck.net/bootstrap.css", "queryString": [], "headers": [{"name": "Host", "value": "humanssuck.net"}, {"name": "User-Agent", "value": "Mozilla/5.0 (X11; Linux i686 on x86_64; rv:25.0) Gecko/20100101 Firefox/25.0"}, {"name": "Accept", "value": "text/css,*/*;q=0.1"}, {"name": "Accept-Language", "value": "en-US,en;q=0.5"}, {"name": "Accept-Encoding", "value": "gzip, deflate"}, {"name": "Referer", "value": "http://humanssuck.net/"}, {"name": "Connection", "value": "keep-alive"}], "headersSize": 293, "bodySize": -1, "method": "GET", "httpVersion": "HTTP/1.1"}, "timings": {"receive": 36, "send": 0, "connect": 0, "dns": 0, "wait": 41, "blocked": 0}, "connection": "80", "time": 77, "response": {"status": 200, "cookies": [], "statusText": "OK", "content": {"mimeType": "text/css", "text": "/*!\n * Bootstrap v3.3.2 (http://getbootstrap.com)\n * Copyright 2011-2015 Twitter, Inc.\n * Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE)\n */\n\n/*! 
normalize.css v3.0.2 | MIT License | git.io/normalize */\nhtml {\n font-family: sans-serif;\n -webkit-text-size-adjust: 100%;\n -ms-text-size-adjust: 100%;\n}\nbody {\n margin: 0;\n}\narticle,\naside,\ndetails,\nfigcaption,\nfigure,\nfooter,\nheader,\nhgroup,\nmain,\nmenu,\nnav,\nsection,\nsummary {\n display: block;\n}\naudio,\ncanvas,\nprogress,\nvideo {\n display: inline-block;\n vertical-align: baseline;\n}\naudio:not([controls]) {\n display: none;\n height: 0;\n}\n[hidden],\ntemplate {\n display: none;\n}\na {\n background-color: transparent;\n}\na:active,\na:hover {\n outline: 0;\n}\nabbr[title] {\n border-bottom: 1px dotted;\n}\nb,\nstrong {\n font-weight: bold;\n}\ndfn {\n font-style: italic;\n}\nh1 {\n margin: .67em 0;\n font-size: 2em;\n}\nmark {\n color: #000;\n background: #ff0;\n}\nsmall {\n font-size: 80%;\n}\nsub,\nsup {\n position: relative;\n font-size: 75%;\n line... [truncated to save space (140390 more bytes)]", "size": 141414}, "headers": [{"name": "Server", "value": "nginx"}, {"name": "Date", "value": "Wed, 11 Mar 2015 23:35:13 GMT"}, {"name": "Content-Type", "value": "text/css"}, {"name": "Last-Modified", "value": "Wed, 11 Mar 2015 22:25:21 GMT"}, {"name": "Transfer-Encoding", "value": "chunked"}, {"name": "Connection", "value": "keep-alive"}, {"name": "Vary", "value": "Accept-Encoding"}, {"name": "Cache-Control", "value": "max-age=2592000"}, {"name": "Expires", "value": "Fri, 10 Apr 2015 23:35:13 GMT"}, {"name": "X-Powered-By", "value": "PleskLin"}, {"name": "Content-Encoding", "value": "gzip"}], "headersSize": 336, "redirectURL": "", "bodySize": 27495, "httpVersion": "HTTP/1.1"}}, {"serverIPAddress": "216.70.110.121", "cache": {}, "startedDateTime": "2015-03-11T16:35:13.272-07:00", "pageref": "page_3", "request": {"cookies": [], "url": "http://humanssuck.net/test1.jpg", "queryString": [], "headers": [{"name": "Host", "value": "humanssuck.net"}, {"name": "User-Agent", "value": "Mozilla/5.0 (X11; Linux i686 on x86_64; rv:25.0) Gecko/20100101 Firefox/25.0"}, {"name": "Accept", "value": "image/png,image/*;q=0.8,*/*;q=0.5"}, {"name": "Accept-Language", "value": "en-US,en;q=0.5"}, {"name": "Accept-Encoding", "value": "gzip, deflate"}, {"name": "Referer", "value": "http://humanssuck.net/"}, {"name": "Connection", "value": "keep-alive"}], "headersSize": 304, "bodySize": -1, "method": "GET", "httpVersion": "HTTP/1.1"}, "timings": {"receive": 230, "send": 0, "connect": 32, "dns": 10, "wait": 34, "blocked": 77}, "connection": "80", "time": 383, "response": {"status": 200, "cookies": [], "statusText": "OK", "content": {"mimeType": "image/jpeg", "size": 446967}, "headers": [{"name": "Server", "value": "nginx"}, {"name": "Date", "value": "Wed, 11 Mar 2015 23:35:13 GMT"}, {"name": "Content-Type", "value": "image/jpeg"}, {"name": "Content-Length", "value": "446967"}, {"name": "Last-Modified", "value": "Sun, 03 Oct 2010 21:04:45 GMT"}, {"name": "Connection", "value": "keep-alive"}, {"name": "Cache-Control", "value": "max-age=31536000"}, {"name": "Expires", "value": "Thu, 10 Mar 2016 23:35:13 GMT"}, {"name": "X-Powered-By", "value": "PleskLin"}, {"name": "Accept-Ranges", "value": "bytes"}], "headersSize": 310, "redirectURL": "", "bodySize": 446967, "httpVersion": "HTTP/1.1"}}, {"serverIPAddress": "216.70.110.121", "cache": {}, "startedDateTime": "2015-03-11T16:35:13.272-07:00", "pageref": "page_3", "request": {"cookies": [], "url": "http://humanssuck.net/jquery-1.7.1.min.js", "queryString": [], "headers": [{"name": "Host", "value": "humanssuck.net"}, {"name": "User-Agent", "value": 
"Mozilla/5.0 (X11; Linux i686 on x86_64; rv:25.0) Gecko/20100101 Firefox/25.0"}, {"name": "Accept", "value": "*/*"}, {"name": "Accept-Language", "value": "en-US,en;q=0.5"}, {"name": "Accept-Encoding", "value": "gzip, deflate"}, {"name": "Referer", "value": "http://humanssuck.net/"}, {"name": "Connection", "value": "keep-alive"}], "headersSize": 284, "bodySize": -1, "method": "GET", "httpVersion": "HTTP/1.1"}, "timings": {"receive": 36, "send": 0, "connect": 0, "dns": 10, "wait": 31, "blocked": 77}, "connection": "80", "time": 154, "response": {"status": 200, "cookies": [], "statusText": "OK", "content": {"mimeType": "application/x-javascript", "text": "/*! jQuery v1.7.1 jquery.com | jquery.org/license */\n(function(a,b){function cy(a){return f.isWindow(a)?a:a.nodeType===9?a.defaultView||a.parentWindow:!1}function cv(a){if(!ck[a]){var b=c.body,d=f(\"<\"+a+\">\").appendTo(b),e=d.css(\"display\");d.remove();if(e===\"none\"||e===\"\"){cl||(cl=c.createElement(\"iframe\"),cl.frameBorder=cl.width=cl.height=0),b.appendChild(cl);if(!cm||!cl.createElement)cm=(cl.contentWindow||cl.contentDocument).document,cm.write((c.compatMode===\"CSS1Compat\"?\"<!doctype html>\":\"\")+\"<html><body>\"),cm.close();d=cm.createElement(a),cm.body.appendChild(d),e=f.css(d,\"display\"),b.removeChild(cl)}ck[a]=e}return ck[a]}function cu(a,b){var c={};f.each(cq.concat.apply([],cq.slice(0,b)),function(){c[this]=a});return c}function ct(){cr=b}function cs(){setTimeout(ct,0);return cr=f.now()}function cj(){try{return new a.ActiveXObject(\"Microsoft.XMLHTTP\")}catch(b){}}function ci(){try{return new a.XMLHttpRequest}catch(b){}}function cc(a,c){a.dataFilter&&(c=a.dataFilter(c,a.dataType));var d=a.dataTypes,e={},g,h,i=d... [truncated to save space (92843 more bytes)]", "size": 93867}, "headers": [{"name": "Server", "value": "nginx"}, {"name": "Date", "value": "Wed, 11 Mar 2015 23:35:13 GMT"}, {"name": "Content-Type", "value": "application/x-javascript"}, {"name": "Last-Modified", "value": "Mon, 23 Feb 2015 03:16:00 GMT"}, {"name": "Transfer-Encoding", "value": "chunked"}, {"name": "Connection", "value": "keep-alive"}, {"name": "Vary", "value": "Accept-Encoding"}, {"name": "Cache-Control", "value": "max-age=2592000"}, {"name": "Expires", "value": "Fri, 10 Apr 2015 23:35:13 GMT"}, {"name": "X-Powered-By", "value": "PleskLin"}, {"name": "Content-Encoding", "value": "gzip"}], "headersSize": 352, "redirectURL": "", "bodySize": 38367, "httpVersion": "HTTP/1.1"}}], "version": "1.1", "creator": {"version": "1.12", "name": "Firebug"}}}
