/
test_page.py
286 lines (231 loc) · 9.67 KB
/
test_page.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
import pytest
from haralyzer import HarPage, HarParser, HarEntry
from haralyzer.compat import iteritems
from haralyzer.errors import PageNotFoundError
import re
import six
# Page ID the tests expect to be rejected with PageNotFoundError.
BAD_PAGE_ID = "sup_dawg"
# Page ID used to build the HarPage under test in most tests of this module.
PAGE_ID = "page_3"
def test_init(har_data):
    """
    Test the object loading.

    Covers construction without any data source, construction with an
    unknown page ID, construction from raw HAR data and from a parser,
    and the ordering of the resulting entries.
    """
    # No har_data and no har_parser supplied
    with pytest.raises(ValueError):
        HarPage(PAGE_ID)

    init_data = har_data('humanssuck.net.har')

    # Throws PageNotFoundError with bad page ID
    with pytest.raises(PageNotFoundError):
        HarPage(BAD_PAGE_ID, har_data=init_data)

    # Make sure it can load with either har_data or a parser
    page = HarPage(PAGE_ID, har_data=init_data)
    assert isinstance(page, HarPage)
    assert repr(page) == "ID: page_3, URL: http://humanssuck.net/"

    parser = HarParser(init_data)
    page = HarPage(PAGE_ID, har_parser=parser)
    assert isinstance(page, HarPage)

    entries = page.entries
    assert len(entries) == 4

    # Make sure that the entries are actually in order: every entry must
    # start no later than the one that follows it.
    for current_entry, next_entry in zip(entries, entries[1:]):
        assert current_entry.startTime <= next_entry.startTime
def test_no_title(har_data):
    """
    Loading a page from HAR data without a title must not raise; the
    ``title`` property is expected to be an empty string.
    """
    untitled_page = HarPage(PAGE_ID, har_data=har_data('no_title.har'))
    assert untitled_page.title == ''
def test_filter_entries(har_data):
    """
    Tests ability to filter entries, with or without regex.

    Each positive case checks both the number of matches and that every
    returned entry really satisfies the filters; the trailing cases check
    that non-matching filters return nothing.
    """
    init_data = har_data('humanssuck.net.har')
    page = HarPage(PAGE_ID, har_data=init_data)

    # Filter by request type only
    entries = page.filter_entries(request_type='.*ET')
    assert len(entries) == 4
    for entry in entries:
        assert entry.request.method == entry["request"]["method"] == 'GET'

    # Filter by request type and content_type
    entries = page.filter_entries(request_type='.*ET', content_type='image.*')
    assert len(entries) == 1
    for entry in entries:
        assert entry.request.method == entry["request"]["method"] == 'GET'
        # Content-Type is verified on the response headers, matching the
        # three-filter case below.
        for header in entry.response.headers:
            if header['name'] == 'Content-Type':
                assert re.search('image.*', header['value'])

    # Filter by request type, content type, and status code
    entries = page.filter_entries(request_type='.*ET', content_type='image.*',
                                  status_code='2.*')
    assert len(entries) == 1
    for entry in entries:
        assert entry.request.method == entry["request"]["method"] == 'GET'
        assert re.search('2.*', str(entry.response.status))
        # Check the headers through both attribute and item access
        for header in entry.response.headers:
            if header['name'] == 'Content-Type':
                assert re.search('image.*', header['value'])
        for header in entry["response"]["headers"]:
            if header['name'] == 'Content-Type':
                assert re.search('image.*', header['value'])

    # Filters that should match nothing
    entries = page.filter_entries(request_type='.*ST')
    assert len(entries) == 0
    entries = page.filter_entries(request_type='.*ET', content_type='video.*')
    assert len(entries) == 0
    entries = page.filter_entries(request_type='.*ET', content_type='image.*',
                                  status_code='3.*')
    assert len(entries) == 0
def test_filter_entries_load_time(har_data):
    """
    Checks how many entries ``filter_entries`` returns for several
    ``load_time__gt`` values.
    """
    page = HarPage(PAGE_ID,
                   har_data=har_data('humanssuck.net_duplicate_url.har'))

    # (load_time__gt value, number of entries expected back)
    expectations = ((100, 4), (300, 3), (500, 0))
    for threshold, expected_count in expectations:
        matched = page.filter_entries(load_time__gt=threshold)
        assert len(matched) == expected_count
def test_get_load_time(har_data):
    """
    Checks HarPage.get_load_time() against known values for several
    filter combinations.
    """
    page = HarPage(PAGE_ID, har_data=har_data('humanssuck.net.har'))

    get_time = page.get_load_time(request_type='GET')
    assert get_time == 463

    get_time_sync = page.get_load_time(request_type='GET', asynchronous=False)
    assert get_time_sync == 843

    image_time = page.get_load_time(content_type='image.*')
    assert image_time == 304

    success_time = page.get_load_time(status_code='2.*')
    assert success_time == 463
def test_entries(har_data):
    """
    Every entry of the page must reference the page's own ID, whether the
    pageref is read as an attribute or as an item.
    """
    page = HarPage(PAGE_ID, har_data=har_data('humanssuck.net.har'))
    for har_entry in page.entries:
        assert har_entry.pageref == har_entry["pageref"] == page.page_id
@pytest.mark.skipif(six.PY3, reason="Runs with Python 2")
def test_iteration_python2(har_data):
    """
    Iterating over a HarPage yields its four entries, and an explicit
    iterator obtained with ``iter()`` walks them in the expected order
    before raising StopIteration.
    """
    page = HarPage(PAGE_ID, har_data=har_data('humanssuck.net.har'))

    collected = [item for item in page]
    assert len(collected) == 4

    page_iterator = iter(page)
    expected_descriptions = (
        'HarEntry for http://humanssuck.net/',
        'HarEntry for http://humanssuck.net/test.css',
        'HarEntry for http://humanssuck.net/screen_login.gif',
        'HarEntry for http://humanssuck.net/jquery-1.7.1.min.js',
    )
    for description in expected_descriptions:
        assert str(next(page_iterator)) == description

    # All four entries consumed; one more step must end the iteration
    with pytest.raises(StopIteration):
        assert next(page_iterator)
@pytest.mark.skipif(six.PY2, reason="Runs with Python 3")
def test_iteration_python3(har_data):
    """
    Test iterating over a HarPage on Python 3.

    The function has its own name so that it does not rebind (and thereby
    hide from test collection) the Python 2 variant,
    ``test_iteration_python2``, defined earlier in this module.
    """
    init_data = har_data('humanssuck.net.har')
    page = HarPage(PAGE_ID, har_data=init_data)

    entries = [x for x in page]
    assert len(entries) == 4

    # The assertions below expect next() called on the page itself to
    # start from the first entry again after the full pass above.
    expected_descriptions = (
        'HarEntry for http://humanssuck.net/',
        'HarEntry for http://humanssuck.net/test.css',
        'HarEntry for http://humanssuck.net/screen_login.gif',
        'HarEntry for http://humanssuck.net/jquery-1.7.1.min.js',
    )
    for description in expected_descriptions:
        assert str(next(page)) == description

    # All four entries consumed; one more step must end the iteration
    with pytest.raises(StopIteration):
        next(page)
def test_file_types(har_data):
    """
    Every asset exposed by a ``*_files`` property must carry a response
    Content-Type that contains one of the substrings expected for it.
    """
    page = HarPage(PAGE_ID, har_data=har_data('cnn.har'))

    # HarPage attribute name -> substrings accepted in the Content-Type
    expected_markers = dict(
        image_files=['image'],
        css_files=['css'],
        js_files=['javascript'],
        audio_files=['audio'],
        video_files=['video', 'flash'],
        text_files=['text'],
        html_files=['html'],
    )
    for attr_name, markers in iteritems(expected_markers):
        for asset in getattr(page, attr_name, None):
            assert _correct_file_type(asset, markers)
def test_request_types(har_data):
    """
    The per-method request lists must only contain entries whose request
    used that HTTP method.
    """
    page = HarPage(PAGE_ID, har_data=har_data('humanssuck.net.har'))

    # (HarPage attribute holding the list, HTTP method expected in it)
    method_lists = (('get_requests', 'GET'), ('post_requests', 'POST'))
    for attr_name, http_method in method_lists:
        for entry in getattr(page, attr_name):
            assert (entry.request.method
                    == entry["request"]["method"]
                    == http_method)
def test_sizes_trans(har_data):
    """
    Checks the ``*_size_trans`` properties of page_1 in cnn-chrome.har
    against known values.
    """
    page = HarPage('page_1', har_data=har_data('cnn-chrome.har'))

    expected_sizes = (
        ('page_size_trans', 2609508),
        ('text_size_trans', 569814),
        ('css_size_trans', 169573),
        ('js_size_trans', 1600321),
        ('image_size_trans', 492950),
        # TODO - Get test data for audio and video
        ('audio_size_trans', 0),
        ('video_size_trans', 0),
    )
    for attr_name, expected in expected_sizes:
        assert getattr(page, attr_name) == expected
def test_sizes(har_data):
    """
    Checks the ``*_size`` properties of the humanssuck.net page against
    known values.
    """
    page = HarPage(PAGE_ID, har_data=har_data('humanssuck.net.har'))

    expected_sizes = (
        ('page_size', 62204),
        ('text_size', 246),
        ('css_size', 8),
        ('js_size', 38367),
        ('image_size', 23591),
        # TODO - Get test data for audio and video
        ('audio_size', 0),
        ('video_size', 0),
    )
    for attr_name, expected in expected_sizes:
        assert getattr(page, attr_name) == expected
def test_load_times(har_data):
    """
    Checks the load time properties of the humanssuck.net page against
    known values.

    The sample data is thin: a page with 2-3 assets of each type would be
    needed to exercise the load times properly.
    """
    page = HarPage(PAGE_ID, har_data=har_data('humanssuck.net.har'))

    # Check initial page load
    initial_url = page.actual_page.request.url
    assert initial_url == 'http://humanssuck.net/'

    expected_times = (
        # Initial page load times
        ('initial_load_time', 153),
        ('content_load_time', 543),
        # Content type browser (async) load times
        ('image_load_time', 304),
        ('css_load_time', 76),
        ('js_load_time', 310),
        ('html_load_time', 153),
        ('page_load_time', 567),
        # TODO - Need to get sample data for these types
        ('audio_load_time', 0),
        ('video_load_time', 0),
    )
    for attr_name, expected in expected_times:
        assert getattr(page, attr_name) == expected
def test_time_to_first_byte(har_data):
    """
    The page must expose the expected TTFB value through its
    ``time_to_first_byte`` property.
    """
    page = HarPage(PAGE_ID, har_data=har_data('humanssuck.net.har'))
    ttfb = page.time_to_first_byte
    assert ttfb == 153
def test_hostname(har_data):
    """
    The ``hostname`` property must report the host of the loaded page.
    """
    page = HarPage(PAGE_ID, har_data=har_data('humanssuck.net.har'))
    reported_host = page.hostname
    assert reported_host == 'humanssuck.net'
def test_url(har_data):
    """
    The ``url`` property must report the URL of the loaded page.
    """
    page = HarPage(PAGE_ID, har_data=har_data('humanssuck.net.har'))
    reported_url = page.url
    assert reported_url == 'http://humanssuck.net/'
def _correct_file_type(entry, file_types):
    """
    Tell whether an entry's response Content-Type mentions one of the
    given file types.

    Only the first Content-Type header found is considered.

    :param entry: object exposing ``response.headers`` as an iterable of
        dicts with ``'name'`` and ``'value'`` keys.
    :param file_types: iterable of substrings to look for in the header
        value.
    :returns: ``True`` if the Content-Type value contains at least one of
        ``file_types``; ``False`` otherwise, including when the response
        has no Content-Type header at all.
    """
    for header in entry.response.headers:
        # HTTP header names are case-insensitive, so 'content-type' must
        # be recognised as well as 'Content-Type'.
        if header['name'].lower() == 'content-type':
            return any(ft in header['value'] for ft in file_types)
    # No Content-Type header: report a definite mismatch instead of None
    return False
def test_duplicate_urls_count(har_data):
    """
    ``duplicate_url_request`` must map each URL requested more than once
    in the HAR to the number of times it appears.
    """
    page = HarPage(PAGE_ID,
                   har_data=har_data('humanssuck.net_duplicate_url.har'))
    expected_duplicates = {'http://humanssuck.net/jquery-1.7.1.min.js': 2}
    assert page.duplicate_url_request == expected_duplicates