"""
url_fuzzer.py
Copyright 2006 Andres Riancho
This file is part of w3af, http://w3af.org/ .
w3af is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation version 2 of the License.
w3af is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with w3af; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
"""
from itertools import chain, repeat, izip
import w3af.core.controllers.output_manager as om
import w3af.core.data.kb.knowledge_base as kb
from w3af.core.controllers.plugins.crawl_plugin import CrawlPlugin
from w3af.core.controllers.core_helpers.fingerprint_404 import is_404
from w3af.core.controllers.exceptions import BaseFrameworkException
from w3af.core.data.dc.headers import Headers
from w3af.core.data.bloomfilter.scalable_bloom import ScalableBloomFilter
from w3af.core.data.fuzzer.utils import rand_alnum
from w3af.core.data.options.opt_factory import opt_factory
from w3af.core.data.options.option_list import OptionList
from w3af.core.data.parsers.doc.url import URL
from w3af.core.data.kb.info import Info
from w3af.core.data.request.fuzzable_request import FuzzableRequest
class url_fuzzer(CrawlPlugin):
"""
Try to find backups, and other related files.
:author: Andres Riancho (andres.riancho@gmail.com)
"""
_appendables = ('~', '.tar.gz', '.gz', '.7z', '.cab', '.tgz',
'.gzip', '.bzip2', '.inc', '.zip', '.rar', '.jar', '.java',
'.class', '.properties', '.bak', '.bak1', '.bkp', '.back',
'.backup', '.backup1', '.old', '.old1', '.$$$'
)
_backup_exts = ('tar.gz', '7z', 'gz', 'cab', 'tgz', 'gzip',
'bzip2', 'zip', 'rar'
)
_file_types = (
'inc', 'fla', 'jar', 'war', 'java', 'class', 'properties',
'bak', 'bak1', 'backup', 'backup1', 'old', 'old1', 'c', 'cpp',
'cs', 'vb', 'phps', 'disco', 'ori', 'orig', 'original'
)

    def __init__(self):
        CrawlPlugin.__init__(self)

        self._headers = None
        self._first_time = True
        self._fuzz_images = False
        self._seen = ScalableBloomFilter()

    def crawl(self, fuzzable_request):
        """
        Searches for new URLs using fuzzing.

        :param fuzzable_request: A fuzzable_request instance that contains
                                 (among other things) the URL to test.
        """
        url = fuzzable_request.get_url()
        self._headers = Headers([('Referer', url.url_string)])

        if self._first_time:
            self._verify_head_enabled(url)
            self._first_time = False

        # First we need to delete fragments and query strings from the URL.
        url = url.uri2url()

        # And we mark this one as a "do not return" URL, because the
        # core already found it using another technique.
        self._seen.add(url)

        self._verify_head_enabled(url)
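        # Prefer a lightweight HEAD request when the server is known to
        # allow it, and fall back to a regular GET otherwise.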
        if self._head_enabled():
            response = self._uri_opener.HEAD(url, cache=True,
                                             headers=self._headers)
        else:
            response = self._uri_opener.GET(url, cache=True,
                                            headers=self._headers)

        if response.is_text_or_html() or self._fuzz_images:
            mutants_chain = chain(self._mutate_by_appending(url),
                                  self._mutate_path(url),
                                  self._mutate_file_type(url),
                                  self._mutate_domain_name(url))
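            # Pair the original URL with every mutant so the worker pool
            # can expand each tuple into a _do_request(url, mutant) call.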
            url_repeater = repeat(url)
            args = izip(url_repeater, mutants_chain)

            self.worker_pool.map_multi_args(self._do_request, args)

    def _do_request(self, url, mutant):
        """
        Perform a simple GET to see if the result is an error or not, and
        then run the actual fuzzing.
        """
        response = self._uri_opener.GET(mutant, cache=True,
                                        headers=self._headers)
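        # Discard 404s, auth-protected responses (401/403) and likely
        # false positives before reporting anything to the core.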
        if not (is_404(response) or
                response.get_code() in (403, 401) or
                self._return_without_eval(mutant)):

            # Create the fuzzable request and send it to the core
            fr = FuzzableRequest.from_http_response(response)
            self.output_queue.put(fr)

            #
            #   Save it to the kb (if new)!
            #
            if response.get_url() not in self._seen and \
               response.get_url().get_file_name():

                desc = 'A potentially interesting file was found at: "%s".'
                desc = desc % response.get_url()

                i = Info('Potentially interesting file', desc, response.id,
                         self.get_name())
                i.set_url(response.get_url())

                kb.kb.append(self, 'files', i)
                om.out.information(i.get_desc())

                # Report only once
                self._seen.add(response.get_url())

    def _return_without_eval(self, uri):
        """
        This method tries to reduce the number of false positives.
        """
        if not uri.has_query_string():
            return False
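        # Append a random suffix to the file name: if the server still
        # answers with something other than a 404, it most likely responds
        # the same way to any file name, so the original match is treated
        # as a false positive.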
        uri.set_file_name(uri.get_file_name() + rand_alnum(7))

        try:
            response = self._uri_opener.GET(uri, cache=True,
                                            headers=self._headers)
        except BaseFrameworkException, e:
            msg = 'An exception was raised while requesting "%s", the error'
            msg += ' message is: "%s"'
            om.out.error(msg % (uri, e))
        else:
            if not is_404(response):
                return True

        return False

    def _mutate_domain_name(self, url):
        """
        If the url is "http://www.foobar.com/asd.txt" this method returns:
            - http://www.foobar.com/foobar.zip
            - http://www.foobar.com/foobar.rar
            - http://www.foobar.com/www.foobar.zip
            - http://www.foobar.com/www.foobar.rar
            - etc...

        :param url: A URL to transform.
        :return: A generator yielding URLs that mutate the original url
                 passed as parameter.

        >>> from w3af.core.data.parsers.doc.url import URL
        >>> u = url_fuzzer()
        >>> url = URL('http://www.w3af.com/')
        >>> mutants = list(u._mutate_domain_name(url))
        >>> URL('http://www.w3af.com/www.tar.gz') in mutants
        True
        >>> URL('http://www.w3af.com/www.w3af.tar.gz') in mutants
        True
        >>> URL('http://www.w3af.com/www.w3af.com.tar.gz') in mutants
        True
        >>> len(mutants) > 20
        True
        """
        domain = url.get_domain()
        domain_path = url.get_domain_path()
        splitted_domain = domain.split('.')
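        # Build incremental domain prefixes ("www", "www.foobar",
        # "www.foobar.com") and combine each one with every backup
        # extension.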
        for i in xrange(len(splitted_domain)):
            filename = '.'.join(splitted_domain[0: i + 1])

            for extension in self._backup_exts:
                filename_ext = filename + '.' + extension

                domain_path_copy = domain_path.copy()
                domain_path_copy.set_file_name(filename_ext)
                yield domain_path_copy

    def _mutate_by_appending(self, url):
        """
        Adds something to the end of the url (mutates the file being
        requested).

        :param url: A URL to transform.
        :return: A generator yielding URLs that mutate the original url
                 passed as parameter.

        >>> from w3af.core.data.parsers.doc.url import URL
        >>> u = url_fuzzer()
        >>> url = URL('http://www.w3af.com/')
        >>> mutants = u._mutate_by_appending(url)
        >>> list(mutants)
        []

        >>> url = URL('http://www.w3af.com/foo.html')
        >>> mutants = u._mutate_by_appending(url)
        >>> URL('http://www.w3af.com/foo.html~') in mutants
        True
        >>> len(list(mutants)) > 20
        True
        """
        if not url.url_string.endswith('/') and url.url_string.count('/') >= 3:
            #
            #   Only get here on these cases:
            #       - http://host.tld/abc
            #       - http://host.tld/abc/def.html
            #
            #   And not on these:
            #       - http://host.tld
            #       - http://host.tld/abc/
            #
            for to_append in self._appendables:
                url_copy = url.copy()
                filename = url_copy.get_file_name()
                filename += to_append
                url_copy.set_file_name(filename)
                yield url_copy

    def _mutate_file_type(self, url):
        """
        If the url is "http://www.foobar.com/asd.txt" this method returns:
            - http://www.foobar.com/asd.zip
            - http://www.foobar.com/asd.tgz
            - etc...

        :param url: A URL to transform.
        :return: A generator yielding URLs that mutate the original url
                 passed as parameter.

        >>> from w3af.core.data.parsers.doc.url import URL
        >>> u = url_fuzzer()
        >>> list(u._mutate_file_type(URL('http://www.w3af.com/')))
        []

        >>> url = URL('http://www.w3af.com/foo.html')
        >>> mutants = list(u._mutate_file_type(url))
        >>> URL('http://www.w3af.com/foo.tar.gz') in mutants
        True
        >>> URL('http://www.w3af.com/foo.disco') in mutants
        True
        >>> len(mutants) > 20
        True
        """
        extension = url.get_extension()
        if extension:
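            # Replace the original extension with every backup and
            # source-code extension we know about.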
            for filetype in chain(self._backup_exts, self._file_types):
                url_copy = url.copy()
                url_copy.set_extension(filetype)
                yield url_copy

    def _mutate_path(self, url):
        """
        Mutate the path instead of the file.

        :param url: A URL to transform.
        :return: A generator yielding URLs that mutate the original url
                 passed as parameter.

        >>> from w3af.core.data.parsers.doc.url import URL
        >>> u = url_fuzzer()
        >>> url = URL('http://www.w3af.com/')
        >>> list(u._mutate_path(url))
        []

        >>> url = URL('http://www.w3af.com/foo.html')
        >>> list(u._mutate_path(url))
        []

        >>> url = URL('http://www.w3af.com/foo/bar.html')
        >>> mutants = list(u._mutate_path(url))
        >>> URL('http://www.w3af.com/foo.tar.gz') in mutants
        True
        >>> URL('http://www.w3af.com/foo.old') in mutants
        True
        >>> URL('http://www.w3af.com/foo.zip') in mutants
        True
        """
        url_string = url.url_string
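        # Only mutate URLs with at least one directory below the root,
        # e.g. http://host.tld/foo/bar.html but not http://host.tld/foo.html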
        if url_string.count('/') > 3:
            # Create the new path
            url_string = url_string[:url_string.rfind('/')]

            to_append_list = self._appendables
            for to_append in to_append_list:
                newurl = URL(url_string + to_append)
                yield newurl

    def _verify_head_enabled(self, url):
        """
        Verifies if the requested URL permits a HEAD request. This
        information was saved inside the KB by the allowed_methods plugin.

        :return: Sets self._head to the correct value, nothing is returned.
        """
        allowed_methods_infos = kb.kb.get('allowed_methods', 'methods')

        allowed_methods = []
        for info in allowed_methods_infos:
            allowed_methods.extend(info['methods'])

        if 'HEAD' in allowed_methods:
            self._head = True
        else:
            self._head = False

    def _head_enabled(self):
        return self._head

    def get_options(self):
        """
        :return: A list of option objects for this plugin.
        """
        ol = OptionList()

        d = 'Apply URL fuzzing to all URLs, including images, videos, zip, etc.'
        h = 'Don\'t change this unless you read the plugin code.'
        o = opt_factory('fuzz_images', self._fuzz_images, d, 'boolean', help=h)
        ol.add(o)

        return ol

    def set_options(self, options_list):
        """
        Sets all the options that the user configured in the interface the
        framework generated from get_options().

        :param options_list: A dictionary with the options for the plugin.
        :return: No value is returned.
        """
        self._fuzz_images = options_list['fuzz_images'].get_value()

    def get_plugin_deps(self):
        """
        :return: A list with the names of the plugins that should be run
                 before the current one.
        """
        return ['infrastructure.allowed_methods']

    def get_long_desc(self):
        """
        :return: A DETAILED description of the plugin functions and features.
        """
        return """
        This plugin will try to find new URLs based on the input. If the
        input is, for example:
            - http://host.tld/a.html

        The plugin will request:
            - http://host.tld/a.html.tgz
            - http://host.tld/a.tgz
            ...
            - http://host.tld/a.zip

        If the response is different from the 404 page (whatever it may be,
        automatic detection is performed), then we have found a new URL. This
        plugin searches for backup files, source code, and other common
        extensions.

        One configurable parameter exists:
            - fuzz_images
        """