
Refactored to use the Requests library

BURP now uses the Requests library to make a single HTTP request for a given URL, rather than separate requests for HTML analysis and HTTP-header analysis.
1 parent a18d264 · commit ce9f1b2072eeab01e588cb0a8ab513716c5418c5 · @eromba committed Dec 9, 2012
Showing with 36 additions and 75 deletions.
  1. +1 −1 burp/url/__init__.py
  2. +0 −40 burp/url/analyzer.py
  3. +16 −13 process_urls.py
  4. +19 −21 scripts/burp
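
The pattern at the heart of the commit: one requests.get() call now serves both analyses, with the body (r.text) handed to the HTML analyzer and the headers (r.headers) read directly off the same response. A minimal standalone sketch of that shape (the fetch_for_analysis name is ours, purely for illustration):

import requests

def fetch_for_analysis(url):
    # One round trip serves both the HTML and the HTTP-header analysis
    r = requests.get(url)
    html = r.text                     # response body, for HTML analysis
    server = r.headers.get('Server')  # response headers, for header analysis
    return html, server

html, server = fetch_for_analysis('http://example.com')
print('Fetched %d characters; server: %s' % (len(html), server))
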
burp/url/__init__.py
@@ -1,2 +1,2 @@
-from burp.url.analyzer import getHttpHeaders, getIpAddr, getWhoIs
+from burp.url.analyzer import getIpAddr, getWhoIs
from burp.url.tokenizer import getTokens
burp/url/analyzer.py
@@ -4,27 +4,6 @@
from pprint import pprint
from burp.url.tokenizer import getTokens
-try:
- from http.client import HTTPConnection
-except ImportError:
- from httplib import HTTPConnection
-
-try:
- from urllib.request import Request, OpenerDirector, HTTPHandler, HTTPDefaultErrorHandler, HTTPErrorProcessor, HTTPRedirectHandler
-except ImportError:
- from urllib2 import Request, OpenerDirector, HTTPHandler, HTTPDefaultErrorHandler, HTTPErrorProcessor, HTTPRedirectHandler
-
-try:
- from urllib.parse import urlparse
-except ImportError:
- from urlparse import urlparse
-
-try:
- from urllib.error import HTTPError
-except ImportError:
- from urllib2 import HTTPError
-
-
def getWhoIs(dom):
"""Return a dictionary of whois infomation
Will throw exception if tld server not known, or query limit reached
@@ -33,25 +12,6 @@ def getWhoIs(dom):
#print(ws);
return ws.__dict__;
-class HeadRequest(Request):
- """Make a HEAD request for a given url, inherits from urllib.request.Request"""
- def get_method(self):
- return 'HEAD'
-
-def getHttpHeaders(url, redirections=True):
- """Return a dictionary of the headers for the site at url"""
- opener = OpenerDirector()
- opener.add_handler(HTTPHandler())
- opener.add_handler(HTTPDefaultErrorHandler())
- if redirections:
- # HTTPErrorProcessor makes HTTPRedirectHandler work
- opener.add_handler(HTTPErrorProcessor())
- opener.add_handler(HTTPRedirectHandler())
- res = opener.open(HeadRequest(url))
- res.close()
- return res.info().__dict__
-
-
def getIpAddr(dom):
"""Return the ip address of the domain"""
return socket.gethostbyname(dom);
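
For reference, everything removed above — the HeadRequest subclass plus the hand-assembled OpenerDirector chain — reduces to a single call in Requests. A sketch of the equivalent (not what the commit does; BURP instead reads headers off the GET response, as the next file shows):

import requests

# HEAD request with redirects followed, mirroring getHttpHeaders(url, redirections=True)
headers = requests.head('http://example.com', allow_redirects=True).headers
print(headers.get('Server'))
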
process_urls.py
@@ -4,6 +4,7 @@
import threading
import Queue
import codecs
+import requests
import burp.html
import burp.url
@@ -61,9 +62,17 @@ def run(self):
info = UrlInfo(url, isBad)
# okay want to isolate errors as much as possible
+ try:
+ r = requests.get(url)
+ except Exception as e:
+ print('request, %s, %s\n' % (url, e))
+ continue # 'r' is undefined if the request failed, so skip this URL
try: # html analysis
- html = burp.html.HTMLAnalyzer(url)
- html_data = html.analyze()
+ html = r.text
+ html_analyzer = burp.html.HTMLAnalyzer() # only 'import burp.html' is in scope, so qualify the name
+ html_analyzer.setUrl(url)
+ html_analyzer.loadHtml(html)
+ html_data = html_analyzer.analyze()
for key, value in html_data.iteritems():
info[key] = value
except Exception as e:
@@ -84,17 +93,11 @@ def run(self):
if domain == "":
domain = url #hack
try: # whois and headers
- headers = burp.url.getHttpHeaders(url)
- if "cache-control" in headers:
- info['cache_control'] = headers['cache-control']
- if "expires" in headers:
- info['expires'] = headers['expires']
- if "content-type" in headers:
- info['content_type'] = headers['content-type']
- if "server" in headers:
- info['server'] = headers['server']
- if "transfer-encoding" in headers:
- info['transfer_encoding'] = headers['transfer-encoding']
+ info['cache_control'] = r.headers.get('Cache-Control') # 'analysis' was a NameError here; .get() tolerates absent headers
+ info['expires'] = r.headers.get('Expires')
+ info['content_type'] = r.headers.get('Content-Type')
+ info['server'] = r.headers.get('Server')
+ info['transfer_encoding'] = r.headers.get('Transfer-Encoding')
info['ip_address'] = burp.url.getIpAddr(domain)
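
Worth noting for the header copies above: Requests stores r.headers in a case-insensitive dictionary, so the capitalized keys here match whatever casing the server actually sent. A quick standalone check:

import requests

r = requests.get('http://example.com')
# requests.structures.CaseInsensitiveDict: both spellings resolve to the same entry
assert r.headers.get('content-type') == r.headers.get('Content-Type')
print(r.headers.get('Content-Type'))
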
scripts/burp
@@ -5,16 +5,11 @@
# Usage: burp [URL]
##
-import sys, os, re, arff
+import sys, os, re, requests, arff
from subprocess import Popen, PIPE, STDOUT
from burp.html import HTMLAnalyzer
from burp.url import getTokens
-from burp.url import getHttpHeaders, getIpAddr, getWhoIs
-
-try:
- from urllib.request import URLError
-except ImportError:
- from urllib2 import URLError
+from burp.url import getIpAddr, getWhoIs
WEKA_MODEL = 'weka/J48.model'
WEKA_TRAINING_SET = 'weka/training_set.arff'
@@ -29,13 +24,22 @@ if not len(sys.argv) == 2:
##
url = sys.argv[1]
-url = url.strip(',') # saw in log file a lot
analysis = {}
+print("Retrieving web page ...")
+try:
+ r = requests.get(url)
+except Exception as e:
+ print('ERROR: Could not retrieve %s, %s' % (url, e)) # include the exception instead of leaving 'e' unused
+ exit(1)
+
print("Analyzing HTML ...")
try: # html analysis
- html = HTMLAnalyzer(url)
- analysis = html.analyze()
+ html = r.text
+ htmlanalyzer = HTMLAnalyzer()
+ htmlanalyzer.setUrl(url)
+ htmlanalyzer.loadHtml(html)
+ analysis = htmlanalyzer.analyze()
except Exception as e:
print('ERROR: HTML Analysis, %s, %s\n' %(url, e))
exit(1)
@@ -63,17 +67,11 @@ if domain == "":
domain = url #hack
print("Analyzing HTTP headers ...")
-headers = getHttpHeaders(url)
-if headers.__contains__("cache-control"):
- analysis['cache_control'] = headers['cache-control']
-if headers.__contains__("expires"):
- analysis['expires'] = headers['expires']
-if headers.__contains__("content-type"):
- analysis['content_type'] = headers['content-type']
-if headers.__contains__("server"):
- analysis['server'] = headers['server']
-if headers.__contains__("transfer-encoding"):
- analysis['transfer_encoding'] = headers['transfer-encoding']
+analysis['cache_control'] = r.headers.get('Cache-Control') # .get() avoids a KeyError for headers the server omits
+analysis['expires'] = r.headers.get('Expires')
+analysis['content_type'] = r.headers.get('Content-Type')
+analysis['server'] = r.headers.get('Server')
+analysis['transfer_encoding'] = r.headers.get('Transfer-Encoding')
ipAddress = getIpAddr(domain)
analysis['ip_address'] = ipAddress
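
A possible tightening of the retrieval block above, beyond what the commit does: catch requests.exceptions.RequestException rather than the bare Exception, and pass a timeout so a dead server cannot hang the script. A hedged sketch:

import sys
import requests

url = 'http://example.com'
try:
    r = requests.get(url, timeout=10)  # fail fast instead of hanging indefinitely
except requests.exceptions.RequestException as e:
    print('ERROR: Could not retrieve %s, %s' % (url, e))
    sys.exit(1)
print('Fetched %d characters' % len(r.text))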
