In [35]:
import base64
import os
import re
import xml.etree.ElementTree as ET
from collections import Counter

import http_parser
import numpy
import requests
from lxml import etree, html

# Use http_parser.parser when it is importable, otherwise http_parser.pyparser.
try:
    from http_parser.parser import HttpParser
except ImportError:
    from http_parser.pyparser import HttpParser


## Global variables

In [54]:
# Regular expressions used to harvest candidate variable names from inline
# JavaScript.  Every capturing group that takes part in a match is treated as
# a candidate name by the code iterating over this list.
# Plain r"" literals are used: the ur"" prefix is a syntax error on Python 3,
# while r"" works on both Python 2 and Python 3.
VAR_SEARCH_REGEXP = [
    # Array access "name[index]"; the index is captured only when it is an
    # identifier.  The inner group is optional ("?") instead of repeated ("*"):
    # a repeated \w-run followed by \w* backtracks exponentially on a long
    # unterminated "name[aaaa..." sequence, while matching the same text.
    r"([a-zA-Z_]\w*)\[([a-zA-Z_]\w*)?\w*\]",
    r"var\s+([a-zA-Z_]\w*)",                  # var name
    r"([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\.*",     # object.member
    r"([a-zA-Z_]\w*)\s*=\s*\w",               # name = value
    r"\w+\s*=\s*([a-zA-Z_]\w*)",              # smth = name
    r'''["']([a-zA-Z_]\w*)["']:["']\w*["']''',  # "name":"value"
]

# JavaScript reserved words; a captured identifier equal to one of these is
# not reported as a variable name.
# The entries 'class', 'enum', 'export', 'extends' and 'super' previously
# carried a trailing "*" (a footnote marker pasted along with the word), so
# those five keywords were never filtered out.
JAVASCRIPT_KEYWORDS = set([
    'abstract', 'arguments', 'boolean', 'break', 'byte',
    'case', 'catch', 'char', 'class', 'const',
    'continue', 'debugger', 'default', 'delete', 'do',
    'double', 'else', 'enum', 'eval', 'export',
    'extends', 'false', 'final', 'finally', 'float',
    'for', 'function', 'goto', 'if', 'implements',
    'import', 'in', 'instanceof', 'int', 'interface',
    'let', 'long', 'native', 'new', 'null',
    'package', 'private', 'protected', 'public', 'return',
    'short', 'static', 'super', 'switch', 'synchronized',
    'this', 'throw', 'throws', 'transient', 'true',
    'try', 'typeof', 'var', 'void', 'volatile',
    'while', 'with', 'yield',
])

# Proxy mapping handed to requests: http and https traffic both go to the
# same address on the loopback interface.
BURPSUITE_PROXIES = dict(
    http='127.0.0.1:8080',
    https='127.0.0.1:8080',
)

# Path to the Burp Suite CA certificate (.crt), expected in the current
# working directory.  Author's note: cacert.der was added to Firefox and the
# resulting .crt file was then imported here.
# os.path.join is used so the separator is correct on every platform.
CERT_FILE = os.path.join(os.getcwd(), "PortSwiggerCA.crt")

# Numeric weight of each severity label.
# NOTE(review): the label "Hight" is spelled this way consistently in this
# file (COMMON_PAYLOADS_DICT uses the same spelling); if it is ever corrected,
# rename every occurrence together.
SEVERITY_CLASSIFICATION = {
    "Hight": 5,
    "Medium": 3,
    "Low": 1,
}

# Probe strings appended to URLs as parameter values, each mapped to the
# severity label that is reported when the probe is found in the response.
COMMON_PAYLOADS_DICT = dict([
    ("a87ui", "Low"),
    ("87923", "Low"),
    ("<img/>", "Medium"),
    ("alert(1)", "Hight"),
    ("javascript:alert(1)", "Hight"),
])


## HTTP Classes

In [37]:
class HttpResponse:
    """Wrapper around an ``HttpParser`` that has been fed a raw HTTP response."""

    def __init__(self,http_plain):
        # Hand the complete raw message to a fresh parser in one call.
        parser = HttpParser()
        parser.execute(http_plain,len(http_plain))
        self.ResponseParser = parser

    def GetContent(self):
        """Return the value of the parser's ``recv_body()``."""
        body = self.ResponseParser.recv_body()
        return body

    def GetHeaders(self):
        """Return the parsed headers as a dict, or an empty dict while the
        parser reports the header section as incomplete."""
        parser = self.ResponseParser
        if not parser.is_headers_complete():
            return dict()
        return dict(parser.get_headers())
        
class HttpRequest:
    """Wrapper that feeds a raw HTTP request to an ``HttpParser`` on creation."""

    def __init__(self,http_plain):
        total = len(http_plain)
        self.RequestParser = HttpParser()
        self.RequestParser.execute(http_plain,total)

    def GetContent(self):
        """Return whatever ``recv_body()`` of the underlying parser yields."""
        return self.RequestParser.recv_body()

    def GetHeaders(self):
        """Return the headers as a plain dict; empty until the parser reports
        the header section as complete."""
        complete = self.RequestParser.is_headers_complete()
        return dict(self.RequestParser.get_headers()) if complete else dict()
        
        

## Burp Suite History Class

In [38]:
class BurpHistoryItem:
    """One ``<item>`` element of a Burp Suite history XML export.

    On construction the Base64 text of the ``<request>`` and ``<response>``
    children is decoded and handed to ``HttpRequest`` / ``HttpResponse``.
    The getters read the remaining child elements of the item on demand.

    Attributes:
        item: the wrapped XML element.
        request_plain / response_plain: the decoded raw messages.
        Request / Response: the parsed ``HttpRequest`` / ``HttpResponse``.
    """

    def __init__(self,item):
        self.item = item

        self.response_plain = self._DecodeBase64(self._First("response").text)
        self.Response = HttpResponse(self.response_plain)

        self.request_plain = self._DecodeBase64(self._First("request").text)
        self.Request = HttpRequest(self.request_plain)

    def _DecodeBase64(self, text):
        """Decode Base64 ``text``; an element without text decodes to empty data.

        ``base64.b64decode`` replaces ``str.decode('base64')``, which exists
        only on Python 2.
        """
        return base64.b64decode(text or "")

    def _First(self, tag):
        """Return the first descendant element named ``tag``.

        Raises IndexError when the item has no such element.
        """
        return self.item.findall(".//" + tag)[0]

    def _Text(self, tag):
        """Return the text of the first descendant named ``tag`` (may be None)."""
        return self._First(tag).text

    def GetUrl(self):
        """Return the text of ``<url>``."""
        return self._Text("url")

    def GetHost(self):
        """Return the text of ``<host>``."""
        return self._Text("host")

    def GetHostIP(self):
        """Return the ``ip`` attribute of ``<host>``."""
        return self._First("host").attrib["ip"]

    def GetPort(self):
        """Return ``<port>`` as an int."""
        # int() must be given the element's text, not the element itself.
        return int(self._Text("port"))

    def GetProtocol(self):
        """Return the text of ``<protocol>``."""
        # Returns the text like every other getter, not the XML element.
        return self._Text("protocol")

    def GetMethod(self):
        """Return the text of ``<method>``."""
        return self._Text("method")

    def GetPath(self):
        """Return the text of ``<path>``."""
        return self._Text("path")

    def GetExtension(self):
        """Return the text of ``<extension>``."""
        return self._Text("extension")

    def GetHttpRequestData(self):
        """Return the decoded raw request (counterpart of GetHttpResponseData)."""
        return self.request_plain

    def GetRequest(self):
        """Return the parsed ``HttpRequest``."""
        return self.Request

    def GetStatus(self):
        """Return ``<status>`` as an int."""
        return int(self._Text("status"))

    def GetResponseLength(self):
        """Return ``<responselength>`` as an int."""
        return int(self._Text("responselength"))

    def GetResponse(self):
        """Return the parsed ``HttpResponse``."""
        return self.Response

    def GetMiemType(self):
        """Return the text of ``<mimetype>``."""
        return self._Text("mimetype")

    # Correctly spelled alias; the original name is kept for existing callers.
    GetMimeType = GetMiemType

    def GetHttpResponseData(self):
        """Return the decoded raw response."""
        return self.response_plain

    def GetComment(self):
        """Return the text of ``<comment>``, or "" when the element is empty."""
        comment = self._Text("comment")
        return comment if comment is not None else ""
    
        

## Utilities

In [39]:
def GetListOfHistItemsFromFile(filename):
    """Parse the XML file ``filename`` and wrap every ``<item>`` element.

    Returns a list with one ``BurpHistoryItem`` per ``<item>`` found below
    the document root, in document order (empty when there are none).
    """
    xmlTree = ET.parse(filename)
    # ".//item" is the explicit spelling of the descendant search; a path
    # starting with "/" on an ElementTree emits a FutureWarning.
    return [BurpHistoryItem(item) for item in xmlTree.findall(".//item")]

In [40]:
def flatten(l):
    """Concatenate the items of every sub-iterable of ``l`` into one list (one level deep)."""
    flat = []
    for sublist in l:
        flat.extend(sublist)
    return flat

In [41]:
def MatchUrl(text):
    """Return every URL-looking substring of ``text`` in order of appearance.

    Recognised forms are ``http://...``, ``https://...`` and the
    protocol-relative ``//...``.  An empty list is returned when nothing
    matches.
    """
    # Characters accepted after the scheme.  They are listed explicitly:
    # the former range "A-z" also admitted "[", "]", "^" and "`" by accident.
    # "_" and "\" stay accepted as before.
    url_chars = r"[0-9a-zA-Z_/\\%.?&=-]+"
    # "https?" accepts exactly http or https ("http[s]*" took "httpsss" too).
    url_regexp = r"https?://" + url_chars + r"|//" + url_chars
    return [url.group() for url in re.finditer(url_regexp, text)]

In [42]:
def GetUrlParams(Url):
    """Return the query part of ``Url`` including the leading "?".

    Returns "" when ``Url`` has no "?" or when nothing follows it; the
    latter case used to raise IndexError.
    """
    Match = re.search(r"\?.+", Url)
    return Match.group(0) if Match else ""

In [43]:
def AddPayloadToUrl(Url,VarName,Payload):
    """Return ``Url`` with ``VarName=Payload`` appended as a query parameter.

    "&" joins the new parameter when ``Url`` already contains a "?",
    otherwise "?" starts the query string.  Nothing is URL-encoded.
    """
    Separator = "&" if '?' in Url else "?"
    return Url + Separator + VarName + "=" + Payload

In [44]:
def GetPostVariables(Content):
    """Extract ``name=value`` pairs from ``Content`` into a dict.

    A name is an identifier and a value is a (possibly empty) run of word
    characters.  When a name occurs more than once, the last value wins.
    """
    # r"" instead of ur"": the ur prefix is a syntax error on Python 3.
    VarRegExp = r"([a-zA-Z_]\w*)=(\w*);?"
    # findall yields (name, value) tuples, which dict() consumes directly;
    # the former bare "except: pass" guarded code that cannot raise.
    return dict(re.findall(VarRegExp, Content))

## Variable Extractor

In [45]:
def FindJSVariables(PlainHtml):
    """Collect candidate JavaScript variable names from inline scripts.

    The text of every ``<script>`` element of ``PlainHtml`` is searched with
    each pattern of ``VAR_SEARCH_REGEXP``; every captured group that is not
    in ``JAVASCRIPT_KEYWORDS`` counts as a name.

    Returns a sorted list of unique names (empty for an empty document).
    """
    # An empty document cannot be parsed (lxml raises for it), and it holds
    # no scripts anyway.
    if not PlainHtml or not PlainHtml.strip():
        return []

    HTMLDocument = html.fromstring(PlainHtml)
    Found = set()

    for Script in HTMLDocument.xpath(".//script"):
        ScriptContent = Script.text
        if ScriptContent is None:
            # e.g. <script src="..."></script> carries no inline code
            continue

        for RegExp in VAR_SEARCH_REGEXP:
            try:
                Matches = re.finditer(RegExp,ScriptContent)
            except re.error:
                # A malformed pattern is skipped so the remaining patterns
                # still run; other errors are no longer silently swallowed.
                continue

            for Matched in Matches:
                for VarName in Matched.groups():
                    if VarName is None:
                        continue
                    # Strip before de-duplicating so equal names collapse.
                    VarName = VarName.strip(' ')
                    if VarName not in JAVASCRIPT_KEYWORDS:
                        Found.add(VarName)

    # Sorted so repeated runs report the names in the same order.
    return sorted(Found)
    

In [52]:
def _FetchPage(Url,Headers,UseProxie,Timeout=None):
    """GET ``Url`` with ``Headers``, through the Burp proxy when ``UseProxie`` is true."""
    if UseProxie:
        return requests.get(Url,headers=Headers,proxies=BURPSUITE_PROXIES,
                            verify=CERT_FILE,timeout=Timeout)
    return requests.get(Url,headers=Headers,timeout=Timeout)


def ScanSiteFromBurpHistory(HistItem,Payloads,UseProxie=True,Timeout=None):
    """Probe the URL of ``HistItem`` for reflected payloads via GET parameters.

    The page is fetched once to collect JavaScript variable names; each name
    is then sent as a query parameter with every payload, and the response is
    searched for the payload (after removing echoes of the test URL and of
    its query string).  Findings are printed.

    Args:
        HistItem: object providing ``GetUrl()`` and ``GetRequest().GetHeaders()``.
        Payloads: mapping of payload string -> severity label.
        UseProxie: send the requests through ``BURPSUITE_PROXIES`` when true.
        Timeout: forwarded to ``requests.get`` as ``timeout``; ``None`` (the
            default) keeps the previous behaviour of waiting indefinitely.

    Returns:
        None.  A page that cannot be downloaded is reported and skipped.
    """
    Url = HistItem.GetUrl()
    Headers = HistItem.GetRequest().GetHeaders()

    try:
        Response = _FetchPage(Url,Headers,UseProxie,Timeout)
    except requests.RequestException:
        print("Could not download requested page!\n%s"%(Url))
        return

    VariableNames = FindJSVariables(Response.text)

    print("Scanning %s Testing method: GET\n Try %d variables and %d types of payloads"%(
                                                    Url,
                                                    len(VariableNames),
                                                    len(Payloads)))

    for VariableName in VariableNames:
        for Payload in Payloads:
            TestUrl = AddPayloadToUrl(Url,VariableName,Payload)
            try:
                # The URL carrying the payload must be requested, not the
                # unmodified one.
                Response = _FetchPage(TestUrl,Headers,UseProxie,Timeout)
            except requests.RequestException:
                print("Failed to load %s \nPage excluded from analys list"%(TestUrl))
                # Do not analyse the response left over from a previous request.
                continue

            # Drop plain echoes of the test URL and of its query string so
            # only other reflections of the payload are counted.
            PlainHtml = Response.text.replace(TestUrl,"")
            PlainHtml = PlainHtml.replace(GetUrlParams(TestUrl),"")

            # Literal substring count: payloads such as "alert(1)" contain
            # regex metacharacters and must not be used as patterns.
            Occurrences = PlainHtml.count(Payload)
            if Occurrences:
                print("Variable %s contain value %s with severity %s which occur %d times" %(
                                                VariableName,
                                                Payload,
                                                Payloads[Payload],
                                                Occurrences
                                                ))
    

## Simple vulnerability scanner

In [None]:
# Load the Burp history export named "hist" from the current working
# directory and scan every recorded item with the common payloads.
# os.path.join keeps the path separator correct on every platform.
BurpHistoryFile = os.path.join(os.getcwd(), "hist")
BurpHistItems = GetListOfHistItemsFromFile(BurpHistoryFile)
for item in BurpHistItems:
    ScanSiteFromBurpHistory(item,COMMON_PAYLOADS_DICT)




Scanning https://e.mail.ru/ Testing method: GET
 Try 430 variables and 5 types of payloads


## Test & Debug

In [None]:
# Fetch the page directly (no proxy) as sample input for the cells below.
response = requests.get(url="https://e.mail.ru")

In [None]:
# Collect every captured identifier from the raw page text (JavaScript
# keywords excluded) and report how often each one occurs.
VarList = [
    VarName
    for RegExp in VAR_SEARCH_REGEXP
    for Matched in re.finditer(RegExp,response.text)
    for VarName in Matched.groups()
    if VarName is not None and VarName not in JAVASCRIPT_KEYWORDS
]

# Counter tallies the list directly; no manual increment loop is needed.
WordCounter = Counter(VarList)

# print(...) with a single pre-formatted argument behaves the same on
# Python 2 and Python 3.
for item in WordCounter:
    print("%s : %d"%(item,WordCounter[item]))

# Number of distinct names.
print(len(WordCounter))


In [190]:
# Experimental building blocks for a stricter JavaScript variable matcher.
# r"" literals replace ur"", which is a syntax error on Python 3.
# NOTE: the "+" after the first character means a name needs at least two
# characters, so one-letter identifiers are not matched.
name_pattern = r"[_a-zA-Z][_0-9a-zA-Z.]+"
# A name optionally followed by empty "[]" pairs.  The earlier version put
# (?P=variable) inside the very group it refers to; Python 3's re rejects
# that ("cannot refer to an open group"), and a reference to a group that is
# still open cannot match anything, so removing it keeps the results of the
# cells below unchanged.  Nested subscripts such as a[b[c]] cannot be matched
# as one unit this way; each name is found separately.
varname_pattern = r"(?P<variable>[_a-zA-Z][_0-9a-zA-Z.]+(\[\])*)"
jsvar_regexp = r""

In [191]:
# Try the pattern on a nested subscript expression.
re.compile(varname_pattern).findall("array[name[aaaa]]=")

[('array', ''), ('name', ''), ('aaaa', '')]

In [192]:
# The pattern demands a literal blank before "=", which the sample input
# does not contain, so nothing matches.
regexpression = varname_pattern + " ="
re.compile(regexpression).findall("array[name[aaaa]]=")

[]

In [221]:
# A URL without a query string gets the parameter appended after "?".
AddPayloadToUrl(Url='https://m.mail.ru/cgi-bin/signup', VarName='src', Payload='payload')

'https://m.mail.ru/cgi-bin/signup?src=payload'

In [None]:
# Same fetch as before, this time through the proxy and verified against
# the certificate file.
response = requests.get(
    'https://e.mail.ru',
    verify=CERT_FILE,
    proxies=BURPSUITE_PROXIES,
)