In [1]:
import base64
import os
import re
import xml.etree.ElementTree as ET
from collections import Counter

import http_parser
import numpy
import requests
from lxml import etree, html

try:
    from http_parser.parser import HttpParser
except ImportError:
    from http_parser.pyparser import HttpParser


## HTTP Classes

In [2]:
class HttpResponse:
    """Parsed view of a raw HTTP response message.

    The raw message is handed to an ``HttpParser`` once, in the constructor;
    the accessors only read back from that parser.
    """

    def __init__(self, http_plain):
        # Feed the whole message to the parser in a single call.
        self.ResponseParser = HttpParser()
        parser = self.ResponseParser
        parser.execute(http_plain, len(http_plain))

    def GetContent(self):
        """Return the parser's ``recv_body()`` result."""
        parser = self.ResponseParser
        return parser.recv_body()

    def GetHeaders(self):
        """Return the parsed headers as a plain dict.

        An empty dict is returned when the parser reports that the header
        section is not complete.
        """
        parser = self.ResponseParser
        if not parser.is_headers_complete():
            return {}
        return dict(parser.get_headers())
        
class HttpRequest:
    """Parsed view of a raw HTTP request message.

    Construction runs the raw message through an ``HttpParser``; the
    accessors expose what that parser collected.
    """

    def __init__(self, http_plain):
        # The parser wants the data together with its length.
        plain_length = len(http_plain)
        self.RequestParser = HttpParser()
        self.RequestParser.execute(http_plain, plain_length)

    def GetContent(self):
        """Return the parser's ``recv_body()`` result."""
        body = self.RequestParser.recv_body()
        return body

    def GetHeaders(self):
        """Return the parsed headers as a plain dict, or an empty dict when
        the parser has not seen a complete header section."""
        headers = {}
        if self.RequestParser.is_headers_complete():
            headers = dict(self.RequestParser.get_headers())
        return headers
        
        

## Burp Suite History Class

In [3]:
class BurpHistoryItem:
    """Accessor wrapper around one ``<item>`` element of a Burp history XML file.

    The constructor base64-decodes the ``<request>`` and ``<response>`` child
    elements and parses them with ``HttpRequest`` / ``HttpResponse``.  Every
    getter reads the first matching descendant element of the wrapped item and
    raises ``IndexError`` when that element is absent.
    """

    def __init__(self, item):
        self.item = item

        # Response first, then request (same order as the fields are used below).
        self.response_plain = self._DecodeBase64Field("response")
        self.Response = HttpResponse(self.response_plain)

        self.request_plain = self._DecodeBase64Field("request")
        self.Request = HttpRequest(self.request_plain)

    def _First(self, tag):
        """Return the first descendant element named ``tag`` (IndexError if none)."""
        return self.item.findall(".//" + tag)[0]

    def _DecodeBase64Field(self, tag):
        """Return the base64-decoded text of element ``tag`` as bytes.

        An element without text decodes to an empty byte string instead of
        raising.  ``base64.b64decode`` is used because ``str.decode('base64')``
        exists only on Python 2.
        """
        text = self._First(tag).text
        if not text:
            return b""
        return base64.b64decode(text)

    def GetUrl(self):
        """Return the text of the ``<url>`` element."""
        return self._First("url").text

    def GetHost(self):
        """Return the text of the ``<host>`` element."""
        return self._First("host").text

    def GetHostIP(self):
        """Return the ``ip`` attribute of the ``<host>`` element (KeyError if missing)."""
        return self._First("host").attrib["ip"]

    def GetPort(self):
        """Return the ``<port>`` value as an int."""
        # The element itself cannot be passed to int(); convert its text.
        return int(self._First("port").text)

    def GetProtocol(self):
        """Return the text of the ``<protocol>`` element."""
        return self._First("protocol").text

    def GetMethod(self):
        """Return the text of the ``<method>`` element."""
        return self._First("method").text

    def GetPath(self):
        """Return the text of the ``<path>`` element."""
        return self._First("path").text

    def GetExtension(self):
        """Return the text of the ``<extension>`` element."""
        return self._First("extension").text

    def GetHttpRequestData(self):
        """Return the decoded raw request (counterpart of GetHttpResponseData)."""
        return self.request_plain

    def GetRequest(self):
        """Return the parsed ``HttpRequest`` object."""
        return self.Request

    def GetStatus(self):
        """Return the ``<status>`` value as an int (ValueError if not numeric)."""
        return int(self._First("status").text)

    def GetResponseLength(self):
        """Return the ``<responselength>`` value as an int."""
        return int(self._First("responselength").text)

    def GetResponse(self):
        """Return the parsed ``HttpResponse`` object."""
        return self.Response

    def GetMiemType(self):
        """Return the text of the ``<mimetype>`` element.

        The misspelled name is kept for existing callers; ``GetMimeType`` is
        an alias with the correct spelling.
        """
        return self._First("mimetype").text

    GetMimeType = GetMiemType

    def GetHttpResponseData(self):
        """Return the decoded raw response."""
        return self.response_plain

    def GetComment(self):
        """Return the ``<comment>`` text, or an empty string when it has none."""
        comment = self._First("comment").text
        return comment if comment is not None else ""
    
        

## Utilities

In [4]:
def GetListOfHistItemsFromFile(filename):
    """Parse the XML file ``filename`` and wrap each ``<item>`` element.

    Returns a list with one ``BurpHistoryItem`` per ``<item>`` element found
    anywhere in the document, in document order.
    """
    xmlTree = ET.parse(filename)

    # ElementTree.findall() on a tree warns about absolute paths ("//item")
    # and rewrites them to ".//item"; use the relative form directly.
    return [BurpHistoryItem(item) for item in xmlTree.findall(".//item")]

In [5]:
def flatten(l):
    """Concatenate the items of every sub-iterable of ``l`` into one flat list."""
    merged = []
    for sublist in l:
        merged.extend(sublist)
    return merged

In [6]:
def MatchUrl(text):
    """Return every URL-looking substring of ``text``, in order of appearance.

    Recognised forms:
      * http://url
      * https://url
      * //url   (scheme-relative)

    A URL runs for as long as its characters are ASCII letters, digits or one
    of ``_ / \\ % . ? & = -``.
    """
    # Notes on the pattern:
    #  * plain r"" literal, because the ur"" prefix is a syntax error on Python 3;
    #  * "A-Z" plus an explicit "_" replaces the former "A-z" range, which also
    #    matched "[", "]", "^" and "`" by accident;
    #  * "https?" replaces "http[s]*", which accepted "httpsss://".
    url_regexp = r"(?:https?:)?//[0-9a-zA-Z_/\\%.?&=-]+"

    return [url.group() for url in re.finditer(url_regexp, text)]

In [7]:
def GetUrlParams(Url):
    """Return the part of ``Url`` from its first '?' onward (the '?' included).

    Returns an empty string when ``Url`` contains no '?'.  A URL that ends
    with a bare '?' yields "?"; the former regex required at least one
    character after the '?' and raised IndexError in that case.
    """
    QueryStart = Url.find('?')
    if QueryStart == -1:
        return ""
    return Url[QueryStart:]

In [8]:
def AddPayloadToUrl(Url,VarName,Payload):
    """Return ``Url`` with ``VarName=Payload`` appended as a query parameter.

    The pair is joined with '&' when ``Url`` already contains a '?', and with
    '?' otherwise.  Neither the name nor the payload is URL-encoded.
    """
    Separator = "?" if Url.find('?') == -1 else "&"
    return Url + Separator + VarName + "=" + Payload

In [9]:
def GetPostVariables(Content):
    """Extract ``name=value`` pairs from ``Content`` into a dict.

    A name is an identifier (letter or underscore, then word characters); a
    value is a possibly empty run of word characters.  When a name occurs
    more than once, the last value wins.
    """
    # Plain r"" literal: the ur"" prefix is a syntax error on Python 3.
    VarRegExp = r"([a-zA-Z_]\w*)=(\w*);?"
    return {
        Match.group(1): Match.group(2)
        for Match in re.finditer(VarRegExp, Content)
    }

In [10]:
# Test payloads mapped to the severity label that is reported when the payload
# is found in a response.
COMMON_PAYLOADS_DICT = {
                        "a87ui":"Low",
                        "87923":"Low",
                        "<img/>":"Medium",
                        "alert(1)":"High",
                        "javascript:alert(1)":"High",
                        }

## Variable Extractor

In [11]:
# Patterns whose capture groups are treated as candidate variable names.
# They are plain r"" literals because the ur"" prefix is a syntax error on
# Python 3.
VAR_SEARCH_REGEXP = [
                     # array regexp.  The index group is optional ("?") rather
                     # than repeated ("*"): a repeated group followed by \w*
                     # backtracks exponentially on input such as "a[longname."
                     # while capturing exactly the same text on a match.
                     r"([a-zA-Z_]\w*)\[([a-zA-Z_]\w*)?\w*\]",
                     r"var\s+([a-zA-Z_]\w*)",                 # var name regexp
                     r"([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\.*",    # class hierarchy
                     r"([a-zA-Z_]\w*)\s*=\s*\w",              # name = value
                     r"\w+\s*=\s*([a-zA-Z_]\w*)",             # smth = name
                     r'''[\"\']([a-zA-Z_]\w*)[\"\']:[\"\']\w*[\"\']''' # "name":"value"
                    ]

# Reserved words that must not be reported as variable names.
# 'class', 'enum', 'export', 'extends' and 'super' used to be stored with a
# trailing '*' ('class*', ...), so the bare keywords were never filtered out.
JAVASCRIPT_KEYWORDS = {
                        'abstract','arguments','boolean','break','byte',
                        'case','catch','char','class','const',
                        'continue','debugger','default','delete','do',
                        'double','else','enum','eval','export',
                        'extends','false','final','finally','float',
                        'for','function','goto','if','implements',
                        'import','in','instanceof','int','interface',
                        'let','long','native','new','null',
                        'package','private','protected','public','return',
                        'short','static','super','switch','synchronized',
                        'this','throw','throws','transient','true',
                        'try','typeof','var','void','volatile',
                        'while','with','yield'
                    }

def FindJSVariables(PlainHtml):
    """Collect candidate JavaScript variable names from inline scripts.

    ``PlainHtml`` is parsed with ``lxml.html``; the text of every ``<script>``
    element is searched with each pattern in ``VAR_SEARCH_REGEXP`` and every
    captured group that is not in ``JAVASCRIPT_KEYWORDS`` is kept.

    Returns a sorted list of unique names.  Empty or whitespace-only input
    yields an empty list.
    """
    # There is nothing to extract from an empty document, and the parser is
    # not asked to handle one.
    if not PlainHtml or not PlainHtml.strip():
        return []

    HTMLDocument = html.fromstring(PlainHtml)

    Found = set()
    for Script in HTMLDocument.xpath(".//script"):
        ScriptContent = Script.text
        if ScriptContent is None:
            # <script src=...> elements carry no inline text.
            continue

        for RegExp in VAR_SEARCH_REGEXP:
            try:
                for Matched in re.finditer(RegExp, ScriptContent):
                    for VarName in Matched.groups():
                        if VarName is None or VarName in JAVASCRIPT_KEYWORDS:
                            continue
                        # Strip before de-duplicating so the result is unique.
                        Found.add(VarName.strip(' '))
            except re.error:
                # Skip a pattern that fails to compile.  A bare "except" here
                # would also swallow KeyboardInterrupt during a long scan.
                continue

    return sorted(Found)
    

In [12]:
# Proxy mapping handed to requests: both schemes go through the same local
# endpoint (presumably the Burp Suite proxy listener -- confirm the port).
BURPSUITE_PROXIES = dict.fromkeys(('http', 'https'), '127.0.0.1:8080')

def ScanSiteFromBurpHistory(HistItem,Payloads,Proxies=BURPSUITE_PROXIES):
    """Probe the URL of one history item for reflected payloads (GET only).

    The page is fetched once to collect candidate variable names with
    ``FindJSVariables``.  Then, for every (variable, payload) pair, the URL is
    requested again with ``variable=payload`` appended and the response text
    is searched for the payload.  Findings are printed; nothing is returned.

    HistItem -- object providing ``GetUrl()`` and ``GetRequest().GetHeaders()``.
    Payloads -- mapping of payload string to severity label.
    Proxies  -- proxies mapping passed to ``requests.get``.

    An exception raised by the initial request propagates to the caller; a
    failed probe request is reported and that payload is skipped.
    """
    Url = HistItem.GetUrl()
    Headers = HistItem.GetRequest().GetHeaders()
    Response = requests.get(Url,headers=Headers,proxies=Proxies)
    VariableNames = FindJSVariables(Response.text)

    # Parenthesised single-argument print works on both Python 2 and 3.
    print("Scanning %s Testing method: GET\n Try %d variables and %d types of payloads"%(
                                                    Url,
                                                    len(VariableNames),
                                                    len(Payloads)))

    for VariableName in VariableNames:
        for Payload in Payloads:
            TestUrl = AddPayloadToUrl(Url,VariableName,Payload)
            try:
                Response = requests.get(TestUrl,headers=Headers,proxies=Proxies)
            except requests.RequestException:
                print("Failed to load %s \nAbort..."%(TestUrl))
                # Without this, the previous response would be analysed again.
                continue

            # Drop echoes of the probe URL and of its query string, so that a
            # page which merely repeats the requested address is not counted.
            PlainHtml = Response.text.replace(TestUrl,"")
            TestUrlParam = GetUrlParams(TestUrl)
            if TestUrlParam:
                PlainHtml = PlainHtml.replace(TestUrlParam,"")

            # Literal substring count: the payload is text, not a regex.
            Occurrences = PlainHtml.count(Payload)
            if Occurrences:
                print("Variable %s contain value %s with severity %s which occur %d times" %(
                                                VariableName,
                                                Payload,
                                                Payloads[Payload],
                                                Occurrences
                                                ))
    

## Simple vulnerability scanner

In [None]:
# Location of the exported history file.  Set the BURP_HISTORY_FILE
# environment variable to use another file without editing this cell; the
# original path remains the default.
BurpHistoryFile = os.environ.get("BURP_HISTORY_FILE", "/home/ruslan/MailRuSecurity/hist")
BurpHistItems = GetListOfHistItemsFromFile(BurpHistoryFile)

# Scan every recorded item with the common payload set.
for item in BurpHistItems:
    ScanSiteFromBurpHistory(item,COMMON_PAYLOADS_DICT)




Scanning https://e.mail.ru/ Testing method: GET
 Try 204 variables and 5 types of payloads


## Test & Debug

In [11]:
# Fetch the page once, without the proxy mapping, so that the debug cell
# further down can run the variable-search patterns over response.text.
response = requests.get("https://e.mail.ru")

v.relay
g.defer
q.authDomain
n.hideAll
this._userEmail
rlog_message
projectEmail
a.status
this._JSONPCallbackParam
this._dropdown
e.settings.multiAuthEnabled
g.__prefix
b.registerLinkClickHandler
f._triggerHideOnInit
window.__PH
window
e.authForm.hide
activeEmail
K
v.prototype.on
f._triggerShowOnInit
g.charset
b.AccountManager
ta
D
gamescnt
b.counters._counters
ya
H
\\
this._rootNode
L
b.accountManager
rnd
c._triggerShowOnInit
b.Menu
this._anim
adgin
c.current
d.games
X
g.src
_
t.timeEnd
d
this._processing
this._container
.current
s.prototype
h
this._openCounter
e.authForm.insert
b.authLinkClickHandler
l
qa
p
this._context
t
b.showQuery
document.body.style.cursor
x
page
this._baseClass
this._inited
domain
this._button
n.resetShowOnInit
b.Counters
b.expires
c.defer
this._authView.style.display
this._counters
this._xhrTimeout
this._rightClass
c.id
v.prototype
this._animResetTimeout
fail
e.settings.projectsMenuEnabled
this._node
this.__mouseOut
a.className
e.loadAccountsList
b.cssText
thi

In [27]:
# Run every variable-search pattern over the raw page text and keep each
# captured group that is not a JavaScript keyword (duplicates included).
VarList = list()
for Pattern in VAR_SEARCH_REGEXP:
    for Found in re.finditer(Pattern,response.text):
        for Name in Found.groups():
            if Name is not None and Name not in JAVASCRIPT_KEYWORDS:
                VarList.append(Name)

# Counter tallies the occurrences directly.
WordCounter = Counter(VarList)

# Parenthesised single-argument print works on both Python 2 and 3.
for item in WordCounter:
    print("%s : %d"%(item,WordCounter[item]))

# Number of distinct names.
print(len(WordCounter))


__PHEmptyCallback : 1
__domain : 1
show : 4
timeEnd : 1
va : 1
prefix : 1
_xhrTimeout : 1
isCorp : 1
X : 2
na : 1
authFormExtDomainsEnabled : 1
_authView : 1
resetShowOnInit : 1
updateLastRequest : 2
ts : 1
_rootNode : 1
window : 8
location : 2
_triggerHideOnInit : 2
mail : 1
q : 1
logoutAccount : 1
D : 3
gamescnt : 1
__mouseOut : 1
__mouseIn : 1
L : 1
__PH : 1
rnd : 40
_xhr : 1
_openCounter : 1
_hideTimeout : 2
d : 39
settings : 1
h : 32
l : 4
authMenu : 2
qa : 1
p : 3
t : 6
d1126003 : 1
x : 1
Event : 1
_counters : 5
domain : 1
right : 1
_invocations : 1
__eventsTimeout : 1
navi : 1
switchAccount : 1
back : 1
innerHTML : 2
disableIFCheck : 1
registerLinkClickHandler : 1
remove : 1
RegExp : 2
fail : 1
accountManager : 1
navigator : 2
_getXHR : 1
_updateTimeout : 1
_opts : 1
attachEvent : 1
current : 2
ra : 1
enableUpdateRadars : 1
counters : 1
ru_RU : 2
C : 2
G : 1
K : 2
Counter : 1
_JSONPCallbackParam : 1
O : 1
_isAnim : 1
_button : 1
W : 1
requestSource : 2
_container : 1
__uuid : 1


In [190]:
# Scratch patterns for variable-name matching.  Plain r"" literals are used
# because the ur"" prefix is a syntax error on Python 3.
name_pattern = r"[_a-zA-Z][_0-9a-zA-Z.]+"
# The index part used to be "(\[(?P=variable)*\])*".  A backreference to the
# still-open group "variable" can never match (and Python 3 rejects it when
# compiling), so only a literal "[]" could ever be consumed there; that is
# written out explicitly.  Truly nested indexes cannot be expressed with re.
varname_pattern = r"(?P<variable>[_a-zA-Z][_0-9a-zA-Z.]+(\[\])*)"
jsvar_regexp = r""

In [191]:
# Scratch check: list every varname_pattern match in a nested-index expression.
re.findall(varname_pattern,"array[name[aaaa]]=")

[('array', ''), ('name', ''), ('aaaa', '')]

In [192]:
# Same pattern, but a literal " =" (space, then equals sign) must follow the name.
regexpression = varname_pattern + " ="
re.findall(regexpression,"array[name[aaaa]]=")

[]

In [221]:
# Scratch check: a URL without a query string gets the pair appended after "?".
AddPayloadToUrl( 'https://m.mail.ru/cgi-bin/signup','src','payload')

'https://m.mail.ru/cgi-bin/signup?src=payload'