In [53]:
import base64
import os
import re
import shutil
import xml.etree.ElementTree as ET
import html as HTML
from collections import Counter
from concurrent.futures import ThreadPoolExecutor

import http_parser
import numpy
import requests
from lxml import etree, html

try:
    from http_parser.parser import HttpParser
except ImportError:
    from http_parser.pyparser import HttpParser



## Global variables

In [55]:
# Patterns used to harvest candidate variable names from inline JavaScript.
# Every capture group of every pattern is treated as a candidate name.
#
# Plain raw strings are used instead of the Python-2-only ``ur`` prefix so the
# list is valid on both Python 2 and Python 3.
#
# The array pattern uses an optional group ``(...)?`` rather than a repeated
# one ``(...)*``: a repeated group followed by ``\w*`` backtracks exponentially
# on input such as ``name[aaaaaaaa...`` with no closing bracket, while the
# captured values for successful matches are the same.
VAR_SEARCH_REGEXP = [
    r"([a-zA-Z_]\w*)\[([a-zA-Z_]\w*)?\w*\]",                # array access: name[index]
    r"var\s+([a-zA-Z_]\w*)",                                # declaration: var name
    r"([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\.*",                   # member access: object.member
    r"([a-zA-Z_]\w*)\s*=\s*\w",                             # assignment target: name = value
    r"\w+\s*=\s*([a-zA-Z_]\w*)",                            # assignment source: smth = name
    r'''[\"\']([a-zA-Z_]\w*)[\"\']:[\"\']\w*[\"\']''',      # quoted pair: "name":"value"
]

# Reserved JavaScript words that must never be reported as variable names.
# The entries are bare keywords: a trailing '*' (a footnote marker in some
# reference tables) would make the membership test miss the real keyword.
JAVASCRIPT_KEYWORDS = set([
                        'abstract','arguments','boolean','break','byte',
                        'case','catch','char','class','const',
                        'continue','debugger','default','delete','do',
                        'double','else','enum','eval','export',
                        'extends','false','final','finally','float',
                        'for','function','goto','if','implements',
                        'import','in','instanceof','int','interface',
                        'let','long','native','new','null',
                        'package','private','protected','public','return',
                        'short','static','super','switch','synchronized',
                        'this','throw','throws','transient','true',
                        'try','typeof','var','void','volatile',
                        'while','with','yield'
                    ])

# Proxy mapping handed to ``requests`` so traffic is routed through a local
# Burp Suite listener. The URLs carry an explicit scheme, as the requests
# documentation requires for proxy URLs; the same HTTP listener is used for
# both plain and TLS traffic.
BURPSUITE_PROXIES = {
      'http': 'http://127.0.0.1:8080',
      'https': 'http://127.0.0.1:8080'
    }

# Path to the Burp Suite CA certificate (.crt) passed as ``verify=`` when
# requests go through the proxy. Author's note: cacert.der was added to
# Firefox first, and then this .crt file was imported.
# NOTE(review): absolute, user-specific path -- adjust before running elsewhere.
CERT_FILE = "/home/ruslan/PortSwiggerCA.crt"

# Numeric weight for every severity label used by the payload dictionaries.
SEVERITY_CLASSIFICATION = {
    "High":5,
    "Medium":3,
    "Low":1,
    "Undefined":-1
}

# Small payload -> severity mapping for quick manual tests.
TEST_PAYLOAD_DICT = {
    "a87ui":"High",
    "87923":"Low",
}

# Payload -> severity mapping used by the scanners. Severity labels are
# spelled exactly like the keys of SEVERITY_CLASSIFICATION.
COMMON_PAYLOADS_DICT = {
                        "amF2YXNjcmlwdDphbGVydCgxKTsvLy8=":"High",
                        "\"-alert-\"":"High",
                        "amF2YXNjcmlwdDphbGVydCgxKTs=":"Medium",
                        "javascript:alert(1)":"Medium",
                        "alert(1)":"Low"
                        }


## Http Classes

In [56]:
class HttpResponse:
    """Raw HTTP response fed through an ``HttpParser``.

    The parser instance is kept on ``self.ResponseParser``.
    """

    def __init__(self,http_plain):
        # Hand the complete raw message to a fresh parser in a single call.
        parser = HttpParser()
        parser.execute(http_plain,len(http_plain))
        self.ResponseParser = parser

    def GetContent(self):
        """Return whatever the parser's ``recv_body()`` yields."""
        return self.ResponseParser.recv_body()

    def GetHeaders(self):
        """Return the parsed headers as a dict, or an empty dict when the
        parser reports the headers as incomplete."""
        parser = self.ResponseParser
        if not parser.is_headers_complete():
            return dict()
        return dict(parser.get_headers())
        
class HttpRequest:
    """Raw HTTP request fed through an ``HttpParser``.

    The parser instance is kept on ``self.RequestParser``.
    """

    def __init__(self,http_plain):
        # Hand the complete raw message to a fresh parser in a single call.
        parser = HttpParser()
        parser.execute(http_plain,len(http_plain))
        self.RequestParser = parser

    def GetContent(self):
        """Return whatever the parser's ``recv_body()`` yields."""
        return self.RequestParser.recv_body()

    def GetHeaders(self):
        """Return the parsed headers as a dict, or an empty dict when the
        parser reports the headers as incomplete."""
        parser = self.RequestParser
        if not parser.is_headers_complete():
            return dict()
        return dict(parser.get_headers())
        
        

## Burp Suite History Class

In [57]:
class BurpHistoryItem:
    """Wrapper around one ``item`` element of a Burp Suite history export.

    The constructor base64-decodes the ``request`` and ``response`` child
    elements and parses them into ``HttpRequest`` / ``HttpResponse`` objects.
    Every getter reads the first matching descendant element of the item and
    raises ``IndexError`` when that element is absent.
    """

    def __init__(self,item):
        self.item = item

        self.response_plain = self._DecodeBase64Field("response")
        self.Response = HttpResponse(self.response_plain)

        self.request_plain = self._DecodeBase64Field("request")
        self.Request = HttpRequest(self.request_plain)

    def _FirstElement(self,tag):
        """Return the first descendant element named ``tag``."""
        return self.item.findall(".//" + tag)[0]

    def _FirstText(self,tag):
        """Return the text of the first descendant element named ``tag``."""
        return self._FirstElement(tag).text

    def _DecodeBase64Field(self,tag):
        """Return the base64-decoded text of ``tag``.

        An element without text decodes to an empty byte string instead of
        raising. ``base64.b64decode`` replaces the Python-2-only
        ``str.decode('base64')`` codec.
        """
        encoded = self._FirstText(tag)
        if not encoded:
            return b""
        return base64.b64decode(encoded)

    def GetUrl(self):
        """Return the text of the ``url`` element."""
        return self._FirstText("url")

    def GetHost(self):
        """Return the text of the ``host`` element."""
        return self._FirstText("host")

    def GetHostIP(self):
        """Return the ``ip`` attribute of the ``host`` element."""
        return self._FirstElement("host").attrib["ip"]

    def GetPort(self):
        """Return the ``port`` element's text as an int."""
        # The element itself cannot be converted; its text must be.
        return int(self._FirstText("port"))

    def GetProtocol(self):
        """Return the text of the ``protocol`` element (like every other
        getter, not the element object itself)."""
        return self._FirstText("protocol")

    def GetMethod(self):
        """Return the text of the ``method`` element."""
        return self._FirstText("method")

    def GetPath(self):
        """Return the text of the ``path`` element."""
        return self._FirstText("path")

    def GetExtension(self):
        """Return the text of the ``extension`` element."""
        return self._FirstText("extension")

    def GetHttpRequestData(self):
        """Return the decoded raw request."""
        return self.request_plain

    def GetRequest(self):
        """Return the parsed ``HttpRequest``."""
        return self.Request

    def GetStatus(self):
        """Return the ``status`` element's text as an int."""
        return int(self._FirstText("status"))

    def GetResponseLength(self):
        """Return the ``responselength`` element's text as an int."""
        return int(self._FirstText("responselength"))

    def GetResponse(self):
        """Return the parsed ``HttpResponse``."""
        return self.Response

    def GetMiemType(self):
        """Return the text of the ``mimetype`` element.

        The misspelled name is kept for existing callers; see ``GetMimeType``.
        """
        return self._FirstText("mimetype")

    def GetMimeType(self):
        """Correctly spelled alias of ``GetMiemType``."""
        return self.GetMiemType()

    def GetHttpResponseData(self):
        """Return the decoded raw response."""
        return self.response_plain

    def GetComment(self):
        """Return the text of the ``comment`` element, or "" when it has none."""
        comment = self._FirstText("comment")
        if comment is None:
            return ""
        return comment
    
        

## Utilities

In [58]:
def GetListOfHistItemsFromFile(filename):
    """Parse the XML file ``filename`` and wrap every ``item`` element.

    Returns a list with one ``BurpHistoryItem`` per ``item`` element found
    below the document root, in document order.
    """
    xmlRoot = ET.parse(filename).getroot()

    # ".//item" is the explicit relative form; the absolute "//item" path on a
    # tree triggers a FutureWarning in ElementTree.
    return [BurpHistoryItem(item) for item in xmlRoot.findall(".//item")]

In [59]:
def flatten(l):
    """Concatenate the items of every sub-iterable of ``l`` into one list."""
    merged = []
    for sublist in l:
        for item in sublist:
            merged.append(item)
    return merged

In [60]:
def MatchUrl(text):
    """Return every URL-like substring of ``text``, in order of appearance.

    Recognised forms:
      * http://url
      * https://url
      * //url   (scheme-relative)

    A match ends at the first character outside the accepted set (word
    characters and ``/ \\ % . ? & = -``).
    """
    # Plain raw string: the ``ur`` prefix is a syntax error on Python 3.
    url_regexp = r"http[s]?:\/\/[\w\/\\\%\.\?\&\=\-]+|\/\/[\w\/\\\%\.\?\&\=\-]+"

    # The pattern has no capture groups, so findall yields the full matches.
    return re.findall(url_regexp, text)

In [61]:
def GetUrlParams(Url):
    """Return the part of ``Url`` starting at the first '?' that has at least
    one character after it (up to the end of the line), or "" when there is
    no such part.

    A URL that merely ends with '?' yields "" instead of raising IndexError.
    """
    # TODO: consider removing this helper.
    Match = re.search(r"\?.+", Url)
    if Match is None:
        return ""
    return Match.group()

In [62]:
def EscapeString(string):
    """Return ``string`` with the five HTML-special characters replaced by
    their entity references; every other character is copied unchanged."""
    entities = {
     "&": "&amp;",
     '"': "&quot;",
     "'": "&apos;",
     ">": "&gt;",
     "<": "&lt;",
     }

    pieces = []
    for char in string:
        if char in entities:
            pieces.append(entities[char])
        else:
            pieces.append(char)
    return "".join(pieces)

In [63]:
def AddPayloadToUrl(Url,VarName,Payload):
    """Append ``VarName=Payload`` to ``Url`` as a query parameter.

    '&' joins the parameter when ``Url`` already contains a '?', otherwise
    '?' starts the query string. Nothing is encoded or escaped.
    """
    if '?' in Url:
        Separator = "&"
    else:
        Separator = "?"
    return "".join([Url, Separator, VarName, "=", Payload])

In [64]:
def GetPostVariables(Content):
    """Extract ``name=value`` pairs from ``Content``.

    A name starts with a letter or underscore followed by word characters;
    a value is a (possibly empty) run of word characters. Returns a dict
    mapping each name to its value; when a name occurs more than once the
    last occurrence wins.
    """
    # Plain raw string: the ``ur`` prefix is a syntax error on Python 3.
    VarRegExp = r"([a-zA-Z_]\w*)=(\w*);?"

    DictOfVars = dict()
    for Match in re.finditer(VarRegExp,Content):
        # Both groups always participate in a match, so no error handling
        # is needed around the unpacking.
        VarName, VarValue = Match.groups()
        DictOfVars[VarName] = VarValue

    return DictOfVars

## Variable Extractor

In [65]:
def FindJSVariables(PlainHtml):
    """Collect candidate JavaScript variable names from inline scripts.

    ``PlainHtml`` is parsed with ``lxml.html``; the text of every ``script``
    element is searched with each pattern in ``VAR_SEARCH_REGEXP``, and every
    captured group that is not in ``JAVASCRIPT_KEYWORDS`` is kept.

    Returns a list of unique names in no particular order. Empty input, or
    input lxml cannot build a document from, yields an empty list.
    """
    if not PlainHtml or not PlainHtml.strip():
        return []
    try:
        HTMLDocument = html.fromstring(PlainHtml)
    except etree.ParserError:
        # lxml raises this when no document can be built, e.g. "Document is empty".
        return []

    VarNames = set()

    for Script in HTMLDocument.xpath(".//script"):
        ScriptContent = Script.text
        if ScriptContent is None:
            # No leading text in the element (e.g. an external script).
            continue

        for RegExp in VAR_SEARCH_REGEXP:
            try:
                for Matched in re.finditer(RegExp,ScriptContent):
                    for VarName in Matched.groups():
                        if VarName is None:
                            continue
                        if VarName not in JAVASCRIPT_KEYWORDS:
                            VarNames.add(VarName.strip(' '))
            except Exception:
                # Best effort: a pattern that fails on this script is skipped.
                # Unlike a bare ``except`` this lets KeyboardInterrupt and
                # SystemExit through.
                continue

    return list(VarNames)
    

In [66]:
def FormVarsExtractor(PlainHtml,GrabableAttributes = ('name',)):
    """Collect attribute values of ``input`` elements found in ``PlainHtml``.

    For every ``input`` element, the value of each attribute listed in
    ``GrabableAttributes`` (default: only ``name``) is collected when the
    element has that attribute.

    Returns a list of unique values in no particular order. Empty input, or
    input lxml cannot build a document from, yields an empty list.
    """
    if not PlainHtml or not PlainHtml.strip():
        return []
    try:
        HTMLDocument = html.fromstring(PlainHtml)
    except etree.ParserError:
        # lxml raises this when no document can be built, e.g. "Document is empty".
        return []

    VarList = list()
    for Input in HTMLDocument.xpath(".//input"):
        for Attribute in GrabableAttributes:
            if Attribute in Input.attrib:
                VarList.append(Input.attrib[Attribute])

    return list(set(VarList))

In [67]:
def ScanSiteFromBurpHistory(HistItem,Payloads,UseProxy=True):
    """Sequentially probe the URL of ``HistItem`` for reflected payloads.

    The page is downloaded once to harvest variable names (inline JavaScript
    plus ``input`` elements). Then, for every variable/payload combination,
    the URL is requested with ``variable=payload`` appended, and the response
    is searched for the literal payload after the test URL and its query
    string have been removed from the text.

    HistItem -- history item providing ``GetUrl()`` and ``GetRequest()``
    Payloads -- dict mapping payload string -> severity label
    UseProxy -- when true, requests go through ``BURPSUITE_PROXIES`` and are
                verified against ``CERT_FILE``

    Findings are printed; nothing is returned. A page that cannot be
    downloaded is reported and skipped.
    """
    Url = HistItem.GetUrl()
    Headers = HistItem.GetRequest().GetHeaders()

    try:
        Response = _DownloadPage(Url,Headers,UseProxy)
    except requests.RequestException:
        print("Could not download requested page!\n%s"%(Url))
        return

    VariableNames = set(FindJSVariables(Response.text))
    VariableNames.update(FormVarsExtractor(Response.text))

    print("Scanning %s Testing method: GET\n Try %d variables and %d types of payloads"%(
                                                    Url,
                                                    len(VariableNames),
                                                    len(Payloads)))

    for VariableName in VariableNames:
        for Payload in Payloads:
            TestUrl = AddPayloadToUrl(Url,VariableName,Payload)
            try:
                Response = _DownloadPage(TestUrl,Headers,UseProxy)
            except requests.RequestException:
                print("Failed to load %s \nPage excluded from analys list"%(TestUrl))
                # Skip the analysis; otherwise the previous response would be
                # inspected again under the wrong URL.
                continue

            # Drop echoes of the request itself so that only payload
            # occurrences elsewhere in the page are counted.
            PlainHtml = Response.text.replace(TestUrl,"")
            PlainHtml = PlainHtml.replace(GetUrlParams(TestUrl),"")

            # Literal, non-overlapping count. Using the payload as a regular
            # expression would misread e.g. "alert(1)" as "alert" + group "1".
            Occurrences = PlainHtml.count(Payload)
            if Occurrences:
                print("Variable %s contain value %s with severity %s which occur %d times\n" %(
                                                VariableName,
                                                Payload,
                                                Payloads[Payload],
                                                Occurrences
                                                ))


def _DownloadPage(Url,Headers,UseProxy):
    """GET ``Url`` with ``Headers``, through the Burp Suite proxy if requested."""
    if UseProxy:
        return requests.get(Url,headers=Headers,proxies=BURPSUITE_PROXIES, verify=CERT_FILE)
    return requests.get(Url,headers=Headers)
    

In [78]:
def ProcessResultConcurrent(Arguments):
    """Worker: request one prepared URL and log reflected payload occurrences.

    ``Arguments`` is a sequence laid out as
        [(ScanUrl, Headers), (Payload, Severity), UseProxy, LogFile]

    The URL is requested (through the Burp Suite proxy when ``UseProxy`` is
    true), the URL itself is removed from the response text, and the literal
    payload is counted in what remains. A finding is appended to ``LogFile``.
    A failed request is appended to ``LogFile`` as well, instead of raising
    and aborting the surrounding executor run.
    """
    RequestParams = Arguments[0]
    PayloadParams = Arguments[1]
    UseProxy = Arguments[2]
    LogFile = Arguments[3]

    ScanUrl, Headers = RequestParams
    Payload, Severity = PayloadParams

    try:
        if UseProxy == True:
            Response = requests.get(ScanUrl,headers=Headers,proxies=BURPSUITE_PROXIES, verify=CERT_FILE)
        else:
            Response = requests.get(ScanUrl,headers=Headers)
    except requests.RequestException as Error:
        with open(LogFile,'a') as f:
            f.write("Failed to load %s (%s)\n"%(ScanUrl,Error))
        return

    # Drop echoes of the request URL so only other reflections are counted.
    PlainHtml = Response.text.replace(ScanUrl,"")

    # Literal, non-overlapping count. Using the payload as a regular
    # expression would misread e.g. "alert(1)" as "alert" + group "1".
    Occurrences = PlainHtml.count(Payload)

    if Occurrences:
        log = "Page %s\n Contain value %s with severity %s which occur %d times\n" %(
                                                ScanUrl,
                                                Payload,
                                                Severity,
                                                Occurrences
                                                )
        with open(LogFile,'a') as f:
            f.write(log)
    
def ScanSiteItemConcurrent(MaxWorkers,HistItem,Payloads,BatchSize,UseProxy,LogFile):
    """Probe the URL of ``HistItem`` with batched payloads on a thread pool.

    Variable names are harvested from the recorded response of ``HistItem``
    (inline JavaScript plus ``input`` elements). They are split into batches
    of at most ``BatchSize`` names; for every batch and every payload one URL
    is built that sets all names of the batch to that payload, and the URLs
    are processed by ``ProcessResultConcurrent`` on ``MaxWorkers`` threads.

    MaxWorkers -- thread pool size
    HistItem   -- history item providing ``GetUrl()``, ``GetRequest()`` and
                  ``GetHttpResponseData()``
    Payloads   -- dict mapping payload string -> severity label
    BatchSize  -- number of variables combined into one request; values
                  below 1 are treated as 1
    UseProxy   -- forwarded to the worker
    LogFile    -- path of the log file; it is appended to
    """
    Url = HistItem.GetUrl()
    Headers = HistItem.GetRequest().GetHeaders()
    # NOTE(review): this is the decoded raw response as stored in the history
    # item (the same data that is fed to the HTTP parser), not only its body.
    Response = HistItem.GetHttpResponseData()

    Variables = set(FindJSVariables(Response))
    Variables.update(FormVarsExtractor(Response))

    log = "Scanning %s Testing method: GET\n Try %d variables and %d types of payloads\n"%(
                                                    Url,
                                                    len(Variables),
                                                    len(Payloads))
    with open(LogFile,'a') as f:
        f.write(log)

    ScanningQueue = list()
    VariableList = list(Variables)
    Step = max(BatchSize,1)

    # Slicing covers the trailing partial batch too, so the last
    # ``len(VariableList) % Step`` variables are scanned as well.
    for Start in range(0,len(VariableList),Step):
        BatchCommit = VariableList[Start:Start+Step]
        for Payload in Payloads:
            ScanURL = Url
            for BatchVarName in BatchCommit:
                ScanURL = AddPayloadToUrl(ScanURL,BatchVarName,Payload)

            ScanningQueue.append([(ScanURL,Headers),(Payload,Payloads[Payload]),UseProxy,LogFile])

    with ThreadPoolExecutor(max_workers = MaxWorkers) as Executor:
        # Drain the iterator so that every task has finished before returning.
        for _ in Executor.map(ProcessResultConcurrent,ScanningQueue):
            pass
    
    

## Simple vulnerabilities scanner

In [72]:
#BurpHistoryFile = os.getcwd()+"/hist"
#BurpHistItems = GetListOfHistItemsFromFile(BurpHistoryFile)
#
#for item in BurpHistItems:
#    ScanSiteFromBurpHistory(item,COMMON_PAYLOADS_DICT,UseProxy=False)


## Simple Concurrent Scanner

In [80]:
# Input history export and output log of this run.
# NOTE(review): absolute, user-specific paths -- adjust before running elsewhere.
BurpHistoryFile = '/home/ruslan/icq_hist'
ScanLogFile = '/home/ruslan/icq_scan.log'

BurpHistItems = GetListOfHistItemsFromFile(BurpHistoryFile)

# Mode 'w' truncates any previous log; the scanner appends to it afterwards.
with open(ScanLogFile,'w') as f:
    f.write('Scan start...\n')

# 6 worker threads, 5 variables per request, traffic routed through the proxy.
for item in BurpHistItems:
    ScanSiteItemConcurrent(6,item,COMMON_PAYLOADS_DICT,BatchSize=5,UseProxy=True,LogFile=ScanLogFile)



## Test & Debug

In [None]:
# Debug: fetch a live page directly (no proxy); `response` is used by the
# variable-counting cell below.
response = requests.get("https://e.mail.ru")

In [None]:
# Debug: run every variable pattern over the whole page text (not only over
# script elements) and count how often each captured, non-keyword name occurs.
VarList = [
    k
    for j in VAR_SEARCH_REGEXP
    for i in re.finditer(j,response.text)
    for k in i.groups()
    if k is not None and k not in JAVASCRIPT_KEYWORDS
]

WordCounter = Counter(VarList)

for item in WordCounter:
    print("%s : %d"%(item,WordCounter[item]))

# Number of distinct names found.
print(len(WordCounter))


In [19]:
# Debug: fetch a page and parse it with lxml.html; `r` and `doc` are used by
# the cells below.
r = requests.get("https://search.icq.com/")
doc = html.fromstring(r.text)

In [35]:
# Debug: show the `id` attribute of the first input element, if it has one.
# Raises IndexError when the page contains no input element.
FirstInput = doc.xpath(".//input")[0]
if 'id' in FirstInput.attrib:
    print(FirstInput.attrib['id'])

alrt_yes


In [42]:
# Debug: list the `name` attributes of the input elements on the fetched page.
FormVarsExtractor(r.text)

['range', 'prtn', 'ch_id', 'search_keyword']