In [22]:
import json
import subprocess

# Shared accumulator filled by labels_from_zgrab:
# ip -> {protocol: (label or None, JSON-encoded banners)}.
data = dict()

# For each protocol, the key paths (walked from a scan's "result" object)
# whose values are collected as that scan's banners.
banner_keys = dict([
    ("ftp", [("banner",)]),
    ("smtp", [("banner",)]),
    ("telnet", [("banner",)]),
    ("ssh", [("server_id", "raw")]),
    ("http", [("response", "body"), ("response", "server")]),
    ("tls", [("handshake_log", "server_certificates", "certificate", "parsed", "subject")]),
])

# (lowercase substring, label) pairs. Order matters: getLabel walks the
# mapping in insertion order and returns the label of the first substring
# found in the banner text.
manual_labels = dict([
    ("fortinet", "fortinet"),
    ("fortigate", "fortinet"),
    ("ios-self-signed-", "cisco"),
    ("cisco", "cisco"),
    ("cscoe", "cisco"),
    ("user access verification", "cisco"),
    ("mikrotik", "mikrotik"),
    ("gw-ff1-mx960", "juniper"),
    ("global-protect", "palo alto"),
])

import operator
from functools import reduce

# def testRecog(banner):
#     cmd = "./recog-go/recog_match ./recog/xml \"" + banner + "\""
#     output = !{cmd}
#     return output
    
def getLabel(banner, labels=None):
    """Return the label of the first known substring found in ``banner``.

    ``banner`` is converted with ``str()`` and lowercased before matching, so
    it may be a single string or a container such as a list of banners (the
    container's printed form is what gets searched).

    Args:
        banner: Value to search; any object accepted by ``str()``.
        labels: Optional mapping of lowercase substring -> label. Defaults to
            the module-level ``manual_labels`` table.

    Returns:
        The label of the first substring (in the mapping's iteration order)
        contained in the lowercased text, or None when nothing matches.
    """
    if labels is None:
        labels = manual_labels
    # Build the searchable text once rather than once per candidate substring.
    haystack = str(banner).lower()
    for needle, label in labels.items():
        if needle in haystack:
            return label
    return None

def getFromDict(dataDict, keyList):
    """Follow ``keyList`` through nested containers and return the value found.

    Args:
        dataDict: Outermost container (typically a dict parsed from JSON).
        keyList: Iterable of keys/indexes applied one after another.

    Returns:
        The value at the end of the path, or None when the path cannot be
        followed all the way. An empty ``keyList`` returns ``dataDict`` itself.
    """
    current = dataDict
    for key in keyList:
        try:
            current = current[key]
        except (KeyError, IndexError, TypeError):
            # KeyError: key absent from a dict. IndexError: list index out of
            # range. TypeError: the intermediate value is not subscriptable
            # with this key (e.g. a JSON null, or a string/list indexed by a
            # string key). All mean "no value at this path".
            return None
    return current

import sys
import csv

def labels_from_zgrab(zgrab_file, csvwriter):
    """Label the hosts in a zgrab JSON-lines file by their service banners.

    Every non-blank line is parsed as a JSON object with an "ip" key and an
    optional "data" mapping of scans. For each scan whose "status" is
    "success" and whose "protocol" has an entry in ``banner_keys``, the
    configured paths are read from the scan's "result", the collected values
    are passed to ``getLabel``, and ``(label, banners_json)`` is stored in the
    module-level ``data[ip][protocol]`` (label may be None). Scans that
    received a label are also written to ``csvwriter``.

    Args:
        zgrab_file: Path of the JSON-lines file to read.
        csvwriter: Object with a ``writerow`` method (e.g. a ``csv.writer``);
            receives ``[ip, protocol, label, banners_json]`` per labelled scan.

    Returns:
        List of ``(ip, protocol, label)`` tuples, one per labelled scan, in
        the order they were encountered.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no "ip" key.
    """
    labels = []
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(zgrab_file, encoding="utf-8") as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            record = json.loads(line)
            ip = record["ip"]
            # Register the host even if none of its scans end up usable.
            host_entry = data.setdefault(ip, {})
            # "data" may be absent or null; both mean "no scans".
            scans = record.get("data") or {}
            for scan in scans.values():
                if scan.get("status") != "success":
                    continue
                protocol = scan.get("protocol")
                paths = banner_keys.get(protocol)
                if paths is None:
                    # No banner paths configured for this protocol: skip the
                    # scan instead of aborting the whole file.
                    continue
                # A successful scan may still carry no "result" object.
                result = scan.get("result") or {}
                banners = [getFromDict(result, path) for path in paths]
                banners_json = json.dumps(banners)
                label = getLabel(banners)
                host_entry[protocol] = (label, banners_json)
                if label is not None:
                    labels.append((ip, protocol, label))
                    csvwriter.writerow([ip, protocol, label, banners_json])
    return labels


In [24]:
# Input files handed to labels_from_zgrab, processed in this order.
files = [
    "zgrab-az.jsonl",
    "zgrab-kz.jsonl",
    "zgrab-by.jsonl",
    #"zgrab-http.jsonl",
]
# The csv module requires files given to csv.writer to be opened with
# newline=""; otherwise platforms that translate "\n" on write produce
# "\r\r\n" row endings.
with open("labels.csv", "w", newline="") as f:
    writer = csv.writer(f)
    for file in files:
        print(file)
        # Despite the name, this holds the (ip, protocol, label) tuples
        # returned by labels_from_zgrab.
        banners = labels_from_zgrab(file, writer)
        print(banners)
#print(data)

zgrab-az.jsonl
[('185.26.184.254', 'ssh', 'cisco'), ('185.161.224.119', 'http', 'palo alto')]
zgrab-kz.jsonl
[('212.154.128.86', 'ftp', 'mikrotik'), ('212.154.128.86', 'http', 'mikrotik'), ('89.218.26.220', 'tls', 'fortinet'), ('89.218.26.220', 'http', 'fortinet'), ('212.154.128.86', 'telnet', 'mikrotik'), ('92.46.122.10', 'http', 'fortinet'), ('89.218.39.78', 'http', 'fortinet'), ('31.31.217.178', 'http', 'fortinet')]
zgrab-by.jsonl
[('93.125.42.250', 'http', 'mikrotik')]
