In [36]:
import json
import numpy as np
from arango import ArangoClient

In [2]:
file_path = "CVE_dump.json"


def load_json(file_path):
    with open(file_path, "r", encoding="utf-8") as json_file:
        data = json.load(json_file)
    return data


def add_VI_data(db, data):
    if not db.has_collection("CVE_VI"):
        collection = db.create_collection("CVE_VI")
    else:
        collection = db["CVE_VI"]
    count = 0
    for entry in data:
        entry["_key"] = entry["_id"]
        if not collection.has(entry["_key"]):
            collection.insert(entry)

In [3]:
client = ArangoClient()
BRON_db = client.db("BRON", username="root", password="changeme")
collections = BRON_db.collections()

In [None]:
data = load_json(file_path)
add_VI_data(BRON_db, data)

In [4]:
def build_vocab(db):

    aql_fetch = """
    FOR vuln IN CVE_VI
    RETURN {key: vuln._key, threat_type: vuln.threats[*].type}"""
    
    cursor = db.aql.execute(aql_fetch)

    threat_vocab = set()

    if db.has_graph('threat_graph'):
        threat_graph = db.graph('threat_graph')
    else:
        threat_graph = db.create_graph('threat_graph')

    if not db.has_collection("CVE_THREATS_VI"):
        threat_collection = db.create_collection("CVE_THREATS_VI")
    else:
        threat_collection = db["CVE_THREATS_VI"]

    if threat_graph.has_edge_definition('threat_edges'):
        threat_edges = threat_graph.edge_collection('threat_edges')
    else:
        threat_edges = threat_graph.create_edge_definition(
            edge_collection='threat_edges',
            from_vertex_collections=['CVE_VI'],
            to_vertex_collections=['CVE_THREATS_VI']
    )

    for document in cursor:
        print(document)
        for threat_type in document["threat_type"]:

            if threat_type not in threat_vocab:

                threat_vocab.add(threat_type)
                threat_doc = {"type" : threat_type, "_key" : threat_type}
                if not threat_collection.has(threat_doc["_key"]):
                    threat_collection.insert(threat_doc)

            edge = {
                "_from": "CVE_VI/" + document["key"],
                "_to": "CVE_THREATS_VI/" + threat_type
            }
            print(edge)
            threat_edges.insert(edge)

    print(threat_vocab)

        





In [81]:
def build_tags_vocab(db):

    batch_size = 1000

    aql_fetch = """
    FOR vuln IN CVE_VI
    RETURN {key: vuln._key, threat_type: vuln.threats[*].tags[*]}"""
    
    cursor = db.aql.execute(aql_fetch)

    threat_vocab = set()

    if db.has_graph('threat_type_graph'):
        threat_graph = db.graph('threat_type_graph')
    else:
        threat_graph = db.create_graph('threat_type_graph')

    if not db.has_collection("CVE_THREAT_TYPES_VI"):
        threat_collection = db.create_collection("CVE_THREAT_TYPES_VI")
    else:
        threat_collection = db["CVE_THREAT_TYPES_VI"]

    if threat_graph.has_edge_definition('threat_type_edges'):
        threat_edges = threat_graph.edge_collection('threat_type_edges')
    else:
        threat_edges = threat_graph.create_edge_definition(
            edge_collection='threat_type_edges',
            from_vertex_collections=['CVE_VI'],
            to_vertex_collections=['CVE_THREAT_TYPES_VI']
    )
        
        
    batch_edge_inserts = []
    batch_threat_inserts = []

    count = 0
    for document in cursor:
        if count % batch_size == 0:
            
            print(f"Processing count {count}")
            threat_edges.insert_many(batch_edge_inserts)
            # Batch insert threat types
            threat_collection.insert_many(batch_threat_inserts)

            # Reset the batch lists
            batch_edge_inserts = []
            batch_threat_inserts = []
        
        if len(document["threat_type"]):
            threat_types = [item for sublist in document["threat_type"] for item in sublist]
                    
        # print("threat types:" + str(threat_types))
        for threat_type in threat_types:

            if threat_type not in threat_vocab:
                threat_type = threat_type.replace(" ", "")
                threat_vocab.add(threat_type)
                threat_doc = {"type" : threat_type, "_key" : threat_type}
                # print(threat_doc)
                if not threat_collection.has(threat_doc["_key"]):
                    print("insert attempted")
                    batch_threat_inserts.append(threat_doc)

            edge = {
                "_from": "CVE_VI/" + document["key"],
                "_to": "CVE_THREAT_TYPES_VI/" + threat_type
            }
            # print(edge)
            batch_edge_inserts.append(edge)
        count += 1

    if batch_edge_inserts:
        print("Processing the final batch")
        threat_edges.insert_many(batch_edge_inserts)
    if batch_threat_inserts:
        threat_collection.insert_many(batch_threat_inserts)

        





In [82]:
build_tags_vocab(BRON_db)



Processing count 0
Processing count 1000
Processing count 2000
Processing count 3000
Processing count 4000
Processing count 5000
Processing count 6000
Processing count 7000
Processing count 8000
Processing count 9000
Processing count 10000
Processing count 11000
Processing count 12000
Processing count 13000
Processing count 14000
Processing count 15000
Processing count 16000
Processing count 17000
Processing count 18000
Processing count 19000
Processing count 20000
Processing count 21000
Processing count 22000
Processing count 23000
Processing count 24000
Processing count 25000
Processing count 26000
Processing count 27000
Processing count 28000
Processing count 29000
Processing count 30000
Processing count 31000
Processing count 32000
Processing count 33000
Processing count 34000
Processing count 35000
Processing count 36000
Processing count 37000
Processing count 38000
Processing count 39000
Processing count 40000
Processing count 41000
Processing count 42000
insert attempted
Process