In [1]:
#CONFIGURATION ----------------------------------------------------------------------------------------------------------

import os

# Connection settings for the Neo4j instance used by this notebook.
# Each value can be overridden through an environment variable so that
# credentials do not have to be stored in the notebook itself; when the
# variable is unset, the original development default is used.
DATABASE_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
USER = os.environ.get("NEO4J_USERNAME", "neo4j")
DB_NAME = os.environ.get("NEO4J_DATABASE", "migrateddatabase")
PASSWORD = os.environ.get("NEO4J_PASSWORD", "root")

#------------------------------------------------------------------------------------------------------------------------

# Notebook 3: Compute path properties

Third part of the migration procedure: the previously defined path properties are now computed and stored on the nodes.

#


In [2]:
#Neo4j connector class

from neo4j import GraphDatabase
# From https://neo4j.com/developer/python/

class NeoConnector:
    """Small wrapper around a Neo4j driver that runs queries and counts failures.

    Errors raised while creating the driver or while running a query are
    printed and counted instead of being propagated. Call
    ``get_error_counter()`` to find out how many failures occurred.
    """

    def __init__(self, uri, user, pwd):
        """Create the driver for ``uri`` authenticating as ``user`` / ``pwd``.

        If the driver cannot be created the error is printed, the error
        counter is incremented and the connector is left without a driver.
        """
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        self.__error_counter = 0
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            self.__error_counter += 1
            print("Failed to create the driver:", e)

    def close(self):
        """Close the underlying driver, if one was created."""
        if self.__driver is not None:
            self.__driver.close()

    def get_error_counter(self):
        """Return the number of driver-creation and query failures seen so far."""
        return self.__error_counter

    def query(self, query, db=DB_NAME, parameters=None):
        """Run a Cypher statement and return all of its records.

        Args:
            query: Cypher text to execute.
            db: Name of the database to open the session on. ``None`` opens
                a session without naming a database.
            parameters: Optional dict of Cypher parameters (referenced as
                ``$name`` in the query text). Prefer this over concatenating
                values into the query string.

        Returns:
            A list with the records produced by the statement, or ``None``
            if the query failed (the failure is printed and counted).

        Raises:
            AssertionError: If the driver could not be created.
        """
        assert self.__driver is not None, "Driver not initialized!"
        session_kwargs = {} if db is None else {"database": db}
        try:
            # The context manager closes the session even when run() raises.
            with self.__driver.session(**session_kwargs) as session:
                return list(session.run(query, parameters=parameters))
        except Exception as e:
            self.__error_counter += 1
            print("Query failed:", e)
            return None

In [3]:
# Open the connection that the following cells use to run their queries.
connector = NeoConnector(DATABASE_URI, USER, PASSWORD)
 

Reset all path and dependency properties to 0 on every node:

In [4]:
# Reset every counter property to 0 on all nodes so that the cells below
# start counting from a clean state. One query is issued per property, in
# the order listed here.
counter_properties = (
    # number of paths through the node
    "_ws_acc_path", "_ws_res_path",
    "_md_acc_path", "_md_res_path",
    # "dep_all" counters
    "_ws_acc_dep_all", "_ws_res_dep_all",
    "_md_acc_dep_all", "_md_res_dep_all",
    # "dep_atleastone" counters
    "_ws_acc_dep_atleastone", "_ws_res_dep_atleastone",
    "_md_acc_dep_atleastone", "_md_res_dep_atleastone",
)

for counter_property in counter_properties:
    connector.query("MATCH (n) SET n." + counter_property + "=0")



[]

Count the number of MAIL_DOMAIN paths passing through each node (the cell below processes at most 40 mail domains):

In [5]:
 
# NOTE(review): only the first 40 MAIL_DOMAIN nodes are processed because of
# the LIMIT in this query — confirm whether the cap is intentional.
mail_domains = connector.query("match (n:MAIL_DOMAIN) RETURN n.name AS name LIMIT 40")

# Cypher executed once per mail domain; __MAIL_DOMAIN_NAME__ is replaced with
# the (escaped) domain name. The statement:
#   1. expands paths from the domain, along the listed outgoing relationship
#      types, that terminate on an AUTONOMOUS_SYSTEM node;
#   2. zeroes the temporary counters cnt_buffer_access / cnt_buffer_resolution
#      on every node that lies on one of those paths;
#   3. for each path, increments the resolution counters (_md_res_path and
#      cnt_buffer_resolution) on its nodes when the path contains a
#      NAME_SERVER node, otherwise the access counters (_md_acc_path and
#      cnt_buffer_access);
#   4. increments _md_*_dep_all on nodes whose temporary counter is non-zero
#      and equals the root's _md_*_path value, and _md_*_dep_atleastone on
#      nodes whose temporary counter is non-zero;
#   5. removes the temporary counters and returns the node count of each path.
mail_domain_query_template = """match (d:MAIL_DOMAIN) 
      MATCH (end:AUTONOMOUS_SYSTEM)
      WHERE d.name="__MAIL_DOMAIN_NAME__"
      CALL apoc.path.expandConfig(d, {relationshipFilter: "BELONG>|CNAME>|COMPOSED_BY>|LOCATED>|MANAGED_BY>|MAPPED_IN>|PARENT>",minLevel:1, terminatorNodes: [end],uniqueness:'NODE_PATH'})
      YIELD path
      WITH d as root,collect(path) as paths, apoc.coll.toSet(apoc.coll.flatten(collect(nodes(path)))) as nodes

      FOREACH (n IN nodes| SET n.cnt_buffer_access=0,n.cnt_buffer_resolution=0)

      WITH root, paths, nodes

      UNWIND paths as path
            WITH *,any(node IN NODES(path) WHERE any(label in LABELS(node) WHERE label="NAME_SERVER")) AS isRes
            FOREACH (n IN NODES(path)|
                  SET n._md_acc_path = CASE isRes WHEN false THEN  n._md_acc_path+1 ELSE n._md_acc_path END,
                      n.cnt_buffer_access = CASE isRes WHEN false THEN  n.cnt_buffer_access+1 ELSE n.cnt_buffer_access END,
                      n._md_res_path= CASE isRes WHEN true THEN  n._md_res_path+1 ELSE n._md_res_path END,
                      n.cnt_buffer_resolution= CASE isRes WHEN true THEN  n.cnt_buffer_resolution+1 ELSE n.cnt_buffer_resolution END
            )

            WITH *, size(NODES(path)) AS length

      WITH root, nodes, collect(path) AS paths, collect(length) as lengths

      FOREACH (n IN nodes|
         SET n._md_res_dep_all= CASE WHEN n.cnt_buffer_resolution=root._md_res_path AND NOT(n.cnt_buffer_resolution=0) 
                                 THEN n._md_res_dep_all+1
                                 ELSE n._md_res_dep_all
                            END
         SET n._md_acc_dep_all= CASE WHEN  n.cnt_buffer_access=root._md_acc_path AND NOT (n.cnt_buffer_access=0) 
                                 THEN n._md_acc_dep_all+1
                                 ELSE n._md_acc_dep_all
                            END


         SET n._md_res_dep_atleastone= CASE WHEN NOT(n.cnt_buffer_resolution=0) 
                                     THEN n._md_res_dep_atleastone+1
                                     ELSE n._md_res_dep_atleastone
                                END
         SET n._md_acc_dep_atleastone= CASE WHEN NOT(n.cnt_buffer_access=0) 
                                     THEN n._md_acc_dep_atleastone+1
                                     ELSE n._md_acc_dep_atleastone
                                END
      )

      FOREACH (n IN nodes| REMOVE n.cnt_buffer_access,n.cnt_buffer_resolution)

      RETURN lengths
      """

for index, mail_domain in enumerate(mail_domains):
    print("Processing maiL_domain "+str(index)+ " / "+str(len(mail_domains)))
    # Escape backslashes and double quotes so that a name containing them
    # cannot terminate the Cypher string literal early or alter the query.
    name_literal = mail_domain["name"].replace("\\", "\\\\").replace('"', '\\"')
    connector.query(mail_domain_query_template.replace("__MAIL_DOMAIN_NAME__", name_literal))
 

Processing maiL_domain 0 / 40
Processing maiL_domain 1 / 40
Processing maiL_domain 2 / 40
Processing maiL_domain 3 / 40
Processing maiL_domain 4 / 40
Processing maiL_domain 5 / 40
Processing maiL_domain 6 / 40
Processing maiL_domain 7 / 40
Processing maiL_domain 8 / 40
Processing maiL_domain 9 / 40
Processing maiL_domain 10 / 40
Processing maiL_domain 11 / 40
Processing maiL_domain 12 / 40
Processing maiL_domain 13 / 40
Processing maiL_domain 14 / 40
Processing maiL_domain 15 / 40
Processing maiL_domain 16 / 40
Processing maiL_domain 17 / 40
Processing maiL_domain 18 / 40
Processing maiL_domain 19 / 40
Processing maiL_domain 20 / 40
Processing maiL_domain 21 / 40
Processing maiL_domain 22 / 40
Processing maiL_domain 23 / 40
Processing maiL_domain 24 / 40
Processing maiL_domain 25 / 40
Processing maiL_domain 26 / 40
Processing maiL_domain 27 / 40
Processing maiL_domain 28 / 40
Processing maiL_domain 29 / 40
Processing maiL_domain 30 / 40
Processing maiL_domain 31 / 40
Processing maiL_do

Count the number of WEB_SITE paths passing through each node:

In [6]:
 
web_sites = connector.query("match (n:WEB_SITE) RETURN n.name AS name")

# Cypher executed once per web site; __WEB_SITE_NAME__ is replaced with the
# (escaped) site name. The statement:
#   1. expands paths from the web site, along the listed outgoing
#      relationship types, that terminate on an AUTONOMOUS_SYSTEM node;
#   2. zeroes the temporary counters cnt_buffer_access / cnt_buffer_resolution
#      on every node that lies on one of those paths;
#   3. for each path, increments the resolution counters (_ws_res_path and
#      cnt_buffer_resolution) on its nodes when the path contains a
#      NAME_SERVER node, otherwise the access counters (_ws_acc_path and
#      cnt_buffer_access);
#   4. increments _ws_*_dep_all on nodes whose temporary counter is non-zero
#      and equals the root's _ws_*_path value, and _ws_*_dep_atleastone on
#      nodes whose temporary counter is non-zero;
#   5. removes the temporary counters.
web_site_query_template = """match (d:WEB_SITE) 
      MATCH (end:AUTONOMOUS_SYSTEM)
      WHERE d.name="__WEB_SITE_NAME__"
      CALL apoc.path.expandConfig(d, {relationshipFilter: "BELONG>|CNAME>|COMPOSED_BY>|LOCATED>|MANAGED_BY>|MAPPED_IN>|PARENT>",minLevel:1,  terminatorNodes: [end],uniqueness:'NODE_PATH'})
      YIELD path
      WITH d as root,collect(path) as paths, apoc.coll.toSet(apoc.coll.flatten(collect(nodes(path)))) as nodes

      FOREACH (n IN nodes| SET n.cnt_buffer_access=0,n.cnt_buffer_resolution=0)

      WITH root, paths, nodes

      UNWIND paths as path
            WITH *,any(node IN NODES(path) WHERE any(label in LABELS(node) WHERE label="NAME_SERVER")) AS isRes
            FOREACH (n IN NODES(path)|
                  SET n._ws_acc_path = CASE isRes WHEN false THEN  n._ws_acc_path+1 ELSE n._ws_acc_path END,
                      n.cnt_buffer_access = CASE isRes WHEN false THEN  n.cnt_buffer_access+1 ELSE n.cnt_buffer_access END,
                      n._ws_res_path= CASE isRes WHEN true THEN  n._ws_res_path+1 ELSE n._ws_res_path END,
                      n.cnt_buffer_resolution= CASE isRes WHEN true THEN  n.cnt_buffer_resolution+1 ELSE n.cnt_buffer_resolution END
            )
      WITH root, nodes, collect(path) AS paths

      FOREACH (n IN nodes|
         SET n._ws_res_dep_all= CASE WHEN n.cnt_buffer_resolution=root._ws_res_path AND NOT(n.cnt_buffer_resolution=0) 
                                 THEN n._ws_res_dep_all+1
                                 ELSE n._ws_res_dep_all
                            END
         SET n._ws_acc_dep_all= CASE WHEN  n.cnt_buffer_access=root._ws_acc_path AND NOT (n.cnt_buffer_access=0) 
                                 THEN n._ws_acc_dep_all+1
                                 ELSE n._ws_acc_dep_all
                            END
         SET n._ws_res_dep_atleastone= CASE WHEN NOT(n.cnt_buffer_resolution=0) 
                                     THEN n._ws_res_dep_atleastone+1
                                     ELSE n._ws_res_dep_atleastone
                                END
         SET n._ws_acc_dep_atleastone= CASE WHEN NOT(n.cnt_buffer_access=0) 
                                     THEN n._ws_acc_dep_atleastone+1
                                     ELSE n._ws_acc_dep_atleastone
                                END
      )

      FOREACH (n IN nodes| REMOVE n.cnt_buffer_access,n.cnt_buffer_resolution)"""

for index, web_site in enumerate(web_sites):
    # Escape backslashes and double quotes so that a name containing them
    # cannot terminate the Cypher string literal early or alter the query.
    name_literal = web_site["name"].replace("\\", "\\\\").replace('"', '\\"')
    connector.query(web_site_query_template.replace("__WEB_SITE_NAME__", name_literal))

    print("Processed web_site "+web_site["name"]+" [number: "+str(index)+"]")

Compute the total properties as the sum of the access and resolution counters:

In [7]:
# Store, on every node, the sum of its access and resolution counters for
# both the web-site (_ws_) and mail-domain (_md_) properties.
total_statements = (
    "MATCH (n) SET n._ws_path=n._ws_acc_path+n._ws_res_path",
    "MATCH (n) SET n._md_path=n._md_acc_path+n._md_res_path",
    "MATCH (n) SET n._ws_dep_all=n._ws_acc_dep_all+n._ws_res_dep_all",
    "MATCH (n) SET n._md_dep_all=n._md_acc_dep_all+n._md_res_dep_all",
)

for statement in total_statements:
    connector.query(statement)

Number of paths computed for the database:

In [8]:
# Sum the access-path and resolution-path counters over all WEB_SITE and
# MAIL_DOMAIN nodes; the two queries run in this order.
acc_path, res_path = (
    connector.query(statement)
    for statement in (
        "MATCH (n) WHERE n:WEB_SITE OR n:MAIL_DOMAIN RETURN SUM(n._ws_acc_path+n._md_acc_path) AS acc_path",
        "MATCH (n) WHERE n:WEB_SITE OR n:MAIL_DOMAIN RETURN SUM(n._ws_res_path+n._md_res_path) AS res_path",
    )
)

In [9]:
# Show both query results, one per line.
print(acc_path, res_path, sep="\n")

[<Record acc_path=62>]
[<Record res_path=6369>]
