In [1]:
# Default imports
import numpy as np
from numpy import random as rnd
from matplotlib import pyplot as plt 
import os,sys,datetime,time,math,itertools,warnings

# Pandas import
import pandas as pd 

# Neo4j import
from neo4j import GraphDatabase

In [345]:
# Cypher template used to register one DQ standard in the Knowledge Graph.
# Every {v_...} placeholder is filled in through str.format(); the doubled
# braces ({{ and }}) come out as the literal braces of a Cypher property map.
# The DQ_Standard node is always CREATEd, whereas the ETL module, EDM table,
# employees, dimension and domain are MERGEd and then linked to the new node.
query_template = """
    CREATE (s:DQ_Standard{{
        flag_name:'{v_flag_name}',
        definition_freetext:'{v_def_text}',
        definition_sql:'{v_def_sql}',
        flag_target_row_identifier_field:'{v_row_id}',
        violation_severity:{v_violation_severity},
        record_effective_date:datetime(),
        record_end_date:datetime('9999-12-31T00:00:00')
    }})
    MERGE (m:ETL_Module{{
        name:'{v_etl_name}',
        etl_zone:'{v_etl_zone}',
        link:'{v_etl_link}',
        etl_schema:'{v_etl_schema}'
    }})
    MERGE (t:EDM_Table{{
        TABLE_CATALOG:'{v_table_catalog}',
        TABLE_SCHEMA:'{v_table_schema}',
        TABLE_NAME:'{v_table_name}'
    }})
    MERGE (e1:employee{{name:'{v_empname_owner}'}})
    MERGE (e2:employee{{name:'{v_empname_wfowner}'}})
    MERGE (di:DQ_Dimension{{name:'{v_dq_dimension}'}})
    MERGE (do:Domain{{name:'{v_domain}'}})
    CREATE (s)-[indom:IN_DOMAIN]->(do)
    CREATE (s)-[catas:CATEGORIZED_AS]->(di)
    CREATE (s)-[repin:EDM_REPORTED_IN]->(t)
    CREATE (s)-[calcas:CALCULATED_IN]->(m)
    CREATE (e1)-[owner:STANDARD_OWNER]->(s)
    CREATE (e2)-[wf_owner:WORKFLOW_MODIFICATION_OWNER]->(s)
    RETURN s
"""

In [2]:
# Path configuration
db_uri = r'neo4j+ssc://8ce1cc2b.databases.neo4j.io:7687'
# Raw strings: every '\\' below is two literal backslash characters in the path.
cred_link = r'C:\\Users\\z003mxpm\\Desktop\\neo4j.txt'
flags_excel = r'C:\\Users\\z003mxpm\\Desktop\\DQ Flags in CDC - working file.xlsx'

# Read the credentials file on the local machine.
# A line whose first character is '0' or '1' contributes the text after its
# last ':' (whitespace stripped). Values are keyed by that prefix so the auth
# tuple no longer depends on the order of the lines in the file, and a
# repeated prefix cannot silently grow the tuple beyond two entries.
# NOTE(review): presumably '0' is the user name and '1' the password -- confirm
# against the credentials file format.
credential_by_prefix = {}
with open(cred_link,'r') as f:
    for line in f:
        prefix = line[0]
        if prefix in ('0', '1'):
            credential_by_prefix[prefix] = line.split(':')[-1].strip()

# Fail early with a clear message instead of handing the driver an
# incomplete auth tuple.
missing_prefixes = [p for p in ('0', '1') if p not in credential_by_prefix]
if missing_prefixes:
    raise ValueError(
        'Credentials file {} has no line starting with: {}'.format(
            cred_link, ', '.join(missing_prefixes)
        )
    )
db_auth = (credential_by_prefix['0'], credential_by_prefix['1'])

# Create Neo4j driver and interrupt in case of error
driver = GraphDatabase.driver(db_uri,auth=db_auth)
driver.verify_connectivity()

# Use driver to create a session
session = driver.session()

In [347]:
# Load the flag definitions from the 'Test Set' sheet of the working Excel file
ff = pd.read_excel(io=flags_excel,sheet_name='Test Set')

# Ask the knowledge graph which flag names already exist as DQ_Standard nodes
blocked_flags_qresult = session.run(query='MATCH (s:DQ_Standard) RETURN DISTINCT s.flag_name')
blocked_flagnames = []
for record in blocked_flags_qresult:
    blocked_flagnames.append(record.data()['s.flag_name'])

# Keep only the rows whose flag is not in the graph yet
already_in_graph = ff['Flag Name'].isin(blocked_flagnames)
ff = ff[~already_in_graph]

In [4]:
# Ad-hoc inspection of the query result object. The name is only bound once the
# cell that runs the DQ_Standard lookup has executed in the current kernel; the
# NameError captured below comes from running this cell before that happened.
blocked_flags_qresult

NameError: name 'blocked_flags_qresult' is not defined

In [350]:
def _cypher_quoted(value):
    """Return ``value`` as text that is safe inside a single-quoted Cypher string.

    The value is converted with ``str()`` first, so non-string cells (numbers,
    empty Excel cells read as NaN) no longer raise on ``.replace``. Backslashes
    are escaped before single quotes so that the backslashes introduced for the
    quotes are not escaped a second time.

    NOTE(review): an empty Excel cell is therefore written as the text 'nan',
    which is what the un-escaped fields already produced before this change.
    Consider validating required columns before inserting.
    """
    return str(value).replace('\\', '\\\\').replace('\'', '\\\'')


# Template placeholder -> Excel column that supplies its value.
# Every one of these placeholders sits inside single quotes in query_template,
# so every value is escaped, not only the two definition fields.
placeholder_columns = {
    # DQ Standard Config
    'v_flag_name': 'Flag Name',
    'v_def_text': 'Business Definition',
    'v_def_sql': 'SQL Definition',
    'v_row_id': 'Row Identifier',
    # ETL Module Config
    'v_etl_name': 'Calculation ETL Name',
    'v_etl_zone': 'Calculation ETL Zone',
    'v_etl_link': 'Calculation WF Link',
    'v_etl_schema': 'Calculation ETL Schema',
    # EDM Table Config
    'v_table_catalog': 'Table Catalog',
    'v_table_schema': 'Table Schema',
    'v_table_name': 'Table',
    # Details Config
    'v_empname_owner': 'Owner',
    'v_empname_wfowner': 'WF Mod Owner',
    'v_dq_dimension': 'Dimension',
    'v_domain': 'Domain',
}

# Iterate all rows
for _, item in ff.iterrows():
    # Assemble row query
    escaped_values = {
        placeholder: _cypher_quoted(item[column])
        for placeholder, column in placeholder_columns.items()
    }
    # violation_severity is an unquoted literal in the template, so it is
    # passed through as-is rather than escaped as a string.
    query = query_template.format(v_violation_severity='2', **escaped_values)
    # Run the query and consume the result right away, so a failure is raised
    # for the row that caused it instead of surfacing on a later iteration.
    result = session.run(query=query)
    result.consume()

In [351]:
# Close session and driver; the driver is closed even if closing the session raises
try:
    session.close()
finally:
    driver.close()