In [1]:

%pprint

Pretty printing has been turned OFF



---
# Load needed libraries and functions

In [2]:

from datetime import datetime
import humanize
import sys
import time

# Insert at 1, 0 is the script path (or '' in REPL)
sys.path.insert(1, '../py')

In [3]:

t0 = time.time()

# Get the Neo4j driver
from storage import Storage
s = Storage()

from ha_utils import HeaderAnalysis
ha = HeaderAnalysis(s=s, verbose=False)

from scrape_utils import WebScrapingUtilities
wsu = WebScrapingUtilities(s=s)
uri = wsu.secrets_json['neo4j']['connect_url']
user =  wsu.secrets_json['neo4j']['username']
password = wsu.secrets_json['neo4j']['password']

# Get the neo4j object
from cypher_utils import CypherUtilities
cu = CypherUtilities(uri=uri, user=user, password=password, driver=None, s=s, ha=ha)

duration_str = humanize.precisedelta(time.time() - t0, minimum_unit='seconds', format='%0.0f')
print(f'Utility libraries created in {duration_str}')
print(f'Last run on {datetime.now()}')

Utility libraries created in 1 second
Last run on 2023-03-07 16:09:28.797280


In [6]:

def generate_child_strs(verbose=False):
    def do_cypher_tx(tx):
        cypher_str = '''
            // Find all NavigableParents nodes in the graph
            MATCH (np:NavigableParents)

            // That do not have any parts-of-speech symbol
            WHERE
                (np.is_header = 'True')
                AND NOT (
                    (np.is_task_scope = 'True')
                    OR (np.is_minimum_qualification = 'True')
                    OR (np.is_preferred_qualification = 'True')
                    OR (np.is_legal_notification = 'True')
                    OR (np.is_job_title = 'True')
                    OR (np.is_office_location = 'True')
                    OR (np.is_job_duration = 'True')
                    OR (np.is_supplemental_pay = 'True')
                    OR (np.is_educational_requirement = 'True')
                    OR (np.is_interview_procedure = 'True')
                    OR (np.is_corporate_scope = 'True')
                    OR (np.is_posting_date = 'True')
                    OR (np.is_other = 'True')
                    )

            // Return the navigable parent
            RETURN np.navigable_parent AS navigable_parent;'''
        results_list = tx.run(query=cypher_str, parameters={})

        return [dict(record.items()) for record in results_list]
    with cu.driver.session() as session:
        row_objs_list = session.write_transaction(do_cypher_tx)
    if verbose:
        print(len(row_objs_list))
    for row_obj in row_objs_list:
        if 'navigable_parent' in row_obj:
            yield row_obj['navigable_parent']

In [7]:

CHILD_STRS_LIST = list(generate_child_strs(verbose=False))
len(CHILD_STRS_LIST)

8

In [8]:

ZMQInteractiveShell_obj = get_ipython()
def get_cypher_code():
    output_str = ''
    tag_str = CHILD_STRS_LIST.pop()
    output_str += f'\n# {len(CHILD_STRS_LIST):,} to go\n'
    if "'" in tag_str:
        tag_str = tag_str.replace('"', '\\"')
        output_str += f'child_str = "{tag_str}"\n'
    else:
        output_str += f"child_str = '{tag_str}'\n"
    output_str += "def do_cypher_tx(tx, navigable_parent, verbose=False):\n"
    output_str += "    cypher_str = '''\n"
    output_str += "        MATCH (np:NavigableParents {navigable_parent: $navigable_parent})\n"
    output_str += "        SET\n"
    output_str += "            np.is_header = 'True',\n"
    output_str += "            np.is_task_scope = 'False',\n"
    output_str += "            np.is_minimum_qualification = 'False',\n"
    output_str += "            np.is_preferred_qualification = 'False',\n"
    output_str += "            np.is_educational_requirement = 'False',\n"
    output_str += "            np.is_legal_notification = 'False',\n"
    output_str += "            np.is_other = 'False',\n"
    output_str += "            np.is_corporate_scope = 'False',\n"
    output_str += "            np.is_job_title = 'False',\n"
    output_str += "            np.is_office_location = 'False',\n"
    output_str += "            np.is_job_duration = 'False',\n"
    output_str += "            np.is_supplemental_pay = 'False',\n"
    output_str += "            np.is_interview_procedure = 'False',\n"
    output_str += "            np.is_posting_date = 'False'\n"
    output_str += "        ''' + cu.return_everything_str + ';'\n"
    output_str += "    results_list = tx.run(query=cypher_str, parameters={'navigable_parent': navigable_parent})\n"
    output_str += "    \n"
    output_str += "    return [dict(record.items()) for record in results_list]\n"
    output_str += "with cu.driver.session() as session:\n"
    output_str += "    row_objs_list = session.write_transaction(do_cypher_tx, navigable_parent=child_str, verbose=False)\n"
    output_str += "row_objs_list"
    
    return output_str

In [None]:

ZMQInteractiveShell_obj.set_next_input(text=get_cypher_code(), replace=True)

In [25]:

ZMQInteractiveShell_obj.set_next_input(text=get_cypher_code(), replace=True)

IndexError: pop from empty list