Convert the Physics Derivation Graph's v7 JSON format to Neo4j Cypher

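To load the generated `.cypher` files into ui_v8, run `cypher-shell` inside the running Neo4j container once per file, for example: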

```bash
docker exec `docker ps | grep community | cut -d' ' -f1` bin/cypher-shell --file dumping_grounds/symbols.cypher 
docker exec `docker ps | grep community | cut -d' ' -f1` bin/cypher-shell --file dumping_grounds/infrules.cypher 
```

In [1]:
import json
#import pandas
import time
import re
import random

In [2]:
# Prefixes prepended to numeric IDs so that symbol, inference-rule and
# operator IDs are distinguishable in the generated Cypher.
SYMBOL_PREFIX = "000"
INFRULE_PREFIX = "111"
OPERATOR_PREFIX = "2222"

# Fixed seed: the random IDs drawn in later cells are the same on every
# Restart & Run All.
RANDOM_SEED = 10
random.seed(RANDOM_SEED)

# data

In [3]:
# data.json comes from v7_pickle_web_interface/flask/
# Parse it straight from the open handle into the `data` dict used by every
# later cell.
with open('data.json','r') as file_handle:
    data = json.load(file_handle)

In [4]:
# Show the top-level sections of the loaded JSON; each section below handles one of them.
data.keys()

dict_keys(['derivations', 'expr local to global', 'expressions', 'inference rules', 'measures', 'operators', 'symbols', 'units'])

# expressions [done]

In [5]:
# Sanity check of the ID rewrite applied to the AST strings further down:
# each "pdg" followed by four digits gets SYMBOL_PREFIX inserted in front of
# those digits.
my_str = "symb('pdg1939') and another symb('pdg9991') for"

four_digit_pdg_id = re.compile(r"pdg(\d\d\d\d)")
result = four_digit_pdg_id.sub(r"pdg"+SYMBOL_PREFIX+"\\1", my_str)

print(result)

symb('pdg0001939') and another symb('pdg0009991') for


In [6]:
# Write one Cypher CREATE per v7 expression to expr_and_feed.cypher.
# Entries whose LaTeX contains no "=" become `feed` nodes; all others become
# `expression` nodes with the LaTeX split into a left- and right-hand side.
str_to_prnt = ""
for expr_ID, expr_dict in data['expressions'].items():

    # Cypher string literals treat "\" as an escape character, so every
    # backslash is doubled (this covers \frac, \nabla, \rho, \times, \beta and
    # every other LaTeX macro). The escaped text is kept in a local variable
    # instead of being written back into `data`, so re-running this cell does
    # not escape the same string twice.
    latex_escaped = expr_dict['latex'].replace("\\", "\\\\")

    # Insert SYMBOL_PREFIX in front of each four-digit pdg symbol ID.
    result_AST = re.sub(r"pdg(\d\d\d\d)", r"pdg"+SYMBOL_PREFIX+"\\1", expr_dict['AST'])

    if "=" not in latex_escaped:
        str_to_prnt += ":begin"+"\n"
        str_to_prnt += 'UNWIND [{id:"'+expr_ID+'",\n'
        str_to_prnt += '         properties:{lean:"", '+'\n'
        str_to_prnt += '                     author_name_latex:"ben", '+'\n'
        str_to_prnt += '                     sympy:"'+result_AST+'", '+'\n'
        str_to_prnt += '                     latex:"'+latex_escaped+'"}}] AS row'+'\n'
        str_to_prnt += 'CREATE (n:feed{id: row.id}) SET n += row.properties;'+'\n'
        str_to_prnt += ':commit'+'\n'
        str_to_prnt += 'CALL db.awaitIndexes(300);'+'\n'

    else:
        list_of_latex = latex_escaped.split("=")
        if len(list_of_latex)!=2:
            # More than one "=" (for example inside a subscript): prefer the
            # spaced " = " as the relation sign, and split only once so there
            # are always exactly two parts and nothing after the relation is
            # dropped.
            if " = " in latex_escaped:
                list_of_latex = latex_escaped.split(" = ", 1)
            else:
                list_of_latex = latex_escaped.split("=", 1)

        str_to_prnt += ":begin"+"\n"
        str_to_prnt += 'UNWIND [{id:"'+expr_ID+'",\n'
        str_to_prnt += '         properties:{sympy_lhs:"",'+'\n'
        str_to_prnt += '                     reference_latex:"",'+'\n'
        str_to_prnt += '                     sympy_rhs:"",'+'\n'
        str_to_prnt += '                     description_latex:"'+expr_dict['notes']+'",'+'\n'
        str_to_prnt += '                     latex_lhs:"'+list_of_latex[0].strip()+'",'+'\n'
        str_to_prnt += '                     latex_rhs:"'+list_of_latex[1].strip()+'",'+'\n'
        str_to_prnt += '                     latex_condition:"",'+'\n'
        str_to_prnt += '                     name_latex:"'+expr_dict['name']+'",'+'\n'
        str_to_prnt += '                     lean:"",'+'\n'
        str_to_prnt += '                     author_name_latex:"ben",'+'\n'
        str_to_prnt += '                     sympy:"'+result_AST+'",'+'\n'
        str_to_prnt += '                     latex_relation:"="}}] AS row'+'\n'
        str_to_prnt += 'CREATE (n:expression{id: row.id}) SET n += row.properties;'+'\n'
        str_to_prnt += ':commit'+'\n'
        str_to_prnt += 'CALL db.awaitIndexes(300);'+'\n'

with open("expr_and_feed.cypher","w") as file_handle:
    file_handle.write(str_to_prnt)

# symbols [done]

In [7]:
# Write one `scalar` node (also labelled `symbol`) per v7 symbol to
# symbols.cypher. Symbols whose category is "constant" additionally get a
# `value_with_units` node template and a HAS_VALUE relationship to it.
str_to_prnt = ""
for symbol_ID, symbol_dict in data['symbols'].items():
    # All references joined into one string; empty when the key is absent.
    ref_str = " and ".join(symbol_dict.get('references', []))

    # Cypher string literals treat "\" as an escape character; without
    # doubling, LaTeX such as \rho or \theta would be read as control
    # characters (\r, \t) when the script is loaded.
    latex_escaped = symbol_dict['latex'].replace("\\", "\\\\")

    str_to_prnt += ":begin"+"\n"
    str_to_prnt += 'UNWIND [{id:"'+SYMBOL_PREFIX+symbol_ID+'",\n'
    str_to_prnt += '         properties:{reference_latex:"'+ref_str+'",\n'
    str_to_prnt += '                     dimension_time: '+str(symbol_dict['dimensions']['time'])+',\n'
    str_to_prnt += '                     dimension_electric_charge:'+str(symbol_dict['dimensions']['electric charge'])+',\n'
    str_to_prnt += '                     dimension_luminous_intensity:'+str(symbol_dict['dimensions']['luminous intensity'])+',\n'
    str_to_prnt += '                     dimension_length:'+str(symbol_dict['dimensions']['length'])+',\n'
    str_to_prnt += '                     dimension_amount_of_substance:'+str(symbol_dict['dimensions']['amount of substance'])+',\n'
    str_to_prnt += '                     dimension_mass:'+str(symbol_dict['dimensions']['mass'])+',\n'
    str_to_prnt += '                     dimension_temperature:'+str(symbol_dict['dimensions']['temperature'])+',\n'
    str_to_prnt += '                     description_latex:"",\n'
    str_to_prnt += '                     latex:"'+latex_escaped+'",\n'
    # A missing name is written as an empty string.
    str_to_prnt += '                     name_latex:"'+symbol_dict.get('name', "")+'",\n'
    str_to_prnt += '                     variable_or_constant:"'+symbol_dict['category']+'",\n'
    str_to_prnt += '                     author_name_latex:"ben",\n'
    # `scope` may be a single string or a list; only the first list entry is kept.
    if isinstance(symbol_dict['scope'], list):
        str_to_prnt += '                     scope:"'+symbol_dict['scope'][0]+'",\n'
    else:
        str_to_prnt += '                     scope:"'+symbol_dict['scope']+'",\n'
    str_to_prnt += '                     domain:"any"}}] AS row'+'\n'
    str_to_prnt += 'CREATE (n:scalar{id: row.id}) SET n += row.properties SET n:symbol;'+'\n'
    str_to_prnt += ':commit'+'\n'
    str_to_prnt += "CALL db.awaitIndexes(300);"+'\n'

    if symbol_dict['category']=="constant":

        # Random six-digit ID for the value node; drawn with the seeded
        # generator, so the sequence of draws must stay unchanged to keep the
        # IDs generated in later cells stable.
        VALUE_ID = str(random.randint(100000,999999))
        # NOTE(review): number_power / number_decimal are emitted without a
        # value and dimension__unit appears twice, so this statement looks like
        # a template to be completed by hand from the `values` dump written as
        # a // comment -- confirm before loading symbols.cypher unattended.
        str_to_prnt += 'UNWIND [{id:"'+VALUE_ID+'",'+'\n'
        str_to_prnt += '// '+str(symbol_dict['values'])+'\n'
        str_to_prnt += '         properties:{number_power:,'+'\n'
        str_to_prnt += '                     number_decimal:,'+'\n'
        str_to_prnt += '                     author_name_latex:"ben",'+'\n'
        str_to_prnt += '                     dimension__unit:"",'+'\n'
        str_to_prnt += '                     dimension__unit:""}}] AS row'+'\n'
        str_to_prnt += 'CREATE (n:value_with_units{id: row.id}) SET n += row.properties;'+'\n'

        str_to_prnt += ':begin'+'\n'
        str_to_prnt += 'UNWIND [{start: {id:"'+SYMBOL_PREFIX+symbol_ID+'"}, end: {id:"'+VALUE_ID+'"}, properties:{}}] AS row'+'\n'
        str_to_prnt += 'MATCH (start:scalar{id: row.start.id})'+'\n'
        str_to_prnt += 'MATCH (end:value_with_units{id: row.end.id})'+'\n'
        str_to_prnt += 'CREATE (start)-[r:HAS_VALUE]->(end) SET r += row.properties;'+'\n'
        str_to_prnt += ':commit'+'\n'
        str_to_prnt += "CALL db.awaitIndexes(300);"+'\n'

with open("symbols.cypher","w") as file_handle:
    file_handle.write(str_to_prnt)


# inference rules [enacted; need to account for extra fields]

This section has to run before the steps section: the steps refer to each inference rule by the numeric ID assigned here, so the IDs must already exist and stay consistent.

In [8]:
# Draw one distinct three-digit number (100-999) per inference rule.
list_of_IDs = random.sample(range(100, 1000), k=len(data['inference rules']))

In [9]:
# Map each inference-rule name to INFRULE_PREFIX followed by the number drawn
# for its position in data['inference rules'].
infrule_ID_dict = {
    rule_name: INFRULE_PREFIX + str(list_of_IDs[position])
    for position, rule_name in enumerate(data['inference rules'])
}

In [10]:
# Write one `inference_rule` node per v7 inference rule to infrules.cypher,
# using the IDs from infrule_ID_dict.
str_to_prnt = ""
for infrule_name, infrule_dict in data['inference rules'].items():
    # Cypher string literals treat "\" as an escape character; without
    # doubling, a LaTeX macro such as \ref would be read as a carriage return
    # followed by "ef" when the script is loaded.
    latex_escaped = infrule_dict['latex'].replace("\\", "\\\\")

    str_to_prnt +=':begin'+'\n'
    str_to_prnt +='UNWIND [{id:"'+infrule_ID_dict[infrule_name]+'",'+'\n'
    str_to_prnt +='         properties:{name_latex:"'+infrule_name+'",'+'\n'
    str_to_prnt +='                     number_of_inputs:'+str(infrule_dict['number of inputs'])+','+'\n'
    str_to_prnt +='                     number_of_feeds:'+str(infrule_dict['number of feeds'])+','+'\n'
    str_to_prnt +='                     number_of_outputs:'+str(infrule_dict['number of outputs'])+','+'\n'
    str_to_prnt +='                     author_name_latex:"ben",'+'\n'
    # Double quotes would end the surrounding Cypher string literal early, so
    # they are replaced by single quotes in both free-text fields.
    if len(infrule_dict['notes'])>1:
        str_to_prnt +='                     TODO_notes_latex:"'+infrule_dict['notes'].replace('"',"'")+'",'+'\n'
    else:
        str_to_prnt +='                     TODO_notes_latex:"",'+'\n'
    if len(infrule_dict['assumptions'])>1:
        str_to_prnt +='                     TODO_assumptions_latex:"'+infrule_dict['assumptions'].replace('"',"'")+'",'+'\n'
    else:
        str_to_prnt +='                     TODO_assumptions_latex:"",'+'\n'
    str_to_prnt +='                     latex:"'+latex_escaped+'"}}] AS row'+'\n'
    str_to_prnt +='CREATE (n:inference_rule{id: row.id}) SET n += row.properties;'+'\n'
    str_to_prnt +=':commit'+'\n'
    str_to_prnt += "CALL db.awaitIndexes(300);"+'\n'

with open("infrules.cypher","w") as file_handle:
    file_handle.write(str_to_prnt)


# derivations [done]

In [11]:
# Write one `derivation` node per v7 derivation to deriv.cypher.
cypher_chunks = []
for deriv_ID, deriv_dict in data['derivations'].items():
    notes = deriv_dict['notes']
    # Notes that are a single space-free token starting with "http" are stored
    # as the reference; any other notes are stored as the abstract.
    notes_is_url = notes.startswith("http") and " " not in notes
    reference = notes if notes_is_url else ""
    abstract = "" if notes_is_url else notes

    cypher_chunks.extend([
        ':begin\n',
        'UNWIND [{id:"' + deriv_ID + '",\n',
        '         properties:{abstract_latex:"' + abstract + '",\n',
        '                     name_latex:"' + deriv_dict['name'] + '",\n',
        '                     reference_latex:"' + reference + '",\n',
        '                     author_name_latex:"ben"}}] AS row\n',
        'CREATE (n:derivation{id: row.id}) SET n += row.properties;\n',
        ':commit\n',
        'CALL db.awaitIndexes(300);\n',
    ])

str_to_prnt = "".join(cypher_chunks)

with open("deriv.cypher","w") as file_handle:
    file_handle.write(str_to_prnt)


# steps [done]

In [12]:
# Write every step of every derivation to steps.cypher: the `step` node, its
# HAS_STEP link from the derivation, its HAS_INFERENCE_RULE link, and one
# HAS_INPUT / HAS_FEED / HAS_OUTPUT link per referenced expression or feed.

# (key in the step dict, label of the target node, relationship type)
step_link_kinds = (
    ('inputs', 'expression', 'HAS_INPUT'),
    ('feeds', 'feed', 'HAS_FEED'),
    ('outputs', 'expression', 'HAS_OUTPUT'),
)

cypher_chunks = []
for deriv_ID, deriv_dict in data['derivations'].items():
    for step_id, step_dict in deriv_dict['steps'].items():

        # The step node itself.
        cypher_chunks.append(
            'UNWIND [{id:"' + step_id + '",\n'
            + '         properties:{note_after_step_latex:"",\n'
            + '                     author_name_latex:"ben",\n'
            + '                     note_before_step_latex:"' + step_dict['notes'] + '"}}] AS row\n'
        )
        cypher_chunks.append('CREATE (n:step{id: row.id}) SET n += row.properties;\n')

        # derivation -> step; sequence_index is written as a bare number here.
        cypher_chunks.append(
            'UNWIND [{start: {id:"' + deriv_ID + '"}, end: {id:"' + step_id + '"}, '
            + 'properties:{sequence_index:' + str(step_dict['linear index']) + '}}] AS row\n'
        )
        cypher_chunks.append('MATCH (start:derivation{id: row.start.id})\n')
        cypher_chunks.append('MATCH (end:step{id: row.end.id})\n')
        cypher_chunks.append('CREATE (start)-[r:HAS_STEP]->(end) SET r += row.properties;\n')

        # step -> inference rule, using the ID assigned in infrule_ID_dict.
        cypher_chunks.append(
            'UNWIND [{start: {id:"' + step_id + '"}, end: {id:"'
            + infrule_ID_dict[step_dict['inf rule']] + '"}, properties:{}}] AS row\n'
        )
        cypher_chunks.append('MATCH (start:step{id: row.start.id})\n')
        cypher_chunks.append('MATCH (end:inference_rule{id: row.end.id})\n')
        cypher_chunks.append('CREATE (start)-[r:HAS_INFERENCE_RULE]->(end) SET r += row.properties;\n')

        # step -> inputs, feeds, outputs. Local IDs are translated to global
        # IDs through data["expr local to global"].
        # NOTE(review): sequence_index is written as a quoted string on these
        # links but as a number on HAS_STEP -- confirm which type the consumer
        # expects.
        for dict_key, target_label, relationship in step_link_kinds:
            for position, local_id in enumerate(step_dict[dict_key]):
                global_id = data["expr local to global"][local_id]
                cypher_chunks.append(
                    'UNWIND [{start: {id:"' + step_id + '"}, end: {id:"' + global_id
                    + '"}, properties:{sequence_index:"' + str(position) + '"}}] AS row\n'
                )
                cypher_chunks.append('MATCH (start:step{id: row.start.id})\n')
                cypher_chunks.append('MATCH (end:' + target_label + '{id: row.end.id})\n')
                cypher_chunks.append('CREATE (start)-[r:' + relationship + ']->(end) SET r += row.properties;\n')

        cypher_chunks.append('CALL db.awaitIndexes(300);\n')

str_to_prnt = "".join(cypher_chunks)

with open("steps.cypher","w") as file_handle:
    file_handle.write(str_to_prnt)

# units [won't do]

The content of `units` is not converted here; it has been moved into:
- https://github.com/allofphysicsgraph/ui_v8_website_flask_neo4j/issues/42
- `list_of_valid.py`

In [13]:
# Display the unit names present in the v7 data.
data['units'].keys()

dict_keys(['Ampere', 'Farad', 'Kelvin', 'Tesla', 'hand', 'kilogram', 'light-year', 'meter', 'mol', 'parsec', 'second', 'sol'])

# operators [enacted; need to account for extra fields]

In [14]:
# Display the operator names present in the v7 data.
data['operators'].keys()

dict_keys(['addition', 'cosine', 'cross product', 'curl', 'definite integral', 'divergence', 'division', 'dot product', 'element-wise addition', 'equals', 'function', 'gradient', 'indefinite intergral', 'multiplication', 'sine', 'spatial vector differential', 'subtraction', 'summation'])

In [15]:
# Draw one distinct three-digit number (100-999) per operator.
list_of_IDs = random.sample(range(100, 1000), k=len(data['operators']))

In [16]:
# Write one node per v7 operator to operators.cypher. "equals" becomes a
# `relation` node with fixed description and LaTeX; every other operator
# becomes an `operation` node that also carries its argument count.
str_to_prnt = ""
for index, (operator_name, op_dict) in enumerate(data['operators'].items()):
    is_relation = (operator_name=="equals")

    if is_relation:
        node_label = "relation"
        description = "LHS = RHS"
        latex_escaped = "="
    else:
        node_label = "operation"
        description = ""
        # Cypher string literals treat "\" as an escape character; without
        # doubling, LaTeX such as \times or \nabla would be read as a tab or
        # newline followed by the rest of the macro name.
        latex_escaped = op_dict['latex'].replace("\\", "\\\\")

    str_to_prnt +=':begin'+'\n'
    str_to_prnt +='UNWIND [{id:"'+OPERATOR_PREFIX+str(list_of_IDs[index])+'",'+'\n'
    str_to_prnt +='         properties:{name_latex:"'+operator_name+'",'+'\n'
    str_to_prnt +='                     reference_latex:"",'+'\n'
    if not is_relation:
        str_to_prnt +='                     argument_count:'+str(op_dict['argument count'])+','+'\n'
    str_to_prnt +='                     author_name_latex:"ben",'+'\n'
    str_to_prnt +='                     description_latex:"'+description+'",'+'\n'
    # str() of these values is written directly into the Cypher map.
    str_to_prnt +='                     TODO_scope:'+str(op_dict['scope'])+',\n'
    str_to_prnt +='                     TODO_latex_macro_list:'+str(op_dict['latex macros'])+',\n'
    str_to_prnt +='                     latex:"'+latex_escaped+'"}}] AS row'+'\n'
    str_to_prnt +='CREATE (n:'+node_label+'{id: row.id}) SET n += row.properties;'+'\n'
    str_to_prnt +=':commit'+'\n'
    str_to_prnt += "CALL db.awaitIndexes(300);"+'\n'

with open("operators.cypher","w") as file_handle:
    file_handle.write(str_to_prnt)

# measures [won't do]

In [17]:
# Display the measure names present in the v7 data.
data['measures'].keys()

dict_keys(['amount of substance', 'electric current', 'length', 'luminous intensity', 'mass', 'temperature', 'time'])