In [22]:
import os

from neo4j import GraphDatabase

In [37]:
class Neo4jChemicalGraph:
    """Small wrapper around a Neo4j driver for loading regulations and their chemicals.

    Graph written by this class:
      (:Regulation {name})
      (:Chemical {CAS, EC})
      (:ChemicalName {name})-[:BELONGS_TO]->(:Chemical)
      (:Chemical)-[:REGULATED_BY]->(:Regulation)
      (:ChemicalName)-[:REGULATED_BY]->(:Regulation)

    Instances can be used as context managers so the driver is closed even
    when an import fails::

        with Neo4jChemicalGraph(uri, user, password) as db:
            db.import_json(data)
    """

    def __init__(self, uri, user, password):
        """Create a driver for the instance at ``uri`` using basic auth."""
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def __enter__(self):
        """Return ``self`` so the object can be bound in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the driver on leaving the ``with`` block; exceptions propagate."""
        self.close()
        return False

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

    @staticmethod
    def _blank_to_none(value):
        """Strip string identifiers and map empty/whitespace-only/falsy values to ``None``."""
        if isinstance(value, str):
            value = value.strip()
        return value or None

    def _run(self, query, **params):
        """Run one write query in its own session and wait for it to finish.

        ``consume()`` exhausts the result inside the call, so a failure is
        raised here at the call site rather than being deferred to session
        teardown.
        """
        with self.driver.session() as session:
            session.run(query, **params).consume()

    def create_regulation(self, name):
        """Create a regulation node (no-op if one with this name already exists)."""
        query = """
        MERGE (r:Regulation {name: $name})
        RETURN r
        """
        self._run(query, name=name)

    def create_chemical(self, name, cas=None, ec=None):
        """Create a Chemical node if CAS is provided, otherwise just a ChemicalName node.

        Args:
            name: Display name; always merged as a ``ChemicalName`` node.
            cas: CAS number. Blank or whitespace-only values count as missing.
            ec: EC number. Blank values count as missing.
        """
        cas = self._blank_to_none(cas)
        ec = self._blank_to_none(ec)
        if cas:
            # coalesce keeps an EC that is already stored but backfills it when
            # the node was first created without one; a null on both sides
            # leaves the property unset.
            query = """
            MERGE (c:Chemical {CAS: $cas})
            SET c.EC = coalesce(c.EC, $ec)
            MERGE (cn:ChemicalName {name: $name})
            MERGE (cn)-[:BELONGS_TO]->(c)
            RETURN c, cn
            """
            self._run(query, name=name, cas=cas, ec=ec)
        else:
            # Only create ChemicalName if CAS is missing
            query = """
            MERGE (cn:ChemicalName {name: $name})
            RETURN cn
            """
            self._run(query, name=name)

    def link_regulation_to_chemical(self, reg_name, cas):
        """Link a regulation to an existing chemical; does nothing if either node is absent."""
        query = """
        MATCH (c:Chemical {CAS: $cas}), (r:Regulation {name: $reg_name})
        MERGE (c)-[:REGULATED_BY]->(r)
        """
        self._run(query, cas=cas, reg_name=reg_name)

    def link_regulation_to_chemical_name(self, reg_name, chem_name):
        """Link a regulation to an existing chemical name; does nothing if either node is absent."""
        query = """
        MATCH (cn:ChemicalName {name: $chem_name}), (r:Regulation {name: $reg_name})
        MERGE (cn)-[:REGULATED_BY]->(r)
        """
        self._run(query, chem_name=chem_name, reg_name=reg_name)

    def import_json(self, json_data):
        """Import JSON into Neo4j while handling cases with missing CAS/EC numbers.

        Expects ``{"regulation": {"name": str, "chemicals": [{"name": str,
        "cas": str (optional), "ec": str (optional)}, ...]}}``.

        Raises:
            KeyError: if the regulation name, the chemical list, or a
                chemical's name is missing.
        """
        regulation_name = json_data["regulation"]["name"]
        self.create_regulation(regulation_name)

        for chemical in json_data["regulation"]["chemicals"]:
            chem_name = chemical["name"]
            # CAS / EC might be missing, empty, or padded with whitespace
            cas = self._blank_to_none(chemical.get("cas"))
            ec = self._blank_to_none(chemical.get("ec"))

            # Create chemical and chemical name if they do not exist
            self.create_chemical(chem_name, cas, ec)

            # The chemical name is always linked to the regulation; the
            # Chemical node is linked as well when a CAS number identifies it.
            if cas:
                self.link_regulation_to_chemical(regulation_name, cas)
            self.link_regulation_to_chemical_name(regulation_name, chem_name)

In [38]:
# Set up connection
# Settings are read from the environment when present so real credentials do
# not have to be written into the notebook; the fallbacks are placeholder
# values for a default local instance.
neo4j_uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")  # Change this to your Neo4j instance
neo4j_user = os.environ.get("NEO4J_USER", "neo4j")
neo4j_password = os.environ.get("NEO4J_PASSWORD", "password")

db = Neo4jChemicalGraph(neo4j_uri, neo4j_user, neo4j_password)




In [39]:
# Load JSON
# Chemicals listed in the Apple Regulated Substances Specification, each with
# its CAS and EC number.
json_data = {
    "regulation": {
        "name": "Apple Regulated Substances Specification 069-0135-M",
        "chemicals": [
            {"name": "Arsenic", "cas": "7440-38-2", "ec": "231-148-6"},
            {"name": "Benzene", "cas": "71-43-2", "ec": "200-753-7"},
            {"name": "Beryllium", "cas": "7440-41-7", "ec": "231-150-7"},
            # Abbreviation is spelled with Latin letters (not Greek look-alike
            # capitals) so that plain-text name lookups match.
            {"name": "Bisphenol A (BPA)", "cas": "80-05-7", "ec": "201-245-8"},
            {"name": "Bromine", "cas": "7726-95-6", "ec": "231-778-1"},
            {"name": "Cadmium", "cas": "7440-43-9", "ec": "231-152-8"},
            {"name": "Chlorine", "cas": "7782-50-5", "ec": "231-959-5"},
            {"name": "Dimethylfumarate (DMFu)", "cas": "624-49-7", "ec": "210-849-0"},
            {"name": "Formaldehyde", "cas": "50-00-0", "ec": "200-001-8"},
            {"name": "Lead", "cas": "7439-92-1", "ec": "231-100-4"},
            {"name": "Mercury", "cas": "7439-97-6", "ec": "231-106-7"},
            {"name": "n-Hexane", "cas": "110-54-3", "ec": "203-777-6"},
            {"name": "Nickel", "cas": "7440-02-0", "ec": "231-111-4"}
        ]
    }
}

# Import JSON data into Neo4j
db.import_json(json_data)

In [40]:
# Substances listed for 40 CFR Part 423. The source list carries no CAS
# numbers, so every entry gets an empty "cas" and is imported by name only.
# NOTE(review): "Methyl chloride (dichloromethane)" is kept exactly as given;
# the parenthetical looks chemically wrong (chloromethane?) — confirm against
# the regulation text before changing it.
Epa_Json = {
    "regulation": {
        "name": "40 CFR Part 423 (up to date as of 2/11/2025) Steam Electric Power Generating Point Source Category",
        "chemicals": [
            {"name": substance, "cas": ""}
            for substance in (
                "Acenaphthene",
                "Acrolein",
                "Acrylonitrile",
                "Benzene",
                "Benzidine",
                "Carbon tetrachloride (tetrachloromethane)",
                "Chlorobenzene",
                "1,2,4-Trichlorobenzene",
                "Hexachlorobenzene",
                "1,2-Dichloroethane",
                "1,1,1-Trichloroethane",
                "Hexachloroethane",
                "1,1-Dichloroethane",
                "1,1,2-Trichloroethane",
                "1,1,2,2-Tetrachloroethane",
                "Chloroethane",
                "Bis(2-chloroethyl) ether",
                "2-Chloroethyl vinyl ether (mixed)",
                "2-Chloronaphthalene",
                "2,4,6-Trichlorophenol",
                "Parachlorometa cresol",
                "Chloroform (trichloromethane)",
                "2-Chlorophenol",
                "1,2-Dichlorobenzene",
                "1,3-Dichlorobenzene",
                "1,4-Dichlorobenzene",
                "3,3'-Dichlorobenzidine",
                "1,1-Dichloroethylene",
                "1,2-Trans-Dichloroethylene",
                "2,4-Dichlorophenol",
                "1,2-Dichloropropane",
                "1,2-Dichloropropylene (1,3-Dichloropropene)",
                "2,4-Dimethylphenol",
                "2,4-Dinitrotoluene",
                "2,6-Dinitrotoluene",
                "1,2-Diphenylhydrazine",
                "Ethylbenzene",
                "Fluoranthene",
                "4-Chlorophenyl phenyl ether",
                "4-Bromophenyl phenyl ether",
                "Bis(2-chloroisopropyl) ether",
                "Bis(2-chloroethoxy) methane",
                "Methylene chloride (dichloromethane)",
                "Methyl chloride (dichloromethane)",
                "Methyl bromide (bromomethane)",
                "Bromoform (tribromomethane)",
                "Dichlorobromomethane",
                "Chlorodibromomethane",
                "Hexachlorobutadiene",
                "Hexachlorocyclopentadiene",
                "Isophorone",
                "Naphthalene",
                "Nitrobenzene",
                "2-Nitrophenol",
                "4-Nitrophenol",
                "2,4-Dinitrophenol",
                "4,6-Dinitro-o-cresol",
                "N-Nitrosodimethylamine",
                "N-Nitrosodiphenylamine",
                "N-Nitrosodi-n-propylamine",
                "Pentachlorophenol",
                "Phenol",
                "Bis(2-ethylhexyl) phthalate",
                "Butyl benzyl phthalate",
                "Di-n-butyl phthalate",
                "Di-n-octyl phthalate",
                "Diethyl phthalate",
                "Dimethyl phthalate",
                "2,3,7,8-Tetrachlorodibenzo-p-dioxin (TCDD)",
                "Antimony",
                "Arsenic",
                "Barium",
                "Beryllium",
                "Cadmium",
                "Chromium",
                "Cobalt",
                "Copper",
                "Lead",
                "Magnesium",
                "Manganese",
                "Mercury",
                "Molybdenum",
                "Nickel",
                "Selenium",
                "Silver",
                "Thallium",
                "Titanium",
                "Vanadium",
                "Zinc",
            )
        ],
    }
}

In [31]:
# Close the driver opened earlier before rebinding `db`; otherwise that
# driver is dropped without ever being closed.
db.close()
db = Neo4jChemicalGraph(neo4j_uri, neo4j_user, neo4j_password)

In [41]:
# Import the EPA list, and always close the driver afterwards, even if the
# import fails part-way through.
try:
    db.import_json(Epa_Json)
finally:
    db.close()