In [1]:
# https://python.langchain.com/v0.1/docs/use_cases/graph/constructing/

In [1]:
%%bash
pip install --upgrade --quiet langchain langchain-neo4j langchain-openai langgraph

In [9]:
# %%bash
# export JAVA_HOME=/home/jovyan/java/jdk-21.0.5
# /opt/neo4j-community-5.26.0/bin/neo4j-admin dbms set-initial-password password
# /opt/neo4j-community-5.26.0/bin/neo4j start

In [6]:
# Import modules.
from neo4j import GraphDatabase
import pandas as pd
import json
import time

In [7]:
# Connect to Neo4j DB.
uri = "bolt://localhost:7687"
driver = GraphDatabase.driver(uri, auth=("neo4j", "password"))

In [8]:
# Load JSON data.
import json

with open('Full_Drug_Interaction_Data.json') as f:
    data = json.load(f)


In [10]:
print("Hello World" in "Hello World, My name is Jeffery.")

True


In [1]:
from neo4j import GraphDatabase

# ---------- 连接数据库 ----------
NEO4J_URI      = "bolt://localhost:7687"
NEO4J_USER     = "neo4j"
NEO4J_PASSWORD = "password"           # 初次启动设定的密码

driver = GraphDatabase.driver(
    NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD)
)

# ---------- 可选：创建唯一约束 ----------
# 避免重复的 Drug 节点 & 重复的关系
with driver.session() as session:
    session.run("CREATE CONSTRAINT IF NOT EXISTS "
                "FOR (d:Drug) REQUIRE d.name IS UNIQUE")
    session.run("CREATE CONSTRAINT IF NOT EXISTS "
                "FOR ()-[r:INTERACTS_WITH]-() "
                "REQUIRE (r.source, r.target) IS UNIQUE")

# ---------- 批量写入 ----------
query = """
UNWIND $pairs AS p
MERGE (a:Drug {name: p.source})
MERGE (b:Drug {name: p.target})
WITH a, b, p,
     (CASE WHEN p.source < p.target THEN [p.source, p.target]
           ELSE [p.target, p.source] END) AS key
MERGE (a)-[r:INTERACTS_WITH {source: key[0], target: key[1]}]->(b)
ON CREATE SET r.description = p.description
ON MATCH  SET r.description =
    CASE WHEN NOT p.description IN split(r.description, '\\n')
         THEN r.description + '\\n' + p.description
         ELSE r.description END
"""

with driver.session() as session:
    session.write_transaction(lambda tx: tx.run(query, pairs=drug_pairs))

driver.close()

ServiceUnavailable: Couldn't connect to localhost:7687 (resolved to ('127.0.0.1:7687', '[::1]:7687')):
Failed to establish connection to ResolvedIPv4Address(('127.0.0.1', 7687)) (reason [Errno 111] Connection refused)
Failed to establish connection to ResolvedIPv6Address(('::1', 7687, 0, 0)) (reason [Errno 99] Cannot assign requested address)

In [14]:
drug_pairs_list = list(drug_pairs_set)

In [15]:
drug_pairs_list[0]

('Efonidipine', 'Sulfadiazine')

In [9]:
for i in data["links"][:10]:
    print(i)

{'description': 'Apixaban may increase the anticoagulant activities of Lepirudin.', 'id': '0', 'source': 'Lepirudin', 'target': 'Apixaban'}
{'description': 'Dabigatran etexilate may increase the anticoagulant activities of Lepirudin.', 'id': '1', 'source': 'Lepirudin', 'target': 'Dabigatran etexilate'}
{'description': 'The risk or severity of bleeding and hemorrhage can be increased when Dasatinib is combined with Lepirudin.', 'id': '2', 'source': 'Lepirudin', 'target': 'Dasatinib'}
{'description': 'The risk or severity of gastrointestinal bleeding can be increased when Lepirudin is combined with Deferasirox.', 'id': '3', 'source': 'Lepirudin', 'target': 'Deferasirox'}
{'description': 'The risk or severity of bleeding and bruising can be increased when Lepirudin is combined with Ursodeoxycholic acid.', 'id': '4', 'source': 'Lepirudin', 'target': 'Ursodeoxycholic acid'}
{'description': 'The risk or severity of bleeding and bruising can be increased when Lepirudin is combined with Glycoc

In [10]:
clean_data = {
    "nodes": [],
    "links": [],
}

def cleaning(raw_data):
    start_time = time.time()
    for i, node in enumerate(data['nodes']):
        if node["label"] not in filtered: continue
        clean_data["nodes"].append(node)
    
    for i, link in enumerate(data['links']):
        if link["target"] not in filtered or link["source"] not in filtered: continue
        clean_data["links"].append(link)
        
        if i % 50000 == 0 and i != 0:
            current_time = time.time()
            print(f"Created {i + 1} links, used {current_time - start_time} seconds.")

cleaning(data)

Created 450001 links, used 0.1029202938079834 seconds.


In [11]:
len(data["nodes"]), len(data["links"]), len(clean_data["nodes"]), len(clean_data["links"])

(16582, 2839610, 471, 74784)

In [12]:
# Function to create nodes and relationships
def create_graph(tx, data):
    start_time = time.time()
    for i, node in enumerate(data['nodes']):
        if node["label"] not in filtered: continue
        
        tx.run("CREATE (n:Node {id: $id, label: $label})", id=node['id'], label=node['label'])
    
    for i, link in enumerate(data['links']):
        if link["target"] not in filtered or link["source"] not in filtered: continue
        
        tx.run("""
        MATCH (a:Node {id: $source}), (b:Node {id: $target})
        CREATE (a)-[:RELATIONSHIP {description: $description}]->(b)
        """, source=link['source'], target=link['target'], description=link['description'])
        
        if i % 1000 == 0 and i != 0:
            current_time = time.time()
            print(f"Created {i + 1} links, used {current_time - start_time} seconds.")

In [13]:
# Insert data into Neo4j.
with driver.session() as session:
    session.execute_write(create_graph, clean_data)

driver.close()

Created 1001 links, used 47.253438234329224 seconds.
Created 2001 links, used 78.05210161209106 seconds.
Created 3001 links, used 109.42906284332275 seconds.
Created 4001 links, used 139.51378798484802 seconds.
Created 5001 links, used 170.41807389259338 seconds.
Created 6001 links, used 200.89821529388428 seconds.
Created 7001 links, used 231.37676239013672 seconds.
Created 8001 links, used 261.3435318470001 seconds.
Created 9001 links, used 291.7904486656189 seconds.
Created 10001 links, used 321.69237422943115 seconds.
Created 11001 links, used 351.78085494041443 seconds.
Created 12001 links, used 382.70699191093445 seconds.
Created 13001 links, used 412.89570593833923 seconds.
Created 14001 links, used 443.2705523967743 seconds.
Created 15001 links, used 473.3867337703705 seconds.
Created 16001 links, used 504.01982378959656 seconds.
Created 17001 links, used 534.0837721824646 seconds.
Created 18001 links, used 564.3774394989014 seconds.
Created 19001 links, used 594.6533403396606 

<Record n=<Node element_id='4:1f17da69-1a9c-425d-a392-112358e1506f:0' labels=frozenset({'Node'}) properties={'id': 'Lepirudin', 'label': 'Lepirudin'}> r=<Relationship element_id='5:1f17da69-1a9c-425d-a392-112358e1506f:0' nodes=(<Node element_id='4:1f17da69-1a9c-425d-a392-112358e1506f:0' labels=frozenset({'Node'}) properties={'id': 'Lepirudin', 'label': 'Lepirudin'}>, <Node element_id='4:1f17da69-1a9c-425d-a392-112358e1506f:1' labels=frozenset({'Node'}) properties={'id': 'Apixaban', 'label': 'Apixaban'}>) type='RELATIONSHIP' properties={'description': 'Apixaban may increase the anticoagulant activities of Lepirudin.'}> m=<Node element_id='4:1f17da69-1a9c-425d-a392-112358e1506f:1' labels=frozenset({'Node'}) properties={'id': 'Apixaban', 'label': 'Apixaban'}>>
