In [6]:
import pandas as pd
from neo4j import GraphDatabase
from neo4j.exceptions import ServiceUnavailable

# ---------- CONFIG ----------
# Connection settings for the Neo4j instance that hosts the graph; main()
# passes them to GraphDatabase.driver().
# NOTE(review): credentials are hard-coded here - consider loading them from
# the environment before sharing this notebook.
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "12345678"

# --- Relationship Definitions ---
# Values of the `Relationship` property that the queries below treat as
# drug-drug interaction (DDI) edges.
DDI_RELATIONSHIPS = [
    "DRUGBANK::ddi-interactor-in::Compound:Compound"
]

# Values of the `Relationship` property that the coverage query counts as
# compound-to-side-effect / compound-to-disease edges.
SIDE_EFFECT_RELATIONSHIPS = [
    "Hetionet::CcSE::Compound:Side Effect",
    "GNBR::Sa::Compound:Disease"
]

# --- Query Functions ---

def get_global_stats(session):
    """
    Print a global overview of the graph to stdout.

    Two read-only queries are run on ``session``: the 20 most common node
    types (the text before the first '::' in each node's ``Entity``
    property, used instead of node labels) and the 10 most common values
    of the relationships' ``Relationship`` property.

    Any exception raised while querying is caught and reported on stdout;
    nothing is returned.
    """
    # Query 1.1: node counts grouped by Entity prefix.
    node_type_cypher = """
        MATCH (n)
        WHERE n.Entity IS NOT NULL
        WITH split(n.Entity, '::')[0] AS node_type, count(n) AS count
        RETURN node_type, count
        ORDER BY count DESC
        LIMIT 20
        """
    # Query 1.2: relationship counts grouped by the Relationship property.
    rel_type_cypher = """
        MATCH ()-[r]->() 
        RETURN r.Relationship AS rel_type, count(r) AS count 
        ORDER BY count DESC LIMIT 10
        """

    # (query, heading, column holding the type name, text printed afterwards)
    sections = (
        (node_type_cypher, "Node Type Distribution (from Entity property):", 'node_type', "\n"),
        (rel_type_cypher, "Top 10 Relationship Types:", 'rel_type', "-" * 30 + "\n"),
    )

    print("--- 1. Fetching Global Stats ---")

    try:
        for cypher, heading, type_key, trailer in sections:
            rows = session.run(cypher)
            print(heading)
            for row in rows:
                print(f"  {row[type_key]}: {row['count']}")
            print(trailer)
    except Exception as err:
        print(f"✗ ERROR fetching global stats: {err}")


def get_coverage_stats(session, drug_list, category_name):
    """
    Compute graph coverage and connectivity statistics for one drug category.

    For every ID in ``drug_list`` the query looks up the node whose
    ``Entity`` property equals that ID (no label is used), counts its
    distinct DDI partners (relationships listed in ``DDI_RELATIONSHIPS``)
    and its distinct side-effect / disease neighbours (relationships listed
    in ``SIDE_EFFECT_RELATIONSHIPS``), then aggregates over the whole list.

    Args:
        session: Open Neo4j session used to run the query.
        drug_list: Entity IDs to analyse, e.g. ``"Compound::DB00123"``.
        category_name: Label used in log output and stored under the
            ``'category'`` key of the result.

    Returns:
        A dict with the keys ``list_total``, ``found_in_drkg``,
        ``coverage_percent`` (a 0-1 fraction), ``avg_ddi``, ``stddev_ddi``,
        ``max_ddi``, ``avg_se``, ``stddev_se``, ``max_se`` and ``category``;
        or ``None`` when ``drug_list`` is empty, the query returns no
        record, or an error occurs (errors are printed, not raised).
    """
    if not drug_list:
        print(f"--- Skipping category '{category_name}' (empty list) ---")
        return None

    print(f"--- 2. Analyzing Category: {category_name} ({len(drug_list)} drugs) ---")

    # The side-effect filter must use the prefix 'Side Effect::' (with a
    # space), matching the node type named in "Hetionet::CcSE::Compound:Side
    # Effect"; the previous 'SideEffect::' prefix matched no node, so those
    # neighbours were never counted.
    # NOTE(review): IDs that are not found in the graph appear to still get a
    # 0 from each subquery (aggregation over no rows), so they would be
    # included in the avg/stdev figures - confirm this is intended.
    coverage_query = """
    WITH $drug_list AS target_drugs
    WITH target_drugs, size(target_drugs) AS list_total
    UNWIND target_drugs AS drug_id
    OPTIONAL MATCH (c {Entity: drug_id}) // <-- FIX 1: Removed :Compound label

    // Get DDI counts
    CALL(c) { // <-- FIX 2: Fixed deprecation warning
        WITH c
        WITH c WHERE c IS NOT NULL 
        OPTIONAL MATCH (c)-[r_ddi]-(partner) // <-- FIX 3: Removed :Compound label
        WHERE r_ddi.Relationship IN $ddi_rels AND partner.Entity STARTS WITH 'Compound::' // <-- FIX 4: Added entity filter
        RETURN count(DISTINCT partner) AS ddi_count
    }

    // Get Side Effect counts
    CALL(c) { // <-- FIX 2: Fixed deprecation warning
        WITH c
        WITH c WHERE c IS NOT NULL 
        OPTIONAL MATCH (c)-[r_se]-(effect) // <-- FIX 5: Removed labels
        WHERE r_se.Relationship IN $se_rels AND 
              (effect.Entity STARTS WITH 'Side Effect::' OR effect.Entity STARTS WITH 'Disease::') // <-- FIX 6: Added entity filter
        RETURN count(DISTINCT effect) AS se_count
    }

    // Aggregate the results for the entire list
    RETURN
        list_total,
        count(c) AS found_in_drkg,
        CASE WHEN list_total > 0 THEN toFloat(count(c)) / list_total ELSE 0 END AS coverage_percent,
        
        avg(ddi_count) AS avg_ddi,
        stdev(ddi_count) AS stddev_ddi,
        max(ddi_count) AS max_ddi,
        
        avg(se_count) AS avg_se,
        stdev(se_count) AS stddev_se,
        max(se_count) AS max_se
    """

    try:
        result = session.run(coverage_query,
                             drug_list=drug_list,
                             ddi_rels=DDI_RELATIONSHIPS,
                             se_rels=SIDE_EFFECT_RELATIONSHIPS)

        data = result.single()

        if data:
            data_dict = dict(data)
            data_dict['category'] = category_name
            print(f"  ✓ Processed '{category_name}'.")
            return data_dict

    except Exception as e:
        print(f"✗ ERROR processing category {category_name}: {e}")

    return None

def find_drugs_with_no_ddis(session, drug_list, category_name):
    """
    List drugs from ``drug_list`` that have no DDI relationship (Query 3.1).

    Only IDs that match a node's ``Entity`` property are considered (IDs
    absent from the graph are skipped by the MATCH), and the query returns
    at most 25 rows. Each hit is printed together with its ``name``
    property.

    Returns the Entity IDs that were found; an empty list when
    ``drug_list`` is empty. Query errors are printed rather than raised, in
    which case whatever was collected so far is returned.
    """
    if not drug_list:
        return []

    print(f"--- 3. Finding Drugs with 0 DDIs in: {category_name} ---")

    no_ddi_cypher = """
    WITH $drug_list AS target_drugs
    UNWIND target_drugs AS drug_id
    MATCH (c {Entity: drug_id}) // <-- FIX 1: Removed :Compound label

    WHERE NOT EXISTS {
      (c)-[r]-(partner) // <-- FIX 2: Removed :Compound label
      WHERE r.Relationship IN $ddi_rels AND partner.Entity STARTS WITH 'Compound::' // <-- FIX 3: Added entity filter
    }

    RETURN c.Entity AS drug_with_no_ddi, c.name AS drug_name
    LIMIT 25
    """

    isolated_ids = []
    try:
        rows = session.run(
            no_ddi_cypher,
            drug_list=drug_list,
            ddi_rels=DDI_RELATIONSHIPS,
        )

        for row in rows:
            entity_id = row['drug_with_no_ddi']
            isolated_ids.append(entity_id)
            print(f"  Found: {entity_id} (Name: {row['drug_name']})")

        if len(isolated_ids) == 0:
            print(f"  ✓ All drugs in '{category_name}' have at least one DDI.")

    except Exception as err:
        print(f"✗ ERROR finding 0-DDI drugs in {category_name}: {err}")

    print("-" * 30 + "\n")
    return isolated_ids


# --- Main Execution ---

def main():
    """
    Run the coverage analysis end to end and print the results.

    Steps: connect to Neo4j, print global node/relationship statistics,
    compute coverage statistics for each drug category, print them as a
    Markdown table, then list orphan drugs without any DDI relationship.

    Connection failures are reported on stdout and end the run early. Once
    a driver has been created it is always closed, including when
    connectivity verification or one of the analysis steps raises.
    """
    #
    # === ⬇️ ACTION REQUIRED ⬇️ ===
    #
    # Populate these lists with your actual normalized DRKG drug IDs
    # (e.g., "Compound::DB00123") for each category.
    #
    # NOTE(review): several ID/name pairs below look mismatched (e.g. the
    # IDs commented as Metformin, Simvastatin, Amlodipine, Omeprazole and
    # Somatropin) - verify each ID against DrugBank before using the
    # results. TODO confirm.
    common_meds_list = [
        "Compound::DB00945", # Metformin
        "Compound::DB00217", # Simvastatin
        "Compound::DB00482", # Amlodipine
        "Compound::DB01060"  # Omeprazole
    ]

    orphan_drugs_list = [
        "Compound::DB00007", # Leuprolide
        "Compound::DB00014", # Goserelin
        "Compound::DB00034"  # Somatropin
    ]

    # This list drives the analysis loop
    categories_to_analyze = [
        ("Common Medications", common_meds_list),
        ("Orphan Drugs", orphan_drugs_list),
    ]

    # --- Connect to Neo4j ---
    # `driver` stays None until creation succeeds so the failure paths know
    # whether there is anything to release.
    driver = None
    try:
        driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
        driver.verify_connectivity()
        print(f"✓ Successfully connected to Neo4j at {NEO4J_URI}.\n")
    except ServiceUnavailable as e:
        print(f"✗ ERROR: Could not connect to Neo4j at {NEO4J_URI}.")
        print("  Please check your connection details and ensure the database is running.")
        print(f"  Details: {e}")
        if driver is not None:
            driver.close()
        return
    except Exception as e:
        print(f"✗ An unexpected error occurred during connection: {e}")
        if driver is not None:
            driver.close()
        return

    all_category_stats = []

    try:
        with driver.session() as session:

            # 1. Run Global Stats (for the "Data" section of your paper)
            get_global_stats(session)

            # 2. Run Coverage Stats for each category
            for category_name, drug_list in categories_to_analyze:
                stats = get_coverage_stats(session, drug_list, category_name)
                if stats:
                    all_category_stats.append(stats)

            # 3. Format Coverage Stats into the final table
            if all_category_stats:
                print("--- 4. Summary Table: Coverage & Connectivity ---")
                df = pd.DataFrame(all_category_stats)
                df = df.set_index('category')

                # Pre-format the float columns as strings for display.
                df['coverage_percent'] = (df['coverage_percent'] * 100).map('{:,.1f}%'.format)
                df['avg_ddi'] = df['avg_ddi'].map('{:,.2f}'.format)
                df['avg_se'] = df['avg_se'].map('{:,.2f}'.format)

                cols_to_show = [
                    'list_total',
                    'found_in_drkg',
                    'coverage_percent',
                    'avg_ddi',
                    'max_ddi',
                    'avg_se',
                    'max_se'
                ]

                # Print as Markdown
                print(df[cols_to_show].to_markdown(floatfmt=".2f"))
                print("-" * 30 + "\n")

            # 4. Run Deeper Analysis
            find_drugs_with_no_ddis(session, orphan_drugs_list, "Orphan Drugs")
    finally:
        # Release the connection pool even if an analysis step raised.
        driver.close()

    print("✓ Analysis complete. Connection closed.")

if __name__ == "__main__":
    main()

✓ Successfully connected to Neo4j at bolt://localhost:7687.

--- 1. Fetching Global Stats ---
Node Type Distribution (from Entity property):
  Gene: 78440
  Compound: 48626
  Biological Process: 22762
  Side Effect: 11402
  Disease: 10206
  Atc: 8096
  Molecular Function: 5768
  Pathway: 3644
  Cellular Component: 2782
  Symptom: 830
  Anatomy: 800
  Pharmacologic Class: 690
  Tax: 430


Top 10 Relationship Types:
  DRUGBANK::ddi-interactor-in::Compound:Compound: 1377081
  Hetionet::GpBP::Gene:Biological Process: 559504
  Hetionet::AeG::Anatomy:Gene: 526407
  STRING::OTHER::Gene:Gene: 310690
  Hetionet::Gr>G::Gene:Gene: 261460
  STRING::REACTION::Gene:Gene: 202791
  STRING::BINDING::Gene:Gene: 177054
  STRING::CATALYSIS::Gene:Gene: 170413
  Hetionet::CcSE::Compound:Side Effect: 138944
  INTACT::PHYSICAL ASSOCIATION::Gene:Gene: 113741
------------------------------

--- 2. Analyzing Category: Common Medications (4 drugs) ---
  ✓ Processed 'Common Medications'.
--- 2. Analyzing Category:

In [14]:
import os
import pandas as pd
from neo4j import GraphDatabase
from neo4j.exceptions import ServiceUnavailable

# ---------- CONFIG ----------
# Connection settings passed to GraphDatabase.driver() in main().
# NOTE(review): credentials are hard-coded here - consider loading them from
# the environment before sharing this notebook.
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "12345678"

# Define relationship patterns
# Values of the `Relationship` property that count as a direct drug-drug
# interaction (DDI) edge in the case-study query.
DDI_RELATIONSHIPS = [
    "DRUGBANK::ddi-interactor-in::Compound:Compound"
]

def find_specific_interaction(session, drug1_id, drug2_id, drug1_name, drug2_name):
    """
    Check whether two drugs are linked by a direct DDI relationship.

    The two nodes are looked up by their ``Entity`` property and any
    relationship between them (in either direction) whose ``Relationship``
    property is listed in ``DDI_RELATIONSHIPS`` counts as a hit. The names
    are used for console output only.

    Returns True when such a relationship exists; False when none is found
    or when the query raises (the error is printed, not propagated).
    """
    banner = '=' * 60
    print(f"\n{banner}")
    print(f"--- Analyzing Case Study: {drug1_name} + {drug2_name} ---")
    print(f"{banner}")

    direct_ddi_cypher = """
    MATCH (d1 {Entity: $drug1_id})
    MATCH (d2 {Entity: $drug2_id})
    
    // Find a direct interaction path between them
    MATCH (d1)-[r]-(d2)
    WHERE r.Relationship IN $ddi_rels
    
    RETURN r.Relationship AS relation
    LIMIT 1 // We just need to know if one exists
    """

    try:
        hit = session.run(
            direct_ddi_cypher,
            drug1_id=drug1_id,
            drug2_id=drug2_id,
            ddi_rels=DDI_RELATIONSHIPS,
        ).single()

        # Guard clause: nothing came back, so there is no direct edge.
        if not hit:
            print("  ✗ No direct interaction path found in the graph.")
            return False

        print("  ✓ SUCCESS: Found direct interaction!")
        print(f"    Path: ({drug1_name})-[{hit['relation']}]-({drug2_name})")
        return True

    except Exception as err:
        print(f"  ✗ ERROR: {err}")
        return False

# --- Main Execution ---
def main():
    """
    Run the Warfarin + Primaquine case study and print the conclusion.

    Connects to Neo4j, checks for a direct DDI relationship between the two
    drugs and prints a follow-up hint depending on the outcome. A connection
    failure is reported on stdout and ends the run early. Once a driver has
    been created it is always closed, including when connectivity
    verification or the query raises.
    """
    # Case Study: Warfarin + Primaquine
    case_study = {
        "drug1_name": "Warfarin",
        "drug1_id": "Compound::DB00682",
        "drug2_name": "Primaquine",
        "drug2_id": "Compound::DB01087"
    }

    # `driver` stays None until creation succeeds so the failure path knows
    # whether there is anything to release.
    driver = None
    try:
        driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
        driver.verify_connectivity()
        print(f"✓ Successfully connected to Neo4j at {NEO4J_URI}.\n")
    except Exception as e:
        print(f"✗ ERROR: Could not connect to Neo4j: {e}")
        if driver is not None:
            driver.close()
        return

    try:
        with driver.session() as session:
            found = find_specific_interaction(
                session,
                case_study["drug1_id"],
                case_study["drug2_id"],
                case_study["drug1_name"],
                case_study["drug2_name"]
            )
    finally:
        # Release the connection pool even if the session block raised.
        driver.close()

    if found:
        print("\n---")
        print("💡 **Next Step:** We have confirmed the direct interaction.")
        print("We can now build this case study for your paper.")
    else:
        print("\n---")
        print("💡 **Analysis:** No direct 1-hop DDI link was found.")
        print("This means the interaction is *not* captured by the 'ddi-interactor-in' relationship.")
        print("Our next step would be to search for a multi-hop *mechanistic* path (e.g., via a shared gene).")

    print("\n✓ Analysis complete.")

if __name__ == "__main__":
    main()

✓ Successfully connected to Neo4j at bolt://localhost:7687.


--- Analyzing Case Study: Warfarin + Primaquine ---
  ✓ SUCCESS: Found direct interaction!
    Path: (Warfarin)-[DRUGBANK::ddi-interactor-in::Compound:Compound]-(Primaquine)

---
💡 **Next Step:** We have confirmed the direct interaction.
We can now build this case study for your paper.

✓ Analysis complete.


In [17]:
import os
import pandas as pd
from neo4j import GraphDatabase
from neo4j.exceptions import ServiceUnavailable

# ---------- CONFIG (Global Scope) ----------
# Connection settings passed to GraphDatabase.driver() by the script below.
# NOTE(review): credentials are hard-coded here - consider loading them from
# the environment before sharing this notebook.
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "12345678"

# --- Define the relationship to EXCLUDE from the search ---
# Paths containing an edge whose `Relationship` property equals this value
# are rejected by the shortest-path query.
DIRECT_DDI_REL = "DRUGBANK::ddi-interactor-in::Compound:Compound"

# --- Define Case Study Drugs (Global Scope) ---
# Entity IDs identify the nodes to connect; the names are used for display.
WARFARIN_ID = "Compound::DB00682"
WARFARIN_NAME = "Warfarin"

PRIMAQUINE_ID = "Compound::DB01087"
PRIMAQUINE_NAME = "Primaquine"


def find_shortest_mechanistic_path(session, drug1_id, drug2_id, drug1_name, drug2_name):
    """
    Print the shortest indirect path between two drugs.

    Runs Cypher's ``shortestPath()`` (at most 4 hops) between the nodes
    whose ``Entity`` property equals ``drug1_id`` and ``drug2_id``, and
    rejects any path that uses the direct DDI relationship
    (``DIRECT_DDI_REL``). The result is printed, not returned.

    Args:
        session: Open Neo4j session used to run the query.
        drug1_id: Entity ID of the first drug, e.g. ``"Compound::DB00682"``.
        drug2_id: Entity ID of the second drug.
        drug1_name: Display name of the first drug (console output only).
        drug2_name: Display name of the second drug (console output only).

    Returns:
        None. Query errors are printed rather than raised.
    """
    print(f"\n{'='*60}")
    print(f"Query: Finding Shortest Mechanistic Path (BFS) for:")
    print(f"  {drug1_name} <---> {drug2_name}")
    print(f"{'='*60}")

    # We limit the path length to 4 hops (e.g., Drug->Gene->Gene->Drug)
    # and exclude the direct DDI relationship.
    query = """
    MATCH (d1 {Entity: $drug1_id}), (d2 {Entity: $drug2_id})
    
    // Find the shortest path up to 4 hops
    MATCH p = shortestPath((d1)-[rels*..4]-(d2))
    
    // WHERE clause to filter the relationships in the path
    // We ensure NONE of the relationships are the direct DDI link
    WHERE ALL(r IN rels WHERE r.Relationship <> $exclude_rel)
    
    RETURN 
        nodes(p) AS path_nodes,    // Get all nodes in the path
        relationships(p) AS path_rels // Get all relationships in the path
    LIMIT 1 // We only want the single shortest path
    """

    def display_name(node):
        # Prefer the human-readable name; fall back to the Entity ID.
        return node['name'] or node['Entity']

    try:
        # Use the caller's IDs. Previously the module-level Warfarin and
        # Primaquine constants were passed here, so the arguments were ignored.
        result = session.run(
            query,
            drug1_id=drug1_id,
            drug2_id=drug2_id,
            exclude_rel=DIRECT_DDI_REL
        )

        record = result.single()

        if not record:
            print("  ✗ No indirect (mechanistic) path found within 4 hops.")
            return

        # --- If we found a path, print it beautifully ---
        print("  ✓ SUCCESS: Found a mechanistic path!")

        nodes = record["path_nodes"]
        rels = record["path_rels"]

        # A path has one more node than relationships, so zip() pairs each
        # relationship with the node it is traversed from.
        # NOTE: the pattern is undirected, so the printed "->" shows the
        # traversal order, not necessarily the stored edge direction.
        parts = []
        for start_node, rel in zip(nodes, rels):
            parts.append(f"  ({display_name(start_node)})\n")
            parts.append(f"      -[{rel['Relationship']}]->\n")

        # Add the very last node
        parts.append(f"  ({display_name(nodes[-1])})\n")

        print("".join(parts))
        print(f"  This {len(rels)}-hop path explains the 'why' behind the interaction.")

    except Exception as e:
        print(f"  ✗ ERROR: {e}")

# --- Main Execution (Global Scope) ---

# Connect, run the path search for the configured drug pair, and always
# release the driver once it exists - even when connectivity verification
# or the query raises.
try:
    # 1. Connect to the database
    driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
    try:
        driver.verify_connectivity()
        print(f"✓ Successfully connected to Neo4j at {NEO4J_URI}.\n")

        # 2. Create a session
        with driver.session() as session:
            # 3. Run the BFS query
            find_shortest_mechanistic_path(
                session,
                WARFARIN_ID,
                PRIMAQUINE_ID,
                WARFARIN_NAME,
                PRIMAQUINE_NAME
            )
    finally:
        # 4. Close the connection
        driver.close()

    print("\n✓ Analysis complete.")

except Exception as e:
    print(f"✗ An unexpected error occurred: {e}")

✓ Successfully connected to Neo4j at bolt://localhost:7687.


Query: Finding Shortest Mechanistic Path (BFS) for:
  Warfarin <---> Primaquine
  ✓ SUCCESS: Found a mechanistic path!
  (Compound::DB00682)
      -[Hetionet::CbG::Compound:Gene]->
  (Gene::1544)
      -[Hetionet::CbG::Compound:Gene]->
  (Compound::DB01087)

  This 2-hop path explains the 'why' behind the interaction.

✓ Analysis complete.


In [19]:
import os
import pandas as pd
from neo4j import GraphDatabase
from neo4j.exceptions import ServiceUnavailable

# ---------- CONFIG (Global Scope) ----------
# Connection settings passed to GraphDatabase.driver() by the script below.
# NOTE(review): credentials are hard-coded here - consider loading them from
# the environment before sharing this notebook.
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "12345678"
# Maximum number of partner rows returned. The query applies LIMIT without
# ORDER BY, so this is an arbitrary subset of the partners, not a ranking.
MAX_RESULTS = 50 # Get the top 50 interacting drugs

# Define relationship patterns
# Values of the `Relationship` property that count as a direct drug-drug
# interaction (DDI) edge.
DDI_RELATIONSHIPS = [
    "DRUGBANK::ddi-interactor-in::Compound:Compound"
]

# --- Define Drug to Study (Global Scope) ---
# NOTE(review): verify that this DrugBank ID really corresponds to the drug
# named below - TODO confirm.
DRUG_ID_TO_STUDY = "Compound::DB00854"
DRUG_NAME_TO_STUDY = "Cefpodoxime (Cepodem)"


def find_interactions_for_drug(session, drug_id, drug_name):
    """
    List the direct DDI partners of a single drug and print them as a table.

    The drug is looked up by its ``Entity`` property; partners are nodes
    joined to it by a relationship listed in ``DDI_RELATIONSHIPS`` whose own
    ``Entity`` starts with 'Compound::'. At most ``MAX_RESULTS`` rows are
    requested.

    Returns a list of ``{"id": ..., "name": ...}`` dicts, where a missing
    name is reported as "Unknown". Returns an empty list when nothing is
    found or when an error occurs (errors are printed, not raised).
    """
    banner = '=' * 60
    print(f"\n{banner}")
    print(f"Querying interactions for: {drug_name} ({drug_id})")
    print(f"{banner}")

    partners_cypher = """
    MATCH (drug {Entity: $drug_id})-[r]-(partner)
    WHERE r.Relationship IN $ddi_rels
      AND partner.Entity STARTS WITH 'Compound::' 
    
    RETURN 
        partner.Entity AS interacting_drug_id,
        partner.name AS interacting_drug_name
    LIMIT $max_results
    """

    try:
        rows = session.run(
            partners_cypher,
            drug_id=drug_id,
            ddi_rels=DDI_RELATIONSHIPS,
            max_results=MAX_RESULTS,
        )

        partners = [
            {
                "id": row["interacting_drug_id"],
                "name": row["interacting_drug_name"] or "Unknown",
            }
            for row in rows
        ]

        if len(partners) == 0:
            print(f"  ✗ No direct interactions found for {drug_name} in the graph.")
            return []

        print(f"✓ Found {len(partners)} interacting drugs for {drug_name}:")

        # Format as a clean table
        table = pd.DataFrame(partners)
        print(table.to_markdown(index=False))

        return partners

    except Exception as err:
        print(f"  ✗ ERROR: {err}")
        return []

# --- Main Execution (Global Scope) ---

# Connect, list the DDI partners of the configured drug, and always release
# the driver once it exists - even when connectivity verification or the
# query raises.
try:
    # 1. Connect to the database
    driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
    try:
        driver.verify_connectivity()
        print(f"✓ Successfully connected to Neo4j at {NEO4J_URI}.\n")

        # 2. Create a session
        with driver.session() as session:
            # 3. Run the query for Cefpodoxime
            interactions_list = find_interactions_for_drug(
                session,
                DRUG_ID_TO_STUDY,
                DRUG_NAME_TO_STUDY
            )
    finally:
        # 4. Close the connection
        driver.close()

    if interactions_list:
        print("\n---")
        print("💡 **Next Step:**")
        print("Please pick one drug from this list (e.g., 'Compound::DB...').")
        print("We can then use our BFS query to find the *mechanistic path* that connects it to Cefpodoxime.")

    print("\n✓ Analysis complete.")

except Exception as e:
    print(f"✗ An unexpected error occurred: {e}")

✓ Successfully connected to Neo4j at bolt://localhost:7687.


Querying interactions for: Cefpodoxime (Cepodem) (Compound::DB00854)
✓ Found 50 interacting drugs for Cefpodoxime (Cepodem):
| id                | name    |
|:------------------|:--------|
| Compound::DB00669 | Unknown |
| Compound::DB00391 | Unknown |
| Compound::DB00409 | Unknown |
| Compound::DB00625 | Unknown |
| Compound::DB00696 | Unknown |
| Compound::DB00502 | Unknown |
| Compound::DB00623 | Unknown |
| Compound::DB00831 | Unknown |
| Compound::DB00715 | Unknown |
| Compound::DB00524 | Unknown |
| Compound::DB00476 | Unknown |
| Compound::DB00690 | Unknown |
| Compound::DB00320 | Unknown |
| Compound::DB00377 | Unknown |
| Compound::DB00245 | Unknown |
| Compound::DB00427 | Unknown |
| Compound::DB00434 | Unknown |
| Compound::DB00418 | Unknown |
| Compound::DB00728 | Unknown |
| Compound::DB00283 | Unknown |
| Compound::DB00150 | Unknown |
| Compound::DB00483 | Unknown |
| Compound::DB00332 | Unknown |
| Compound::D