In [2]:
import os
import tarfile
import urllib.request
import numpy as np
import pandas as pd

In [5]:
drugs = pd.read_csv("drugs.csv")
medicines = drugs["medicines"]
sample_medicine = medicines[0]
sample_medicine

'NIVEOLI MDI, LEVOLIN INHALER MDI, MONTAIR FX TAB'

In [6]:
def get_medicines_from_string(medicines):
    return medicines.split(", ")

In [7]:
get_medicines_from_string(sample_medicine)

['NIVEOLI MDI', 'LEVOLIN INHALER MDI', 'MONTAIR FX TAB']

In [60]:
import urllib.parse
import urllib.request
import json

def normalize_drug(input_name):
    """
    Normalize any drug input (brand name, generic, or full name)
    to a DRKG-compatible node (Compound::CHEMBLxxxx or Compound::DBxxxx)
    """
    if not input_name:
        return None

    # Use the first word as heuristic
    query_name = input_name.split()[0].strip()
    print(f"Normalizing '{input_name}' using '{query_name}'")

    base_url = "https://mychem.info/v1/query"
    params = {
        "q": query_name,
        "fields": "drugbank.id,chembl.molecule_chembl_id,synonyms",
        "size": 1
    }
    url = f"{base_url}?{urllib.parse.urlencode(params)}"

    try:
        with urllib.request.urlopen(url) as response:
            data = json.loads(response.read().decode())
    except Exception as e:
        print(f"Error fetching '{query_name}': {e}")
        return None

    hits = data.get("hits", [])
    if not hits:
        print(f"No hits found for '{query_name}'")
        return None

    hit = hits[0]
    chembl = hit.get("chembl", {}).get("molecule_chembl_id")
    drugbank = hit.get("drugbank", {}).get("id")
    synonyms = hit.get("synonyms", [])

    # Optional: check if the input_name matches any synonyms (case-insensitive)
    if input_name.lower() in [s.lower() for s in synonyms]:
        print(f"Input '{input_name}' matches synonyms: {synonyms}")

    if chembl:
        print(chembl)
        return f"Compound::{chembl}"
    elif drugbank:
        print(drugbank)
        return f"Compound::{drugbank}"
    else:
        print(f"No ChemBL or DrugBank ID found for '{query_name}'")
        return None


In [62]:
normalized_drugs = [normalize_drug(drug) for drug in get_medicines_from_string(medicines[2])]
normalized_drugs

Normalizing 'CEPODEM XP 325MG TAB' using 'CEPODEM'
No hits found for 'CEPODEM'
Normalizing 'DOLO TAB 650MG' using 'DOLO'
No ChemBL or DrugBank ID found for 'DOLO'
Normalizing 'MONTAIR FX TAB' using 'MONTAIR'
CHEMBL1200681
Normalizing 'BETADINE GARGLE' using 'BETADINE'
No ChemBL or DrugBank ID found for 'BETADINE'


[None, None, 'Compound::CHEMBL1200681', None]

In [18]:
!pip install neo4j

Collecting neo4j
  Downloading neo4j-6.0.2-py3-none-any.whl.metadata (5.2 kB)
Downloading neo4j-6.0.2-py3-none-any.whl (325 kB)
Installing collected packages: neo4j
Successfully installed neo4j-6.0.2



[notice] A new release of pip is available: 25.0 -> 25.2
[notice] To update, run: C:\Users\vshmk\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [37]:
from neo4j import GraphDatabase
from collections import defaultdict

# ---------- CONFIG ----------
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "12345678"  # replace with your password
MAX_NEIGHBORS = 200

# Relation patterns
DDI_REL = "DRUGBANK::ddi-interactor-in::Compound:Compound"
SE_SIDE_REL_PATTERNS = [
    "Hetionet::CcSE::Compound:Side Effect",
    "GNBR::Sa::Compound:Disease",
]

# ---------- FUNCTION TO QUERY NEO4J ----------
def query_drug_interactions_neo4j(drug_nodes, max_neighbors=200):
    """
    Input: list of normalized DRKG node IDs (Compound::CHEMBLxxxx)
    Output: dictionary {drug_node: list of textual interaction/side-effect facts}
    """
    driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
    results = {}

    with driver.session() as session:
        for seed in drug_nodes:
            try:
                print(f"\nQuerying 1-hop neighbors for: {seed} ...")
                # 1-hop neighbors
                query = """
                MATCH (seed {Entity: $seed})-[r]-(neighbor)
                RETURN seed.Entity AS seed, r.Relationship AS relation, neighbor.Entity AS neighbor
                LIMIT $max_neighbors
                """
                records = session.run(query, seed=seed, max_neighbors=max_neighbors)
                
                chunks = []
                neighbors = set()
                found = False
                for rec in records:
                    found = True
                    relation = rec["relation"]
                    neighbor = rec["neighbor"]
                    neighbors.add(neighbor)
                    chunks.append(f"{seed} -- {relation} --> {neighbor}")
                
                if not found:
                    print(f"WARNING: No neighbors found for {seed} in the database.")
                    results[seed] = []
                    continue

                # DDIs and Side-Effects for neighbors
                for n in neighbors:
                    # DDIs
                    try:
                        ddi_query = """
                        MATCH (n {Entity: $n})-[r]-(partner)
                        WHERE r.Relationship = $ddi_rel
                        RETURN n.Entity AS neighbor, r.Relationship AS relation, partner.Entity AS partner
                        """
                        ddi_records = session.run(ddi_query, n=n, ddi_rel=DDI_REL)
                        for rec in ddi_records:
                            chunks.append(f"{rec['neighbor']} -- {rec['relation']} --> {rec['partner']}")
                    except Exception as e:
                        print(f"Error querying DDIs for {n}: {e}")

                    # Side-effects
                    try:
                        se_query = f"""
                        MATCH (n {{Entity: $n}})-[r]-(se)
                        WHERE {' OR '.join([f"r.Relationship=$pat{i}" for i in range(len(SE_SIDE_REL_PATTERNS))])}
                        RETURN n.Entity AS neighbor, r.Relationship AS relation, se.Entity AS se_entity
                        """
                        se_params = {"n": n}
                        for i, pat in enumerate(SE_SIDE_REL_PATTERNS):
                            se_params[f"pat{i}"] = pat
                        se_records = session.run(se_query, **se_params)
                        for rec in se_records:
                            chunks.append(f"{rec['neighbor']} -- {rec['relation']} --> {rec['se_entity']}")
                    except Exception as e:
                        print(f"Error querying side-effects for {n}: {e}")

                results[seed] = list(dict.fromkeys(chunks))  # deduplicate
                print(f"Found {len(results[seed])} facts for {seed}.")

            except Exception as e:
                print(f"ERROR: Failed to query {seed}: {e}")
                results[seed] = []

    driver.close()
    return results

In [55]:
normalized_drugs = [normalize_drug(drug) for drug in get_medicines_from_string(medicines[3])]
normalized_drugs

ALLEGRA
AMBROLITE
AZITHRAL
LOOZ
PEGURA
ZINCOVIT
OROFER
MONTAIR
LEVOSALBUTAMOL


[None,
 None,
 None,
 None,
 None,
 None,
 None,
 'Compound::CHEMBL1200681',
 'Compound::CHEMBL1002']

In [56]:
interaction_results = query_drug_interactions_neo4j(normalized_drugs)

for drug, facts in interaction_results.items():
    print(f"\n=== Drug: {drug} ===")
    print(f"Found {len(facts)} interaction/side-effect facts")
    for f in facts[:10]:  # print first 10 for brevity
        print("-", f)


Querying 1-hop neighbors for: None ...

Querying 1-hop neighbors for: None ...

Querying 1-hop neighbors for: None ...

Querying 1-hop neighbors for: None ...

Querying 1-hop neighbors for: None ...

Querying 1-hop neighbors for: None ...

Querying 1-hop neighbors for: None ...

Querying 1-hop neighbors for: Compound::CHEMBL1200681 ...
Found 1 facts for Compound::CHEMBL1200681.

Querying 1-hop neighbors for: Compound::CHEMBL1002 ...

=== Drug: None ===
Found 0 interaction/side-effect facts

=== Drug: Compound::CHEMBL1200681 ===
Found 1 interaction/side-effect facts
- Compound::CHEMBL1200681 -- DGIDB::ANTAGONIST::Gene:Compound --> Gene::10800

=== Drug: Compound::CHEMBL1002 ===
Found 0 interaction/side-effect facts
