# Initializing the Guide to Pharmacology Database

---



## Setup and Data Loading

First, let's review the setup and data loading process:

In [None]:
!pip install -q rdflib-neo4j openpyxl

In [None]:
from rdflib_neo4j import Neo4jStoreConfig
from rdflib_neo4j import HANDLE_VOCAB_URI_STRATEGY
# from google.colab import userdata

In [None]:
import os

# Connection details are read from environment variables so that no
# credentials are stored in the notebook; each value is None when unset.
# The URI is a Bolt address, e.g. 'bolt://172.18.176.1:7687'.
NEO_DB_URI = os.environ.get('NEO4J_LCL_URI')
NEO_DB_USERNAME = os.environ.get('NEO4J_USERNAME')
NEO_DB_PWD = os.environ.get('NEO4J_LCL_PASSWORD')


In [None]:
# Connection settings handed to Neo4jStoreConfig further down.
auth_data = dict(
    uri=NEO_DB_URI,
    database="neo4j",
    user=NEO_DB_USERNAME,
    pwd=NEO_DB_PWD,
)

## Define namespaces and configuration


In [None]:
from rdflib import Namespace

# Prefix -> rdflib Namespace mapping, passed to the store config as
# `custom_prefixes`. Add other required prefixes based on your data inspection.
prefixes = {
    prefix: Namespace(base_iri)
    for prefix, base_iri in (
        ('gtpo', 'https://rdf.guidetopharmacology.org/ns/gtpo#'),
        ('grac', 'https://rdf.guidetopharmacology.org/GRAC/'),
        ('cito', 'http://purl.org/spar/cito/'),
        ('dcat', 'http://www.w3.org/ns/dcat#'),
        ('dctypes', 'http://purl.org/dc/dcmitype/'),
        ('dct', 'http://purl.org/dc/terms/'),
        ('foaf', 'http://xmlns.com/foaf/0.1/'),
        ('freq', 'http://purl.org/cld/freq/'),
        ('idot', 'http://identifiers.org/idot/'),
        ('lexvo', 'http://lexvo.org/ontology#'),
        ('pav', 'http://purl.org/pav/'),
        ('prov', 'http://www.w3.org/ns/prov#'),
        ('rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'),
        ('rdfs', 'http://www.w3.org/2000/01/rdf-schema#'),
        ('schemaorg', 'http://schema.org/'),
        ('sd', 'http://www.w3.org/ns/sparql-service-description#'),
        ('sio', 'http://semanticscience.org/resource/'),
        ('void', 'http://rdfs.org/ns/void#'),
        ('void-ext', 'http://ldf.fi/void-ext#'),
        ('xsd', 'http://www.w3.org/2001/XMLSchema#'),
    )
}

In [None]:
# Store configuration for the RDF -> Neo4j import.
# NOTE(review): IGNORE presumably keeps only the local part of each vocabulary
# URI (the queries below use un-prefixed names such as `Target`) — confirm
# against the rdflib-neo4j documentation.
config = Neo4jStoreConfig(
    auth_data=auth_data,
    custom_prefixes=prefixes,
    handle_vocab_uri_strategy=HANDLE_VOCAB_URI_STRATEGY.IGNORE,
    batching=True,
)

## Load the ontology data

In [None]:
from rdflib_neo4j import Neo4jStore
from rdflib import Graph
# Location of the GtoPdb RDF dump (release 2024.3, per the URL path).
file_path = 'https://www.guidetopharmacology.org/DATA/rdf/2024.3/gtp-rdf.n3'

# Graph backed by the Neo4j store configured above.
graph_store = Graph(store=Neo4jStore(config=config))
# NOTE(review): the file extension is .n3 but the parser is told "nt"
# (N-Triples) — confirm the dump really is plain N-Triples.
graph_store.parse(file_path,format="nt")
# NOTE(review): True is presumably the commit-pending flag, flushing whatever
# is still buffered by batching=True — verify against the rdflib-neo4j docs.
graph_store.close(True)

This setup process installs the necessary libraries, defines authentication data, sets up namespaces, and loads the Guide to Pharmacology ontology data into a Neo4j graph database.

## Querying the Guide to Pharmacology data

Now, let's explore the GtoPdb data using a Cypher query:

In [None]:
from neo4j import GraphDatabase

# Driver used to run Cypher directly, with the same URI and credentials as the import
driver = GraphDatabase.driver(NEO_DB_URI, auth=(NEO_DB_USERNAME, NEO_DB_PWD))


#### Set up reusable helper functions

In [None]:
def run_cypher_query(query, parameters=None):
    """Run a Cypher query on the "neo4j" database and return all rows.

    Args:
        query: Cypher statement to execute.
        parameters: Optional mapping of Cypher parameter names to values
            (referenced as ``$name`` in the query). Prefer this over
            concatenating values into the query text. Defaults to None,
            which runs the query without parameters.

    Returns:
        list[dict]: One ``record.data()`` dictionary per result record, in
        the order the records were returned.
    """
    with driver.session(database="neo4j") as session:
        result = session.run(query, parameters)
        # Consume the result inside the session and convert each record
        # into a plain dictionary.
        return [record.data() for record in result]

In [None]:
# Cypher query for the "STE7 family" target family.
# The three UNWINDs produce every combination of target-family label (11),
# ligand label (6) and interaction label (10). For each combination,
# apoc.cypher.run executes a sub-query whose node labels are spliced in by
# string concatenation; it matches
#   target family <-hasTargetFamily- target <-hasTarget- interaction -hasLigand-> ligand
# and is limited to 30 rows. The family name is passed as the $label parameter.
# Requires the APOC procedures to be available on the Neo4j server.
cypher_query = """
WITH ["EnzymeTargetFamily",
      "OtherProteinTargetFamily",
      "TransporterTargetFamily",
      "GProteinCoupledReceptorTargetFamily",
      "CatalyticReceptorTargetFamily",
      "LigandGatedIonChannelTargetFamily",
      "VoltageGatedIonChannelTargetFamily",
      "NuclearHormoneReceptorTargetFamily",
      "OtherIonChannelTargetFamily",
      "GroupingTargetFamily",
      "LigandTargetFamily"] AS targetFamilyTypes,

     ["SyntheticOrganicLigand",
      "NaturalProductLigand",
      "PeptideLigand",
      "MetaboliteLigand",
      "AntibodyLigand",
      "InorganicLigand"] AS ligandTypes,

     ["AntagonistInteraction",
      "AgonistInteraction",
      "ActivatorInteraction",
      "Interaction",
      "InhibitorInteraction",
      "AntibodyInteraction",
      "ChannelBlockerInteraction",
      "AllostericModulatorInteraction",
      "GatingInhibitorInteraction",
      "SubunitSpecificInteraction"] AS interactionTypes

UNWIND targetFamilyTypes AS targetFamilyType
UNWIND ligandTypes AS ligandType
UNWIND interactionTypes AS interactionType

CALL apoc.cypher.run("
    MATCH (tf:`" + targetFamilyType + "` {label: $label})<-[r1:hasTargetFamily]-(t:Target)
    MATCH (i:`" + interactionType + "`)-[r2:hasTarget]->(t)
    MATCH (i)-[r3:hasLigand]->(l:`" + ligandType + "`)
    RETURN tf, r1, t, r2, i, r3, l
    LIMIT 30
", {label: "STE7 family"}) YIELD value

RETURN value.tf AS tf, value.r1 AS r1, value.t AS t, value.r2 AS r2, value.i AS i, value.r3 AS r3, value.l AS l
"""

In [None]:
from pprint import pprint

# Run the STE7 target-family query and pretty-print the returned records
# (run_cypher_query returns a list with one dictionary per record)
results = run_cypher_query(cypher_query)
pprint(results)

This query retrieves information about the STE7 target family, including related targets, interactions, and ligands.

### Querying for a list of drug compounds

In [None]:
# Ligand-centred lookup for a fixed list of compound names: follows
# ligand - interaction - target - target family, and optionally picks up the
# interaction's action, affinity and reference plus the ligand's
# cross-references. `xr` is included in RETURN so that the rows produced by
# the xref match carry the cross-reference instead of appearing as duplicates.
cypher_query = """
MATCH (l:Resource)
WHERE l.ligandName IN ['RGFP966', 'belinostat', 'ricolinostat', 'nexturastat A', 'pyroxamide', 'entinostat', 'resminostat', 'panobinostat', 'R306465', 'M 344', 'AR-42']
MATCH path = (l)-[:hasLigand]-(i)-[:hasTarget]-(t)-[:hasTargetFamily]-(tf)
    OPTIONAL MATCH (i)-[:hasAction]-(a)
    OPTIONAL MATCH (i)-[:hasAffinity]-(af)
    OPTIONAL MATCH (i)-[:hasReference]-(r)
    OPTIONAL MATCH (l)-[:xref]-(xr)
RETURN path, a, af, r, xr
"""

In [None]:
# Execute the compound-list query; the records are printed in the next cell
results = run_cypher_query(cypher_query)

In [None]:
from pprint import pprint

# Pretty-print the records returned by the compound-list query
pprint(results)

In [None]:
def _build_inhibitor_query(title, ligand_names):
    """Return the Cypher query that tabulates interactions for the named ligands.

    The query matches ligand - interaction - target - target family and
    optionally the interaction's action, affinity (with units) and reference,
    plus the ligand's cross-references, returning one flat row per match.

    Args:
        title: Text for the leading ``//`` Cypher comment line,
            e.g. "HDAC Inhibitors".
        ligand_names: Iterable of ``ligandName`` values to match.

    Returns:
        str: The complete Cypher query text.
    """
    # Escape backslashes and single quotes so that a name cannot terminate
    # the Cypher string literal early.
    quoted_names = ", ".join(
        "'" + name.replace("\\", "\\\\").replace("'", "\\'") + "'"
        for name in ligand_names
    )
    return f"""
// {title}
MATCH (l:Resource)
WHERE l.ligandName IN [{quoted_names}]
MATCH path = (l)-[:hasLigand]-(i)-[:hasTarget]-(t)-[:hasTargetFamily]-(tf)
    OPTIONAL MATCH (i)-[:hasAction]-(a)
    OPTIONAL MATCH (i)-[:hasAffinity]-(af)
    OPTIONAL MATCH (i)-[:hasReference]-(r)
    OPTIONAL MATCH (l)-[:xref]-(xr)
    OPTIONAL MATCH (af)-[:hasUnits]-(afu)
RETURN 
    l.label as Compound,
    l.approved as FDA_Approved,
    t.label as Target,
    tf.label as Target_Family,
    a.uri as Action,
    af.hasMedianValue as Affinity_Median,
    af.hasLowValue as Affinity_Low,
    af.hasHighValue as Affinity_High,
    afu.uri as Affinity_Units,
    r.uri as Reference,
    l.comment as Description,
    l.inChIKey as InChIKey,
    l.canonicalSMILES as SMILES,
    xr.uri as CHEMBL
ORDER BY l.ligandName, t.nomenclature
"""


# Ligand names for each inhibitor class; only these lists (and the comment
# header) differ between the three queries.
HDAC_LIGAND_NAMES = [
    'RGFP966', 'belinostat', 'ricolinostat', 'nexturastat A', 'pyroxamide',
    '5-nitroso-8-quinolinol', 'entinostat', 'resminostat', 'panobinostat',
    'R306465', 'M 344', 'AR-42',
]
CDK_LIGAND_NAMES = [
    'indisulam', 'dinaciclib', 'flavopiridol', 'Bms-265246', 'AT7519',
    'PHA-793887', 'riviciclib', 'R-547', '7-hydroxystaurosporine',
    'SB1317/TG02', 'Sns-032', 'AZD5438',
]
AURK_LIGAND_NAMES = [
    'AZD1152-HQPA', 'tozasertib', 'SNS-314 mesylate', 'AMG 900', 'ZM-447439',
    'ENMD-2076', 'CYC116', 'alisertib', 'PF-03814735', 'CCT129202',
    'CCT137690',
]

cypher_query_hdac = _build_inhibitor_query("HDAC Inhibitors", HDAC_LIGAND_NAMES)
cypher_query_cdk = _build_inhibitor_query("CDK Inhibitors", CDK_LIGAND_NAMES)
cypher_query_aurk = _build_inhibitor_query("AURK Inhibitors", AURK_LIGAND_NAMES)

In [None]:
import pandas as pd

# Run the three inhibitor queries in order (HDAC, CDK, AURK) ...
results_hdac, results_cdk, results_aurk = [
    run_cypher_query(query)
    for query in (cypher_query_hdac, cypher_query_cdk, cypher_query_aurk)
]

# ... then turn each list of record dictionaries into a DataFrame
df_hdac, df_cdk, df_aurk = [
    pd.DataFrame(rows)
    for rows in (results_hdac, results_cdk, results_aurk)
]

In [None]:
# Display the HDAC inhibitor table
df_hdac

In [None]:
# Display the CDK inhibitor table
df_cdk

In [None]:
# Display the AURK inhibitor table (this cell previously repeated df_cdk)
df_aurk

In [None]:
# Write each result table to its own .xlsx workbook, without the index column
for frame, workbook in (
    (df_hdac, 'hdac_inhibitors_data.xlsx'),
    (df_cdk, 'cdk_inhibitors_data.xlsx'),
    (df_aurk, 'aurk_inhibitors_data.xlsx'),
):
    frame.to_excel(workbook, index=False)

In [None]:
# Shut down the Neo4j driver created earlier; run_cypher_query needs a new driver after this
driver.close()

### Notes: additional tools

#### Clearing a Neo4j database

In [None]:
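# Uncomment and use only when required - a quick way to clear the database.
# WARNING: this permanently deletes every node and relationship.
# The driver is closed in an earlier cell, so re-create it (or run this before
# that cell) for the query to succeed.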

# cypher_query = """
# MATCH (n) DETACH DELETE n
# """

# results = run_cypher_query(cypher_query)
# driver.close()


## Citation

**BibTeX:**

```
@article{10.1093/nar/gkad944,
    author = {Harding, Simon D and Armstrong, Jane F and Faccenda, Elena and Southan, Christopher and Alexander, Stephen P H and Davenport, Anthony P and Spedding, Michael and Davies, Jamie A},
    title = "{The IUPHAR/BPS Guide to PHARMACOLOGY in 2024}",
    journal = {Nucleic Acids Research},
    volume = {52},
    number = {D1},
    pages = {D1438-D1449},
    year = {2023},
    month = {10},
    abstract = "{The IUPHAR/BPS Guide to PHARMACOLOGY (GtoPdb; https://www.guidetopharmacology.org) is an open-access, expert-curated, online database that provides succinct overviews and key references for pharmacological targets and their recommended experimental ligands. It includes over 3039 protein targets and 12 163 ligand molecules, including approved drugs, small molecules, peptides and antibodies. Here, we report recent developments to the resource and describe expansion in content over the six database releases made during the last two years. The database update section of this paper focuses on two areas relating to important global health challenges. The first, SARS-CoV-2 COVID-19, remains a major concern and we describe our efforts to expand the database to include a new family of coronavirus proteins. The second area is antimicrobial resistance, for which we have extended our coverage of antibacterials in partnership with AntibioticDB, a collaboration that has continued through support from GARDP. We discuss other areas of curation and also focus on our external links to resources such as PubChem that bring important synergies to the resources.}",
    issn = {0305-1048},
    doi = {10.1093/nar/gkad944},
    url = {https://doi.org/10.1093/nar/gkad944},
    eprint = {https://academic.oup.com/nar/article-pdf/52/D1/D1438/55039511/gkad944.pdf},
}
```