## Imports

In [2]:
import pandas as pd
import json


### DD-DB (Disease Drug-Database) (2017) <br>
Input: Disease (NDF-RT Notation) <br>
Output: Drug (SNOMED Notation) <br>
(484 diseases; 324 drugs; 3589 interactions) <br>
*(There has to be a better one out there; will ask Dr. Narhi.)* <br>


In [5]:
# Disease-drug pairs: the printed preview shows an NDF-RT column and a SNOMED column.
DDDB_PATH = "./Data/DDDB/NIHMS851432-supplement-1.csv"
DDDB = pd.read_csv(filepath_or_buffer=DDDB_PATH)
# Preview the first five rows (head() defaults to 5).
print(DDDB.head())

        NDF-RT     SNOMED
0  N0000004713   26929004
1  N0000004713   56267009
2  N0000004713   80098002
3  N0000004713  386806002
4  N0000004713  425390006


In [31]:
import requests

def get_drug_name_from_snomed(snomed_code):
    """Resolve a SNOMED CT code to RxNorm drug name(s) via the RxNav REST API.

    The lookup takes two requests:

    1. ``rxcui.json?idtype=SNOMEDCT&id=<code>`` maps the SNOMED CT code to
       zero or more RxNorm concept identifiers (RxCUIs).
    2. ``rxcui/<rxcui>/properties.json`` returns the name of each concept.

    Args:
        snomed_code: SNOMED CT concept identifier, as a string or an integer.

    Returns:
        A list of RxNorm concept names when at least one is found. Otherwise
        a message string: ``"No drug found for SNOMED code <code>"`` when the
        code maps to nothing, or ``"Error: <status>"`` when a request returns
        a non-200 HTTP status.

    Network-level failures (timeouts, connection errors) are not caught and
    propagate to the caller.
    """
    # Base URL for the RxNorm (RxNav) API
    base_url = "https://rxnav.nlm.nih.gov/REST"
    # Never let a stalled connection hang the notebook indefinitely.
    timeout_seconds = 10

    # Step 1: SNOMED CT code -> RxCUI(s). Passing params lets requests do the
    # URL encoding instead of interpolating the code into the URL by hand.
    response = requests.get(
        f"{base_url}/rxcui.json",
        params={"idtype": "SNOMEDCT", "id": str(snomed_code)},
        timeout=timeout_seconds,
    )
    if response.status_code != 200:
        return f"Error: {response.status_code}"

    # "idGroup" is always present, but "rxnormId" is omitted when nothing maps.
    rxcuis = (response.json().get("idGroup") or {}).get("rxnormId") or []
    if not rxcuis:
        return f"No drug found for SNOMED code {snomed_code}"

    # Step 2: RxCUI -> concept name.
    drug_names = []
    for rxcui in rxcuis:
        prop_response = requests.get(
            f"{base_url}/rxcui/{rxcui}/properties.json",
            timeout=timeout_seconds,
        )
        if prop_response.status_code != 200:
            return f"Error: {prop_response.status_code}"
        name = (prop_response.json().get("properties") or {}).get("name")
        if name:
            drug_names.append(name)

    if not drug_names:
        return f"No drug found for SNOMED code {snomed_code}"
    return drug_names

# Demo: resolve a single SNOMED code and print whatever the lookup returns.
snomed_code = "26929004"  # Replace with your SNOMED code
drug_names = get_drug_name_from_snomed(snomed_code=snomed_code)
print(f"Drug name(s) for SNOMED code {snomed_code}: {drug_names}")


ImportError: cannot import name 'get_host' from 'urllib3.util.url' (c:\Users\richa\Desktop\CodingWorkspaces\DGI-Hypergraph\datatestingvenv\lib\site-packages\urllib3\util\url.py)

### DGI-DB (Drug Gene Interaction-Database) (2024) <br>
Input: Drug (Common Name) <br>
Output: Gene Name (Common Name) <br>

In [16]:
# Drug-gene interaction table, stored as a tab-separated file.
DGIDB_PATH = "./Data/DGIDB/interactions.tsv"
DGIDB = pd.read_csv(DGIDB_PATH, delimiter="\t")
# Two rows are enough to see the column layout.
print(DGIDB.head(n=2))

  gene_claim_name gene_concept_id gene_name interaction_source_db_name  \
0          CYP2D6       hgnc:2625    CYP2D6                        DTC   
1           PPARG       hgnc:9236     PPARG                        DTC   

  interaction_source_db_version interaction_type  interaction_score  \
0                        9/2/20              NaN           0.017709   
1                        9/2/20              NaN           0.840123   

       drug_claim_name       drug_concept_id             drug_name approved  \
0           RACLOPRIDE          ncit:C152139            RACLOPRIDE    False   
1  KALOPANAX-SAPONIN F  chembl:CHEMBL1833984  CHEMBL:CHEMBL1833984    False   

  immunotherapy anti_neoplastic  
0         False           False  
1         False           False  


In [18]:
# Lithium is a common bipolar drug. Keep every interaction whose drug_name
# contains "lithium" (case-insensitive); rows with a missing drug_name never match.
lithium_rows = DGIDB.loc[
    DGIDB['drug_name'].str.contains('lithium', case=False, na=False)
]
# Show the claimed gene name for each matching interaction.
print(lithium_rows["gene_claim_name"])

439            FAM178B
2722            CACNG2
2781             ASIC2
2782             SH2B1
2784            RABEP1
5820             SCN5A
8058             CREB1
8059             ADCY1
8063            OR52J3
12496            GADL1
12497              BCR
14152             TPH1
17463             CRY1
18800            ADCY2
20163         FAM177A1
20166           OR52E2
22999            MYO1H
24290            EPHX2
24519            HTR1B
25719            ABCB1
27210          GRAMD1B
27215          OR52J2P
27218            FKBP5
28558            HTR2A
30355            INPP1
36007         TNFRSF1B
38315    NCBIGENE:2030
40403             DRD2
40432            GNRH1
40482              MPO
41950             GFAP
42902            HSPA4
42950            NTRK2
45482              NTS
46330    NCBIGENE:1463
47200             LMO2
48267              FAS
48291             IL1B
48322           NFE2L2
51295           MAPK14
52626              MYC
52969             BDNF
54206           CAMK2G
54311      

In [19]:
from Bio import Entrez

def get_ncbi_gene_id(gene_claim_name, organism="Homo sapiens"):
    """Look up the NCBI Gene ID for a gene symbol using Entrez ESearch.

    Args:
        gene_claim_name: Gene symbol to look up (e.g. ``"FAM178B"``). Values
            of the form ``"NCBIGENE:<digits>"`` already carry the Gene ID, so
            the digits are returned directly without a network call.
        organism: Organism name used to restrict the search. Pass ``None`` or
            an empty string to search all organisms.

    Returns:
        The first matching Gene ID as a string, or ``None`` when the input is
        empty or the search returns no IDs.
    """
    name = "" if gene_claim_name is None else str(gene_claim_name).strip()
    if not name:
        return None

    # Some claim names embed the Gene ID instead of a symbol; a symbol search
    # for such a value would find nothing.
    prefix, _, suffix = name.partition(":")
    if prefix.upper() == "NCBIGENE" and suffix.isdigit():
        return suffix

    # Set your email address for Entrez (NCBI API)
    Entrez.email = "richard_huang@ucsb.edu"

    # Restrict the match to the gene-symbol field (and optionally the
    # organism). An unrestricted free-text search returns the first record
    # that merely mentions the term, which may be another gene or species.
    term = f"{name}[Gene Name]"
    if organism:
        term += f' AND "{organism}"[Organism]'

    handle = Entrez.esearch(db="gene", term=term, retmax=1)
    try:
        record = Entrez.read(handle)
    finally:
        # Always release the underlying HTTP response, even if parsing fails.
        handle.close()

    # Extract the gene ID from the search results
    id_list = record["IdList"]
    return id_list[0] if id_list else None

# Demo: resolve one gene symbol taken from the lithium interactions above.
gene_claim_name = "FAM178B"
gene_id = get_ncbi_gene_id(gene_claim_name=gene_claim_name)
print(f"NCBI Gene ID for {gene_claim_name}: {gene_id}")


NCBI Gene ID for FAM178B: 90678


### Human Net (Gene - Gene dataset) (2022) <br>
Input: Gene (NCBI Notation) <br>
Output: Gene (NCBI Notation) <br>
Used in DriverRWH

In [3]:
# Gene-gene pairs, stored as a tab-separated file.
HUMANNET_PATH = "./Data/HumanNet/HumanNet-GSP.tsv"
HUMANNET = pd.read_csv(HUMANNET_PATH, delimiter="\t")
# Preview the first five pairs (head() defaults to 5).
print(HUMANNET.head())

   Gene1  Gene2
0  54936   7405
1  10298    306
2   6366   6370
3  11021  51199
4   4940   8372


In [7]:
# Rows whose Gene1 value, compared as text, equals '1838'.
# Only the Gene1 column is checked; Gene2 is not searched.
FAM_GENE_ROWS = HUMANNET.loc[HUMANNET['Gene1'].astype(str).eq('1838')]
print(FAM_GENE_ROWS)


Empty DataFrame
Columns: [Gene1, Gene2]
Index: []


### MSigDB (Molecular Signatures Database) <br>
Input: Any given pathway or condition leading to a pathway <br>
Output: Array of genes related to that pathway


In [9]:
MSIGDB_PATH = "Data/MSigDB/c2.all.v2024.1.Hs.json"
# Parse the whole JSON document into memory (text mode is open()'s default).
with open(MSIGDB_PATH) as file:
    MSIGDB = json.load(file)
# Gene symbols listed under one example entry.
print(MSIGDB["PETRETTO_BLOOD_PRESSURE_UP"]["geneSymbols"])
# To list every top-level key instead: print(list(MSIGDB.keys()))

['CHIT1', 'EMC3', 'FOXC2', 'MAP1A', 'MUSK', 'MYO1E', 'NOVA2', 'NTRK1', 'NXPH4', 'RHOV', 'RYR2', 'ZBTB7A']


In [4]:
NCBI_PATH = "Data/NIH_NCBI/gene_info.gz"
# Loading with inferred dtypes triggers a pandas DtypeWarning (mixed types):
# some text columns hold values that look numeric in one chunk and textual in
# another (the chromosome column, for instance, shows both "8" and "7p").
# Pin every text column to str so each column has one consistent dtype.
# The numeric columns (#tax_id, GeneID, Modification_date) are still inferred.
df = pd.read_csv(
    NCBI_PATH,
    compression="gzip",
    sep="\t",
    dtype={
        column: str
        for column in (
            "Symbol",
            "LocusTag",
            "Synonyms",
            "dbXrefs",
            "chromosome",
            "map_location",
            "description",
            "type_of_gene",
            "Symbol_from_nomenclature_authority",
            "Full_name_from_nomenclature_authority",
            "Nomenclature_status",
            "Other_designations",
            "Feature_type",
        )
    },
)


  interactivity=interactivity, compiler=compiler, result=result)


In [8]:
# List the column labels of the gene_info table (keys() returns the same Index).
print(df.columns)


Index(['#tax_id', 'GeneID', 'Symbol', 'LocusTag', 'Synonyms', 'dbXrefs',
       'chromosome', 'map_location', 'description', 'type_of_gene',
       'Symbol_from_nomenclature_authority',
       'Full_name_from_nomenclature_authority', 'Nomenclature_status',
       'Other_designations', 'Modification_date', 'Feature_type'],
      dtype='object')


In [10]:
# Distinct values of the Symbol column, in order of first appearance.
symbols = pd.unique(df["Symbol"])

In [12]:
# Every row whose Symbol is exactly 'GSK3B'; no filter on #tax_id is applied.
gsk3b_row = df.loc[df['Symbol'].eq('GSK3B')]

In [14]:
# Preview the first five GSK3B rows.
print(gsk3b_row.head(5))

          #tax_id     GeneID Symbol LocusTag Synonyms  \
8035671      7888  122801850  GSK3B        -        -   
10233591     8296  138525726  GSK3B        -        -   
10294486     8319  138248789  GSK3B        -        -   
10366969     8345  128652194  GSK3B        -        -   
10473646     8384  120994338  GSK3B        -        -   

                                  dbXrefs chromosome map_location  \
8035671                                 -    5.part0            -   
10233591                                -         7p            -   
10294486                                -          8            -   
10366969                                -          3            -   
10473646  EnsemblRapid:ENSBBFG00005005729          3            -   

                              description    type_of_gene  \
8035671   glycogen synthase kinase 3 beta  protein-coding   
10233591  glycogen synthase kinase 3 beta  protein-coding   
10294486  glycogen synthase kinase 3 beta  protein-coding  