# GTRx
- The `treated_by` associations are fetched from the GTRx API URL listed in this [github issue](https://github.com/TranslatorSRI/Benchmarks/issues/20)
    - [API documentation](https://github.com/biothings/GTRx/tree/3bd9d1cfb63056dc455e5770b52690894d1ba7c9/)
- Inxight IDs are the same as UNII according to [this comment](https://github.com/NCATS-Tangerine/translator-api-registry/pull/109#issuecomment-1367609032)

In [1]:
import pandas as pd
import requests

In [2]:
# Query the GTRx BioThings API for every association whose predicate is `treated_by`.
url = 'https://biothings.ncats.io/gtrx/query?q=predicate:treated_by&fetch_all=True'

# A timeout keeps the notebook from hanging indefinitely on an unresponsive server.
data = requests.get(url, timeout=60)
# Fail right here on an HTTP error rather than later with an opaque JSON/KeyError.
data.raise_for_status()

drugs = data.json()
gtrx = drugs['hits']

# Only this first response is read. NOTE(review): per the BioThings docs,
# `fetch_all` results are delivered in batches and further batches must be
# requested with the returned `_scroll_id` -- confirm against the API docs.
# Refuse to continue with a silently truncated result set.
total = drugs.get('total')
if isinstance(total, int) and total > len(gtrx):
    raise RuntimeError(
        f"GTRx query reports {total} hits but only {len(gtrx)} were returned; "
        "follow '_scroll_id' to retrieve the remaining batches."
    )

In [3]:
# Flatten every GTRx hit into one row: association id, drug(s), disease and
# number of supporting references.
records = []

for entry in gtrx:
    # An intervention may list several drugs; their names and ids are joined
    # with '-' so that a drug combination still occupies a single row.
    interventions = entry['object']['intervention']
    drug_name = '-'.join(drug['description'] for drug in interventions)
    drug_inxight = '-'.join('UNII:' + drug['inxight'] for drug in interventions)

    subject = entry['subject']

    records.append({
        '_id': entry['_id'],                            # association id
        'drug_name': drug_name,
        'drug_inxight': drug_inxight,
        'disease_name': subject['condition_name'],
        'disease_omim': 'OMIM:' + subject['omim'],
        'nref': len(entry['references']),               # number of references
    })

# Build the frame once from the collected records; appending row by row with
# `df.loc[len(df)] = ...` re-allocates on every insert. `columns=` fixes the
# column order (and keeps the columns present even when there are no hits).
df = pd.DataFrame(
    records,
    columns=['_id', 'drug_inxight', 'disease_omim', 'drug_name', 'disease_name', 'nref'],
)

df

Unnamed: 0,_id,drug_inxight,disease_omim,drug_name,disease_name,nref
0,10031-OMIM:250250-66Y330CJHS,UNII:66Y330CJHS,OMIM:250250,Human immunoglobulin G,CARTILAGE-HAIR HYPOPLASIA; CHH,20
1,10031-OMIM:250250-H1250JIK0A,UNII:H1250JIK0A,OMIM:250250,Clarithromycin,CARTILAGE-HAIR HYPOPLASIA; CHH,20
2,10031-OMIM:250250-MZ1IW7Q79D,UNII:MZ1IW7Q79D,OMIM:250250,Valaciclovir,CARTILAGE-HAIR HYPOPLASIA; CHH,20
3,10301-OMIM:612562-J06Y7MXW4D-V8G4MOF2V9-2BTY8K...,UNII:J06Y7MXW4D-UNII:V8G4MOF2V9-UNII:2BTY8KH53L,OMIM:612562,Deferoxamine-Deferasirox-Deferiprone,DIAMOND-BLACKFAN ANEMIA 7,80
4,1037-OMIM:612924-A3ULP0F556,UNII:A3ULP0F556,OMIM:612924,Eculizumab,"HEMOLYTIC UREMIC SYNDROME, ATYPICAL, SUSCEPTIB...",8
...,...,...,...,...,...,...
690,9832-OMIM:603554-T5UM7HB19N,UNII:T5UM7HB19N,OMIM:603554,Hypochlorite,OMENN SYNDROME,22
691,9863-OMIM:616326-19QM69HH21-L16PUN799N-3982TWQ96G,UNII:19QM69HH21-UNII:L16PUN799N-UNII:3982TWQ96G,OMIM:616326,Pyridostigmine-Ambenonium-Neostigmine,"MYASTHENIC SYNDROME, CONGENITAL, 11, ASSOCIATE...",9
692,9863-OMIM:616326-GN83C131XS,UNII:GN83C131XS,OMIM:616326,Ephedrine,"MYASTHENIC SYNDROME, CONGENITAL, 11, ASSOCIATE...",9
693,9863-OMIM:616326-QF8SVZ843E,UNII:QF8SVZ843E,OMIM:616326,Salbutamol,"MYASTHENIC SYNDROME, CONGENITAL, 11, ASSOCIATE...",9


In [4]:
# Post-processing, mirroring the steps in DrugCentral_creative get_indications.py

# Order the rows by association id and persist the complete table before filtering.
df = df.sort_values(by='_id')
df.to_csv("data_full.tsv", sep="\t", index=False)

# Multi-drug interventions carry hyphen-joined ids; flag them and keep the rest.
is_combination = df['drug_inxight'].str.contains('-')
single_drug = df.loc[~is_combination]

# keep=False discards every row of a disease that appears more than once, so
# only diseases with exactly one remaining drug survive.
df = single_drug.drop_duplicates(subset='disease_omim', keep=False)
df

Unnamed: 0,_id,drug_inxight,disease_omim,drug_name,disease_name,nref
4,1037-OMIM:612924-A3ULP0F556,UNII:A3ULP0F556,OMIM:612924,Eculizumab,"HEMOLYTIC UREMIC SYNDROME, ATYPICAL, SUSCEPTIB...",8
5,10483-OMIM:145600-F64QU97QCR,UNII:F64QU97QCR,OMIM:145600,Dantrolene,"MALIGNANT HYPERTHERMIA, SUSCEPTIBILITY TO, 1",1
35,10591-OMIM:168300-K94FTS1806,UNII:K94FTS1806,OMIM:168300,Flecainide,PARAMYOTONIA CONGENITA OF VON EULENBURG; PMC,12
52,10593-OMIM:601144-L628TT009W,UNII:L628TT009W,OMIM:601144,Isoprenaline,BRUGADA SYNDROME 1; BRGDA1,10
56,10593-OMIM:608567-N7Z035406B,UNII:N7Z035406B,OMIM:608567,Cilostazol,SICK SINUS SYNDROME 1; SSS1,11
...,...,...,...,...,...,...
660,8979-OMIM:615214-66Y330CJHS,UNII:66Y330CJHS,OMIM:615214,Human immunoglobulin G,"AGAMMAGLOBULINEMIA 7, AUTOSOMAL RECESSIVE",3
663,9115-OMIM:212065-O26FZP769L,UNII:O26FZP769L,OMIM:212065,Lorazepam,"CONGENITAL DISORDER OF GLYCOSYLATION, TYPE Ia;...",3
670,9413-OMIM:615966-66Y330CJHS,UNII:66Y330CJHS,OMIM:615966,Human immunoglobulin G,IMMUNODEFICIENCY 26 WITH OR WITHOUT NEUROLOGIC...,65
673,9577-OMIM:614023-452VLY9402,UNII:452VLY9402,OMIM:614023,Serine,PHOSPHOSERINE PHOSPHATASE DEFICIENCY,5


In [5]:
# Draw a fixed-size subset of the filtered table and save it as a TSV file.
# The seed is pinned so the same rows are selected on every run.
SAMPLE_SIZE = 50
SAMPLE_SEED = 3000

data_tsv = df.sample(n=SAMPLE_SIZE, random_state=SAMPLE_SEED)
data_tsv.to_csv("data.tsv", sep="\t", index=False)
data_tsv

Unnamed: 0,_id,drug_inxight,disease_omim,drug_name,disease_name,nref
122,11429-OMIM:603552-6PLQ3CP4P3,UNII:6PLQ3CP4P3,OMIM:603552,Etoposide,"HEMOPHAGOCYTIC LYMPHOHISTIOCYTOSIS, FAMILIAL, ...",6
575,704-OMIM:617718-66Y330CJHS,UNII:66Y330CJHS,OMIM:617718,Human immunoglobulin G,IMMUNODEFICIENCY 71 WITH INFLAMMATORY DISEASE ...,3
115,11117-OMIM:253400-5Z9SP3X666,UNII:5Z9SP3X666,OMIM:253400,Nusinersen,"SPINAL MUSCULAR ATROPHY, TYPE III",5
273,19750-OMIM:243150-66Y330CJHS,UNII:66Y330CJHS,OMIM:243150,Human immunoglobulin G,GASTROINTESTINAL DEFECTS AND IMMUNODEFICIENCY ...,10
155,1366-OMIM:274150-U5SFU33XUX,UNII:U5SFU33XUX,OMIM:274150,Apadamtase alfa,"THROMBOTIC THROMBOCYTOPENIC PURPURA, HEREDITARY",25
465,4696-OMIM:253220-7XZ4062R17,UNII:7XZ4062R17,OMIM:253220,Vestronidase alfa,"MUCOPOLYSACCHARIDOSIS, TYPE VII",5
324,2578-OMIM:306400-21K6M2I7AG,UNII:21K6M2I7AG,OMIM:306400,Interferon gamma-1b,"GRANULOMATOUS DISEASE, CHRONIC, X-LINKED; CGDX",77
329,2591-OMIM:202010-WI4X0X7BPJ,UNII:WI4X0X7BPJ,OMIM:202010,Hydrocortisone,"ADRENAL HYPERPLASIA, CONGENITAL, DUE TO STEROI...",5
526,5964-OMIM:613148-B72HH48FLU,UNII:B72HH48FLU,OMIM:613148,Infliximab,"INFLAMMATORY BOWEL DISEASE 28, AUTOSOMAL RECES...",42
408,3551-OMIM:306900-27Y83O992Q,UNII:27Y83O992Q,OMIM:306900,Nonacog beta pegol,HEMOPHILIA B,23
