In [1]:
# Entity names of the target diseases. Each one is looked up in the entity
# ID map later in the notebook, so it must match an entry there exactly.
COV_disease_list = [
    "DOID:0060500",
    "DOID:0050744",
    "DOID:0050771",
    "MeSH:D006520",
]

In [2]:
# Candidate entities that are scored against every disease above.
# The order is significant: it fixes the order of the resolved ID list, so the
# hand-written sequence is kept as-is (note "HGNC:52" sits after "HGNC:60").
# NOTE(review): the identifiers carry an "HGNC:" prefix while the variable is
# named drug_list -- confirm these are the intended candidates.
drug_list = [
    "HGNC:42",
    "HGNC:51",
    "HGNC:53",
    "HGNC:54",
    "HGNC:55",
    "HGNC:56",
    "HGNC:59",
    "HGNC:60",
    "HGNC:52",
    "HGNC:61",
    "HGNC:66",
    "HGNC:67",
    "HGNC:68",
    "HGNC:69",
    "HGNC:70",
    "HGNC:71",
    "HGNC:72",
    "HGNC:73",
    "HGNC:74",
    "HGNC:76",
    "HGNC:77",
    "HGNC:78",
    "HGNC:79",
    "HGNC:80",
    "HGNC:81",
]

In [3]:
# Relation names whose embeddings are used to score candidate/disease pairs.
# Each name is resolved to an integer relation ID via the relation map file.
treatment = [
    "Semantic_Relation_GG",
    "Inferred_Relation_DDi",
    "Inferred_Relation_DiG",
]

In [4]:
# Tab-separated lookup tables, read below as two columns: id, name.
entity_idmap_file = "data/entities.tsv"      # entity id   <-> entity name
relation_idmap_file = "data/relations.tsv"   # relation id <-> relation name

# Get embeddings for diseases and drugs 

In [5]:
# Build name <-> integer-ID lookup tables from the TSV map files, then
# translate the candidate, disease and relation names into their IDs.
import csv

# Entities: read every (id, name) row once, then derive both directions.
with open(entity_idmap_file, newline='', encoding='utf-8') as entity_fh:
    entity_rows = [
        (int(record['id']), record['name'])
        for record in csv.DictReader(entity_fh, delimiter='\t', fieldnames=['id','name'])
    ]
entity_map = {name: ent_id for ent_id, name in entity_rows}      # name -> id
entity_id_map = {ent_id: name for ent_id, name in entity_rows}   # id -> name

# Relations: only the name -> id direction is needed.
with open(relation_idmap_file, newline='', encoding='utf-8') as relation_fh:
    relation_map = {
        record['name']: int(record['id'])
        for record in csv.DictReader(relation_fh, delimiter='\t', fieldnames=['id','name'])
    }

# Resolve names to IDs; an unknown name raises KeyError here.
drug_ids = [entity_map[name] for name in drug_list]
disease_ids = [entity_map[name] for name in COV_disease_list]
treatment_rid = [relation_map[name] for name in treatment]

In [6]:
# Display the integer relation IDs resolved for the names in `treatment`
# (same order as that list).
treatment_rid

[4, 1, 0]

In [7]:
# Load the trained embeddings from disk and pull out the rows needed for scoring.
import torch as th
import pandas as pd
import numpy as np
import sys

# --- TransR checkpoint (active) ---
# TransR additionally needs the projection array, which is wrapped in a tensor.
entity_emb = np.load('ckpts/TransR_TONY_2/TONY_TransR_entity.npy')
rel_emb = np.load('ckpts/TransR_TONY_2/TONY_TransR_relation.npy')
proj_emb = th.tensor(np.load('ckpts/TransR_TONY_2/TONY_TransRprojection.npy'))

# --- ComplEx checkpoint (alternative: replace the loads above) ---
# entity_emb = np.load('ckpts/ComplEx_TONY_2/TONY_ComplEx_entity.npy')
# rel_emb = np.load('ckpts/ComplEx_TONY_2/TONY_ComplEx_relation.npy')

# --- RotatE checkpoint (alternative: replace the loads above) ---
# entity_emb = np.load('ckpts/RotatE_TONY_4/TONY_RotatE_entity.npy')
# rel_emb = np.load('ckpts/RotatE_TONY_4/TONY_RotatE_relation.npy')

# Turn the Python ID lists into index tensors.
# Note: this rebinds drug_ids / disease_ids / treatment_rid from lists to tensors.
drug_ids = th.tensor(drug_ids).to(dtype=th.long)
disease_ids = th.tensor(disease_ids).to(dtype=th.long)
treatment_rid = th.tensor(treatment_rid)

# Select the embedding rows for the candidates, the diseases and each relation.
drug_rows = entity_emb[drug_ids]
drug_emb = th.tensor(drug_rows)
# NOTE(review): disease_emb_test is not referenced by the visible scoring cell,
# which indexes entity_emb per disease instead.
disease_rows = entity_emb[disease_ids]
disease_emb_test = th.tensor(disease_rows)
treatment_embs = [th.tensor(rel_emb[relation_id]) for relation_id in treatment_rid]


In [10]:
import torch.nn.functional as fn
from score_functions import *

# Score every candidate against each (relation, disease) combination.
# transR is presumably provided by the wildcard import from score_functions.
# Each pass appends one score vector (one entry per candidate) together with
# the matching candidate IDs, so scores and IDs stay aligned after th.cat.
scores_per_disease = []
dids = []
for relation_index, treatment_emb in zip(treatment_rid, treatment_embs):
    for disease_id in disease_ids:
        disease_emb = entity_emb[disease_id]
        raw_score = transR(drug_emb, treatment_emb, disease_emb, proj_emb, relation_index)
        score = fn.logsigmoid(raw_score)
        # Scorers for the alternative checkpoints:
#         score = fn.logsigmoid(transE_l2(drug_emb, treatment_emb, disease_emb))
#         score = fn.logsigmoid(complEx(drug_emb, treatment_emb, disease_emb))
#         score = fn.logsigmoid(rotatE(drug_emb, treatment_emb, disease_emb))
        scores_per_disease.append(score)
        dids.append(drug_ids)

# Flatten the per-combination results into two parallel 1-D tensors.
scores = th.cat(scores_per_disease)
dids = th.cat(dids)


In [11]:
# Sort scores in descending order: argsort is ascending, so reverse its result.
# Note: scores and dids are rebound from tensors to NumPy arrays here, so this
# cell cannot be re-run without first re-running the scoring cell.
idx = th.argsort(scores).flip(dims=[0])
scores = scores[idx].numpy()
dids = dids[idx].numpy()

In [12]:
# Keep only the best-ranked entry of each candidate, then take the top k.
# np.unique(..., return_index=True) yields the first position of every distinct
# ID; since dids is already ordered by descending score, that first position is
# the candidate's highest score. Sorting the positions restores rank order.
topk = 10
_, unique_indices = np.unique(dids, return_index=True)
rank_ordered_first_hits = np.sort(unique_indices)
topk_indices = rank_ordered_first_hits[:topk]
proposed_dids, proposed_scores = dids[topk_indices], scores[topk_indices]

In [13]:
# Peek at the integer entity ID of the top-ranked candidate.
proposed_dids[0]

5458

In [14]:
# Print the top-ranked candidates as "<entity name>\t<score>".
# proposed_dids holds at most topk entries; when fewer distinct candidates
# exist, iterating over range(topk) would raise IndexError, so the loop is
# bounded by the number of entries actually selected.
for i in range(min(topk, len(proposed_dids))):
    drug = int(proposed_dids[i])  # NumPy integer -> plain int key for entity_id_map
    score = proposed_scores[i]

    print("{}\t{}".format(entity_id_map[drug], score))

HGNC:74	-0.006118614226579666
HGNC:55	-0.007556545082479715
HGNC:61	-0.014218335039913654
HGNC:42	-0.01582128368318081
HGNC:56	-0.016762128099799156
HGNC:79	-0.016790613532066345
HGNC:53	-0.02067175693809986
HGNC:76	-0.02750255912542343
HGNC:59	-0.03474847972393036
HGNC:67	-0.05467724800109863


In [15]:
# Cross-reference the proposed candidates with drugs used in COVID-19 clinical
# trials and print the ones that match as "[rank]\t<drug name>\t<score>".
# The TSV must sit next to the notebook; open() raises FileNotFoundError
# otherwise.
clinical_drugs_file = './COVID19_clinical_trial_drugs.tsv'
clinical_drug_map = {}  # drug_id -> drug_name
with open(clinical_drugs_file, newline='', encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile, delimiter='\t', fieldnames=['id', 'drug_name','drug_id'])
    for row_val in reader:
        clinical_drug_map[row_val['drug_id']] = row_val['drug_name']

# Bound the loop by the entries actually selected; range(topk) alone raises
# IndexError when fewer than topk distinct candidates exist.
for i in range(min(topk, len(proposed_dids))):
    entity_name = entity_id_map[int(proposed_dids[i])]
    # Strip an optional "<Type>::" prefix instead of slicing fixed offsets
    # [10:17]: the fixed slice returns '' for short names such as "HGNC:74",
    # so no lookup could ever match. "Compound::DB00811" still yields "DB00811".
    # Assumes the drug_id column stores the identifier without that prefix --
    # TODO confirm against the TSV.
    drug = entity_name.rsplit('::', 1)[-1]
    drug_name = clinical_drug_map.get(drug)
    if drug_name is not None:
        score = proposed_scores[i]
        # Three placeholders, three arguments (the original passed the score twice).
        print("[{}]\t{}\t{}".format(i, drug_name, score))

FileNotFoundError: [Errno 2] No such file or directory: './COVID19_clinical_trial_drugs.tsv'