# GUI test

In [3]:
import numpy as np
from numpy import random as rnd
from matplotlib import pyplot as plt
import pandas as pd

In [4]:
import tkinter as tk
from tkinter.ttk import *

## Creation of toy data
Embeddings and weight matrices. Replace this section with real data when real simulations finish.

In [5]:
n_drugs = 20 # Number of drugs
n_se = 20 # Number of polypharmacy side effects
dim = 32 # Dimension of embeddings

In [6]:
side_eff = pd.read_csv('../../Thesis_datasets/Decagon/bio-decagon-effectcategories.csv',\
                 usecols=[0,1],index_col=False)
side_eff = side_eff.sample(n_se)
side_eff

Unnamed: 0,Side Effect,Side Effect Name
401,C0027947,Neutropenia
29,C0476089,Endometrial cancer
380,C0699791,Gastric Cancer
21,C0038019,spondylosis
468,C0020456,hyperglycaemia
422,C0003869,Arthritis infective
418,C1442903,bone spur
265,C0006118,brain neoplasm
541,C0152025,polyneuropathy
386,C0023467,Acute myeloblastic leukemia


In [7]:
se_name2code = {side_eff.iloc[i,1]:side_eff.iloc[i,0] for i in range(n_se)}
se2idx = {se:i for i,se in enumerate(side_eff['Side Effect Name'].values)}
se2idx

{'Neutropenia': 0,
 'Endometrial cancer': 1,
 'Gastric Cancer': 2,
 'spondylosis': 3,
 'hyperglycaemia': 4,
 'Arthritis infective': 5,
 'bone spur': 6,
 'brain neoplasm': 7,
 'polyneuropathy': 8,
 'Acute myeloblastic leukemia': 9,
 'phimosis': 10,
 'rabies': 11,
 'periostitis': 12,
 'Acne': 13,
 'coeliac disease': 14,
 'radiculopathy': 15,
 'mitral valve disease NOS': 16,
 'lung edema': 17,
 'schizoaffective disorder': 18,
 'osteomalacia': 19}

In [53]:
idx2se = {i:se for i,se in enumerate(side_eff['Side Effect Name'].values)}
idx2se

{0: 'Neutropenia',
 1: 'Endometrial cancer',
 2: 'Gastric Cancer',
 3: 'spondylosis',
 4: 'hyperglycaemia',
 5: 'Arthritis infective',
 6: 'bone spur',
 7: 'brain neoplasm',
 8: 'polyneuropathy',
 9: 'Acute myeloblastic leukemia',
 10: 'phimosis',
 11: 'rabies',
 12: 'periostitis',
 13: 'Acne',
 14: 'coeliac disease',
 15: 'radiculopathy',
 16: 'mitral valve disease NOS',
 17: 'lung edema',
 18: 'schizoaffective disorder',
 19: 'osteomalacia'}

In [38]:
emb = rnd.rand(n_drugs,dim) # Embedding matrix
R = np.abs(rnd.normal(loc=0,scale=0.25,size=(dim,dim))) # Global trainable parameter R
D_list = [] # Side effect specific trainable matrices D_r
for d in range(n_se):
    D_list.append(np.diag(rnd.normal(loc=0,scale=0.25,size=(dim))))

In [39]:
D_list

[array([[-0.09298245,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        , -0.16613266,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        , -0.17572981, ...,  0.        ,
          0.        ,  0.        ],
        ...,
        [ 0.        ,  0.        ,  0.        , ..., -0.13226851,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.14379033,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        , -0.12793918]]),
 array([[-0.15897117,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.38914395,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        , -0.10612638, ...,  0.        ,
          0.        ,  0.        ],
        ...,
        [ 0.        ,  0.        ,  0.        , ...,  

In [9]:
drugs = pd.read_csv('../../Thesis_datasets/Decagon/bio-decagon-combo.csv',usecols=[0])
drugs = drugs['STITCH 1'].unique()
drugs = rnd.choice(drugs,size=n_drugs)
drugs

array(['CID000001071', 'CID001349907', 'CID000004893', 'CID000004425',
       'CID000001986', 'CID000003639', 'CID000003826', 'CID000060754',
       'CID000074989', 'CID000072054', 'CID000060852', 'CID000002676',
       'CID000004730', 'CID000002435', 'CID000004675', 'CID000003454',
       'CID000004168', 'CID000002578', 'CID000005466', 'CID000004691'],
      dtype=object)

In [10]:
# Probably not neessary
drug2idx = {d: i for i, d in enumerate(drugs)}
drug2idx

{'CID000001071': 0,
 'CID001349907': 1,
 'CID000004893': 2,
 'CID000004425': 3,
 'CID000001986': 4,
 'CID000003639': 5,
 'CID000003826': 6,
 'CID000060754': 7,
 'CID000074989': 8,
 'CID000072054': 9,
 'CID000060852': 10,
 'CID000002676': 11,
 'CID000004730': 12,
 'CID000002435': 13,
 'CID000004675': 14,
 'CID000003454': 15,
 'CID000004168': 16,
 'CID000002578': 17,
 'CID000005466': 18,
 'CID000004691': 19}

In [11]:
DATA = pd.DataFrame(drug2idx.keys())
DATA.to_csv('d_codes.csv',index=False)

### Intermediate step:
To retrieve the drug names, the exported files with the STITCH codes is uploaded to the STITCH webpage and an HTML with the translation is downloaded and cleaned using bash. A textfile with the names is imported and cleaned in the following steps:

In [12]:
DF = pd.read_csv('d_names.txt',sep="'>",engine='python',header=None,index_col=False,\
                usecols=[2],names=['drug'])
d_names = DF['drug'].str.split('<',expand=True)[0].values
d_names

array(['2-fluoroadenosine', 'epinephrine solution', 'chlordiazepoxide',
       'desmopressin', 'phentermine', 'omeprazole', '5-fluorouracil',
       'AC1L1DSH', 'Dantrolene-13C3', 'morphine-D6', 'ranolazine',
       'iopromide', 'doxazosin', 'trimipramine', 'aripiprazole',
       'flurbiprofen', 'Eunades', "2',3'-dideoxyinosine", 'cromoglicate',
       'oxaprozin'], dtype=object)

In [13]:
name2idx = {drug:i for i,drug in enumerate(d_names)}
name2idx

{'2-fluoroadenosine': 0,
 'epinephrine solution': 1,
 'chlordiazepoxide': 2,
 'desmopressin': 3,
 'phentermine': 4,
 'omeprazole': 5,
 '5-fluorouracil': 6,
 'AC1L1DSH': 7,
 'Dantrolene-13C3': 8,
 'morphine-D6': 9,
 'ranolazine': 10,
 'iopromide': 11,
 'doxazosin': 12,
 'trimipramine': 13,
 'aripiprazole': 14,
 'flurbiprofen': 15,
 'Eunades': 16,
 "2',3'-dideoxyinosine": 17,
 'cromoglicate': 18,
 'oxaprozin': 19}

In [14]:
def sigmoid(x):
        return 1. / (1 + np.exp(-x))

In [54]:
def calculate_prob(drug1,drug2,n):
    """ Calculate sthe probability of the 'n' most likely side effects of the combination of
        'drug1' and 'drug2'
    """
    probs = np.zeros([n_se]) # Initialization of probability vector in zero
    idx1 = name2idx[drug1]
    idx2 = name2idx[drug2]
    # Embedding retrieving
    emb1 = emb[idx1,:]
    emb2 = emb[idx2,:].transpose()
    # Tensor factorization for each side effect
    for i in range(n_se):
        probs[i] = sigmoid(emb1@D_list[i]@R@D_list[i]@emb2)
    inds = probs.argsort()[-n:][::-1]
    drug_probs = {idx2se[i]:probs[i] for i in inds}
    return drug_probs

In [55]:
calculate_prob('phentermine','ranolazine',2)

{'bone spur': 0.6053613557907187, 'phimosis': 0.5756076493889599}