In [7]:
import pandas as pd
from tqdm import tqdm_notebook
import numpy as np
from tensorflow import keras

import tensorflow as tf
from tensorflow.keras import models
from tensorflow.keras import layers

import sys
from numpy import dot
from numpy.linalg import norm
from tensorflow.keras.models import load_model

import pickle

In [2]:
# Make no GPU device visible to TensorFlow (empty visible-device list for 'GPU').
tf.config.set_visible_devices([], 'GPU')

In [3]:
def get_atten_prop(test_name, prop, idx_a, idx_b):
    """Write attended property names into the 'A_atten' / 'B_atten' columns.

    For every position ``k`` in ``prop``, the rows labelled by ``idx_a[k]``
    receive ``prop[k]`` in column 'A_atten', and the rows labelled by
    ``idx_b[k]`` receive ``prop[k]`` in column 'B_atten'.

    Parameters
    ----------
    test_name : pandas.DataFrame
        Frame to annotate. It is modified in place.
    prop : sequence
        Property names, parallel to ``idx_a`` and ``idx_b``.
    idx_a, idx_b : sequence of list
        One list of ``test_name`` index labels per entry of ``prop``.

    Returns
    -------
    pandas.DataFrame
        The same ``test_name`` object that was passed in.
    """
    for position, prop_name in enumerate(prop):
        test_name.loc[idx_a[position], 'A_atten'] = prop_name
        test_name.loc[idx_b[position], 'B_atten'] = prop_name

    return test_name

def search(drug1, drug2, test_name, df):
    """Print the sentence each drug of a pair highlighted.

    The row of ``test_name`` matching (``drug1``, ``drug2``) supplies, per
    side, the attended property name ('A_atten' / 'B_atten') and the
    highlighted position ('A' / 'B'). The property's text is read from
    ``df`` (row where ``df['name']`` equals the drug), split on '.', and the
    sentence is picked by counting back from the end of that list using the
    module-level ``counter[drug][property]`` value.

    Parameters
    ----------
    drug1, drug2 : str
        Values to match against the 'DrugA' and 'DrugB' columns.
    test_name : pandas.DataFrame
        Needs columns 'DrugA', 'DrugB', 'A', 'B', 'A_atten', 'B_atten'.
    df : pandas.DataFrame
        Property texts; needs a 'name' column plus one column per property.

    Returns
    -------
    None
        Everything is reported through ``print``.
    """
    pair_mask = (test_name['DrugA'] == drug1) & (test_name['DrugB'] == drug2)
    selected = test_name[pair_mask]

    if selected.empty:
        print('Not in testset')
        return

    # A side whose attended property is still None has no property assigned.
    for atten_col, drug in (('A_atten', drug1), ('B_atten', drug2)):
        if selected[atten_col].values[0] is None:
            print('{} highlighted padding vector. Try again'.format(drug))
            return

    # Name of the attended property on each side
    a_atten = selected['A_atten'].iloc[0]
    b_atten = selected['B_atten'].iloc[0]

    # Property rows of the two requested drugs
    rows_a = df[df['name'] == drug1]
    rows_b = df[df['name'] == drug2]

    # Full text of the attended property
    text_a = rows_a[a_atten].values[0]
    text_b = rows_b[b_atten].values[0]

    # Split into sentences; the fragment after the last '.' is discarded
    sen_list_a = text_a.split('.')[:-1]
    sen_list_b = text_b.split('.')[:-1]

    max_a = counter[drug1][a_atten]
    max_b = counter[drug2][b_atten]

    # Negative offset from the end of the sentence list
    highlighted_a = selected['A'].values[0]
    where_a = -1 * (max_a - highlighted_a + 1)
    highlighted_b = selected['B'].values[0]
    where_b = -1 * (max_b - highlighted_b + 1)

    final_sen_a = sen_list_a[where_a]
    final_sen_b = sen_list_b[where_b]

    report = '[{}] highlighted [{}] in [{}]'
    print(report.format(drug1, final_sen_a, a_atten))
    print('\n')
    print(report.format(drug2, final_sen_b, b_atten))
    
def get_model(model_path):
    """Load a saved Keras model and recompile it.

    The model is loaded with ``compile=False`` and then compiled with the
    'adam' optimizer, 'categorical_crossentropy' loss and a single AUC
    metric named 'auc'.

    Parameters
    ----------
    model_path : str
        Path handed to ``load_model``.

    Returns
    -------
    The compiled model object.
    """
    loaded = load_model(model_path, compile=False)
    compile_options = {
        'optimizer': 'adam',
        'loss': 'categorical_crossentropy',
        'metrics': [tf.keras.metrics.AUC(name='auc')],
    }
    loaded.compile(**compile_options)

    return loaded

# side = 'DrugA' / 'DrugB'
def get_prop_idx(checker, test_name, side):
    """Group rows of ``test_name`` by the property their highlighted position falls in.

    For each row, the highlighted position (column ``side[-1]``, i.e. 'A' or
    'B') is compared with the per-property upper bounds stored in
    ``checker[drug]``. The row goes to the first property, in the order
    description, indication, pharmacodynamics, mechanism_of_action, toxicity,
    whose bound is greater than or equal to the position. A row whose position
    exceeds every bound is placed in no group.

    Parameters
    ----------
    checker : mapping
        drug name -> mapping of property name -> upper bound. This argument is
        now actually used; previously it was overwritten by the global
        ``counter``.
    test_name : pandas.DataFrame
        Needs the column named by ``side`` and the column ``side[-1]``.
    side : str
        'DrugA' or 'DrugB'.

    Returns
    -------
    tuple of list
        ``(des_idx, ind_idx, phar_idx, mech_idx, tox_idx)``, each a list of
        index labels of ``test_name``.
    """
    prop_order = (
        'description',
        'indication',
        'pharmacodynamics',
        'mechanism_of_action',
        'toxicity',
    )
    buckets = {prop_name: [] for prop_name in prop_order}
    position_col = side[-1]

    for i in tqdm_notebook(range(len(test_name))):
        row = test_name.iloc[i]
        bounds = checker[row[side]]
        high = row[position_col]

        # First bound that covers the position wins. This matches the original
        # elif chain, where reaching a branch already implies the position
        # exceeded all earlier bounds.
        for prop_name in prop_order:
            if high <= bounds[prop_name]:
                buckets[prop_name].append(row.name)
                break

    return tuple(buckets[prop_name] for prop_name in prop_order)

# NOTE: pickle.load can execute arbitrary code; only load pickle files from a trusted source.
# Path fixed from ',,/Data/...' to '../Data/...', consistent with every other data path here.
with open('../Data/class_dic.pickle', 'rb') as f:
    class_dic = pickle.load(f)

with open('../Data/feature counter.pickle', 'rb') as f:
    counter = pickle.load(f)

# Re-key counter in place by lower-cased key (the drug name used for lookups).
# Iterate over a snapshot of the keys because the dict is mutated in the loop.
for k in list(counter.keys()):
    counter[k.lower()] = counter.pop(k)

# Saved model file handed to get_model below.
model_path = '../Model/Model_save/model_final.h5'

# Test arrays stored under the keys 'test_A' and 'test_B'.
test_set = np.load('../Generated_data/test_set.npz')
test_A = test_set['test_A']
test_B = test_set['test_B']

# One-hot label table: its column names are the class labels, and its index is
# used further down to pick the matching rows of base_df.
one_hot_test = pd.read_csv('../Generated_data/one_hot_test.csv', index_col=0)
temp = one_hot_test.reset_index(drop=True)
label = list(one_hot_test.columns)

# Integer class label per test row: the name of the first column holding a 1.
y_test = []
for i in tqdm_notebook(range(len(temp))):
    one_hot_row = list(temp.iloc[i])
    y_test.append(int(label[one_hot_row.index(1)]))

# Property table; search() looks up rows via its 'name' column.
df = pd.read_csv('../Data/properties.csv', index_col=0)

model_best = get_model(model_path)

name = pd.read_csv('../Data/base_df.csv')[['DrugA', 'DrugB', 'class_label']]
# Explicit copy: columns are added to test_name later, and assigning into the
# result of a positional selection can raise SettingWithCopyWarning.
test_name = name.iloc[one_hot_test.index].copy()

# Class scores for every test pair.
pred_score = model_best.predict({'inputA': test_A, 'inputB': test_B})

# Predicted class label per row. np.argmax returns the first position of the
# row maximum, which is what the previous max()/np.where(...)[0][0] loop did.
pred_z = [int(label[top_pos]) for top_pos in np.argmax(pred_score, axis=1)]

test_name['pred_label'] = pred_z
# Index labels of the pairs whose predicted class equals the true class.
true_idx = test_name[test_name['class_label'] == test_name['pred_label']].index

# Positions in model_best.layers of the two layers whose outputs are extracted.
# NOTE(review): presumably the attention layers for drug A and drug B — confirm
# against the model summary, since these indices depend on the architecture.
ATTN_LAYER_A = 18
ATTN_LAYER_B = 19

# Sub-model with the same inputs as model_best that returns the two layer outputs.
t = tf.keras.Model(
    inputs=model_best.inputs,
    outputs=[
        model_best.layers[ATTN_LAYER_A].output,
        model_best.layers[ATTN_LAYER_B].output,
    ],
)
attn = t.predict({'inputA': test_A, 'inputB': test_B})

df_attn_1 = pd.DataFrame(attn[0].squeeze())
df_attn_2 = pd.DataFrame(attn[1].squeeze())

# Transposed so that each column corresponds to one test row.
df_A_T = df_attn_1.T
df_B_T = df_attn_2.T

# idxmax is 0-based; add 1 because the value is matched against counter.
max_pro_A = [df_A_T[col].idxmax() + 1 for col in df_A_T.columns]
max_pro_B = [df_B_T[col].idxmax() + 1 for col in df_B_T.columns]

test_name['A'] = max_pro_A
test_name['B'] = max_pro_B

# Keep only the correctly predicted pairs. Explicit copy because new columns
# are assigned to this filtered frame afterwards.
test_name = test_name.loc[true_idx].copy()

# Bucket the rows by property, once for the DrugA side and once for the DrugB side.
idx_a = list(get_prop_idx(counter, test_name, 'DrugA'))
des_idx, ind_idx, phar_idx, mech_idx, tox_idx = idx_a

idx_b = list(get_prop_idx(counter, test_name, 'DrugB'))
des2_idx, ind2_idx, phar2_idx, mech2_idx, tox2_idx = idx_b

# Start both columns as None; rows that fall in no bucket keep None.
for atten_col in ('A_atten', 'B_atten'):
    test_name[atten_col] = None

# Property names, in the same order as the buckets returned by get_prop_idx.
prop = [
    'description',
    'indication',
    'pharmacodynamics',
    'mechanism_of_action',
    'toxicity',
]

# get_atten_prop fills the two columns in place and returns the same frame.
new_test_name = get_atten_prop(test_name, prop, idx_a, idx_b)

In [None]:
######################################
# The drug pair must be in new_test_name!
# new_test_name holds only the test pairs whose predicted class equals the
# true class; for any other pair search() prints 'Not in testset'.
######################################
search('salmeterol', 'flecainide', new_test_name, df)