In [1]:
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID";
# The GPU id to use, usually either "0" or "1";
os.environ["CUDA_VISIBLE_DEVICES"] = "0";

In [2]:
from keras.layers import Layer
from keras.layers import Conv2D
from keras.layers import Concatenate, concatenate, Reshape
from keras.layers import Conv1D, GlobalMaxPooling1D, MaxPooling1D



from keras import initializers




def _conv_layer1d(ip, t_n, f_n, filters, kernel_size, strides=1, padding='same', name=None):
    

    conv1 = Conv1D(filters, kernel_size, strides=strides, padding=padding,
                  use_bias=True, kernel_initializer='glorot_normal', name=name)(ip)
    
    reshape = Reshape((t_n, 1, filters))(conv1)
    
    return reshape



def _conv_layer1r(ip, t_n, f_n, filters, kernel_size, strides=1, padding='same', name=None):
    
    reshape1 = Reshape((t_n, f_n))(ip)
    

    conv1 = Conv1D(filters, kernel_size, strides=strides, padding=padding,
                  use_bias=True, kernel_initializer='glorot_normal', name=name)(reshape1)
    
    reshape2 = Reshape((t_n, 1, filters))(conv1)
    
    return reshape2


def _normalize_depth_vars(depth_k, depth_v, filters):
    """
    Accepts depth_k and depth_v as either floats or integers
    and normalizes them to integers.
    Args:
        depth_k: float or int.
        depth_v: float or int.
        filters: number of output filters.
    Returns:
        depth_k, depth_v as integers.
    """

    if type(depth_k) == float:
        depth_k = int(filters * depth_k)
    else:
        depth_k = int(depth_k)

    if type(depth_v) == float:
        depth_v = int(filters * depth_v)
    else:
        depth_v = int(depth_v)

    return depth_k, depth_v

Using TensorFlow backend.


In [3]:
def cindex_score(y_true, y_pred):

    g = tf.subtract(tf.expand_dims(y_pred, -1), y_pred)
    g = tf.cast(g == 0.0, tf.float32) * 0.5 + tf.cast(g > 0.0, tf.float32)

    f = tf.subtract(tf.expand_dims(y_true, -1), y_true) > 0.0
    f = tf.compat.v1.matrix_band_part(tf.cast(f, tf.float32), -1, 0)

    g = tf.reduce_sum(tf.multiply(g, f))
    f = tf.reduce_sum(f)

    return tf.where(tf.equal(g, 0), 0.0, g/f) #select

In [4]:
def get_cindex(Y, P):
#     sys.exit()
    summ = 0
    pair = 0
    
    for i in range(1, len(Y)):
        for j in range(0, i):
            if i is not j:
                if(Y[i] > Y[j]):
                    pair +=1
                    summ +=  1* (P[i] > P[j]) + 0.5 * (P[i] == P[j])
        
            
    if pair != 0:
        return summ/pair
    else:
        return 0

In [5]:
import keras
from keras.layers import Layer
import tensorflow as tf
from keras import backend as K
# from conv import *
from keras.layers import Reshape
from keras import initializers

## ######################## ##
#
#  AttentionAugmentation2D Class
#
## ######################## ## 

class AttentionAugmentation2D(Layer):

    def __init__(self, depth_k, depth_v, num_heads, relative=True, **kwargs):
        """
        Applies attention augmentation on a convolutional layer
        output.
        Args:
            depth_k: float or int. Number of filters for k.
            Computes the number of filters for `v`.
            If passed as float, computed as `filters * depth_k`.
        depth_v: float or int. Number of filters for v.
            Computes the number of filters for `k`.
            If passed as float, computed as `filters * depth_v`.
        num_heads: int. Number of attention heads.
            Must be set such that `depth_k // num_heads` is > 0.
        relative: bool, whether to use relative encodings.
        Raises:
            ValueError: if depth_v or depth_k is not divisible by
                num_heads.
        Returns:
            Output tensor of shape
            -   [Batch, Height, Width, Depth_V] if
                channels_last data format.
            -   [Batch, Depth_V, Height, Width] if
                channels_first data format.
        """
        super(AttentionAugmentation2D, self).__init__(**kwargs)

        if depth_k % num_heads != 0:
            raise ValueError('`depth_k` (%d) is not divisible by `num_heads` (%d)' % (
                depth_k, num_heads))

        if depth_v % num_heads != 0:
            raise ValueError('`depth_v` (%d) is not divisible by `num_heads` (%d)' % (
                depth_v, num_heads))

        if depth_k // num_heads < 1.:
            raise ValueError('depth_k / num_heads cannot be less than 1 ! '
                             'Given depth_k = %d, num_heads = %d' % (
                             depth_k, num_heads))

        if depth_v // num_heads < 1.:
            raise ValueError('depth_v / num_heads cannot be less than 1 ! '
                             'Given depth_v = %d, num_heads = %d' % (
                                 depth_v, num_heads))

        self.depth_k = depth_k
        self.depth_v = depth_v
        self.num_heads = num_heads
        self.relative = relative

        self.axis = 1 if K.image_data_format() == 'channels_first' else -1

    def build(self, input_shape):
        self._shape = input_shape

        # normalize the format of depth_v and depth_k
        self.depth_k, self.depth_v = _normalize_depth_vars(self.depth_k, self.depth_v,
                                                           input_shape)

        if self.axis == 1:
            _, channels, height, width = input_shape
        else:
            _, height, width, channels = input_shape

        if self.relative:
            dk_per_head = self.depth_k // self.num_heads
            
            # print(dk_per_head)

            if dk_per_head == 0:
                print('dk per head', dk_per_head)

            self.key_relative_w = self.add_weight('key_rel_w',
                                                  shape=tf.TensorShape([2 * width - 1, dk_per_head]),
                                                  initializer=initializers.RandomNormal(stddev=dk_per_head ** -0.5))
            # 2 * width - 1

            self.key_relative_h = self.add_weight('key_rel_h',
                                                  shape=tf.TensorShape([2 * height - 1, dk_per_head]),
                                                  initializer=initializers.RandomNormal(stddev=dk_per_head ** -0.5))
            # 2 * height - 1

        else:
            self.key_relative_w = None
            self.key_relative_h = None

    def call(self, inputs, **kwargs):
        if self.axis == 1:
            # If channels first, force it to be channels last for these ops
            inputs = K.permute_dimensions(inputs, [0, 2, 3, 1])

        q, k, v = tf.split(inputs, [self.depth_k, self.depth_k, self.depth_v], axis=-1)

        q = self.split_heads_2d(q)
        k = self.split_heads_2d(k)
        v = self.split_heads_2d(v)

        # scale query
        depth_k_heads = self.depth_k / self.num_heads
        q *= (depth_k_heads ** -0.5)

        # [Batch, num_heads, height * width, depth_k or depth_v] if axis == -1
        qk_shape = [self._batch, self.num_heads, self._height * self._width, self.depth_k // self.num_heads]
        v_shape = [self._batch, self.num_heads, self._height * self._width, self.depth_v // self.num_heads]
        flat_q = K.reshape(q, K.stack(qk_shape))
        flat_k = K.reshape(k, K.stack(qk_shape))
        flat_v = K.reshape(v, K.stack(v_shape))

        # [Batch, num_heads, HW, HW]
        logits = tf.matmul(flat_q, flat_k, transpose_b=True)

        # Apply relative encodings
        if self.relative:
            h_rel_logits, w_rel_logits = self.relative_logits(q)
            logits += h_rel_logits
            logits += w_rel_logits

        weights = K.softmax(logits, axis=-1)
        attn_out = tf.matmul(weights, flat_v)

        attn_out_shape = [self._batch, self.num_heads, self._height, self._width, self.depth_v // self.num_heads]
        attn_out_shape = K.stack(attn_out_shape)
        attn_out = K.reshape(attn_out, attn_out_shape)
        attn_out = self.combine_heads_2d(attn_out)
        # [batch, height, width, depth_v]

        if self.axis == 1:
            # return to [batch, depth_v, height, width] for channels first
            attn_out = K.permute_dimensions(attn_out, [0, 3, 1, 2])

        return attn_out

    def compute_output_shape(self, input_shape):
        output_shape = list(input_shape)
        output_shape[self.axis] = self.depth_v
        return tuple(output_shape)

    def split_heads_2d(self, ip):
        tensor_shape = K.shape(ip)

        # batch, height, width, channels for axis = -1
        tensor_shape = [tensor_shape[i] for i in range(len(self._shape))]

        batch = tensor_shape[0]
        height = tensor_shape[1]
        width = tensor_shape[2]
        channels = tensor_shape[3]

        # Save the spatial tensor dimensions
        self._batch = batch
        self._height = height
        self._width = width

        ret_shape = K.stack([batch, height, width,  self.num_heads, channels // self.num_heads])
        split = K.reshape(ip, ret_shape)
        transpose_axes = (0, 3, 1, 2, 4)
        split = K.permute_dimensions(split, transpose_axes)

        return split

    def relative_logits(self, q):
        shape = K.shape(q)
        # [batch, num_heads, H, W, depth_v]
        shape = [shape[i] for i in range(5)]

        height = shape[2]
        width = shape[3]

        rel_logits_w = self.relative_logits_1d(q, self.key_relative_w, height, width,
                                               transpose_mask=[0, 1, 2, 4, 3, 5])

        rel_logits_h = self.relative_logits_1d(
            K.permute_dimensions(q, [0, 1, 3, 2, 4]),
            self.key_relative_h, width, height,
            transpose_mask=[0, 1, 4, 2, 5, 3])

        return rel_logits_h, rel_logits_w

    def relative_logits_1d(self, q, rel_k, H, W, transpose_mask):
        rel_logits = tf.einsum('bhxyd,md->bhxym', q, rel_k)
        rel_logits = K.reshape(rel_logits, [-1, self.num_heads * H, W, 2 * W - 1])
        rel_logits = self.rel_to_abs(rel_logits)
        rel_logits = K.reshape(rel_logits, [-1, self.num_heads, H, W, W])
        rel_logits = K.expand_dims(rel_logits, axis=3)
        rel_logits = K.tile(rel_logits, [1, 1, 1, H, 1, 1])
        rel_logits = K.permute_dimensions(rel_logits, transpose_mask)
        rel_logits = K.reshape(rel_logits, [-1, self.num_heads, H * W, H * W])
        return rel_logits

    def rel_to_abs(self, x):
        shape = K.shape(x)
        shape = [shape[i] for i in range(3)]
        B, Nh, L, = shape
        col_pad = K.zeros(K.stack([B, Nh, L, 1]))
        x = K.concatenate([x, col_pad], axis=3)
        flat_x = K.reshape(x, [B, Nh, L * 2 * L])
        flat_pad = K.zeros(K.stack([B, Nh, L - 1]))
        flat_x_padded = K.concatenate([flat_x, flat_pad], axis=2)
        final_x = K.reshape(flat_x_padded, [B, Nh, L + 1, 2 * L - 1])
        final_x = final_x[:, :, :L, L - 1:]
        return final_x

    def combine_heads_2d(self, inputs):
        # [batch, num_heads, height, width, depth_v // num_heads]
        transposed = K.permute_dimensions(inputs, [0, 2, 3, 1, 4])
        # [batch, height, width, num_heads, depth_v // num_heads]
        shape = K.shape(transposed)
        shape = [shape[i] for i in range(5)]

        a, b = shape[-2:]
        ret_shape = K.stack(shape[:-2] + [a * b])
        # [batch, height, width, depth_v]
        return K.reshape(transposed, ret_shape)

    def get_config(self):
        config = {
            'depth_k': self.depth_k,
            'depth_v': self.depth_v,
            'num_heads': self.num_heads,
            'relative': self.relative,
        }
        base_config = super(AttentionAugmentation2D, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
    
def augmented_conv1d(ip, shape, filters, kernel_size=3, strides=1, padding = 'same',
                     depth_k=0.2, depth_v=0.2, num_heads=2, relative_encodings=True):
    """
    Builds an Attention Augmented Convolution block.
    Args:
        ip: keras tensor.
        filters: number of output filters.
        kernel_size: convolution kernel size.
        strides: strides of the convolution.
        depth_k: float or int. Number of filters for k.
            Computes the number of filters for `v`.
            If passed as float, computed as `filters * depth_k`.
        depth_v: float or int. Number of filters for v.
            Computes the number of filters for `k`.
            If passed as float, computed as `filters * depth_v`.
        num_heads: int. Number of attention heads.
            Must be set such that `depth_k // num_heads` is > 0.
        relative_encodings: bool. Whether to use relative
            encodings or not.
    Returns:
        a keras tensor.
    """

    
    if type(kernel_size) == int:
        pass
    else:
        kernel_size = kernel_size[0]
        
    if type(strides) == int:
        pass
    else:
        strides = strides[0]
        
    t_n = shape[0]
    f_n = shape[1]
        
    # input_shape = K.int_shape(ip)
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    depth_k, depth_v = _normalize_depth_vars(depth_k, depth_v, filters)
    
    # print(kernel_size)
    # print(strides)

    conv_out = _conv_layer1d(ip, t_n, f_n, filters - depth_v, kernel_size, strides, padding = 'same')

    # Augmented Attention Block
    qkv_conv = _conv_layer1d(ip, t_n, f_n,  2 * depth_k + depth_v, 1, strides, padding = 'same')
    attn_out = AttentionAugmentation2D(depth_k, depth_v, num_heads, relative_encodings)(qkv_conv)
    attn_out = _conv_layer1r(attn_out, t_n, depth_v,  depth_v, 1, strides, padding = 'same')
    
    output = keras.layers.concatenate([conv_out, attn_out], axis=-1)
#     output = Concatenate(axis=channel_axis)([conv_out, attn_out])
   
    reshape = Reshape((t_n, filters))(output)

    return reshape

In [6]:
from numpy import loadtxt
import keras
from keras.models import load_model
 
# load model
#model = load_model('data/model.h5')
dependencies = {
    'cindex_score': cindex_score,
    'AttentionAugmentation2D': AttentionAugmentation2D
}
model = load_model('data/model.h5', custom_objects=dependencies)

#from keras.utils import CustomObjectScope
from tensorflow.keras.utils import CustomObjectScope


# model.summary()

2022-02-24 17:50:29.464463: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2022-02-24 17:50:29.660691: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties: 
pciBusID: 0000:04:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1
coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s
2022-02-24 17:50:29.660972: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
2022-02-24 17:50:29.663140: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2022-02-24 17:50:29.665298: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10
2022-02-24 17:50:29.665608: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10


In [7]:
def prepare_interaction_pairs(XD, XT,  Y, rows, cols):
    drugs = []
    targets = []
    targetscls = []
    affinity=[] 

    drug = XD[0]
    drugs.append(drug)

    target=XT[0]
    targets.append(target)

    drug_data = np.stack(drugs)
    target_data = np.stack(targets)
    return drug_data,target_data

In [8]:
#print(predicted_labels)
#def general_nfold_cv(XD, XT,  Y, label_row_inds, label_col_inds, prfmeasure, runmethod, FLAGS, labeled_sets, val_sets): ## BURAYA DA FLAGS LAZIM????

In [9]:
import argparse
import os
parser = argparse.ArgumentParser()
# for model
parser.add_argument(
  '--seq_window_lengths',
  type=int,
  nargs='+',
  help='Space seperated list of motif filter lengths. (ex, --window_lengths 4 8 12)'
)
parser.add_argument(
  '--smi_window_lengths',
  type=int,
  nargs='+',
  help='Space seperated list of motif filter lengths. (ex, --window_lengths 4 8 12)'
)
parser.add_argument(
  '--num_windows',
  type=int,
  nargs='+',
  help='Space seperated list of the number of motif filters corresponding to length list. (ex, --num_windows 100 200 100)'
)
parser.add_argument(
  '--num_hidden',
  type=int,
  default=0,
  help='Number of neurons in hidden layer.'
)
parser.add_argument(
  '--num_classes',
  type=int,
  default=0,
  help='Number of classes (families).'
)
parser.add_argument(
  '--max_seq_len',
  type=int,
  default=0,
  help='Length of input sequences.'
)
parser.add_argument(
  '--max_smi_len',
  type=int,
  default=0,
  help='Length of input sequences.'
)
# for learning
parser.add_argument(
  '--learning_rate',
  type=float,
  default=0.001,
  help='Initial learning rate.'
)
parser.add_argument(
  '--num_epoch',
  type=int,
  default=100,
  help='Number of epochs to train.'
)
parser.add_argument(
  '--batch_size',
  type=int,
  default=256,
  help='Batch size. Must divide evenly into the dataset sizes.'
)
parser.add_argument(
  '--dataset_path',
  type=str,
  default='data/kiba/',
  help='Directory for input data.'
)
parser.add_argument(
  '--problem_type',
  type=int,
  default=1,
  help='Type of the prediction problem (1-4)'
)
parser.add_argument(
  '--binary_th',
  type=float,
  default=0.0,
  help='Threshold to split data into binary classes'
)
parser.add_argument(
  '--is_log',
  type=int,
  default=0,
  help='use log transformation for Y'
)
parser.add_argument(
  '--checkpoint_path',
  type=str,
  default='',
  help='Path to write checkpoint file.'
)
parser.add_argument(
  '--log_dir',
  type=str,
  default='/tmp',
  help='Directory for log data.'
)
FLAGS, unparsed = parser.parse_known_args()

In [10]:
class DataSet(object):
  def __init__(self, fpath, setting_no, seqlen, smilen, need_shuffle = False):
    self.SEQLEN = seqlen
    self.SMILEN = smilen
    #self.NCLASSES = n_classes
    self.charseqset = CHARPROTSET
    self.charseqset_size = CHARPROTLEN

    self.charsmiset = CHARISOSMISET ###HERE CAN BE EDITED
    self.charsmiset_size = CHARISOSMILEN
    self.PROBLEMSET = setting_no

    # read raw file
    # self._raw = self.read_sets( FLAGS)

    # iteration flags
    # self._num_data = len(self._raw)


  def read_sets(self, FLAGS): ### fpath should be the dataset folder /kiba/ or /davis/
    fpath = FLAGS.dataset_path
    setting_no = FLAGS.problem_type
    print("Reading %s start" % fpath)

    test_fold = json.load(open(fpath + "folds/test_fold_setting" + str(setting_no)+".txt"))
    train_folds = json.load(open(fpath + "folds/train_fold_setting" + str(setting_no)+".txt"))
    
    return test_fold, train_folds

  def parse_data(self, FLAGS,  with_label=True, smileStr, proteinSeq): 
    fpath = FLAGS.dataset_path	
    print("Read %s start" % fpath)

    ligands = json.load(open(fpath+"ligands_can.txt"), object_pairs_hook=OrderedDict)
    proteins = json.load(open(fpath+"proteins.txt"), object_pairs_hook=OrderedDict)

    Y = pickle.load(open(fpath + "Y","rb"), encoding='latin1') ### TODO: read from raw
    if FLAGS.is_log:
        Y = -(np.log10(Y/(math.pow(10,9))))

    XD = []
    XT = []

    #smileStr = 'CC1CC=CC(=O)CCCCCC2=CC(=CC(=C2C(=O)O1)O)OC';
    #proteinSeq = 'MTVKTEAAKGTLTYSRMRGMVAILIAFMKQRRMGLNDFIQKIANNSYACKHPEVQSILKISQPQEPELMNANPSPPPSPSQQINLGPSSNPHAKPSDFHFLKVIGKGSFGKVLLARHKAEEVFYAVKVLQKKAILKKKEEKHIMSERNVLLKNVKHPFLVGLHFSFQTADKLYFVLDYINGGELFYHLQRERCFLEPRARFYAAEIASALGYLHSLNIVYRDLKPENILLDSQGHIVLTDFGLCKENIEHNSTTSTFCGTPEYLAPEVLHKQPYDRTVDWWCLGAVLYEMLYGLPPFYSRNTAEMYDNILNKPLQLKPNITNSARHLLEGLLQKDRTKRLGAKDDFMEIKSHVFFSLINWDDLINKKITPPFNPNVSGPNDLRHFDPEFTEEPVPNSIGKSPDSVLVTASVKEAAEAFLGFSYAPPTDSFL';
#     print(type(ligands.keys()))
#     sys.exit()
    if with_label:
#     for d in ligands.keys():
            XD.append(label_smiles(smileStr, self.SMILEN, self.charsmiset))
            XT.append(label_sequence(proteinSeq, self.SEQLEN, self.charseqset))
    else:
            XD.append(one_hot_smiles(smileStr, self.SMILEN, self.charsmiset))
            XT.append(one_hot_sequence(proteinSeq, self.SEQLEN, self.charseqset))
  
    return XD, XT, Y



In [11]:
CHARPROTSET = { "A": 1, "C": 2, "B": 3, "E": 4, "D": 5, "G": 6, 
				"F": 7, "I": 8, "H": 9, "K": 10, "M": 11, "L": 12, 
				"O": 13, "N": 14, "Q": 15, "P": 16, "S": 17, "R": 18, 
				"U": 19, "T": 20, "W": 21, 
				"V": 22, "Y": 23, "X": 24, 
				"Z": 25 }

CHARPROTLEN = 25

CHARCANSMISET = { "#": 1, "%": 2, ")": 3, "(": 4, "+": 5, "-": 6, 
			 ".": 7, "1": 8, "0": 9, "3": 10, "2": 11, "5": 12, 
			 "4": 13, "7": 14, "6": 15, "9": 16, "8": 17, "=": 18, 
			 "A": 19, "C": 20, "B": 21, "E": 22, "D": 23, "G": 24,
			 "F": 25, "I": 26, "H": 27, "K": 28, "M": 29, "L": 30, 
			 "O": 31, "N": 32, "P": 33, "S": 34, "R": 35, "U": 36, 
			 "T": 37, "W": 38, "V": 39, "Y": 40, "[": 41, "Z": 42, 
			 "]": 43, "_": 44, "a": 45, "c": 46, "b": 47, "e": 48, 
			 "d": 49, "g": 50, "f": 51, "i": 52, "h": 53, "m": 54, 
			 "l": 55, "o": 56, "n": 57, "s": 58, "r": 59, "u": 60,
			 "t": 61, "y": 62}

CHARCANSMILEN = 62

CHARISOSMISET = {"#": 29, "%": 30, ")": 31, "(": 1, "+": 32, "-": 33, "/": 34, ".": 2, 
				"1": 35, "0": 3, "3": 36, "2": 4, "5": 37, "4": 5, "7": 38, "6": 6, 
				"9": 39, "8": 7, "=": 40, "A": 41, "@": 8, "C": 42, "B": 9, "E": 43, 
				"D": 10, "G": 44, "F": 11, "I": 45, "H": 12, "K": 46, "M": 47, "L": 13, 
				"O": 48, "N": 14, "P": 15, "S": 49, "R": 16, "U": 50, "T": 17, "W": 51, 
				"V": 18, "Y": 52, "[": 53, "Z": 19, "]": 54, "\\": 20, "a": 55, "c": 56, 
				"b": 21, "e": 57, "d": 22, "g": 58, "f": 23, "i": 59, "h": 24, "m": 60, 
				"l": 25, "o": 61, "n": 26, "s": 62, "r": 27, "u": 63, "t": 28, "y": 64}

CHARISOSMILEN = 64
def one_hot_smiles(line, MAX_SMI_LEN, smi_ch_ind):
	X = np.zeros((MAX_SMI_LEN, len(smi_ch_ind))) #+1

	for i, ch in enumerate(line[:MAX_SMI_LEN]):
		X[i, (smi_ch_ind[ch]-1)] = 1 

	return X #.tolist()

def one_hot_sequence(line, MAX_SEQ_LEN, smi_ch_ind):
	X = np.zeros((MAX_SEQ_LEN, len(smi_ch_ind))) 
	for i, ch in enumerate(line[:MAX_SEQ_LEN]):
		X[i, (smi_ch_ind[ch])-1] = 1

	return X #.tolist()


def label_smiles(line, MAX_SMI_LEN, smi_ch_ind):
	X = np.zeros(MAX_SMI_LEN)
	for i, ch in enumerate(line[:MAX_SMI_LEN]): #	x, smi_ch_ind, y
		X[i] = smi_ch_ind[ch]

	return X #.tolist()

def label_sequence(line, MAX_SEQ_LEN, smi_ch_ind):
	X = np.zeros(MAX_SEQ_LEN)

	for i, ch in enumerate(line[:MAX_SEQ_LEN]):
		X[i] = smi_ch_ind[ch]

	return X #.tolist()


In [12]:
FLAGS.num_windows = [32]
FLAGS.smi_window_lengths = [4, 8]
FLAGS.seq_window_lengths = [8, 12]
FLAGS.num_epoch = 200
FLAGS.batch_size = 64
FLAGS.max_seq_len  = 1000
FLAGS.max_smi_len = 100
FLAGS.problem_type = 2
FLAGS.log_dir  = "logs/"
dataset = DataSet( fpath = FLAGS.dataset_path, ### BUNU ARGS DA GUNCELLE
                  setting_no = FLAGS.problem_type, ##BUNU ARGS A EKLE
                  seqlen = FLAGS.max_seq_len,
                  smilen = FLAGS.max_smi_len,
                  need_shuffle = False )

In [13]:
import sys, re, math, time
import numpy as np
import matplotlib.pyplot as plt
import json
import pickle
import collections
from collections import OrderedDict
from matplotlib.pyplot import cm

XD, XT, Y = dataset.parse_data(FLAGS)
test_set, outer_train_sets = dataset.read_sets(FLAGS) 
print('-----Setting 2 test-------')
x = outer_train_sets
train_sets = []
val_sets = []
r1 = int(0.9 * len(x))
x1 = x[0:r1]
x3 = x[r1:]
train_sets.append(x1)
val_sets.append(x3)

Read data/kiba/ start
Reading data/kiba/ start
-----Setting 2 test-------


In [14]:
foldind = 0 # folding is just index of the fold
valinds = val_sets[foldind]
label_row_inds, label_col_inds = np.where(np.isnan(Y)==False)  #basically finds the point address of affinity [x,y]
terows = label_row_inds[valinds]
tecols = label_col_inds[valinds]
val_drugs, val_prots = prepare_interaction_pairs(XD, XT,  Y, terows, tecols)
# val_drugs, val_prots, val_Y = prepare_interaction_pairs(XD, XT,  Y, terows, tecols)
print(val_drugs)


[[42. 42. 35. 42. 42. 40. 42. 42.  1. 40. 48. 31. 42. 42. 42. 42. 42. 42.
   4. 40. 42. 42.  1. 40. 42. 42.  1. 40. 42.  4. 42.  1. 40. 48. 31. 48.
  35. 31. 48. 31. 48. 42.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]]


In [15]:
#XD and XT is empty. findout please
print(val_prots)

[[11.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
   1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
   1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1. 11. 20. 22. 10. 20.
   4.  1.  1. 10.  6. 20. 12. 20. 23. 17. 18. 11. 18.  6. 11. 22.  1.  8.
  12.  8.  1.  7. 11. 10. 15. 18. 18. 11.  6. 12. 14.  5.  7.  8. 15. 10.
   8.  1. 14. 14. 17. 23.  1.  2. 10.  9. 16.  4. 22. 15. 17.  8. 12. 10.
   8. 17. 15. 16. 15.  4. 16.  4. 12. 11. 14.  1. 14. 16. 17. 16. 16. 16.
  17. 16. 17. 15. 15.  8. 14. 12.  6. 16. 17. 17. 14. 16.  9.  1. 10. 16.
  17.  5.  7.  9.  7. 12. 10. 22.  8.  6. 10.  6. 17.  7.  6. 10. 22. 12.
  12.  1. 18.  9. 10.  1.  4.  4. 22.  7. 23.  1. 22. 10. 22. 12. 15. 10.
  10.  1.  8. 12. 10. 10. 10.  4.  4. 10.  9.  8. 11. 17.  4. 18. 14. 22.
  12. 12. 10. 14. 22. 10.  9. 16.  7. 12. 22.  6. 12.  9.  7. 17.  7. 15.
  20.  1.  5. 10. 12. 23.  7. 22. 12.  5. 23.  8. 14.  6.  6.  4. 12.  7.
  23.  9. 12. 15. 18.  4. 18.  2.  7. 

In [16]:
print(model.summary())

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            (None, 100)          0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            (None, 1000)         0                                            
__________________________________________________________________________________________________
embedding_3 (Embedding)         (None, 100, 128)     8320        input_3[0][0]                    
__________________________________________________________________________________________________
embedding_4 (Embedding)         (None, 1000, 128)    3328        input_4[0][0]                    
____________________________________________________________________________________________

In [17]:
predicted_labels = model.predict([np.array(val_drugs), np.array(val_prots)])

2022-02-24 17:50:36.306737: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2022-02-24 17:50:36.463345: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
2022-02-24 17:50:37.130832: W tensorflow/stream_executor/gpu/redzone_allocator.cc:312] Not found: ./bin/ptxas not found
Relying on driver to perform ptx compilation. This message will be only logged once.


In [18]:
print(predicted_labels)

[[10.307234]]


print(Y)

In [None]:
# print(val_Y)

In [None]:
# loss, rperf2 = model.evaluate(([np.array(val_drugs),np.array(val_prots) ]), np.array(val_Y), verbose=0)

In [None]:
# print(loss)

In [None]:
# print(rperf2)

In [None]:
# print(predicted_labels.tolist())
#test