In [2]:
from keras.layers import Input, Dense, Activation, Lambda, Dropout, Concatenate
from keras.models import Model, Sequential
from keras.callbacks import TensorBoard
from keras import optimizers
from keras import backend as K

from keras.regularizers import l2

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


### load a sample data

In [3]:
import numpy as np

In [4]:
with np.load('kdd99_train_test.npz') as data:
    #x_train = data['x_train']     ### TODO: splits are not taken as described in paper
    x_test = data['x_test']


In [5]:
x = x_test[0]
y = x_test[-15]

In [6]:
x

array([0.00000000e+00, 1.48837072e-06, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00,
       1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 0.00000000e+00,
       1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00,
      

In [39]:
y

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.99412916,
       0.00195695, 0.1       , 1.        , 0.9       , 0.        ,
       0.        , 1.        , 0.        , 1.        , 0.00392157,
       0.        , 1.        , 0.        , 0.        , 0.09      ,
       1.        , 0.91      , 0.        , 0.        , 1.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.     

### apply functions in np

In [37]:
### cosine similarity
def cos_sim(x):
    a,b = x
    #return K.dot(a, b) / (K.l2_normalize(a) * K.l2_normalize(b))
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

    
### relative euclidian dist
def relative_euc_dist(x):
    a,b = x
    #return K.l2_normalize(a - b) / K.l2_normalize(a)
    return np.linalg.norm(a - b) / np.linalg.norm(a) 

### np.sum(np.sqrt((yTrue - yPred)**2) / np.linalg.norm(yTrue))
#np.linalg.norm(yTrue - yPred) / np.linalg.norm(yTrue) 

In [39]:
print("cos_sim[x,x]",cos_sim([x,x]))
print("cos_sim[x,y]",cos_sim([x,y]))
print("cos_sim[y,x]",cos_sim([y,x]))
print("cos_sim[y,y]",cos_sim([y,y]))

cos_sim[x,x] 1.0
cos_sim[x,y] 0.26507292893564366
cos_sim[y,x] 0.26507292893564366
cos_sim[y,y] 1.0


In [38]:
print("relative_euc_dist[x,x]",relative_euc_dist([x,x]))
print("relative_euc_dist[x,y]",relative_euc_dist([x,y]))
print("relative_euc_dist[y,x]",relative_euc_dist([y,x]))
print("relative_euc_dist[y,y]",relative_euc_dist([y,y]))

relative_euc_dist[x,x] 0.0
relative_euc_dist[x,y] 1.230095954998544
relative_euc_dist[y,x] 1.1955691130461472
relative_euc_dist[y,y] 0.0


### try in keras

In [11]:
### define tensors and stuff

In [7]:
tensor_x = K.constant(value=x)
tensor_y = K.constant(value=y)

In [6]:
K.eval(tensor_x)

array([0.0000000e+00, 1.4883707e-06, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.0000000e+00,
       1.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 1.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       1.0000000e+00, 1.0000000e+00, 1.0000000e+00, 0.0000000e+00,
       1.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 1.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e

In [44]:
K.eval(tensor_y)

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.9941292 ,
       0.00195695, 0.1       , 1.        , 0.9       , 0.        ,
       0.        , 1.        , 0.        , 1.        , 0.00392157,
       0.        , 1.        , 0.        , 0.        , 0.09      ,
       1.        , 0.91      , 0.        , 0.        , 1.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.     

##### method 1

In [45]:
### cosine similarity
def cos_sim(a,b):
    #a,b = x
    return K.dot(a, K.transpose(b)) / (K.l2_normalize(a) * K.l2_normalize(b))
    #return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

def funct_shapes(shapes):
    shape1, shape2 = shapes
    return(shape1[0],1)
    
    
### relative euclidian dist
def relative_euc_dist(a,b):
    #a,b = x
    return K.l2_normalize(a - b) / K.l2_normalize(a)
    #return np.linalg.norm(a - b) / np.linalg.norm(a) 
    
### https://github.com/keras-team/keras/issues/3031#issuecomment-231336142
def cos_distance(y_true, y_pred):
    def l2_normalize(x, axis):
        norm = K.sqrt(K.sum(K.square(x), axis=axis, keepdims=True))
        return K.sign(x) * K.maximum(K.abs(x), K.epsilon()) / K.maximum(norm, K.epsilon())
    y_true = l2_normalize(y_true, axis=-1)
    y_pred = l2_normalize(y_pred, axis=-1)
    return K.mean(y_true * y_pred, axis=-1)

In [53]:
cos = cos_sim(tensor_x,tensor_x)

ValueError: Shape must be rank 2 but is rank 1 for 'MatMul_1' (op: 'MatMul') with input shapes: [120], [120].

In [47]:
K.eval(cos)    #### wrong answer!!

0.008333333333333333

In [130]:
tensor_x

<tf.Tensor 'Const_27:0' shape=(120,) dtype=float32>

In [131]:
K.transpose(tensor_y)

<tf.Tensor 'transpose_8:0' shape=(120,) dtype=float32>

In [71]:
cos = cos_sim(tensor_x,tensor_x)

ValueError: Shape must be rank 2 but is rank 1 for 'MatMul_1' (op: 'MatMul') with input shapes: [120], [120].

##### method 2


In [12]:
from keras.layers import merge

In [9]:
model = Sequential()

In [14]:
cos_dist = merge([tensor_x, tensor_x], mode='cos', dot_axes=1)

  """Entry point for launching an IPython kernel.
  name=name)


ValueError: Shape must be rank 2 but is rank 1 for 'merge_3/MatMul' (op: 'MatMul') with input shapes: [120], [120].

In [1]:
K.eval(Merge(layers=[tensor_x, tensor_x], mode='cos', concat_axis=-1))

NameError: name 'K' is not defined

###### method 3
###### https://datascience.stackexchange.com/questions/26784/keras-computing-cosine-similarity-matrix-of-two-3d-tensors/26831#26831

- results are wrong

In [25]:
# convenience l2_norm function
def l2_norm(x, axis=None):
    """
    takes an input tensor and returns the l2 norm along specified axis
    """

    square_sum = K.sum(K.square(x), axis=axis, keepdims=True)
    norm = K.sqrt(K.maximum(square_sum, K.epsilon()))

    return norm


### TODO: bizim dataya uygun hale getir
def pairwise_cosine_sim(A_B):
    """
    A [batch x n x d] tensor of n rows with d dimensions
    B [batch x m x d] tensor of n rows with d dimensions

    returns:
    D [batch x n x m] tensor of cosine similarity scores between each point i<n, j<m
    """

    A_tensor, B_tensor = A_B
    A_mag = l2_norm(A_tensor, axis=-1)
    B_mag = l2_norm(B_tensor, axis=-1)
    
    A_tensor = K.reshape(tensor_x, (-1,120))
    B_tensor = K.reshape(tensor_x, (120, -1))
    
    num = K.dot(A_tensor, B_tensor)  ###K.dot(A, K.permute_dimensions) ### TODO!! #K.batch_dot(A_tensor, B_tensor)
    den = (A_mag * B_mag)
    dist_mat =  num / den

    return dist_mat




In [18]:
#tensor_x = K.constant(value=x, dtype='float32')
#tensor_y = K.constant(value=y, dtype='float32')

input_x = Input(tensor=tensor_x)
input_y = Input(tensor=tensor_y)

In [21]:
dist_out = Lambda(pairwise_cosine_sim)([tensor_x, tensor_x])

In [22]:
K.eval(dist_out)

array([[0.99999994]], dtype=float32)

In [31]:
funct = pairwise_cosine_sim2

print(K.eval(Lambda(funct)([tensor_x, tensor_x])))
print(K.eval(Lambda(funct)([tensor_x, tensor_y])))
print(K.eval(Lambda(funct)([tensor_y, tensor_x])))
print(K.eval(Lambda(funct)([tensor_y, tensor_y])))

[[0.99999994]]
[[0.9719315]]
[[0.9719315]]
[[0.9446509]]


In [32]:
funct = pairwise_cosine_sim

print(K.eval(Lambda(funct)([tensor_x, tensor_x])))
print(K.eval(Lambda(funct)([tensor_x, tensor_y])))
print(K.eval(Lambda(funct)([tensor_y, tensor_x])))
print(K.eval(Lambda(funct)([tensor_y, tensor_y])))

[[0.99999994]]
[[0.9719315]]
[[0.9719315]]
[[0.9446509]]


In [152]:
dist_model = Model(inputs=[input_x, input_y], outputs=dist_out)

##### method 4

- working !!!

In [13]:
def cos_sim(a_b):
    '''
    a: 120 dim, b:120 dim
    '''
    a, b = a_b
    
    norm_a = K.sqrt(K.sum(a ** 2))
    norm_b = K.sqrt(K.sum(b ** 2))
    
    return ( K.sum(a * b) / (norm_a * norm_b) )
    
def relative_euc_dist(a_b):
    a,b = a_b
    
    norm_diff = K.sqrt(K.sum((a - b)**2))
    norm_a = K.sqrt(K.sum(a ** 2))
    
    return (norm_diff / norm_a)

In [14]:
funct = cos_sim

print(K.eval(Lambda(funct)([tensor_x, tensor_x])))
print(K.eval(Lambda(funct)([tensor_x, tensor_y])))
print(K.eval(Lambda(funct)([tensor_y, tensor_x])))
print(K.eval(Lambda(funct)([tensor_y, tensor_y])))

0.99999994
0.2650729
0.2650729
0.99999994


In [15]:
funct = relative_euc_dist

print(K.eval(Lambda(funct)([tensor_x, tensor_x])))
print(K.eval(Lambda(funct)([tensor_x, tensor_y])))
print(K.eval(Lambda(funct)([tensor_y, tensor_x])))
print(K.eval(Lambda(funct)([tensor_y, tensor_y])))

0.0
1.230096
1.195569
0.0
