In [0]:
!pip install tensorflow==1.0.0
import tensorflow as tf
import tensorflow.contrib.slim as slim
import abc
import numpy as np
from scipy.stats import pearsonr
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score
import codecs
from nltk.tokenize import word_tokenize
import os
from __future__ import print_function
import time
import os
import datetime
from tensorflow.python import debug as tf_debug
import re
import math
from collections import Counter
from sklearn.utils import shuffle
# Set TF_CPP_MIN_LOG_LEVEL to '3' (presumably to quiet TensorFlow's native
# log output; note that tensorflow has already been imported above).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import nltk
# Download the 'punkt' data package (presumably required by word_tokenize,
# which load_sts calls).
nltk.download('punkt')
import logging
from google.colab import drive
# Mount Google Drive at /content/drive; the GloVe and dataset paths used in
# this notebook all live under that mount point.
drive.mount("/content/drive",force_remount=True)



def batch_iter(data, batch_size, num_epochs, shuffle=True):
    """Yield mini-batches of ``data`` for ``num_epochs`` passes.

    Args:
        data: Sequence of samples. It is converted with ``np.array`` so that a
            whole batch can be selected with a single slice.
        batch_size: Maximum number of samples per batch; the last batch of an
            epoch is shorter when the data size is not a multiple of it.
        num_epochs: Number of full passes over ``data``.
        shuffle: When true, a fresh random permutation is drawn every epoch.

    Yields:
        Arrays holding consecutive slices of the (optionally shuffled) data.
        Nothing is yielded when ``data`` is empty.
    """
    data = np.array(data)
    data_size = len(data)
    # Integer ceiling division: exact for any size and 0 for empty data, so an
    # empty dataset no longer produces one spurious empty batch per epoch.
    num_batches_per_epoch = (data_size + batch_size - 1) // batch_size
    for _ in range(num_epochs):
        # Shuffle the data at each epoch
        if shuffle:
            shuffle_indices = np.random.permutation(np.arange(data_size))
            shuffled_data = data[shuffle_indices]
        else:
            shuffled_data = data
        for batch_num in range(num_batches_per_epoch):
            start_index = batch_num * batch_size
            # Slicing past the end is clamped, which gives the short last batch.
            yield shuffled_data[start_index:start_index + batch_size]
class Embedder(object):
    """ Base class for token-embedding lookups.
    Subclasses are expected to provide:
      * w: dict from token to row index
      * g: matrix whose rows are the embeddings, addressed by the values of w
      * N: number of embedding dimensions
    """

    def map_tokens(self, tokens, ndim=2):
        """ Look up the embeddings of the given tokens.
        Tokens that are missing from self.w are dropped.  With ndim == 2 the
        result is a (known tokens, N) matrix; with any other ndim it is the
        mean vector of the known tokens.  When no token is known, zeros of
        shape (1, N) (ndim == 2) or (N,) are returned instead. """
        vectors = []
        for token in tokens:
            if token in self.w:
                vectors.append(self.g[self.w[token]])
        if len(vectors) == 0:
            if ndim == 2:
                return np.zeros((1, self.N))
            return np.zeros(self.N)
        matrix = np.array(vectors)
        return matrix if ndim == 2 else matrix.mean(axis=0)

    def map_set(self, ss, ndim=2):
        """ Run map_tokens over every sentence in ss and collect the results
        in a list. """
        embedded = []
        for sentence in ss:
            embedded.append(self.map_tokens(sentence, ndim=ndim))
        return embedded

    def map_jset(self, sj):
        """ Index the embedding matrix g directly with the indices in sj. """
        return self.g[sj]

    def pad_set(self, ss, spad, N=None):
        """ Bring every array in ss to exactly spad entries along its first
        axis and stack the results with np.array.
        Shorter 2-D arrays get zero rows of width N (self.N when N is None)
        appended; shorter arrays of other rank get zeros appended with hstack
        (e.g. per-token labels); longer arrays are cut to their first spad
        entries. """
        width = self.N if N is None else N
        padded = []
        for sent in ss:
            length = sent.shape[0]
            if length > spad:
                sent = sent[:spad]
            elif length < spad:
                missing = spad - length
                if sent.ndim == 2:
                    sent = np.vstack((sent, np.zeros((missing, width))))
                else:
                    sent = np.hstack((sent, np.zeros(missing)))
            padded.append(sent)
        return np.array(padded)



logger = logging.getLogger('mylogger')
logger.setLevel(logging.DEBUG)

# logging.getLogger returns the same logger object every time this code runs,
# so handlers attached by an earlier execution (e.g. re-running the notebook
# cell) are still present.  Remove them first; otherwise every record is
# emitted once per execution of this cell.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()

# Create a handler that writes log records to a file
timestamp = str(int(time.time()))
fh = logging.FileHandler('./log_' + timestamp +'.txt')
fh.setLevel(logging.DEBUG)

# Create a second handler that writes log records to the console
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)

# Define the output format shared by both handlers
formatter = logging.Formatter('[%(asctime)s][%(levelname)s] ## %(message)s')
fh.setFormatter(formatter)
ch.setFormatter(formatter)

# Attach both handlers to the logger
logger.addHandler(fh)
logger.addHandler(ch)


class GloVe(Embedder):
    """ A GloVe dictionary and the associated N-dimensional vector space.

    Attributes set by __init__:
      * N: embedding dimensionality
      * w: dict mapping each word to its row index in g
      * g: float32 matrix; row 0 and the final 'UKNOW' row are zero vectors
      * glovepath: path of the file that was loaded
    """
    def __init__(self, N=50, glovepath='/content/drive/My Drive/Colab Notebooks/MPCNN/glove.6B/glove.6B.%dd.txt'):
        """ Load GloVe dictionary from the standard distributed text file.
        Glovepath should contain %d, which is substituted for the embedding
        dimension N.  Each non-blank line is split on whitespace: the first
        field is the word, the remaining fields are its vector. """
        self.N = N
        self.w = dict()
        self.g = []
        self.glovepath = glovepath % (N,)

        # [0] must be a zero vector (load_sts pads sentences with index 0)
        self.g.append(np.zeros(self.N))

        with open(self.glovepath, 'r', encoding='utf-8') as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Blank line (e.g. a trailing empty line): there is no
                    # word to register; indexing fields[0] would raise.
                    continue
                self.w[fields[0]] = len(self.g)
                self.g.append(np.array(fields[1:]).astype(float))
        # Extra all-zero row used for out-of-vocabulary tokens.
        self.w['UKNOW'] = len(self.g)
        self.g.append(np.zeros(self.N))
        self.g = np.array(self.g, dtype='float32')

#class TextSemanticSimilarity1(TextSemanticSimilarity):
class TextSemanticSimilarity(abc.ABC):
  """Sentence-pair similarity pipeline.

  Bundles STS data loading (GloVe index sequences + one-hot labels), MPCNN
  training with TensorFlow 1.x, and a bag-of-words cosine baseline used by
  ``predict``.

  NOTE: ``read_dataset`` and ``train`` read the module-level ``glove`` object
  and ``train`` instantiates the module-level ``MPCNN_Layer`` class; both
  must be defined before those methods are called.
  """

  def __init__(self):
    pass

  @staticmethod
  def _pad_indices(index, max_len):
    """Return ``index`` truncated or right-padded with 0 to ``max_len`` items."""
    fixed = list(index[:max_len])
    fixed.extend([0] * (max_len - len(fixed)))
    return fixed

  def load_sts(self, dsfile, glove, skip_unlabeled=True, max_len=100):
    """Load a dataset in the STS tsv format (``label<TAB>s0<TAB>s1``).

    Args:
      dsfile: path of the tsv file.
      glove: object whose ``w`` dict maps lower-cased tokens to embedding
        indices and contains the key 'UKNOW' for unknown tokens.
      skip_unlabeled: accepted for interface compatibility; lines with an
        empty label are always skipped.
      max_len: number of indices per sentence.  Shorter sentences are padded
        with index 0; longer ones are truncated so every array has the same
        length (previously over-long sentences were left un-truncated).

    Returns:
      (s0, s1, labels): three parallel lists.  ``s0``/``s1`` hold index
      arrays of length ``max_len``; ``labels`` holds one-hot arrays of
      length 6 built from the score rounded to the nearest integer.
    """
    s0 = []
    s1 = []
    labels = []
    with codecs.open(dsfile, encoding='utf8') as f:
        for line in f:
            line = line.rstrip()
            if not line:
                # Blank line: nothing to unpack.
                continue
            label, s0x, s1x = line.split('\t')
            if label == '':
                continue
            score_int = int(round(float(label)))
            y = [0] * 6
            y[score_int] = 1
            labels.append(np.array(y))
            for target, sentence in ((s0, s0x), (s1, s1x)):
                index = []
                for word in word_tokenize(sentence):
                    word = word.lower()
                    if word in glove.w:
                        index.append(glove.w[word])
                    else:
                        index.append(glove.w['UKNOW'])
                target.append(np.array(self._pad_indices(index, max_len)))
    return (s0, s1, labels)

  def concat_datasets(self, datasets):
    """Concatenate several loaded datasets into one.

    Args:
      datasets: iterable of ``(s0, s1, labels)`` triples of lists.

    Returns:
      ``(s0, s1, labels)`` as numpy arrays built from the joined lists.
    """
    s0 = []
    s1 = []
    labels = []
    for s0x, s1x, labelsx in datasets:
        s0 += s0x
        s1 += s1x
        labels += labelsx
    return (np.array(s0), np.array(s1), np.array(labels))

  def load_embedded(self, glove, s0, s1, labels, ndim=0, s0pad=25, s1pad=60):
    """Map loaded (s0, s1, labels) to embeddings.

    With ``ndim == 1`` every sentence becomes one averaged vector
    (``glove.map_set(..., ndim=1)``); otherwise sentences become per-token
    matrices padded/trimmed to ``s0pad`` / ``s1pad`` rows via
    ``glove.pad_set``.  The original docstring marks this helper as
    deprecated.

    Returns:
      ``(e0, e1, s0, s1, labels)`` where e0/e1 are the embedded sentences.
    """
    if ndim == 1:
        # for averaging:
        e0 = np.array(glove.map_set(s0, ndim=1))
        e1 = np.array(glove.map_set(s1, ndim=1))
    else:
        # for padding and sequences:
        e0 = glove.pad_set(glove.map_set(s0), s0pad)
        e1 = glove.pad_set(glove.map_set(s1), s1pad)
    return (e0, e1, s0, s1, labels)

  def load_set(self, glove, path):
    """Load and concatenate every regular file directly inside ``path``.

    Returns:
      ``([s0, s1], labels)`` with numpy arrays as produced by
      ``concat_datasets``.
    """
    files = []
    for file in os.listdir(path):
        if os.path.isfile(path + '/' + file):
            files.append(path + '/' + file)
    s0, s1, labels = self.concat_datasets([self.load_sts(d, glove) for d in files])
    print('(%s) Loaded dataset: %d' % (path, len(s0)))
    return ([s0, s1], labels)

  def get_embedding(self):
    """Read the 300-d GloVe text file into a ``{word: [float, ...]}`` dict.

    The file is opened as UTF-8 inside a ``with`` block so the handle is
    closed even if parsing a line fails.
    """
    gfile_path = os.path.join("/content/drive/My Drive/Colab Notebooks/MPCNN/glove.6B", "glove.6B.300d.txt")
    embeddings = {}
    with open(gfile_path, 'r', encoding='utf-8') as f:
        for line in f:
            sp_value = line.split()
            if not sp_value:
                continue
            embeddings[sp_value[0]] = [float(value) for value in sp_value[1:]]
    print("read word2vec finished!")
    return embeddings

  def get_cosine(self, vec1, vec2):
    """Cosine similarity of two sparse count vectors (dict-like, e.g. Counter).

    Returns 0.0 when either vector has zero norm.
    """
    intersection = set(vec1.keys()) & set(vec2.keys())
    numerator = sum([vec1[x] * vec2[x] for x in intersection])

    sum1 = sum([vec1[x]**2 for x in vec1.keys()])
    sum2 = sum([vec2[x]**2 for x in vec2.keys()])
    denominator = math.sqrt(sum1) * math.sqrt(sum2)

    if not denominator:
        return 0.0
    return float(numerator) / denominator

  def text_to_vector(self, text):
    """Count the ``\\w+`` tokens of ``text`` and return them as a Counter."""
    word = re.compile(r'\w+')
    words = word.findall(text)
    return Counter(words)

  #@abc.abstractmethod
  def read_dataset(self, file_name):
    """Load every STS tsv file found in a directory.

    Args:
      file_name: path of the directory holding the dataset files (it is
        passed to ``load_set`` as ``path``).

    Returns:
      ``(X, Y)`` where ``X`` is ``[s0, s1]`` (index arrays for the first and
      second sentences) and ``Y`` holds the one-hot labels.

    Uses the module-level ``glove`` object for the vocabulary.
    """
    X, Y = self.load_set(glove, path=file_name)
    return X, Y

  #@abc.abstractmethod
  def train(self, Xtrain, ytrain, Xtest, ytest):
    """Build the MPCNN graph, train it and periodically evaluate it.

    Args:
      Xtrain: ``[s0, s1]`` index arrays as returned by ``read_dataset``.
      ytrain: one-hot labels aligned with ``Xtrain``.
      Xtest, ytest: evaluation data in the same layout.

    Hyper-parameters are registered as TensorFlow flags inside this method.
    NOTE(review): because the flags are defined here, calling ``train`` a
    second time in the same process presumably fails on duplicate flag
    definitions - confirm before relying on repeated calls.

    The method reads the module-level ``glove`` (embedding matrix) and
    ``MPCNN_Layer``; progress is reported through the module-level
    ``logger`` and TensorBoard summaries under ``./runs/<timestamp>``.
    """
    tf.app.flags.DEFINE_integer('embedding_dim', 100, 'The dimension of the word embedding')
    tf.app.flags.DEFINE_integer('num_filters_A', 50, 'The number of filters in block A')
    tf.app.flags.DEFINE_integer('num_filters_B', 50, 'The number of filters in block B')
    tf.app.flags.DEFINE_integer('n_hidden', 150, 'number of hidden units in the fully connected layer')#150
    tf.app.flags.DEFINE_integer('sentence_length', 100, 'max size of sentence')
    tf.app.flags.DEFINE_integer('num_classes', 6, 'num of the labels')
    tf.flags.DEFINE_float("l2_reg_lambda", 1, "L2 regularization lambda (default: 0.0)")

    tf.app.flags.DEFINE_integer('num_epochs', 85, 'Number of epochs to be trained')#85
    tf.app.flags.DEFINE_integer('batch_size', 64, 'size of mini batch')#64

    tf.app.flags.DEFINE_integer("display_step", 100, "Evaluate model on dev set after this many steps (default: 100)")
    tf.app.flags.DEFINE_integer("evaluate_every", 100, "Evaluate model on dev set after this many steps (default: 100)")
    tf.app.flags.DEFINE_integer("checkpoint_every", 100, "Save model after this many steps (default: 100)")
    tf.app.flags.DEFINE_integer("num_checkpoints", 5, "Number of checkpoints to store (default: 5)")

    tf.app.flags.DEFINE_float('lr', 1e-3, 'learning rate')

    tf.app.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement")
    tf.app.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")
    filter_size = [1, 2, 100]
    conf = tf.app.flags.FLAGS
    conf._parse_flags()

    with tf.Session() as sess:
        print("I have just started")
        # Placeholders: two sentences as index sequences plus one-hot labels.
        input_1 = tf.placeholder(tf.int32, [None, conf.sentence_length], name="input_x1")
        input_2 = tf.placeholder(tf.int32, [None, conf.sentence_length], name="input_x2")
        input_3 = tf.placeholder(tf.float32, [None, conf.num_classes], name="input_y")
        dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
        with tf.name_scope("embendding"):
            s0_embed = tf.nn.embedding_lookup(glove.g, input_1)
            s1_embed = tf.nn.embedding_lookup(glove.g, input_2)

        with tf.name_scope("reshape"):
            # Add a trailing channel axis so the sentences can be fed to conv2d.
            input_x1 = tf.reshape(s0_embed, [-1, conf.sentence_length, conf.embedding_dim, 1])
            input_x2 = tf.reshape(s1_embed, [-1, conf.sentence_length, conf.embedding_dim, 1])
            input_y = tf.reshape(input_3, [-1, conf.num_classes])

        setence_model = MPCNN_Layer(conf.num_classes, conf.embedding_dim, filter_size,
                                    [conf.num_filters_A, conf.num_filters_B], conf.n_hidden,
                                    input_x1, input_x2, input_y, dropout_keep_prob, conf.l2_reg_lambda)

        global_step = tf.Variable(0, name='global_step', trainable=False)

        setence_model.similarity_measure_layer()
        optimizer = tf.train.AdamOptimizer(conf.lr)
        grads_and_vars = optimizer.compute_gradients(setence_model.loss)
        train_step = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

        timestamp = str(int(time.time()))
        out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
        print("Writing to {}\n".format(out_dir))

        loss_summary = tf.summary.scalar("loss", setence_model.loss)
        acc_summary = tf.summary.scalar("accuracy", setence_model.accuracy)

        train_summary_op = tf.summary.merge([loss_summary, acc_summary])
        train_summary_dir = os.path.join(out_dir, "summaries", "train")
        train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

        dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
        dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
        dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)
        print("Checkpoint 3")

        def train1(x1_batch, x2_batch, y_batch):
            """
            A single training step
            """
            feed_dict = {
              input_1: x1_batch,
              input_2: x2_batch,
              input_3: y_batch,
              dropout_keep_prob: 0.5
            }
            _, step, summaries, batch_loss, accuracy = sess.run(
                [train_step, global_step, train_summary_op, setence_model.loss, setence_model.accuracy],
                feed_dict)
            time_str = datetime.datetime.now().isoformat()
            logger.info("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, batch_loss, accuracy))
            train_summary_writer.add_summary(summaries, step)

        def dev_step(x1_batch, x2_batch, y_batch, writer=None):
            """
            Evaluates model on a dev set
            """
            feed_dict = {
              input_1: x1_batch,
              input_2: x2_batch,
              input_3: y_batch,
              dropout_keep_prob: 1
            }
            # train_step is deliberately NOT fetched here: running it would
            # apply a gradient update computed on the evaluation data.
            step, summaries, batch_loss, accuracy = sess.run(
                [global_step, dev_summary_op, setence_model.loss, setence_model.accuracy],
                feed_dict)
            dev_summary_writer.add_summary(summaries, step)
            if writer:
                writer.add_summary(summaries, step)

            return batch_loss, accuracy

        sess.run(tf.global_variables_initializer())
        print("Checkpoint 3")
        batches = batch_iter(list(zip(Xtrain[0], Xtrain[1], ytrain)), conf.batch_size, conf.num_epochs)
        for batch in batches:
            x1_batch, x2_batch, y_batch = zip(*batch)
            train1(x1_batch, x2_batch, y_batch)
            current_step = tf.train.global_step(sess, global_step)
            if current_step % conf.evaluate_every == 0:
                total_dev_loss = 0.0
                total_dev_accuracy = 0.0
                num_dev_batches = 0
                num_dev_samples = 0

                logger.info("\nEvaluation:")
                print("Checkpoint 5 iteration")
                dev_batches = batch_iter(list(zip(Xtest[0], Xtest[1], ytest)), conf.batch_size, 1)
                for dev_batch in dev_batches:
                    x1_dev_batch, x2_dev_batch, y_dev_batch = zip(*dev_batch)
                    dev_loss, dev_accuracy = dev_step(x1_dev_batch, x2_dev_batch, y_dev_batch)
                    total_dev_loss += dev_loss
                    # Weight each batch by its size so the short last batch
                    # does not skew the average.
                    total_dev_accuracy += dev_accuracy * len(y_dev_batch)
                    num_dev_samples += len(y_dev_batch)
                    num_dev_batches += 1
                if num_dev_samples:
                    total_dev_accuracy = total_dev_accuracy / num_dev_samples
                logger.info("dev_loss {:g}, dev_acc {:g}, num_dev_batches {:g}".format(total_dev_loss, total_dev_accuracy,
                                                                                       num_dev_batches))

        # Close the summary writers so the event files are finalized.
        train_summary_writer.close()
        dev_summary_writer.close()
        logger.info("Optimization Finished!")

  #@abc.abstractmethod
  def predict(self, data_X, data_Y):
    """Score two raw sentences with the bag-of-words cosine baseline.

    This does not use the network built by ``train``; it tokenizes both
    strings with ``text_to_vector`` and compares the counts with
    ``get_cosine``.

    Args:
      data_X: sentence 1 (untokenized string).
      data_Y: sentence 2 (untokenized string).

    Returns:
      Cosine similarity as a float (0.0 when either sentence has no tokens).
    """
    vector1 = self.text_to_vector(data_X)
    vector2 = self.text_to_vector(data_Y)
    return self.get_cosine(vector1, vector2)

  #@abc.abstractmethod
  def evaluate(self, actual_values, predicted_values):
    """Compute precision, recall and F1 with ``average='samples'``.

    Args:
      actual_values: ground-truth labels.
      predicted_values: predicted labels.
      NOTE(review): ``average='samples'`` presumably requires multilabel
      indicator arrays - confirm against the scikit-learn documentation.

    Returns:
      ``(precision, recall, f1)``.

    The previous body also computed a Pearson correlation whose result was
    never returned; that dead computation has been removed.
    """
    precision = precision_score(actual_values, predicted_values, average='samples')
    recall = recall_score(actual_values, predicted_values, average='samples')
    f1 = f1_score(actual_values, predicted_values, average='samples')
    return precision, recall, f1

  #@abc.abstractmethod
  def save_model(self, file):
    """
    :param file: Where to save the model - Optional function
    :return:

    NOTE(review): ``out_dir`` and ``conf`` are local variables of ``train``
    and are not defined in this method, so unless they also exist at module
    level this call raises NameError.  The Saver created below is never
    asked to save, and ``file`` is unused.  A working implementation needs
    access to the training session - left unchanged pending that decision.
    """
    checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
    if not os.path.exists(checkpoint_dir):
         os.makedirs(checkpoint_dir)
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=conf.num_checkpoints)
    return

  #@abc.abstractmethod
  def load_model(self, file):
    """
    :param file: From where to load the model - Optional function
    :return:
    """
    pass

# Each dataset (e.g. SICK, SemEval 2014, SemEval 2017) lives in its own folder;
# the folder path is what read_dataset receives for training and for testing.
# Every file in the folder is read as tab-separated lines: score, sentence 1, sentence 2.
print('loading glove...')
# 100-d vectors: this matches the 'embedding_dim' flag (100) registered in train().
glove = GloVe(N=100)
model=TextSemanticSimilarity()
Xtrain, ytrain =model.read_dataset('/content/drive/My Drive/Colab Notebooks/MPCNN/sts/semeval-sts/all')
# Shuffle both sentence arrays and the labels in one call so they stay aligned.
Xtrain[0], Xtrain[1], ytrain = shuffle(Xtrain[0], Xtrain[1], ytrain)
Xtest, ytest =model.read_dataset('/content/drive/My Drive/Colab Notebooks/MPCNN/sts/semeval-sts/2016')
Xtest[0], Xtest[1], ytest = shuffle(Xtest[0], Xtest[1], ytest)
# train() trains on the training set and evaluates on the test set every
# 'evaluate_every' steps within the same call.
model.train(Xtrain, ytrain,Xtest, ytest)
# Collects the scores produced by the prediction cell further down.
predictions=[]

#P,R,F1 = myModel.evaluate(predictions, test_Y)  # calculate Precision, Recall, F1
#print('Precision: %s, Recall: %s, F1: %s'%(P,R,F1))


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
Mounted at /content/drive
loading glove...
(/content/drive/My Drive/Colab Notebooks/MPCNN/sts/semeval-sts/all) Loaded dataset: 450
(/content/drive/My Drive/Colab Notebooks/MPCNN/sts/semeval-sts/2016) Loaded dataset: 254
I have just started
Writing to /content/runs/1556763904

Checkpoint 3
Checkpoint 3


[2019-05-02 02:27:30,686][INFO] ## 2019-05-02T02:27:30.686732: step 1, loss 75.057, acc 0.03125
[2019-05-02 02:27:30,686][INFO] ## 2019-05-02T02:27:30.686732: step 1, loss 75.057, acc 0.03125
[2019-05-02 02:28:24,306][INFO] ## 2019-05-02T02:28:24.306303: step 2, loss 73.4256, acc 0.21875
[2019-05-02 02:28:24,306][INFO] ## 2019-05-02T02:28:24.306303: step 2, loss 73.4256, acc 0.21875
[2019-05-02 02:29:13,310][INFO] ## 2019-05-02T02:29:13.310076: step 3, loss 70.1697, acc 0.265625
[2019-05-02 02:29:13,310][INFO] ## 2019-05-02T02:29:13.310076: step 3, loss 70.1697, acc 0.265625
[2019-05-02 02:30:03,705][INFO] ## 2019-05-02T02:30:03.705542: step 4, loss 70.447, acc 0.265625
[2019-05-02 02:30:03,705][INFO] ## 2019-05-02T02:30:03.705542: step 4, loss 70.447, acc 0.265625
[2019-05-02 02:30:53,479][INFO] ## 2019-05-02T02:30:53.479448: step 5, loss 70.5924, acc 0.15625
[2019-05-02 02:30:53,479][INFO] ## 2019-05-02T02:30:53.479448: step 5, loss 70.5924, acc 0.15625
[2019-05-02 02:31:42,850][INFO

In [0]:
# Score every "sentence1,sentence2" line of the test input with
# model.predict and append "sentence1,sentence2,score" to the output file.
# The output file is opened once for the whole run instead of once per line.
with open("/content/drive/My Drive/Colab Notebooks/MPCNN/TextSemanticSimilarity_test_input.txt", "r") as ins, \
        open('/content/drive/My Drive/Colab Notebooks/MPCNN/TextSemanticSimilarity_test_output.txt', 'a+') as f:
    for line in ins:
        # Drop the line terminator; otherwise it stays inside `line` and the
        # score ends up on a separate line from the sentence pair.
        line = line.rstrip("\r\n")
        if not line.strip():
            # Blank lines carry no sentence pair.
            continue
        text1,text2=line.split(",")
        prediction=model.predict(text1,text2)
        s=line+","+str(prediction)
        predictions.append(prediction)
        f.write(s+"\n")
        print(s)
        

A group of kids is playing in a yard and an old man is standing in the background,A group of boys in a yard is playing and a man is standing in the background
,0.889108448948774
A group of children is playing in the house and there is no man standing in the background,A group of kids is playing in a yard and an old man is standing in the background
,0.7833494518006403
The young boys are playing outdoors and the man is smiling nearby,The kids are playing outdoors near a man with a smile 
,0.4003203845127179
The young boys are playing outdoors and the man is smiling nearby,There is no boy playing outdoors and there is no man smiling 
,0.5051814855409226
The kids are playing outdoors near a man with a smile,A group of kids is playing in a yard and an old man is standing in the background 
,0.2956561979945413
There is no boy playing outdoors and there is no man smiling,A group of kids is playing in a yard and an old man is standing in the background 
,0.3731012536223182
A group of boys in 

In [0]:
class MPCNN_Layer():
    """Multi-perspective CNN for sentence-pair similarity (TensorFlow 1.x).

    The constructor creates the trainable variables; calling
    ``similarity_measure_layer`` afterwards builds the graph and sets
    ``self.scores``, ``self.output``, ``self.loss`` and ``self.accuracy``.
    """

    def __init__(self, num_classes, embedding_size, filter_sizes, num_filters, n_hidden,
                 input_x1, input_x2, input_y, dropout_keep_prob, l2_reg_lambda):
        '''
        :param num_classes: number of output classes
        :param embedding_size: dimensionality of the word embeddings
        :param filter_sizes: window sizes of the block-A filters; block B uses
                             all of them except the last
        :param num_filters: [filters in block A, filters in block B]
        :param n_hidden: units of the fully connected hidden layer
        :param input_x1: first sentence, reshaped by the caller to
                         [batch, sentence_length, embedding_size, 1]
        :param input_x2: second sentence, same layout as input_x1
        :param input_y: one-hot labels, [batch, num_classes]
        :param dropout_keep_prob: keep probability passed to tf.nn.dropout
        :param l2_reg_lambda: stored on the instance; not read by the loss
        '''
        self.embedding_size = embedding_size
        self.filter_sizes = filter_sizes
        self.num_filters = num_filters
        self.num_classes = num_classes
        self.poolings = [tf.reduce_max, tf.reduce_min, tf.reduce_mean]

        self.input_x1 = input_x1
        self.input_x2 = input_x2
        self.input_y = input_y
        self.dropout_keep_prob = dropout_keep_prob
        self.l2_loss = tf.constant(0.0)
        self.l2_reg_lambda = l2_reg_lambda

        # One weight tensor per filter size instead of three hard-coded
        # entries; for three filter sizes this creates the same variables in
        # the same order as before.
        self.W1 = [self.init_weight([ws, embedding_size, 1, num_filters[0]], "W1_%d" % i)
                   for i, ws in enumerate(filter_sizes)]
        # The variable name must be passed by keyword: the second positional
        # parameter of tf.Variable is `trainable`, so a positional string was
        # silently taken as a truthy flag and the name was dropped.
        self.b1 = [tf.Variable(tf.constant(0.1, shape=[num_filters[0]]), name="b1_%d" % i)
                   for i in range(len(filter_sizes))]

        self.W2 = [self.init_weight([ws, embedding_size, 1, num_filters[1]], "W2_%d" % i)
                   for i, ws in enumerate(filter_sizes[:-1])]
        self.b2 = [tf.Variable(tf.constant(0.1, shape=[num_filters[1], embedding_size]), name="b2_%d" % i)
                   for i in range(len(filter_sizes) - 1)]

        # Width of the feature vector produced by similarity_sentence_layer
        # (604 for the default 50/50 filters, 3 poolings, 3 filter sizes).
        self.h = self._feature_dim(num_filters, len(self.poolings), len(filter_sizes))
        self.Wh = tf.Variable(tf.random_normal([self.h, n_hidden], stddev=0.01), name='Wh')
        self.bh = tf.Variable(tf.constant(0.1, shape=[n_hidden]), name="bh")

        self.Wo = tf.Variable(tf.random_normal([n_hidden, num_classes], stddev=0.01), name='Wo')
        self.bo = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="bo")

    @staticmethod
    def _feature_dim(num_filters, num_poolings, num_filter_sizes):
        """Number of columns concatenated by similarity_sentence_layer.

        * fea_h: one comU2 value (1 column) per pooling and block-A filter
        * fea_a: one comU1 pair (2 columns) per pooling and pair of filter sizes
        * fea_b: one comU1 pair per block-B pooling, block-B filter size and
          block-B filter (block B skips the last pooling and last filter size)
        """
        holistic = num_poolings * num_filters[0]
        per_window = 2 * num_poolings * num_filter_sizes * num_filter_sizes
        per_dimension = 2 * (num_poolings - 1) * (num_filter_sizes - 1) * num_filters[1]
        return holistic + per_window + per_dimension

    def init_weight(self, shape, name):
        """Create a variable initialised from a truncated normal (stddev 1.0)."""
        var = tf.Variable(tf.truncated_normal(shape, mean=0, stddev=1.0), name=name)
        return var

    def compute_l1_distance(self, x, y):
        """Sum of absolute differences along axis 1."""
        with tf.name_scope('l1_distance'):
            d = tf.reduce_sum(tf.abs(tf.subtract(x, y)), axis=1)
            return d

    def compute_euclidean_distance(self, x, y):
        """Square root of the summed squared differences along axis 1."""
        with tf.name_scope('euclidean_distance'):
            d = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(x, y)), axis=1))
            return d

    def compute_pearson_distance(self, x, y):
        """Covariance of x and y along axis 1 divided by the product of their
        standard deviations.  No guard against a zero denominator."""
        with tf.name_scope("pearson"):
            mid1 = tf.reduce_mean(x * y, axis=1) - \
                        tf.reduce_mean(x, axis=1) * tf.reduce_mean(y, axis=1)
            mid2 = tf.sqrt(tf.reduce_mean(tf.square(x), axis=1) - tf.square(tf.reduce_mean(x, axis=1))) * \
                   tf.sqrt(tf.reduce_mean(tf.square(y), axis=1) - tf.square(tf.reduce_mean(y, axis=1)))
            return mid1 / mid2

    def compute_cosine_distance(self, x, y):
        """Dot product along axis 1 divided by the product of the two norms.
        No guard against a zero norm."""
        with tf.name_scope('cosine_distance'):
            x_norm = tf.sqrt(tf.reduce_sum(tf.square(x), axis=1))
            y_norm = tf.sqrt(tf.reduce_sum(tf.square(y), axis=1))
            x_y = tf.reduce_sum(tf.multiply(x, y), axis=1)
            d = tf.divide(x_y, tf.multiply(x_norm, y_norm))
            return d

    def comU1(self, x, y):
        """Stack the cosine and L1 comparisons of x and y as two columns."""
        result = [self.compute_cosine_distance(x, y), self.compute_l1_distance(x, y)]
        return tf.stack(result, axis=1)

    def comU2(self, x, y):
        """Cosine comparison of x and y as a single trailing column."""
        return tf.expand_dims(self.compute_cosine_distance(x, y), -1)

    def attention(self):
        """Build attention-weighted copies of both inputs.

        Not called anywhere in this class (its call in
        similarity_sentence_layer is commented out).
        NOTE(review): D is assembled as [len1][len2] tensors and then only
        reshaped (not transposed) to a batch-first layout, and the softmax
        runs over a trailing axis that appears to have size 1 - verify the
        intended computation before enabling this method.
        """
        sent1_unstack = tf.unstack(self.input_x1, axis=1)
        sent2_unstack = tf.unstack(self.input_x2, axis=1)
        D = []
        for i in range(len(sent1_unstack)):
            d = []
            for j in range(len(sent2_unstack)):
                dis = self.compute_cosine_distance(sent1_unstack[i], sent2_unstack[j])
                #dis:[batch_size, 1(channels)]
                d.append(dis)
            D.append(d)
            print(i)
        D = tf.reshape(D, [-1, len(sent1_unstack), len(sent2_unstack), 1])
        A = [tf.nn.softmax(tf.expand_dims(tf.reduce_sum(D, axis=i), 2)) for i in [2, 1]]
        atten_embed = []
        atten_embed.append(tf.concat([self.input_x1, A[0] * self.input_x1], 2))
        atten_embed.append(tf.concat([self.input_x2, A[1] * self.input_x2], 2))
        return atten_embed

    def per_dim_conv_layer(self, x, w, b, pooling):
        '''
        Convolve every embedding dimension separately and pool over positions.

        :param x: [batch_size, sentence_length, embed_size, 1]
        :param w: [ws, embedding_size, 1, num_filters]
        :param b: [num_filters, embedding_size]; unstacked below but never
                  added to the convolution output (batch_norm is applied
                  instead)
        :param pooling: reduction applied along axis 1 (e.g. tf.reduce_max)
        :return: pooled tensor, one slice per embedding dimension stacked on
                 axis 2 before pooling
        '''
        # unpack the input along the embedding dimension
        input_unstack = tf.unstack(x, axis=2)
        w_unstack = tf.unstack(w, axis=1)
        b_unstack = tf.unstack(b, axis=1)
        convs = []
        for i in range(x.get_shape()[2]):
            conv = tf.nn.conv1d(input_unstack[i], w_unstack[i], stride=1, padding="VALID")
            conv = slim.batch_norm(inputs=conv, activation_fn=tf.nn.tanh, is_training=self.is_training)
            convs.append(conv)
        conv = tf.stack(convs, axis=2)
        pool = pooling(conv, axis=1)

        return pool

    def bulit_block_A(self, x):
        """Block A: for every pooling and every filter size, run a full-width
        conv2d + batch-norm/tanh and pool along axis 1.

        Returns a list indexed [pooling][filter_size] of pooled tensors.
        """
        out = []
        with tf.name_scope("bulid_block_A"):
            for pooling in self.poolings:
                pools = []
                for i, ws in enumerate(self.filter_sizes):
                    with tf.name_scope("conv-pool-%s" %ws):
                        conv = tf.nn.conv2d(x, self.W1[i], strides=[1, 1, 1, 1], padding="VALID")
                        conv = slim.batch_norm(inputs=conv, activation_fn=tf.nn.tanh, is_training=self.is_training)
                        pool = pooling(conv, axis=1)
                    pools.append(pool)
                out.append(pools)
            return out

    def bulid_block_B(self, x):
        """Block B: per-dimension convolutions for every pooling except the
        last and every filter size except the last.

        Returns a list indexed [pooling][filter_size] of pooled tensors.
        """
        out = []
        with tf.name_scope("bulid_block_B"):
            for pooling in self.poolings[:-1]:
                pools = []
                with tf.name_scope("conv-pool"):
                    for i, ws in enumerate(self.filter_sizes[:-1]):
                        with tf.name_scope("per_conv-pool-%s" % ws):
                            pool = self.per_dim_conv_layer(x, self.W2[i], self.b2[i], pooling)
                        pools.append(pool)
                    out.append(pools)
            return out

    def similarity_sentence_layer(self):
        """Compare the block-A and block-B representations of both sentences
        and concatenate all comparison values along axis 1.

        The number of output columns equals ``self.h`` (see _feature_dim).
        """
        sent1 = self.bulit_block_A(self.input_x1)
        sent2 = self.bulit_block_A(self.input_x2)
        fea_h = []
        with tf.name_scope("cal_dis_with_alg1"):
            for i in range(len(self.poolings)):
                regM1 = tf.concat(sent1[i], 1)
                regM2 = tf.concat(sent2[i], 1)
                for k in range(self.num_filters[0]):
                    fea_h.append(self.comU2(regM1[:, :, k], regM2[:, :, k]))

        fea_a = []
        with tf.name_scope("cal_dis_with_alg2_2-9"):
            for i in range(len(self.poolings)):
                for j in range(len(self.filter_sizes)):
                    for k in range(len(self.filter_sizes)):
                        fea_a.append(self.comU1(sent1[i][j][:, 0, :], sent2[i][k][:, 0, :]))

        sent1 = self.bulid_block_B(self.input_x1)
        sent2 = self.bulid_block_B(self.input_x2)

        fea_b = []
        with tf.name_scope("cal_dis_with_alg2_last"):
            for i in range(len(self.poolings)-1):
                for j in range(len(self.filter_sizes)-1):
                    for k in range(self.num_filters[1]):
                        fea_b.append(self.comU1(sent1[i][j][:, :, k], sent2[i][j][:, :, k]))
        return tf.concat(fea_h + fea_a + fea_b, 1)

    def similarity_measure_layer(self, is_training=True):
        """Build the classifier head, loss and accuracy on top of the
        similarity features.

        :param is_training: forwarded to slim.batch_norm in the conv layers

        Sets self.h_drop, self.scores, self.output, self.loss and
        self.accuracy.
        """
        self.is_training = is_training
        fea = self.similarity_sentence_layer()
        # NOTE(review): h_drop is computed but the hidden layer below reads
        # `fea`, so dropout_keep_prob currently has no effect on the output -
        # confirm whether this is intended.
        self.h_drop = tf.nn.dropout(fea, self.dropout_keep_prob)
        with tf.name_scope("full_connect_layer"):
            h = tf.nn.tanh(tf.matmul(fea, self.Wh) + self.bh)
            self.scores = tf.matmul(h, self.Wo) + self.bo
            self.output = tf.nn.softmax(self.scores)

        # L2 regularization (fixed scale 1e-4) over all trainable variables.
        reg = tf.contrib.layers.apply_regularization(tf.contrib.layers.l2_regularizer(1e-4), tf.trainable_variables())
        with tf.name_scope("loss"):
            # Summed squared error between the one-hot labels and the softmax
            # output, plus the regularization term.
            self.loss = tf.reduce_sum(tf.square(tf.subtract(self.input_y, self.output))) + reg

        with tf.name_scope("accuracy"):
            self.accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(self.input_y, 1), tf.argmax(self.scores, 1)), tf.float32))
