From 08ab0252c84af2d9c439b5f6de00cda1a54b2545 Mon Sep 17 00:00:00 2001
From: Philip
Date: Tue, 28 Jul 2020 01:36:07 +0000
Subject: [PATCH] update for tf2.1.0

---
 src/api.py       | 18 +++++-----
 src/model_ner.py | 79 ++++++++++++++++++++++----------------------
 src/model_pos.py | 79 ++++++++++++++++++++++----------------------
 src/model_ws.py  | 85 ++++++++++++++++++++++++------------------------
 4 files changed, 129 insertions(+), 132 deletions(-)

diff --git a/src/api.py b/src/api.py
index 24b3782..20d4f21 100644
--- a/src/api.py
+++ b/src/api.py
@@ -50,9 +50,9 @@ def __init__(self, data_dir, disable_cuda=True):
 
         with tf.Graph().as_default():
             model = model_ws.Model(config)
-            model.sess = tf.Session()
-            model.sess.run(tf.global_variables_initializer())
-            saver = tf.train.Saver()
+            model.sess = tf.compat.v1.Session()
+            model.sess.run(tf.compat.v1.global_variables_initializer())
+            saver = tf.compat.v1.train.Saver()
             saver.restore(model.sess, os.path.join(data_dir, "model_ws", config.name))
 
         if disable_cuda and env_backup:
@@ -170,9 +170,9 @@ def __init__(self, data_dir, disable_cuda=True):
 
         with tf.Graph().as_default():
             model = model_pos.Model(config)
-            model.sess = tf.Session()
-            model.sess.run(tf.global_variables_initializer())
-            saver = tf.train.Saver()
+            model.sess = tf.compat.v1.Session()
+            model.sess.run(tf.compat.v1.global_variables_initializer())
+            saver = tf.compat.v1.train.Saver()
             saver.restore(model.sess, os.path.join(data_dir, "model_pos", config.name))
 
         if disable_cuda and env_backup:
@@ -272,9 +272,9 @@ def __init__(self, data_dir, disable_cuda=True):
 
         with tf.Graph().as_default():
             model = model_ner.Model(config)
-            model.sess = tf.Session()
-            model.sess.run(tf.global_variables_initializer())
-            saver = tf.train.Saver()
+            model.sess = tf.compat.v1.Session()
+            model.sess.run(tf.compat.v1.global_variables_initializer())
+            saver = tf.compat.v1.train.Saver()
             saver.restore(model.sess, os.path.join(data_dir, "model_ner", config.name))
 
         if disable_cuda and env_backup:
diff --git a/src/model_ner.py b/src/model_ner.py
index 31594f5..73802a2 100644
--- a/src/model_ner.py
+++ b/src/model_ner.py
@@ -38,7 +38,7 @@ def __init__(self):
 class Cross_BiLSTM:
     def __init__(self, name="cross-bilstm", input_d=300, hidden_d=100, layers=2):
         self.name = name
-        self.kr = tf.placeholder(tf.float32, [])
+        self.kr = tf.compat.v1.placeholder(tf.float32, [])
 
         self.f_cell_list = []
         self.b_cell_list = []
@@ -54,8 +54,8 @@ def __init__(self, name="cross-bilstm", input_d=300, hidden_d=100, layers=2):
         return
 
     def create_cell(self, input_d, hidden_d, is_top_cell, name):
-        cell = tf.nn.rnn_cell.LSTMCell(hidden_d, name=name)
-        cell = tf.nn.rnn_cell.DropoutWrapper(
+        cell = tf.compat.v1.nn.rnn_cell.LSTMCell(hidden_d, name=name)
+        cell = tf.compat.v1.nn.rnn_cell.DropoutWrapper(
             cell,
             input_keep_prob = self.kr,
             state_keep_prob = self.kr,
@@ -78,14 +78,14 @@ def transform(self, X, L):
         H = X
         S_fw = []
         S_bw = []
-        with tf.variable_scope(self.name):
+        with tf.compat.v1.variable_scope(self.name):
             for f_cell, b_cell in zip(self.f_cell_list, self.b_cell_list):
                 # top_output: [2, batch, length, hidden_d]
                 #     axis0: forward/backward
                 # last_state: [2, 2, batch, hidden_d]
                 #     axis0: forward/backward
                 #     axis1: LSTM c/h
-                top_output, last_state = tf.nn.bidirectional_dynamic_rnn(
+                top_output, last_state = tf.compat.v1.nn.bidirectional_dynamic_rnn(
                     f_cell,
                     b_cell,
                     H,
@@ -105,13 +105,13 @@ def __init__(self, name="att", heads=5, dk_head=60, dv_head=60):
         self.VP_list = []
         for i in range(heads):
             self.QP_list.append(
-                tf.layers.Dense(dk_head, use_bias=False, name=f"{name}/QP_{i}")
+                tf.compat.v1.layers.Dense(dk_head, use_bias=False, name=f"{name}/QP_{i}")
             )
             self.KP_list.append(
-                tf.layers.Dense(dk_head, use_bias=False, name=f"{name}/KP_{i}")
+                tf.compat.v1.layers.Dense(dk_head, use_bias=False, name=f"{name}/KP_{i}")
             )
             self.VP_list.append(
-                tf.layers.Dense(dv_head, use_bias=False, name=f"{name}/VP_{i}")
+                tf.compat.v1.layers.Dense(dv_head, use_bias=False, name=f"{name}/VP_{i}")
             )
         return
 
@@ -124,7 +124,7 @@ def attention_transform(self, Q, K, V, L=None):
         C: [batch, q, dv]
         """
         # Attention score
-        dk = tf.cast(tf.shape(K)[2], tf.float32)
+        dk = tf.cast(tf.shape(input=K)[2], tf.float32)
         score = tf.matmul(Q, K, transpose_b=True) # [batch, q, m]
         score = score/tf.sqrt(dk) # [batch, q, m]
 
@@ -132,9 +132,9 @@ def attention_transform(self, Q, K, V, L=None):
         if L is not None:
             mask = tf.sequence_mask(L) # [batch, m]
             mask = tf.expand_dims(mask, 1) # [batch, 1, m]
-            mask = tf.tile(mask, [1, tf.shape(score)[1], 1]) # [batch, q, m]
+            mask = tf.tile(mask, [1, tf.shape(input=score)[1], 1]) # [batch, q, m]
             worst_score = tf.ones_like(score) * tf.constant(float("-inf")) # [batch, q, m]
-            score = tf.where(mask, score, worst_score) # [batch, q, m]
+            score = tf.compat.v1.where(mask, score, worst_score) # [batch, q, m]
 
         # Context vector
         alpha = tf.nn.softmax(score, 2) # [batch, q, m]
@@ -148,9 +148,9 @@ def transform(self, Q, K, V, L=None):
         V: [batch, m, dv]
         C: [batch, q, heads * dv_head]
         """
-        batch = tf.shape(Q)[0]
-        q = tf.shape(Q)[1]
-        m = tf.shape(K)[1]
+        batch = tf.shape(input=Q)[0]
+        q = tf.shape(input=Q)[1]
+        m = tf.shape(input=K)[1]
 
         C_list = []
         for i in range(self.heads):
@@ -184,42 +184,42 @@ def create_hyper_parameter(self, config):
 
     def create_embedding(self):
         """Create trainable unknown vectors."""
-        with tf.variable_scope(self.name, initializer=tf.random_normal_initializer(stddev=0.1)):
-            self.unknown_c_v = tf.get_variable("unknown_c_v", [1, self.c_embedding_d+self.c_feature_d])
-            self.unknown_w_v = tf.get_variable("unknown_w_v", [1, self.w_embedding_d+self.w_feature_d])
+        with tf.compat.v1.variable_scope(self.name, initializer=tf.compat.v1.random_normal_initializer(stddev=0.1)):
+            self.unknown_c_v = tf.compat.v1.get_variable("unknown_c_v", [1, self.c_embedding_d+self.c_feature_d])
+            self.unknown_w_v = tf.compat.v1.get_variable("unknown_w_v", [1, self.w_embedding_d+self.w_feature_d])
         return
 
    def create_encoder(self):
         """Create an Att-BiLSTM encoder."""
         # Input sequence batch, s_l_max
-        self.s_l = tf.placeholder( tf.int32, [None])
-        self.c_k = tf.placeholder( tf.int32, [None, None])
-        self.c_v = tf.placeholder(tf.float32, [None, None, self.c_embedding_d+self.c_feature_d])
-        self.w_k = tf.placeholder( tf.int32, [None, None])
-        self.w_v = tf.placeholder(tf.float32, [None, None, self.w_embedding_d+self.w_feature_d])
+        self.s_l = tf.compat.v1.placeholder( tf.int32, [None])
+        self.c_k = tf.compat.v1.placeholder( tf.int32, [None, None])
+        self.c_v = tf.compat.v1.placeholder(tf.float32, [None, None, self.c_embedding_d+self.c_feature_d])
+        self.w_k = tf.compat.v1.placeholder( tf.int32, [None, None])
+        self.w_v = tf.compat.v1.placeholder(tf.float32, [None, None, self.w_embedding_d+self.w_feature_d])
 
-        batch = tf.shape(self.c_v)[0]
-        s_l_max = tf.shape(self.c_v)[1]
+        batch = tf.shape(input=self.c_v)[0]
+        s_l_max = tf.shape(input=self.c_v)[1]
 
         # Character
         c_known = tf.equal(tf.reshape(self.c_k, [batch*s_l_max]), 1) # [batch*s_l_max]
         c_known_v = tf.reshape(self.c_v, [batch*s_l_max, self.c_embedding_d+self.c_feature_d]) # [batch*s_l_max, c_embedding_d+c_feature_d]
         c_unknown_v = tf.tile(self.unknown_c_v, [batch*s_l_max, 1]) # [batch*s_l_max, c_embedding_d+c_feature_d]
-        c_v = tf.where(c_known, c_known_v, c_unknown_v) # [batch*s_l_max, c_embedding_d+c_feature_d]
+        c_v = tf.compat.v1.where(c_known, c_known_v, c_unknown_v) # [batch*s_l_max, c_embedding_d+c_feature_d]
         c_v = tf.reshape(c_v, [batch, s_l_max, self.c_embedding_d+self.c_feature_d]) # [batch, s_l_max, c_embedding_d+c_feature_d]
 
         # Word
         w_known = tf.equal(tf.reshape(self.w_k, [batch*s_l_max]), 1) # [batch*s_l_max]
         w_known_v = tf.reshape(self.w_v, [batch*s_l_max, self.w_embedding_d+self.w_feature_d]) # [batch*s_l_max, w_embedding_d+w_feature_d]
         w_unknown_v = tf.tile(self.unknown_w_v, [batch*s_l_max, 1]) # [batch*s_l_max, w_embedding_d+w_feature_d]
-        w_v = tf.where(w_known, w_known_v, w_unknown_v) # [batch*s_l_max, w_embedding_d+w_feature_d]
+        w_v = tf.compat.v1.where(w_known, w_known_v, w_unknown_v) # [batch*s_l_max, w_embedding_d+w_feature_d]
         w_v = tf.reshape(w_v, [batch, s_l_max, self.w_embedding_d+self.w_feature_d]) # [batch, s_l_max, w_embedding_d+w_feature_d]
 
         cw_v = tf.concat([c_v, w_v], axis=2) # [batch, s_l_max, c_embedding_d+c_feature_d+w_embedding_d+w_feature_d]
 
         # BiLSTM
-        with tf.variable_scope(self.name):
+        with tf.compat.v1.variable_scope(self.name):
             self.bilstm = Cross_BiLSTM(
                 input_d = self.c_embedding_d + self.c_feature_d + self.w_embedding_d + self.w_feature_d,
                 hidden_d = self.hidden_d,
@@ -231,7 +231,7 @@ def create_encoder(self):
         if self.attention_heads == 0:
             self.cw_a = cw_h
         else:
-            with tf.variable_scope(self.name):
+            with tf.compat.v1.variable_scope(self.name):
                 head_d = int(self.w_hidden_d * 2 / self.attention_heads)
                 att = Att(
                     heads = self.attention_heads,
@@ -247,30 +247,30 @@ def create_classifier(self):
 
         # Output
         # self.logits are not actually logits, because a tf.nn.log_softmax() is omitted
-        output_layer = tf.layers.Dense(
+        output_layer = tf.compat.v1.layers.Dense(
             self.output_d,
             use_bias = True,
             name = "output_layer",
         )
-        with tf.variable_scope(self.name):
+        with tf.compat.v1.variable_scope(self.name):
             self.logits = output_layer(self.cw_a) # [batch, s_l_max, output_d]
 
         # Loss
-        self.o_i = tf.placeholder( tf.int32, [None, None]) # [batch, s_l_max]
-        self.o_mask = tf.placeholder(tf.float32, [None, None]) # [batch, s_l_max]
+        self.o_i = tf.compat.v1.placeholder( tf.int32, [None, None]) # [batch, s_l_max]
+        self.o_mask = tf.compat.v1.placeholder(tf.float32, [None, None]) # [batch, s_l_max]
         cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
             labels = self.o_i,
             logits = self.logits,
         )
-        self.loss = tf.reduce_sum(cross_entropy * self.o_mask) / tf.reduce_sum(self.o_mask)
+        self.loss = tf.reduce_sum(input_tensor=cross_entropy * self.o_mask) / tf.reduce_sum(input_tensor=self.o_mask)
 
         # Optimization
-        optimizer = tf.contrib.opt.NadamOptimizer(learning_rate=self.learning_rate)
+        optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=self.learning_rate)
         gv_list = optimizer.compute_gradients(self.loss)
         g_list, v_list = zip(*gv_list)
         g_list = [
-            tf.convert_to_tensor(g) if isinstance(g, tf.IndexedSlices) else g
+            tf.convert_to_tensor(value=g) if isinstance(g, tf.IndexedSlices) else g
             for g in g_list
         ]
         if self.max_gradient_norm > 0:
@@ -442,15 +442,14 @@ def predict_label_for_a_batch(self, sample_list):
 def main():
     config = Config()
     model = Model(config)
-    tf_config = tf.ConfigProto()
+    tf_config = tf.compat.v1.ConfigProto()
     tf_config.gpu_options.per_process_gpu_memory_fraction = 0.45
-    with tf.Session(config=tf_config) as sess:
-        sess.run(tf.global_variables_initializer())
-        for v in tf.trainable_variables():
+    with tf.compat.v1.Session(config=tf_config) as sess:
+        sess.run(tf.compat.v1.global_variables_initializer())
+        for v in tf.compat.v1.trainable_variables():
             print(v)
     return
 
 if __name__ == "__main__":
     main()
     sys.exit()
-    
\ No newline at end of file
diff --git a/src/model_pos.py b/src/model_pos.py
index 1e9f890..9d06195 100644
--- a/src/model_pos.py
+++ b/src/model_pos.py
@@ -37,7 +37,7 @@ def __init__(self):
 class Cross_BiLSTM:
     def __init__(self, name="cross-bilstm", input_d=300, hidden_d=100, layers=2):
         self.name = name
-        self.kr = tf.placeholder(tf.float32, [])
+        self.kr = tf.compat.v1.placeholder(tf.float32, [])
 
         self.f_cell_list = []
         self.b_cell_list = []
@@ -53,8 +53,8 @@ def __init__(self, name="cross-bilstm", input_d=300, hidden_d=100, layers=2):
         return
 
     def create_cell(self, input_d, hidden_d, is_top_cell, name):
-        cell = tf.nn.rnn_cell.LSTMCell(hidden_d, name=name)
-        cell = tf.nn.rnn_cell.DropoutWrapper(
+        cell = tf.compat.v1.nn.rnn_cell.LSTMCell(hidden_d, name=name)
+        cell = tf.compat.v1.nn.rnn_cell.DropoutWrapper(
             cell,
             input_keep_prob = self.kr,
             state_keep_prob = self.kr,
@@ -77,14 +77,14 @@ def transform(self, X, L):
         H = X
         S_fw = []
         S_bw = []
-        with tf.variable_scope(self.name):
+        with tf.compat.v1.variable_scope(self.name):
             for f_cell, b_cell in zip(self.f_cell_list, self.b_cell_list):
                 # top_output: [2, batch, length, hidden_d]
                 #     axis0: forward/backward
                 # last_state: [2, 2, batch, hidden_d]
                 #     axis0: forward/backward
                 #     axis1: LSTM c/h
-                top_output, last_state = tf.nn.bidirectional_dynamic_rnn(
+                top_output, last_state = tf.compat.v1.nn.bidirectional_dynamic_rnn(
                     f_cell,
                     b_cell,
                     H,
@@ -104,13 +104,13 @@ def __init__(self, name="att", heads=5, dk_head=60, dv_head=60):
         self.VP_list = []
         for i in range(heads):
             self.QP_list.append(
-                tf.layers.Dense(dk_head, use_bias=False, name=f"{name}/QP_{i}")
+                tf.compat.v1.layers.Dense(dk_head, use_bias=False, name=f"{name}/QP_{i}")
             )
             self.KP_list.append(
-                tf.layers.Dense(dk_head, use_bias=False, name=f"{name}/KP_{i}")
+                tf.compat.v1.layers.Dense(dk_head, use_bias=False, name=f"{name}/KP_{i}")
             )
             self.VP_list.append(
-                tf.layers.Dense(dv_head, use_bias=False, name=f"{name}/VP_{i}")
+                tf.compat.v1.layers.Dense(dv_head, use_bias=False, name=f"{name}/VP_{i}")
             )
         return
 
@@ -123,7 +123,7 @@ def attention_transform(self, Q, K, V, L=None):
         C: [batch, q, dv]
         """
         # Attention score
-        dk = tf.cast(tf.shape(K)[2], tf.float32)
+        dk = tf.cast(tf.shape(input=K)[2], tf.float32)
         score = tf.matmul(Q, K, transpose_b=True) # [batch, q, m]
         score = score/tf.sqrt(dk) # [batch, q, m]
 
@@ -131,9 +131,9 @@ def attention_transform(self, Q, K, V, L=None):
         if L is not None:
             mask = tf.sequence_mask(L) # [batch, m]
             mask = tf.expand_dims(mask, 1) # [batch, 1, m]
-            mask = tf.tile(mask, [1, tf.shape(score)[1], 1]) # [batch, q, m]
+            mask = tf.tile(mask, [1, tf.shape(input=score)[1], 1]) # [batch, q, m]
             worst_score = tf.ones_like(score) * tf.constant(float("-inf")) # [batch, q, m]
-            score = tf.where(mask, score, worst_score) # [batch, q, m]
+            score = tf.compat.v1.where(mask, score, worst_score) # [batch, q, m]
 
         # Context vector
         alpha = tf.nn.softmax(score, 2) # [batch, q, m]
@@ -147,9 +147,9 @@ def transform(self, Q, K, V, L=None):
         V: [batch, m, dv]
         C: [batch, q, heads * dv_head]
         """
-        batch = tf.shape(Q)[0]
-        q = tf.shape(Q)[1]
-        m = tf.shape(K)[1]
+        batch = tf.shape(input=Q)[0]
+        q = tf.shape(input=Q)[1]
+        m = tf.shape(input=K)[1]
 
         C_list = []
         for i in range(self.heads):
@@ -183,42 +183,42 @@ def create_hyper_parameter(self, config):
 
     def create_embedding(self):
         """Create trainable unknown vectors."""
-        with tf.variable_scope(self.name, initializer=tf.random_normal_initializer(stddev=0.1)):
-            self.unknown_c_v = tf.get_variable("unknown_c_v", [1, self.c_embedding_d+self.c_feature_d])
-            self.unknown_w_v = tf.get_variable("unknown_w_v", [1, self.w_embedding_d+self.w_feature_d])
+        with tf.compat.v1.variable_scope(self.name, initializer=tf.compat.v1.random_normal_initializer(stddev=0.1)):
+            self.unknown_c_v = tf.compat.v1.get_variable("unknown_c_v", [1, self.c_embedding_d+self.c_feature_d])
+            self.unknown_w_v = tf.compat.v1.get_variable("unknown_w_v", [1, self.w_embedding_d+self.w_feature_d])
         return
 
    def create_encoder(self):
         """Create an Att-BiLSTM encoder."""
         # Input sequence batch, s_l_max
-        self.s_l = tf.placeholder( tf.int32, [None])
-        self.c_k = tf.placeholder( tf.int32, [None, None])
-        self.c_v = tf.placeholder(tf.float32, [None, None, self.c_embedding_d+self.c_feature_d])
-        self.w_k = tf.placeholder( tf.int32, [None, None])
-        self.w_v = tf.placeholder(tf.float32, [None, None, self.w_embedding_d+self.w_feature_d])
+        self.s_l = tf.compat.v1.placeholder( tf.int32, [None])
+        self.c_k = tf.compat.v1.placeholder( tf.int32, [None, None])
+        self.c_v = tf.compat.v1.placeholder(tf.float32, [None, None, self.c_embedding_d+self.c_feature_d])
+        self.w_k = tf.compat.v1.placeholder( tf.int32, [None, None])
+        self.w_v = tf.compat.v1.placeholder(tf.float32, [None, None, self.w_embedding_d+self.w_feature_d])
 
-        batch = tf.shape(self.c_v)[0]
-        s_l_max = tf.shape(self.c_v)[1]
+        batch = tf.shape(input=self.c_v)[0]
+        s_l_max = tf.shape(input=self.c_v)[1]
 
         # Character
         c_known = tf.equal(tf.reshape(self.c_k, [batch*s_l_max]), 1) # [batch*s_l_max]
         c_known_v = tf.reshape(self.c_v, [batch*s_l_max, self.c_embedding_d+self.c_feature_d]) # [batch*s_l_max, c_embedding_d+c_feature_d]
         c_unknown_v = tf.tile(self.unknown_c_v, [batch*s_l_max, 1]) # [batch*s_l_max, c_embedding_d+c_feature_d]
-        c_v = tf.where(c_known, c_known_v, c_unknown_v) # [batch*s_l_max, c_embedding_d+c_feature_d]
+        c_v = tf.compat.v1.where(c_known, c_known_v, c_unknown_v) # [batch*s_l_max, c_embedding_d+c_feature_d]
         c_v = tf.reshape(c_v, [batch, s_l_max, self.c_embedding_d+self.c_feature_d]) # [batch, s_l_max, c_embedding_d+c_feature_d]
 
         # Word
         w_known = tf.equal(tf.reshape(self.w_k, [batch*s_l_max]), 1) # [batch*s_l_max]
         w_known_v = tf.reshape(self.w_v, [batch*s_l_max, self.w_embedding_d+self.w_feature_d]) # [batch*s_l_max, w_embedding_d+w_feature_d]
         w_unknown_v = tf.tile(self.unknown_w_v, [batch*s_l_max, 1]) # [batch*s_l_max, w_embedding_d+w_feature_d]
-        w_v = tf.where(w_known, w_known_v, w_unknown_v) # [batch*s_l_max, w_embedding_d+w_feature_d]
+        w_v = tf.compat.v1.where(w_known, w_known_v, w_unknown_v) # [batch*s_l_max, w_embedding_d+w_feature_d]
         w_v = tf.reshape(w_v, [batch, s_l_max, self.w_embedding_d+self.w_feature_d]) # [batch, s_l_max, w_embedding_d+w_feature_d]
 
         cw_v = tf.concat([c_v, w_v], axis=2) # [batch, s_l_max, c_embedding_d+c_feature_d+w_embedding_d+w_feature_d]
 
         # BiLSTM
-        with tf.variable_scope(self.name):
+        with tf.compat.v1.variable_scope(self.name):
             self.bilstm = Cross_BiLSTM(
                 input_d = self.c_embedding_d + self.c_feature_d + self.w_embedding_d + self.w_feature_d,
                 hidden_d = self.hidden_d,
@@ -230,7 +230,7 @@ def create_encoder(self):
         if self.attention_heads == 0:
             self.cw_a = cw_h
         else:
-            with tf.variable_scope(self.name):
+            with tf.compat.v1.variable_scope(self.name):
                 head_d = int(self.w_hidden_d * 2 / self.attention_heads)
                 att = Att(
                     heads = self.attention_heads,
@@ -246,30 +246,30 @@ def create_classifier(self):
 
         # Output
         # self.logits are not actually logits, because a tf.nn.log_softmax() is omitted
-        output_layer = tf.layers.Dense(
+        output_layer = tf.compat.v1.layers.Dense(
             self.output_d,
             use_bias = True,
             name = "output_layer",
         )
-        with tf.variable_scope(self.name):
+        with tf.compat.v1.variable_scope(self.name):
             self.logits = output_layer(self.cw_a) # [batch, s_l_max, output_d]
 
         # Loss
-        self.o_i = tf.placeholder( tf.int32, [None, None]) # [batch, s_l_max]
-        self.o_mask = tf.placeholder(tf.float32, [None, None]) # [batch, s_l_max]
+        self.o_i = tf.compat.v1.placeholder( tf.int32, [None, None]) # [batch, s_l_max]
+        self.o_mask = tf.compat.v1.placeholder(tf.float32, [None, None]) # [batch, s_l_max]
         cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
             labels = self.o_i,
             logits = self.logits,
         )
-        self.loss = tf.reduce_sum(cross_entropy * self.o_mask) / tf.reduce_sum(self.o_mask)
+        self.loss = tf.reduce_sum(input_tensor=cross_entropy * self.o_mask) / tf.reduce_sum(input_tensor=self.o_mask)
 
         # Optimization
-        optimizer = tf.contrib.opt.NadamOptimizer(learning_rate=self.learning_rate)
+        optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=self.learning_rate)
         gv_list = optimizer.compute_gradients(self.loss)
         g_list, v_list = zip(*gv_list)
         g_list = [
-            tf.convert_to_tensor(g) if isinstance(g, tf.IndexedSlices) else g
+            tf.convert_to_tensor(value=g) if isinstance(g, tf.IndexedSlices) else g
             for g in g_list
         ]
         if self.max_gradient_norm > 0:
@@ -439,15 +439,14 @@ def predict_label_for_a_batch(self, sample_list):
 def main():
     config = Config()
     model = Model(config)
-    tf_config = tf.ConfigProto()
+    tf_config = tf.compat.v1.ConfigProto()
    tf_config.gpu_options.per_process_gpu_memory_fraction = 0.45
-    with tf.Session(config=tf_config) as sess:
-        sess.run(tf.global_variables_initializer())
-        for v in tf.trainable_variables():
+    with tf.compat.v1.Session(config=tf_config) as sess:
+        sess.run(tf.compat.v1.global_variables_initializer())
+        for v in tf.compat.v1.trainable_variables():
             print(v)
     return
 
 if __name__ == "__main__":
     main()
     sys.exit()
-    
\ No newline at end of file
diff --git a/src/model_ws.py b/src/model_ws.py
index ccf0816..d022900 100644
--- a/src/model_ws.py
+++ b/src/model_ws.py
@@ -34,7 +34,7 @@ def __init__(self):
 class BiLSTM:
     def __init__(self, name="bilstm", input_d=300, hidden_d=100, layers=2):
         self.name = name
-        self.kr = tf.placeholder(tf.float32, [])
+        self.kr = tf.compat.v1.placeholder(tf.float32, [])
 
         f_cell_list = []
         b_cell_list = []
@@ -47,13 +47,13 @@ def __init__(self, name="bilstm", input_d=300, hidden_d=100, layers=2):
             b_cell_list.append(
                 self.create_cell(current_input_d, hidden_d, is_top_cell)
             )
-        self.f_cell = tf.nn.rnn_cell.MultiRNNCell(f_cell_list)
-        self.b_cell = tf.nn.rnn_cell.MultiRNNCell(b_cell_list)
+        self.f_cell = tf.compat.v1.nn.rnn_cell.MultiRNNCell(f_cell_list)
+        self.b_cell = tf.compat.v1.nn.rnn_cell.MultiRNNCell(b_cell_list)
         return
 
     def create_cell(self, input_d, hidden_d, is_top_cell):
-        cell = tf.nn.rnn_cell.LSTMCell(hidden_d)
-        cell = tf.nn.rnn_cell.DropoutWrapper(
+        cell = tf.compat.v1.nn.rnn_cell.LSTMCell(hidden_d)
+        cell = tf.compat.v1.nn.rnn_cell.DropoutWrapper(
             cell,
             input_keep_prob = self.kr,
             state_keep_prob = self.kr,
@@ -71,10 +71,10 @@ def transform(self, X, L):
         L: [batch]
         H: [batch, length, hidden_d*2]
         """
-        with tf.variable_scope(self.name):
+        with tf.compat.v1.variable_scope(self.name):
            # top_output: [2, batch, length, hidden_d], axis0: forward/backward
            # last_state: [2, layers, 2, batch, hidden_d], axis0: forward/backward, axis2: LSTM c/h
-            top_output, last_state = tf.nn.bidirectional_dynamic_rnn(
+            top_output, last_state = tf.compat.v1.nn.bidirectional_dynamic_rnn(
                 self.f_cell,
                 self.b_cell,
                 X,
@@ -87,7 +87,7 @@
 class Cross_BiLSTM:
     def __init__(self, name="cross-bilstm", input_d=300, hidden_d=100, layers=2):
         self.name = name
-        self.kr = tf.placeholder(tf.float32, [])
+        self.kr = tf.compat.v1.placeholder(tf.float32, [])
 
         self.f_cell_list = []
         self.b_cell_list = []
@@ -103,8 +103,8 @@ def __init__(self, name="cross-bilstm", input_d=300, hidden_d=100, layers=2):
         return
 
     def create_cell(self, input_d, hidden_d, is_top_cell, name):
-        cell = tf.nn.rnn_cell.LSTMCell(hidden_d, name=name)
-        cell = tf.nn.rnn_cell.DropoutWrapper(
+        cell = tf.compat.v1.nn.rnn_cell.LSTMCell(hidden_d, name=name)
+        cell = tf.compat.v1.nn.rnn_cell.DropoutWrapper(
             cell,
             input_keep_prob = self.kr,
             state_keep_prob = self.kr,
@@ -123,11 +123,11 @@ def transform(self, X, L):
         H: [batch, length, hidden_d*2]
         """
         H = X
-        with tf.variable_scope(self.name):
+        with tf.compat.v1.variable_scope(self.name):
             for f_cell, b_cell in zip(self.f_cell_list, self.b_cell_list):
                 # top_output: [2, batch, length, hidden_d], axis0: forward/backward
                 # last_state: [2, layers, 2, batch, hidden_d], axis0: forward/backward, axis2: LSTM c/h
-                top_output, last_state = tf.nn.bidirectional_dynamic_rnn(
+                top_output, last_state = tf.compat.v1.nn.bidirectional_dynamic_rnn(
                     f_cell,
                     b_cell,
                     H,
@@ -145,13 +145,13 @@ def __init__(self, name="att", heads=5, dk_head=60, dv_head=60):
         self.VP_list = []
         for i in range(heads):
             self.QP_list.append(
-                tf.layers.Dense(dk_head, use_bias=False, name=f"{name}/QP_{i}")
+                tf.compat.v1.layers.Dense(dk_head, use_bias=False, name=f"{name}/QP_{i}")
             )
             self.KP_list.append(
-                tf.layers.Dense(dk_head, use_bias=False, name=f"{name}/KP_{i}")
+                tf.compat.v1.layers.Dense(dk_head, use_bias=False, name=f"{name}/KP_{i}")
             )
             self.VP_list.append(
-                tf.layers.Dense(dv_head, use_bias=False, name=f"{name}/VP_{i}")
+                tf.compat.v1.layers.Dense(dv_head, use_bias=False, name=f"{name}/VP_{i}")
             )
         return
 
@@ -164,7 +164,7 @@ def attention_transform(self, Q, K, V, L=None):
         C: [batch, q, dv]
         """
         # Attention score
-        dk = tf.cast(tf.shape(K)[2], tf.float32)
+        dk = tf.cast(tf.shape(input=K)[2], tf.float32)
         score = tf.matmul(Q, K, transpose_b=True) # [batch, q, m]
         score = score/tf.sqrt(dk) # [batch, q, m]
 
@@ -172,9 +172,9 @@ def attention_transform(self, Q, K, V, L=None):
         if L is not None:
             mask = tf.sequence_mask(L) # [batch, m]
             mask = tf.expand_dims(mask, 1) # [batch, 1, m]
-            mask = tf.tile(mask, [1, tf.shape(score)[1], 1]) # [batch, q, m]
+            mask = tf.tile(mask, [1, tf.shape(input=score)[1], 1]) # [batch, q, m]
             worst_score = tf.ones_like(score) * tf.constant(float("-inf")) # [batch, q, m]
-            score = tf.where(mask, score, worst_score) # [batch, q, m]
+            score = tf.compat.v1.where(mask, score, worst_score) # [batch, q, m]
 
         # Context vector
         alpha = tf.nn.softmax(score, 2) # [batch, q, m]
@@ -188,9 +188,9 @@ def transform(self, Q, K, V, L=None):
         V: [batch, m, dv]
         C: [batch, q, heads * dv_head]
         """
-        batch = tf.shape(Q)[0]
-        q = tf.shape(Q)[1]
-        m = tf.shape(K)[1]
+        batch = tf.shape(input=Q)[0]
+        q = tf.shape(input=Q)[1]
+        m = tf.shape(input=K)[1]
 
         C_list = []
         for i in range(self.heads):
@@ -224,30 +224,30 @@ def create_hyper_parameter(self, config):
 
     def create_embedding(self):
         """Create a trainable unknown vector."""
-        with tf.variable_scope(self.name, initializer=tf.random_normal_initializer(stddev=0.1)):
-            self.unknown_w_v = tf.get_variable("unknown_w_v", [1, self.w_embedding_d+self.w_feature_d])
+        with tf.compat.v1.variable_scope(self.name, initializer=tf.compat.v1.random_normal_initializer(stddev=0.1)):
+            self.unknown_w_v = tf.compat.v1.get_variable("unknown_w_v", [1, self.w_embedding_d+self.w_feature_d])
         return
 
    def create_encoder(self):
         """Create an Att-BiLSTM encoder."""
         # Input sequence [batch, length]
-        self.input_length = tf.placeholder( tf.int32, [None])
-        self.w_k = tf.placeholder( tf.int32, [None, None])
-        self.w_v = tf.placeholder(tf.float32, [None, None, self.w_embedding_d+self.w_feature_d])
+        self.input_length = tf.compat.v1.placeholder( tf.int32, [None])
+        self.w_k = tf.compat.v1.placeholder( tf.int32, [None, None])
+        self.w_v = tf.compat.v1.placeholder(tf.float32, [None, None, self.w_embedding_d+self.w_feature_d])
 
-        batch = tf.shape(self.w_v)[0]
-        length = tf.shape(self.w_v)[1]
+        batch = tf.shape(input=self.w_v)[0]
+        length = tf.shape(input=self.w_v)[1]
 
         # Word
         w_known = tf.equal(tf.reshape(self.w_k, [batch*length]), 1) # [batch * length]
         w_known_v = tf.reshape(self.w_v, [batch*length, self.w_embedding_d+self.w_feature_d]) # [batch * length, w_embedding_d+w_feature_d]
         w_unknown_v = tf.tile(self.unknown_w_v, [batch*length, 1]) # [batch * length, w_embedding_d+w_feature_d]
-        w_v = tf.where(w_known, w_known_v, w_unknown_v) # [batch * length, w_embedding_d+w_feature_d]
+        w_v = tf.compat.v1.where(w_known, w_known_v, w_unknown_v) # [batch * length, w_embedding_d+w_feature_d]
         w_v = tf.reshape(w_v, [batch, length, self.w_embedding_d+self.w_feature_d]) # [batch, length, w_embedding_d+w_feature_d]
 
         # BiLSTM
-        with tf.variable_scope(self.name):
+        with tf.compat.v1.variable_scope(self.name):
             if self.is_cross_bilstm:
                 self.bilstm = Cross_BiLSTM(
                     input_d = self.w_embedding_d + self.w_feature_d,
@@ -266,7 +266,7 @@ def create_encoder(self):
         if self.attention_heads == 0:
             self.w_a = w_h
         else:
-            with tf.variable_scope(self.name):
+            with tf.compat.v1.variable_scope(self.name):
                 head_d = int(self.hidden_d * 2 / self.attention_heads)
                 att = Att(
                     heads = self.attention_heads,
@@ -282,30 +282,30 @@ def create_classifier(self):
 
         # Output
         # self.logits are not actually logits, because a tf.nn.log_softmax() is omitted
-        output_layer = tf.layers.Dense(
+        output_layer = tf.compat.v1.layers.Dense(
             self.output_d,
             use_bias = True,
             name = "output_layer",
         )
-        with tf.variable_scope(self.name):
+        with tf.compat.v1.variable_scope(self.name):
             self.logits = output_layer(self.w_a) # [batch, length, output_d]
 
         # Loss
-        self.o_i = tf.placeholder( tf.int32, [None, None]) # [batch, length]
-        self.o_mask = tf.placeholder(tf.float32, [None, None]) # [batch, length]
+        self.o_i = tf.compat.v1.placeholder( tf.int32, [None, None]) # [batch, length]
+        self.o_mask = tf.compat.v1.placeholder(tf.float32, [None, None]) # [batch, length]
         cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
             labels = self.o_i,
             logits = self.logits,
         )
-        self.loss = tf.reduce_sum(cross_entropy * self.o_mask) / tf.reduce_sum(self.o_mask)
+        self.loss = tf.reduce_sum(input_tensor=cross_entropy * self.o_mask) / tf.reduce_sum(input_tensor=self.o_mask)
 
         # Optimization
-        optimizer = tf.contrib.opt.NadamOptimizer(learning_rate=self.learning_rate)
+        optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=self.learning_rate)
         gv_list = optimizer.compute_gradients(self.loss)
         g_list, v_list = zip(*gv_list)
         g_list = [
-            tf.convert_to_tensor(g) if isinstance(g, tf.IndexedSlices) else g
+            tf.convert_to_tensor(value=g) if isinstance(g, tf.IndexedSlices) else g
             for g in g_list
         ]
         if self.max_gradient_norm > 0:
@@ -434,15 +434,14 @@ def predict_label_for_a_batch(self, sample_list):
 def main():
     config = Config()
     model = Model(config)
-    tf_config = tf.ConfigProto()
+    tf_config = tf.compat.v1.ConfigProto()
     tf_config.gpu_options.per_process_gpu_memory_fraction = 0.45
-    with tf.Session(config=tf_config) as sess:
-        sess.run(tf.global_variables_initializer())
-        for v in tf.trainable_variables():
+    with tf.compat.v1.Session(config=tf_config) as sess:
+        sess.run(tf.compat.v1.global_variables_initializer())
+        for v in tf.compat.v1.trainable_variables():
             print(v)
     return
 
 if __name__ == "__main__":
     main()
     sys.exit()
-    
\ No newline at end of file