rnn for tf 12
ahangchen committed Jun 25, 2017
1 parent 782cf98 commit 4ec8483
Showing 14 changed files with 838 additions and 74 deletions.
9 changes: 8 additions & 1 deletion note/lesson-4/rnn_practice.md
@@ -65,6 +65,13 @@ data_index = (data_index + 1) % len(data)

The code is in [word2vec.py](../../src/rnn/word2vec.py).

Here we pinned the computation to the GPU, which triggers the bug described in this [issue](https://github.com/tensorflow/tensorflow/issues/2285); the following configuration works around it:

```python
# allow_soft_placement lets TensorFlow fall back to a supported device (the CPU)
# for ops that have no GPU kernel, instead of raising a placement error.
config = tf.ConfigProto(allow_soft_placement=True)
session = tf.Session(graph=graph, config=config)
```

## CBOW
What we trained above is the Skip-gram model, which predicts the context from the target word. word2vec also offers another formulation, CBOW, which predicts the target word from its context.
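
To make the contrast concrete, here is a minimal sketch of the CBOW input pipeline, reusing the names that appear in the diff below (train_dataset, embeddings, embed_sum); the hyperparameter values are illustrative assumptions, and this is not the full training graph:

```python
import tensorflow as tf

# Illustrative hyperparameters (assumptions, not taken from the commit).
vocabulary_size = 50000
embedding_size = 128
skip_window = 1
batch_size = 128

graph = tf.Graph()
with graph.as_default():
    # One column per example; each column holds the 2 * skip_window context word ids.
    train_dataset = tf.placeholder(tf.int32, shape=[2 * skip_window, batch_size])
    embeddings = tf.Variable(
        tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
    # Look up every context word, then sum along the context axis so each target
    # word is predicted from a single [batch_size, embedding_size] vector.
    embed = tf.nn.embedding_lookup(embeddings, train_dataset)
    embed_sum = tf.reduce_sum(embed, 0)
```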

@@ -160,7 +167,7 @@ embed_sum = tf.reduce_sum(embed, 0)
The four sets of gate parameters can be merged, computed with a single matmul, and then split back out (a fuller sketch follows this excerpt):

```python
-values = tf.split(1, gate_count, tf.matmul(i, input_weights) + tf.matmul(o, output_weights) + bias)
+values = tf.split(tf.matmul(i, input_weights) + tf.matmul(o, output_weights) + bias, gate_count, 1)
input_gate = tf.sigmoid(values[0])
forget_gate = tf.sigmoid(values[1])
update = values[2]
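
A fuller, runnable sketch of an lstm_cell built around this single matmul plus tf.split (TF 1.x argument order) might look like the following; the weight names follow the notes, while the sizes and initializers are illustrative assumptions:

```python
import tensorflow as tf

num_nodes = 64        # hidden units per gate (illustrative assumption)
num_inputs = 27       # e.g. one-hot characters (illustrative assumption)
gate_count = 4        # input gate, forget gate, cell update, output gate

# One weight matrix per source covers all four gates at once.
input_weights = tf.Variable(
    tf.truncated_normal([num_inputs, gate_count * num_nodes], stddev=0.1))
output_weights = tf.Variable(
    tf.truncated_normal([num_nodes, gate_count * num_nodes], stddev=0.1))
bias = tf.Variable(tf.zeros([1, gate_count * num_nodes]))


def lstm_cell(i, o, state):
    # One matmul per source computes every gate, then a single split recovers them.
    values = tf.split(
        tf.matmul(i, input_weights) + tf.matmul(o, output_weights) + bias,
        gate_count, 1)
    input_gate = tf.sigmoid(values[0])
    forget_gate = tf.sigmoid(values[1])
    update = values[2]
    output_gate = tf.sigmoid(values[3])
    state = forget_gate * state + input_gate * tf.tanh(update)
    return output_gate * tf.tanh(state), state
```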
3 changes: 2 additions & 1 deletion note/tensorflow/skflow.md
@@ -1,7 +1,8 @@
## skflow
-A sklearn-style API that uses TensorFlow to train on sklearn's datasets
+A sklearn-style API that uses TensorFlow to train on sklearn's datasets; it has since been merged into tf.contrib.

```python
# tensorflow 0.8
import skflow
from sklearn import datasets, metrics

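
Since the note says skflow has been folded into tf.contrib, a rough TF 1.x counterpart using tf.contrib.learn might look like the sketch below; this is an assumption about the contrib API of that era (exact signatures shifted between releases), not code from this repository:

```python
# tensorflow 1.x -- assumed tf.contrib.learn counterpart of the skflow example
import tensorflow as tf
from sklearn import datasets, metrics

iris = datasets.load_iris()
feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(iris.data)
classifier = tf.contrib.learn.LinearClassifier(
    feature_columns=feature_columns, n_classes=3)
classifier.fit(iris.data, iris.target, steps=200)
predictions = list(classifier.predict(iris.data))
print(metrics.accuracy_score(iris.target, predictions))
```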
4 changes: 2 additions & 2 deletions src/rnn/bigram_lstm.py
@@ -219,10 +219,10 @@ def lstm_cell(i, o, state):
with tf.control_dependencies([saved_output.assign(output),
saved_state.assign(state)]):
# Classifier.
-logits = tf.nn.xw_plus_b(tf.concat(0, outputs), w, b)
+logits = tf.nn.xw_plus_b(tf.concat(outputs, 0), w, b)
loss = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits(
-logits, tf.concat(0, train_labels)))
+logits=logits, labels=tf.concat(train_labels, 0)))

# Optimizer.
global_step = tf.Variable(0)
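
Taken in isolation, the TF 1.x call conventions this hunk migrates to can be summarized in a short standalone snippet (the tensors here are made-up examples, not the ones from bigram_lstm.py):

```python
import tensorflow as tf

a = tf.constant([[1.0, 2.0]])
b = tf.constant([[3.0, 4.0]])

# tf.concat: values first, axis last (0.x used tf.concat(axis, values)).
merged = tf.concat([a, b], 0)
# tf.split: value first, then the number of splits and the axis.
parts = tf.split(merged, 2, 0)
# softmax_cross_entropy_with_logits: keyword arguments for logits and labels.
xent = tf.nn.softmax_cross_entropy_with_logits(
    logits=merged, labels=tf.nn.softmax(merged))
```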
17 changes: 8 additions & 9 deletions src/rnn/cbow.py
@@ -19,7 +19,7 @@ def read_data(filename):
return data


-def build_dataset(words):
+def build_dataset(words, vocabulary_size):
count = [['UNK', -1]]
count.extend(collections.Counter(words).most_common(vocabulary_size - 1))
dictionary = dict()
@@ -44,7 +44,7 @@ def generate_batch(batch_size, num_skips, skip_window):
assert batch_size % num_skips == 0
assert num_skips <= 2 * skip_window
context_size = 2 * skip_window
-labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
+labels = np.ndarray(shape=(batch_size, 1), dtype=np.float32)
batchs = np.ndarray(shape=(context_size, batch_size), dtype=np.int32)
span = 2 * skip_window + 1 # [ skip_window target skip_window ]
buffer = collections.deque(maxlen=span)
@@ -72,7 +72,7 @@ def generate_batch(batch_size, num_skips, skip_window):
# print(batchs)
return batchs, labels


+vocabulary_size = 50000
data_set = load_pickle('text8_data.pickle')
if data_set is None:
# load data
@@ -82,7 +82,7 @@ def generate_batch(batch_size, num_skips, skip_window):
# read data
words = read_data(filename)
print('Data size %d' % len(words))
-data, count, dictionary, reverse_dictionary = build_dataset(words)
+data, count, dictionary, reverse_dictionary = build_dataset(words, vocabulary_size)
print('Most common words (+UNK)', count[:5])
print('Sample data', data[:10])
del words # Hint to reduce memory.
@@ -96,7 +96,6 @@ def generate_batch(batch_size, num_skips, skip_window):
dictionary = data_set['dictionary']
reverse_dictionary = data_set['reverse_dictionary']

-vocabulary_size = 50000
# split data
data_index = 0

@@ -124,10 +123,10 @@ def generate_batch(batch_size, num_skips, skip_window):
# tensor: Train a skip-gram model, word2vec
graph = tf.Graph()

-with graph.as_default(), tf.device('/gpu:0'):
+with graph.as_default():
# Input data.
train_dataset = tf.placeholder(tf.int32, shape=[2 * skip_window, batch_size])
-train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])
+train_labels = tf.placeholder(tf.float32, shape=[batch_size, 1])
valid_dataset = tf.constant(valid_examples, shape=[2 * skip_window, batch_size], dtype=tf.int32)

# Variables.
@@ -147,8 +146,8 @@ def generate_batch(batch_size, num_skips, skip_window):
# Compute the softmax loss, using a sample of the negative labels each time.
loss = tf.reduce_mean(
tf.nn.sampled_softmax_loss(softmax_weights, softmax_biases,
-embed_sum,
-train_labels, num_sampled, vocabulary_size))
+train_labels, embed_sum,
+num_sampled, vocabulary_size))

# Optimizer.
optimizer = tf.train.AdagradOptimizer(1.0).minimize(loss)
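
For clarity, the TF 1.x call that this reordering targets puts labels before inputs; written with keyword arguments and the variable names from the hunk above, it reads roughly:

```python
loss = tf.reduce_mean(
    tf.nn.sampled_softmax_loss(weights=softmax_weights, biases=softmax_biases,
                               labels=train_labels, inputs=embed_sum,
                               num_sampled=num_sampled, num_classes=vocabulary_size))
```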
