add scatter, and update results

hughperkins · May 28, 2017 · a1813a1 · a1813a1
1 parent 7b9bb63
commit a1813a1
Show file tree

Hide file tree

Showing 9 changed files with 51 additions and 31 deletions.
diff --git a/README.md b/README.md
@@ -60,16 +60,16 @@ Please see the main repository for full Tensorflow documentation.  This readme w
 
 | test | Mac Sierra, using Radeon Pro 450 GPU |
 |----- |-------|
-| unit tests (`py.test -v`) | pass | pass |
+| unit tests (`py.test -v`) | pass |
 | [linear_regression.py](https://github.com/hughperkins/TensorFlow-Examples/blob/enforce-gpu/examples/2_BasicModels/linear_regression.py) | slow, but works   |
 | [logistic_regression.py](https://github.com/hughperkins/TensorFlow-Examples/blob/enforce-gpu/examples/2_BasicModels/logistic_regression.py) | ok  |
 | [nearest_neighbor.py](https://github.com/hughperkins/TensorFlow-Examples/blob/enforce-gpu/examples/2_BasicModels/nearest_neighbor.py) | ok (accuracy 0.92)  |
 | [autoencoder.py](https://github.com/hughperkins/TensorFlow-Examples/blob/enforce-gpu/examples/3_NeuralNetworks/autoencoder.py)| runs ok, random numbers working now :-) |
 | [multilayer_perceptron.py](https://github.com/hughperkins/TensorFlow-Examples/blob/enforce-gpu/examples/3_NeuralNetworks/multilayer_perceptron.py) | runs ok, loss decreases, random numbers working now :-) |
 | [recurrent_network.py](https://github.com/hughperkins/TensorFlow-Examples/blob/enforce-gpu/examples/3_NeuralNetworks/recurrent_network.py)| runs ok, loss goes down |
 | [bidirectional_rnn.py](https://github.com/hughperkins/TensorFlow-Examples/blob/enforce-gpu/examples/3_NeuralNetworks/bidirectional_rnn.py)| runs ok, loss goes down |
-| [dynamic_rnn.py](https://github.com/hughperkins/TensorFlow-Examples/blob/enforce-gpu/examples/3_NeuralNetworks/dynamic_rnn.py) | Missing Reshape gradients |
-| [convolutional_network.py](https://github.com/hughperkins/TensorFlow-Examples/blob/enforce-gpu/examples/3_NeuralNetworks/convolutional_network.py) | Missing conv operation |
+| [dynamic_rnn.py](https://github.com/hughperkins/TensorFlow-Examples/blob/enforce-gpu/examples/3_NeuralNetworks/dynamic_rnn.py) | Runs, but loss is currently NaN |
+| [convolutional_network.py](https://github.com/hughperkins/TensorFlow-Examples/blob/enforce-gpu/examples/3_NeuralNetworks/convolutional_network.py) | Missing conv |
 
 ## Installation 
 

diff --git a/tensorflow/core/kernels/scatter_functor.cc b/tensorflow/core/kernels/scatter_functor.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+// #if GOOGLE_CUDA
 
 #include "tensorflow/core/kernels/scatter_functor.h"
 #include "tensorflow/core/framework/register_types.h"
@@ -43,8 +43,8 @@ namespace functor {
   DECLARE_GPU_SPECS_OP(T, Index, scatter_op::UpdateOp::DIV);
 
 #define DECLARE_GPU_SPECS(T)         \
-  DECLARE_GPU_SPECS_INDEX(T, int32); \
-  DECLARE_GPU_SPECS_INDEX(T, int64);
+  DECLARE_GPU_SPECS_INDEX(T, int32); 
+  // DECLARE_GPU_SPECS_INDEX(T, int64);
 
 TF_CALL_GPU_NUMBER_TYPES_NO_HALF(DECLARE_GPU_SPECS);
 
@@ -55,8 +55,8 @@ TF_CALL_GPU_NUMBER_TYPES_NO_HALF(DECLARE_GPU_SPECS);
 }  // namespace functor
 }  // namespace tensorflow
 
-#else
+// #else
 
 #include "tensorflow/core/kernels/scatter_functor.h"
 
-#endif  // GOOGLE_CUDA
+// #endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/scatter_functor_gpu.cu.cc b/tensorflow/core/kernels/scatter_functor_gpu.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+// #if GOOGLE_CUDA
 
 #define EIGEN_USE_GPU
 
@@ -34,11 +34,11 @@ typedef Eigen::GpuDevice GPUDevice;
   DEFINE_GPU_SPECS_OP(T, Index, scatter_op::UpdateOp::DIV);
 
 #define DEFINE_GPU_SPECS(T)         \
-  DEFINE_GPU_SPECS_INDEX(T, int32); \
-  DEFINE_GPU_SPECS_INDEX(T, int64);
+  DEFINE_GPU_SPECS_INDEX(T, int32); 
+  // DEFINE_GPU_SPECS_INDEX(T, int64);
 
 DEFINE_GPU_SPECS(float);
-DEFINE_GPU_SPECS(double);
+// DEFINE_GPU_SPECS(double);
 // TODO(b/27222123): The following fails to compile due to lack of support for
 // fp16.
 // TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_SPECS);
@@ -49,4 +49,4 @@ DEFINE_GPU_SPECS(double);
 
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+// #endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/scatter_functor_gpu.cu.h b/tensorflow/core/kernels/scatter_functor_gpu.cu.h
@@ -16,7 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_KERNELS_SCATTER_FUNCTOR_GPU_CU_H_
 #define TENSORFLOW_KERNELS_SCATTER_FUNCTOR_GPU_CU_H_
 
-#if GOOGLE_CUDA
+// #if GOOGLE_CUDA
 
 #define EIGEN_USE_GPU
 
@@ -94,6 +94,6 @@ struct ScatterFunctor<GPUDevice, T, Index, op> {
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+// #endif  // GOOGLE_CUDA
 
 #endif  // TENSORFLOW_KERNELS_SCATTER_FUNCTOR_GPU_CU_H_
diff --git a/tensorflow/core/kernels/scatter_op.cc b/tensorflow/core/kernels/scatter_op.cc
@@ -159,7 +159,27 @@ TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ARITHEMTIC_CPU);
 TF_CALL_ALL_TYPES(REGISTER_SCATTER_UPDATE_CPU);
 
 // Registers GPU kernels.
-#if GOOGLE_CUDA
+// #if GOOGLE_CUDA
+
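+// Added by Hugh: redefine the scatter registration macros so that the kernels registered below use int32 indices only (the int64 variant is disabled).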
+#undef REGISTER_SCATTER_KERNEL
+#undef REGISTER_SCATTER_ARITHEMTIC
+#undef REGISTER_SCATTER_UPDATE
+
+#define REGISTER_SCATTER_KERNEL(type, dev, name, op)         \
+  REGISTER_SCATTER_KERNEL_INDEX(type, int32, dev, name, op); 
+  // REGISTER_SCATTER_KERNEL_INDEX(type, int64, dev, name, op);
+
+#define REGISTER_SCATTER_ARITHEMTIC(type, dev)                                 \
+  REGISTER_SCATTER_KERNEL(type, dev, "ScatterAdd", scatter_op::UpdateOp::ADD); \
+  REGISTER_SCATTER_KERNEL(type, dev, "ScatterDiv", scatter_op::UpdateOp::DIV); \
+  REGISTER_SCATTER_KERNEL(type, dev, "ScatterMul", scatter_op::UpdateOp::MUL); \
+  REGISTER_SCATTER_KERNEL(type, dev, "ScatterSub", scatter_op::UpdateOp::SUB);
+
+#define REGISTER_SCATTER_UPDATE(type, dev)            \
+  REGISTER_SCATTER_KERNEL(type, dev, "ScatterUpdate", \
+                          scatter_op::UpdateOp::ASSIGN);
+
 #define REGISTER_SCATTER_ARITHEMTIC_GPU(type) \
   REGISTER_SCATTER_ARITHEMTIC(type, GPU);
 
@@ -168,7 +188,7 @@ TF_CALL_ALL_TYPES(REGISTER_SCATTER_UPDATE_CPU);
 TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ARITHEMTIC_GPU);
 TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_UPDATE_GPU);
 
-#endif  // GOOGLE_CUDA
+// #endif  // GOOGLE_CUDA
 
 #undef REGISTER_SCATTER_ADD
 #undef REGISTER_SCATTER_ARITHEMTIC

diff --git a/tensorflow/core/kernels/scatter_op_gpu.cu.cc b/tensorflow/core/kernels/scatter_op_gpu.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+// #if GOOGLE_CUDA
 
 #define EIGEN_USE_GPU
 
@@ -35,11 +35,11 @@ typedef Eigen::GpuDevice GPUDevice;
   DEFINE_GPU_SPECS_OP(T, Index, scatter_op::UpdateOp::DIV);
 
 #define DEFINE_GPU_SPECS(T)         \
-  DEFINE_GPU_SPECS_INDEX(T, int32); \
-  DEFINE_GPU_SPECS_INDEX(T, int64);
+  DEFINE_GPU_SPECS_INDEX(T, int32); 
+  // DEFINE_GPU_SPECS_INDEX(T, int64);
 
 DEFINE_GPU_SPECS(float);
-DEFINE_GPU_SPECS(double);
+// DEFINE_GPU_SPECS(double);
 // TODO: The following fails to compile.
 // TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_SPECS);
 
@@ -49,4 +49,4 @@ DEFINE_GPU_SPECS(double);
 
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+// #endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/segment_reduction_ops.cc b/tensorflow/core/kernels/segment_reduction_ops.cc
@@ -310,7 +310,7 @@ TF_CALL_NUMBER_TYPES(REGISTER_CPU_UNSORTED_KERNELS_ALL);
 #undef REGISTER_CPU_UNSORTED_KERNELS
 #undef REGISTER_CPU_UNSORTED_KERNELS_ALL
 
-#if GOOGLE_CUDA
+// #if GOOGLE_CUDA
 #define REGISTER_GPU_UNSORTED_KERNELS(type, index_type)                \
   REGISTER_KERNEL_BUILDER(Name("UnsortedSegmentSum")                   \
                               .Device(DEVICE_GPU)                      \
@@ -320,13 +320,13 @@ TF_CALL_NUMBER_TYPES(REGISTER_CPU_UNSORTED_KERNELS_ALL);
                           UnsortedSegmentSumOp<GPUDevice, type, index_type>);
 
 #define REGISTER_GPU_UNSORTED_KERNELS_ALL(type) \
-  REGISTER_GPU_UNSORTED_KERNELS(type, int32);   \
-  REGISTER_GPU_UNSORTED_KERNELS(type, int64);
+  REGISTER_GPU_UNSORTED_KERNELS(type, int32);   
+  // REGISTER_GPU_UNSORTED_KERNELS(type, int64);
 
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_UNSORTED_KERNELS_ALL);
 #undef REGISTER_GPU_UNSORTED_KERNELS
 #undef REGISTER_GPU_UNSORTED_KERNELS_ALL
-#endif  // GOOGLE_CUDA
+// #endif  // GOOGLE_CUDA
 
 // Same as SegmentReductionOp but takes as input a "sparse" tensor, represented
 // by two dense tensors, one containing the data, and the other containing

diff --git a/tensorflow/core/kernels/segment_reduction_ops_gpu.cu.cc b/tensorflow/core/kernels/segment_reduction_ops_gpu.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+// #if GOOGLE_CUDA
 
 #define EIGEN_USE_GPU
 
@@ -95,8 +95,8 @@ struct UnsortedSegmentSumFunctor<GPUDevice, T, Index> {
   template struct UnsortedSegmentSumFunctor<GPUDevice, T, Index>
 
 #define DEFINE_GPU_SPECS(T)         \
-  DEFINE_GPU_SPECS_INDEX(T, int32); \
-  DEFINE_GPU_SPECS_INDEX(T, int64);
+  DEFINE_GPU_SPECS_INDEX(T, int32); 
+  // DEFINE_GPU_SPECS_INDEX(T, int64);
 
 TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_SPECS);
 
@@ -106,4 +106,4 @@ TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_SPECS);
 }  // namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+// #endif  // GOOGLE_CUDA
diff --git a/third_party/coriander b/third_party/coriander