diff --git a/examples/6_MultiGPU/multigpu_cnn.py b/examples/6_MultiGPU/multigpu_cnn.py
index be0dae1d..be003ebd 100644
--- a/examples/6_MultiGPU/multigpu_cnn.py
+++ b/examples/6_MultiGPU/multigpu_cnn.py
@@ -104,6 +104,22 @@ def average_gradients(tower_grads):
 
     return average_grads
 
+# By default, all variables will be placed on '/gpu:0'
+# So we need a custom device function, to assign all variables to '/cpu:0'
+# Note: If GPUs are peered, '/gpu:0' can be a faster option
+PS_OPS = ['Variable', 'VariableV2', 'AutoReloadVariable']
+
+def assign_to_device(device, ps_device='/cpu:0'):
+    def _assign(op):
+        node_def = op if isinstance(op, tf.NodeDef) else op.node_def
+        if node_def.op in PS_OPS:
+            return "/" + ps_device
+        else:
+            return device
+
+    return _assign
+
+
 # Place all ops on CPU by default
 with tf.device('/cpu:0'):
     tower_grads = []
@@ -115,7 +131,7 @@ def average_gradients(tower_grads):
 
     # Loop over all GPUs and construct their own computation graph
     for i in range(num_gpus):
-        with tf.device('/gpu:%d' % i):
+        with tf.device(assign_to_device('/gpu:{}'.format(i), ps_device='/cpu:0')):
 
             # Split data between GPUs
             _x = X[i * batch_size: (i+1) * batch_size]
diff --git a/notebooks/6_MultiGPU/multigpu_cnn.ipynb b/notebooks/6_MultiGPU/multigpu_cnn.ipynb
index 9785d740..2d4746d2 100644
--- a/notebooks/6_MultiGPU/multigpu_cnn.ipynb
+++ b/notebooks/6_MultiGPU/multigpu_cnn.ipynb
@@ -167,6 +167,30 @@
   {
    "cell_type": "code",
    "execution_count": 4,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# By default, all variables will be placed on '/gpu:0'\n",
+    "# So we need a custom device function, to assign all variables to '/cpu:0'\n",
+    "# Note: If GPUs are peered, '/gpu:0' can be a faster option\n",
+    "PS_OPS = ['Variable', 'VariableV2', 'AutoReloadVariable']\n",
+    "\n",
+    "def assign_to_device(device, ps_device='/cpu:0'):\n",
+    "    def _assign(op):\n",
+    "        node_def = op if isinstance(op, tf.NodeDef) else op.node_def\n",
+    "        if node_def.op in PS_OPS:\n",
+    "            return \"/\" + ps_device\n",
+    "        else:\n",
+    "            return device\n",
+    "\n",
+    "    return _assign"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
    "metadata": {
    "collapsed": false,
    "scrolled": false
@@ -214,7 +238,7 @@
    "\n",
    "    # Loop over all GPUs and construct their own computation graph\n",
    "    for i in range(num_gpus):\n",
-   "        with tf.device('/gpu:%d' % i):\n",
+   "        with tf.device(assign_to_device('/gpu:{}'.format(i), ps_device='/cpu:0')):\n",
    "\n",
    "            # Split data between GPUs\n",
    "            _x = X[i * batch_size: (i+1) * batch_size]\n",
@@ -289,7 +313,7 @@
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
-   "version": 2.0
+   "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
@@ -301,4 +325,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 0
-}
\ No newline at end of file
+}