diff --git a/examples/6_MultiGPU/multigpu_cnn.py b/examples/6_MultiGPU/multigpu_cnn.py
index be0dae1d..be003ebd 100644
--- a/examples/6_MultiGPU/multigpu_cnn.py
+++ b/examples/6_MultiGPU/multigpu_cnn.py
@@ -104,6 +104,22 @@ def average_gradients(tower_grads):
 
     return average_grads
 
+# By default, all variables will be placed on '/gpu:0'
+# So we need a custom device function, to assign all variables to '/cpu:0'
+# Note: If GPUs are peered, '/gpu:0' can be a faster option
+PS_OPS = ['Variable', 'VariableV2', 'AutoReloadVariable']
+
+def assign_to_device(device, ps_device='/cpu:0'):
+    def _assign(op):
+        node_def = op if isinstance(op, tf.NodeDef) else op.node_def
+        if node_def.op in PS_OPS:
+            return "/" + ps_device
+        else:
+            return device
+
+    return _assign
+
+
 # Place all ops on CPU by default
 with tf.device('/cpu:0'):
     tower_grads = []
@@ -115,7 +131,7 @@ def average_gradients(tower_grads):
 
     # Loop over all GPUs and construct their own computation graph
     for i in range(num_gpus):
-        with tf.device('/gpu:%d' % i):
+        with tf.device(assign_to_device('/gpu:{}'.format(i), ps_device='/cpu:0')):
 
             # Split data between GPUs
             _x = X[i * batch_size: (i+1) * batch_size]
diff --git a/notebooks/6_MultiGPU/multigpu_cnn.ipynb b/notebooks/6_MultiGPU/multigpu_cnn.ipynb
index 9785d740..2d4746d2 100644
--- a/notebooks/6_MultiGPU/multigpu_cnn.ipynb
+++ b/notebooks/6_MultiGPU/multigpu_cnn.ipynb
@@ -167,6 +167,30 @@
   {
    "cell_type": "code",
    "execution_count": 4,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# By default, all variables will be placed on '/gpu:0'\n",
+    "# So we need a custom device function, to assign all variables to '/cpu:0'\n",
+    "# Note: If GPUs are peered, '/gpu:0' can be a faster option\n",
+    "PS_OPS = ['Variable', 'VariableV2', 'AutoReloadVariable']\n",
+    "\n",
+    "def assign_to_device(device, ps_device='/cpu:0'):\n",
+    "    def _assign(op):\n",
+    "        node_def = op if isinstance(op, tf.NodeDef) else op.node_def\n",
+    "        if node_def.op in PS_OPS:\n",
+    "            return \"/\" + ps_device\n",
+    "        else:\n",
+    "            return device\n",
+    "\n",
+    "    return _assign"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
    "metadata": {
    "collapsed": false,
    "scrolled": false
@@ -214,7 +238,7 @@
    "\n",
    "    # Loop over all GPUs and construct their own computation graph\n",
    "    for i in range(num_gpus):\n",
-   "        with tf.device('/gpu:%d' % i):\n",
+   "        with tf.device(assign_to_device('/gpu:{}'.format(i), ps_device='/cpu:0')):\n",
    "\n",
    "            # Split data between GPUs\n",
    "            _x = X[i * batch_size: (i+1) * batch_size]\n",
@@ -289,7 +313,7 @@
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
-   "version": 2.0
+   "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
@@ -301,4 +325,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 0
-}
\ No newline at end of file
+}