In [1]:
import numpy as np
import tensorflow as tf
import sklearn.datasets

  from ._conv import register_converters as _register_converters


In [2]:
x = np.linspace(1,100,100).reshape([100,1])
y = x * 2.0 - 1.5 + 0.2 * np.random.randn()

In [3]:
def regression_model(x, y):
    with tf.variable_scope('regress_model') as scope:
        y_hat = tf.layers.dense(x, 1, activation=None, use_bias=True)
        mean_loss = tf.reduce_mean(tf.square(y_hat - y))
        tf.add_to_collection('losses', mean_loss)
        total_loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
    return total_loss

In [4]:
def average_gradients_my(tower_grads):
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
#         if len(grad_and_vars)==0 or grad_and_vars[0][0] is None:
#             continue
        grads = []
        for g, _ in grad_and_vars:
            expanded_g = tf.expand_dims(g, 0)
            grads.append(expanded_g)
        # Average over the 'tower' dimension.
        grad = tf.concat(axis=0, values=grads)
#         print 'shape BEFORE reduce mean:'+str(grad.get_shape())
        grad = tf.reduce_mean(grad, 0)
#         print 'shape after reduce mean:'+str(grad.get_shape())
        # Attention that the Variables are redundant because they are shared
        # across towers. So .. we will just return the first tower's pointer to
        # the Variable.
        v = grad_and_vars[0][1]
        grad_and_var = (grad, v)
        average_grads.append(grad_and_var)
    return average_grads

In [5]:
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.allow_soft_placement = True
config.gpu_options.per_process_gpu_memory_fraction = 0.5 
config.log_device_placement = True

# 多GPU

In [6]:
with tf.Graph().as_default(), tf.device('/cpu:0'):
    global_step = tf.get_variable(
        'global_step', [],
        initializer=tf.constant_initializer(0), trainable=False)
    dataset = tf.data.Dataset.from_tensor_slices((x, y)).repeat(50000).shuffle(buffer_size=128).batch(64)
    iter_ds = dataset.make_one_shot_iterator()
    opt = tf.train.AdamOptimizer(0.01)
    grads = []
    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
        for i in range(4):
            with tf.device('/gpu:%d' % i):
#                 with tf.name_scope('%s_%d' % ("tower", i)) as scope:
                    print 'on gpu:' + str(i)
                    x, y = iter_ds.get_next()
                    loss = regression_model(x, y)
                    grad_var = opt.compute_gradients(loss)
#                     for g,v in grad_var:
#                         print '--------------------------'
#                         print g, v
#                     print '==============================================='
                    grads.append(grad_var)
    avg_grad = average_gradients_my(grads)
#     print avg_grad
    train_op = opt.apply_gradients(avg_grad, global_step=global_step)
    # run
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    try:
        for i in range(10000):
            _loss, _ = sess.run([loss, train_op])
            if (i+1) % 500 == 0:
                print '%d : %.8f' % (i + 1, _loss)
    except:
        print 'end'

on gpu:0
on gpu:1
on gpu:2
on gpu:3
500 : 499.16913431
1000 : 17.76827216
1500 : 18.32002711
2000 : 16.10967602
2500 : 13.56527992
3000 : 12.27010479
3500 : 9.53923743
4000 : 7.41220514
4500 : 5.02793317
5000 : 3.06279595
5500 : 1.68188442
6000 : 0.74586814
6500 : 0.27757217
7000 : 0.07693048
7500 : 0.01599540
8000 : 0.00155063
8500 : 0.00010204
9000 : 0.00000272
9500 : 0.00000003
10000 : 0.00000000


# CPU

In [None]:
with tf.device('/cpu:0'):
    dataset = tf.data.Dataset.from_tensor_slices((x, y)).repeat(6400).shuffle(buffer_size=128).batch(64)
    iter_ds = dataset.make_one_shot_iterator()
    loss = regression_model(iter_ds)
    train_op = tf.train.AdamOptimizer(0.01).minimize(loss)
    # 
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    try:
        for i in range(10000):
            _loss, _ = sess.run([loss, train_op])
            if (i+1) % 500 == 0:
                print '%d : %.8f' % (i + 1, _loss)
    except:
        print 'end'