In [1]:
import tensorflow as tf
from tensorflow import keras
import os,sys
import numpy as np

In [4]:
# Load the CIFAR-10 image and label arrays, already split into train and test.
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()

# Report the array shapes of each split (images first, then labels).
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

(50000, 32, 32, 3) (50000, 1)
(10000, 32, 32, 3) (10000, 1)


In [3]:
# List every device TensorFlow can see (stable alias of the experimental API).
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [7]:
# Human-readable CIFAR-10 class names.
# NOTE(review): presumably indexed by the integer label stored in y_train;
# the list is not referenced by any other visible cell - confirm before use.
classes = [
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]

In [8]:
# Peek at the first five training labels; each row is a single integer class id.
y_train[:5]

array([[6],
       [9],
       [9],
       [4],
       [1]], dtype=uint8)

In [9]:
# Divide every pixel value by 255.0 (true division, so the results are
# floating point) for both the training and the test images.
x_train_scaled, x_test_scaled = (
    images / 255.0 for images in (x_train, x_test)
)

In [11]:
# One-hot encode the integer labels of both splits into 10-wide float32 rows.
y_train_categorical, y_test_categorical = (
    tf.keras.utils.to_categorical(labels, num_classes=10, dtype='float32')
    for labels in (y_train, y_test)
)

# Confirm the encoded label matrices have one row per example and 10 columns.
print(y_train_categorical.shape)
print(y_test_categorical.shape)

(50000, 10)
(10000, 10)


In [13]:
def get_model():
  """Build and compile a fully connected classifier for 32x32x3 images.

  Architecture: Flatten -> Dense(3000, relu) -> Dense(1000, relu)
  -> Dense(10, softmax).

  Returns:
    A compiled `tf.keras.Sequential` model using the SGD optimizer,
    categorical cross-entropy loss and the accuracy metric.
  """
  model = tf.keras.Sequential([
      tf.keras.layers.Flatten(input_shape=(32,32,3)),
      tf.keras.layers.Dense(3000,activation='relu'),
      tf.keras.layers.Dense(1000,activation='relu'),
      # softmax (not sigmoid): categorical_crossentropy expects the 10 outputs
      # to form one probability distribution over mutually exclusive classes.
      tf.keras.layers.Dense(10,activation='softmax'),
  ])

  model.compile(optimizer='SGD', loss='categorical_crossentropy',metrics=['accuracy'])

  return model

# summary() prints the layer table itself and returns None, so the wrapping
# print() also emits a trailing "None" line.
print(get_model().summary())

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 3072)              0         
                                                                 
 dense (Dense)               (None, 3000)              9219000   
                                                                 
 dense_1 (Dense)             (None, 1000)              3001000   
                                                                 
 dense_2 (Dense)             (None, 10)                10010     
                                                                 
Total params: 12,230,010
Trainable params: 12,230,010
Non-trainable params: 0
_________________________________________________________________
None


In [14]:
# Wrap the in-memory arrays as tf.data pipelines of (image, one-hot label) pairs.
train_tf_dataset = tf.data.Dataset.from_tensor_slices((x_train_scaled,y_train_categorical))
# The test pipeline must be built from the held-out test arrays; building it
# from the training arrays would make any evaluation report training accuracy.
test_tf_dataset = tf.data.Dataset.from_tensor_slices((x_test_scaled,y_test_categorical))
print(type(train_tf_dataset))
print(type(test_tf_dataset))

<class 'tensorflow.python.data.ops.dataset_ops.TensorSliceDataset'>
<class 'tensorflow.python.data.ops.dataset_ops.TensorSliceDataset'>


In [17]:
# Create a MirroredStrategy with default arguments; the log line printed on
# construction lists the devices it will use.
strategy = tf.distribute.MirroredStrategy()
print(strategy)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
<tensorflow.python.distribute.mirrored_strategy.MirroredStrategy object at 0x7f0d41182290>


In [18]:
# Number of replicas the strategy keeps in sync; used to size the global batch.
print(strategy.num_replicas_in_sync)

1


In [19]:
# Number of training examples (first dimension of the scaled image array).
BUFFER_SIZE = x_train_scaled.shape[0]
# Examples handled by each replica per step.
BATCH_SIZE_PER_REPLICA = 250
# Global batch = per-replica batch times the number of replicas in the strategy.
BATCH_SIZE = strategy.num_replicas_in_sync * BATCH_SIZE_PER_REPLICA

print("BUFFER_SIZE: ",BUFFER_SIZE)
print("BATCH_SIZE_PER_REPLICA: ",BATCH_SIZE_PER_REPLICA)
print("GLOBAL_BATCH_SIZE: ",BATCH_SIZE)

# EPOCHS = 10

BUFFER_SIZE:  50000
BATCH_SIZE_PER_REPLICA:  250
GLOBAL_BATCH_SIZE:  250


In [21]:
# Shuffle across the whole training set (BUFFER_SIZE equals the number of
# training examples) before batching, so every epoch sees differently composed
# mini-batches; prefetch overlaps input preparation with the training step.
train_dataset = train_tf_dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
# Example order is irrelevant for evaluation, so the test pipeline is only batched.
test_dataset = test_tf_dataset.batch(BATCH_SIZE)
print(train_dataset)
print(test_dataset)

<PrefetchDataset element_spec=(TensorSpec(shape=(None, 32, 32, 3), dtype=tf.float64, name=None), TensorSpec(shape=(None, 10), dtype=tf.float32, name=None))>
<BatchDataset element_spec=(TensorSpec(shape=(None, 32, 32, 3), dtype=tf.float64, name=None), TensorSpec(shape=(None, 10), dtype=tf.float32, name=None))>


In [22]:
%%timeit -n1 -r1
# Time a single run (-n1 -r1: one loop, one repeat) of building and training
# the model under the distribution strategy.
# NOTE(review): names assigned inside a %%timeit cell are not expected to
# persist in the notebook namespace - confirm before using gpu_model later.
with strategy.scope():
  # The model is created and compiled inside the strategy scope.
  gpu_model = get_model()
  gpu_model.fit(train_dataset,epochs=50)

INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
Epoch 1/50
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tens

In [27]:
%%timeit -n1 -r1
# Time a single run (-n1 -r1: one loop, one repeat) of the same model and
# training call, this time placed on the CPU for comparison.
# NOTE(review): names assigned inside a %%timeit cell are not expected to
# persist in the notebook namespace - confirm before using cpu_model later.
with tf.device('/CPU:0'):
  # A fresh model is built so training starts from new weights.
  cpu_model = get_model()
  cpu_model.fit(train_dataset,epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
1 loop, best of 1: 59min 47s per loop


                                                  -: END :-