In [1]:
import tensorflow as tf
#
from tensorflow.keras.applications import EfficientNetV2S
from tensorflow.keras.layers import Input, Dense, Flatten
from tensorflow.keras import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import CategoricalCrossentropy
#
import numpy as np

In [2]:
nsize = 32
batch_size = 32


def _to_model_input(images, size):
    """Convert a batch of grayscale images into 3-channel float32 model input.

    Args:
        images: integer array of shape (n, height, width) as returned by
            `tf.keras.datasets.mnist.load_data()`.
        size: target height and width in pixels.

    Returns:
        A float32 tensor of shape (n, size, size, 3) with pixel values left in
        the original [0, 255] range.
    """
    # Add a channel axis and replicate it so the images have three channels,
    # matching the (nsize, nsize, 3) input the network is built with.
    images = np.expand_dims(images, axis=-1)
    images = np.repeat(images, 3, axis=-1)
    # Deliberately NOT divided by 255: EfficientNetV2 built with
    # include_preprocessing=True rescales inside the model and, per the Keras
    # documentation, expects float pixels in [0, 255]. Scaling here as well
    # would squash every input into a narrow band near -1.
    images = images.astype('float32')
    return tf.image.resize(images, [size, size])


# Keep the raw arrays under separate names so this cell can be re-run safely.
(x_train_raw, y_train_raw), (x_test_raw, y_test_raw) = tf.keras.datasets.mnist.load_data()
#
x_train = _to_model_input(x_train_raw, nsize)
x_test = _to_model_input(x_test_raw, nsize)
#
# One-hot encode the ten digit classes for CategoricalCrossentropy.
y_train = tf.keras.utils.to_categorical(y_train_raw, num_classes = 10)
y_test = tf.keras.utils.to_categorical(y_test_raw, num_classes = 10)

2023-05-05 19:27:44.261259: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-05 19:27:45.767153: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22833 MB memory:  -> device: 0, name: Quadro RTX 6000, pci bus id: 0000:1a:00.0, compute capability: 7.5
2023-05-05 19:27:45.768319: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 22833 MB memory:  -> device: 1, name: Quadro RTX 6000, pci bus id: 0000:67:00.0, compute capability: 7.5
2023-05-05 19:27:45.771982: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:G

In [3]:
# Replicate the model across all visible GPUs; gradients are all-reduced
# between replicas on every step.
strategy = tf.distribute.MirroredStrategy()
print("Number of devices: {}".format(strategy.num_replicas_in_sync))
#
# Variables (model weights, optimizer state) must be created inside the
# strategy scope so that they are mirrored on every replica.
with strategy.scope():
    #
    # Build the model: ImageNet-pretrained EfficientNetV2S backbone without
    # its classification head, followed by a small dense classifier.
    #
    input_tensor = Input(shape=(nsize,nsize,3))
    #
    # include_preprocessing=True puts a rescaling layer inside the backbone;
    # per the Keras documentation the model then expects float pixel values
    # in the [0, 255] range.
    stem_model = EfficientNetV2S(
        include_top=False,
        weights='imagenet',
        input_tensor=input_tensor,
        input_shape=None,
        classifier_activation=None,
        include_preprocessing=True
    )
    #
    # NOTE(review): 4080 units is unusual - possibly 4096 was intended; confirm.
    flatten = Flatten()(stem_model.output)
    dense1 = Dense(4080, activation='relu')(flatten)
    dense2 = Dense(4080, activation='relu')(dense1)
    output_tensor = Dense(10,activation='softmax')(dense2)
    #
    model = Model(input_tensor, output_tensor)
    #
    model.summary()
    #
    # Compile the model with Keras' built-in categorical cross-entropy, which
    # matches the one-hot labels and the softmax output layer.
    #
    model.compile(
        optimizer = Adam(learning_rate=0.0001),
        loss = CategoricalCrossentropy(),
        metrics = ["acc"]
    )

# batch_size is the GLOBAL batch size: Keras splits each batch across the
# replicas. It is passed explicitly so the value configured in the notebook
# is the one actually used instead of the implicit default.
model.fit(x_train, y_train, batch_size=batch_size, epochs=1)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2')
Number of devices: 3
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadc

 block2b_drop (Dropout)         (None, 8, 8, 48)     0           ['block2b_project_bn[0][0]']     
                                                                                                  
 block2b_add (Add)              (None, 8, 8, 48)     0           ['block2b_drop[0][0]',           
                                                                  'block2a_project_bn[0][0]']     
                                                                                                  
 block2c_expand_conv (Conv2D)   (None, 8, 8, 192)    82944       ['block2b_add[0][0]']            
                                                                                                  
 block2c_expand_bn (BatchNormal  (None, 8, 8, 192)   768         ['block2c_expand_conv[0][0]']    
 ization)                                                                                         
                                                                                                  
 block2c_e

                                                                                                  
 block3c_project_conv (Conv2D)  (None, 4, 4, 64)     16384       ['block3c_expand_activation[0][0]
                                                                 ']                               
                                                                                                  
 block3c_project_bn (BatchNorma  (None, 4, 4, 64)    256         ['block3c_project_conv[0][0]']   
 lization)                                                                                        
                                                                                                  
 block3c_drop (Dropout)         (None, 4, 4, 64)     0           ['block3c_project_bn[0][0]']     
                                                                                                  
 block3c_add (Add)              (None, 4, 4, 64)     0           ['block3c_drop[0][0]',           
          

 agePooling2D)                                                                                    
                                                                                                  
 block4b_se_reshape (Reshape)   (None, 1, 1, 512)    0           ['block4b_se_squeeze[0][0]']     
                                                                                                  
 block4b_se_reduce (Conv2D)     (None, 1, 1, 32)     16416       ['block4b_se_reshape[0][0]']     
                                                                                                  
 block4b_se_expand (Conv2D)     (None, 1, 1, 512)    16896       ['block4b_se_reduce[0][0]']      
                                                                                                  
 block4b_se_excite (Multiply)   (None, 2, 2, 512)    0           ['block4b_activation[0][0]',     
                                                                  'block4b_se_expand[0][0]']      
          

                                                                                                  
 block4d_se_expand (Conv2D)     (None, 1, 1, 512)    16896       ['block4d_se_reduce[0][0]']      
                                                                                                  
 block4d_se_excite (Multiply)   (None, 2, 2, 512)    0           ['block4d_activation[0][0]',     
                                                                  'block4d_se_expand[0][0]']      
                                                                                                  
 block4d_project_conv (Conv2D)  (None, 2, 2, 128)    65536       ['block4d_se_excite[0][0]']      
                                                                                                  
 block4d_project_bn (BatchNorma  (None, 2, 2, 128)   512         ['block4d_project_conv[0][0]']   
 lization)                                                                                        
          

                                                                                                  
 block4f_project_conv (Conv2D)  (None, 2, 2, 128)    65536       ['block4f_se_excite[0][0]']      
                                                                                                  
 block4f_project_bn (BatchNorma  (None, 2, 2, 128)   512         ['block4f_project_conv[0][0]']   
 lization)                                                                                        
                                                                                                  
 block4f_drop (Dropout)         (None, 2, 2, 128)    0           ['block4f_project_bn[0][0]']     
                                                                                                  
 block4f_add (Add)              (None, 2, 2, 128)    0           ['block4f_drop[0][0]',           
                                                                  'block4e_add[0][0]']            
          

                                                                                                  
 block5c_expand_conv (Conv2D)   (None, 2, 2, 960)    153600      ['block5b_add[0][0]']            
                                                                                                  
 block5c_expand_bn (BatchNormal  (None, 2, 2, 960)   3840        ['block5c_expand_conv[0][0]']    
 ization)                                                                                         
                                                                                                  
 block5c_expand_activation (Act  (None, 2, 2, 960)   0           ['block5c_expand_bn[0][0]']      
 ivation)                                                                                         
                                                                                                  
 block5c_dwconv2 (DepthwiseConv  (None, 2, 2, 960)   8640        ['block5c_expand_activation[0][0]
 2D)      

                                                                                                  
 block5e_expand_activation (Act  (None, 2, 2, 960)   0           ['block5e_expand_bn[0][0]']      
 ivation)                                                                                         
                                                                                                  
 block5e_dwconv2 (DepthwiseConv  (None, 2, 2, 960)   8640        ['block5e_expand_activation[0][0]
 2D)                                                             ']                               
                                                                                                  
 block5e_bn (BatchNormalization  (None, 2, 2, 960)   3840        ['block5e_dwconv2[0][0]']        
 )                                                                                                
                                                                                                  
 block5e_a

 2D)                                                             ']                               
                                                                                                  
 block5g_bn (BatchNormalization  (None, 2, 2, 960)   3840        ['block5g_dwconv2[0][0]']        
 )                                                                                                
                                                                                                  
 block5g_activation (Activation  (None, 2, 2, 960)   0           ['block5g_bn[0][0]']             
 )                                                                                                
                                                                                                  
 block5g_se_squeeze (GlobalAver  (None, 960)         0           ['block5g_activation[0][0]']     
 agePooling2D)                                                                                    
          

 block5i_activation (Activation  (None, 2, 2, 960)   0           ['block5i_bn[0][0]']             
 )                                                                                                
                                                                                                  
 block5i_se_squeeze (GlobalAver  (None, 960)         0           ['block5i_activation[0][0]']     
 agePooling2D)                                                                                    
                                                                                                  
 block5i_se_reshape (Reshape)   (None, 1, 1, 960)    0           ['block5i_se_squeeze[0][0]']     
                                                                                                  
 block5i_se_reduce (Conv2D)     (None, 1, 1, 40)     38440       ['block5i_se_reshape[0][0]']     
                                                                                                  
 block5i_s

 block6b_se_expand (Conv2D)     (None, 1, 1, 1536)   99840       ['block6b_se_reduce[0][0]']      
                                                                                                  
 block6b_se_excite (Multiply)   (None, 1, 1, 1536)   0           ['block6b_activation[0][0]',     
                                                                  'block6b_se_expand[0][0]']      
                                                                                                  
 block6b_project_conv (Conv2D)  (None, 1, 1, 256)    393216      ['block6b_se_excite[0][0]']      
                                                                                                  
 block6b_project_bn (BatchNorma  (None, 1, 1, 256)   1024        ['block6b_project_conv[0][0]']   
 lization)                                                                                        
                                                                                                  
 block6b_d

 block6d_project_conv (Conv2D)  (None, 1, 1, 256)    393216      ['block6d_se_excite[0][0]']      
                                                                                                  
 block6d_project_bn (BatchNorma  (None, 1, 1, 256)   1024        ['block6d_project_conv[0][0]']   
 lization)                                                                                        
                                                                                                  
 block6d_drop (Dropout)         (None, 1, 1, 256)    0           ['block6d_project_bn[0][0]']     
                                                                                                  
 block6d_add (Add)              (None, 1, 1, 256)    0           ['block6d_drop[0][0]',           
                                                                  'block6c_add[0][0]']            
                                                                                                  
 block6e_e

 block6f_drop (Dropout)         (None, 1, 1, 256)    0           ['block6f_project_bn[0][0]']     
                                                                                                  
 block6f_add (Add)              (None, 1, 1, 256)    0           ['block6f_drop[0][0]',           
                                                                  'block6e_add[0][0]']            
                                                                                                  
 block6g_expand_conv (Conv2D)   (None, 1, 1, 1536)   393216      ['block6f_add[0][0]']            
                                                                                                  
 block6g_expand_bn (BatchNormal  (None, 1, 1, 1536)  6144        ['block6g_expand_conv[0][0]']    
 ization)                                                                                         
                                                                                                  
 block6g_e

 block6i_expand_conv (Conv2D)   (None, 1, 1, 1536)   393216      ['block6h_add[0][0]']            
                                                                                                  
 block6i_expand_bn (BatchNormal  (None, 1, 1, 1536)  6144        ['block6i_expand_conv[0][0]']    
 ization)                                                                                         
                                                                                                  
 block6i_expand_activation (Act  (None, 1, 1, 1536)  0           ['block6i_expand_bn[0][0]']      
 ivation)                                                                                         
                                                                                                  
 block6i_dwconv2 (DepthwiseConv  (None, 1, 1, 1536)  13824       ['block6i_expand_activation[0][0]
 2D)                                                             ']                               
          

 block6k_expand_activation (Act  (None, 1, 1, 1536)  0           ['block6k_expand_bn[0][0]']      
 ivation)                                                                                         
                                                                                                  
 block6k_dwconv2 (DepthwiseConv  (None, 1, 1, 1536)  13824       ['block6k_expand_activation[0][0]
 2D)                                                             ']                               
                                                                                                  
 block6k_bn (BatchNormalization  (None, 1, 1, 1536)  6144        ['block6k_dwconv2[0][0]']        
 )                                                                                                
                                                                                                  
 block6k_activation (Activation  (None, 1, 1, 1536)  0           ['block6k_bn[0][0]']             
 )        

                                                                                                  
 block6m_bn (BatchNormalization  (None, 1, 1, 1536)  6144        ['block6m_dwconv2[0][0]']        
 )                                                                                                
                                                                                                  
 block6m_activation (Activation  (None, 1, 1, 1536)  0           ['block6m_bn[0][0]']             
 )                                                                                                
                                                                                                  
 block6m_se_squeeze (GlobalAver  (None, 1536)        0           ['block6m_activation[0][0]']     
 agePooling2D)                                                                                    
                                                                                                  
 block6m_s

 )                                                                                                
                                                                                                  
 block6o_se_squeeze (GlobalAver  (None, 1536)        0           ['block6o_activation[0][0]']     
 agePooling2D)                                                                                    
                                                                                                  
 block6o_se_reshape (Reshape)   (None, 1, 1, 1536)   0           ['block6o_se_squeeze[0][0]']     
                                                                                                  
 block6o_se_reduce (Conv2D)     (None, 1, 1, 64)     98368       ['block6o_se_reshape[0][0]']     
                                                                                                  
 block6o_se_expand (Conv2D)     (None, 1, 1, 1536)   99840       ['block6o_se_reduce[0][0]']      
          

2023-05-05 19:27:54.247167: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:776] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Did not find a shardable source, walked to a node which is not a dataset: name: "FlatMapDataset/_9"
op: "FlatMapDataset"
input: "PrefetchDataset/_8"
attr {
  key: "Targuments"
  value {
    list {
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: -2
  }
}
attr {
  key: "f"
  value {
    func {
      name: "__inference_Dataset_flat_map_slice_batch_indices_28340"
    }
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\020FlatMapDataset:4"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
        dim {
          size: 32
        }
      }
    }
  }
}
attr {
  key: "output_types"
  value {
    list {
      type: DT_INT64
    }
  }
}
experimental_type {
  type_id: TFT_PRODUCT
  args {
    type_id: TFT_DATASET
    args {
      type_id: TFT_PRODUC

INFO:tensorflow:batch_all_reduce: 456 all-reduces with algorithm = nccl, num_packs = 1
INFO:tensorflow:batch_all_reduce: 456 all-reduces with algorithm = nccl, num_packs = 1


2023-05-05 19:29:04.036732: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8600
2023-05-05 19:29:04.873617: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8600
2023-05-05 19:29:05.071591: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8600

You may not need to update to CUDA 11.1; cherry-picking the ptxas binary is often sufficient.

You may not need to update to CUDA 11.1; cherry-picking the ptxas binary is often sufficient.

You may not need to update to CUDA 11.1; cherry-picking the ptxas binary is often sufficient.




<keras.callbacks.History at 0x7f23d0fa8df0>