In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.applications import VGG19
import numpy as np
import tensorflow.keras.backend as K
K.set_image_data_format('channels_first')


2024-06-10 19:26:28.497404: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:

def create_model(model, neurons_l, type_m, out_classes, dropout=None):
        #freeze layers of input model
    #unfreeze at least one layer if unfreeze layers != 0
    unfrozen_layers = max(1, round(len(model.layers) * 100/100))
    freeze_layers = len(model.layers) - unfrozen_layers
    for layer in model.layers[0:freeze_layers]:
        layer.trainable = False
    x = model.output
    x = GlobalAveragePooling2D()(x)
    
    for neurons in neurons_l:
        x = Dense(neurons, activation='relu')(x)
        if dropout:
            x = Dropout(dropout)(x)
    
    if type_m == 'categorical':
        out = Dense(out_classes, activation='softmax')(x)
    elif type_m == 'regression':
        out = Dense(1, kernel_initializer='normal')(x)

    model = tf.keras.models.Model(inputs=model.input, outputs=out)
    return model

# Example usage:
base_model = VGG19(weights='imagenet', include_top=False, input_shape=(3, 100, 100))
neurons_l = [1024, 512]
type_m = 'regression'
out_classes = 10  # This is only relevant if type_m is 'categorical'
dropout = 0.5

model = create_model(base_model, neurons_l, type_m, out_classes, dropout)
model.summary()


2024-06-10 19:26:32.668829: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-06-10 19:26:32.669076: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-06-10 19:26:32.747165: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 3, 100, 100)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 64, 100, 100)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 64, 100, 100)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 64, 50, 50)        0         
                                                                 
 block2_conv1 (Conv2D)       (None, 128, 50, 50)       73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 128, 50, 50)      

In [3]:
test_input = np.random.rand(10, 3, 100, 100).astype(np.float32)
pred = model.predict_on_batch(test_input)
print(test_input.shape)
print("Test batch prediction shape:", pred.shape)
print("Test batch prediction shape:", type(pred))

print("Test batch prediction:", pred)

2024-06-10 19:26:36.248851: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype float and shape [10,3,100,100]
	 [[{{node Placeholder/_0}}]]
2024-06-10 19:26:36.607494: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8600


(10, 3, 100, 100)
Test batch prediction shape: (10, 1)
Test batch prediction shape: <class 'numpy.ndarray'>
Test batch prediction: [[0.23973349]
 [0.23195314]
 [0.26145318]
 [0.26798415]
 [0.2451318 ]
 [0.25212687]
 [0.24894571]
 [0.25728992]
 [0.24228129]
 [0.24030724]]


2024-06-10 19:26:37.480304: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:637] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


In [5]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import VGG19
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
import logging
import random
import os

# Set up logging
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Set random seed for reproducibility
tf.random.set_seed(42)
np.random.seed(42)
random.seed(42)
os.environ['PYTHONHASHSEED'] = '42'

def create_model(model, neurons_l, type_m, out_classes, dropout=None):
    x = model.output
    x = GlobalAveragePooling2D()(x)
    
    for neurons in neurons_l:
        x = Dense(neurons, activation='relu')(x)
        if dropout:
            x = Dropout(dropout)(x)
    
    if type_m == 'categorical':
        out = Dense(out_classes, activation='softmax')(x)
    elif type_m == 'regression':
        out = Dense(1, kernel_initializer='normal')(x)

    model = tf.keras.models.Model(inputs=model.input, outputs=out)
    return model

# Create base model
base_model = VGG19(weights='imagenet', include_top=False, input_shape=(3, 100, 100))

# Define parameters
neurons_l = [1024, 512]
type_m = 'regression'
out_classes = 10  # Only relevant if type_m is 'categorical'
dropout = 0.5

# Use MirroredStrategy for distributed training
strategy = tf.distribute.MirroredStrategy(["GPU:" + str(i) for i in [0,1]])

with strategy.scope():
    model = create_model(base_model, neurons_l, type_m, out_classes, dropout)
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])


# Print model summary
model.summary()

# Test with a synthetic batch input
test_input = np.random.rand(10, 3, 100, 100).astype(np.float32)
# logger.debug("Test input data: %s", test_input)

pred = model.predict(test_input)
# Print test output
print(test_input.shape)
print("Test batch prediction shape:", pred.shape)
print("Test batch prediction type:", type(pred))
print("Test batch prediction:", pred)


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')


ValueError: Variable (<tf.Variable 'block1_conv1/kernel:0' shape=(3, 3, 3, 64) dtype=float32, numpy=
array([[[[ 0.34119523,  0.09563112,  0.0177449 , ..., -0.11436455,
          -0.05099866, -0.00299793],
         [ 0.46418372,  0.03355668,  0.10245045, ..., -0.06945956,
          -0.04020201,  0.04048637],
         [ 0.39416704, -0.08419707, -0.03631314, ..., -0.10720515,
          -0.03804016,  0.04690642]],

        [[ 0.33999205,  0.13363543,  0.02129423, ..., -0.13025227,
          -0.16508926, -0.06969624],
         [ 0.41810837,  0.05260524,  0.09755926, ..., -0.09385028,
          -0.20492788, -0.0573062 ],
         [ 0.37740308, -0.07876257, -0.04775979, ..., -0.11827433,
          -0.19008617, -0.01889699]],

        [[-0.04484424,  0.06471398, -0.07631404, ..., -0.12629718,
          -0.29905206, -0.2825364 ],
         [-0.04806903, -0.00658076, -0.02234544, ..., -0.0878844 ,
          -0.3915486 , -0.34632796],
         [-0.04594866, -0.11583115, -0.14462094, ..., -0.12290562,
          -0.35782176, -0.27979308]]],


       [[[ 0.23215917,  0.133657  ,  0.12134422, ..., -0.1063385 ,
           0.28406844,  0.3594997 ],
         [ 0.30511212,  0.05677647,  0.21688674, ..., -0.06828708,
           0.3440761 ,  0.44033417],
         [ 0.2671299 , -0.07969447,  0.05988706, ..., -0.09225675,
           0.31764674,  0.42209673]],

        [[ 0.08978214,  0.18505956,  0.15264879, ..., -0.04266965,
           0.25779948,  0.35873157],
         [ 0.10385381,  0.08851637,  0.2392226 , ..., -0.01210995,
           0.27064082,  0.40848857],
         [ 0.09986369, -0.06240906,  0.07442063, ..., -0.02214639,
           0.25912452,  0.423499  ]],

        [[-0.30331427,  0.08002605, -0.03926321, ..., -0.12958746,
          -0.19778992, -0.21510386],
         [-0.37314063, -0.00698938,  0.02153259, ..., -0.09827439,
          -0.2535741 , -0.25541356],
         [-0.34100872, -0.13399366, -0.11510294, ..., -0.11911335,
          -0.23109646, -0.19202407]]],


       [[[-0.07260918,  0.10084777,  0.01313597, ..., -0.12594968,
           0.1464741 ,  0.05009392],
         [-0.07646758,  0.03879711,  0.09974211, ..., -0.08732687,
           0.2247974 ,  0.10158388],
         [-0.07573577, -0.07806503, -0.03540679, ..., -0.1208065 ,
           0.20088433,  0.09790061]],

        [[-0.246675  ,  0.1414054 ,  0.02605635, ..., -0.10128672,
           0.16340195,  0.02832468],
         [-0.3107071 ,  0.06031388,  0.10412455, ..., -0.06832542,
           0.20279962,  0.05222717],
         [-0.2803425 , -0.07094654, -0.0387974 , ..., -0.08843154,
           0.18996507,  0.07766484]],

        [[-0.33683276,  0.06601517, -0.08144748, ..., -0.13460518,
          -0.1342358 , -0.27096185],
         [-0.46453714, -0.00576723, -0.02660675, ..., -0.10017379,
          -0.15603794, -0.32566148],
         [-0.41602272, -0.11491341, -0.14672887, ..., -0.13079506,
          -0.1379628 , -0.2658845 ]]]], dtype=float32)>) was not created in the distribution strategy scope of (<tensorflow.python.distribute.mirrored_strategy.MirroredStrategy object at 0x7f82485184f0>). It is most likely because some layers, model, or optimizer was being created outside the distribution strategy scope. Try to make sure your code looks similar to the following.
with strategy.scope():
  model=_create_model()
  model.compile(...)

In [8]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import VGG19
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
import logging
import random
import os

# Set up logging
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Set random seed for reproducibility
tf.random.set_seed(42)
np.random.seed(42)
random.seed(42)
os.environ['PYTHONHASHSEED'] = '42'

# Create the strategy
strategy = tf.distribute.MirroredStrategy(["GPU:" + str(i) for i in [0, 1]])

# strategy = tf.distribute.MirroredStrategy()

def create_model(base_model, neurons_l, type_m, out_classes, dropout=None):
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    
    for neurons in neurons_l:
        x = Dense(neurons, activation='relu')(x)
        if dropout:
            x = Dropout(dropout)(x)
    
    if type_m == 'categorical':
        out = Dense(out_classes, activation='softmax')(x)
    elif type_m == 'regression':
        out = Dense(1, kernel_initializer='normal')(x)

    model = tf.keras.models.Model(inputs=base_model.input, outputs=out)
    return model

# Define parameters
neurons_l = [1024, 512]
type_m = 'regression'
out_classes = 10  # Only relevant if type_m is 'categorical'
dropout = 0.5

with strategy.scope():
    # Create base model
    base_model = VGG19(weights='imagenet', include_top=False, input_shape=(3, 100, 100))

    # Create custom model within the strategy scope
    model = create_model(base_model, neurons_l, type_m, out_classes, dropout)
    
    # Compile the model within the strategy scope
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Log model creation
logger.debug('Model created with layers:')
# for layer in model.layers:
#     logger.debug(f"Layer {layer.name}: {layer.get_weights()}")

# Print model summary
model.summary()

# Create a multi-batch dataset
batch_size = 10
num_batches = 5
data = np.random.rand(batch_size * num_batches, 3, 100, 100).astype(np.float32)
labels = np.random.rand(batch_size * num_batches, 1).astype(np.float32)
dataset = tf.data.Dataset.from_tensor_slices((data, labels)).batch(batch_size)

# Predict on the dataset
preds = []
for batch_data, batch_labels in dataset:
    logger.debug(f"Batch data shape: {batch_data.shape}")
    pred = model.predict_on_batch(batch_data)
    logger.debug(f"Batch prediction shape: {pred.shape}")
    preds.append(pred)

# Concatenate predictions to form the complete prediction array
preds = np.concatenate(preds, axis=0)

# Print predictions
logger.debug(f"Complete predictions shape: {preds.shape}")
print("Complete predictions shape:", preds.shape)
print("Predictions:", preds)


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')
DEBUG:__main__:Model created with layers:


Model: "model_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_6 (InputLayer)        [(None, 3, 100, 100)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 64, 100, 100)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 64, 100, 100)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 64, 50, 50)        0         
                                                                 
 block2_conv1 (Conv2D)       (None, 128, 50, 50)       73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 128, 50, 50)       147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 128, 25, 25)       0   

2024-06-10 19:29:24.060333: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype float and shape [50,1]
	 [[{{node Placeholder/_1}}]]
DEBUG:__main__:Batch data shape: (10, 3, 100, 100)
2024-06-10 19:29:24.084612: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype float and shape [10,3,100,100]
	 [[{{node Placeholder/_0}}]]
2024-06-10 19:29:24.084966: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:786] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Found an unshardable source dataset:

Complete predictions shape: (50, 1)
Predictions: [[-0.01420981]
 [-0.04474981]
 [-0.00461541]
 [-0.03722925]
 [-0.00197983]
 [-0.01651862]
 [-0.00297237]
 [-0.01427188]
 [ 0.01288254]
 [-0.00117422]
 [-0.0077542 ]
 [-0.01310588]
 [-0.02000218]
 [-0.04665419]
 [-0.00359902]
 [-0.01954608]
 [-0.01405914]
 [-0.01254627]
 [-0.01834228]
 [ 0.00922775]
 [ 0.00778042]
 [-0.00880324]
 [ 0.00323083]
 [-0.02351457]
 [-0.01960595]
 [-0.03952255]
 [-0.01385491]
 [-0.02710311]
 [-0.00397971]
 [-0.03467187]
 [-0.03804477]
 [ 0.0037501 ]
 [-0.02806786]
 [-0.01269479]
 [-0.00957624]
 [-0.00251121]
 [-0.00830674]
 [-0.00585702]
 [-0.01625368]
 [-0.00856563]
 [-0.02051067]
 [-0.0295184 ]
 [ 0.00170929]
 [-0.02685451]
 [ 0.00751171]
 [-0.0301689 ]
 [-0.02217676]
 [-0.01135078]
 [-0.02497144]
 [-0.03294414]]
