In [1]:
import tensorflow as tf
import numpy as np

In [2]:
# Show the devices TensorFlow currently treats as visible.
devs = tf.config.get_visible_devices()
print(devs)

# Print the inter-/intra-op thread settings, set both to 12, then print them
# again so the change is visible in the cell output.
print(tf.config.threading.get_inter_op_parallelism_threads())
print(tf.config.threading.get_intra_op_parallelism_threads())
tf.config.threading.set_inter_op_parallelism_threads(12)
tf.config.threading.set_intra_op_parallelism_threads(12)
print(tf.config.threading.get_inter_op_parallelism_threads())
print(tf.config.threading.get_intra_op_parallelism_threads())

gpus = tf.config.experimental.list_physical_devices('GPU')
# Drop the first physical GPU; only the remaining ones are configured below.
gpus = gpus[1:] 
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        # Make only the selected GPUs visible to TensorFlow.
        tf.config.experimental.set_visible_devices(gpus, 'GPU')
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)
# NOTE(review): when the machine has at most one physical GPU, `gpus` is empty
# after the slice and the whole `if` body (memory growth + visibility) is skipped.
logical_devs = tf.config.list_logical_devices()
physical_devs = tf.config.experimental.list_physical_devices()

print("physical_devs",physical_devs)
print("logical_devs", logical_devs)



# Record the TensorFlow version this notebook was run with.
print(tf.version.VERSION)

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:2', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:3', device_type='GPU')]
0
0
12
12
3 Physical GPUs, 3 Logical GPUs
physical_devs [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:XLA_CPU:0', device_type='XLA_CPU'), PhysicalDevice(name='/physical_device:XLA_GPU:0', device_type='XLA_GPU'), PhysicalDevice(name='/physical_device:XLA_GPU:1', device_type='XLA_GPU'), PhysicalDevice(name='/physical_device:XLA_GPU:2', device_type='XLA_GPU'), PhysicalDevice(name='/physical_device:XLA_GPU:3', device_type='XLA_GPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:2'

In [3]:
import ShAReD_Net.training.train_distributed as train

import ShAReD_Net.training.slim as training_slim
import ShAReD_Net.model.slim as model_slim

In [4]:
from ShAReD_Net.configure import config



In [5]:
import ShAReD_Net.data.transform.transform as transform

# Build the dataset for the "train" split through the project's transform module.
data_split = "train"
train_ds = transform.create_dataset(data_split)

In [6]:
# Instantiate the four slim-model components; the encoder and both decoders
# share the same dense-block settings (2 blocks, 48 filters).
low_level_extractor = model_slim.LowLevelExtractor(
    color_channel=13,
    texture_channel=16,
    texture_compositions=16,
    out_channel=32,
)

encoder = model_slim.Encoder(
    dense_blocks_count=2,
    dense_filter_count=48,
)

pos_decoder = model_slim.PosDecoder(
    dense_blocks_count=2,
    dense_filter_count=48,
)

pose_decoder = model_slim.PoseDecoder(
    keypoints=15,
    z_bins=20,
    dense_blocks_count=2,
    dense_filter_count=48,
)

In [7]:

# Combine the four components into a single SlimTrainingModel instance.
train_model = training_slim.SlimTrainingModel(low_level_extractor, encoder, pos_decoder, pose_decoder)

In [8]:
# Single-device distribution strategy bound to "/gpu:0".
# NOTE(review): the setup cell hides the first physical GPU, so "/gpu:0" here
# presumably refers to the first *visible* GPU, not physical GPU 0 — confirm.
dist_strat = tf.distribute.OneDeviceStrategy(device="/gpu:0")

In [9]:
def get_train_model():
    """Return the notebook-level ``train_model`` object.

    No model is constructed here; the function only hands out the instance
    that was already created in an earlier cell.
    """
    return train_model

def loss_pre(loss):
    """Collapse the structured loss into a single summed value.

    Args:
        loss: a pair ``(detection_loss, (loss_xy, loss_z))``.

    Returns:
        ``reduce_sum(detection_loss) + reduce_sum(loss_xy) + reduce_sum(loss_z)``.
    """
    detection_loss, (loss_xy, loss_z) = loss

    # Accumulate in the same left-to-right order: detection, then xy, then z.
    total = tf.reduce_sum(detection_loss)
    total = total + tf.reduce_sum(loss_xy)
    total = total + tf.reduce_sum(loss_z)
    return total

def finish_training(train_model, loss, step):
    """Print the parameter count reported by ``train_model.count_params()``.

    ``loss`` and ``step`` are accepted as part of the callback signature but
    are not used.
    """
    param_total = train_model.count_params()
    print(param_total)

def batching(dataset, batch_size):
    """Batch ``dataset`` and replace the ragged ROI tensors with flat ones.

    Args:
        dataset: object providing ``batch`` and ``map``. Each batched element
            must unpack into ``(img, pos_stuff, (roi_indexes, roi_pose))``,
            where both ROI items expose ``flat_values`` and ``roi_indexes``
            additionally exposes ``nested_row_lengths()`` and ``dtype``.
        batch_size: forwarded unchanged to ``dataset.batch``.

    Returns:
        The result of mapping the batched dataset to
        ``(img, pos_stuff, (roi_indexes_flat, roi_pose_flat))``.
    """

    def unragg_indexes(indexes, row_length):
        """Prepend a row-id column to the flat ``indexes`` matrix.

        Column 0 of the result holds, for every row, the position of the
        entry of ``row_length[1]`` that the row falls under; the remaining
        columns are a copy of ``indexes``.
        """
        # Allocate with the dtype of the incoming values so the returned array
        # always matches the ``Tout`` declared for tf.numpy_function below
        # (a hard-coded int32 breaks as soon as the indices are int64).
        new_indexes = np.empty(
            [indexes.shape[0], indexes.shape[1] + 1], dtype=indexes.dtype
        )
        new_indexes[:, 1:] = indexes

        start = 0
        for row_id, length in enumerate(row_length[1]):
            stop = start + int(length)
            new_indexes[start:stop, 0] = row_id
            start = stop

        return new_indexes

    def unragg(img, pos_stuff, pose_stuff):
        """Flatten the ragged ROI indexes/poses of one batched element."""
        roi_indexes, roi_pose = pose_stuff

        roi_indexes_flat = tf.numpy_function(
            unragg_indexes,
            [roi_indexes.flat_values, roi_indexes.nested_row_lengths()],
            Tout=roi_indexes.dtype,
        )
        roi_pose_flat = roi_pose.flat_values

        return img, pos_stuff, (roi_indexes_flat, roi_pose_flat)

    batched_ds = dataset.batch(batch_size)
    unragged_ds = batched_ds.map(unragg)
    return unragged_ds

In [10]:
# Start from the project's default callbacks and override selected hooks.
step_callbacks = train.standart_callbacks()
# NOTE(review): every other override uses a string key, but this one uses the
# integer 2 — confirm this is the slot the training loop reads for this hook.
step_callbacks[2] = finish_training
#step_callbacks["train_init"] = init_model
step_callbacks["batching"] = batching
#step_callbacks["input_preprocessing"] = input_preprocessing 
step_callbacks["loss_pre"] = loss_pre

In [11]:
# Start training with the model factory, dataset, strategy and callbacks
# prepared in the cells above.
train.train(
    2,
    get_train_model,
    train_ds,
    dist_strat,
    batch_size=4,
    learning_rate=0.001,
    step_callbacks=step_callbacks,
)


orShape([4, None, None, 120])]
DenseModule (4, None, None, 256)
DenseBlock (4, None, None, 24)
BnDoConfReluConfRelu (4, None, None, 24)
DenseBlock (4, None, None, 72)
BnDoConfReluConfRelu (4, None, None, 72)
Scale (4, None, None, 120)
ScaledShAReD (TensorShape([4, None, None, 64]), TensorShape([4, None, None, 120]))
Scale (4, None, None, 64)
ResAttention [TensorShape([4, None, None, 64]), TensorShape([4, None, None, 120])]
Attention [TensorShape([4, None, None, 64]), TensorShape([4, None, None, 120])]
DenseModule (4, None, None, 256)
DenseBlock (4, None, None, 24)
BnDoConfReluConfRelu (4, None, None, 24)
DenseBlock (4, None, None, 72)
BnDoConfReluConfRelu (4, None, None, 72)
Scale (4, None, None, 120)
Scale (4, None, None, 120)
ScaledShAReD [TensorShape([4, None, None, 64]), TensorShape([4, None, None, 120])]
Scale (4, None, None, 64)
ResAttention [TensorShape([4, None, None, 64]), TensorShape([4, None, None, 120])]
Attention [TensorShape([4, None, None, 64]), TensorShape([4, None, Non