# TensorFlowでのモデルサイズ推定

In [1]:
import numpy as np
from collections import defaultdict
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers

## モデルサイズ計算関数

下記のページのプログラム
https://stackoverflow.com/questions/43137288/how-to-determine-needed-memory-of-keras-model
では、混合精度演算には対応していないため、モデル中の各レイヤのdtype_policyを元にデータサイズを推定するように修正を試みた。

また、訓練時のバックプロパゲーションでの勾配計算を考慮し、メモリサイズを2倍とした。

In [2]:
# Bytes per element as (activations, variables) for each supported dtype policy.
# Mixed policies compute in a 16-bit type but keep their variables in float32,
# so the two sizes differ for them.
_DTYPE_POLICY_BYTES = {
    "float16": (2, 2),
    "bfloat16": (2, 2),
    "float32": (4, 4),
    "float64": (8, 8),
    "mixed_float16": (2, 4),
    "mixed_bfloat16": (2, 4),
}


def _policy_bytes(dtype_policy_name):
    """Return ``(activation_bytes, variable_bytes)`` for a dtype policy name.

    Raises:
        ValueError: if the policy name is not in ``_DTYPE_POLICY_BYTES``.
    """
    try:
        return _DTYPE_POLICY_BYTES[dtype_policy_name]
    except KeyError:
        raise ValueError(
            "unknown dtype_policy: {}".format(dtype_policy_name)
        ) from None


def _count_elements(shape):
    """Multiply the known dimensions of ``shape``; ``None`` dimensions are skipped."""
    count = 1
    for dim in shape:
        if dim is not None:
            count *= int(dim)
    return count


def _collect_layer_memory(batch_size, model, prefix=""):
    """Walk ``model.layers`` and return ``(total_bytes, detail_dict)``.

    Anything exposing a ``layers`` attribute is treated as a nested model and
    expanded recursively; its inner layers are reported under
    ``"<nested name>/<layer name>"`` so every entry of ``detail_dict`` stays a
    flat ``{"trainable", "non_trainable", "output"}`` record.
    """
    total_bytes = 0
    detail_dict = {}
    for layer in model.layers:
        key = prefix + layer.name

        if hasattr(layer, "layers"):
            # Count the nested model through its own layers only, so its
            # weights and output are not added a second time here.
            nested_bytes, nested_detail = _collect_layer_memory(
                batch_size, layer, prefix=key + "/"
            )
            total_bytes += nested_bytes
            detail_dict.update(nested_detail)
            continue

        out_shape = layer.output_shape
        if isinstance(out_shape, list):
            # Multi-output (and input) layers report a list; only the first
            # entry is used for the estimate.
            out_shape = out_shape[0]
        output_elements = _count_elements(out_shape)

        trainable_count = sum(
            _count_elements(tuple(w.shape)) for w in layer.trainable_weights
        )
        non_trainable_count = sum(
            _count_elements(tuple(w.shape)) for w in layer.non_trainable_weights
        )

        detail_dict[key] = {"trainable": trainable_count,
                            "non_trainable": non_trainable_count,
                            "output": output_elements}

        activation_bytes, variable_bytes = _policy_bytes(layer.dtype_policy.name)
        # Both terms are doubled to leave room for back propagation.
        total_bytes += activation_bytes * batch_size * output_elements * 2
        total_bytes += variable_bytes * (trainable_count + non_trainable_count) * 2
    return total_bytes, detail_dict


def get_model_memory_usage_mixed_mode(batch_size, model):
    """Estimate the training-time memory footprint of a Keras model.

    For every layer the estimate adds the size of its output for one batch
    and the size of its weights, each doubled to account for back
    propagation. Element sizes come from the layer's ``dtype_policy.name``.

    Args:
        batch_size: number of samples per batch.
        model: object exposing ``layers``; each layer must provide ``name``,
            ``output_shape``, ``trainable_weights``, ``non_trainable_weights``
            (items with a ``shape``) and ``dtype_policy.name``.

    Returns:
        ``(gbytes, detail_dict)`` where ``gbytes`` is the estimate in GiB
        rounded to three decimals and ``detail_dict`` maps each layer name to
        ``{"trainable": int, "non_trainable": int, "output": int}`` (the
        output count is per sample, without the batch dimension).

    Raises:
        ValueError: if a layer uses an unsupported dtype policy.
    """
    total_bytes, detail_dict = _collect_layer_memory(batch_size, model)
    gbytes = float(np.round(total_bytes / (1024.0 ** 3), 3))
    return gbytes, detail_dict

In [3]:
def print_summary(detail_dict):
    """Print model-wide totals accumulated from a per-layer detail mapping.

    Args:
        detail_dict: mapping of layer name to a dict of numeric fields. The
            fields ``"trainable"``, ``"non_trainable"`` and ``"output"`` are
            reported; a field that never appears is printed as 0.
    """
    totals = defaultdict(int)
    for per_layer in detail_dict.values():
        for field in per_layer:
            totals[field] = totals[field] + per_layer[field]

    report = (
        ("Trainable Prameters:", "trainable"),
        ("Non Trainable Parameters:", "non_trainable"),
        # The "output" field (summed layer output sizes) is shown as "Variables".
        ("Variables:", "output"),
    )
    for label, field in report:
        print(label, totals[field])

## PointNetモデルの場合

下記のページより  
https://keras.io/examples/vision/pointnet/

In [4]:
# Number of points per input sample (first dimension of the model's input shape).
NUM_POINTS = 650_000
# Number of units in the final classification layer.
NUM_CLASSES = 10

In [5]:
def conv_bn(x, filters):
    """Apply Conv1D (kernel size 1) -> BatchNormalization -> ReLU to ``x``.

    Args:
        x: input tensor passed to the convolution.
        filters: number of output filters of the convolution.

    Returns:
        The tensor produced by the ReLU activation.
    """
    convolved = layers.Conv1D(filters, kernel_size=1, padding="valid")(x)
    normalized = layers.BatchNormalization(momentum=0.0)(convolved)
    activated = layers.Activation("relu")(normalized)
    return activated


def dense_bn(x, filters):
    """Apply Dense -> BatchNormalization -> ReLU to ``x``.

    Args:
        x: input tensor passed to the dense layer.
        filters: number of units of the dense layer.

    Returns:
        The tensor produced by the ReLU activation.
    """
    projected = layers.Dense(filters)(x)
    normalized = layers.BatchNormalization(momentum=0.0)(projected)
    activated = layers.Activation("relu")(normalized)
    return activated


In [6]:
class OrthogonalRegularizer(keras.regularizers.Regularizer):
    """Penalty on the squared difference between ``x @ x^T`` and the identity.

    Args:
        num_features: side length of the square matrices the input is
            reshaped into.
        l2reg: scale factor applied to the squared difference.
    """

    def __init__(self, num_features, l2reg=0.001):
        self.num_features = num_features
        self.l2reg = l2reg
        # Identity matrix the product is compared against.
        self.eye = tf.eye(num_features)

    def __call__(self, x):
        """Return the scalar penalty for ``x``."""
        x = tf.reshape(x, (-1, self.num_features, self.num_features))
        # NOTE(review): tensordot over axes (2, 2) contracts the last axis of
        # every matrix with every other matrix in the batch, not only with
        # itself -- confirm this matches the intended per-sample product.
        xxt = tf.tensordot(x, x, axes=(2, 2))
        xxt = tf.reshape(xxt, (-1, self.num_features, self.num_features))
        return tf.reduce_sum(self.l2reg * tf.square(xxt - self.eye))

    def get_config(self):
        """Return the constructor arguments so the regularizer can be serialised."""
        return {"num_features": self.num_features, "l2reg": self.l2reg}

In [7]:
def tnet(inputs, num_features):
    """Build a transform sub-network and apply its output matrix to ``inputs``.

    A stack of conv/dense blocks produces ``num_features * num_features``
    values, which are reshaped into a square matrix and combined with
    ``inputs`` through a ``Dot`` layer.

    Args:
        inputs: tensor to transform.
        num_features: side length of the predicted square matrix.

    Returns:
        The output of the ``Dot`` layer applied to ``inputs`` and the matrix.
    """
    # Initialise the bias as the identity matrix (the kernel starts at zero).
    identity_bias = keras.initializers.Constant(np.eye(num_features).flatten())
    orthogonality = OrthogonalRegularizer(num_features)

    features = inputs
    for width in (32, 64, 512):
        features = conv_bn(features, width)
    features = layers.GlobalMaxPooling1D()(features)
    for width in (256, 128):
        features = dense_bn(features, width)

    flat_transform = layers.Dense(
        num_features * num_features,
        kernel_initializer="zeros",
        bias_initializer=identity_bias,
        activity_regularizer=orthogonality,
    )(features)
    transform = layers.Reshape((num_features, num_features))(flat_transform)

    # Apply the affine transformation to the input features.
    return layers.Dot(axes=(2, 1))([inputs, transform])

In [8]:
# Assemble the PointNet classifier: two transform sub-networks interleaved
# with conv blocks, a global max pool, then a dense classification head.
inputs = keras.Input(shape=(NUM_POINTS, 6))

x = tnet(inputs, 6)
x = conv_bn(x, 32)
x = conv_bn(x, 32)
x = tnet(x, 32)
x = conv_bn(x, 32)
x = conv_bn(x, 64)
x = conv_bn(x, 512)
x = layers.GlobalMaxPooling1D()(x)
x = dense_bn(x, 256)
x = layers.Dropout(0.3)(x)
x = dense_bn(x, 128)
x = layers.Dropout(0.3)(x)

# The Dense layer emits raw logits; softmax is applied exactly once by the
# Activation layer below (previously it was applied in both layers).
x = layers.Dense(NUM_CLASSES)(x)
outputs = layers.Activation("softmax")(x)

model = keras.Model(inputs=inputs, outputs=outputs, name="pointnet")
model.summary()


2022-06-10 15:58:12.209374: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "pointnet"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 650000, 6)]  0           []                               
                                                                                                  
 conv1d (Conv1D)                (None, 650000, 32)   224         ['input_1[0][0]']                
                                                                                                  
 batch_normalization (BatchNorm  (None, 650000, 32)  128         ['conv1d[0][0]']                 
 alization)                                                                                       
                                                                                                  
 activation (Activation)        (None, 650000, 32)   0           ['batch_normalization[0][0

In [9]:
# Estimate the memory needed by the current global `model` for several batch
# sizes, printing the estimate followed by the summed per-layer counts.
for batch_size in [4, 8, 16, 32]:
    mem_size, detail_dict = get_model_memory_usage_mixed_mode(batch_size, model)
    print("Batch Size:{} , Memory Usage: {} GB".format(batch_size, mem_size))
    print_summary(detail_dict)

Batch Size:4 , Memory Usage: 110.578 GB
Trainable Prameters: 746574.0
Non Trainable Parameters: 6080.0
Variables: 3710207516
Batch Size:8 , Memory Usage: 221.151 GB
Trainable Prameters: 746574.0
Non Trainable Parameters: 6080.0
Variables: 3710207516
Batch Size:16 , Memory Usage: 442.297 GB
Trainable Prameters: 746574.0
Non Trainable Parameters: 6080.0
Variables: 3710207516
Batch Size:32 , Memory Usage: 884.588 GB
Trainable Prameters: 746574.0
Non Trainable Parameters: 6080.0
Variables: 3710207516


## 3D CNNの場合
下記のページより  
https://keras.io/examples/vision/3D_image_classification/

In [10]:
def get_model(width=128, height=128, depth=64, channel=1):
    """Build a 3D convolutional neural network model.

    Four Conv3D -> ReLU -> MaxPool3D -> BatchNormalization stages (with 64,
    64, 128 and 256 filters) are followed by global average pooling, a
    512-unit dense layer with dropout, and a 10-unit softmax output.

    Args:
        width, height, depth: spatial dimensions of the input volume.
        channel: number of input channels.

    Returns:
        A ``keras.Model`` named ``"3dcnn"``.
    """
    inputs = keras.Input((width, height, depth, channel))

    # Convolutional feature extractor: one stage per filter count.
    x = inputs
    for n_filters in (64, 64, 128, 256):
        x = layers.Conv3D(filters=n_filters, kernel_size=3)(x)
        x = layers.Activation(tf.keras.activations.relu)(x)
        x = layers.MaxPool3D(pool_size=2)(x)
        x = layers.BatchNormalization()(x)

    # Classification head.
    x = layers.GlobalAveragePooling3D()(x)
    x = layers.Dense(units=512)(x)
    x = layers.Dropout(0.3)(x)
    x = layers.Dense(units=10)(x)
    outputs = layers.Activation(tf.keras.activations.softmax)(x)

    return keras.Model(inputs, outputs, name="3dcnn")


# Build the 3D CNN for a 150 x 300 x 150 input with 3 channels and print its
# layer summary. This rebinds the global `model` read by the estimation loop.
model = get_model(width=150, height=300, depth=150, channel=3)
model.summary()

Model: "3dcnn"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 150, 300, 150, 3  0         
                             )]                                  
                                                                 
 conv3d (Conv3D)             (None, 148, 298, 148, 64  5248      
                             )                                   
                                                                 
 activation_18 (Activation)  (None, 148, 298, 148, 64  0         
                             )                                   
                                                                 
 max_pooling3d (MaxPooling3D  (None, 74, 149, 74, 64)  0         
 )                                                               
                                                                 
 batch_normalization_17 (Bat  (None, 74, 149, 74, 64)  256   

In [11]:
# Estimate the memory needed by the current global `model` for several batch
# sizes, printing the estimate followed by the summed per-layer counts.
for batch_size in [4, 8, 16, 32]:
    mem_size, detail_dict = get_model_memory_usage_mixed_mode(batch_size, model)
    print("Batch Size:{} , Memory Usage: {} GB".format(batch_size, mem_size))
    print_summary(detail_dict)

Batch Size:4 , Memory Usage: 32.723 GB
Trainable Prameters: 1359946.0
Non Trainable Parameters: 1024.0
Variables: 1097651620
Batch Size:8 , Memory Usage: 65.435 GB
Trainable Prameters: 1359946.0
Non Trainable Parameters: 1024.0
Variables: 1097651620
Batch Size:16 , Memory Usage: 130.86 GB
Trainable Prameters: 1359946.0
Non Trainable Parameters: 1024.0
Variables: 1097651620
Batch Size:32 , Memory Usage: 261.711 GB
Trainable Prameters: 1359946.0
Non Trainable Parameters: 1024.0
Variables: 1097651620


### 3D-CNN 512x512x512 の場合

In [18]:
# Build the 3D CNN for a 512 x 512 x 512 input with 4 channels and print its
# layer summary. This rebinds the global `model` read by the estimation loop.
model = get_model(width=512, height=512, depth=512, channel=4)
model.summary()

Model: "3dcnn"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 512, 512, 512, 4  0         
                             )]                                  
                                                                 
 conv3d_12 (Conv3D)          (None, 510, 510, 510, 64  6976      
                             )                                   
                                                                 
 activation_33 (Activation)  (None, 510, 510, 510, 64  0         
                             )                                   
                                                                 
 max_pooling3d_12 (MaxPoolin  (None, 255, 255, 255, 64  0        
 g3D)                        )                                   
                                                                 
 batch_normalization_29 (Bat  (None, 255, 255, 255, 64  256  

In [19]:
# Estimate the memory needed by the current global `model` for several batch
# sizes, printing the estimate followed by the summed per-layer counts.
for batch_size in [4, 8, 16, 32]:
    mem_size, detail_dict = get_model_memory_usage_mixed_mode(batch_size, model)
    print("Batch Size:{} , Memory Usage: {} GB".format(batch_size, mem_size))
    print_summary(detail_dict)

Batch Size:4 , Memory Usage: 674.766 GB
Trainable Prameters: 1361674.0
Non Trainable Parameters: 1024.0
Variables: 22641055508
Batch Size:8 , Memory Usage: 1349.522 GB
Trainable Prameters: 1361674.0
Non Trainable Parameters: 1024.0
Variables: 22641055508
Batch Size:16 , Memory Usage: 2699.034 GB
Trainable Prameters: 1361674.0
Non Trainable Parameters: 1024.0
Variables: 22641055508
Batch Size:32 , Memory Usage: 5398.058 GB
Trainable Prameters: 1361674.0
Non Trainable Parameters: 1024.0
Variables: 22641055508
