# TensorFlowでのモデルサイズ推定に関する考察

In [1]:
import numpy as np
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers

## 1. 簡便なモデルサイズ計算法と実際のメモリ使用量との比較

### GPUメモリ確保モードの変更

既定では、利用可能なメモリを一括確保する設定であるため、これを必要の都度確保する設定に変更している。

https://www.tensorflow.org/guide/gpu

In [2]:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  try:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
    print(e)

1 Physical GPUs, 1 Logical GPUs


### 開始前のGPUメモリ

In [3]:
mem_info_before = tf.config.experimental.get_memory_info('GPU:0')
mem_info_before

{'current': 0, 'peak': 0}

In [4]:
!nvidia-smi

Mon Apr 18 02:19:31 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   73C    P0    33W /  70W |    264MiB / 15109MiB |      3%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

### 簡便なモデルサイズ計算関数

下記のQ&Aページのプログラムを使用する。  
https://stackoverflow.com/questions/43137288/how-to-determine-needed-memory-of-keras-model


In [5]:
def get_model_memory_usage(batch_size, model):
    import numpy as np
    try:
        from keras import backend as K
    except:
        from tensorflow.keras import backend as K

    shapes_mem_count = 0
    internal_model_mem_count = 0
    for l in model.layers:
        layer_type = l.__class__.__name__
        if layer_type == 'Model':
            internal_model_mem_count += get_model_memory_usage(batch_size, l)
        single_layer_mem = 1
        out_shape = l.output_shape
        if type(out_shape) is list:
            out_shape = out_shape[0]
        for s in out_shape:
            if s is None:
                continue
            single_layer_mem *= s
        shapes_mem_count += single_layer_mem

    trainable_count = np.sum([K.count_params(p) for p in model.trainable_weights])
    non_trainable_count = np.sum([K.count_params(p) for p in model.non_trainable_weights])

    number_size = 4.0
    if K.floatx() == 'float16':
        number_size = 2.0
    if K.floatx() == 'float64':
        number_size = 8.0

    total_memory = number_size * (batch_size * shapes_mem_count + trainable_count + non_trainable_count)
    gbytes = np.round(total_memory / (1024.0 ** 3), 3) + internal_model_mem_count
    return gbytes

### 3D CNNの例
下記のページより  
https://keras.io/examples/vision/3D_image_classification/

In [6]:
NUM_CLASSES = 10

In [7]:
def get_model(width=128, height=128, depth=64, channel=1):
    """Build a 3D convolutional neural network model."""

    inputs = keras.Input((width, height, depth, channel))

    x = layers.Conv3D(filters=64, kernel_size=3, activation="relu")(inputs)
    x = layers.MaxPool3D(pool_size=2)(x)
    x = layers.BatchNormalization()(x)

    x = layers.Conv3D(filters=64, kernel_size=3, activation="relu")(x)
    x = layers.MaxPool3D(pool_size=2)(x)
    x = layers.BatchNormalization()(x)

    x = layers.Conv3D(filters=128, kernel_size=3, activation="relu")(x)
    x = layers.MaxPool3D(pool_size=2)(x)
    x = layers.BatchNormalization()(x)

    x = layers.Conv3D(filters=256, kernel_size=3, activation="relu")(x)
    x = layers.MaxPool3D(pool_size=2)(x)
    x = layers.BatchNormalization()(x)

    x = layers.GlobalAveragePooling3D()(x)
    x = layers.Dense(units=512, activation="relu")(x)
    x = layers.Dropout(0.3)(x)

    x = layers.Dense(units=NUM_CLASSES)(x)
    outputs = layers.Activation(keras.activations.softmax, dtype='float32')(x)

    # Define the model.
    model = keras.Model(inputs, outputs, name="3dcnn")
    return model


# Build model.
model = get_model(width=150, height=300, depth=150, channel=3)
model.summary()

Model: "3dcnn"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 150, 300, 150, 3  0         
                             )]                                  
                                                                 
 conv3d (Conv3D)             (None, 148, 298, 148, 64  5248      
                             )                                   
                                                                 
 max_pooling3d (MaxPooling3D  (None, 74, 149, 74, 64)  0         
 )                                                               
                                                                 
 batch_normalization (BatchN  (None, 74, 149, 74, 64)  256       
 ormalization)                                                   
                                                                 
 conv3d_1 (Conv3D)           (None, 72, 147, 72, 64)   110656

### 各バッチサイズによるモデルサイズ推定

In [8]:
for batch_size in [1, 2, 4, 8, 16, 32]:
  mem_size = get_model_memory_usage(batch_size, model)
  print("Batch Size:{} , Memory Usage: {}GB".format(batch_size, mem_size))

Batch Size:1 , Memory Usage: 2.31GB
Batch Size:2 , Memory Usage: 4.615GB
Batch Size:4 , Memory Usage: 9.225GB
Batch Size:8 , Memory Usage: 18.444GB
Batch Size:16 , Memory Usage: 36.884GB
Batch Size:32 , Memory Usage: 73.762GB


### ダミーデータによるモデル訓練

Google ColabでのGPUメモリに合わせて、バッチサイズを1で実施する。

In [9]:
BATCH_SIZE = 1
dummy_x = np.random.randn(BATCH_SIZE, 150, 300, 150, 3)
dummy_y = np.random.randint(0, 9, size=BATCH_SIZE)

In [10]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
model.fit(dummy_x, dummy_y)



<keras.callbacks.History at 0x7f962fb25d90>

### 訓練実行後のGPUメモリ

In [11]:
mem_info_after = tf.config.experimental.get_memory_info('GPU:0')
mem_info_after

{'current': 152983808, 'peak': 9541989120}

In [12]:
peak_allocated_mem_gb = (mem_info_after['peak'] - mem_info_before['peak']) / 1024**3
peak_allocated_mem_gb

8.886669874191284

TensorFlowから見ると、推定モデルサイズ2.31GBに対して4倍近くのメモリが確保されている。

In [13]:
!nvidia-smi

Mon Apr 18 02:19:46 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   77C    P0    45W /  70W |  14586MiB / 15109MiB |     14%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

実際には14GB程度のメモリが占有されている。

### 考察

#### (1) 上記のモデルサイズ推定プログラムの問題点

上記の推定プログラムでは、レイヤごとの訓練可能パラメータ（重み）と訓練不可パラメータおよび出力テンソルのサイズを合計して、モデルの使用メモリサイズを推定している。

これは推論時を想定した計算であり、訓練時のバックプロパゲーションの勾配計算に必要なメモリを想定していない。

また、次の実験が示すように、実質的に同じ計算グラフを生成するモデルでも、記述方法が異なると計算が異なってしまうという問題を抱えている。

#### Activationを別レイヤにした場合の計算

In [14]:
def get_model(width=128, height=128, depth=64, channel=1):
    """Build a 3D convolutional neural network model."""

    inputs = keras.Input((width, height, depth, channel))

    x = layers.Conv3D(filters=64, kernel_size=3)(inputs)
    x = layers.Activation(tf.keras.activations.relu)(x)
    x = layers.MaxPool3D(pool_size=2)(x)
    x = layers.BatchNormalization()(x)

    x = layers.Conv3D(filters=64, kernel_size=3)(x)
    x = layers.Activation(tf.keras.activations.relu)(x)
    x = layers.MaxPool3D(pool_size=2)(x)
    x = layers.BatchNormalization()(x)

    x = layers.Conv3D(filters=128, kernel_size=3)(x)
    x = layers.Activation(tf.keras.activations.relu)(x)
    x = layers.MaxPool3D(pool_size=2)(x)
    x = layers.BatchNormalization()(x)

    x = layers.Conv3D(filters=256, kernel_size=3)(x)
    x = layers.Activation(tf.keras.activations.relu)(x)
    x = layers.MaxPool3D(pool_size=2)(x)
    x = layers.BatchNormalization()(x)

    x = layers.GlobalAveragePooling3D()(x)
    x = layers.Dense(units=512)(x)
    x = layers.Dropout(0.3)(x)

    x = layers.Dense(units=10)(x)
    outputs = layers.Activation(tf.keras.activations.softmax)(x)

    # Define the model.
    model = keras.Model(inputs, outputs, name="3dcnn-2")
    return model


# Build model.
model = get_model(width=150, height=300, depth=150, channel=3)
model.summary()

Model: "3dcnn-2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 150, 300, 150, 3  0         
                             )]                                  
                                                                 
 conv3d_4 (Conv3D)           (None, 148, 298, 148, 64  5248      
                             )                                   
                                                                 
 activation_1 (Activation)   (None, 148, 298, 148, 64  0         
                             )                                   
                                                                 
 max_pooling3d_4 (MaxPooling  (None, 74, 149, 74, 64)  0         
 3D)                                                             
                                                                 
 batch_normalization_4 (Batc  (None, 74, 149, 74, 64)  256 

In [15]:
for batch_size in [1, 2, 4, 8, 16, 32]:
  mem_size = get_model_memory_usage(batch_size, model)
  print("Batch Size:{} , Memory Usage: {}GB".format(batch_size, mem_size))

Batch Size:1 , Memory Usage: 4.094GB
Batch Size:2 , Memory Usage: 8.183GB
Batch Size:4 , Memory Usage: 16.361GB
Batch Size:8 , Memory Usage: 32.718GB
Batch Size:16 , Memory Usage: 65.43GB
Batch Size:32 , Memory Usage: 130.855GB


上記のように、Activationをレイヤとして定義すると、メモリ推定量が増加する。

#### (2)TensorFlowから見たメモリアロケーションとnvidia-smiでの数値の差

TensorFlowから見たget_memory_infoで得られるメモリアロケーションのピーク値とnvidia-smiで確認したメモリ占有量が異なり、後者が大きくなっている理由として、リリース後のメモリが再利用されず新たにメモリが確保されるため、メモリフラグメンテーションが起きている可能性がある。

#### (3)精度の高いメモリサイズ推定法について

単にバックプロパゲーション時の勾配計算用のメモリだけであれば、これまでの計算のメモリサイズを2倍にすれば大きな差はないはずであるが、実際に確保されたメモリーを見ると3倍から4倍のメモリーが必要と考えられる。

深層学習モデルの学習に必要なメモリサイズに関する研究では、Microsoftが下記の論文を発表している。

https://www.microsoft.com/en-us/research/uploads/prod/2020/09/dnnmem.pdf

この論文では、TensorFlow、PyTorch、MXNetについて、その計算グラフやモデルのソースコード、訓練用パラメータから、メモリの解放と確保の順番を考慮して必要メモリを推定するアルゴリズムを紹介しているが、残念ながら計算プログラムDNNMemについては公開されていない。

このため、tf.kerasを使用したモデルの使用メモリ量の試算に当たっては、Activationレイヤを別に定義するなどの方法をとるとともに、バックプロパゲーションに必要な勾配計算用として2倍のメモリサイズを想定することとし、さらに一時的なメモリやフラグメンテーションによって必要なメモリが増大することを考慮する必要がある。

## 2. 混合精度演算適用時のメモリ使用量試算について

深層学習モデルの通常の計算は4バイトの単精度浮動小数点数（Float32）が使われているが、データの標準化を前提にした畳み込みやDenseの積和演算では、半精度浮動小数点数（Float16）を使ってもモデルの性能はほとんど変わらないことが知られている。
Voltaアーキテクチャ以降のNVIDIAのGPUには、混合精度計算機能が追加されており、Float16の計算を高速に行うTensorCoreが装備
され、半分のメモリサイズで高速に計算を行うことが可能である。

上記で紹介したメモリサイズ試算関数では、モデル全体のパラメータ数を元にした計算を行なっているため、混合精度での試算が不正確になる可能性がある。

このため、レイヤごとにdtype_policyを元にした計算を行うように修正した関数を用いることとした。

また、上記の1で論じた勾配計算用メモリを考慮して、パラメータ数を2倍として試算を行うように修正をしている。

### 修正版モデルサイズ計算関数

In [16]:
def get_model_memory_usage_mixed_mode(batch_size, model):
    import numpy as np
    try:
        from keras import backend as K
    except:
        from tensorflow.keras import backend as K

    def get_bytes(dtype_policy_name):
        if dtype_policy_name == 'float16':
            return 2
        elif dtype_policy_name == 'float32':
            return 4
        elif dtype_policy_name == 'float64':
            return 8
        if dtype_policy_name == 'mixed_float16':
            return 2
        if dtype_policy_name == 'mixed_bfloat16':
            return 2
        else:
            raise "unknow dtype_plocy;{}".format(dtype_policy_name)
    
    internal_model_mem_count = 0
    trainable_count = 0
    non_trainable_count = 0
    total_memory = 0
    for l in model.layers:
        layer_type = l.__class__.__name__
        if layer_type == 'Model':
            internal_model_mem_count += get_model_memory_usage_mixed_mode(batch_size, l)
        single_layer_mem = 1
        out_shape = l.output_shape
        if type(out_shape) is list:
            out_shape = out_shape[0]
        for s in out_shape:
            if s is None:
                continue
            single_layer_mem *= s
 
        trainable_count = np.sum([K.count_params(p) for p in l.trainable_weights])
        non_trainable_count = np.sum([K.count_params(p) for p in l.non_trainable_weights])

        total_memory += get_bytes(l.dtype_policy.name) * batch_size * single_layer_mem
        total_memory += get_bytes(l.dtype_policy.name) * (trainable_count + non_trainable_count)
    gbytes = np.round(total_memory / (1024.0 ** 3), 3) + internal_model_mem_count
    return gbytes

### 混合精度演算設定

https://www.tensorflow.org/guide/mixed_precision

In [17]:
dtype = "mixed_float16"
policy = tf.keras.mixed_precision.Policy(dtype)
tf.keras.mixed_precision.set_global_policy(policy)

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: Tesla T4, compute capability 7.5


### PointNetモデルの場合

下記のページより  
https://keras.io/examples/vision/pointnet/

In [18]:
NUM_POINTS = 650000
NUM_CLASSES = 10

In [19]:
def conv_bn(x, filters):
    x = layers.Conv1D(filters, kernel_size=1, padding="valid")(x)
    x = layers.BatchNormalization(momentum=0.0)(x)
    return layers.Activation("relu")(x)


def dense_bn(x, filters):
    x = layers.Dense(filters)(x)
    x = layers.BatchNormalization(momentum=0.0)(x)
    return layers.Activation("relu")(x)


In [20]:
class OrthogonalRegularizer(keras.regularizers.Regularizer):
    def __init__(self, num_features, l2reg=0.001):
        self.num_features = num_features
        self.l2reg = l2reg
        if keras.mixed_precision.global_policy().compute_dtype == 'float16':
            self.eye = tf.eye(num_features, dtype=tf.float16)
        else:
            self.eye = tf.eye(num_features, dtype=tf.float32)

    def __call__(self, x):
        x = tf.reshape(x, (-1, self.num_features, self.num_features))
        xxt = tf.tensordot(x, x, axes=(2, 2))
        xxt = tf.reshape(xxt, (-1, self.num_features, self.num_features))
        return tf.reduce_sum(self.l2reg * tf.square(xxt - self.eye))

In [21]:
def tnet(inputs, num_features):

    # Initalise bias as the indentity matrix
    bias = keras.initializers.Constant(np.eye(num_features).flatten())
    reg = OrthogonalRegularizer(num_features)

    x = conv_bn(inputs, 32)
    x = conv_bn(x, 64)
    x = conv_bn(x, 512)
    x = layers.GlobalMaxPooling1D()(x)
    x = dense_bn(x, 256)
    x = dense_bn(x, 128)
    x = layers.Dense(
        num_features * num_features,
        kernel_initializer="zeros",
        bias_initializer=bias,
        activity_regularizer=reg,
    )(x)
    feat_T = layers.Reshape((num_features, num_features))(x)
    # Apply affine transformation to input features
    return layers.Dot(axes=(2, 1))([inputs, feat_T])

In [22]:
inputs = keras.Input(shape=(NUM_POINTS, 6))

x = tnet(inputs, 6)
x = conv_bn(x, 32)
x = conv_bn(x, 32)
x = tnet(x, 32)
x = conv_bn(x, 32)
x = conv_bn(x, 64)
x = conv_bn(x, 512)
x = layers.GlobalMaxPooling1D()(x)
x = dense_bn(x, 256)
x = layers.Dropout(0.3)(x)
x = dense_bn(x, 128)
x = layers.Dropout(0.3)(x)

x = layers.Dense(NUM_CLASSES, activation="softmax")(x)
outputs = layers.Activation("softmax")(x)

model = keras.Model(inputs=inputs, outputs=outputs, name="pointnet")
model.summary()


Model: "pointnet"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, 650000, 6)]  0           []                               
                                                                                                  
 conv1d (Conv1D)                (None, 650000, 32)   224         ['input_3[0][0]']                
                                                                                                  
 batch_normalization_8 (BatchNo  (None, 650000, 32)  128         ['conv1d[0][0]']                 
 rmalization)                                                                                     
                                                                                                  
 activation_6 (Activation)      (None, 650000, 32)   0           ['batch_normalization_8[0]

In [23]:
for batch_size in [1, 2, 4, 8, 16, 32]:
  mem_size = get_model_memory_usage_mixed_mode(batch_size, model)
  print("Batch Size:{} , Memory Usage: {} GB".format(batch_size, mem_size))

Batch Size:1 , Memory Usage: 6.919 GB
Batch Size:2 , Memory Usage: 13.838 GB
Batch Size:4 , Memory Usage: 27.674 GB
Batch Size:8 , Memory Usage: 55.346 GB
Batch Size:16 , Memory Usage: 110.69 GB
Batch Size:32 , Memory Usage: 221.379 GB


### 3D CNNの場合
下記のページより  
https://keras.io/examples/vision/3D_image_classification/

In [24]:
def get_model(width=128, height=128, depth=64, channel=1):
    """Build a 3D convolutional neural network model."""

    inputs = keras.Input((width, height, depth, channel))

    x = layers.Conv3D(filters=64, kernel_size=3)(inputs)
    x = layers.Activation(tf.keras.activations.relu)(x)
    x = layers.MaxPool3D(pool_size=2)(x)
    x = layers.BatchNormalization()(x)

    x = layers.Conv3D(filters=64, kernel_size=3)(x)
    x = layers.Activation(tf.keras.activations.relu)(x)
    x = layers.MaxPool3D(pool_size=2)(x)
    x = layers.BatchNormalization()(x)

    x = layers.Conv3D(filters=128, kernel_size=3)(x)
    x = layers.Activation(tf.keras.activations.relu)(x)
    x = layers.MaxPool3D(pool_size=2)(x)
    x = layers.BatchNormalization()(x)

    x = layers.Conv3D(filters=256, kernel_size=3)(x)
    x = layers.Activation(tf.keras.activations.relu)(x)
    x = layers.MaxPool3D(pool_size=2)(x)
    x = layers.BatchNormalization()(x)

    x = layers.GlobalAveragePooling3D()(x)
    x = layers.Dense(units=512)(x)
    x = layers.Dropout(0.3)(x)

    x = layers.Dense(units=10)(x)
    outputs = layers.Activation(tf.keras.activations.softmax)(x)

    # Define the model.
    model = keras.Model(inputs, outputs, name="3dcnn")
    return model


# Build model.
model = get_model(width=150, height=300, depth=150, channel=3)
model.summary()

Model: "3dcnn"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 150, 300, 150, 3  0         
                             )]                                  
                                                                 
 conv3d_8 (Conv3D)           (None, 148, 298, 148, 64  5248      
                             )                                   
                                                                 
 activation_24 (Activation)  (None, 148, 298, 148, 64  0         
                             )                                   
                                                                 
 max_pooling3d_8 (MaxPooling  (None, 74, 149, 74, 64)  0         
 3D)                                                             
                                                                 
 batch_normalization_25 (Bat  (None, 74, 149, 74, 64)  256   

In [25]:
for batch_size in [4, 8, 16, 32]:
  mem_size = get_model_memory_usage_mixed_mode(batch_size, model)
  print("Batch Size:{} , Memory Usage: {}GB".format(batch_size, mem_size))

Batch Size:4 , Memory Usage: 8.332GB
Batch Size:8 , Memory Usage: 16.661GB
Batch Size:16 , Memory Usage: 33.319GB
Batch Size:32 , Memory Usage: 66.635GB


### 3D-CNN 512x512x512 の場合

In [26]:
# Build model.
model = get_model(width=512, height=512, depth=512, channel=3)
model.summary()

Model: "3dcnn"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 512, 512, 512, 3  0         
                             )]                                  
                                                                 
 conv3d_12 (Conv3D)          (None, 510, 510, 510, 64  5248      
                             )                                   
                                                                 
 activation_29 (Activation)  (None, 510, 510, 510, 64  0         
                             )                                   
                                                                 
 max_pooling3d_12 (MaxPoolin  (None, 255, 255, 255, 64  0        
 g3D)                        )                                   
                                                                 
 batch_normalization_29 (Bat  (None, 255, 255, 255, 64  256  

In [27]:
for batch_size in [1, 2, 4, 8, 16, 32]:
  mem_size = get_model_memory_usage_mixed_mode(batch_size, model)
  print("Batch Size:{} , Memory Usage: {}GB".format(batch_size, mem_size))

Batch Size:1 , Memory Usage: 42.675GB
Batch Size:2 , Memory Usage: 85.347GB
Batch Size:4 , Memory Usage: 170.692GB
Batch Size:8 , Memory Usage: 341.381GB
Batch Size:16 , Memory Usage: 682.759GB
Batch Size:32 , Memory Usage: 1365.515GB


## 3. 混合精度演算不適用時のメモリ使用量試算 

通常精度（float32）を設定。

In [28]:
dtype = "float32"
policy = tf.keras.mixed_precision.Policy(dtype)
tf.keras.mixed_precision.set_global_policy(policy)

### PointNetモデルの場合

下記のページより  
https://keras.io/examples/vision/pointnet/

In [29]:
NUM_POINTS = 650000
NUM_CLASSES = 10

In [30]:
def conv_bn(x, filters):
    x = layers.Conv1D(filters, kernel_size=1, padding="valid")(x)
    x = layers.BatchNormalization(momentum=0.0)(x)
    return layers.Activation("relu")(x)


def dense_bn(x, filters):
    x = layers.Dense(filters)(x)
    x = layers.BatchNormalization(momentum=0.0)(x)
    return layers.Activation("relu")(x)


In [31]:
class OrthogonalRegularizer(keras.regularizers.Regularizer):
    def __init__(self, num_features, l2reg=0.001):
        self.num_features = num_features
        self.l2reg = l2reg
        if keras.mixed_precision.global_policy().compute_dtype == 'float16':
            self.eye = tf.eye(num_features, dtype=tf.float16)
        else:
            self.eye = tf.eye(num_features, dtype=tf.float32)

    def __call__(self, x):
        x = tf.reshape(x, (-1, self.num_features, self.num_features))
        xxt = tf.tensordot(x, x, axes=(2, 2))
        xxt = tf.reshape(xxt, (-1, self.num_features, self.num_features))
        return tf.reduce_sum(self.l2reg * tf.square(xxt - self.eye))

In [32]:
def tnet(inputs, num_features):

    # Initalise bias as the indentity matrix
    bias = keras.initializers.Constant(np.eye(num_features).flatten())
    reg = OrthogonalRegularizer(num_features)

    x = conv_bn(inputs, 32)
    x = conv_bn(x, 64)
    x = conv_bn(x, 512)
    x = layers.GlobalMaxPooling1D()(x)
    x = dense_bn(x, 256)
    x = dense_bn(x, 128)
    x = layers.Dense(
        num_features * num_features,
        kernel_initializer="zeros",
        bias_initializer=bias,
        activity_regularizer=reg,
    )(x)
    feat_T = layers.Reshape((num_features, num_features))(x)
    # Apply affine transformation to input features
    return layers.Dot(axes=(2, 1))([inputs, feat_T])

In [33]:
inputs = keras.Input(shape=(NUM_POINTS, 6))

x = tnet(inputs, 6)
x = conv_bn(x, 32)
x = conv_bn(x, 32)
x = tnet(x, 32)
x = conv_bn(x, 32)
x = conv_bn(x, 64)
x = conv_bn(x, 512)
x = layers.GlobalMaxPooling1D()(x)
x = dense_bn(x, 256)
x = layers.Dropout(0.3)(x)
x = dense_bn(x, 128)
x = layers.Dropout(0.3)(x)

x = layers.Dense(NUM_CLASSES, activation="softmax")(x)
outputs = layers.Activation("softmax")(x)

model = keras.Model(inputs=inputs, outputs=outputs, name="pointnet")
model.summary()


Model: "pointnet"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_6 (InputLayer)           [(None, 650000, 6)]  0           []                               
                                                                                                  
 conv1d_11 (Conv1D)             (None, 650000, 32)   224         ['input_6[0][0]']                
                                                                                                  
 batch_normalization_33 (BatchN  (None, 650000, 32)  128         ['conv1d_11[0][0]']              
 ormalization)                                                                                    
                                                                                                  
 activation_34 (Activation)     (None, 650000, 32)   0           ['batch_normalization_33[0

In [34]:
for batch_size in [1, 2, 4, 8, 16, 32]:
  mem_size = get_model_memory_usage_mixed_mode(batch_size, model)
  print("Batch Size:{} , Memory Usage: {} GB".format(batch_size, mem_size))

Batch Size:1 , Memory Usage: 13.824 GB
Batch Size:2 , Memory Usage: 27.646 GB
Batch Size:4 , Memory Usage: 55.289 GB
Batch Size:8 , Memory Usage: 110.576 GB
Batch Size:16 , Memory Usage: 221.148 GB
Batch Size:32 , Memory Usage: 442.294 GB


### 3D CNNの場合
下記のページより  
https://keras.io/examples/vision/3D_image_classification/

In [35]:
def get_model(width=128, height=128, depth=64, channel=1):
    """Build a 3D convolutional neural network model."""

    inputs = keras.Input((width, height, depth, channel))

    x = layers.Conv3D(filters=64, kernel_size=3)(inputs)
    x = layers.Activation(tf.keras.activations.relu)(x)
    x = layers.MaxPool3D(pool_size=2)(x)
    x = layers.BatchNormalization()(x)

    x = layers.Conv3D(filters=64, kernel_size=3)(x)
    x = layers.Activation(tf.keras.activations.relu)(x)
    x = layers.MaxPool3D(pool_size=2)(x)
    x = layers.BatchNormalization()(x)

    x = layers.Conv3D(filters=128, kernel_size=3)(x)
    x = layers.Activation(tf.keras.activations.relu)(x)
    x = layers.MaxPool3D(pool_size=2)(x)
    x = layers.BatchNormalization()(x)

    x = layers.Conv3D(filters=256, kernel_size=3)(x)
    x = layers.Activation(tf.keras.activations.relu)(x)
    x = layers.MaxPool3D(pool_size=2)(x)
    x = layers.BatchNormalization()(x)

    x = layers.GlobalAveragePooling3D()(x)
    x = layers.Dense(units=512)(x)
    x = layers.Dropout(0.3)(x)

    x = layers.Dense(units=10)(x)
    outputs = layers.Activation(tf.keras.activations.softmax)(x)

    # Define the model.
    model = keras.Model(inputs, outputs, name="3dcnn")
    return model


# Build model.
model = get_model(width=150, height=300, depth=150, channel=3)
model.summary()

Model: "3dcnn"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_7 (InputLayer)        [(None, 150, 300, 150, 3  0         
                             )]                                  
                                                                 
 conv3d_16 (Conv3D)          (None, 148, 298, 148, 64  5248      
                             )                                   
                                                                 
 activation_52 (Activation)  (None, 148, 298, 148, 64  0         
                             )                                   
                                                                 
 max_pooling3d_16 (MaxPoolin  (None, 74, 149, 74, 64)  0         
 g3D)                                                            
                                                                 
 batch_normalization_50 (Bat  (None, 74, 149, 74, 64)  256   

In [36]:
for batch_size in [1, 2, 4, 8, 16, 32]:
  mem_size = get_model_memory_usage_mixed_mode(batch_size, model)
  print("Batch Size:{} , Memory Usage: {}GB".format(batch_size, mem_size))

Batch Size:1 , Memory Usage: 4.094GB
Batch Size:2 , Memory Usage: 8.183GB
Batch Size:4 , Memory Usage: 16.361GB
Batch Size:8 , Memory Usage: 32.718GB
Batch Size:16 , Memory Usage: 65.43GB
Batch Size:32 , Memory Usage: 130.855GB


### 3D-CNN 512x512x512 の場合

In [37]:
# Build model.
model = get_model(width=512, height=512, depth=512, channel=3)
model.summary()

Model: "3dcnn"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_8 (InputLayer)        [(None, 512, 512, 512, 3  0         
                             )]                                  
                                                                 
 conv3d_20 (Conv3D)          (None, 510, 510, 510, 64  5248      
                             )                                   
                                                                 
 activation_57 (Activation)  (None, 510, 510, 510, 64  0         
                             )                                   
                                                                 
 max_pooling3d_20 (MaxPoolin  (None, 255, 255, 255, 64  0        
 g3D)                        )                                   
                                                                 
 batch_normalization_54 (Bat  (None, 255, 255, 255, 64  256  

In [38]:
for batch_size in [1, 2, 4, 8, 16, 32]:
  mem_size = get_model_memory_usage_mixed_mode(batch_size, model)
  print("Batch Size:{} , Memory Usage: {}GB".format(batch_size, mem_size))

Batch Size:1 , Memory Usage: 83.85GB
Batch Size:2 , Memory Usage: 167.694GB
Batch Size:4 , Memory Usage: 335.383GB
Batch Size:8 , Memory Usage: 670.761GB
Batch Size:16 , Memory Usage: 1341.517GB
Batch Size:32 , Memory Usage: 2683.029GB
