# In [3]:
import tensorflow as tf
import numpy as np

class Voxel3DCNNModel(tf.keras.Model):
    """3D convolutional encoder/decoder with two output heads.

    The encoder applies three Conv3D + 2x max-pooling stages, followed by a
    256-filter bottleneck convolution. Two heads branch off the bottleneck:

    * a decoder (three 2x upsampling + Conv3D stages) ending in a 1-channel
      sigmoid convolution, producing the attention-region mask;
    * global average pooling followed by dense layers, producing 16 sensory
      impression scores squashed into the range 1..5.
    """

    def __init__(self):
        super().__init__()

        def conv_block(filters):
            # Every feature convolution shares kernel size, activation and
            # padding; only the filter count varies.
            return tf.keras.layers.Conv3D(
                filters=filters,
                kernel_size=3,
                activation='mish',
                padding='same',
            )

        # NOTE: layers are created in the same order as before so that
        # auto-generated layer names and weight ordering stay unchanged.

        # Encoder: three conv + 2x downsampling stages.
        self.encoder_conv1 = conv_block(32)
        self.encoder_pooling1 = tf.keras.layers.MaxPooling3D(pool_size=2)
        self.encoder_conv2 = conv_block(64)
        self.encoder_pooling2 = tf.keras.layers.MaxPooling3D(pool_size=2)
        self.encoder_conv3 = conv_block(128)
        self.encoder_pool3 = tf.keras.layers.MaxPooling3D(pool_size=2)

        self.bottleneck = conv_block(256)

        # Decoder: three 2x upsampling + conv stages mirroring the encoder.
        self.decoder_up3 = tf.keras.layers.UpSampling3D(size=2)
        self.decoder_conv3 = conv_block(128)
        self.decoder_up2 = tf.keras.layers.UpSampling3D(size=2)
        self.decoder_conv2 = conv_block(64)
        self.decoder_up1 = tf.keras.layers.UpSampling3D(size=2)
        self.decoder_conv1 = conv_block(32)
        # Attention-region mask head: 1x1x1 convolution with sigmoid output.
        self.decoder_output = tf.keras.layers.Conv3D(
            filters=1,
            kernel_size=1,
            activation='sigmoid',
            padding='same',
        )

        # Sensory impression score head.
        # `flatten` is a global average pooling layer despite its name.
        self.flatten = tf.keras.layers.GlobalAveragePooling3D()
        self.dense1 = tf.keras.layers.Dense(units=128, activation='mish')
        self.dense2 = tf.keras.layers.Dense(units=64, activation='mish')
        self.scores_output = tf.keras.layers.Dense(units=16, activation='linear')

    def call(self, inputs):
        """Run the forward pass.

        Args:
            inputs: Voxel tensor fed to the first Conv3D layer.

        Returns:
            A ``(mask, scores)`` tuple: the sigmoid attention-region mask from
            the decoder, and the 16 sensory impression scores scaled to 1..5.
        """
        # Encoder followed by the bottleneck.
        encoder_stages = (
            self.encoder_conv1,
            self.encoder_pooling1,
            self.encoder_conv2,
            self.encoder_pooling2,
            self.encoder_conv3,
            self.encoder_pool3,
            self.bottleneck,
        )
        latent = inputs
        for stage in encoder_stages:
            latent = stage(latent)

        # Decoder ending in the attention-region mask.
        decoder_stages = (
            self.decoder_up3,
            self.decoder_conv3,
            self.decoder_up2,
            self.decoder_conv2,
            self.decoder_up1,
            self.decoder_conv1,
            self.decoder_output,
        )
        mask = latent
        for stage in decoder_stages:
            mask = stage(mask)

        # Sensory impression scores, computed from the bottleneck features.
        pooled = self.flatten(latent)
        hidden = self.dense2(self.dense1(pooled))
        raw_scores = self.scores_output(hidden)
        # Map the unbounded linear output into the range 1..5.
        scores = tf.sigmoid(raw_scores) * 4 + 1

        return mask, scores



# Build and compile the model.
model = Voxel3DCNNModel()
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    # One loss per output, in the order returned by call(): (mask, scores).
    loss=[
        tf.keras.losses.BinaryCrossentropy(),
        tf.keras.losses.MeanAbsoluteError(),
    ],
    # Metrics are nested per output so the pairing is unambiguous:
    #   mask   -> BinaryAccuracy (thresholds the sigmoid output; plain
    #             Accuracy would require exact equality with the labels)
    #   scores -> MeanSquaredError (the metric class, not the loss class)
    # NOTE(review): assumes accuracy was meant for the mask only and MSE for
    # the scores only, mirroring the per-output loss list -- confirm.
    metrics=[
        [tf.keras.metrics.BinaryAccuracy()],
        [tf.keras.metrics.MeanSquaredError()],
    ],
)


# Smoke-test inference with random dummy voxel data.
BATCH_SIZE = 8
VOXEL_DEPTH = 64
VOXEL_HEIGHT = 64
VOXEL_WIDTH = 64
NUM_CHANNELS = 4

voxel_inputs = np.random.rand(
    BATCH_SIZE, VOXEL_DEPTH, VOXEL_HEIGHT, VOXEL_WIDTH, NUM_CHANNELS
).astype(np.float32)

mask_output, scores_output = model(voxel_inputs)

# Print the shape of the attention-region mask and of the score output.
print("注目領域マスク出力の形状:", mask_output.shape)
print("感覚印象スコア出力の形状:", scores_output.shape)

# Output:
# 注目領域マスク出力の形状: (8, 64, 64, 64, 1)
# 感覚印象スコア出力の形状: (8, 16)
