<p><strong>Channel Attention - SE Module</strong></p>
<img src="https://images.viblo.asia/1df5dad3-4fd1-494e-bb81-f78f3a3ed1d6.png">

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Reshape, Multiply, Input
from tensorflow.keras.models import Model
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping


In [None]:
def se_block(input_tensor, reduction=16):
    """Apply a Squeeze-and-Excitation gate to a feature map.

    The input is global-average-pooled, passed through a ReLU bottleneck
    Dense layer and a sigmoid Dense layer, reshaped to ``(1, 1, channels)``
    and multiplied back onto the input.

    Args:
        input_tensor: Symbolic Keras tensor whose last axis holds the
            channels (assumed ``(batch, height, width, channels)``, as
            GlobalAveragePooling2D is applied to it). The channel count
            must be statically known.
        reduction: Divisor applied to the channel count to size the
            bottleneck layer.

    Returns:
        The input tensor rescaled by the learned per-channel weights.

    Raises:
        ValueError: If the channel dimension of ``input_tensor`` is unknown.
    """
    channel = input_tensor.shape[-1]
    if channel is None:
        raise ValueError("se_block requires a statically known channel dimension")

    # With fewer channels than `reduction`, integer division yields 0 and the
    # bottleneck would be a zero-width Dense layer; keep at least one unit.
    bottleneck_units = max(channel // reduction, 1)

    se = GlobalAveragePooling2D()(input_tensor)
    se = Dense(bottleneck_units, activation='relu')(se)
    se = Dense(channel, activation='sigmoid')(se)
    # (batch, channel) -> (batch, 1, 1, channel) so the gate broadcasts over H and W.
    se = Reshape([1, 1, channel])(se)
    return Multiply()([input_tensor, se])

def create_resnet50(input_shape, num_classes):
    """Build a ResNet-50 classifier with randomly initialised weights.

    The Keras ResNet50 backbone is created without its top and without
    pretrained weights; a global-average-pooling layer and a softmax Dense
    layer with ``num_classes`` units are stacked on its output.

    Args:
        input_shape: Shape of one input sample, forwarded to ResNet50.
        num_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled Keras Model.
    """
    backbone = ResNet50(weights=None, include_top=False, input_shape=input_shape)
    pooled = GlobalAveragePooling2D()(backbone.output)
    probabilities = Dense(num_classes, activation='softmax')(pooled)
    return Model(inputs=backbone.input, outputs=probabilities)

def create_se_resnet50(input_shape, num_classes):
    """Build a ResNet-50 classifier with one SE gate on the backbone output.

    The backbone is the Keras ResNet50 without its top and without pretrained
    weights. A single Squeeze-and-Excitation block (``se_block``) rescales the
    backbone's final feature map before global average pooling and the
    softmax classifier.

    Note that this is not a full SE-ResNet: the residual blocks inside the
    backbone are left unchanged, because an already-built functional model
    cannot have layers inserted into its residual branches after the fact.

    Args:
        input_shape: Shape of one input sample, forwarded to ResNet50.
        num_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled Keras Model.
    """
    base_model = ResNet50(weights=None, include_top=False, input_shape=input_shape)

    # The previous implementation looped over every layer whose name contains
    # 'conv' and assigned x = se_block(layer.output) each time. Every iteration
    # overwrote x, so only the SE block of the last matching layer was ever
    # connected to the classifier; all the others were dead layers, and one of
    # them was built on a 3-channel tensor where channel // 16 == 0. In the
    # Keras ResNet50 graph the last matching layer is the backbone output
    # ('conv5_block3_out'), so applying the gate once here builds the same
    # effective model without the unused layers.
    x = se_block(base_model.output)

    x = GlobalAveragePooling2D()(x)
    output = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=output)
    return model

In [None]:
def train_and_evaluate(model, X_train, y_train, X_test, y_test, batch_size=64, epochs=100):
    """Compile ``model`` and fit it with early stopping.

    The model is compiled with a default Adam optimizer, categorical
    cross-entropy loss and an accuracy metric. ``(X_test, y_test)`` is handed
    to ``fit`` as validation data; training stops once ``val_loss`` has not
    improved for 10 epochs and the best weights are restored.

    NOTE(review): because the X_test/y_test arrays drive early stopping and
    weight restoration, scores later computed on those same arrays are not an
    independent hold-out estimate.

    Returns:
        Tuple ``(history, model)``: the object returned by ``model.fit`` and
        the same model instance that was passed in.
    """
    stopper = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

    fit_options = dict(
        validation_data=(X_test, y_test),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[stopper],
    )
    training_log = model.fit(X_train, y_train, **fit_options)
    return training_log, model

# Load CIFAR-10 and scale the pixel values by 1/255.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
# Cast to float32 before dividing: the images are documented by Keras as uint8,
# and uint8 / 255.0 would promote them to float64, doubling the memory held by
# the arrays even though Keras computes in float32 by default.
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0
# One-hot encode the integer labels for categorical_crossentropy.
y_train, y_test = to_categorical(y_train), to_categorical(y_test)

input_shape = X_train.shape[1:]   # per-sample shape, without the batch axis
num_classes = y_train.shape[-1]   # width of the one-hot encoding

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [None]:
# Build and train the baseline ResNet-50
resnet50_model = create_resnet50(input_shape, num_classes)
history_resnet50, resnet50_model = train_and_evaluate(resnet50_model, X_train, y_train, X_test, y_test)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100


In [None]:
# Build and train the SE-ResNet-50
se_resnet50_model = create_se_resnet50(input_shape, num_classes)
history_se_resnet50, se_resnet50_model = train_and_evaluate(se_resnet50_model, X_train, y_train, X_test, y_test)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100


In [None]:
# Evaluate both models.
# NOTE(review): X_test/y_test were also used as validation data for early
# stopping during training, so these scores are not an independent estimate.
resnet50_eval = resnet50_model.evaluate(X_test, y_test)
se_resnet50_eval = se_resnet50_model.evaluate(X_test, y_test)

# The models were compiled with metrics=['accuracy'], so index 1 is the accuracy.
print("ResNet-50 Accuracy: ", resnet50_eval[1])
print("SE-ResNet-50 Accuracy: ", se_resnet50_eval[1])

ResNet-50 Accuracy:  0.7089999914169312
SE-ResNet-50 Accuracy:  0.7138000130653381


**Spatial Attention - Deformable Convolution** <br>
<div style="text-align: center">
<img  src="https://images.viblo.asia/full/039ec952-9ceb-4c2d-b2cc-be85332d6dbc.png" >

<img src="https://images.viblo.asia/5fabf07f-80d1-44c3-8e17-b3cd9ce33ea8.png">


<img src="https://images.viblo.asia/full/70f87d1c-a3a3-450d-8aa9-179a95c9c1a3.png">

</div>

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Layer, Conv2D, GlobalAveragePooling2D, Dense, Reshape, Multiply, Input, BatchNormalization, ReLU, Add, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import numpy as np


In [None]:
class DeformableConv2D(Layer):
    """Simplified deformable convolution for channels-last feature maps.

    A convolution predicts ``2 * kernel_size**2`` offset channels per pixel.
    They are averaged over the kernel taps into a single (dx, dy) displacement
    per output location, the input is resampled at the displaced positions
    with bilinear interpolation, and a regular convolution is applied to the
    resampled map. In other words this is a learned per-pixel warp followed
    by a convolution, not a per-tap deformable convolution.

    Args:
        filters: Number of output channels of the final convolution.
        kernel_size: Integer side length of the (square) kernels.
        **kwargs: Forwarded to ``Layer.__init__``.
    """

    def __init__(self, filters, kernel_size, **kwargs):
        super(DeformableConv2D, self).__init__(**kwargs)
        self.filters = filters
        self.kernel_size = kernel_size
        # Sub-layers are created here (not in build) so they are tracked as
        # soon as the layer exists.
        self.offset_conv = Conv2D(2 * kernel_size * kernel_size,
                                  kernel_size=kernel_size,
                                  padding='same',
                                  use_bias=False)
        self.regular_conv = Conv2D(filters,
                                   kernel_size=kernel_size,
                                   padding='same')

    def build(self, input_shape):
        # Both convolutions see a tensor of the input's shape: the resampled
        # map has the same height, width and channel count as the input.
        self.offset_conv.build(input_shape)
        self.regular_conv.build(input_shape)
        super(DeformableConv2D, self).build(input_shape)

    def call(self, inputs):
        offset = self.offset_conv(inputs)
        return self.deformable_conv2d(inputs, offset)

    def deformable_conv2d(self, x, offset):
        """Warp ``x`` by the mean predicted offset and convolve the result.

        Args:
            x: Input of shape (batch, h, w, channels); h, w and channels must
                be statically known.
            offset: Output of ``offset_conv`` with ``2 * kernel_size**2``
                channels. Within each pair, index 0 is treated as the
                displacement along the width (x) and index 1 along the
                height (y).

        Returns:
            Output of ``regular_conv`` applied to the resampled input.
        """
        _, h, w, channels = x.shape
        taps = self.kernel_size * self.kernel_size

        # (batch, h, w, 2*taps) -> (batch, h, w, taps, 2) -> mean over taps.
        mean_offset = tf.reshape(tf.cast(offset, tf.float32), (-1, h, w, taps, 2))
        mean_offset = tf.reduce_mean(mean_offset, axis=3)

        # With the default 'xy' indexing both grids have shape (h, w):
        # grid_x holds column indices and grid_y holds row indices.
        grid_x, grid_y = tf.meshgrid(tf.range(w, dtype=tf.float32),
                                     tf.range(h, dtype=tf.float32))

        # Clip each coordinate against its own axis (x against the width,
        # y against the height) so sampling never leaves the feature map.
        sample_x = tf.clip_by_value(grid_x + mean_offset[..., 0], 0.0, float(w - 1))
        sample_y = tf.clip_by_value(grid_y + mean_offset[..., 1], 0.0, float(h - 1))

        # Bilinear interpolation keeps the sampling differentiable with
        # respect to the offsets; rounding to the nearest pixel would cut the
        # gradient path to offset_conv.
        x0 = tf.floor(sample_x)
        y0 = tf.floor(sample_y)
        x1 = tf.minimum(x0 + 1.0, float(w - 1))
        y1 = tf.minimum(y0 + 1.0, float(h - 1))
        frac_x = tf.cast(tf.expand_dims(sample_x - x0, axis=-1), x.dtype)
        frac_y = tf.cast(tf.expand_dims(sample_y - y0, axis=-1), x.dtype)

        def gather(rows, cols):
            # The feature map is indexed as [batch, row (y), col (x)].
            idx = tf.stack([tf.cast(rows, tf.int32), tf.cast(cols, tf.int32)], axis=-1)
            return tf.gather_nd(x, idx, batch_dims=1)

        top = gather(y0, x0) * (1.0 - frac_x) + gather(y0, x1) * frac_x
        bottom = gather(y1, x0) * (1.0 - frac_x) + gather(y1, x1) * frac_x
        sampled_input = top * (1.0 - frac_y) + bottom * frac_y
        # Re-assert the static shape for the following convolution.
        sampled_input = tf.reshape(sampled_input, (-1, h, w, channels))

        return self.regular_conv(sampled_input)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[1], input_shape[2], self.filters)

    def get_config(self):
        # Expose the constructor arguments so the layer can be serialised and cloned.
        config = super(DeformableConv2D, self).get_config()
        config.update({'filters': self.filters, 'kernel_size': self.kernel_size})
        return config

def spatial_attention(input_tensor):
    """Rescale a feature map with a learned per-pixel attention mask.

    The input is reduced over its last (channel) axis with a mean and a max,
    the two single-channel maps are concatenated, and a 7x7 sigmoid
    convolution turns them into a one-channel mask that multiplies the input.

    The reductions are wrapped in Keras layers rather than applied as raw
    ``tf.*`` ops, because Keras 3 rejects TensorFlow functions called directly
    on symbolic Keras tensors.

    Args:
        input_tensor: Symbolic Keras tensor with channels on the last axis
            (a 4-D tensor is assumed, since Conv2D is applied).

    Returns:
        The input tensor multiplied by the attention mask.
    """
    avg_pool = tf.keras.layers.Lambda(
        lambda t: tf.reduce_mean(t, axis=-1, keepdims=True))(input_tensor)
    max_pool = tf.keras.layers.Lambda(
        lambda t: tf.reduce_max(t, axis=-1, keepdims=True))(input_tensor)
    concat = tf.keras.layers.Concatenate(axis=-1)([avg_pool, max_pool])
    attention = Conv2D(1, kernel_size=7, padding='same', activation='sigmoid')(concat)
    # The single-channel mask broadcasts over the channel axis.
    return tf.keras.layers.Multiply()([input_tensor, attention])

In [None]:
def create_basic_model(input_shape, num_classes):
    """Build the plain CNN baseline.

    Two 3x3 ReLU convolutions with 32 filters, batch normalisation, flatten,
    a 128-unit ReLU Dense layer and a softmax output with ``num_classes``
    units.

    Returns:
        An uncompiled Keras Model.
    """
    inputs = tf.keras.Input(shape=input_shape)
    # Layers are listed in the order they are applied.
    pipeline = [
        Conv2D(32, kernel_size=3, padding='same', activation='relu'),
        Conv2D(32, kernel_size=3, padding='same', activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(num_classes, activation='softmax'),
    ]
    tensor = inputs
    for layer in pipeline:
        tensor = layer(tensor)
    return tf.keras.Model(inputs, tensor)

def create_advanced_model(input_shape, num_classes):
    """Build the CNN variant with deformable convolution and spatial attention.

    Structure: 3x3 convolution (32 filters) -> BatchNorm -> ReLU ->
    DeformableConv2D(32, kernel_size=3) -> BatchNorm -> ReLU ->
    spatial_attention -> Flatten -> Dense(128, relu) -> softmax output.

    Returns:
        An uncompiled Keras Model.
    """
    def normalize_and_activate(tensor):
        # BatchNorm followed by ReLU, used after each convolution stage.
        tensor = tf.keras.layers.BatchNormalization()(tensor)
        return tf.keras.layers.ReLU()(tensor)

    inputs = tf.keras.Input(shape=input_shape)

    stem = normalize_and_activate(Conv2D(32, kernel_size=3, padding='same')(inputs))

    # Deformable convolution stage
    deformed = normalize_and_activate(DeformableConv2D(32, kernel_size=3)(stem))

    # Spatial attention stage
    attended = spatial_attention(deformed)

    flat = tf.keras.layers.Flatten()(attended)
    hidden = tf.keras.layers.Dense(128, activation='relu')(flat)
    outputs = tf.keras.layers.Dense(num_classes, activation='softmax')(hidden)
    return tf.keras.Model(inputs, outputs)

def train_and_evaluate(model, X_train, y_train, X_test, y_test, batch_size=64, epochs=100):
    """Compile ``model`` and fit it with early stopping.

    Uses a default Adam optimizer, categorical cross-entropy and an accuracy
    metric. ``(X_test, y_test)`` serves as validation data; training stops
    after 10 epochs without ``val_loss`` improvement and the best weights are
    restored.

    NOTE(review): a function with this same name is defined again in the next
    cell of this notebook; that later definition replaces this one once both
    cells have run.

    Returns:
        Tuple ``(history, model)``.
    """
    stop_when_stalled = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=10, restore_best_weights=True)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    run = model.fit(
        X_train,
        y_train,
        validation_data=(X_test, y_test),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[stop_when_stalled],
    )
    return run, model


In [None]:
def train_and_evaluate(model, X_train, y_train, X_test, y_test, batch_size=64, epochs=100):
    """Compile ``model`` and fit it with early stopping.

    The model is compiled with a default Adam optimizer, categorical
    cross-entropy loss and an accuracy metric. ``(X_test, y_test)`` is handed
    to ``fit`` as validation data; training stops once ``val_loss`` has not
    improved for 10 epochs and the best weights are restored.

    NOTE(review): because the X_test/y_test arrays drive early stopping and
    weight restoration, scores later computed on those same arrays are not an
    independent hold-out estimate.

    Returns:
        Tuple ``(history, model)``: the object returned by ``model.fit`` and
        the same model instance that was passed in.
    """
    stopper = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

    fit_options = dict(
        validation_data=(X_test, y_test),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[stopper],
    )
    training_log = model.fit(X_train, y_train, **fit_options)
    return training_log, model

# Load CIFAR-10 and scale the pixel values by 1/255.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()
# Cast to float32 before dividing: the images are documented by Keras as uint8,
# and uint8 / 255.0 would promote them to float64, doubling the memory held by
# the arrays even though Keras computes in float32 by default.
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0
# One-hot encode the integer labels for categorical_crossentropy.
y_train, y_test = tf.keras.utils.to_categorical(y_train), tf.keras.utils.to_categorical(y_test)

input_shape = X_train.shape[1:]   # per-sample shape, without the batch axis
num_classes = y_train.shape[-1]   # width of the one-hot encoding

In [None]:
# Build and train the basic model
basic_model = create_basic_model(input_shape, num_classes)
history_basic, basic_model = train_and_evaluate(basic_model, X_train, y_train, X_test, y_test)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100


In [None]:
# Build and train the advanced model with Deformable Convolution and Spatial Attention
advanced_model = create_advanced_model(input_shape, num_classes)
history_advanced, advanced_model = train_and_evaluate(advanced_model, X_train, y_train, X_test, y_test)

Epoch 1/100




Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100


In [None]:
# Evaluate both models.
# NOTE(review): X_test/y_test were also used as validation data for early
# stopping during training, so these scores are not an independent estimate.
basic_eval = basic_model.evaluate(X_test, y_test)
advanced_eval = advanced_model.evaluate(X_test, y_test)

# The models were compiled with metrics=['accuracy'], so index 1 is the accuracy.
print("Basic Model Accuracy: ", basic_eval[1])
print("Advanced Model Accuracy: ", advanced_eval[1])

Basic Model Accuracy:  0.5785999894142151
Advanced Model Accuracy:  0.6333000063896179


**Channel và Spatial Attention kết hợp - CBAM**

<img src="https://images.viblo.asia/full/76cec2c1-c87d-4d4b-be85-9ece78ee81ab.png">

<img src="https://images.viblo.asia/full/976bae0c-961b-4f8b-9dd1-cfc4faf98c41.png">

<img src="https://images.viblo.asia/full/4a438a02-49f4-4e5b-bf96-afd484f22a0c.png">

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Layer, Conv2D, GlobalAveragePooling2D, GlobalMaxPooling2D, Dense, Reshape, Concatenate, Multiply, Input, BatchNormalization, ReLU, Add, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import numpy as np


In [None]:
class ChannelAttention(Layer):
    """Channel-attention gate that rescales each channel of a feature map.

    The input is pooled spatially with both a global average and a global
    max. Each pooled vector goes through the same two bias-free Dense layers
    (a ReLU bottleneck followed by a linear projection back to ``filters``),
    the two results are summed, squashed with a sigmoid and multiplied onto
    the input.

    Args:
        filters: Channel count of the input; also the width of the gate.
        reduction_ratio: Divisor applied to ``filters`` to size the bottleneck.
        **kwargs: Forwarded to ``Layer.__init__``.
    """

    def __init__(self, filters, reduction_ratio=8, **kwargs):
        super(ChannelAttention, self).__init__(**kwargs)
        self.filters = filters
        self.reduction_ratio = reduction_ratio
        self.avg_pool = GlobalAveragePooling2D()
        self.max_pool = GlobalMaxPooling2D()
        # Keep at least one bottleneck unit when filters < reduction_ratio;
        # plain integer division would request a zero-width layer.
        self.dense1 = Dense(max(filters // reduction_ratio, 1), activation='relu', use_bias=False)
        self.dense2 = Dense(filters, use_bias=False)

    def call(self, inputs):
        # The same two Dense layers are applied to both pooled descriptors.
        avg_pool = self.dense2(self.dense1(self.avg_pool(inputs)))
        max_pool = self.dense2(self.dense1(self.max_pool(inputs)))
        scale = tf.nn.sigmoid(avg_pool + max_pool)
        # (batch, filters) -> (batch, 1, 1, filters) so the gate broadcasts
        # over height and width. A plain reshape op is used instead of
        # instantiating a new Keras Reshape layer on every call.
        scale = tf.reshape(scale, (-1, 1, 1, self.filters))
        return inputs * scale

    def get_config(self):
        # Expose the constructor arguments so the layer can be serialised and cloned.
        config = super(ChannelAttention, self).get_config()
        config.update({'filters': self.filters, 'reduction_ratio': self.reduction_ratio})
        return config

class SpatialAttention(Layer):
    """Spatial-attention gate that rescales each location of a feature map.

    The input is reduced over its last (channel) axis with a mean and a max,
    the two single-channel maps are concatenated and a 7x7 bias-free sigmoid
    convolution turns them into a one-channel mask that multiplies the input.
    """

    def __init__(self, **kwargs):
        super(SpatialAttention, self).__init__(**kwargs)
        self.conv = Conv2D(1, kernel_size=7, strides=1, padding='same', activation='sigmoid', use_bias=False)

    def call(self, inputs):
        avg_pool = tf.reduce_mean(inputs, axis=-1, keepdims=True)
        max_pool = tf.reduce_max(inputs, axis=-1, keepdims=True)
        # A plain concat op is used instead of instantiating a new Keras
        # Concatenate layer on every call.
        concat = tf.concat([avg_pool, max_pool], axis=-1)
        scale = self.conv(concat)
        # The single-channel mask broadcasts over the channel axis.
        return inputs * scale

class CBAM(Layer):
    """Convolutional Block Attention Module: channel then spatial attention.

    Args:
        filters: Channel count of the input, forwarded to ChannelAttention.
        reduction_ratio: Bottleneck divisor, forwarded to ChannelAttention.
        **kwargs: Forwarded to ``Layer.__init__``.
    """

    def __init__(self, filters, reduction_ratio=8, **kwargs):
        super(CBAM, self).__init__(**kwargs)
        # Keep the constructor arguments so get_config can report them.
        self.filters = filters
        self.reduction_ratio = reduction_ratio
        self.channel_attention = ChannelAttention(filters, reduction_ratio)
        self.spatial_attention = SpatialAttention()

    def call(self, inputs):
        # Channel attention is applied first; spatial attention then acts on
        # the channel-refined features.
        x = self.channel_attention(inputs)
        x = self.spatial_attention(x)
        return x

    def get_config(self):
        # Without this override a layer with custom constructor arguments
        # cannot be serialised or cloned by Keras.
        config = super(CBAM, self).get_config()
        config.update({'filters': self.filters, 'reduction_ratio': self.reduction_ratio})
        return config



In [None]:
def create_basic_model(input_shape, num_classes):
    """Build the plain CNN baseline used for comparison against CBAM.

    Two 3x3 ReLU convolutions with 32 filters, batch normalisation, flatten,
    a 128-unit ReLU Dense layer and a softmax output with ``num_classes``
    units.

    Returns:
        An uncompiled Keras Model.
    """
    image = tf.keras.Input(shape=input_shape)

    # Convolutional feature extractor
    features = Conv2D(32, kernel_size=3, padding='same', activation='relu')(image)
    features = Conv2D(32, kernel_size=3, padding='same', activation='relu')(features)
    features = tf.keras.layers.BatchNormalization()(features)

    # Classifier head
    flat = tf.keras.layers.Flatten()(features)
    hidden = tf.keras.layers.Dense(128, activation='relu')(flat)
    probabilities = tf.keras.layers.Dense(num_classes, activation='softmax')(hidden)

    return tf.keras.Model(image, probabilities)

def create_cbam_model(input_shape, num_classes):
    """Build a small CNN with a CBAM block after each convolution.

    Structure: Conv(32) -> CBAM(32) -> Conv(64) -> CBAM(64) ->
    global average pooling -> Dense(128, relu) -> softmax output.

    Returns:
        An uncompiled Keras Model.
    """
    image = tf.keras.Input(shape=input_shape)

    # First stage: 32 filters followed by attention over those 32 channels.
    stage1 = Conv2D(32, kernel_size=3, padding='same', activation='relu')(image)
    stage1 = CBAM(32)(stage1)

    # Second stage: 64 filters followed by attention over those 64 channels.
    stage2 = Conv2D(64, kernel_size=3, padding='same', activation='relu')(stage1)
    stage2 = CBAM(64)(stage2)

    pooled = tf.keras.layers.GlobalAveragePooling2D()(stage2)
    hidden = tf.keras.layers.Dense(128, activation='relu')(pooled)
    probabilities = tf.keras.layers.Dense(num_classes, activation='softmax')(hidden)
    return tf.keras.Model(image, probabilities)

In [None]:
def train_and_evaluate(model, X_train, y_train, X_test, y_test, batch_size=64, epochs=100):
    """Compile ``model`` and fit it with early stopping.

    The model is compiled with a default Adam optimizer, categorical
    cross-entropy loss and an accuracy metric. ``(X_test, y_test)`` is handed
    to ``fit`` as validation data; training stops once ``val_loss`` has not
    improved for 10 epochs and the best weights are restored.

    NOTE(review): because the X_test/y_test arrays drive early stopping and
    weight restoration, scores later computed on those same arrays are not an
    independent hold-out estimate.

    Returns:
        Tuple ``(history, model)``: the object returned by ``model.fit`` and
        the same model instance that was passed in.
    """
    stopper = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

    fit_options = dict(
        validation_data=(X_test, y_test),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[stopper],
    )
    training_log = model.fit(X_train, y_train, **fit_options)
    return training_log, model

# Load CIFAR-10 and scale the pixel values by 1/255.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()
# Cast to float32 before dividing: the images are documented by Keras as uint8,
# and uint8 / 255.0 would promote them to float64, doubling the memory held by
# the arrays even though Keras computes in float32 by default.
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0
# One-hot encode the integer labels for categorical_crossentropy.
y_train, y_test = tf.keras.utils.to_categorical(y_train), tf.keras.utils.to_categorical(y_test)

input_shape = X_train.shape[1:]   # per-sample shape, without the batch axis
num_classes = y_train.shape[-1]   # width of the one-hot encoding

In [None]:
# Build and train the basic model
basic_model = create_basic_model(input_shape, num_classes)
history_basic, basic_model = train_and_evaluate(basic_model, X_train, y_train, X_test, y_test)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100


In [None]:
# Build and train the CBAM model (channel + spatial attention)
cbam_model = create_cbam_model(input_shape, num_classes)
history_cbam, cbam_model = train_and_evaluate(cbam_model, X_train, y_train, X_test, y_test)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
# Evaluate both models.
# NOTE(review): X_test/y_test were also used as validation data for early
# stopping during training, so these scores are not an independent estimate.
basic_eval = basic_model.evaluate(X_test, y_test)
cbam_eval = cbam_model.evaluate(X_test, y_test)

# The models were compiled with metrics=['accuracy'], so index 1 is the accuracy.
print("Basic Model Accuracy: ", basic_eval[1])
print("CBAM Model Accuracy: ", cbam_eval[1])

Basic Model Accuracy:  0.588100016117096
CBAM Model Accuracy:  0.6625000238418579
