In [51]:
import tensorflow as tf
import keras
from tensorflow.keras import Sequential,Model,initializers,layers,Input

class Data_augmentation(layers.Layer):
    def __init__(self,num_classes,image_size):
        super(Data_augmentation,self).__init__()
        self.num_classes=num_classes
        self.image_size=image_size

        self.Normalization=layers.Normalization()
        self.Resizing=layers.Resizing(self.image_size, self.image_size)
        self.Randomflip=layers.RandomFlip("horizontal")
        self.Randomrotation=layers.RandomRotation(factor=0.02)
        self.Randomzoom=layers.RandomZoom(
            height_factor=0.2, width_factor=0.2
        )
    
    def call(self,inputs):
        x=self.Normalization(inputs)
        x=self.Resizing(x)
        x=self.Randomflip(x)
        x=self.Randomrotation(x)
        x=self.Randomzoom(x)

        return x

class Patches(layers.Layer):
    def __init__(self, patch_size):
        super(Patches,self).__init__()
        self.patch_size = patch_size

    def call(self,images):
        batch_size = tf.shape(images)[0]
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding="VALID",
        )
        patch_dims = tf.shape(patches)[-1]
        patches = tf.reshape(patches, [batch_size,patches.shape[1]*patches.shape[2],patch_dims])
        return patches

class PatchEncoder(layers.Layer):
    def __init__(self, num_patches, projection_dim):
        super().__init__()
        self.num_patches = num_patches
        self.projection_dim=projection_dim
        self.projection = layers.Dense(units=self.projection_dim)
        self.positional_embedding = self.add_weight(
            "position_embeddings", shape=[self.num_patches + 1, self.projection_dim],
            initializer=tf.keras.initializers.RandomNormal(), dtype=tf.float32
        )


        self.classification_token = self.add_weight(
            "classification_token", shape=[1, 1, self.projection_dim],
            initializer=tf.keras.initializers.RandomNormal(), dtype=tf.float32
        )


    def call(self, patch):
        #patch=self.projection(patch) # why using dense layer not 1d convolution?
        cls_pos = tf.broadcast_to(
            self.classification_token, [tf.shape(patch)[0], 1, self.projection_dim]) # (1,1,64) ->(batch,1,64)
    
        x = tf.concat([cls_pos, self.projection(patch)], axis=1) # (batch,1,64) (batch,64,64)->(batch,65,64)

        x = x + self.positional_embedding  # (batch,65,64) + (65,64) -> (batch,65,64)
        
        return x

In [53]:
a=tf.zeros(shape=(8,65,64))
b=tf.zeros(shape=(65,64))
c=a+b
c.shape
#c=tf.concat([tf.zeros(shape=(8,1,64)),tf.zeros(shape=(8,1,64))],axis=1)

TensorShape([8, 65, 64])

In [48]:
class Multi_Head_Attention(layers.Layer):
    
    def __init__(self,d_model,num_heads,dropout=0):
        super(Multi_Head_Attention,self).__init__()
        self.d_model=d_model #512
        self.num_heads=num_heads #8
        assert d_model % self.num_heads == 0
        self.d_k=self.d_model//num_heads
        self.dropout=dropout
        self.wq = layers.Dense(self.d_model)
        self.wk = layers.Dense(self.d_model)
        self.wv = layers.Dense(self.d_model)
        self.dense = layers.Dense(self.d_model)

    def Split_Heads(self,input):
        batch_size=tf.shape(input)[0]
        seq_length=tf.shape(input)[1]
        x=tf.reshape(input, [batch_size,seq_length, self.num_heads, self.d_k]) #(batch,65,8,8)    (batch,seq_length,num_head,d_k)
        output=tf.transpose(x, perm=[0,2,1,3]) #(batch,8,65,8)      (batch,num_head,seq_length,d_k)
        return output

    def Scaled_Dot_Product_Attnetion(self,query,key,value):

        key_dim=tf.cast(tf.shape(key)[-1],tf.float32) # 8
        
        query = tf.multiply(query, 1.0 / tf.sqrt(key_dim)) # Normalize (batch,8,65,8)
       
        attention_score=tf.matmul(query,key,transpose_a=False,transpose_b=True) #(batch,8,65,8) matmul (batch,8,8,65)-> (batch,8,65,65) (batch,num_head,seq_length,seq_length)
   
        attention_prob=tf.nn.softmax(attention_score,axis=-1) #(batch,8,65,65) 

        attention_prob=tf.nn.dropout(attention_prob,self.dropout)

        attention_value=tf.matmul(attention_prob,value) #(batch,8,65,65) matmul (batch,8,65,8)-> (batch,8,65,8) (batch,num_head,seq_length,d_k)

        return attention_value # 원문에서는 드랍아웃을 추가해서 Dropout 된거(attention output) 안된거(attention score) 둘다 리턴


    def call(self,input):

        batch_size=tf.shape(input)[0] # batch
        seq_length=tf.shape(input)[1] # 65

        query=input
        key=input
        value=input

        q = self.wq(query) # (batch,65,64)
        k = self.wk(key) # (batch,65,64)
        v= self.wv(value) # (batch,65,64)

        query=self.Split_Heads(q) #(batch,8,65,8) (batch,num_head,seq_length,d_k)
        key=self.Split_Heads(k) #(batch,8,65,8)
        value=self.Split_Heads(v) #(batch,65,8,8)
    
        concat_attention=self.Scaled_Dot_Product_Attnetion(query,key,value) # (batch,8,65,8) (batch,num_head,seq_length,seq_length)

        concat_attention=tf.transpose(concat_attention,perm=[0,2,1,3]) # (batch,65,8,8) (batch,seq_length,num_head,key d_k)

        concat_attention=tf.reshape(concat_attention,shape=(batch_size,seq_length,self.d_model)) # (batch,65,64) (batch,seq_length,d_model)

        output=self.dense(concat_attention) # (batch,65,64) (batch,seq_length,d_model)
    
        return output

class MLP_ViT(layers.Layer):
    def __init__(self,units,dropout_rate):
        self.units=units
        self.dropout_rate=dropout_rate
        self.dropout=layers.Dropout(self.dropout_rate)
        self.mlp1=layers.Dense(units=2048,activation=tf.nn.gelu)
        self.mlp2=layers.Dense(units=self.units,activation=tf.nn.gelu)
        super(MLP_ViT,self).__init__()

    def call(self,input):
        x=self.mlp1(input) # (batch,65,2048) 
        x=self.dropout(x)
        x=self.mlp2(x) # (batch,65,64) 
        x=self.dropout(x)
    
        return x

In [49]:
import tensorflow as tf
from tensorflow.keras import layers,Model,Input

class ViT():
    def __init__(self,input_size,num_classes,patch_size,projection_dim,transformer_layers,d_model,num_heads,dropout_MHA,dropout_MLP):
        self.input_size= input_size
        self.num_classes=num_classes
        self.image_size=self.input_size[0]
        self.patch_size=patch_size
        self.num_patches=(self.image_size//self.patch_size)**2
        self.projection_dim=projection_dim
        self.transformer_layers=transformer_layers
        self.MLP_units=[self.projection_dim*2,self.projection_dim]
        self.d_model=d_model
        self.num_heads=num_heads
        self.dropout_MHA=dropout_MHA
        self.dropout_MLP=dropout_MLP
        self.output=layers.Dense(units=self.num_classes)
        
    def Encoder_Block(self,input):
        x=layers.LayerNormalization(epsilon=1e-6)(input)
        x=Multi_Head_Attention(num_heads=self.num_heads, d_model=self.d_model)(x)
        x1=layers.Add()([x,input])
        x2=layers.LayerNormalization(epsilon=1e-6)(x1)
        x2=MLP_ViT(units=self.d_model,dropout_rate=self.dropout_MLP)(x2)
        x3=layers.Add()([x1,x2])
        return x3

    def call(self):
        input=Input(shape=(self.input_size))
        x=Data_augmentation(num_classes=self.num_classes,image_size=self.image_size)(input) # (batch,6)
        x=Patches(patch_size=self.patch_size)(x)
        x=PatchEncoder(num_patches=self.num_patches,projection_dim=self.projection_dim)(x)
        for i in range(self.transformer_layers):
            x=self.Encoder_Block(x)

        x=MLP_ViT(units=2048,dropout_rate=self.dropout_MLP)(x) # (batch,65,2048) (batch,seq_length,2048)
        print(x.shape)
        x=layers.LayerNormalization(epsilon=1e-6)(x)
        x=layers.GlobalAveragePooling1D()(x)
        #x=layers.Flatten()(x)
        output=self.output(x)
        model=Model(inputs=input,outputs=output)

        return model

In [52]:
model=ViT( 
    input_size=(224,224,3),
    num_classes=100,
    patch_size=8,
    projection_dim=256,
    transformer_layers=8,
    d_model=256,
    num_heads=8,
    dropout_MHA=0.5,
    dropout_MLP=0).call()

(None, 785, 2048)


In [None]:
from tensorflow.keras.utils import plot_model
plot_model(model,to_file='model.png',show_shapes=True)

In [76]:
model.summary()

Model: "model_19"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_23 (InputLayer)          [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 data_augmentation_20 (Data_aug  (None, 224, 224, 3)  7          ['input_23[0][0]']               
 mentation)                                                                                       
                                                                                                  
 patches_20 (Patches)           (None, 784, 192)     0           ['data_augmentation_20[0][0]']   
                                                                                           

In [7]:
lr_rate=tf.keras.optimizers.schedules.ExponentialDecay(1e-3, 10000, 0.97, staircase=False, name=None)

    
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_rate),loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True),metrics=['acc'])

In [78]:
dir=r'D:\dataset\butterfly\train'

batch_size=8

train_ds=tf.keras.preprocessing.image_dataset_from_directory(
dir,
labels="inferred",
label_mode="int",
class_names=None,
color_mode="rgb",
batch_size=batch_size,
image_size=(224, 224),
shuffle=True,
seed=10,
validation_split=0.1,
subset='training',
interpolation="gaussian",
follow_links=False,
crop_to_aspect_ratio=False,)


validation_ds=tf.keras.preprocessing.image_dataset_from_directory(
dir,
labels="inferred",
label_mode="int",
class_names=None,
color_mode="rgb",
batch_size=batch_size,
image_size=(224, 224),
shuffle=True,
seed=10,
validation_split=0.1,
subset='validation',
interpolation="gaussian",
follow_links=False,
crop_to_aspect_ratio=False,)

normalization_layer = tf.keras.layers.experimental.preprocessing.Rescaling(1./255.)

train_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
validation_ds=validation_ds.map(lambda x, y: (normalization_layer(x), y))


lr_rate=tf.keras.optimizers.schedules.ExponentialDecay(1e-3, 10000, 0.97, staircase=False, name=None)

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_rate),loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),metrics=['acc'])

history=model.fit(train_ds,validation_data=validation_ds,epochs=100,batch_size=batch_size,verbose=1)

Found 12639 files belonging to 100 classes.
Using 11376 files for training.
Found 12639 files belonging to 100 classes.
Using 1263 files for validation.
Epoch 1/100

KeyboardInterrupt: 

In [77]:
lr_rate=tf.keras.optimizers.schedules.ExponentialDecay(1e-3, 10000, 0.97, staircase=False, name=None)

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_rate),loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),metrics=['acc'])

(x_train, y_train), (x_test, y_test) = keras.datasets.cifar100.load_data()

batch_size=32

history=model.fit(x=x_train,y=y_train,validation_data=(x_test,y_test),epochs=100,batch_size=batch_size,verbose=1)

Epoch 1/100


ValueError: in user code:

    File "c:\Users\jin\anaconda3\envs\tf_210_py_390\lib\site-packages\keras\engine\training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "c:\Users\jin\anaconda3\envs\tf_210_py_390\lib\site-packages\keras\engine\training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\jin\anaconda3\envs\tf_210_py_390\lib\site-packages\keras\engine\training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "c:\Users\jin\anaconda3\envs\tf_210_py_390\lib\site-packages\keras\engine\training.py", line 993, in train_step
        y_pred = self(x, training=True)
    File "c:\Users\jin\anaconda3\envs\tf_210_py_390\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "c:\Users\jin\anaconda3\envs\tf_210_py_390\lib\site-packages\keras\engine\input_spec.py", line 295, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "model_19" is incompatible with the layer: expected shape=(None, 224, 224, 3), found shape=(None, 32, 32, 3)
