In [1]:
# Attach Google Drive to the Colab VM under /content/drive so later cells can read the dataset from it.
from google.colab import drive
drive.mount(mountpoint='/content/drive', force_remount=True)

Mounted at /content/drive


In [2]:
# Copy the dataset archive from the mounted Drive into the current working directory
# (-a preserves file attributes, -v prints each copied path).
!cp -av 'drive/MyDrive/CV/final_project/data_pure.zip' './' 
# Unpack it quietly (-q); -u only extracts entries that are missing or newer than what is on disk,
# so re-running this cell does not rewrite files that are already extracted.
!unzip -u -q "data_pure.zip"

'drive/MyDrive/CV/final_project/data_pure.zip' -> './data_pure.zip'


In [3]:
!pip install -U -q tensorflow-addons

[?25l[K     |▎                               | 10 kB 32.6 MB/s eta 0:00:01[K     |▋                               | 20 kB 37.3 MB/s eta 0:00:01[K     |▉                               | 30 kB 41.0 MB/s eta 0:00:01[K     |█▏                              | 40 kB 25.9 MB/s eta 0:00:01[K     |█▌                              | 51 kB 16.5 MB/s eta 0:00:01[K     |█▊                              | 61 kB 13.9 MB/s eta 0:00:01[K     |██                              | 71 kB 14.9 MB/s eta 0:00:01[K     |██▍                             | 81 kB 16.5 MB/s eta 0:00:01[K     |██▋                             | 92 kB 16.0 MB/s eta 0:00:01[K     |███                             | 102 kB 14.7 MB/s eta 0:00:01[K     |███▎                            | 112 kB 14.7 MB/s eta 0:00:01[K     |███▌                            | 122 kB 14.7 MB/s eta 0:00:01[K     |███▉                            | 133 kB 14.7 MB/s eta 0:00:01[K     |████▏                           | 143 kB 14.7 MB/s eta 0:

In [11]:
%tensorflow_version 2.x
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import pandas as pd
import numpy as np
import tensorflow.keras.layers as L
from tensorflow.keras.losses import MeanAbsoluteError
from tensorflow import keras
from tensorflow.keras.optimizers import Adam

# --- Configuration ------------------------------------------------------------
seed = 1           # shared by the data generators and TensorFlow's global RNG
base_dir = './'    # folder that contains the unzipped `data/` directory
image_size = 224   # side length, in pixels, of the square network input

# Seed TensorFlow's global generator as well: the generators' `seed` argument only
# covers shuffling/augmentation, while weight initialisation draws from TF's RNG.
tf.random.set_seed(seed)

# --- Training labels ----------------------------------------------------------
df = pd.read_csv(base_dir + "data/train.csv")
# Build "<id>.jpg" for every row in one vectorised operation; astype(str) keeps
# this working even if pandas parsed the id column as a numeric dtype.
df['filename'] = df['id'].astype(str) + '.jpg'

def prep_fn(img):
    """Convert an image array to float32 and divide every value by 255.

    Used as the `preprocessing_function` of the ImageDataGenerators below.

    Args:
        img: NumPy array holding one image.

    Returns:
        A new float32 array of the same shape; the input array is not modified.
    """
    # astype() hands back a fresh copy, so the in-place division below never
    # touches the caller's array.
    scaled = img.astype(np.float32)
    scaled /= 255.0
    return scaled

# Fraction of train.csv rows held out for validation. Both ImageDataGenerators
# must receive the same value so that their "training" and "validation" subsets
# partition the dataframe consistently.
validation_split = 0.1

# Augmentation is applied to the training images only.
# NOTE(review): shifts, zoom and rotation move image content while the (x, y)
# targets are passed through unchanged — confirm the targets are not pixel
# coordinates inside the image, otherwise the labels no longer match.
data_gen_args = dict(preprocessing_function=prep_fn,
                     width_shift_range=0.2,
                     height_shift_range=0.2,
                     zoom_range=0.1,
                     rotation_range=20,
                     horizontal_flip=False,
                     vertical_flip=False,
                     validation_split=validation_split)

train_datagen = ImageDataGenerator(**data_gen_args)
# Validation images are only rescaled, never augmented.
val_datagen = ImageDataGenerator(preprocessing_function=prep_fn,
                                 validation_split=validation_split)

# Arguments shared by the training and validation flows.
flow_args = dict(dataframe=df,
                 directory=base_dir + 'data/train/',
                 x_col='filename',
                 y_col=['x', 'y'],
                 batch_size=32,
                 seed=seed,
                 class_mode='raw',  # yield the raw numeric [x, y] columns as regression targets
                 target_size=(image_size, image_size))

train_generator = train_datagen.flow_from_dataframe(subset="training",
                                                    shuffle=True,
                                                    **flow_args)
# Validation order is kept fixed: when fit() evaluates only a limited number of
# validation steps, a shuffled generator would score a different subset each
# epoch and make val_loss noisy for early stopping.
val_generator = val_datagen.flow_from_dataframe(subset="validation",
                                                shuffle=False,
                                                **flow_args)

Found 6750 validated image filenames.
Found 750 validated image filenames.


In [34]:
# List of test images; resolved relative to base_dir like the training CSV.
df_test = pd.read_csv(base_dir + "data/imagenames.csv")
# Vectorised "<id>.jpg"; astype(str) also covers ids parsed as numbers.
df_test['filename'] = df_test['id'].astype(str) + '.jpg'
# Placeholder targets: class_mode='raw' requires the y columns to exist. They are
# overwritten with the model's predictions further down.
df_test['x'] = 0
df_test['y'] = 0

test_datagen = ImageDataGenerator(preprocessing_function=prep_fn)
# shuffle=False keeps the batches in dataframe order, so prediction row i
# corresponds to df_test row i.
test_generator = test_datagen.flow_from_dataframe(dataframe=df_test,
                                                  directory=base_dir + 'data/test/',
                                                  x_col='filename',
                                                  y_col=['x', 'y'],
                                                  batch_size=32,
                                                  shuffle=False,
                                                  class_mode='raw',
                                                  target_size=(image_size, image_size))

Found 1200 validated image filenames.


In [41]:
# Width of each token embedding; also the filter count of the last conv stage.
projection_dim = 128
# Filter counts of the tokenizer's conv stages (CCTTokenizer follows each one with a 2x2 max-pool).
conv_filters = [32, 64, projection_dim]
# Every stage halves height and width, so the token grid has
# image_size / 2**(number of stages) cells per side.
tokens_per_side = image_size // (2 ** len(conv_filters))
num_patches = tokens_per_side ** 2
# Attention heads per Transformer block.
num_heads = 3
# Hidden widths of the feed-forward MLP inside each Transformer block.
transformer_units = [2 * projection_dim, projection_dim]
# Number of stacked Transformer blocks.
transformer_layers = 4

In [7]:
def mlp(x, hidden_units, dropout_rate):
    """Apply a stack of GELU Dense layers, each followed by Dropout.

    Args:
        x: Input Keras tensor.
        hidden_units: Iterable with the width of each Dense layer, in order.
        dropout_rate: Dropout rate applied after every Dense layer.

    Returns:
        The tensor produced by the last Dropout layer, or `x` unchanged when
        `hidden_units` is empty.
    """
    for width in hidden_units:
        hidden = L.Dense(width, activation=tf.nn.gelu)(x)
        x = L.Dropout(dropout_rate)(hidden)
    return x

In [43]:
class CCTTokenizer(L.Layer):
    """Convolutional tokenizer: maps an image batch to a sequence of embedded tokens.

    The image goes through one Conv2D(3x3, relu, same) + MaxPool2D(2x2) stage per
    entry of the module-level `conv_filters`; the resulting feature map is
    flattened over its spatial axes and a learned position embedding is added.

    The layer reads the module-level `conv_filters`, `num_patches` and
    `projection_dim` when it is constructed, so those must be defined first.

    Args:
        **kwargs: Standard Keras layer arguments (`name`, `dtype`, `trainable`, ...).
            Accepting them lets the layer be named and lets
            `CCTTokenizer.from_config(layer.get_config())` rebuild it.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.num_patches = num_patches
        self.projection_dim = projection_dim

        # One conv + pool stage per configured filter count; each pool halves H and W.
        self.conv_model = keras.Sequential()
        for filters in conv_filters:
            self.conv_model.add(
                L.Conv2D(filters, (3, 3), activation='relu', padding='same')
            )
            self.conv_model.add(
                L.MaxPool2D((2, 2), strides=(2, 2))
            )

        # One learned vector per token position.
        self.position_embedding = L.Embedding(
            input_dim=self.num_patches, output_dim=self.projection_dim
        )

    def call(self, images):
        """Tokenize `images` and return the position-encoded token sequence.

        Args:
            images: Batch of images fed to the convolutional stack.

        Returns:
            Tensor of shape (batch, H*W, C) where (H, W, C) is the shape of the
            conv output; the position embedding of shape
            (num_patches, projection_dim) is broadcast-added, so H*W must equal
            `num_patches` and C must equal `projection_dim`.
        """
        outputs = self.conv_model(images)
        positions = tf.range(start=0, limit=self.num_patches, delta=1)

        # Collapse the two spatial axes into a single token axis.
        feature_shape = tf.shape(outputs)
        tokens = tf.reshape(
            outputs,
            (-1, feature_shape[1] * feature_shape[2], feature_shape[-1]),
        )
        return tokens + self.position_embedding(positions)
    
    

In [44]:
def vision_transformer():
    """Build the tokenizer + Transformer regressor as an uncompiled Keras model.

    Reads the module-level `image_size`, `transformer_layers`, `num_heads`,
    `projection_dim` and `transformer_units`.

    Returns:
        tf.keras.Model taking (image_size, image_size, 3) images and producing
        2 values per image.
    """
    image_input = L.Input(shape=(image_size, image_size, 3))

    # Convolutional tokenizer: image -> sequence of position-encoded tokens.
    tokens = CCTTokenizer()(image_input)

    # Stack of pre-norm Transformer encoder blocks.
    for _block in range(transformer_layers):
        # Self-attention sub-layer with residual connection.
        normed_tokens = L.LayerNormalization(epsilon=1e-6)(tokens)
        self_attention = L.MultiHeadAttention(
            num_heads=num_heads, key_dim=projection_dim, dropout=0.1
        )
        attended = self_attention(normed_tokens, normed_tokens)
        after_attention = L.Add()([attended, tokens])

        # Feed-forward sub-layer with residual connection.
        ff_input = L.LayerNormalization(epsilon=1e-6)(after_attention)
        ff_output = mlp(ff_input, hidden_units=transformer_units, dropout_rate=0.1)
        tokens = L.Add()([ff_output, after_attention])

    # Attention pooling: score every token with a Dense(1), softmax the scores
    # over the token axis, and take the weighted sum of the tokens.
    sequence = L.LayerNormalization(epsilon=1e-6)(tokens)
    token_scores = L.Dense(1)(sequence)
    pooling_weights = tf.nn.softmax(token_scores, axis=1)
    pooled = tf.matmul(pooling_weights, sequence, transpose_a=True)
    pooled = tf.squeeze(pooled, -2)  # drop the singleton axis left by the matmul

    # Regression head: one hidden layer, then a linear layer with 2 outputs.
    head_features = L.Dense(128, activation='relu')(pooled)
    regression_out = L.Dense(2)(head_features)

    return tf.keras.Model(inputs=image_input, outputs=regression_out)

In [45]:
# Instantiate the network and configure it for regression:
# Adam optimiser (learning rate 1e-3) minimising the mean absolute error.
model = vision_transformer()
model.compile(
    loss=MeanAbsoluteError(),
    optimizer=Adam(learning_rate=1e-3),
)

In [46]:
# Print the layer-by-layer table (output shapes, parameter counts, connections) of the model.
model.summary()

Model: "model_4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_7 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
cct_tokenizer_6 (CCTTokenizer)  (None, 784, 128)     193600      input_7[0][0]                    
__________________________________________________________________________________________________
layer_normalization_47 (LayerNo (None, 784, 128)     256         cct_tokenizer_6[0][0]            
__________________________________________________________________________________________________
multi_head_attention_21 (MultiH (None, 784, 128)     197888      layer_normalization_47[0][0]     
                                                                 layer_normalization_47[0][0

In [47]:
with tf.device('/device:GPU:0'):
    # Optional warm start from an earlier checkpoint:
    #model.load_weights("drive/MyDrive/CV/Model/CCT")

    # Stop once val_loss has not improved for 3 epochs and roll the model back to
    # the best epoch; without restore_best_weights the model would keep the
    # weights of the last (3-epochs-past-best) epoch.
    callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                patience=3,
                                                restore_best_weights=True)
    # Step counts come from the generators instead of hard-coded numbers, so they
    # stay correct if the dataset or batch size changes, and every validation
    # batch is evaluated each epoch.
    r = model.fit(train_generator,
                  validation_data=val_generator,
                  steps_per_epoch=len(train_generator),
                  validation_steps=len(val_generator),
                  epochs=100,
                  callbacks=[callback])

Epoch 1/100

KeyboardInterrupt: ignored

In [None]:
# Persist the trained weights (weights only, not the architecture) to Google Drive.
# NOTE(review): the checkpoint prefix here is "vit", while the commented-out load_weights call in
# the training cell points at ".../Model/CCT" — confirm which name is intended before resuming.
model.save_weights("drive/MyDrive/CV/Model/vit")

In [None]:
# Run inference over the test generator on the first GPU.
with tf.device('/device:GPU:0'):
    ypred = model.predict(x=test_generator)

# Cell output: shape of the prediction array.
ypred.shape

In [None]:
# Replace the placeholder targets with the two predicted outputs.
df_test['x'] = ypred[:, 0]
df_test['y'] = ypred[:, 1]
# The helper 'filename' column is not written to the output file; errors='ignore'
# makes this a no-op when the cell is re-run after the column is already gone.
df_test = df_test.drop(columns=['filename'], errors='ignore')
df_test.to_csv('pred_CCT.csv', index=False)
# Last expression of the cell, so the preview is actually rendered.
df_test.head()