In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import numpy as np
import cv2
from glob import glob
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from patchify import patchify
import implementation_ViT as vit


In [10]:
# A Dictionary Hyperparameters  

hp = {}
hp['image_size'] = 200
hp['num_channels'] = 3
hp['patch_size'] = 25
hp['num_patches'] = 64
hp['flat_patches_shape'] = (int(hp['num_patches']), hp['patch_size']*hp['patch_size']*hp['num_channels'])

hp['batch_size'] = 32
hp['lr'] = 1e-4
hp['num_epochs'] = 500
hp['num_classes'] = 5
hp['class_name'] = ['daisy', 'dandelion', 'rose', 'sunflower', 'tulip']


hp["num_layers"] = 12
hp["hidden_dim"] = 768
hp["mlp_dim"] = 3072
hp["num_heads"] = 12
hp["dropout_rate"] = 0.1

In [11]:
import tensorflow as tf

In [12]:
def create_dir(path):
    if not os.path.exists(path):
        os.makedirs(path)

In [27]:
def load_data(path, split=0.1):
    images = shuffle(glob(os.path.join(path, '*', '*.jpg')))
    print(int(len(images)))
    split_size = int(len(images)* split)
    #print(split_size)
    train_x, valid_x = train_test_split(images, test_size=split_size, random_state=42)
    train_x, test_x = train_test_split(train_x, test_size=split_size, random_state=42)
    print(int(len(train_x)), int(len(test_x)))
    return train_x, valid_x, test_x

In [22]:
import matplotlib.pyplot as plt
def process_image_label(path):
    #path = path.decode()
    image = cv2.imread(path, cv2.IMREAD_COLOR)
    image = cv2.resize(image, (hp['image_size'], hp['image_size']))
    image = image/255.0
    #print(image.shape)
    
    # Preprocessing to patches 
    patch_shape = (hp['patch_size'], hp['patch_size'], hp['num_channels'])
    patches = patchify(image, patch_shape, hp['patch_size'])
    
    #print(hp['flat_patches_shape'])
    
    patches_r = np.reshape(patches, hp['flat_patches_shape'])
    patches_r = patches_r.astype(np.float32)

    
    patches = np.reshape(patches, (64, 25, 25, 3))
    n = 8
    #plt.figure(figsize=(4, 4))
    #for i in range(64):
        #cv2.imwrite(f'files/{i}.png' ,patches[i])
        #ax = plt.subplot(n, n, i + 1)
        #patch_img = tf.reshape(patches[i], (hp['patch_size'], hp['patch_size'], 3))
        #plt.imshow(patch_img.numpy().astype("uint8"))
        #plt.axis("off")
        
    #patches_r = patches_r.astype(np.float32)
    
    # Label
    #print(path)
    #path = str(path)
    #class_name = []
    
    '''this is unbelieveable https://stackoverflow.com/questions/3167154/how-to-split-a-dos-path-into-its-components-in-python'''
    
    
    class_name = path.split(os.sep)[-2]
    #print(class_name)
    class_idx = hp['class_name'].index(class_name)
    class_idx = np.array(class_idx, dtype=np.int32)
    print(class_name, class_idx)
    
    return patches_r, class_idx
    
    
    

In [23]:
def parse(path):
    patches, labels = tf.numpy_function(process_image_label, [path], [tf.float32, tf.int32])
    labels = tf.one_hot(labels, hp['num_classes'])
    
    patches.set_shape(hp['flat_patches_shape'])
    labels.set_shape(hp['num_classes'])
    
    return patches, labels

In [24]:
def tf_dataset(images, batch=32):
    ds = tf.data.Dataset.from_tensor_slices((images))
    ds = ds.map(parse).batch(batch).prefetch(8)
    return ds

In [29]:
#from implementation_ViT import ViT
#import implementation_ViT
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.optimizers.legacy import Adam

if __name__ == '__main__':
    # Seeding
    np.random.seed(42)
    tf.random.set_seed(42)
    
    # Directory for storing files
    create_dir('files')
    
    # paths
    dataset_path = r'C:\Users\DCL\Image-Classification-using-Vision-Transformer-ViT-\flower_photos'
    model_path = os.path.join('files', 'model.h5')
    csv_path = os.path.join('files', 'log.csv')
    
    train_x, valid_x, test_x = load_data(dataset_path)
    print(f"Train: {len(train_x)} - Valid: {len(valid_x)} - Test: {len(test_x)}")

    train_ds = tf_dataset(train_x, batch=hp["batch_size"])
    valid_ds = tf_dataset(valid_x, batch=hp["batch_size"])
    
    #for x, y in train_ds:
       # print(x.shape, y.shape)
       # break
    # Model
    model = vit.ViT(hp)
    model.compile(
        loss = 'categorical_crossentropy',
        optimizer = tf.keras.optimizers.Adam(hp['lr'], clipvalue=1.0),
        metrics = ['acc']
    )
    callbacks = [
        ModelCheckpoint(model_path, monitor='val_loss', verbose=1, save_best_only=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=10, min_lr=1e-10, verbose=1),
        CSVLogger(csv_path),
        EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=False),
    ]

    model.fit(
        train_ds,
        epochs=hp["num_epochs"],
        validation_data=valid_ds,
        callbacks=[callbacks],
    )

3670
2936 367
Train: 2936 - Valid: 367 - Test: 367


ResourceExhaustedError: Exception encountered when calling layer "multi_head_attention_50" (type MultiHeadAttention).

{{function_node __wrapped__AddV2_device_/job:localhost/replica:0/task:0/device:CPU:0}} OOM when allocating tensor with shape[768,12,768] and type float on /job:localhost/replica:0/task:0/device:CPU:0 by allocator cpu [Op:AddV2]

Call arguments received by layer "multi_head_attention_50" (type MultiHeadAttention):
  • query=tf.Tensor(shape=(None, 65, 768), dtype=float32)
  • value=tf.Tensor(shape=(None, 65, 768), dtype=float32)
  • key=None
  • attention_mask=None
  • return_attention_scores=False
  • training=None
  • use_causal_mask=False