## 4095 Final Project

In [10]:
import tensorflow as tf
import numpy as np
#import matplotlib.pyplot as plt
import os
from tqdm import tqdm
import cv2
from numpy import random as rng
from sklearn.utils import shuffle
import pickle
import time

from tensorflow.keras.layers import Input, Dense, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K

#from vit_keras import vit

# import timm
# import onnx
# import torch
# from onnx2keras import onnx_to_keras

---

### Preprocessing the data
Here we load the image data from the downloaded omniglot dataset

In [2]:
def load_images(path):
    #Load the image file and return the coordinates of pixels in the binary image
    
    X = []
    y = []
    lang_dict = {} #used to map the alphabet characters to their class numbers.
    classNum = 0
    
    #Next, we iterate over all the alphabet folders in the Omniglot dataset
    for alphabet in tqdm(sorted(os.listdir(path))):
        lang_dict[alphabet] = [classNum, None]
        #set the path to the current alphabet folder.
        alpha_path = os.path.join(path, alphabet)
        
        #We iterate over over all the letter folders in the current alphabet folder
        for letter in sorted(os.listdir(alpha_path)):
            cat_images = []  #concatenate
            
            #iterates over all the image files in the current letter folder
            for img in sorted(os.listdir(os.path.join(alpha_path, letter))):
                #define the path to the current image file
                img_path = os.path.join(alpha_path, letter, img)
                              
                #read the current image file and convert it to grayscale
                img_gray = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2GRAY)
                
                #Resize the grayscale image
                img_resized = cv2.resize(img_gray, (224, 224))
                
                # Convert the resized grayscale image to RGB
                img_rgb = cv2.cvtColor(img_resized, cv2.COLOR_GRAY2RGB)
                cat_images.append(img_rgb)
                
                y.append(classNum)
            
            classNum+=1
            X.append(cat_images)   #appends the list of images for the current letter
            lang_dict[alphabet][1] = classNum - 1 #Sets the second val in the list to the current class number-1
    #Make X and y numpy arrays
    X = np.array(X)
    print(X.shape)
    y = np.array(y)
    return X, y, lang_dict


    

In [3]:
#We store the path to our training directory and evaluation directory
img_train_PATH = '/Users/siddharthsinha/Desktop/Spring_2023/CSE_5819/Honors_work/omniglot/python/images_background'
img_eval_PATH = '/Users/siddharthsinha/Desktop/Spring_2023/CSE_5819/Honors_work/omniglot/python/images_evaluation'

In [4]:
trainImages, trainLabels, lang_dict = load_images(img_train_PATH)
valImages, valLabels, lang_dictVal = load_images(img_eval_PATH)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:10<00:00,  2.88it/s]


(964, 20, 224, 224, 3)


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:07<00:00,  2.61it/s]


(659, 20, 224, 224, 3)


---
The model can be trained using a batch generator that randomly samples pairs of images from the training set. We also define the functions make_one_shot_task and test_one_shot to test the siamese network. 

In [24]:
def get_batch(batch_size,dset='train'):
    if dset == 'train':
        X = trainImages
    else:
        X = valImages
    
    n_classes, n_examples, w, h, _ = X.shape
    cat = rng.choice(n_classes, size=batch_size, replace=False)
    targets = np.zeros((batch_size,))
    targets[batch_size//2:] = 1
    pairs = [np.zeros((batch_size,w,h,3)) for _ in range(2)]
    for i in range(batch_size):
        ex_no = rng.randint(n_examples)
        pairs[0][i,:,:,:] = X[cat[i],ex_no,:,:].reshape(w,h,3)
        cat2 = 0
        if i >= batch_size // 2:
            cat2 = cat[i]
        else:
            cat2 = (cat[i] + rng.randint(1,n_classes)) % n_classes
        ex_no2 = rng.randint(n_examples)
        pairs[1][i,:,:,:] = X[cat2,ex_no2,:,:].reshape(w,h,3)
    return pairs,targets


In [22]:
def make_one_shot_task(N, dset='val'):
    if dset == 'train':
        X = trainImages
    else:
        X = valImages
    n_classes, n_examples, w, h, _ = X.shape  # Updated to unpack 5 values
    cats = rng.choice(n_classes, size=(N,))
    indices = rng.choice(n_examples, size=(N,))
    true_cat = cats[0]
    ex1 = rng.randint(n_examples)
    test_image = np.array([X[true_cat, ex1]] * N).reshape(N, w, h, 3)  # Updated to handle 3 channels
    support_set = X[cats, indices].reshape(N, w, h, 3)  # Updated to handle 3 channels
    targets = np.zeros((N,))
    targets[0] = 1

    test_image, support_set, targets = shuffle(test_image, support_set, targets)

    return [test_image, support_set], targets

In [23]:
def test_one_shot(model,N,k,dset='val'):
    n_correct = 0
    for _ in range(k):
        inputs, outputs = make_one_shot_task(N,dset)
        preds = model.predict(inputs)
        if np.argmax(outputs) == np.argmax(preds):
            n_correct += 1
    return n_correct / k

---
### Siamese Neural Network

In [8]:
from vit_keras import vit

def create_vit_embedding_model(input_shape):
    vit_model = vit.vit_b32(
        image_size=input_shape[0],
        activation=None,
        pretrained=True,
        include_top=False,
        pretrained_top=False,
        weights='imagenet21k',
    )

    input_layer = Input(input_shape)
    outputs = vit_model(input_layer)
    return Model(inputs=input_layer, outputs=outputs)

def get_siamese(input_shape, patch_size, num_patches):
    # Define input tensors
    left_input = Input(input_shape)
    right_input = Input(input_shape)

    embedding_model = create_vit_embedding_model(input_shape)

    left_emb = embedding_model(left_input)
    right_emb = embedding_model(right_input)

    L1_Layer = Lambda(lambda tensors: K.abs(tensors[0] - tensors[1]))
    L1_Dist = L1_Layer([left_emb, right_emb])
    OP = Dense(1, activation='sigmoid', kernel_regularizer='l2')(L1_Dist)

    siamese_net = Model(inputs=[left_input, right_input], outputs=OP)

    return siamese_net


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 

 The versions of TensorFlow you are currently using is 2.7.0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


### Training Loop and Hyperparameters

In [11]:
num_iterations = 7000
batch_size = 128

evaluateEvery = 100
k = 250
N = 1

#n_classes, n_examples, w, h = trainImages.shape
print(trainImages.shape)

lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.05,
    decay_steps=4000,
    decay_rate=0.0001)

opt = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

input_shape = (224, 224, 3)
model = get_siamese(input_shape, 7, 225)

model.compile(
    loss='binary_crossentropy',
    optimizer=opt,
    metrics=['accuracy']
)

model.summary()

(964, 20, 224, 224, 3)
Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_5 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 input_6 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 model_1 (Functional)           (None, 768)          88045824    ['input_5[0][0]',                
                                                                  'in

In [25]:
lossArr = []
trainAccArr = []
valAccArr = []
currTime = time.time()
x,y = get_batch(batch_size)

for i in range(0,num_iterations+1):
    x,y = get_batch(batch_size)
    loss = model.train_on_batch(x,y)
    if i % evaluateEvery == 0:
        lossArr.append(loss[0])
        trainAcc = round(test_one_shot(model,N,k,'train') * 100,2)
        valAcc = round(test_one_shot(model,N,k,'val') * 100,2)
        trainAccArr.append(trainAcc)
        valAccArr.append(valAcc)
        print('Iteration',i,'('+str(round(time.time() - currTime,1))+'s) - Loss:',loss[0],'Acc:',round(loss[1],2),'',end='')
        print(k,str(N)+'-way train accuracy:', trainAcc,'%, ',end='')
        print(k,str(N)+'-way val accuracy:', valAcc,'%')
        currTime = time.time()


Iteration 0 (350.5s) - Loss: 1.6035696268081665 Acc: 0.5 250 1-way train accuracy: 100.0 %, 250 1-way val accuracy: 100.0 %


KeyboardInterrupt: 

In [26]:
print('Final Validation Accuracy:', round(test_one_shot(model,N,k,'val') * 100,2))

Final Validation Accuracy: 100.0
