In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import tensorflow.keras.utils as np_utils
import gc
import _pickle as pickle
import resnet2
from tensorflow.keras.optimizers import SGD,Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
def save(file, name, folder=""):
    """Pickle ``file`` to ``<name>.pickle``.

    Parameters
    ----------
    file : object
        Any picklable object to serialise.
    name : str
        Target file name, without the ``.pickle`` extension.
    folder : str, optional
        Sub-directory, relative to the current working directory, to write
        into. When empty (the default) the file is written to the current
        working directory.

    Returns
    -------
    None
    """
    if folder != "":
        path = './' + folder + '/' + name + '.pickle'
    else:
        path = name + '.pickle'
    # The context manager flushes and closes the handle even if pickling raises.
    with open(path, 'wb') as outfile:
        pickle.dump(file, outfile)
    
def load(name, folder=""):
    """Load and return the object stored in ``<name>.pickle``.

    Parameters
    ----------
    name : str
        File name, without the ``.pickle`` extension.
    folder : str, optional
        Sub-directory, relative to the current working directory, to read
        from. When empty (the default) the file is read from the current
        working directory.

    Returns
    -------
    object
        The unpickled object.
    """
    if folder != "":
        path = './' + folder + '/' + name + '.pickle'
    else:
        path = name + '.pickle'
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    # The context manager closes the handle even if unpickling raises.
    with open(path, 'rb') as infile:
        return pickle.load(infile)

from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping

In [2]:
def point_wise_feed_forward_network(d_model, dff):
    """Build the two-layer feed-forward sub-network.

    The returned ``tf.keras.Sequential`` applies a ReLU-activated
    ``Dense(dff)`` followed by a linear ``Dense(d_model)``, so the last
    dimension goes ``... -> dff -> d_model``.
    """
    stack = [
        tf.keras.layers.Dense(dff, activation='relu'),  # expand to dff units
        tf.keras.layers.Dense(d_model),                 # project back to d_model units
    ]
    return tf.keras.Sequential(stack)

class VisionAttention(tf.keras.layers.Layer):
    """Batch-wise attention layer driven by a UMAP-style similarity matrix.

    Each sample of the batch attends to all samples of the same batch. The
    attention logits are derived from pairwise cosine distances that are
    rescaled with the mean / standard deviation of each sample's nearest
    neighbours, then symmetrised. The attended features go through a
    residual + LayerNorm step, a feed-forward network, and a second
    residual + LayerNorm step, optionally followed by a linear projection.

    Parameters
    ----------
    d_model : int
        Output width of the feed-forward network.
    batch_size : int
        Stored on the instance; not used by ``call``.
    n_neighbors : int
        Number of neighbours used for the local statistics; ``n_neighbors + 1``
        entries are taken per row by ``top_k`` in ``call``.
    d_proj : int or None
        When not ``None``, width of the final ``Dense`` projection.
    dff : int
        Hidden width of the feed-forward network.
    rate : float
        Dropout rate of both dropout layers.
    training : bool
        Accepted for signature compatibility; not used by the constructor.
    temperature : float
        Multiplier applied to the neighbour standard deviation.
    """

    def __init__(self, d_model = 512,batch_size = 128, n_neighbors = 15, d_proj = None, dff = 512, rate = 0.2, training = True, temperature = 3):
        super(VisionAttention, self).__init__()

        self.d_model = d_model
        self.top_k = n_neighbors
        self.d_proj = d_proj
        self.batch_size = batch_size
        self.temperature = temperature
        # Not applied anywhere in call(); kept so the attribute remains available.
        self.dense = tf.keras.layers.Dense(d_model)

        if d_proj is not None:
            self.projection = tf.keras.layers.Dense(d_proj)

        self.ffn = point_wise_feed_forward_network(d_model, dff)

        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)

    def call(self, v, training = True):
        """Apply batch-wise attention to ``v``.

        ``v`` is assumed to be ``(batch_size, d_model)`` (per the original
        inline comment) and the batch must hold at least ``n_neighbors + 1``
        samples for ``top_k`` to succeed. ``training`` is forwarded to the
        two dropout layers.
        """
        # Cosine similarity between every pair of samples in the batch.
        normalized_v = tf.linalg.normalize(v, ord='euclidean', axis=1)[0]
        cosine_matrix = tf.matmul(normalized_v, normalized_v, transpose_b=True)

        ## Computing local umap distances
        # The (top_k + 1) largest similarities per row; ind_k is not used.
        top_k_distances, ind_k = tf.math.top_k(cosine_matrix, k=self.top_k+1, sorted=True)

        # Map cosine similarity in [-1, 1] to a distance in [0, 1].
        cosine_matrix = 0.5 - tf.math.divide(cosine_matrix, 2)
        top_k_distances = 0.5 - tf.math.divide(top_k_distances, 2)

        # Per-sample neighbourhood statistics, shape (batch,).
        mean = tf.math.reduce_mean(top_k_distances, axis=-1, keepdims=False)
        std = tf.math.reduce_std(top_k_distances, axis=-1, keepdims=False)*self.temperature
        # Guard against a zero spread (all neighbours equidistant), which
        # would otherwise produce NaN/inf in the division below.
        std = tf.maximum(std, tf.keras.backend.epsilon())

        # mean/std broadcast along the last axis (per column). Because the
        # distance matrix is symmetric and the result is symmetrised below,
        # broadcasting per row would give the same umap_sim_matrix.
        umap_matrix = tf.math.exp(-(cosine_matrix - mean)/std)
        umap_matrixT = tf.transpose(umap_matrix)
        mult_matrix = tf.math.multiply(umap_matrix, umap_matrixT)

        ## Computing umap similarity matrix
        umap_sim_matrix = umap_matrix + umap_matrixT - mult_matrix

        # softmax attention
        # NOTE(review): the similarity is negated before the softmax, so a
        # larger similarity yields a smaller attention weight - confirm intended.
        attention_weights = tf.nn.softmax( - umap_sim_matrix, axis=-1)
        attn_output = tf.matmul(attention_weights, v, transpose_b=False)

        ## feed forward
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(v + attn_output)  # (batch_size, d_model)

        ffn_output = self.ffn(out1)  # (batch_size, d_model)
        ffn_output = self.dropout2(ffn_output, training=training)
        # NOTE(review): this residual adds the layer input v, not out1 - confirm intended.
        out2 = self.layernorm2(v + ffn_output)  # (batch_size, d_model)

        if self.d_proj is None:
            return out2
        return self.projection(out2)

In [None]:
# Load the training split. NOTE: unpickling can execute arbitrary code; only
# open dataset files you trust. The `with` block closes the file handle.
with open('miniImageNet_category_split_train_phase_train.pickle', 'rb') as fh:
    OT = pickle.load(fh, encoding='latin1')
# The 'train_phase_val' / 'train_phase_test' pickles can be loaded the same way
# and concatenated onto X_meta / Y_meta if more training data is wanted.

# Preview: five rows of five images, taken at offsets 1..5 from index 600*i
# (assumes samples are stored class by class, 600 per class - TODO confirm).
for i in range(5):
    fig, axes = plt.subplots(1, 5)
    for j, ax in enumerate(axes):
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        ax.imshow(OT['data'][600*i + j + 1])

Y_meta = np.array(OT['labels'])
X_meta = OT['data']

# Rescale pixel values from [0, 255] to [-0.5, 0.5].
X_meta = (X_meta / 255)-0.5

# One-hot encode the integer labels.
y_meta = np_utils.to_categorical(Y_meta)
del OT
gc.collect()

In [None]:
## Preparing layers

# ResNet-18 backbone from the project-local `resnet2` module, built with input
# shape (84, 84, 3). NOTE(review): the second argument (64) is presumably the
# number of outputs - confirm against resnet2.
build = resnet2.ResnetBuilder()
residual = build.build_resnet_18((84,84,3),64)

# Attention layer whose d_model equals the last dimension of the backbone output.
uproj = VisionAttention(d_model = residual.output.shape[-1],batch_size = 128, n_neighbors = 15, d_proj = None, dff = 512, temperature = 3)

In [None]:
# Batch size used for the model's Input layer and for the data generators.
batch_size = 128

In [None]:
# The batch dimension is fixed on the Input layer, so every batch fed to the
# model is expected to contain exactly `batch_size` samples.
inputs = tf.keras.layers.Input(shape = (84,84,3), batch_size = batch_size)
resnet_features = residual(inputs)

# Batch-wise attention over the backbone features.
projected = uproj(resnet_features)

# 64-way softmax classification head.
output = tf.keras.layers.Dense(64, activation = 'softmax')(projected)

model = tf.keras.Model(inputs, output)

In [None]:
# Print the layer-by-layer summary of the classifier.
model.summary()

In [None]:
# Training-time augmentation: rotation, zoom, shifts, shear and horizontal flips.
aug = ImageDataGenerator(rotation_range=20, zoom_range=0.15,
                         width_shift_range=0.2, height_shift_range=0.2, shear_range=0.15,
                         horizontal_flip=True, fill_mode="nearest")
# Validation generator: no geometric jitter.
# NOTE(review): horizontal_flip=True still flips validation images at random,
# which makes the validation metrics slightly noisy - confirm intended.
aug_val = ImageDataGenerator(rotation_range=0, zoom_range=0,
                         width_shift_range=0, height_shift_range=0, shear_range=0,
                         horizontal_flip=True, fill_mode="nearest")
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
optimizer=SGD(learning_rate=0.1)

model.compile(loss = 'categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

In [None]:
# Hold out 10% of the samples for validation; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X_meta, y_meta, test_size=0.1, random_state=42)

In [None]:
# Drop the un-split arrays and ask the garbage collector to reclaim memory.
del X_meta
del y_meta
gc.collect()

In [None]:
# Number of batches per epoch; a non-integer value would mean a final partial batch.
X_train.shape[0] / batch_size

In [None]:
# Multiply the learning rate by 0.2 when val_loss has not improved by at least
# 0.005 for 3 epochs, down to a floor of 3e-7.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, verbose = 1,min_delta=0.005,
                              patience=3, min_lr=3e-7)

# Stop after 6 epochs without val_loss improvement and restore the best weights.
early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=6, verbose=1, mode='auto',baseline=None, restore_best_weights=True)

epochs = 40

# NOTE: the model's Input layer has a fixed batch dimension, so the train and
# validation set sizes should be multiples of batch_size to avoid partial batches.
with tf.device('/GPU:0'):
    history = model.fit(aug.flow(X_train, y_train, batch_size=batch_size),
        validation_data=aug_val.flow(X_test, y_test,batch_size=batch_size), steps_per_epoch=len(X_train) // batch_size,
        epochs=epochs, callbacks=[early_stop, reduce_lr])

In [None]:
# Persist the trained weights; the test section reloads them from this file.
model.save_weights('weights.h5')

## Test

In [3]:
# NOTE: unpickling can execute arbitrary code; only open dataset files you
# trust. The `with` blocks close the file handles.
with open('miniImageNet_category_split_test.pickle', 'rb') as fh:
    OT1 = pickle.load(fh, encoding='latin1')
with open('miniImageNet_category_split_val.pickle', 'rb') as fh:
    OT2 = pickle.load(fh, encoding='latin1')

from copy import deepcopy

## Concatenating val and test data (val first) in order to get more classes for the experiment
# Copy the label list so that extending it does not mutate OT2['labels'].
Y_val = deepcopy(OT2['labels'])
Y_val.extend(OT1['labels'])
# np.concatenate already allocates a new array, so no prior copy of the images is needed.
X_val = np.concatenate([OT2['data'], OT1['data']], axis = 0)

## Restructuring the images into one list per class (36 classes, labels 64..99)
## so that X[label - 64] gives direct access to all images of a class.
## Assumes every class has the same number of images, so that np.array yields a
## regular (36, n_per_class, ...) array - TODO confirm.
tab = [[] for _ in range(36)]

for label, image in zip(Y_val, X_val):
    tab[label-64].append(image)

X = np.array(tab)

In [4]:
# n: number of classes per experiment; k: number of training samples drawn per class.
n = 5
k = 5
import random

## Generate a sorted array of n distinct random integers between m and M (both bounds included)
def choose_n_classes(n, m, M):
    """Draw ``n`` distinct integers uniformly from the inclusive range ``[m, M]``.

    Parameters
    ----------
    n : int
        Number of distinct values to draw.
    m, M : int
        Lower and upper bounds; both are included in the range.

    Returns
    -------
    tab : np.ndarray
        The drawn values in ascending order.
    dico : dict
        Maps each drawn value to its position (0 .. n-1) in ``tab``.

    Raises
    ------
    ValueError
        If ``n`` is negative or larger than the number of integers in
        ``[m, M]`` (raised by ``random.sample``), instead of looping forever.
    """
    # random.sample draws without replacement, so no rejection loop is needed.
    tab = np.array(sorted(random.sample(range(m, M + 1), n)))
    dico = {value: rank for rank, value in enumerate(tab)}
    return tab, dico


## Generate the data for one experiment. Passing `tab` and `dico` (with value=False) repeats the experiment
## on the same classes, but with a different number of samples drawn per class
def build_dataset(X, n,k,batch_size = 128, value = True, tab = True, dico = True):
    """Build the train/test arrays for one experiment.

    Parameters
    ----------
    X : array-like
        Images indexed as ``X[label - 64][sample]``.
    n : int
        Number of classes to draw (used only when ``value`` is true).
    k : int
        Number of training samples drawn per class.
    batch_size : int or None
        When not ``None``, the test set is resampled (with replacement, the
        ``np.random.choice`` default) to ``batch_size - k*n`` samples.
    value : bool
        When true, draw ``n`` new classes among labels 64..99; otherwise reuse
        the ``tab`` / ``dico`` arguments.
    tab, dico :
        Class labels and their label -> index mapping, as returned by
        ``choose_n_classes``; ignored when ``value`` is true.

    Returns
    -------
    x_train, x_test, y_train, y_test : np.ndarray
    tab, dico :
        The classes actually used, so the caller can reuse them.
    """
    if value:
        tab, dico = choose_n_classes(n, 64,99)

    x_train = []
    x_test = []
    y_train = []
    y_test = []

    print(dico)

    for elt in tab:
        class_images = X[elt-64]
        n_samples = len(class_images)
        # choose_n_classes draws from an inclusive range, so the upper bound
        # must be the last valid index; otherwise an out-of-range index could
        # be drawn and the class would end up with fewer than k samples.
        ind, _ = choose_n_classes(k, 0, n_samples - 1)
        train_ids = set(ind.tolist())
        for i in range(n_samples):
            if i in train_ids:
                x_train.append(class_images[i])
                y_train.append(dico[elt])
            else:
                x_test.append(class_images[i])
                y_test.append(dico[elt])

    x_train = np.array(x_train)
    x_test = np.array(x_test)
    y_train = np.array(y_train)
    y_test = np.array(y_test)

    if batch_size is not None:
        # Keep train + test at exactly batch_size samples.
        # NOTE(review): sampling is done with replacement, so test samples
        # may repeat - confirm intended.
        shuffle_id = np.random.choice(len(x_test), size = batch_size - k*n)
        x_test = x_test[shuffle_id]
        y_test = y_test[shuffle_id]

    return x_train, x_test, y_train, y_test, tab, dico

In [5]:
# Rebuild the same architecture as in the training section so that the saved
# weights can be loaded into it.
batch_size = 128
build = resnet2.ResnetBuilder()
residual = build.build_resnet_18((84,84,3),64)

# dff, rate and batch_size fall back to the VisionAttention defaults here.
uproj = VisionAttention(d_model = residual.output.shape[-1], n_neighbors = 15, d_proj = None, temperature = 3)

# Fixed batch dimension: every batch fed to the model must hold batch_size samples.
inputs = tf.keras.layers.Input(shape = (84,84,3), batch_size = batch_size)
resnet_features = residual(inputs)

projected = uproj(resnet_features)

# 64-way softmax head, matching the trained classifier.
output = tf.keras.layers.Dense(64, activation = 'softmax')(projected)

model_transfert = tf.keras.Model(inputs, output)

In [6]:
# Restore the weights written to 'weights.h5' by the training section.
model_transfert.load_weights('weights.h5')

In [7]:
# Print the layer-by-layer summary of the reloaded classifier.
model_transfert.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(128, 84, 84, 3)]        0         
_________________________________________________________________
model (Model)                multiple                  11179648  
_________________________________________________________________
vision_attention (VisionAtte (128, 512)                527360    
_________________________________________________________________
dense_3 (Dense)              (128, 64)                 32832     
Total params: 11,739,840
Trainable params: 11,732,032
Non-trainable params: 7,808
_________________________________________________________________


In [8]:
# Feature extractor: same input as the classifier, truncated at the attention
# layer (the softmax head is dropped).
inputs = model_transfert.input
# Look the layer up through `uproj.name` instead of the hard-coded auto-generated
# name: Keras appends a suffix (e.g. 'vision_attention_1') when several
# VisionAttention layers are created in the same session, which would make a
# literal 'vision_attention' lookup fail or hit the wrong layer.
outputs = model_transfert.get_layer(uproj.name).output
# NOTE: this rebinds the global name `model`, which `experiment` reads.
model = tf.keras.Model(inputs=inputs,   outputs=outputs)

In [9]:
# Print the summary of the truncated feature-extraction model.
model.summary()

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(128, 84, 84, 3)]        0         
_________________________________________________________________
model (Model)                multiple                  11179648  
_________________________________________________________________
vision_attention (VisionAtte (128, 512)                527360    
Total params: 11,707,008
Trainable params: 11,699,200
Non-trainable params: 7,808
_________________________________________________________________


In [10]:
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix

In [11]:
def experiment(n, k):
    """Run one experiment with ``n`` classes and ``k`` training samples per class.

    Builds a dataset with ``build_dataset`` from the global ``X``, embeds the
    train and test images together in a single ``model.predict`` call (globals
    ``model`` and ``batch_size``), fits a logistic regression on the train
    embeddings, and returns the accuracy on the test embeddings. The
    intermediate shapes and the accuracy are printed.
    """
    support_x, query_x, support_y, query_y, tab, dico = build_dataset(X, n, k, batch_size = batch_size)
    print(support_x.shape)

    # Train samples first, then test samples, so they can be split back by position.
    episode = np.concatenate([support_x, query_x])
    print(episode.shape)
    with tf.device('/GPU:0'):
        embeddings = model.predict(episode)

    print(embeddings.shape)

    n_support = len(support_x)
    support_emb = embeddings[:n_support]
    query_emb = embeddings[n_support:]

    classifier = LogisticRegression(random_state=0, solver='saga', multi_class='multinomial', max_iter = 1000, penalty='l2')
    classifier.fit(support_emb, support_y)

    predictions = classifier.predict(query_emb)

    accuracy = accuracy_score(query_y, predictions)
    print(accuracy)

    return accuracy

In [17]:
# Single run with 5 classes and 5 training samples per class.
n = 5
k = 5

experiment(n,k)

{73: 0, 78: 1, 84: 2, 85: 3, 88: 4}
(25, 84, 84, 3)
(128, 84, 84, 3)
(128, 512)
0.5048543689320388


0.5048543689320388

In [None]:
# 5 classes, 1 training sample per class: repeat the experiment 100 times and
# collect the accuracies in `res`.
n = 5
k = 1

res = []

for i in range(100):
    res.append(experiment(n,k))

{64: 0, 72: 1, 75: 2, 80: 3, 91: 4}
(5, 84, 84, 3)
(128, 84, 84, 3)
(128, 512)
0.34959349593495936
{80: 0, 82: 1, 89: 2, 93: 3, 97: 4}
(5, 84, 84, 3)
(128, 84, 84, 3)
(128, 512)
0.21951219512195122
{69: 0, 70: 1, 71: 2, 80: 3, 96: 4}
(5, 84, 84, 3)
(128, 84, 84, 3)
(128, 512)
0.3008130081300813
{72: 0, 88: 1, 95: 2, 96: 3, 97: 4}
(5, 84, 84, 3)
(128, 84, 84, 3)
(128, 512)
0.2682926829268293
{74: 0, 82: 1, 83: 2, 94: 3, 99: 4}
(5, 84, 84, 3)
(128, 84, 84, 3)
(128, 512)
0.2601626016260163
{69: 0, 81: 1, 86: 2, 89: 3, 95: 4}
(5, 84, 84, 3)
(128, 84, 84, 3)
(128, 512)
0.16260162601626016
{65: 0, 69: 1, 73: 2, 82: 3, 83: 4}
(5, 84, 84, 3)
(128, 84, 84, 3)
(128, 512)
0.34959349593495936
{70: 0, 71: 1, 72: 2, 82: 3, 84: 4}
(5, 84, 84, 3)
(128, 84, 84, 3)
(128, 512)
0.2764227642276423
{68: 0, 74: 1, 79: 2, 87: 3, 93: 4}
(5, 84, 84, 3)
(128, 84, 84, 3)
(128, 512)
0.3008130081300813
{65: 0, 67: 1, 69: 2, 70: 3, 77: 4}
(5, 84, 84, 3)
(128, 84, 84, 3)
(128, 512)
0.3008130081300813
{75: 0, 78: 1, 8