# COMP 551 final project code
paper: Unleashing the Potential of CNNs for Interpretable Few-Shot Learning

## data loading 
meta training set: 64 classes - training CNN
meta testing set: 20 classes - extract VC and validate
meta validation set: 16 classes - Not being used

On each trial (5-shot)
- Use 25 samples(5 classes) from training split to train CNN to achieve high accuracy.
- Use 25 samples(5 classes) from train subset of the testing split to extract VC
- Use 15 samples for each class (5 classes) from the test subset of the testing split to classify images using VC.

10 trials:

desired output file: 

training set : (64 classes in total)
- train_x_1.csv : 5 sample each of 5 classes , train_y_1.csv : 5 sample for each of 5 classes
- test_x_1.csv : 5 sample each of 3 classes, test_y_1.csv : 5 sample each of 3 classes

- train_x_2.csv.....
- test_x_2.csv
....
....
- train_x_10.csv.....
- test_x_10.csv

testing set : (20 classes in total)
- train_x_1.csv : 5 sample each of 5 classes , train_y_1.csv : 5 sample for each of 5 classes
- test_x_1.csv : 5 sample each of 3 classes, test_y_1.csv : 5 sample each of 3 classes

- train_x_2.csv.....
- test_x_2.csv
....
....
- train_x_10.csv.....
- test_x_10.csv

the mini-imagenet structure
```
paper_reproduce_dataset/train/n01882714/images/n01882714_0.jpeg
```

Use load_labels() to control the number of classes used in a trial,
then use load_data() to get train_x, train_y.



In [6]:
#helper function to import data
import re

def tryint(s):
    """Return ``s`` converted to ``int`` when possible, otherwise ``s`` itself.

    Only the errors raised by a failed conversion are caught, so unrelated
    exceptions (for example ``KeyboardInterrupt``) are no longer swallowed.
    """
    try:
        return int(s)
    except (TypeError, ValueError, OverflowError):
        return s

def alphanum_key(s):
    """Build a natural-sort key by splitting ``s`` into text and digit chunks.

    Each chunk is passed through ``tryint``, e.g. "z23a" -> ["z", 23, "a"].
    """
    chunks = re.split('([0-9]+)', s)
    return list(map(tryint, chunks))

def sort_nicely(l):
    """Sort ``l`` in place in natural (human) order and return the same list.

    Ordering is defined by ``alphanum_key``.
    """
    # Slice assignment keeps the in-place contract: the caller's list object
    # is the one that ends up sorted and returned.
    l[:] = sorted(l, key=alphanum_key)
    return l

#load data
import skimage
from skimage.io import ImageCollection,concatenate_images,imread

from skimage.color import gray2rgb
import numpy as np
from skimage import io
from os import listdir
from os.path import isfile, join


def print_image(data):
    """Show ``data`` in a matplotlib window using nearest-neighbour interpolation."""
    # matplotlib is imported lazily so it is only needed when an image is shown.
    import matplotlib.pyplot as plt

    plt.imshow(data, interpolation='nearest')
    plt.show()

def load_labels():
    """Read the class identifiers listed in ``paper_reproduce_dataset/wnids.txt``.

    Returns a list with the first nine characters of every line of the file,
    in file order.
    """
    with open("paper_reproduce_dataset/wnids.txt","r") as wnid_file:
        return [line[0:9] for line in wnid_file]

def imreadconvert(Xname):
    """Read the image at ``Xname``; non-3-D results are expanded with ``gray2rgb``.

    Arrays that already have three dimensions are returned untouched.
    """
    image = imread(Xname)
    if image.ndim != 3:
        # Anything that is not already a 3-D array is converted via gray2rgb.
        return gray2rgb(image)
    return image
    
def load_data(label_dict, dataset, nsamples=10):
    """Load ``nsamples`` randomly chosen images for every label.

    Parameters
    ----------
    label_dict : iterable of str
        Class identifiers; images are read from
        ``paper_reproduce_dataset/<label>/images``.
        NOTE(review): the notebook header shows an extra ``train/`` path
        component (``paper_reproduce_dataset/train/<wnid>/images/...``);
        confirm which layout is on disk.
    dataset : str
        Name used only in the progress messages.
    nsamples : int, optional
        Number of images drawn per label (default 10, the previously
        hard-coded value).

    Returns
    -------
    (x_data, y_data)
        ``x_data`` is the result of ``concatenate_images`` over the selected
        files; ``y_data`` is a 1-D array holding one label per image, in the
        same order.

    Raises
    ------
    ValueError
        If a label directory contains fewer than ``nsamples`` files.
    """
    # Imported locally because the notebook only imports ``random`` in a
    # later cell; this keeps the function usable on its own.
    import random

    print ("loading",dataset, "data!")
    file_names = []
    labels = []

    for label in label_dict:
        cur_dir = "paper_reproduce_dataset/" + label + "/images"
        candidates = [f for f in sort_nicely(listdir(cur_dir)) if isfile(join(cur_dir, f))]
        if len(candidates) < nsamples:
            # random.sample would raise ValueError too, but without saying
            # which directory is short of images.
            raise ValueError(
                "need %d images in %s but found only %d"
                % (nsamples, cur_dir, len(candidates))
            )
        chosen = random.sample(candidates, nsamples)
        # extend() avoids re-copying the accumulated lists for every label.
        file_names.extend(cur_dir + '/' + f for f in chosen)
        labels.extend([label] * nsamples)

    image_collect = ImageCollection(file_names, load_func=imreadconvert)
    x_data = concatenate_images(image_collect)
    print ("loaded",dataset, "data")
    y_data = np.asarray(labels)  # already 1-D: one label per image

    print("x_",dataset,".shape =",x_data.shape)
    print("y_",dataset,".shape =",y_data.shape)

    return x_data, y_data
        

    

In [7]:
#VGG model structure
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten,Activation
from keras.layers import Conv2D, MaxPooling2D , AveragePooling2D,ZeroPadding2D


def VGG_13(num_class):
    """Build the (uncompiled) VGG-style classifier for 64x64x3 inputs.

    The network has five stages, each made of two 3x3 ReLU convolutions with
    "same" padding followed by a 2x2 max-pool with stride 2. The stages use
    64, 128, 256, 512 and 512 filters. They are followed by Flatten, two
    Dense(4096, relu) + Dropout(0.5) pairs and a softmax layer with
    ``num_class`` units.
    """
    conv_options = dict(activation="relu", padding="same")
    model = Sequential()

    for stage, filters in enumerate((64, 128, 256, 512, 512)):
        if stage == 0:
            # Only the very first layer declares the input shape.
            model.add(Conv2D(filters, (3, 3), input_shape=(64,64,3), **conv_options))
        else:
            model.add(Conv2D(filters, (3, 3), **conv_options))
        model.add(Conv2D(filters, (3, 3), **conv_options))
        model.add(MaxPooling2D((2,2), strides=(2,2)))

    model.add(Flatten())
    for _ in range(2):
        model.add(Dense(4096, activation='relu'))
        model.add(Dropout(0.5))
    model.add(Dense(num_class, activation='softmax'))

    return model



In [8]:
#data split and train model function
import random

from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.preprocessing import LabelBinarizer

from keras import optimizers

def save_model(model_to_save, json_name,h5_name):
    """Write the model architecture to ``json_name`` and its weights to ``h5_name``."""
    # Serialise first so that no file is created if to_json() fails.
    architecture = model_to_save.to_json()
    with open(json_name, "w") as out:
        out.write(architecture)

    # Weights go to a separate file.
    model_to_save.save_weights(h5_name)
    print("Saved model to disk")

    
from keras.models import model_from_json  

def load_model(json_name,h5_name):
    """Rebuild a model from its JSON architecture and load weights into it.

    Parameters
    ----------
    json_name : str
        Path of the JSON architecture file (as written by ``save_model``).
    h5_name : str
        Path of the weights file passed to ``load_weights``.

    Returns
    -------
    The model returned by ``model_from_json`` with the weights loaded.
    """
    # ``with`` guarantees the file is closed even if reading fails.
    with open(json_name, 'r') as json_file:
        loaded_model_json = json_file.read()
    loaded_model = model_from_json(loaded_model_json)
    # load weights into new model
    loaded_model.load_weights(h5_name)
    print("Loaded model from disk")
    return loaded_model


def split_metadata():
    """Load a fresh random sample of images for the train and test class splits.

    The first 64 labels from ``load_labels()`` form the training split and
    labels 64..79 (16 classes) form the testing split.
    NOTE(review): the notebook header describes a 20-class meta-testing set
    and an unused 16-class validation set; confirm that [64:80] is the
    intended slice.

    Returns ``(x_train, y_train, x_test, y_test)``.
    """
    all_labels = load_labels()

    x_train, y_train = load_data(all_labels[0:64], "train")
    print()
    x_test, y_test = load_data(all_labels[64:80], "test")

    return x_train, y_train, x_test, y_test

def run_CNN_trial(train_trial, epochs=100, learning_rate=0.001):
    """Train a fresh ``VGG_13`` on one trial's samples and return it.

    Parameters
    ----------
    train_trial : sequence of (image, label) pairs
        Samples for this trial.
    epochs : int, optional
        Number of training epochs (default 100, the previous fixed value).
    learning_rate : float, optional
        SGD learning rate (default 0.001, the previous fixed value).

    Returns
    -------
    The trained model, in its state after the last epoch. The checkpoint with
    the best training accuracy is written to ``best_model.hdf5`` as a side
    effect.
    """
    from keras.callbacks import ModelCheckpoint

    columns = list(zip(*train_trial))
    x_tr = np.asarray(columns[0])
    y_tr = np.asarray(columns[1])

    # One-hot encode the labels; the number of columns gives the class count.
    lb = preprocessing.LabelBinarizer()
    onehot_y_tr = lb.fit_transform(y_tr)
    num_class = onehot_y_tr.shape[1]

    # Half of the trial is held out and used as validation data during fit.
    x_train_tr, x_test_tr, y_train_tr, y_test_tr = train_test_split(
        x_tr, onehot_y_tr, test_size=0.5, random_state=42)

    cnn_model = VGG_13(num_class)
    print("start training CNN model.")

    optimizer = optimizers.SGD(lr=learning_rate)
    cnn_model.compile(loss=keras.losses.categorical_crossentropy,
                      optimizer=optimizer,
                      metrics=['accuracy'])

    # Keep only the weights with the highest *training* accuracy ('acc').
    checkpoint = ModelCheckpoint("best_model.hdf5", monitor='acc', verbose=1,
                                 save_best_only=True, mode='max')

    cnn_model.fit(x_train_tr, y_train_tr,
                  epochs=epochs,
                  callbacks=[checkpoint],
                  verbose=1,
                  validation_data=(x_test_tr, y_test_tr))

    # The former post-training evaluate() call was dropped: its result was
    # never used and the same data is already reported as validation metrics.
    return cnn_model

def train_model():
    """Sample the dataset, train the CNN on one random trial and return the results.

    Returns ``(x_test, y_test, cnn_model)`` where the test arrays come from
    ``split_metadata()`` and ``cnn_model`` from ``run_CNN_trial``.
    """
    x_train, y_train, x_test, y_test = split_metadata()

    # Pair every training image with its label, then randomly choose
    # 50 pairs from the training split to train the CNN for one trial.
    labelled_train = list(zip(x_train, y_train))
    train_trial = random.sample(labelled_train, 50)

    cnn_model = run_CNN_trial(train_trial)
    return x_test, y_test, cnn_model


In [9]:
# K-mean clustering visual concept
from sklearn.cluster import KMeans

from keras import backend as K

#get intermediate layer output from model
#3rd maxpooling output
def get_int_layer(model,input):
    """Return the activations of ``model.layers[8]`` for ``input``.

    For the ``VGG_13`` network defined in this file, index 8 is the third
    max-pooling layer.
    """
    third_pool_fn = K.function([model.layers[0].input],
                               [model.layers[8].output])
    outputs = third_pool_fn([input])
    return outputs[0]

def extract_VC(layer_output):
    """Cluster the layer activations with K-means and return the 20 centres.

    ``layer_output`` is indexed as a 4-D array. It is reshaped to
    ``(shape[1] * shape[2], shape[0] * shape[3])`` before clustering, so every
    centre has ``shape[0] * shape[3]`` components.

    NOTE(review): the reshape is a plain C-order reinterpretation with no axes
    moved, so a row is a run of consecutive values rather than one spatial
    position gathered across images. ``thresholding`` in this file relies on
    this layout (it reshapes a centre to (5, 256)). Confirm this is the
    intended grouping before changing either side.
    """
    dims = layer_output.shape
    n_rows = dims[1] * dims[2]
    n_cols = dims[0] * dims[3]
    features = layer_output.reshape(n_rows, n_cols)

    clustering = KMeans(n_clusters=20,random_state=0).fit(features)
    return clustering.cluster_centers_

def get_VC_dict(cnn_model,x_test,y_test):
    """Group test images by class, split each class in two and extract VCs.

    For every class, the first half of its images (in input order) is used to
    extract visual concepts and the remaining images are kept for validation.

    Returns ``(vc_dict, train_images_by_class, valid_images_by_class)``, three
    dicts keyed by class label.
    """
    # Bucket the images by their label, preserving first-seen class order.
    images_by_class = {}
    for row in range(x_test.shape[0]):
        images_by_class.setdefault(y_test[row], []).append(x_test[row])

    extraction_half = {}
    validation_half = {}
    for label, images in images_by_class.items():
        middle = len(images) // 2
        extraction_half[label] = images[:middle]
        validation_half[label] = images[middle:]

    # One set of visual concepts per class, from the extraction half only.
    concepts = {}
    for label, images in extraction_half.items():
        concepts[label] = extract_VC(get_int_layer(cnn_model, images))

    return concepts, extraction_half, validation_half
# for el in list(test_data_dict_valid_y.values()):
#     print(el)

In [16]:
#distance function for each pixel p in intermediate layer and one Visual Concept
#input shape : f_p:(256,) f_vc:(256,)
from numpy import linalg as LA
def d_p_v(f_p,f_vc):
    """Return the cosine distance ``1 - cos(f_p, f_vc)`` between two vectors.

    Parameters
    ----------
    f_p, f_vc : array-like
        1-D vectors of equal length; they are converted to float64.

    Returns
    -------
    float
        0 for vectors pointing the same way, 1 for orthogonal vectors and 2
        for opposite vectors. If either vector has zero norm the cosine is
        undefined; 1.0 is returned in that case instead of dividing by zero
        (which previously produced NaN and a runtime warning).
    """
    f_p = np.asarray(f_p, dtype=np.float64)
    f_vc = np.asarray(f_vc, dtype=np.float64)

    norm_product = LA.norm(f_p) * LA.norm(f_vc)
    if norm_product == 0:
        # An all-zero feature vector matches nothing.
        return 1.0

    return 1 - np.dot(f_p, f_vc) / norm_product

# print(list(vc_dict.values())[0][0])
# print(distance_vc(a,list(vc_dict.values())[0][0]))

#VC-encoding
def thresholding(f_p,f_vc, threshold=0.05):
    """Binary VC encoding: 1 if ``f_p`` is within ``threshold`` of the concept.

    Parameters
    ----------
    f_p : 1-D array
        Feature vector of one position of the intermediate layer.
    f_vc : array
        Visual-concept centre whose size is a multiple of ``len(f_p)``. It is
        viewed as rows of ``len(f_p)`` values and the rows are averaged. For
        the 5 x 256 centres used in this notebook this is exactly the former
        ``reshape(5, 256)``.
    threshold : float, optional
        Cosine-distance cut-off (default 0.05, the previous fixed value).

    Returns
    -------
    int
        1 when ``d_p_v(f_p, mean_vc) < threshold``, otherwise 0.
    """
    feature_len = np.shape(f_p)[-1]
    # -1 lets numpy infer how many feature-sized rows the centre contains,
    # instead of hard-coding 5 rows of 256.
    mean_vc = np.mean(np.reshape(f_vc, (-1, feature_len)), 0)

    return 1 if d_p_v(f_p, mean_vc) < threshold else 0
    
    
#Nearest Neighbor similarity
#layer_output is in shape of 64*64

def manhattan_neighbours(x, y, w, h, distance):
    """Yield the in-bounds grid cells around ``(x, y)``, excluding ``(x, y)``.

    The neighbourhood is the square ``[x-distance, x+distance] x
    [y-distance, y+distance]`` clipped to ``0 <= i < w`` and ``0 <= j < h``.
    Cells are produced row by row in increasing ``i`` then ``j``.
    """
    # Clamp the ranges up front instead of filtering out-of-bounds cells.
    first_i, last_i = max(0, x - distance), min(w, x + distance + 1)
    first_j, last_j = max(0, y - distance), min(h, y + distance + 1)

    for i in range(first_i, last_i):
        for j in range(first_j, last_j):
            if (i, j) != (x, y):
                yield (i, j)
        

def inverse_similarity(input_image,train_image):
    """Distance between two images: the inverse of their VC-encoding similarity.

    Both images are reshaped to ``(1, 64, 64, 3)`` and passed through
    ``get_int_layer`` using the module-level ``loaded_model``. Every centre
    stored in the module-level ``vc_dict`` is turned into a binary map per
    image with ``thresholding``. The similarity is the average of two ratios,
    one per direction: among the positions where one image fires for a
    concept, the fraction where the other image fires somewhere in the
    surrounding distance-2 neighbourhood (``manhattan_neighbours``, which
    excludes the position itself).

    Returns
    -------
    float
        ``1 / similarity``, or ``1 / 0.001`` when the similarity is 0.

    Notes
    -----
    The result is identical to the earlier implementation. The binary maps
    are now computed once per concept instead of being re-evaluated for every
    neighbour of every position.
    """
    print("inverse_similarity")
    global vc_dict, loaded_model
    input_image = input_image.reshape(1,64,64,3)
    train_image = train_image.reshape(1,64,64,3)

    ps = get_int_layer(loaded_model, input_image)[0]  # b
    pps = get_int_layer(loaded_model, train_image)[0]  # b prime
    x, y = ps.shape[:2]

    vcs = []
    for li in vc_dict.values():
        vcs.extend(li)

    # The neighbourhood of a position depends only on the grid size, so it
    # is computed once and reused for every visual concept.
    neighbourhoods = [
        (i, j, list(manhattan_neighbours(i, j, x, y, 2)))
        for i in range(x)
        for j in range(y)
    ]

    num1 = 0
    denum1 = 0
    num2 = 0
    denum2 = 0
    for vc in vcs:
        # Binary encoding of every position, evaluated once per concept.
        enc_in = [[thresholding(ps[i, j, :], vc) for j in range(y)] for i in range(x)]
        enc_tr = [[thresholding(pps[i, j, :], vc) for j in range(y)] for i in range(x)]

        for i, j, cells in neighbourhoods:
            b1 = enc_in[i][j]
            b2 = enc_tr[i][j]
            # default=0 covers a 1x1 grid, where a position has no neighbours.
            qmax1 = max((enc_tr[i2][j2] for i2, j2 in cells), default=0)
            qmax2 = max((enc_in[i2][j2] for i2, j2 in cells), default=0)
            num1 += b1 * qmax1
            denum1 += b1
            num2 += b2 * qmax2
            denum2 += b2

    # A zero denominator means the numerator is zero as well, so that
    # direction simply contributes 0 (previously expressed as 0 / 100).
    ratio1 = num1 / denum1 if denum1 else 0.0
    ratio2 = num2 / denum2 if denum2 else 0.0

    similarity = 0.5 * (ratio1 + ratio2)
    if similarity == 0:
        print(similarity)
        # Cap the distance instead of dividing by zero.
        return 1/0.001
    return 1/similarity
from sklearn.neighbors import KNeighborsClassifier

def nearest_neighbor(cnn_model,test_data_dict_valid_x,test_data_dict_train_x,vc_dict):
    """Score a 4-NN classifier that uses ``inverse_similarity`` as its metric.

    Images are gathered class by class (in the key order of ``vc_dict``) from
    the two per-class dicts. Only the first 15 training and the first 15
    validation images are kept. Each image is flattened to 1-D, the classifier
    is fitted on the training part and its accuracy on the validation part is
    returned.

    ``cnn_model`` is not used here; ``inverse_similarity`` reads the model and
    the visual concepts from module-level names.
    """
    print("start NN")

    def _flatten(image):
        # Collapse a 3-D image into the 1-D vector the classifier expects.
        return image.reshape(image.shape[0]*image.shape[1]*image.shape[2])

    train_x, train_y = [], []
    test_x, test_y = [], []
    for image_class in list(vc_dict.keys()):
        for image in test_data_dict_train_x[image_class]:
            train_x.append(image)
            train_y.append(image_class)
        for image in test_data_dict_valid_x[image_class]:
            test_x.append(image)
            test_y.append(image_class)

    # Keep only the first 15 samples of each set.
    train_x, train_y = train_x[0:15], train_y[0:15]
    test_x, test_y = test_x[0:15], test_y[0:15]

    train_x = [_flatten(image) for image in train_x]
    test_x = [_flatten(image) for image in test_x]

    nbrs = KNeighborsClassifier(n_neighbors=4, algorithm='ball_tree', metric=inverse_similarity)
    nbrs.fit(train_x,train_y)
    print("finish fitting")
    return nbrs.score(test_x,test_y)
#     return nbrs.predict(input_image_x.reshape(1,12288))

# def total_accuracy(cnn_model,vc_dict,test_data_dict_valid_x,test_data_dict_train_x):
#     image_classes = list(vc_dict.keys())
#     true_positive = 0
#     total = 0
#     for image_class in image_classes:
#         image_list = test_data_dict_valid_x[image_class]
#         for image_id in range(len(image_list)):
#             pred_class = nearest_neighbor(cnn_model,image_list[image_id],test_data_dict_train_x,vc_dict)
#             if pred_class == image_class:
#                 true_positive += 1
#             total += 1
#     return true_positive/total

In [13]:
# Train the CNN for one trial, then save its architecture (JSON) and its
# final weights.
x_test,y_test,cnn_model = train_model()  
save_model(cnn_model, "VC_CNN.json", "VC_CNN.hdf5")

# Reload the architecture together with "best_model.hdf5", the checkpoint
# written by ModelCheckpoint inside run_CNN_trial, rather than the final
# weights stored in "VC_CNN.hdf5".
loaded_model = load_model("VC_CNN.json","best_model.hdf5")
# NOTE(review): split_metadata() draws images at random again, so the
# x_test/y_test returned by train_model() above are replaced by a new sample.
x_train,y_train,x_test,y_test = split_metadata()


loading train data!
loaded train data
x_ train .shape = (640, 64, 64, 3)
y_ train .shape = (640,)

loading test data!
loaded test data
x_ test .shape = (160, 64, 64, 3)
y_ test .shape = (160,)
start training CNN model.
Train on 25 samples, validate on 25 samples
Epoch 1/100
Epoch 00000: acc improved from -inf to 0.00000, saving model to best_model.hdf5
Epoch 2/100
Epoch 00001: acc did not improve
Epoch 3/100
Epoch 00002: acc improved from 0.00000 to 0.04000, saving model to best_model.hdf5
Epoch 4/100
Epoch 00003: acc did not improve
Epoch 5/100
Epoch 00004: acc did not improve
Epoch 6/100
Epoch 00005: acc did not improve
Epoch 7/100
Epoch 00006: acc improved from 0.04000 to 0.08000, saving model to best_model.hdf5
Epoch 8/100
Epoch 00007: acc improved from 0.08000 to 0.12000, saving model to best_model.hdf5
Epoch 9/100
Epoch 00008: acc did not improve
Epoch 10/100
Epoch 00009: acc did not improve
Epoch 11/100
Epoch 00010: acc did not improve
Epoch 12/100
Epoch 00011: acc did not impro

Epoch 48/100
Epoch 00047: acc did not improve
Epoch 49/100
Epoch 00048: acc did not improve
Epoch 50/100
Epoch 00049: acc did not improve
Epoch 51/100
Epoch 00050: acc did not improve
Epoch 52/100
Epoch 00051: acc did not improve
Epoch 53/100
Epoch 00052: acc did not improve
Epoch 54/100
Epoch 00053: acc did not improve
Epoch 55/100
Epoch 00054: acc did not improve
Epoch 56/100
Epoch 00055: acc did not improve
Epoch 57/100
Epoch 00056: acc did not improve
Epoch 58/100
Epoch 00057: acc did not improve
Epoch 59/100
Epoch 00058: acc did not improve
Epoch 60/100
Epoch 00059: acc did not improve
Epoch 61/100
Epoch 00060: acc did not improve
Epoch 62/100
Epoch 00061: acc did not improve
Epoch 63/100
Epoch 00062: acc did not improve
Epoch 64/100
Epoch 00063: acc did not improve
Epoch 65/100
Epoch 00064: acc did not improve
Epoch 66/100
Epoch 00065: acc did not improve
Epoch 67/100
Epoch 00066: acc did not improve
Epoch 68/100
Epoch 00067: acc did not improve
Epoch 69/100
Epoch 00068: acc did 

Epoch 99/100
Epoch 00098: acc did not improve
Epoch 100/100
Epoch 00099: acc did not improve
Saved model to disk
Loaded model from disk
loading train data!
loaded train data
x_ train .shape = (640, 64, 64, 3)
y_ train .shape = (640,)

loading test data!
loaded test data
x_ test .shape = (160, 64, 64, 3)
y_ test .shape = (160,)


In [14]:
# Obtain the per-class visual-concept dictionary plus the per-class image
# dictionaries used for VC extraction ("train") and for validation ("valid").
vc_dict,test_data_dict_train_x,test_data_dict_valid_x=get_VC_dict(loaded_model,x_test,y_test)


In [17]:
# Print the nearest-neighbour accuracy for this trial.
# In the recorded run below, this cell was stopped with a KeyboardInterrupt
# before any accuracy was printed.
# accuracy = total_accuracy(loaded_model,vc_dict,test_data_dict_valid_x,test_data_dict_train_x)
accuracy = nearest_neighbor(loaded_model,test_data_dict_valid_x,test_data_dict_train_x,vc_dict)
print(accuracy)

start NN


KeyboardInterrupt: 