# COMP 551 final project code
paper: Unleashing the Potential of CNNs for Interpretable Few-Shot Learning

## data loading 
meta training set: 64 classes - training CNN
meta testing set: 20 classes - extract VC and validate
meta validation set: 16 classes - Not being used

On each trial (5-shot)
- Use 25 samples (5 classes) from the training split to train the CNN to high accuracy.
- Use 25 samples (5 classes) from the train subset of the testing split to extract visual concepts (VC).
- Use 15 samples per class (5 classes) from the test subset of the testing split to classify images using the VCs.

10 trials:

desired output file: 

training set : (64 classes in total)
- train_x_1.csv : 5 samples for each of 5 classes, train_y_1.csv : 5 samples for each of 5 classes
- test_x_1.csv : 5 samples for each of 3 classes, test_y_1.csv : 5 samples for each of 3 classes

- train_x_2.csv.....
- test_x_2.csv
....
....
- train_x_10.csv.....
- test_x_10.csv

testing set : (20 classes in total)
- train_x_1.csv : 5 samples for each of 5 classes, train_y_1.csv : 5 samples for each of 5 classes
- test_x_1.csv : 5 samples for each of 3 classes, test_y_1.csv : 5 samples for each of 3 classes

- train_x_2.csv.....
- test_x_2.csv
....
....
- train_x_10.csv.....
- test_x_10.csv

the mini-imagenet structure
```
paper_reproduce_dataset/train/n01882714/images/n01882714_0.jpeg
```

Use load_labels() to control the number of classes used in a trial,
then use load_data() to get train_x, train_y.



In [1]:
#helper function to import data
import re

def tryint(s):
    """Return ``int(s)`` if ``s`` converts to an integer, otherwise ``s`` itself.

    Args:
        s: value to convert (in this file, a string chunk from ``re.split``).

    Returns:
        The integer value, or the original object when conversion fails.
    """
    try:
        return int(s)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are expected here; a bare ``except`` would
        # also hide KeyboardInterrupt and SystemExit.
        return s

def alphanum_key(s):
    """Split ``s`` into alternating text and number chunks for natural sorting.

    Runs of ASCII digits become ints (via ``tryint``); everything else stays
    a string, e.g. "z23a" -> ["z", 23, "a"].
    """
    chunks = re.split('([0-9]+)', s)
    return list(map(tryint, chunks))

def sort_nicely(l):
    """Reorder ``l`` in place into natural ("human") order and return it.

    Ordering is defined by ``alphanum_key``, so numeric runs compare as
    numbers rather than character by character.
    """
    l[:] = sorted(l, key=alphanum_key)
    return l

#load data
import skimage
from skimage.io import ImageCollection,concatenate_images,imread

from skimage.color import gray2rgb
import numpy as np
from skimage import io
from os import listdir
from os.path import isfile, join


def print_image(data):
    """Show ``data`` with matplotlib's ``imshow`` using nearest interpolation."""
    # Imported lazily so matplotlib is only needed when an image is displayed.
    import matplotlib.pyplot as plt

    plt.imshow(data, interpolation='nearest')
    plt.show()

def load_labels():
    """Read the class ids listed in ``paper_reproduce_dataset/wnids.txt``.

    Each line contributes its first nine characters (the length of an id such
    as ``n01882714``), with surrounding whitespace removed. Lines that are
    blank within those nine characters are skipped.

    Returns:
        list[str]: the labels in file order.
    """
    labels = []
    # Named ``wnid_file`` so the builtin ``input`` is not shadowed.
    with open("paper_reproduce_dataset/wnids.txt", "r") as wnid_file:
        for row in wnid_file:
            label = row[0:9].strip()
            # A blank (e.g. trailing) line would otherwise yield a "\n" label
            # that later breaks directory lookups.
            if label:
                labels.append(label)
    return labels

def imreadconvert(Xname):
    """Read the image file ``Xname`` and return it with a channel axis.

    An array that already has three dimensions is returned untouched; any
    other result is passed through ``gray2rgb``.
    """
    image = imread(Xname)
    has_channel_axis = len(image.shape) == 3
    return image if has_channel_axis else gray2rgb(image)
    
def load_data(label_dict, dataset, nsamples=10):
    """Load a random sample of images for every label in ``label_dict``.

    For each label, ``nsamples`` files are drawn without replacement from
    ``paper_reproduce_dataset/<label>/images`` and read with ``imreadconvert``.

    Args:
        label_dict: iterable of label strings; each is a directory name.
        dataset: name used only in the progress messages.
        nsamples: images drawn per label (default 10, the value that was
            previously hard-coded).

    Returns:
        tuple: ``(x_data, y_data)`` — the images stacked by
        ``concatenate_images`` and a 1-D array with the label of each image,
        in matching order.

    Raises:
        ValueError: from ``random.sample`` when a label directory holds fewer
            than ``nsamples`` files.
    """
    # Imported locally: the file-level ``import random`` only appears after
    # this definition, so the function would otherwise depend on cell order.
    import random

    print("loading", dataset, "data!")
    file_names = []
    labels = []

    for label in label_dict:
        cur_dir = "paper_reproduce_dataset/" + label + "/images"
        candidates = [f for f in sort_nicely(listdir(cur_dir)) if isfile(join(cur_dir, f))]
        chosen = random.sample(candidates, nsamples)
        # extend() appends in place instead of rebuilding the list per label.
        file_names.extend(cur_dir + '/' + f for f in chosen)
        labels.extend([label] * nsamples)

    image_collect = ImageCollection(file_names, load_func=imreadconvert)
    x_data = concatenate_images(image_collect)
    print("loaded", dataset, "data")
    # A flat list of strings already converts to a 1-D array, so no reshape
    # is needed.
    y_data = np.asarray(labels)

    print("x_", dataset, ".shape =", x_data.shape)
    print("y_", dataset, ".shape =", y_data.shape)

    return x_data, y_data
        

    

In [2]:
#VGG model structure
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten,Activation
from keras.layers import Conv2D, MaxPooling2D , AveragePooling2D,ZeroPadding2D


def VGG_13(num_class):
    """Assemble an untrained VGG-style ``Sequential`` model for 64x64x3 input.

    Five stages of two 3x3, same-padded ReLU convolutions (64, 128, 256, 512
    and 512 filters), each stage closed by 2x2 max pooling with stride 2.
    The classifier head is Flatten, two 4096-unit ReLU dense layers each
    followed by 0.5 dropout, and a ``num_class``-unit softmax layer.

    Args:
        num_class: number of units in the final softmax layer.

    Returns:
        The (uncompiled) Keras model.
    """
    conv_args = dict(activation="relu", padding="same")
    model = Sequential()

    for stage, filters in enumerate((64, 128, 256, 512, 512)):
        if stage == 0:
            # Only the very first layer declares the input shape.
            model.add(Conv2D(filters, (3, 3), input_shape=(64, 64, 3), **conv_args))
        else:
            model.add(Conv2D(filters, (3, 3), **conv_args))
        model.add(Conv2D(filters, (3, 3), **conv_args))
        model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    model.add(Flatten())
    for _ in range(2):
        model.add(Dense(4096, activation='relu'))
        model.add(Dropout(0.5))
    model.add(Dense(num_class, activation='softmax'))

    return model



Using TensorFlow backend.


In [3]:
#data split and train model function
import random

from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.preprocessing import LabelBinarizer

from keras import optimizers

def split_metadata():
    """Load image and label arrays for the CNN-training and held-out classes.

    The first 64 labels from ``load_labels`` form the "train" split and the
    next 16 (indices 64..79) form the "test" split; ``load_data`` then draws
    a random sample of images for every class in each split.

    NOTE(review): the notebook header describes a 20-class testing set, while
    this slice selects 16 classes — confirm which split is intended.

    Returns:
        tuple: ``(x_train, y_train, x_test, y_test)``.
    """
    all_labels = load_labels()
    train_classes = all_labels[0:64]
    heldout_classes = all_labels[64:80]

    x_train, y_train = load_data(train_classes, "train")
    print()
    x_test, y_test = load_data(heldout_classes, "test")
    return x_train, y_train, x_test, y_test

def run_CNN_trial(train_trial, epochs=30, learning_rate=0.01):
    """Train a fresh ``VGG_13`` network on the samples of one trial.

    The labels are one-hot encoded, the samples are split 50/50 (fixed seed)
    into a fitting half and a validation half, and the model is trained with
    plain SGD on categorical cross-entropy.

    Args:
        train_trial: sequence of ``(image, label)`` pairs.
        epochs: number of training epochs (default 30, previously hard-coded).
        learning_rate: SGD learning rate (default 0.01, previously hard-coded).

    Returns:
        The fitted Keras model.
    """
    # Transpose the pairs into an images column and a labels column without
    # rebinding the ``train_trial`` argument.
    columns = list(zip(*train_trial))
    x_tr = np.asarray(columns[0])
    y_tr = np.asarray(columns[1])

    # One-hot encode the labels; the encoded width fixes the softmax size.
    # NOTE(review): LabelBinarizer emits a single column when only two
    # distinct labels are present, which would give num_class == 1 — confirm
    # trials always contain at least three classes.
    lb = preprocessing.LabelBinarizer()
    onehot_y_tr = lb.fit_transform(y_tr)
    num_class = onehot_y_tr.shape[1]
    x_train_tr, x_test_tr, y_train_tr, y_test_tr = train_test_split(
        x_tr, onehot_y_tr, test_size=0.5, random_state=42)

    cnn_model = VGG_13(num_class)
    print("start training CNN model.")

    optimizer = optimizers.SGD(lr=learning_rate)
    cnn_model.compile(loss=keras.losses.categorical_crossentropy,
                      optimizer=optimizer,
                      metrics=['accuracy'])

    # Validation metrics are reported by fit() every epoch, so the separate
    # evaluate() call whose result was discarded has been dropped.
    cnn_model.fit(x_train_tr, y_train_tr,
                  epochs=epochs,
                  verbose=1,
                  validation_data=(x_test_tr, y_test_tr))

    return cnn_model

def train_model(trial_size=50):
    """Load both class splits and train the CNN on one random trial.

    Args:
        trial_size: number of ``(image, label)`` pairs drawn at random from
            the training split for the trial (default 50, previously
            hard-coded).

    Returns:
        tuple: ``(x_test, y_test, cnn_model)`` — the held-out split arrays
        from ``split_metadata`` and the model returned by ``run_CNN_trial``.
    """
    x_train, y_train, x_test, y_test = split_metadata()

    # Pair each image with its label so both are sampled together. The
    # held-out split is returned as arrays and is never paired, so the unused
    # ``test_data`` list is no longer built.
    train_data = list(zip(x_train, y_train))
    train_trial = random.sample(train_data, trial_size)

    cnn_model = run_CNN_trial(train_trial)
    return x_test, y_test, cnn_model

from keras import backend as K

def get_int_layer(model,input):
    """Return the output of ``model.layers[8]`` for the batch ``input``.

    For a model built by ``VGG_13``, layer index 8 is the third max-pooling
    layer. The batch is fed through the input of ``model.layers[0]``.
    """
    first_layer_in = model.layers[0].input
    third_pool_out = model.layers[8].output
    fetch_activations = K.function([first_layer_in], [third_pool_out])
    return fetch_activations([input])[0]

In [4]:
#train model
# Runs the full pipeline once (load both class splits, train the CNN on a
# random trial) and keeps the held-out arrays and the trained model as
# notebook globals for the cells below.
x_test,y_test,cnn_model = train_model()   

loading train data!
loaded train data
x_ train .shape = (640, 64, 64, 3)
y_ train .shape = (640,)

loading test data!
loaded test data
x_ test .shape = (160, 64, 64, 3)
y_ test .shape = (160,)
start training CNN model.
Train on 25 samples, validate on 25 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [14]:
# Show the labels of the held-out split.
print(y_test)
# Intermediate activations (layer index 8) for the first 10 held-out images
# only; the clustering cell below works on this array.
layer_output = get_int_layer(cnn_model,x_test[0:10])
print(x_test.shape)
print(layer_output.shape)
# Print the layer-by-layer architecture of the trained model.
cnn_model.summary()

['n03796401' 'n03796401' 'n03796401' 'n03796401' 'n03796401' 'n03796401'
 'n03796401' 'n03796401' 'n03796401' 'n03796401' 'n02795169' 'n02795169'
 'n02795169' 'n02795169' 'n02795169' 'n02795169' 'n02795169' 'n02795169'
 'n02795169' 'n02795169' 'n02123045' 'n02123045' 'n02123045' 'n02123045'
 'n02123045' 'n02123045' 'n02123045' 'n02123045' 'n02123045' 'n02123045'
 'n01855672' 'n01855672' 'n01855672' 'n01855672' 'n01855672' 'n01855672'
 'n01855672' 'n01855672' 'n01855672' 'n01855672' 'n01882714' 'n01882714'
 'n01882714' 'n01882714' 'n01882714' 'n01882714' 'n01882714' 'n01882714'
 'n01882714' 'n01882714' 'n02917067' 'n02917067' 'n02917067' 'n02917067'
 'n02917067' 'n02917067' 'n02917067' 'n02917067' 'n02917067' 'n02917067'
 'n02988304' 'n02988304' 'n02988304' 'n02988304' 'n02988304' 'n02988304'
 'n02988304' 'n02988304' 'n02988304' 'n02988304' 'n04398044' 'n04398044'
 'n04398044' 'n04398044' 'n04398044' 'n04398044' 'n04398044' 'n04398044'
 'n04398044' 'n04398044' 'n02843684' 'n02843684' 'n

In [22]:
# K-means clustering of intermediate-layer features into visual concepts
from sklearn.cluster import KMeans

# Features per image if each activation map were flattened whole (kept for
# reference; the clustering below works per spatial position instead).
num_feature = layer_output.shape[1] * layer_output.shape[2] * layer_output.shape[3]

# One row per spatial position of every image, one column per channel.
# Deriving the shape from the array replaces the hard-coded (640, 256), which
# only fits one specific batch size and layer shape (presumably 10 images of
# 8x8 positions with 256 channels — confirm against layer_output.shape).
input_kmean = layer_output.reshape(-1, layer_output.shape[-1])

# Uses KMeans' default number of clusters; random_state pins the result.
kmeans = KMeans(random_state=0).fit(input_kmean)
print(kmeans.labels_)
# Each cluster centre is treated as one visual concept.
visual_concepts = kmeans.cluster_centers_
print(visual_concepts.shape)

[6 6 2 2 2 2 2 6 6 2 1 1 1 1 2 6 5 3 3 3 3 1 2 6 3 0 7 7 0 1 2 6 3 0 7 7 0
 1 2 6 5 3 3 3 3 5 5 1 5 3 3 0 0 0 3 5 1 5 5 5 5 5 5 5 2 1 1 2 1 1 1 1 2 1
 1 2 2 1 5 1 5 3 3 5 5 5 5 1 5 0 0 0 0 0 0 5 1 5 3 0 0 0 0 5 2 1 1 5 5 5 5
 1 2 1 1 1 1 1 2 2 6 2 2 2 2 2 2 2 3 0 0 7 7 7 7 0 5 3 0 7 7 4 4 7 2 1 5 3
 7 4 4 0 2 1 5 3 7 4 4 0 2 1 5 3 0 7 7 0 1 1 5 5 5 3 0 0 3 7 7 7 0 0 7 7 0
 7 7 7 7 7 7 0 2 1 5 5 0 4 4 7 2 5 3 0 7 4 4 4 6 1 3 0 7 4 4 4 6 1 3 3 0 7
 4 7 6 1 5 3 3 0 7 0 6 1 5 5 5 5 3 5 6 2 1 2 2 2 2 2 6 6 6 6 6 2 2 2 1 5 5
 5 1 2 6 6 3 7 7 7 0 1 6 6 0 7 7 7 0 5 2 6 0 0 3 3 5 3 3 5 5 5 5 5 5 3 0 3
 1 1 1 1 1 1 1 2 1 5 5 5 5 5 1 1 1 5 5 5 5 1 1 1 0 7 7 0 3 5 1 1 7 4 7 0 3
 3 5 1 0 7 0 3 3 3 1 2 3 0 0 0 0 0 5 2 3 0 0 0 0 0 5 2 5 3 3 3 5 5 1 2 1 5
 5 5 5 5 5 1 1 5 5 5 5 5 5 1 2 2 6 6 6 6 6 6 2 2 6 2 1 1 2 2 1 1 5 3 0 3 1
 2 5 0 7 7 7 0 1 2 5 0 7 7 3 3 1 2 5 3 3 3 3 5 1 2 5 3 5 5 5 5 2 2 5 5 1 1
 1 1 2 6 3 0 0 3 7 4 4 7 0 7 7 0 7 4 4 7 0 7 7 0 0 7 7 0 3 7 0 3 3 3 5 1 3
 0 3 5 3 3 3 5 5 3 5 2 1 

In [18]:
#distance
def distance_vc(input_image):
    """Placeholder for the distance computation of the visual-concept step.

    The original definition had no colon and no body, so the cell could not
    be parsed. It is kept as an explicit stub until the computation is
    written.

    NOTE(review): presumably this should measure the distance between the
    features of ``input_image`` and the visual concepts — confirm the intended
    inputs and metric before implementing.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError("distance_vc is not implemented yet")
#VC-encoding

#Nearest Neighbor