In [None]:
import numpy as np
import os
import pandas as pd
import cv2
from tqdm import tqdm
import pickle
import scipy.cluster.hierarchy as sch

In [None]:
# Root directory of the image database. buildDataframe() takes the image
# type from the directory below this root and the object/sample from the
# file name, i.e. <datapath>/<Type>/<Object>_<Sample>.JPG
datapath='Data2'

In [None]:
def getFilenames(root):
    """Return the paths of all JPG images found under ``root``.

    The tree is walked recursively with ``os.walk``. A file is kept when its
    own extension is ``.jpg`` (compared case-insensitively); matching on the
    extension rather than on the whole path means a directory whose name
    merely contains "JPG" does not drag unrelated files into the result.

    Args:
        root: Directory to search.

    Returns:
        A sorted list of paths, each built as ``os.path.join(dirpath, name)``.
        Sorting makes the order reproducible across runs and filesystems.
        The list is empty when ``root`` does not exist or holds no JPG files.
    """
    files = [
        os.path.join(path, name)
        for path, _subdirs, names in os.walk(root)
        for name in names
        if os.path.splitext(name)[1].lower() == '.jpg'
    ]
    return sorted(files)

In [None]:
def buildDataframe(filenames):
    """Build a DataFrame describing each image from its file path.

    Each path is expected to look like ``<...>/<Type>/<Object>_<Sample>.<ext>``:
    the name of the parent directory becomes ``Type`` and the file name is
    split on ``'_'`` and ``'.'`` to obtain ``Object`` and ``Sample``.

    Args:
        filenames: Iterable of image file paths.

    Returns:
        A ``pandas.DataFrame`` with the columns
        ``['Type', 'Object', 'Sample', 'filepath']``, one row per path.
        ``filepath`` holds the path exactly as it was passed in.

    Raises:
        IndexError: If a file name contains no ``'_'`` separator.
    """
    columns = ['Type', 'Object', 'Sample', 'filepath']
    rows = []
    for filepath in filenames:
        # Parse relative to the file itself with os.path rather than splitting
        # on '/' and indexing from the left: this works with the platform's
        # path separator and with roots that are several directories deep.
        directory, name = os.path.split(os.path.normpath(filepath))
        image_type = os.path.basename(directory)
        name_parts = name.split('_')
        obj = name_parts[0]
        sample = name_parts[1].split('.')[0]
        rows.append([image_type, obj, sample, filepath])
    return pd.DataFrame(rows, columns=columns)


In [None]:
def extractSIFTfeatures(imagepath,nfeatures=500):
    """Detect SIFT keypoints and compute their descriptors for one image.

    Args:
        imagepath: Path of the image file to read.
        nfeatures: Passed to the SIFT constructor as ``nfeatures``.

    Returns:
        The ``(kp, des)`` pair returned by ``detectAndCompute`` on the
        grayscale version of the image.
        NOTE(review): ``des`` is presumably ``None`` when no keypoint is
        detected -- callers that stack descriptors should check.

    Raises:
        IOError: If OpenCV cannot read ``imagepath``.
    """
    image = cv2.imread(imagepath)
    # cv2.imread reports failure by returning None instead of raising; stop
    # here with the offending path rather than failing inside cvtColor.
    if image is None:
        raise IOError("Could not read image: {}".format(imagepath))
    image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Prefer the main-module constructor (available in recent OpenCV builds)
    # and fall back to the contrib module for older installations.
    if hasattr(cv2, 'SIFT_create'):
        sift = cv2.SIFT_create(nfeatures=nfeatures)
    else:
        sift = cv2.xfeatures2d.SIFT_create(nfeatures=nfeatures)
    kp, des = sift.detectAndCompute(image_gray, None)
    return kp, des



In [None]:
# Collect the JPG file paths found under the database root.
filenames = getFilenames(root=datapath)

In [None]:
# One row per image: Type, Object, Sample and filepath.
df = buildDataframe(filenames=filenames)

In [None]:
def featuresFromDataframe(df,nfeatures=500):
    """Extract SIFT features for every image listed in ``df``, grouped by object.

    Args:
        df: DataFrame with at least the columns ``'filepath'``, ``'Object'``
            and ``'Sample'``.
        nfeatures: Forwarded to ``extractSIFTfeatures``.

    Returns:
        A dict mapping each ``Object`` value to a list of
        ``(sample, kp, des)`` tuples, one per row of ``df`` in row order, e.g.
        ``d[obj1] = [(sampl1, kp1, des1), (sampl2, kp2, des2)]``.
    """
    dictionary = {}
    # iterrows() is a generator, so tqdm cannot infer its length; passing
    # total lets the progress bar show a percentage and an ETA.
    for _index, row in tqdm(df.iterrows(), total=len(df)):
        kp, des = extractSIFTfeatures(row['filepath'], nfeatures)
        dictionary.setdefault(row['Object'], []).append((row['Sample'], kp, des))
    return dictionary

In [None]:
def removeKpFromDict(dic):
    """Return a copy of a feature dictionary with the keypoints dropped.

    Every ``(sample, kp, des)`` triple becomes ``(sample, des)``. Keys and the
    order of the entries are kept, and ``dic`` itself is not modified.
    """
    return {
        key: [(name, descriptors) for name, _, descriptors in entries]
        for key, entries in dic.items()
    }


## Image Feature Extraction


In [None]:
# Split the image table by the value of its 'Type' column.
df_client = df.loc[df['Type'] == 'client']
df_server = df.loc[df['Type'] == 'server']

In [None]:
# SIFT features of the server images, requested with nfeatures=1000.
dict_server = featuresFromDataframe(df_server, nfeatures=1000)

149it [06:53,  2.78s/it]


In [None]:
# SIFT features of the client images, requested with nfeatures=1000.
dict_client = featuresFromDataframe(df_client, nfeatures=1000)

50it [02:11,  2.63s/it]


In [None]:
# Drop the keypoints and keep only (sample, descriptors) pairs.
server_dict_no_kp = removeKpFromDict(dic=dict_server)
client_dict_no_kp = removeKpFromDict(dic=dict_client)


In [None]:
# Persist the descriptor-only dictionaries. np.save stores a Python dict as a
# pickled 0-d object array, so reading one back requires
# np.load(path, allow_pickle=True).item()
np.save('server_dict_no_kp_1000.npy',server_dict_no_kp)
np.save('client_dict_no_kp_1000.npy',client_dict_no_kp)

## Combine features for training

In [None]:
# Stack the descriptors of every server sample into one array, one
# descriptor per row, for training.
# The blocks are collected first and stacked once: calling np.vstack inside
# a loop re-copies the growing array on every iteration. A comprehension is
# used so that no loop variables are left in the notebook namespace (names
# such as `object` or `_` would shadow the builtin and IPython's last output).
descriptor_blocks = [
    des
    for samples in server_dict_no_kp.values()
    for _sample, des in samples
    # NOTE(review): descriptors are presumably None for an image in which
    # no keypoint was found; such entries cannot be stacked, so skip them.
    if des is not None
]
# With nothing to stack, train_features stays an empty list.
train_features = np.vstack(descriptor_blocks) if descriptor_blocks else []



In [None]:
# Save the stacked server descriptors so they can be reloaded for training.
np.save('train_features_1000.npy',train_features)