# BUILD DATABASE

In [4]:
import os
#importem la funcio get_params de params (funcio params)
from params import get_params

# definim la funcio build_database
def build_database(params):

    # List images
    # llista les imatges dins del path root(GDSA)->database(TB2016) per als grups de train, val i test
    image_names = os.listdir(os.path.join(params['root'],
                             params['database'],params['split'],'images'))

    # File to be saved
    # crea el fitxer a la carpeta save dins de root, escriu la llista d'imatges
    # i li dona el nom de split(train, val o test).txt
    file = open(os.path.join(params['root'],params['root_save'],
                             params['image_lists'],
                             params['split'] + '.txt'),'w')

    # Save image list to disk
    # guarda el fitxer creat anteriorment i el tanca
    for imname in image_names:
        file.write(imname + "\n")
    file.close()

if __name__=="__main__":

    # crida a la funcio get_params dins de params.py
    # obtenim tots els parametres necesaris per cridar la funcio build_database
    params = get_params()

    # cridem la funcio build_database  amb la que creem el fitxer de la llista d'imatges per cada grup
    # train, val i test
    for split in ['train','val','test']:
        params['split'] = split
        build_database(params)

# GET FEATURES

In [2]:
from params import get_params
import sys

# We need to add the source code path to the python path if we want to call modules such as 'utils'
params = get_params()
sys.path.insert(0,params['src'])

from utils.rootsift import RootSIFT
import os, time
import numpy as np
import pickle
import cv2
from sklearn.cluster import MiniBatchKMeans
from sklearn.preprocessing import normalize, StandardScaler
from sklearn.decomposition import PCA
import warnings
warnings.filterwarnings("ignore")


def get_features(params,pca=None,scaler=None):

    # Read image names
    readfile = os.path.join(params['root'],params['root_save'],
                            params['image_lists'],params['split'] + '.txt')
    with open(readfile,'r') as f:
        image_list = f.readlines()

    # Initialize keypoint detector and feature extractor
    detector, extractor = init_detect_extract(params)

    # Initialize feature dictionary
    features = {}

    # Get trained codebook
    km = pickle.load(open(os.path.join(params['root'],params['root_save'],
                                     params['codebooks_dir'],'codebook_'
                                     + str(params['descriptor_size']) + "_"
                                     + params['descriptor_type']
                                     + "_" + params['keypoint_type'] + '.cb')
                                     ,'rb'))

    for image_name in image_list:

        # Read image
        im = cv2.imread(os.path.join(params['root'],params['database'],
                                     params['split'],
                                     'images',image_name.rstrip()))

        # Resize image
        im = resize_image(params,im)

        # Extract local features
        feats = image_local_features(im,detector,extractor)

        if feats is not None:

            if params['normalize_feats']:
                feats = normalize(feats)

            # If we scaled training features
            if scaler is not None:
                scaler.transform(feats)

            # Whiten if needed
            if pca is not None:

                pca.transform(feats)

            # Compute assignemnts
            assignments = get_assignments(km,feats)

            # Generate bow vector
            feats = bow(assignments,km)
        else:
            # Empty features
            feats = np.zeros(params['descriptor_size'])

        # Add entry to dictionary
        features[image_name] = feats


    # Save dictionary to disk with unique name
    save_file = os.path.join(params['root'],params['root_save'],
                             params['feats_dir'],
                             params['split'] + "_" +
                             str(params['descriptor_size']) + "_"
                             + params['descriptor_type'] + "_"
                             + params['keypoint_type'] + '.p')

    pickle.dump(features,open(save_file,'wb'))


def resize_image(params,im):

    # Get image dimensions
    height, width = im.shape[:2]

    # If the image width is smaller than the proposed small dimension,
    # keep the original size !
    resize_dim = min(params['max_size'],width)

    # We don't want to lose aspect ratio:
    dim = (resize_dim, height * resize_dim/width)

    # Resize and return new image
    return cv2.resize(im,dim)

def image_local_features(im,detector,extractor):

    '''
    Extract local features for given image
    '''

    positions = detector.detect(im,None)
    positions, descriptors = extractor.compute(im,positions)

    return descriptors

def init_detect_extract(params):

    '''
    Initialize detector and extractor from parameters
    '''
    if params['descriptor_type'] == 'RootSIFT':

        extractor = RootSIFT()
    else:
        extractor = cv2.xfeatures2d.SIFT_create()
        #extractor = cv2.DescriptorExtractor_create(params['descriptor_type'])

    #detector = cv2.FeatureDetector_create(params['keypoint_type'])
    detector = cv2.xfeatures2d.SIFT_create()
    return detector, extractor

def stack_features(params):

    '''
    Get local features for all training images together
    '''

    # Init detector and extractor
    detector, extractor = init_detect_extract(params)

    # Read image names
    readfile = os.path.join(params['root'],params['root_save'],
                            params['image_lists'],params['split'] + '.txt')

    with open(readfile,'r') as f:
        image_list = f.readlines()

    X = []
    for image_name in image_list:

        # Read image
        im = cv2.imread(os.path.join(params['root'],
                        params['database'],params['split'],
                        'images',image_name.rstrip()))

        # Resize image
        im = resize_image(params,im)

        feats = image_local_features(im,detector,extractor)
        # Stack all local descriptors together

        if feats is not None:
            if len(X) == 0:

                X = feats
            else:
                X = np.vstack((X,feats))

    if params['normalize_feats']:
        X = normalize(X)

    if params['whiten']:

        pca = PCA(whiten=True)
        pca.fit_transform(X)

    else:
        pca = None

    # Scale data to 0 mean and unit variance
    if params['scale']:

        scaler = StandardScaler()

        scaler.fit_transform(X)
    else:
        scaler = None

    return X, pca, scaler

def train_codebook(params,X):

    # Init kmeans instance
    km = MiniBatchKMeans(params['descriptor_size'])

    # Training the model with our descriptors
    km.fit(X)

    # Save to disk
    pickle.dump(km,open(os.path.join(params['root'],params['root_save'],
                                     params['codebooks_dir'],'codebook_'
                                     + str(params['descriptor_size']) + "_"
                                     + params['descriptor_type']
                                     + "_" + params['keypoint_type'] + '.cb'),
                                     'wb'))

    return km

def get_assignments(km,descriptors):

    assignments = km.predict(descriptors)

    return assignments


def bow(assignments,km):

    # Initialize empty descriptor of the same length as the number of clusters
    descriptor = np.zeros(np.shape(km.cluster_centers_)[0])

    # Build vector of repetitions
    for a in assignments:

        descriptor[a] += 1

    # L2 normalize
    descriptor = descriptor.reshape(1, -1)
    descriptor = normalize(descriptor)

    return descriptor

if __name__ == "__main__":

    params = get_params()

    # Change to training set
    params['split'] = 'train'

    print "Stacking features together..."
    # Save features for training set
    t = time.time()
    X, pca, scaler = stack_features(params)
    print "Done. Time elapsed:", time.time() - t
    print "Number of training features", np.shape(X)

    print "Training codebook..."
    t = time.time()
    train_codebook(params,X)
    print "Done. Time elapsed:", time.time() - t

    for split in ['train','val','test']:
        params['split'] = split
        print "Storing bow features for %s set..."%(params['split'])
        t = time.time()
        get_features(params, pca,scaler)
        print "Done. Time elapsed:", time.time() - t

Stacking features together...
Done. Time elapsed: 115.938817978
Number of training features (429889, 128)
Training codebook...
Done. Time elapsed: 217.238002062
Storing bow features for train set...
Done. Time elapsed: 166.634321928
Storing bow features for val set...
Done. Time elapsed: 98.351831913
Storing bow features for test set...
Done. Time elapsed: 164.082720995


# RANKING

In [3]:
import os
import pickle
import numpy as np
from params import get_params
from sklearn.metrics.pairwise import pairwise_distances

def rank(params):

    train_features = pickle.load(open(os.path.join(params['root'],
                                 params['root_save'],params['feats_dir'],
                                 'train' + "_" + str(params['descriptor_size'])
                                 + "_" + params['descriptor_type'] +
                                 "_" + params['keypoint_type'] + '.p'),'rb'))

    for split in ['val','test']:
        features = pickle.load(open(os.path.join(params['root'],
                                   params['root_save'],params['feats_dir'],
                                   split + "_" +
                                   str(params['descriptor_size']) + "_"
                                   + params['descriptor_type'] + "_"
                                   + params['keypoint_type'] + '.p'),'rb'))


        # For each image id in the validation set
        for id in features.keys():

            # Get its feature
            bow_feats = features[id]

            # The ranking is composed with the ids of all training images
            ranking = train_features.keys()

            X = np.array(train_features.values())

            # The .squeeze() method reduces the dimensions of an array to the
            # minimum. E.g. if we have a numpy array of shape (400,1,100)
            # it will transform it to (400,100)
            distances = pairwise_distances(bow_feats,X.squeeze())


            # Sort the ranking according to the distances.
            # We convert 'ranking' to numpy.array to sort it, and then back to list
            # (although we could leave it as numpy array).
            ranking = list(np.array(ranking)[np.argsort(distances.squeeze())])

            # Save to text file
            outfile = open(os.path.join(params['root'],params['root_save'],
                           params['rankings_dir'],params['descriptor_type'],
                           split,id.split('.')[0] + '.txt'),'w')

            for item in ranking:
                outfile.write(item.split('.')[0] + '\n')
            outfile.close()

if __name__ == "__main__":

    params = get_params()
    rank(params)

IOError: [Errno 2] No such file or directory: '/home/aleix/Documentos/Q5/GDSA/save/rankings/SIFT/test/wgsoteylpq.txt'

In [None]:
# Ens dona un error que no sabem a què es degut.