In [2]:
import itertools
import operator

from deap import base, creator, gp, tools
import numpy as np
import cv2
from scipy.spatial.distance import pdist, squareform
import math



def protectedDiv(x, y):
    """Divide ``x`` by ``y``, returning 0 instead of failing when ``y`` is zero.

    Used as a GP primitive so evolved expressions never raise on division.
    """
    if y == 0:
        return 0
    return x/y


def codeFunction(*args):
    """Return the positional arguments unchanged, packed as a tuple.

    Serves as the tuple-typed GP primitive that gathers the float outputs
    of an individual into a single code.
    """
    packed = args
    return packed


def CzekanowskiDistance(u, v):
    """Return the Czekanowski dissimilarity between two vectors.

    Computed as ``1 - 2*sum(min(u_i, v_i)) / (sum(u) + sum(v))``.

    Args:
        u, v: Equal-length sequences or 1-D arrays of numbers.

    Returns:
        The dissimilarity as a float. When both vectors sum to zero the
        ratio is undefined (0/0); 0.0 is returned in that case instead of NaN.
    """
    u = np.asarray(u)
    v = np.asarray(v)
    den = np.sum(u) + np.sum(v)
    if den == 0:
        # Avoid 0/0 -> NaN, which would poison the min/max taken over the
        # distance matrix by the callers.
        return 0.0
    num = 2.0*np.sum(np.minimum(u, v))
    return 1.0 - num/den
    




def FitnessEvaluation(train_samples, feature_size, window_size, toolbox, individual):
    """Score an individual by how well its features separate the training classes.

    The result combines two terms, each in [0, 1]:
      * accuracy: leave-one-out 1-NN accuracy (Euclidean distance) on the
        training instances;
      * distance: a sigmoid of (db - dw), where db is the mean Czekanowski
        distance from each instance to its nearest instance of another class
        and dw is the mean distance to its farthest instance of the same class.

    Args:
        train_samples: list of (class_id, [instance indices]) pairs. Every
            class is expected to hold the same number of instances, because
            rows of the distance matrices are mapped back to classes by
            integer division.
        feature_size: length of the feature vector (histogram) per image.
        window_size: side of the square pixel window fed to the individual.
        toolbox: object whose ``compile(expr=...)`` turns the individual into
            a callable.
        individual: GP individual to evaluate.

    Returns:
        One-element tuple ``(fitness,)`` with
        ``fitness = 1 - (accuracy + distance)/2`` (lower is better).
    """
    kNClasses = len(train_samples)
    # Instances per class: length of the instance list of the first sample.
    # (The previous len(train_samples[1]) measured the (class_id, instances)
    # pair itself, which is always 2.)
    kNInstances = len(train_samples[0][1])
    n_total = kNClasses*kNInstances

    #>Generate lambda expression of individual being evaluated
    ind_lambda = toolbox.compile(expr=individual)

    #>Compute feature vectors for all the images in the training set based on
    #  current individual lambda and store the results on "train_set".
    train_set = InstancesFeatures(n_total, feature_size)
    train_set.populate(ind_lambda, train_samples, window_size)

    #>Pairwise distances between instances (one row per instance)
    instance_rows = train_set.featuresMatrix().transpose()
    D = squareform(pdist(instance_rows))
    D_cze = squareform(pdist(instance_rows, CzekanowskiDistance))

    #>Classify sampled instances using 1NN and computes cluster distances
    db = 0.0
    dw = 0.0
    for i in range(len(D)):
        #>Accuracy (fitness term)
        dists = np.delete(D[i], i)
        min_idx = np.argmin(dists)
        # Correct the index shift due to removal of self distance
        if min_idx >= i:
            min_idx = min_idx + 1
        # The nearest neighbour's class becomes this instance's label
        label_idx = min_idx // kNInstances
        train_set.labelInstance(i, train_samples[label_idx][0])

        #>Distance (fitness term)
        dists_cze_z = D_cze[i]
        class_block = i // kNInstances
        same_class = slice(class_block*kNInstances, (class_block + 1)*kNInstances)
        # Separates the distances in blocks from same class and others
        d_same = dists_cze_z[same_class]
        d_diff = np.delete(dists_cze_z, same_class)
        # Update the distance counters
        db = db + np.min(d_diff)
        dw = dw + np.max(d_same)

    db = db/n_total
    dw = dw/n_total

    accuracy = train_set.correctClassifications()[1]
    distance = 1.0/(1 + np.exp(-5.0*(db - dw)))

    fitness = 1.0 - (accuracy + distance)/2.0

    return (fitness, )


def FitnessEvaluationMod(train_samples, feature_size, window_size, toolbox,
                        individual):
    """Score an individual by class separation plus a tree-size (density) term.

    Three terms are averaged:
      * accuracy: leave-one-out 1-NN accuracy (Euclidean distance) on the
        training instances;
      * distance: a sigmoid of (db - dw), where db is the mean Czekanowski
        distance from each instance to its nearest instance of another class
        and dw is the mean distance to its farthest instance of the same class;
      * density: ``(max_len - len(individual)) / (max_len - min_len)``, which
        grows as the individual gets shorter.

    Args:
        train_samples: list of (class_id, [instance indices]) pairs. Every
            class is expected to hold the same number of instances, because
            rows of the distance matrices are mapped back to classes by
            integer division.
        feature_size: length of the feature vector (histogram) per image.
        window_size: side of the square pixel window fed to the individual.
        toolbox: object whose ``compile(...)`` turns the individual into a
            callable.
        individual: GP individual to evaluate; its ``len()`` is used as size.

    Returns:
        One-element tuple ``(fitness,)`` with
        ``fitness = 1 - (accuracy + distance + density)/3`` (lower is better).
    """
    # Depth bounds used only to normalise the size term.
    # NOTE(review): kTreeMaxDepth is 7 here but 10 in DefineEvolutionToolbox;
    # confirm which limit the density term should be normalised against.
    kTreeMinDepth = 2
    kTreeMaxDepth = 7
    max_len = (2**kTreeMaxDepth-1)*math.log(feature_size,2) + 1
    min_len = (2**kTreeMinDepth-1)*math.log(feature_size,2) + 1

    kNClasses = len(train_samples)
    # Instances per class: length of the instance list of the first sample.
    # (The previous len(train_samples[1]) measured the (class_id, instances)
    # pair itself, which is always 2.)
    kNInstances = len(train_samples[0][1])
    n_total = kNClasses*kNInstances

    #>Generate lambda expression of individual being evaluated
    ind_lambda = toolbox.compile(individual)

    #>Compute feature vectors for all the images in the training set based on
    #  current individual lambda and store the results on "train_set".
    train_set = InstancesFeatures(n_total, feature_size)
    train_set.populate(ind_lambda, train_samples, window_size)

    #>Pairwise distances between instances (one row per instance)
    instance_rows = train_set.featuresMatrix().transpose()
    D = squareform(pdist(instance_rows))
    D_cze = squareform(pdist(instance_rows, CzekanowskiDistance))

    #>Classify sampled instances using 1NN and computes cluster distances
    db = 0.0
    dw = 0.0
    for i in range(len(D)):
        #>Accuracy (fitness term)
        dists = np.delete(D[i], i)
        min_idx = np.argmin(dists)
        # Correct the index shift due to removal of self distance
        if min_idx >= i:
            min_idx = min_idx + 1
        # The nearest neighbour's class becomes this instance's label
        label_idx = min_idx // kNInstances
        train_set.labelInstance(i, train_samples[label_idx][0])

        #>Distance (fitness term)
        dists_cze_z = D_cze[i]
        class_block = i // kNInstances
        same_class = slice(class_block*kNInstances, (class_block + 1)*kNInstances)
        # Separates the distances in blocks from same class and others
        d_same = dists_cze_z[same_class]
        d_diff = np.delete(dists_cze_z, same_class)
        # Update the distance counters
        db = db + np.min(d_diff)
        dw = dw + np.max(d_same)

    db = db/n_total
    dw = dw/n_total

    accuracy = train_set.correctClassifications()[1]
    distance = 1.0/(1 + np.exp(-5.0*(db - dw)))
    density = 1.0*(max_len - len(individual))/(max_len - min_len)

    fitness = 1.0 - (accuracy + distance + density)/3.0

    return (fitness, )



def CreatePrimitiveSet (window_size, code_size):
    """Build the strongly-typed GP primitive set used to evolve descriptors.

    The evolved program takes ``window_size**2`` float inputs (argument
    prefix "P") and returns a tuple. The root primitive ``codeFunction``
    gathers ``code_size`` floats into that tuple; the remaining primitives
    are the float arithmetic operators, with a division that is safe on zero.

    Notes on DEAP primitives, kept from the original author:
      - input types must have a length, so lists are used;
      - the arguments still reach the primitive as separate elements;
      - the return type must be a hashable class, so lists are not allowed.
    """
    n_inputs = window_size**2

    pset = gp.PrimitiveSetTyped("EID", itertools.repeat(float, n_inputs), tuple, "P")
    pset.addPrimitive(codeFunction, [float]*code_size, tuple)
    # Binary float -> float operators, registered in a fixed order.
    for binary_op in (operator.add, operator.sub, operator.mul, protectedDiv):
        pset.addPrimitive(binary_op, [float, float], float)

    return pset


def DefineEvolutionToolbox (primitive_set, training_instances, feature_size, window_size):
    """Create the DEAP toolbox holding every operator used by the evolution.

    Registered entries: ``generate_expr``, ``generate_ind_tree``,
    ``generate_population``, ``compile``, ``evaluate``, ``select``, ``mate``,
    ``expr_mut`` and ``mutate``. ``mate`` and ``mutate`` are decorated so
    that offspring taller than the maximum tree depth are rejected.

    Args:
        primitive_set: primitive set the trees are built from.
        training_instances: list of (class_id, [instance indices]) pairs,
            bound as the first argument of the fitness function.
        feature_size: feature vector length, bound to the fitness function.
        window_size: pixel window side, bound to the fitness function.

    Returns:
        The configured ``base.Toolbox``.

    TODO: Parameterize this function so it receives the evolution parameters
    from file/struct.
    """
    kTreeMinDepth = 2
    kTreeMaxDepth = 10
    kTournamentSize = 7

    # creator.create() adds classes to the global deap.creator module. Create
    # them only once so repeated calls (one per class round) do not overwrite
    # the existing classes and trigger DEAP's overwrite warning.
    # NOTE(review): assumes no other code defines creator.FitnessMin or
    # creator.Individual with a different meaning.
    if not hasattr(creator, "FitnessMin"):
        creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
    if not hasattr(creator, "Individual"):
        creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)

    tbox = base.Toolbox()
    tbox.register("generate_expr", gp.genHalfAndHalf, pset=primitive_set,
                  min_=kTreeMinDepth, max_=kTreeMaxDepth)
    tbox.register("generate_ind_tree", tools.initIterate, creator.Individual,
                  tbox.generate_expr)
    tbox.register("generate_population", tools.initRepeat, list, tbox.generate_ind_tree)
    tbox.register("compile", gp.compile, pset=primitive_set)
    # The toolbox itself is bound as an argument because the fitness function
    # calls toolbox.compile(). FitnessEvaluationMod takes the same arguments
    # and can be registered here instead to add a tree-size term.
    tbox.register("evaluate", FitnessEvaluation, training_instances, feature_size,
                  window_size, tbox)
    tbox.register("select", tools.selTournament, tournsize=kTournamentSize)
    tbox.register("mate", gp.cxOnePoint)
    tbox.register("expr_mut", gp.genFull, min_=1, max_=2, type_=float)
    tbox.register("mutate", gp.mutUniform, expr=tbox.expr_mut, pset=primitive_set)
    #enforce size constraint over generated individuals
    height_limit = gp.staticLimit(key=operator.attrgetter("height"),
                                  max_value=kTreeMaxDepth)
    tbox.decorate("mate", height_limit)
    tbox.decorate("mutate", height_limit)

    return tbox

In [3]:
import cv2
import numpy as np

class InstancesFeatures:
    """Feature vectors, true class ids and 1-NN labels for a set of images.

    Features are stored column-wise: the feature matrix has shape
    ``(n_features, n_instances)``. A 1-NN label of -1 marks an instance that
    has not been labelled yet.
    """

    def __init__(self, n_instances, n_features):
        """Allocate zeroed storage for ``n_instances`` vectors of ``n_features``."""
        self.class_ids_ = np.zeros(shape=n_instances, dtype=int)
        self.class_instances_ = np.zeros(shape=n_instances, dtype=int)
        self.feature_matrix_ = np.zeros(shape=(n_features, n_instances), dtype=int)
        self.label_1nn_ = -1*np.ones(shape=n_instances, dtype=int)

    def populate(self, ind_lambda, sample_instances, window_size,
                 base_path=r'C:\\Kylberg\\'):
        """Load every sampled image and store its feature vector.

        Args:
            ind_lambda: callable applied to each pixel window by
                ``FeatureExtraction``.
            sample_instances: iterable of (class_id, [instance indices]) pairs.
            window_size: side of the square pixel window.
            base_path: directory prefix of the dataset. Images are read from
                ``<base_path><class_id>\\<index>.png`` where the index is
                zero-padded to at least two digits.

        Raises:
            IOError: if an image cannot be read.
        """
        n_features = self.feature_matrix_.shape[0]
        set_idx = 0
        for class_id, instances in sample_instances:
            for inst in instances:
                # Use the whole index string: taking only its first two
                # characters made indices >= 100 load the wrong file
                # (e.g. 105 -> "10.png").
                # NOTE(review): assumes such files are named "100.png" etc.
                img_path = base_path + str(class_id) + '\\' + indexToString(inst) + '.png'
                gray = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                if gray is None:
                    # cv2.imread signals failure by returning None; fail here
                    # with the offending path rather than inside cv2.resize.
                    raise IOError('Could not read image: ' + img_path)
                img = cv2.resize(gray, (115, 115), interpolation=cv2.INTER_AREA)
                #TODO: maybe normalize the input image.
                # NOTE(review): looks like a debugging dump (overwritten for
                # every image); kept to preserve behaviour.
                np.savetxt('f.csv', img, delimiter=',')
                # Compute patch feature vector
                fv = FeatureExtraction(img, ind_lambda, n_features, window_size)
                # Fill the feature vector matrix
                self.addInstance(set_idx, class_id, inst, fv)
                set_idx = set_idx + 1

    def addInstance(self, idx, class_id, class_instance, features):
        """Store class id, instance index and feature vector in column ``idx``."""
        self.class_ids_[idx] = class_id
        self.class_instances_[idx] = class_instance
        self.feature_matrix_[:,idx] = features

    def labelInstance(self, idx, label):
        """Record ``label`` as the 1-NN prediction for instance ``idx``."""
        self.label_1nn_[idx] = label

    def correctClassifications(self):
        """Compare the 1-NN labels with the true class ids.

        Returns:
            Tuple ``(labels, acc)``. ``labels[i]`` is 1 for a correct
            prediction, 0 for a wrong one and -1 for an unlabelled instance.
            ``acc`` is the fraction of labelled instances that are correct,
            or 0.0 when no instance has been labelled.
        """
        labelled = self.label_1nn_ != -1
        hits = labelled & (self.class_ids_ == self.label_1nn_)
        labels = np.where(labelled, hits.astype(int), -1)

        total = int(np.count_nonzero(labelled))
        # Guard against division by zero when nothing was labelled yet.
        acc = (1.0*int(np.count_nonzero(hits)))/total if total else 0.0

        return (labels, acc)

    def numInstances(self):
        """Return the number of instances this container was sized for."""
        return len(self.class_ids_)

    def featuresVector(self, col_idx):
        """Return the feature vector (matrix column) of instance ``col_idx``."""
        return self.feature_matrix_[:, col_idx]

    def featuresMatrix(self):
        """Return the full ``(n_features, n_instances)`` feature matrix."""
        return self.feature_matrix_


def indexToString(inst_idx):
    """Return ``inst_idx`` as text, prefixed with one '0' when it is below 10."""
    padding = '0' if inst_idx < 10 else ''
    return padding + str(inst_idx)

def FeatureExtraction(img, individual_lambda, features_len, window_size):
    """Build the histogram of binary codes an individual produces over an image.

    For every pixel whose ``window_size`` x ``window_size`` neighbourhood fits
    inside the image, the window values returned by ``LinearWindow`` are
    passed to ``individual_lambda``. Each output is thresholded at 0 to give
    one bit; the bits, most significant first, form an integer code whose
    histogram bin is incremented.

    Args:
        img: 2-D image array.
        individual_lambda: callable taking the window values as separate
            arguments and returning a sequence of numbers.
        features_len: number of histogram bins; must exceed the largest code
            the individual can produce (2**n_outputs - 1).
        window_size: side of the square window.

    Returns:
        Integer array of length ``features_len`` with the code counts. It is
        all zeros when the image is too small to hold a single window.
    """
    half = window_size//2
    row_end = img.shape[0] - half
    col_end = img.shape[1] - half

    # Builtin int: the np.int alias was removed in NumPy 1.24.
    features = np.zeros(features_len, dtype=int)
    # iterate over image pixels to fill the feature vector (histogram)
    for r in range(half, row_end):
        for c in range(half, col_end):
            window = LinearWindow(img, window_size, (r, c))

            bits = np.array(individual_lambda(*window)) > 0.0

            # Pack the bits into a plain Python int, first output as MSB.
            code = 0
            for bit in bits:
                code = (code << 1) | int(bit)

            features[code] += 1

    return features

In [4]:
import numpy as np
import cv2

def LinearWindow(img, window_size, xxx_todo_changeme):
    """Return ``window_size**2`` values describing the window centred on a pixel.

    The square window of side ``window_size`` centred at ``(row, col)`` is
    summarised by four statistics: minimum, maximum, standard deviation and
    mean. The result is a list of ``window_size**2`` values drawn at random,
    with replacement, from those four statistics, so repeated calls on the
    same window can differ.

    Args:
        img: 2-D image array.
        window_size: side of the square window.
        xxx_todo_changeme: ``(row, col)`` pair with the window centre. The
            whole window must lie inside the image, otherwise the reshape
            below fails.
    """
    row, col = xxx_todo_changeme
    half = window_size//2
    n_values = window_size**2

    patch = img[row - half:row + half + 1, col - half:col + half + 1]
    flat = patch.reshape(n_values)
    summary = [np.min(flat), np.max(flat), np.std(flat), np.mean(flat)]
    return random.choices(summary, k=n_values)
    
def TESTofScope():
    """Placeholder that always returns 0."""
    result = 0
    return result

# Limits what a star-import of this code exposes to LinearWindow only;
# TESTofScope is not listed.
__all__ = ['LinearWindow']

In [None]:
import random

from deap import algorithms, gp, tools
import numpy as np

# ======================= PARAMETERS =======================
# Trailing "was/alt" notes reproduce the values the original author left
# next to each setting (presumably earlier or alternative choices).

# --- Statistics: how often the experiment is repeated ---
kNRoundsClasses = 1         # rounds of random class selection
kNRoundsInstances = 15      # evolution runs per class selection (was/alt: 15)

# --- Training data ---
kNClassesDataset = 28       # class ids are drawn from 1..kNClassesDataset
kNTrainingClasses = 10      # classes sampled per round (was/alt: 10)
kClassesSize = 160          # first half is sampled for training; tests start at the midpoint
kNTrainingInstances = 2     # training images sampled per class

# --- Algorithm ---
kCodeSize = 5               # outputs (bits) per individual (was/alt: 5-7)
kWindowSize = 5             # side of the pixel window (was/alt: 5)

# --- Evolution ---
kPopSize = 50               # population size (was/alt: 25)
kXOverRate = 0.8            # crossover probability
kMutRate = 0.2              # mutation probability
kElitRate = 0.01
kMaxGenerations = 20        # generations per run (was/alt: 30)

# --- Test set ---
kNTestInstances = 30        # test images per class
# ==========================================================

def ComputeAccuracyOverTestSet(best_ind, training_instances, n_test_instances, test_base_idx,
                                features_len, window_size):
    """Return the 1-NN accuracy of a compiled individual on held-out images.

    The training instances are turned into feature vectors and used as the
    1-NN reference set. For every training class, the instances
    ``test_base_idx .. test_base_idx + n_test_instances - 1`` form the test
    set; each one receives the class of its nearest (Euclidean) training
    vector. The feature matrices are also dumped to "test.csv" and
    "train.csv" in the working directory, one instance per row.

    Args:
        best_ind: callable (compiled individual) applied to each pixel window.
        training_instances: list of (class_id, [instance indices]) pairs;
            every class is expected to hold the same number of instances.
        n_test_instances: number of test images per class.
        test_base_idx: index of the first test image of every class.
        features_len: length of the feature vector per image.
        window_size: side of the square pixel window.

    Returns:
        Fraction of test instances that were assigned their true class.
    """
    kNClasses = len(training_instances)
    # Instances per class: length of the instance list of the first sample.
    # (The previous len(training_instances[1]) measured the
    # (class_id, instances) pair itself, which is always 2.)
    kNTrainInstances = len(training_instances[0][1])

    # Compute training features: 1NN base of classification
    train_set = InstancesFeatures(kNClasses*kNTrainInstances, features_len)
    train_set.populate(best_ind, training_instances, window_size)

    # Create set of test instances: the same index range for every class
    test_instances = [
        (t_i[0], list(range(test_base_idx, test_base_idx + n_test_instances)))
        for t_i in training_instances
    ]

    # Compute the test features
    test_set = InstancesFeatures(kNClasses*n_test_instances, features_len)
    test_set.populate(best_ind, test_instances, window_size)
    test_matrix = test_set.featuresMatrix()
    np.savetxt("test.csv", test_matrix.T, delimiter=",")

    # 1NN classification
    train_matrix = train_set.featuresMatrix()
    np.savetxt("train.csv", train_matrix.T, delimiter=",")
    for i in range(test_set.numInstances()):
        # Distance from test vector i to every training column at once
        offsets = train_matrix - test_set.featuresVector(i)[:, np.newaxis]
        dists = np.linalg.norm(offsets, axis=0)

        min_idx = np.argmin(dists)
        # Training columns are grouped by class, kNTrainInstances per class
        label_idx = min_idx // kNTrainInstances
        test_set.labelInstance(i, training_instances[label_idx][0])

    return test_set.correctClassifications()[1]

# One histogram bin per possible binary code of kCodeSize bits.
kFeatureSize = 2**kCodeSize
print('Parameters: \n(classes = ' + str(kNTrainingClasses)
      + ' code = ' + str(kCodeSize)
      + ' pop = ' + str(kPopSize)
      + ' gen = ' + str(kMaxGenerations)
      + ' tests_insts = ' + str(kNTestInstances) + ')\n')

classes = list(range(1, kNClassesDataset+1))

#>Iterate over classes
for n_cl in range(kNRoundsClasses):
    # Randomize classes to use on experiment (kept in ascending order)
    sample_classes = sorted(random.sample(classes, kNTrainingClasses))

    # For each selected class, randomize the training instances; they are
    # drawn from the first half of the class, the second half is for testing.
    sample_instances = [
        (class_id, random.sample(list(range(0, kClassesSize//2)), kNTrainingInstances))
        for class_id in sample_classes
    ]
    # Fixed sample kept from the original for reproducing a specific run:
    # sample_instances = [(3, [17, 31]), (24, [6, 45]), (9, [5, 15]), (18, [14, 35]), (23, [26, 43]), (19, [4, 45]), (7, [14, 21]), (17, [21, 27]), (10, [23, 37]), (12, [10, 7])]
    print(sample_instances)

    #>Define Evolution framework
    pset = CreatePrimitiveSet(kWindowSize, kCodeSize)
    tbox = DefineEvolutionToolbox(pset, sample_instances, kFeatureSize, kWindowSize)

    #>Define Log structure: fitness and tree height, summarised per generation
    stats_fit = tools.Statistics(lambda ind: ind.fitness.values)
    stats_size = tools.Statistics(lambda ind: ind.height)
    mstats = tools.MultiStatistics(fitness=stats_fit, height=stats_size)
    for stat_name, stat_fn in (("avg", np.mean), ("std", np.std),
                               ("min", np.min), ("max", np.max)):
        mstats.register(stat_name, stat_fn)

    #>RERUN THE ALGORITHM WITH DIFFERENT SEEDS
    accs = np.zeros(shape=kNRoundsInstances)
    for n_in in range(kNRoundsInstances):
        print('\nIteration #: ', n_in)
        # Generate population; the hall of fame keeps the single best individual
        pop = tbox.generate_population(kPopSize)
        hof = tools.HallOfFame(1)

        # Evolutionary algorithm call
        # (The original notes that this call should be wrapped in a try/except
        # that skips the iteration on failure once debugging is complete.)
        pop, log = algorithms.eaSimple(pop, tbox, kXOverRate, kMutRate, kMaxGenerations,
                                       stats=mstats, halloffame=hof, verbose=True)

        print('\nBest individual:\n', hof[0])

        # Optional persistence of the run, disabled in the original:
        # fh_log = open('./../results/' + str(n_cl) + '_' + str(n_in) + '.log', 'w')
        # fh_hof = open('./../results/' + str(n_cl) + '_' + str(n_in) + '.hof', 'w')
        # pickle.dump(log, fh_log)
        # pickle.dump(hof[0], fh_hof)
        # fh_log.close()
        # fh_hof.close()

        # Score the best individual on the held-out half of each class
        ind_lambda = tbox.compile(expr=hof[0])
        accs[n_in] = ComputeAccuracyOverTestSet(ind_lambda, sample_instances, kNTestInstances,
                                                kClassesSize//2, kFeatureSize, kWindowSize)
        print('Accuracy = ', accs[n_in])

    print('Accuracies = ', accs)
    print('Mean = ', np.mean(accs))
    print('Std Dev = ', np.std(accs))

Parameters: 
(classes = 10 code = 5 pop = 50 gen = 20 tests_insts = 30)

[(3, [69, 26]), (4, [42, 18]), (10, [36, 59]), (15, [42, 63]), (17, [54, 30]), (19, [5, 50]), (21, [4, 57]), (22, [66, 12]), (25, [45, 30]), (26, [63, 69])]

Iteration #:  0




   	      	                        fitness                         	             height            
   	      	--------------------------------------------------------	-------------------------------
gen	nevals	avg     	max     	min     	std     	avg 	max	min	std    
0  	50    	0.707854	0.754621	0.600365	0.043393	4.72	10 	2  	2.60799
1  	38    	0.67256 	0.752797	0.550254	0.0429132	4.6 	10 	2  	1.76635
2  	44    	0.6603  	0.754086	0.574937	0.0445226	4.42	9  	2  	1.28203
3  	43    	0.648946	0.753449	0.500091	0.0653148	4.34	8  	2  	1.47797
4  	41    	0.626251	0.754068	0.47363 	0.0714086	5.42	9  	2  	1.62592
5  	48    	0.638256	0.754415	0.52431 	0.0591853	6.26	8  	3  	1.61009
6  	41    	0.617115	0.728206	0.524505	0.0521837	6.2 	10 	3  	1.86548
7  	43    	0.613055	0.703979	0.449995	0.059327 	6.86	10 	3  	1.37128
8  	45    	0.617334	0.727777	0.449995	0.0649456	7   	10 	4  	1.29615
9  	42    	0.58867 	0.703066	0.448537	0.0722806	6.76	9  	1  	1.60698
10 	43    	0.590525	0.70089 	0.448537	0.064