In [2]:
import string
from deap import base
from deap import creator
from deap import tools
import random
import numpy as np

In [1]:
def cxTwoPointCopy(ind1, ind2):
    """Execute a two points crossover with copy on the input individuals. The
    copy is required because the slicing in numpy returns a view of the data,
    which leads to a self overwritting in the swap operation. It prevents
    ::
    
        >>> import numpy
        >>> a = numpy.array((1,2,3,4))
        >>> b = numpy.array((5,6,7,8))
        >>> a[1:3], b[1:3] = b[1:3], a[1:3]
        >>> print(a)
        [1 6 7 4]
        >>> print(b)
        [5 6 7 8]
    """
    size = len(ind1)
    cxpoint1 = random.randint(1, size)
    cxpoint2 = random.randint(1, size - 1)
    if cxpoint2 >= cxpoint1:
        cxpoint2 += 1
    else: # Swap the two cx points
        cxpoint1, cxpoint2 = cxpoint2, cxpoint1

    ind1[cxpoint1:cxpoint2], ind2[cxpoint1:cxpoint2] \
        = ind2[cxpoint1:cxpoint2].copy(), ind1[cxpoint1:cxpoint2].copy()
        
    return ind1, ind2

In [3]:
from NCF_clustering_fusion import compute_solutions_complexity, check_correlative_cluster_labels

In [10]:
V1 = np.array([0,0,0,1,1,2,2,2,2,1], dtype=np.int)
K_V1 = np.unique(V1).shape[0]
V2 = np.array([0,0,1,0,0,1,1,2,2,2], dtype=np.int)
K_V2 = np.unique(V2).shape[0]

compute_solutions_complexity(np.array([0,0,0,0,0,0,0,0,0,0], dtype=np.int),V1, 1, K_V1)

(array([2, 0, 0]), {0: 0, 1: 0, 2: 0}, 12.0)

In [4]:
def repairCorrelative(A):
    current_ids = np.unique(A)
    L = len(current_ids)
    correct_ids = np.arange(L)
    for i,j in zip(current_ids, correct_ids):
        if i != j:
            A[np.where(A == i)[0]] = j

In [None]:
A = np.array([3,0,3,1,1,2,0])
print(A)
repairCorrelative(A)
print(A)

In [11]:
def evalMatching(C, A=None, B=None, K_A=None, K_B=None):
    #K_A = np.unique(A).shape[0] #can be computed outside
    #K_B = np.unique(B).shape[0] #can be computed outside
    repairCorrelative(C)
    K_C = np.unique(C).shape[0]

    K_C_A = compute_solutions_complexity(C, A, K_C, K_A)[2]
    K_C_B = compute_solutions_complexity(C, B, K_C, K_B)[2]
    
    K_A_C = compute_solutions_complexity(A, C, K_A, K_C)[2]
    K_B_C = compute_solutions_complexity(B, C, K_B, K_C)[2]
    
    return K_C_A + K_C_B + K_A_C + K_B_C,

In [12]:
creator.create("Fitness", base.Fitness, weights=(-1.0,))
creator.create("Individual", np.ndarray, fitness=creator.Fitness)

V1 = np.array([0,0,0,1,1,2,2,2,2,1], dtype=np.int)
K_V1 = np.unique(V1).shape[0]
V2 = np.array([0,0,1,0,0,1,1,2,2,2], dtype=np.int)
K_V2 = np.unique(V2).shape[0]

NCLUSTERS = 3
NPTS = V1.shape[0]


tb1 = base.Toolbox()
tb1.register("attr_item", random.randint, 0, NCLUSTERS) # each gene corresponds to a chr
tb1.register("individual", tools.initRepeat, creator.Individual, tb1.attr_item, NPTS)
tb1.register("population", tools.initRepeat, list, tb1.individual)

tb1.register("evaluate", evalMatching, A=V2, B=V1, K_A=K_V1, K_B=K_V2)
tb1.register("mate", cxTwoPointCopy)
tb1.register("mutate", tools.mutFlipBit, indpb=0.05)
tb1.register("select", tools.selTournament, tournsize=3)



In [13]:
from deap import algorithms

def simple_run():

    random.seed(64)
    
    pop = tb1.population(n=300)
    
    # Numpy equality function (operators.eq) between two arrays returns the
    # equality element wise, which raises an exception in the if similar()
    # check of the hall of fame. Using a different equality function like
    # numpy.array_equal or numpy.allclose solve this issue.
    hof = tools.HallOfFame(1, similar=np.array_equal)
    
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", np.mean)
    stats.register("std", np.std)
    stats.register("min", np.min)
    stats.register("max", np.max)
    
    algorithms.eaSimple(pop, tb1, cxpb=0.5, mutpb=0.2, ngen=40, stats=stats,
                        halloffame=hof)

    return pop, stats, hof

In [14]:
p,s,h = simple_run()

gen	nevals	avg   	std    	min	max
0  	300   	148.44	26.1972	68 	203
1  	168   	130.327	23.5348	80 	178
2  	193   	114.093	18.5978	68 	168
3  	162   	102.897	13.6845	64 	150
4  	177   	95.5067	13.6422	56 	136
5  	174   	91.0833	13.5495	64 	140
6  	161   	85.3467	13.4719	64 	132
7  	185   	82.1067	15.4933	56 	132
8  	176   	79     	15.8531	56 	132
9  	178   	77.3467	17.7704	56 	136
10 	176   	71.8667	15.3773	56 	140
11 	169   	69.2533	17.5066	56 	140
12 	167   	65.5467	16.5234	56 	132
13 	191   	60.9733	12.7148	56 	128
14 	176   	57.92  	7.12883	56 	120
15 	176   	57.4933	6.08468	56 	100
16 	178   	57.1067	5.25629	56 	112
17 	173   	57.12  	4.52831	56 	92 
18 	176   	57.44  	5.55755	56 	100
19 	164   	57.2   	4.33897	56 	80 
20 	190   	57.4267	5.06405	56 	88 
21 	192   	57.6933	6.4961 	56 	112
22 	184   	57.8133	5.76123	56 	96 
23 	194   	57.4   	5.81607	56 	108
24 	177   	57.9733	7.82129	56 	136
25 	192   	57.68  	5.99146	56 	112
26 	182   	57.2667	5.59484	56 	112
27 	174   	57.4667	4.9

In [15]:
best_ind = tools.selBest(p, 1)[0]
print("Best individual is %s , %s" % (best_ind, best_ind.fitness.values))

Best individual is [2 2 0 1 1 0 0 0 0 1] , (56.0,)
