In [91]:
from pyeasyga import pyeasyga
import random 
from itertools import zip_longest
from copy import deepcopy

pics = []
# file = 'data/a_example.txt'
# file = 'data/b_lovely_landscapes.txt'
# file = 'data/c_memorable_moments.txt'
file = 'data/d_pet_pictures.txt'

with open(file) as f:
    f.readline()
    for line in f.readlines():
        parsed_line = line.split(' ')
        tags = parsed_line[2:]
        tags[-1] = tags[-1].strip('\n')
        tags = set(tags)
        pics.append({'type': parsed_line[0], 'tags': tags})

In [92]:
# ------------------------------ Create Individual ------------------------------ //
def grouper(iterable, n, fillvalue=None):
    args = [iter(iterable)] * n
    return zip_longest(fillvalue=fillvalue, *args)

def generate_population(picture_list: list, population: list, pop_size: int):
    population = deepcopy(population)
    v_len = 0
    h_len = 0
    v_pics = []
    h_pics = []
    for index, pic in enumerate(picture_list):
        if pic['type'] == 'V':
            v_len += 1
            v_pics.append(index)
        else:
            h_len += 1
            h_pics.append(index)
    for i in range(pop_size):
        individual = []
        random.shuffle(v_pics)
        for pic1, pic2 in grouper(v_pics, 2):
            individual.append([pic1, pic2])
        #print(individual)
        for pic in h_pics:
            individual.append([pic])
        random.shuffle(individual)
        population.append(individual)
    return population


def create_individual(data):
    individual = []
    v_pics = []
    h_pics = []
    for index, pic in enumerate(pics):
        if pic['type'] == 'V':
            v_pics.append(index)
        else:
            h_pics.append(index)
    random.shuffle(v_pics)
    for pic1, pic2 in grouper(v_pics, 2):
        individual.append([pic1, pic2])
    for pic in h_pics:
        individual.append([pic])
    random.shuffle(individual)
    return individual

# ---------------------------------------------------------------------- //

pop = generate_population(pics, [], 10)

ga = pyeasyga.GeneticAlgorithm(pop,
                               population_size=10,
                               generations=20,
                               crossover_probability=0.8,
                               mutation_probability=0.05,
                               elitism=True,
                               maximise_fitness=True)

ga.create_individual = create_individual


# For the crossover function, supply two individuals (i.e. candidate
# solution representations) as parameters,

import random

def deduplicate(slides):
    used_images = set([])
    new_slides = []
    
    for slide in slides:
        has_two_slides = len(slide) == 2
        is_not_duplicate = (slide[0] not in used_images) and (not has_two_slides or (slide[1] not in used_images))
            
        if is_not_duplicate:
            new_slides.append(slide)
            
            # if slide[0]:
            used_images.add(slide[0])
            if has_two_slides:
                used_images.add(slide[1])
    
    return new_slides

def crossover(parent_1, parent_2):
    len_1 = len(parent_1)
    len_2 = len(parent_2)
#     print("CROSSOVER", parent_1, "-", parent_2)
    max_index = min(len_2, len_1)
    
    if max_index <= 1:
        return parent_1, parent_2
    
    index = random.randrange(1, max_index)
    child_1 = parent_1[:index] + parent_2[index:]
    child_2 = parent_2[:index] + parent_1[index:]
    
    return deduplicate(child_1), deduplicate(child_2)

# and set the Genetic Algorithm's ``crossover_function`` attribute to
# your defined function
ga.crossover_function = crossover


# ------------------------------ Mutate Function ------------------------------ //
# def mutate(individual):
#     mutate_index = random.randrange(len(individual))
#     if individual[mutate_index] == 0:
#         individual[mutate_index] == 1
#     else:
#         individual[mutate_index] == 0
def mutate(slides):
    def is_possible(percentage):
        return random.randint(0, 100) <= percentage
    
    # mutate by swapping two genes
    if (is_possible(50)):
        r = len(slides) - 1
        x = random.randint(0, r)
        y = random.randint(0, r)
        if x != y:
            c = slides[x].copy()
            slides[x] = slides[y].copy()
            slides[y] = c
            
    # pridanie noveho slajdu???
        
    return slides
        
    
ga.mutate_function = mutate
# ---------------------------------------------------------------------- //


# For the selection function, supply a ``population`` parameter
def selection(population):
    return random.choice(population)

# and set the Genetic Algorithm's ``selection_function`` attribute to
# your defined function
ga.selection_function = selection


# ------------------------------ Fitness ------------------------------ //
def get_tags(slide):
    if len(slide) == 1:
        return pics[slide[0]]['tags']
    else:
        return pics[slide[0]]['tags'] | pics[slide[1]]['tags']


def evaluate(individual, data):
    
    score = 0
    prev_tags = get_tags(individual[0])
    for slide in individual[1:]:
        cur_tags = get_tags(slide)
        common = prev_tags | cur_tags
        only_in_prev = prev_tags - cur_tags
        only_in_cur = cur_tags - prev_tags
        score += min(len(common), len(only_in_prev), len(only_in_cur))
        prev_tags = cur_tags
        
    return score

ga.fitness_function = evaluate
# ---------------------------------------------------------------------- //



ga.run()

# print(ga.best_individual())

# for individual in ga.last_generation():
#     print(individual)

print("BEST - ", ga.best_individual())

BEST -  (459076, [[14202], [9205], [31176, 60817], [65278, 54499], [26071], [5106], [3214], [37856], [21440, 41833], [14299], [19116], [66978], [80769], [63082], [9404], [89799, 71004], [66895], [75802], [38131], [3609, 25443], [86642, 63983], [71914, 18315], [34486], [77959, 51422], [16041, 82988], [84198], [82470], [4850, 18648], [33119], [24146, 86297], [75664, 74803], [74188, 48496], [84254], [41894, 83240], [88683], [75821], [76059], [50505], [60951, 6961], [59183], [60581], [37544, 14424], [14816], [21591, 80664], [81683], [75361], [85274], [16145, 65863], [31514], [22711, 30231], [41035, 52996], [39230, 2067], [21117, 44811], [61930, 21403], [60784, 48487], [11699], [22215, 771], [18364, 40418], [24764], [52474, 9743], [62292], [31962], [12563, 53733], [9877, 43029], [9675], [74143], [16803], [10030, 320], [24566, 3150], [73415], [73430, 61452], [48218, 48280], [33353], [64780, 6774], [11153], [82581], [21658, 36795], [82218], [87735, 41239], [4810], [25906, 25497], [55353, 6621

In [None]:
with open('output.txt', 'w+') as file:
    individual = ga.best_individual()[1]
    file.write(f'{len(individual)}\n')
    for slide in individual:
        if len(slide) == 2:
            file.write(f'{slide[0]} {slide[1]}\n')
        else:
            file.write(f'{slide[0]}')
