In [9]:
from pyeasyga import pyeasyga
import random 
from itertools import zip_longest
from copy import deepcopy


def pls_my_friends(fileName: str) -> None:
    """Evolve a slideshow for one input file and write the best one found.

    Reads ``data/<fileName>``, runs a pyeasyga genetic algorithm over slide
    orderings, prints the best individual and writes it to
    ``output/<fileName>``.

    Input, as parsed here: the first line is skipped; each following
    non-blank line is whitespace-separated, with the picture type in the
    first field (``V`` pictures are paired two per slide, every other type
    gets a slide of its own), a second field that is ignored, and the tags
    from the third field on.  (Presumably the Hash Code 2019 "Photo
    slideshow" format -- confirm against the data files.)

    Output: the number of slides on the first line, then one line per slide
    holding its zero-based picture index (or two indices separated by a
    space).

    Args:
        fileName: Name of the file inside ``data/``; the same name is used
            for the result inside ``output/``.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
    """
    import os  # local import: only needed to make sure the output dir exists

    # ------------------------------ Input ------------------------------ //
    pics = []
    with open('data/' + fileName) as f:
        f.readline()  # header line; its content is not used
        for line in f:
            fields = line.split()
            if not fields:
                # Blank (e.g. trailing) lines carry no picture.
                continue
            # fields[1] is ignored; a picture without tags gets an empty set.
            pics.append({'type': fields[0], 'tags': set(fields[2:])})

    # Picture indices by orientation.  They never change, so compute them
    # once here instead of on every individual that gets created.
    vertical = [i for i, pic in enumerate(pics) if pic['type'] == 'V']
    horizontal = [i for i, pic in enumerate(pics) if pic['type'] != 'V']

    # ------------------------------ Create Individual ------------------------------ //
    def create_individual(data):
        """Return a random slideshow as a shuffled list of slides.

        A slide is ``[index]`` for a non-vertical picture or
        ``[index, index]`` for two vertical pictures.  ``data`` is the seed
        data handed over by pyeasyga; it is not needed because the picture
        lists are taken from the enclosing scope.
        """
        shuffled = vertical[:]
        random.shuffle(shuffled)
        # Pair consecutive shuffled vertical pictures.  With an odd count the
        # last one is left out rather than padded with None, which could not
        # be looked up in ``pics`` nor written to the output file.
        individual = [[a, b] for a, b in zip(shuffled[::2], shuffled[1::2])]
        individual.extend([index] for index in horizontal)
        random.shuffle(individual)
        return individual

    # ---------------------------------------------------------------------- //

    # The seed data is only ever passed back to create_individual() and
    # evaluate(), both of which ignore it, so the parsed pictures are handed
    # over instead of building a throw-away population up front.
    ga = pyeasyga.GeneticAlgorithm(pics,
                                   population_size=10,
                                   generations=20,
                                   crossover_probability=0.8,
                                   mutation_probability=0.05,
                                   elitism=True,
                                   maximise_fitness=True)

    ga.create_individual = create_individual

    # ------------------------------ Crossover ------------------------------ //
    def deduplicate(slides):
        """Return ``slides`` without any slide that reuses a picture.

        Slides are scanned in order; a slide is dropped entirely as soon as
        one of its pictures already appeared in a slide that was kept.
        """
        used_images = set()
        new_slides = []
        for slide in slides:
            if used_images.isdisjoint(slide):
                new_slides.append(slide)
                used_images.update(slide)
        return new_slides

    def crossover(parent_1, parent_2):
        """Single-point crossover followed by removal of duplicate pictures.

        The cut point is drawn from ``1 .. min(len) - 1``.  Parents with at
        most one slide are returned unchanged.  Children can be shorter than
        their parents because clashing slides are dropped, not repaired.
        """
        max_index = min(len(parent_1), len(parent_2))
        if max_index <= 1:
            return parent_1, parent_2

        index = random.randrange(1, max_index)
        child_1 = parent_1[:index] + parent_2[index:]
        child_2 = parent_2[:index] + parent_1[index:]
        return deduplicate(child_1), deduplicate(child_2)

    ga.crossover_function = crossover

    # ------------------------------ Mutate Function ------------------------------ //
    def mutate(slides):
        """Swap two randomly chosen slides in place with 50% probability.

        Returns the same list object.  Slideshows with fewer than two slides
        are left untouched (there is nothing to swap).
        """
        if len(slides) < 2:
            return slides

        # random.random() < 0.5 is an exact 50% gate; the earlier
        # randint(0, 100) <= 50 test fired in 51 out of 101 cases.
        if random.random() < 0.5:
            x = random.randrange(len(slides))
            y = random.randrange(len(slides))
            # x == y simply leaves the slideshow unchanged.
            slides[x], slides[y] = slides[y], slides[x]

        # TODO: add a brand-new slide as another kind of mutation?

        return slides

    ga.mutate_function = mutate
    # ---------------------------------------------------------------------- //

    # ------------------------------ Selection ------------------------------ //
    def selection(population):
        """Pick a parent uniformly at random (no fitness pressure)."""
        return random.choice(population)

    ga.selection_function = selection

    # ------------------------------ Fitness ------------------------------ //
    def get_tags(slide):
        """Return the tag set of a slide (union of tags for a two-picture slide)."""
        if len(slide) == 1:
            return pics[slide[0]]['tags']
        return pics[slide[0]]['tags'] | pics[slide[1]]['tags']

    def evaluate(individual, data):
        """Score a slideshow by summing the interest of each adjacent slide pair.

        For two neighbouring slides the contribution is the minimum of: the
        number of shared tags, the number of tags only in the first slide,
        and the number of tags only in the second.  An empty slideshow scores
        0.  ``data`` (pyeasyga's seed data) is not used.
        """
        slides = iter(individual)
        first = next(slides, None)
        if first is None:
            return 0

        score = 0
        prev_tags = get_tags(first)
        for slide in slides:
            cur_tags = get_tags(slide)
            score += min(len(prev_tags & cur_tags),
                         len(prev_tags - cur_tags),
                         len(cur_tags - prev_tags))
            prev_tags = cur_tags
        return score

    ga.fitness_function = evaluate
    # ---------------------------------------------------------------------- //

    ga.run()

    # best_individual() yields a (fitness, slides) pair.
    best = ga.best_individual()
    print("BEST - ", best)

    out_path = 'output/' + fileName
    # Create the target directory if needed so a finished run is not lost to
    # a missing folder.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    with open(out_path, 'w') as file:
        individual = best[1]
        file.write(f'{len(individual)}\n')
        file.writelines(' '.join(map(str, slide)) + '\n' for slide in individual)

In [14]:
# Driver cell: run the solver on one data set per execution.  The other
# inputs are kept as commented-out calls; uncomment a line to process that
# file from data/ and write its result under the same name in output/.
# pls_my_friends('a_example.txt')
# pls_my_friends('b_lovely_landscapes.txt')
# pls_my_friends('c_memorable_moments.txt')
# pls_my_friends('d_pet_pictures.txt')
pls_my_friends('e_shiny_selfies.txt')

BEST -  (112865, [[79182, 25753], [28793, 68032], [73845, 24680], [515, 12748], [5195, 28838], [77121, 11514], [20679, 78295], [1997, 53450], [13716, 17227], [50611, 73608], [40337, 60697], [19936, 76122], [67841, 2431], [3083, 51108], [70094, 61508], [4105, 22129], [72465, 48270], [34045, 16090], [49340, 6575], [34702, 13218], [34921, 78769], [34134, 72043], [72945, 15590], [46457, 39143], [18093, 59098], [49877, 57217], [14506, 53188], [5116, 63925], [6092, 30173], [46060, 16894], [26768, 43597], [30812, 12759], [74893, 58000], [9491, 74463], [55298, 6678], [53867, 59281], [49989, 76729], [36816, 57365], [40183, 58540], [43735, 24780], [78175, 22889], [5640, 24736], [47364, 40663], [49861, 36144], [46631, 61152], [44079, 56815], [19368, 57237], [13537, 66380], [25048, 45638], [14362, 74284], [8782, 4717], [66237, 56067], [62433, 3141], [32564, 20989], [40350, 5130], [2041, 40151], [79998, 63422], [13266, 29082], [36608, 28851], [30855, 36462], [34927, 5346], [67033, 67986], [13846, 7