# Trabajo del seminario
# Por Gonzalo Pérez Vizuete
#### Github: https://github.com/comandantexd/03MAIR_Algoritmos_optimizacion/blob/master/seminario.ipynb 

## El ejercicio escogido es: Organizar los horarios de los partidos de liga
## El método seleccionado es: Algoritmo genético


## Se ha buscado la solución al problema que sale en el PDF, es decir la siguiente combinación de partidos:
### ('C', 'C'), ('B', 'B'), ('B', 'B'), ('B', 'A'), ('B', 'A'), ('B', 'C'), ('B', 'B'), ('B', 'C'), ('B', 'A'), ('C', 'C')
## La mejor solución encontrada para esta combinación es la siguiente:
### ('L',  20), ('S',  16), ('D',  18), ('D',  20), ('S',  18), ('S',  12), ('D',  16), ('D',  12), ('S',  20), ('V',  20)
### Con una puntuación total de: 6,521
## Análisis de complejidad:
Función chunks(): O(n)

Función i_min_max(): O(n)

Función evaluation(): O(n)

Función selection_cross(): O(n^2)

Función mutation(): O(n)

Función reselection(): O(n^2)

Función select_solution(): O(n)

Función optimize(): O(n)

## Análisis de la estructura de datos:
La estructura de datos elegida se basa en asociar cada categoría, horario y día a un índice numérico, de forma que:

Categorías: A -> 0, B -> 1, C -> 2

Días: V -> 0, S -> 1, D -> 2, L -> 3

Horas: 12 -> 0, 16 -> 1, 18 -> 2, 20 -> 3

De esta forma, he creado las estructuras de datos _audience y _hour_coef, donde se indexan los coeficientes pertinentes a los requisitos escritos en el PDF.
Y así, accediendo con el índice de las categorías, días u horas, obtenemos el dato de puntuación necesario.

In [2]:
from threading import Thread, Condition, Lock
from time import sleep, time
from IPython.display import clear_output
import random
import copy
import sys

# Generator that splits a sequence into consecutive blocks of the same size
def chunks(arr, n):
    """Yield consecutive slices of ``arr`` that hold ``n`` items each.

    The final slice is shorter when ``len(arr)`` is not a multiple of ``n``.

    :param arr: sliceable sequence (list, str, ...).
    :param n: step between slice starts and maximum slice length.
    """
    total = len(arr)
    yield from (arr[start:start + n] for start in range(0, total, n))

# Returns the index and the value of the smallest or largest element of a sequence
def i_min_max(arr, flag):
    """Locate the extreme element of ``arr``.

    :param arr: iterable of numbers.
    :param flag: string accepted by ``float()`` used as the starting sentinel.
        ``'inf'`` searches for the minimum; any other value (normally
        ``'-inf'``) searches for the maximum among the items that exceed it.
    :return: ``(index, value)`` of the first extreme element. When no item
        beats the sentinel (for instance ``arr`` is empty) the result is
        ``(0, float(flag))``.
    """
    best = float(flag)
    # Starting from +inf only makes sense when looking for the smallest item;
    # comparing with ">" in that case would never select anything.
    find_min = best == float('inf')
    i_best = 0
    for i, v in enumerate(arr):
        better = v < best if find_min else v > best
        if better:
            best = v
            i_best = i
    return i_best, best


def i_max(arr):
    """Return ``(index, value)`` of the first largest element of ``arr``."""
    return i_min_max(arr, '-inf')

def i_min(arr):
    """Return ``(index, value)`` of the first smallest element of ``arr``."""
    return i_min_max(arr, 'inf')

In [3]:
# Class that represents the environment the genetic algorithm works in
class Environment(object):
    """Static lookup tables shared by matches, genotypes and phenotypes.

    Categories, days and hours are handled as integer indices; the label lists
    below translate those indices back to human-readable values.
    """

    # Audience multiplier indexed by the number of coinciding matches.
    # The retained fractions correspond to losses of
    # [0, 25, 45, 60, 70, 75, 78, 80, 80] percent.
    _concurrent_loss = [
        1, 0.75, 0.55, 0.4, 0.3, 0.25, 0.22, 0.2, 0.2,
    ]

    # Base audience, indexed by the category indices of the two teams
    _audience = [
        [2, 1.3, 1],
        [1.3, 0.9, 0.75],
        [1, 0.75, 0.47],
    ]

    # Hour coefficient: rows are hour indices, columns are day indices.
    # The -1 entries sit on day/hour pairs that _days_hours never offers.
    _hour_coef = [
        [-1, 0.55, 0.45, -1],
        [-1, 0.7, 0.75, -1],
        [-1, 0.8, 0.5, -1],
        [0.4, 1, 1, 0.4],
    ]

    # Human-readable labels; each one lives at the index used to look up
    # the scoring coefficients above
    _categories = ['A', 'B', 'C']
    _days = ['V', 'S', 'D', 'L']
    _hours = [12, 16, 18, 20]

    # Hour indices available on each day index, so that a random day can be
    # drawn first and then a valid hour for it with random.choice()
    _days_hours = dict(enumerate([
        [3],
        [0, 1, 2, 3],
        [0, 1, 2, 3],
        [3],
    ]))


# Object that represents a single match
class Match(object):
    """A match between two categories placed on a day/hour slot.

    Every constructor argument is an index into the ``Environment`` tables.
    """

    def __init__(self, idx_homeCat, idx_awayCat, idx_day, idx_hour):
        self._hCat, self._aCat = idx_homeCat, idx_awayCat
        self._d, self._h = idx_day, idx_hour

    def get_raw_time(self):
        """Return the slot as a fresh ``[day_index, hour_index]`` list."""
        slot = [self._d, self._h]
        return slot

    def get_away_category(self):
        """Return the label of the away team's category."""
        labels = Environment._categories
        return labels[self._aCat]

    def get_home_category(self):
        """Return the label of the home team's category."""
        labels = Environment._categories
        return labels[self._hCat]

    def get_base(self):
        """Return the base audience for this pairing of categories."""
        home_row = Environment._audience[self._hCat]
        return home_row[self._aCat]

    def get_hour_coef(self):
        """Return the coefficient of the slot (table rows are hours, columns are days)."""
        hour_row = Environment._hour_coef[self._h]
        return hour_row[self._d]

    def get_score(self):
        """Return base audience multiplied by the slot coefficient."""
        base = self.get_base()
        coef = self.get_hour_coef()
        return base * coef

# Object that represents a genotype
class GenotypeTimetable(object):
    """Candidate schedule encoded as a list of ``[day_index, hour_index]`` genes."""

    def __init__(self, num_matches=20, chain=None):
        '''
        Genotype representation, list of lists: [[1, 2]] ([[D, H], [D, H]])
        where in pos 0 is the index of day, and in pos 1 the index of hour

        :param num_matches: number of random genes created when ``chain`` is None.
        :param chain: existing list of genes; it is adopted as-is (not copied).
        '''
        if chain is not None:
            self._chain = chain
        else:
            self._chain = [self._random_gene() for _ in range(num_matches)]

    @staticmethod
    def _random_gene():
        """Draw a random day and then a random hour that is valid for that day."""
        days = list(Environment._days_hours.keys())
        idx_d = random.choice(days)
        idx_h = random.choice(Environment._days_hours[idx_d])
        return [idx_d, idx_h]

    # Returns a human-readable representation of the genotype
    def get_applicable_chain(self):
        """Return the chain as ``[day_label, hour_value]`` pairs."""
        return [[Environment._days[day], Environment._hours[hour]]
                for (day, hour) in self._chain]

    # The original (misspelled) name is kept so existing callers keep working
    get_applciable_chain = get_applicable_chain

    # Mutates one gene of this genotype
    def mutate(self):
        """Replace one randomly chosen gene, in place, with a new random slot.

        An empty chain has nothing to mutate and is left untouched.
        """
        if not self._chain:
            return
        chrom = random.randrange(len(self._chain))
        # Slice assignment keeps the gene's list object, as the original
        # element-wise assignment did
        self._chain[chrom][:] = self._random_gene()

    # Crosses this genotype with the one passed as parameter and returns a
    # new one holding the result of the crossover
    def cross(self, genotype):
        """Single-point crossover.

        A cut position is drawn from ``0 .. len(self) - 1`` and a coin flip
        decides which parent provides the head. Genes are copied, so the child
        never shares gene lists with its parents.

        :param genotype: the other parent (a ``GenotypeTimetable``).
        :return: a new ``GenotypeTimetable``.
        """
        own, other = self._chain, genotype._chain

        # With an empty chain there is no position to draw; cut at 0
        sl = random.randrange(len(own)) if own else 0
        if random.choice([0, 1]) == 0:
            head, tail = own, other
        else:
            head, tail = other, own

        child = [list(gene) for gene in head[:sl] + tail[sl:]]
        return GenotypeTimetable(chain=child)

    # Counts how many times a gene appears in the whole chain
    def count(self, chrom):
        return self._chain.count(chrom)

    def __str__(self):
        return '{}'.format(self.get_applicable_chain())

    def __repr__(self):
        return '{}'.format(self.get_applicable_chain())

    def __getitem__(self, idx):
        return self._chain[idx]

    def __len__(self):
        return len(self._chain)


# Represents a phenotype (timetable)
class FenotypeTimetable(object):
    """Binds a fixed list of match pairings to a genotype and scores the result."""

    def __init__(self, matchCombinations):
        '''
        :param matchCombinations: List of lists of combinations of matches (e.g: [[A, A], [A, B]])
        '''
        labels = Environment._categories
        self._m_comb = [[labels.index(cat1), labels.index(cat2)]
                        for (cat1, cat2) in matchCombinations]
        # Defined up front so get_score() works (and returns 0) before any
        # genotype has been set
        self._gen = None
        self._matches = []

    # Sets the genotype for this phenotype
    def set_genotype(self, genotype):
        """Build one ``Match`` per pairing, using gene ``i`` for pairing ``i``.

        :param genotype: indexable object whose items unpack to ``(day, hour)``.
        """
        self._gen = genotype
        scheduled = []
        for i, (cat1, cat2) in enumerate(self._m_comb):
            (day, hour) = genotype[i]
            scheduled.append(Match(cat1, cat2, day, hour))
        self._matches = scheduled

    # Returns the total score of the genotype that has been set
    def get_score(self):
        """Sum every match score scaled by its coincidence coefficient.

        A match's coincidences are the other scheduled matches sharing its
        day/hour slot; the count indexes ``Environment._concurrent_loss``.
        """
        losses = Environment._concurrent_loss

        # Count slot usage once instead of scanning the chain for every match.
        # The slots come from the matches themselves, so the result stays
        # consistent even if the genotype is modified after set_genotype().
        slots = [tuple(match.get_raw_time()) for match in self._matches]
        usage = {}
        for slot in slots:
            usage[slot] = usage.get(slot, 0) + 1

        acum = 0
        for match, slot in zip(self._matches, slots):
            coincidences = usage[slot] - 1
            if coincidences < len(losses):
                coin_coef = losses[coincidences]
            else:
                coin_coef = 0.2  # if not in list give an 80% loss
            acum += match.get_score() * coin_coef

        return acum


# Demo: build two random genotypes, cross them, mutate the child and score it
matches = [
    ['B', 'A'], ['B', 'A'], ['C', 'C'], ['B', 'A'], ['C', 'C'],
    ['B', 'C'], ['B', 'B'], ['B', 'B'], ['B', 'C'], ['A', 'B'],
]

gen1, gen2 = GenotypeTimetable(len(matches)), GenotypeTimetable(len(matches))
print('Gen 1:', gen1)
print('Gen 2:', gen2)
print()

# Crossover followed by a single in-place mutation
gen3 = gen1.cross(gen2)
print('Gen 1 + 2 (3)   :', gen3)
gen3.mutate()
print('Gen 3 (mutation):', gen3)
print()

# Bind the child to the pairings and show the per-match and total scores
fen = FenotypeTimetable(matches)
fen.set_genotype(gen3)
match1 = fen._matches[0]
print(
    'Match 1 info: ',
    match1.get_home_category(),
    match1.get_away_category(),
    match1.get_base(),
    match1.get_hour_coef(),
    match1.get_score(),
)
print('Match scores: ', [scheduled.get_score() for scheduled in fen._matches])
print('Total score (fitness): ', fen.get_score())

Gen 1: [['L', 20], ['D', 16], ['D', 18], ['L', 20], ['S', 20], ['V', 20], ['L', 20], ['S', 12], ['V', 20], ['V', 20]]
Gen 2: [['L', 20], ['V', 20], ['D', 20], ['V', 20], ['L', 20], ['D', 12], ['V', 20], ['V', 20], ['S', 18], ['L', 20]]

Gen 1 + 2 (3)   : [['L', 20], ['V', 20], ['D', 20], ['V', 20], ['L', 20], ['D', 12], ['V', 20], ['V', 20], ['S', 18], ['L', 20]]
Gen 3 (mutation): [['L', 20], ['V', 20], ['D', 20], ['V', 20], ['L', 20], ['D', 12], ['V', 20], ['V', 20], ['D', 12], ['L', 20]]

Match 1 info:  B A 1.3 0.4 0.52
Match scores:  [0.52, 0.52, 0.47, 0.52, 0.188, 0.3375, 0.36000000000000004, 0.36000000000000004, 0.3375, 0.52]
Total score (fitness):  2.3556500000000002


In [5]:
# Class that implements the genetic algorithm
class Genetic(object):
    """Genetic search for the timetable with the highest total score.

    :param matches: list of ``[home_category, away_category]`` label pairs.
    :param living_things: population size.
    :param iterations: number of generations run by ``optimize()``.
    :param reproductions: children added (and individuals removed afterwards)
        per generation when the population is not being replaced.
    :param probabilistic_repoblation: when True, each generation flips a coin
        to decide whether the whole population is replaced by children.
    :param mutations: mutation passes applied to the population per generation.
    :param n_threads: number of evaluation threads started per batch.
    :param selection: ``'tournament'`` or ``'weight_list'``.
    :param tournament_percent: fraction of the population drawn per tournament.
    :param probabilistic_mutation: when True each genotype is mutated with
        probability 1/2 per pass; when False every genotype is mutated.
    """

    def __init__(self, matches, living_things=8, iterations=50, reproductions=1,
                 probabilistic_repoblation=True, mutations=1, n_threads=4,
                 selection='tournament', tournament_percent=0.3,
                 probabilistic_mutation=True):
        self.MATCHES = matches

        self.__living_things = living_things
        self.__iterations = iterations
        self.__reproductions = reproductions
        self.__mutations = mutations
        self.__n_threads = n_threads
        self.__selection = selection
        self.__tournament_percent = tournament_percent
        self.__probabilistic_repoblation = probabilistic_repoblation
        self.__probabilistic_mutation = probabilistic_mutation

        # fitness_list[i] is the score of genotypes[i]
        self.fitness_list = []
        self.genotypes = []

        self.best_genotype = None
        self.best_fenotype = None
        self.best_score = -1

        self.repoblate = False

    def init(self):
        """Create a fresh random population with zeroed fitness values."""
        # Rebuilt from scratch so that calling optimize() twice does not stack
        # a second population on top of the first one
        l = len(self.MATCHES)
        self.genotypes = [GenotypeTimetable(l) for _ in range(self.__living_things)]
        self.fitness_list = [0] * len(self.genotypes)

    # Function run by the threads that evaluate the genotypes
    @staticmethod
    def evaluation_thread(gen, matches, fitness_list, index):
        """Score ``gen`` against ``matches`` and store it in ``fitness_list[index]``."""
        fen = FenotypeTimetable(matches)
        fen.set_genotype(gen)
        fitness_list[index] = fen.get_score()

    # Evaluation of the population: the work is split in batches of threads,
    # each batch holding as many threads as the n_threads setting
    def evaluation(self):
        """Recompute the fitness of every genotype."""
        # NOTE(review): scoring is pure-Python CPU work, so threads are
        # unlikely to speed this up under the GIL; kept for compatibility.
        if len(self.fitness_list) != len(self.genotypes):
            self.fitness_list = [0] * len(self.genotypes)

        index = 0
        for genotype_chunk in chunks(self.genotypes, self.__n_threads):
            running_threads = []
            for gen in genotype_chunk:
                worker = Thread(
                        target=Genetic.evaluation_thread,
                        args=(gen, self.MATCHES, self.fitness_list, index))
                running_threads.append(worker)
                worker.start()
                index += 1

            for worker in running_threads:
                worker.join()

    def _tournament_index(self, pool_size, pick_best):
        """Run one tournament and return the POPULATION index of its winner.

        ``int(pool_size * tournament_percent)`` contenders (at least one) are
        drawn with replacement from the first ``pool_size`` individuals; the
        winner is the contender with the highest (``pick_best``) or lowest
        fitness.
        """
        rounds = max(1, int(pool_size * self.__tournament_percent))
        contenders = [random.randrange(pool_size) for _ in range(rounds)]
        chooser = max if pick_best else min
        return chooser(contenders, key=self.fitness_list.__getitem__)

    # Select part of the population, cross it and create new genes
    def selection_cross(self):
        """Breed children, either extending or replacing the population.

        Without repopulation ``reproductions`` children are appended (with a
        placeholder fitness of 0). With repopulation, implemented only for
        tournament selection, ``living_things`` children replace everyone.

        :raises NotImplementedError: repopulation with ``'weight_list'``.
        """
        # Only already-evaluated individuals may become parents
        pool_size = min(len(self.genotypes), len(self.fitness_list))

        if self.__selection == 'tournament':
            if pool_size == 0:
                return
            wanted = self.__living_things if self.repoblate else self.__reproductions
            children = []
            for _ in range(wanted):
                # 2 fathers, 1 per tournament
                gen1 = self.genotypes[self._tournament_index(pool_size, True)]
                gen2 = self.genotypes[self._tournament_index(pool_size, True)]
                children.append(gen1.cross(gen2))

            if self.repoblate:
                # The whole population is swapped for the children
                self.genotypes = children
                self.fitness_list = [0] * len(children)
            else:
                self.genotypes.extend(children)
                self.fitness_list.extend([0] * len(children))

        elif self.__selection == 'weight_list':
            if self.repoblate:
                # With repoblation, weighted list is not implemented
                raise NotImplementedError('Not implemented repoblation with weighted list selection.')

            # Weighted list: every index appears int(fitness) times, so genes
            # with a better fit are more likely to be drawn
            wl = []
            for idx, fit in enumerate(self.fitness_list[:pool_size]):
                wl += [idx] * int(fit)
            if not wl:
                # Every fitness truncated to 0: fall back to a uniform draw
                wl = list(range(pool_size))
            if not wl:
                return

            for _ in range(self.__reproductions):
                father = self.genotypes[random.choice(wl)]
                mother = self.genotypes[random.choice(wl)]
                self.genotypes.append(father.cross(mother))
                self.fitness_list.append(0)

    # Mutation of the genes
    def mutation(self):
        """Run ``mutations`` passes over the population.

        With ``probabilistic_mutation`` each genotype is mutated with
        probability 1/2 per pass; otherwise every genotype is mutated.
        """
        for _ in range(self.__mutations):
            for gen in self.genotypes:
                if not self.__probabilistic_mutation or random.choice([0, 1]) == 1:
                    gen.mutate()

    # Decide which genes are dropped before the next population
    def reselection(self):
        """Remove ``reproductions`` individuals, favouring the worst ones."""
        if self.__selection == 'tournament':
            for _ in range(self.__reproductions):
                if not self.fitness_list:
                    break
                # select the worst in the tournament
                idx = self._tournament_index(len(self.fitness_list), False)
                del self.genotypes[idx]
                del self.fitness_list[idx]

        # Weighted list where the worst genes are the most likely to be chosen
        elif self.__selection == 'weight_list':
            for _ in range(self.__reproductions):
                if not self.fitness_list:
                    break
                ma = max(self.fitness_list) + 1
                wl = []
                for idx, fit in enumerate(self.fitness_list):
                    wl += [idx] * int(ma - fit)  # always at least one entry

                victim = random.choice(wl)
                del self.genotypes[victim]
                del self.fitness_list[victim]

    # Keep the best gene of the generation as a candidate solution, as long
    # as it beats the solution already stored
    def select_solution(self):
        """Update ``best_score`` / ``best_genotype`` from the current population."""
        if not self.fitness_list:
            return
        # max() returns the first maximum, matching a left-to-right strict scan
        top = max(range(len(self.fitness_list)), key=self.fitness_list.__getitem__)
        if self.fitness_list[top] > self.best_score:
            self.best_score = self.fitness_list[top]
            # Deep copy: later mutations must not alter the stored solution
            self.best_genotype = copy.deepcopy(self.genotypes[top])

    def optimize(self):
        """Run the full search, drawing a progress bar along the way.

        Results are left in ``best_score`` and ``best_genotype``.
        """
        self.init()

        self.evaluation()
        # The initial population is a valid source of solutions too
        self.select_solution()

        progress_size = 50
        print('[>', end='')
        drawn = 0
        for i in range(self.__iterations):
            if self.__probabilistic_repoblation:
                self.repoblate = random.choice([True, False])

            self.selection_cross()  # cross
            self.mutation()  # mutate
            self.evaluation()  # evaluate

            # Without repopulation, trim the population back before the
            # next generation
            if not self.repoblate:
                self.reselection()

            self.select_solution()  # keep a candidate solution

            # Progress bar, derived from the iteration count so it can
            # never overrun progress_size
            filled = (i + 1) * progress_size // self.__iterations
            if drawn < filled < progress_size:
                drawn = filled
                clear_output(wait=True)
                print('[{}>{}] {:.2f}%, current best score: {}'.format('=' * drawn, ' ' * (progress_size - drawn), drawn / progress_size * 100, self.best_score), end='')

        drawn = progress_size
        clear_output(wait=True)
        print('[{}>{}] {:.2f}%'.format('=' * drawn, ' ' * (progress_size - drawn), drawn / progress_size * 100), end='')
        print(' DONE!, best score: {}'.format(self.best_score))

In [10]:
# Long run: tournament selection on a large population, no repopulation
genetic = Genetic(
    [
        ['C', 'C'], ['B', 'B'], ['B', 'B'], ['B', 'A'], ['B', 'A'],
        ['B', 'C'], ['B', 'B'], ['B', 'C'], ['B', 'A'], ['C', 'C'],
    ],
    iterations=1000000,
    living_things=500,
    mutations=1,
    reproductions=2,
    n_threads=8,
    selection='tournament',
    probabilistic_repoblation=False,
    tournament_percent=0.3,
    probabilistic_mutation=True,
)

# Time the whole optimisation and report the outcome
t = time()
genetic.optimize()
print('Best score: ', genetic.best_score)
print('Best genotype: ', genetic.best_genotype)
print('Elapsed: ', (time() - t) / 60, 'mins')

Best score:  6.521
Best genotype:  [['L', 20], ['S', 16], ['D', 18], ['D', 20], ['S', 18], ['S', 12], ['D', 16], ['D', 12], ['S', 20], ['V', 20]]
Elapsed:  982.1221022764842 mins


In [7]:
# Short run: tournament selection with probabilistic repopulation
genetic = Genetic(
    [
        ['C', 'C'], ['A', 'B'], ['B', 'B'], ['B', 'C'], ['B', 'A'],
        ['B', 'C'], ['B', 'B'], ['B', 'C'], ['B', 'A'], ['B', 'C'],
    ],
    iterations=1000,
    living_things=20,
    mutations=1,
    reproductions=2,
    n_threads=8,
    selection='tournament',
    probabilistic_repoblation=True,
    tournament_percent=0.3,
    probabilistic_mutation=True,
)

# Time the whole optimisation and report the outcome
t = time()
genetic.optimize()
print('Best score: ', genetic.best_score)
print('Best genotype: ', genetic.best_genotype)
print('Elapsed: ', (time() - t) / 60, 'mins')

Best score:  6.223
Best genotype:  [['V', 20], ['S', 12], ['S', 18], ['D', 18], ['D', 16], ['S', 20], ['S', 16], ['L', 20], ['D', 20], ['S', 18]]
Elapsed:  0.13533326784769695 mins


In [8]:
# Weighted-list selection run, without repopulation or probabilistic mutation
genetic = Genetic(
    [
        ['C', 'B'], ['C', 'B'], ['B', 'B'], ['B', 'C'], ['A', 'A'],
        ['B', 'C'], ['B', 'B'], ['B', 'C'], ['B', 'A'], ['B', 'B'],
    ],
    iterations=10000,
    living_things=20,
    mutations=1,
    reproductions=2,
    n_threads=8,
    selection='weight_list',
    probabilistic_repoblation=False,
    probabilistic_mutation=False,
)

# Time the whole optimisation and report the outcome
t = time()
genetic.optimize()
print('Best score: ', genetic.best_score)
print('Best genotype: ', genetic.best_genotype)
print('Elapsed: ', (time() - t) / 60, 'mins')

Best score:  6.71125
Best genotype:  [['D', 16], ['D', 16], ['V', 20], ['L', 20], ['D', 20], ['S', 20], ['S', 20], ['S', 16], ['S', 18], ['D', 12]]
Elapsed:  1.0080758651097617 mins
