In [1]:
import random
import copy
import multiprocessing
import pandas as pd
import numpy as np
from deap import base
from deap import creator
from deap import tools
from datetime import datetime, timedelta

In [2]:
# Load files
file_name='RECINTO.csv'
RECINTO = pd.read_csv('./Data/'+file_name)
file_name='EVENTO.csv'
ORIGINAL_EVENTO = pd.read_csv('./Data/'+file_name)
file_name='PERFILES.csv'
PERFILES = pd.read_csv('./Data/'+file_name)
file_name='PREFERENCIAS_US1.csv'
PREFERENCIAS = pd.read_csv('./Data/'+file_name)

In [3]:
# Transform to date and hour formats
ORIGINAL_EVENTO['Hora_apertura_1'] = pd.to_datetime(ORIGINAL_EVENTO['Hora_apertura_1'], format='%H:%M')
ORIGINAL_EVENTO['Hora_cierre_1'] = pd.to_datetime(ORIGINAL_EVENTO['Hora_cierre_1'], format='%H:%M')
ORIGINAL_EVENTO['Fecha_1'] = pd.to_datetime(ORIGINAL_EVENTO['Fecha_1'], format='%m/%d/%Y')

ORIGINAL_EVENTO['Hora_apertura_2'] = pd.to_datetime(ORIGINAL_EVENTO['Hora_apertura_2'], format='%H:%M')
ORIGINAL_EVENTO['Hora_cierre_2'] = pd.to_datetime(ORIGINAL_EVENTO['Hora_cierre_2'], format='%H:%M')
ORIGINAL_EVENTO['Fecha_2'] = pd.to_datetime(ORIGINAL_EVENTO['Fecha_2'], format='%m/%d/%Y')
drop_list = []

# Exclude resources of more than 6 hours of duration
for i in range(0, max(ORIGINAL_EVENTO['Id_Recurso'])):
    
    # Get open time for the first resource
    ha1 = ORIGINAL_EVENTO.Hora_apertura_1[i]
                        
    # Get close time for the first resource
    hc1 = ORIGINAL_EVENTO.Hora_cierre_1[i]
    
     # Get open time for the first resource
    ha2 = ORIGINAL_EVENTO.Hora_apertura_2[i]
                        
    # Get close time for the first resource
    hc2 = ORIGINAL_EVENTO.Hora_cierre_2[i]
                        
    diff_hour1 = (hc1 - ha1)
    diff_hour1 = diff_hour1 / np.timedelta64(1, 'm')
    
    diff_hour2 = (hc2 - ha2)
    diff_hour2 = diff_hour2 / np.timedelta64(1, 'm')
    
    if diff_hour1 >= 360 or diff_hour2 >= 360:
        drop_list.append(i)
        
# Get the two datasets (first with short durations and second with large duration)
EVENTO = ORIGINAL_EVENTO[~ORIGINAL_EVENTO['Id_Recurso'].isin(drop_list)]
EVENTO_HOURLESS = ORIGINAL_EVENTO[ORIGINAL_EVENTO['Id_Recurso'].isin(drop_list)]

In [4]:
# Size of the individual
SIZE_INDIVIDUAL = EVENTO.shape[0]

# Number of generations
NUM_GENERATIONS = 20

# Optimal value to obtain (not reached)
MAX_VALUE = SIZE_INDIVIDUAL + 10 * SIZE_INDIVIDUAL

# Poblation members
SIZE_POBLATION = 1000

# CXPB  is the probability with which two individuals are crossed
CXPB = 0.5

# MUTPB is the probability for mutating an individual
MUTPB = 0.2

In [5]:
# Define the fitness function for maximizing the result
creator.create("FitnessMax", base.Fitness, weights=(1.0,))

# Creation of the individual with the fitness function
creator.create("Individual", list, fitness=creator.FitnessMax)

In [6]:
# Create a toolbox for the genetic algorithm
toolbox = base.Toolbox()

# Attribute generator having only 0, 1 or 2 in the body of the individual (no day, first day or second day)
toolbox.register("attr_int", random.randint, 0, 2)

# Structure initializers
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_int, SIZE_INDIVIDUAL)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

In [7]:
# Enable multiprocessing
pool = multiprocessing.Pool()
toolbox.register("map", pool.map)

In [8]:
# Function to find overlapped hours between resources
def find_hour_overlapping(ha1, hc1, ha2, hc2):
    found = False
    if (ha1 == ha2) or (ha2 < ha1 < hc2) or (ha1 < ha2 < hc1) or (ha2 < ha1 < hc1 < hc2) \
        or (ha1 < ha2 < hc2 < hc1):
        found = True
    return found

In [9]:
# Function to find overlapped hours in the first day
def find_first_day_overlapping(i, j):
    found = False
    # Get date for the first resource
    date_1 = EVENTO.Fecha_1[EVENTO.index[i]]

    # Get date for the second resource
    date_2 = EVENTO.Fecha_1[EVENTO.index[j]]

    # Evaluate if the resource is in the same day
    if (date_1 - date_2).days == 0:
        # Get open time for the first resource
        ha1 = EVENTO.Hora_apertura_1[EVENTO.index[i]]

        # Get close time for the first resource
        hc1 = EVENTO.Hora_cierre_1[EVENTO.index[i]]

        # Get open time for the second resource
        ha2 = EVENTO.Hora_apertura_1[EVENTO.index[j]]

        # Get close time for the second resource
        hc2 = EVENTO.Hora_cierre_1[EVENTO.index[j]]
                            
        found = find_hour_overlapping(ha1, hc1, ha2, hc2)
    return found

In [10]:
# Function to find overlapped hours in the second day
def find_second_day_overlapping(i, j):
    found = False
    # Get date for the first resource
    date_1 = EVENTO.Fecha_2[EVENTO.index[i]]

    # Get date for the second resource
    date_2 = EVENTO.Fecha_2[EVENTO.index[j]]

    # Evaluate if the resource is in the same day
    if (date_1 - date_2).days == 0:
        # Get open time for the first resource
        ha1 = EVENTO.Hora_apertura_2[EVENTO.index[i]]

        # Get close time for the first resource
        hc1 = EVENTO.Hora_cierre_2[EVENTO.index[i]]

        # Get open time for the second resource
        ha2 = EVENTO.Hora_apertura_2[EVENTO.index[j]]

        # Get close time for the second resource
        hc2 = EVENTO.Hora_cierre_2[EVENTO.index[j]]
        
        # Exclude resources that are not active in the second day
        if ha2.hour == 0 and hc2.hour == 0:
            found = True
        else:
            found = find_hour_overlapping(ha1, hc1, ha2, hc2)
    return found

In [17]:
# Overlapping function to detect time problems in resources
def find_overlapping(individual):
    found = False
    i = 0
    while i < len(individual) and not found:
        # If the resource is selected for the algorithm for day 1
        if individual[i] == 1:
            j = 0
            while j < len(individual)  and not found:
                if i != j:
                    if individual[j] == 1:
                        found = find_first_day_overlapping(i, j)
                j = j + 1
        # If the resource is selected for the algorithm for day 2
        elif individual[i] == 2:
            j = 0
            while j < len(individual)  and not found:
                if i != j:
                    if individual[j] == 2:
                        found = find_second_day_overlapping(i, j)
                j = j + 1
        i = i + 1

    return found

In [18]:
# Fitness function
def evalOneMax(individual):
    summa = 0
    if not find_overlapping(individual):
        for i in range(0, len(individual)):
            if individual[i] == 2:
                ha = EVENTO.Hora_apertura_2[EVENTO.index[i]]
                hc = EVENTO.Hora_cierre_2[EVENTO.index[i]]
                # Only if the resource is active in the second day it is considered
                if ha.hour != 0 and hc != 0:
                    summa = 1 + summa + PREFERENCIAS[PREFERENCIAS['Id_Recurso'] == EVENTO.index[i]].Preferencia.values[0]
            elif individual[i] == 1:
                summa = 1 + summa + PREFERENCIAS[PREFERENCIAS['Id_Recurso'] == EVENTO.index[i]].Preferencia.values[0]
    return summa,

In [19]:
# Record the allowed operations
toolbox.register("evaluate", evalOneMax)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutUniformInt, low=0, up=2, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)

In [20]:
# Initial population
pop = toolbox.population(n=SIZE_POBLATION)
for i in range(0, SIZE_POBLATION):
    pop[i] =  creator.Individual(SIZE_INDIVIDUAL * [0])

In [21]:
# Evaluate the entire population
fitnesses = list(map(toolbox.evaluate, pop))

In [22]:
# Store the values of the fitness function for each individual
for ind, fit in zip(pop, fitnesses):
    ind.fitness.values = fit

In [23]:
# Function for calculating the stats each generation
def calculate_statistics(fits, g=0):
    print("-- Generation %i --" % g)   
    length = len(pop)
    mean = sum(fits) / length
    sum2 = sum(x*x for x in fits)
    std = abs(sum2 / length - mean**2)**0.5
    print("  Min %s" % min(fits))
    print("  Max %s" % max(fits))
    print("  Avg %s" % mean)
    print("  Std %s" % std)

In [24]:
# Gather all the fitnesses in one list and print the stats
fits = [ind.fitness.values[0] for ind in pop]
calculate_statistics(fits)

-- Generation 0 --
  Min 0.0
  Max 0.0
  Avg 0.0
  Std 0.0


In [25]:
# Variable keeping track of the number of generations
g = 0

# Begin the evolution
while max(fits) < MAX_VALUE and g < NUM_GENERATIONS:
    # A new generation
    g = g + 1
    
    # Select the next generation individuals
    offspring = toolbox.select(pop, len(pop))
    
    # Clone the selected individuals
    offspring = list(map(toolbox.clone, offspring))
    
    # Apply crossover and mutation on the offspring
    for child1, child2 in zip(offspring[::2], offspring[1::2]):
        if random.random() < CXPB:
            toolbox.mate(child1, child2)
            del child1.fitness.values
            del child2.fitness.values

    for mutant in offspring:
        if random.random() < MUTPB:
            toolbox.mutate(mutant)
            del mutant.fitness.values
            
    # Evaluate the individuals with an invalid fitness
    invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
    fitnesses = map(toolbox.evaluate, invalid_ind)
    for ind, fit in zip(invalid_ind, fitnesses):
        ind.fitness.values = fit
        
    pop[:] = offspring
    # Gather all the fitnesses in one list and print the stats
    fits = [ind.fitness.values[0] for ind in pop]
    calculate_statistics(fits, g)

-- Generation 1 --
  Min 0.0
  Max 36.0
  Avg 1.128
  Std 3.9728599270550684
-- Generation 2 --
  Min 0.0
  Max 36.0
  Avg 3.356
  Std 6.192032299657359
-- Generation 3 --
  Min 0.0
  Max 60.0
  Avg 7.479
  Std 8.437153489181052
-- Generation 4 --
  Min 0.0
  Max 60.0
  Avg 12.401
  Std 10.077906479026286
-- Generation 5 --
  Min 0.0
  Max 60.0
  Avg 16.401
  Std 12.269889934306665
-- Generation 6 --
  Min 0.0
  Max 64.0
  Avg 20.277
  Std 14.267525048164451
-- Generation 7 --
  Min 0.0
  Max 64.0
  Avg 24.029
  Std 15.837555335341374
-- Generation 8 --
  Min 0.0
  Max 72.0
  Avg 26.708
  Std 18.545423586426924
-- Generation 9 --
  Min 0.0
  Max 73.0
  Avg 32.757
  Std 19.56752286315261
-- Generation 10 --
  Min 0.0
  Max 73.0
  Avg 36.639
  Std 22.352285766784565
-- Generation 11 --
  Min 0.0
  Max 82.0
  Avg 40.611
  Std 24.243425479911046
-- Generation 12 --
  Min 0.0
  Max 84.0
  Avg 43.782
  Std 26.66177931046614
-- Generation 13 --
  Min 0.0
  Max 87.0
  Avg 47.397
  Std 28.02590

In [26]:
# Find and print best individual
best_index = fits.index(max(fits))
best_partial_individual = pop[best_index]
print(best_partial_individual)

[0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [36]:
# Print the selected resources by the genetic algorithm for the two days
for i in range(0,len(best_partial_individual)):
    if best_partial_individual[i] == 1:
        print(EVENTO.Id_Recurso[EVENTO.index[i]], EVENTO.Fecha_1[EVENTO.index[i]], EVENTO.Hora_apertura_1[EVENTO.index[i]], EVENTO.Hora_cierre_1[EVENTO.index[i]])

for i in range(0,len(best_partial_individual)):
    if best_partial_individual[i] == 2:
        print(EVENTO.Id_Recurso[EVENTO.index[i]], EVENTO.Fecha_2[EVENTO.index[i]], EVENTO.Hora_apertura_2[EVENTO.index[i]], EVENTO.Hora_cierre_2[EVENTO.index[i]])

2 2023-10-02 00:00:00 1900-01-01 11:00:00 1900-01-01 12:00:00
3 2023-10-02 00:00:00 1900-01-01 16:00:00 1900-01-01 17:00:00
26 2023-10-02 00:00:00 1900-01-01 19:00:00 1900-01-01 20:00:00
31 2023-10-02 00:00:00 1900-01-01 17:00:00 1900-01-01 18:00:00
35 2023-10-02 00:00:00 1900-01-01 10:00:00 1900-01-01 10:30:00
46 2023-10-02 00:00:00 1900-01-01 18:00:00 1900-01-01 18:30:00
54 2023-10-02 00:00:00 1900-01-01 20:00:00 1900-01-01 21:00:00
55 2023-10-02 00:00:00 1900-01-01 21:00:00 1900-01-01 22:00:00
88 2023-10-02 00:00:00 1900-01-01 12:00:00 1900-01-01 16:00:00
7 2023-11-02 00:00:00 1900-01-01 11:00:00 1900-01-01 12:00:00
8 2023-11-02 00:00:00 1900-01-01 16:00:00 1900-01-01 16:30:00
11 2023-11-02 00:00:00 1900-01-01 19:00:00 1900-01-01 20:00:00
96 2023-11-02 00:00:00 1900-01-01 20:00:00 1900-01-01 23:00:00


In [30]:
# Add the large duration resources
best_individual = copy.copy(best_partial_individual)
for value in list(EVENTO_HOURLESS.Id_Recurso):
    best_individual.insert(value,'#')

In [35]:
# Print the selected resources including short and long duration ones
for i in range(0,len(best_individual)):
    if best_individual[i] == '#' or best_individual[i] == 1:
        print(ORIGINAL_EVENTO.Id_Recurso[ORIGINAL_EVENTO.index[i]], ORIGINAL_EVENTO.Fecha_1[ORIGINAL_EVENTO.index[i]], ORIGINAL_EVENTO.Hora_apertura_1[ORIGINAL_EVENTO.index[i]], ORIGINAL_EVENTO.Hora_cierre_1[ORIGINAL_EVENTO.index[i]], ORIGINAL_EVENTO.Nombre[ORIGINAL_EVENTO.index[i]])
    
for i in range(0,len(best_individual)):
    if best_individual[i] == 2:
        print(ORIGINAL_EVENTO.Id_Recurso[ORIGINAL_EVENTO.index[i]], ORIGINAL_EVENTO.Fecha_2[ORIGINAL_EVENTO.index[i]], ORIGINAL_EVENTO.Hora_apertura_2[ORIGINAL_EVENTO.index[i]], ORIGINAL_EVENTO.Hora_cierre_2[ORIGINAL_EVENTO.index[i]], ORIGINAL_EVENTO.Nombre[ORIGINAL_EVENTO.index[i]])

2 2023-10-02 00:00:00 1900-01-01 11:00:00 1900-01-01 12:00:00 Firma de discos de Sabina
3 2023-10-02 00:00:00 1900-01-01 16:00:00 1900-01-01 17:00:00 Expositor Sabina
5 2023-10-02 00:00:00 1900-01-01 16:00:00 1900-01-01 22:00:00 Merchandisig Sabina
18 2023-10-02 00:00:00 1900-01-01 08:00:00 1900-01-01 20:00:00 Puesto de venta de merchandising pop español
19 2023-10-02 00:00:00 1900-01-01 08:00:00 1900-01-01 20:00:00 Expositor historia del pop español
26 2023-10-02 00:00:00 1900-01-01 19:00:00 1900-01-01 20:00:00 Concierto Tina & Joe
27 2023-10-02 00:00:00 1900-01-01 08:00:00 1900-01-01 20:00:00 Expositor jazz es futuro
31 2023-10-02 00:00:00 1900-01-01 17:00:00 1900-01-01 18:00:00 Concierto Maluma
34 2023-10-02 00:00:00 1900-01-01 08:00:00 1900-01-01 20:00:00 Mechandising Reggaeton
35 2023-10-02 00:00:00 1900-01-01 10:00:00 1900-01-01 10:30:00 Firma Kase.O
41 2023-10-02 00:00:00 1900-01-01 08:00:00 1900-01-01 20:00:00 Rap y sus barrios
46 2023-10-02 00:00:00 1900-01-01 18:00:00 1900-01