In [1]:
import random
import copy
import multiprocessing
import pandas as pd
import numpy as np
from deap import base
from deap import creator
from deap import tools
from datetime import datetime, timedelta

In [2]:
# Load the festival schedule and one user's preference scores from CSV files.
# All paths are relative to the notebook's working directory.
path = '../Data/'
festival = 'IFEMA_Festival_Videojuegos_1/'
preferences = 'Preferencias/'
file_name= 'IFEMA_Festival_Videojuegos_v_5.csv'
ORIGINAL_EVENTO = pd.read_csv(path+festival+file_name)
# file_name is reused here, so after this cell it names the preferences file, not the schedule.
file_name='PREFERENCIAS_US1.csv'
PREFERENCIAS = pd.read_csv(path+festival+preferences+file_name)
# Order the preferences by resource id.
PREFERENCIAS = PREFERENCIAS.sort_values('Id_Recurso')
# NOTE(review): the preview shows an 'Unnamed: 0' column, which suggests the CSV was
# written with its index — confirm whether that column is needed.
PREFERENCIAS.head(15)

Unnamed: 0,Id_Recurso,Preferencia
101,0,0.233295
88,1,0.358799
111,2,0.120212
102,3,0.227418
113,4,0.102221
107,5,0.184035
27,6,0.49482
114,7,0.044136
50,8,0.425795
89,9,0.358799


In [7]:
# Transform to date and hour formats.
# The opening/closing columns only carry a time of day, so pandas anchors them on
# 1900-01-01 (visible in the printed schedules); only their differences are meaningful.
ORIGINAL_EVENTO['Hora_apertura_1'] = pd.to_datetime(ORIGINAL_EVENTO['Hora_apertura_1'], format='%H:%M')
ORIGINAL_EVENTO['Hora_cierre_1'] = pd.to_datetime(ORIGINAL_EVENTO['Hora_cierre_1'], format='%H:%M')
ORIGINAL_EVENTO['Fecha_1'] = pd.to_datetime(ORIGINAL_EVENTO['Fecha_1'], format='%d/%m/%Y')

# Resources lasting 6 hours (360 minutes) or more are kept out of the genetic algorithm.
LONG_DURATION_MINUTES = 360

# Duration of every resource in minutes, computed for all rows at once. The previous
# row-by-row loop ran over range(0, max(Código)), which never inspected the last row and
# mixed up index labels with 'Código' values.
duration_minutes = (
    ORIGINAL_EVENTO['Hora_cierre_1'] - ORIGINAL_EVENTO['Hora_apertura_1']
) / np.timedelta64(1, 'm')
is_long_duration = duration_minutes >= LONG_DURATION_MINUTES

# Codes of the long-duration resources (kept under the original name for later inspection).
drop_list = ORIGINAL_EVENTO.loc[is_long_duration, 'Código'].tolist()

# Get the two datasets (first with short durations and second with large duration).
# Both keep the row labels of ORIGINAL_EVENTO, so together they partition it.
EVENTO = ORIGINAL_EVENTO[~is_long_duration]
EVENTO_HOURLESS = ORIGINAL_EVENTO[is_long_duration]

In [8]:
# Size of the individual: one gene per row of EVENTO
SIZE_INDIVIDUAL = EVENTO.shape[0]

# Number of generations
NUM_GENERATIONS = 20

# Fitness threshold that stops the evolution early (equals 11 * SIZE_INDIVIDUAL);
# the original note says this value is not reached
MAX_VALUE = SIZE_INDIVIDUAL + 10 * SIZE_INDIVIDUAL

# Population size (number of individuals)
SIZE_POBLATION = 1000

# CXPB  is the probability with which two individuals are crossed
CXPB = 0.5

# MUTPB is the probability for mutating an individual
MUTPB = 0.2

In [9]:
# Define a single-objective fitness class; the positive weight means the value is maximized
creator.create("FitnessMax", base.Fitness, weights=(1.0,))

# Define the individual as a list subclass carrying a FitnessMax instance in `.fitness`
# NOTE(review): these classes are registered on deap's `creator` module, so re-running
# this cell in the same kernel redefines them — confirm that is acceptable.
creator.create("Individual", list, fitness=creator.FitnessMax)

In [10]:
# Toolbox that collects the generators and operators used by the algorithm
toolbox = base.Toolbox()

# Attribute generator: each gene is 0 (resource not selected) or 1 (resource selected)
toolbox.register("attr_bool", random.randint, 0, 1)

# Structure initializers:
# - "individual" builds a creator.Individual from SIZE_INDIVIDUAL calls to attr_bool
# - "population" builds a plain list of individuals; the size is passed as n= at call time
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_bool, SIZE_INDIVIDUAL)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

In [11]:
# Enable multiprocessing: create a worker pool and expose its map as toolbox.map
# NOTE(review): the evaluation cells of this notebook call the builtin map(toolbox.evaluate, ...)
# rather than toolbox.map, so this pool does not appear to be used; it is also never
# closed in the visible cells.
# NOTE(review): the pool is created before find_overlapping/evalOneMax are defined —
# confirm the workers can resolve those functions before switching to toolbox.map.
pool = multiprocessing.Pool()
toolbox.register("map", pool.map)

In [12]:
# Overlapping function to detect time problems in resources
def find_overlapping(individual):
    """Tell whether two selected resources clash in time on the same day.

    Args:
        individual: sequence of genes aligned by position with the rows of the
            module-level EVENTO frame; a gene equal to 1 marks a selected resource.

    Returns:
        True as soon as one pair of selected resources shares the same 'Fecha_1'
        and either opens at the same time or has one opening strictly inside the
        other's opening-closing interval; False otherwise. Back-to-back resources
        (one closes exactly when the other opens) are not considered overlapping.
    """
    selected = [position for position, gene in enumerate(individual) if gene == 1]
    if len(selected) < 2:
        # A clash needs at least two selected resources.
        return False

    # Read the schedule columns once per call instead of doing a pandas label
    # lookup for every pair; this function runs for every individual evaluated.
    dates = EVENTO['Fecha_1'].tolist()
    opens = EVENTO['Hora_apertura_1'].tolist()
    closes = EVENTO['Hora_cierre_1'].tolist()

    # The clash test is symmetric, so each unordered pair is examined only once.
    for rank, first in enumerate(selected):
        date_1, ha1, hc1 = dates[first], opens[first], closes[first]
        for second in selected[rank + 1:]:
            # 'Fecha_1' is parsed from day/month/year text, so equal values mean same day.
            if dates[second] != date_1:
                continue
            ha2, hc2 = opens[second], closes[second]
            # Same start, or one resource starts while the other is still running.
            # (The nested-interval cases are already covered by these conditions.)
            if ha1 == ha2 or ha2 < ha1 < hc2 or ha1 < ha2 < hc1:
                return True

    return False

In [13]:
# Fitness function
def evalOneMax(individual):
    """Score an individual for the genetic algorithm.

    Args:
        individual: sequence of genes aligned by position with the rows of EVENTO;
            a gene equal to 1 marks a selected resource.

    Returns:
        A one-element tuple. The score is 0 when find_overlapping reports a clash;
        otherwise it is the sum of the genes plus, for every selected resource, the
        'Preferencia' of the PREFERENCIAS row whose 'Id_Recurso' equals the
        resource's EVENTO index label.
    """
    # Schedules with a time clash are worth nothing.
    if find_overlapping(individual):
        return 0,

    score = sum(individual)
    for position, gene in enumerate(individual):
        if gene != 1:
            continue
        resource_id = EVENTO.index[position]
        matching_rows = PREFERENCIAS[PREFERENCIAS['Id_Recurso'] == resource_id]
        # The first matching preference row is the one that counts.
        score = score + matching_rows.Preferencia.values[0]
    return score,

In [14]:
# Manual sanity check: score a hand-written selection.
# evalOneMax leaves the score at 0 when find_overlapping reports a clash.
best = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
print(evalOneMax(best))

(0,)


In [15]:
# Manual sanity check with a dense selection: report whether it has a time clash,
# then show its fitness as the cell output.
best = [0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1]
if find_overlapping(best):
    print("Overlapping")
else:
    print("No overlapping")
    
evalOneMax(best)

Overlapping


(0,)

In [16]:
# Register the genetic operators used by the evolution loop:
# - evaluate: fitness function defined above
# - mate: two-point crossover
# - mutate: bit flip, each gene flipped with probability indpb=0.05
# - select: tournament selection with 3 contenders per tournament
toolbox.register("evaluate", evalOneMax)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)

In [17]:
# Initial population: SIZE_POBLATION individuals with no resource selected.
# The individuals are built directly as all-zero lists; previously a random
# population was generated first and then every member was overwritten with zeros.
pop = [creator.Individual(SIZE_INDIVIDUAL * [0]) for _ in range(SIZE_POBLATION)]

In [18]:
# Evaluate the entire population.
# This uses the builtin map (sequential), not the pool-backed toolbox.map.
fitnesses = list(map(toolbox.evaluate, pop))

In [19]:
# Store the fitness tuple computed above on each individual (pop and fitnesses are in the same order)
for ind, fit in zip(pop, fitnesses):
    ind.fitness.values = fit

In [20]:
# Function for calculating the stats each generation
def calculate_statistics(fits, g=0):
    """Print the minimum, maximum, mean and standard deviation of a generation.

    Args:
        fits: non-empty list of numeric fitness values, one per individual.
        g: generation number shown in the header line (defaults to 0).

    Returns:
        None; the statistics are written to standard output.
    """
    print("-- Generation %i --" % g)
    # Use the size of the values actually received rather than the global population,
    # so the function also works for any list of fitness values.
    length = len(fits)
    mean = sum(fits) / length
    sum2 = sum(x*x for x in fits)
    # Population standard deviation; abs() absorbs a tiny negative variance from rounding.
    std = abs(sum2 / length - mean**2)**0.5
    print("  Min %s" % min(fits))
    print("  Max %s" % max(fits))
    print("  Avg %s" % mean)
    print("  Std %s" % std)

In [21]:
# Gather the (single-objective) fitness of every individual in one list and print the
# statistics of the initial population (generation 0 by default)
fits = [ind.fitness.values[0] for ind in pop]
calculate_statistics(fits)

-- Generation 0 --
  Min 0.0
  Max 0.0
  Avg 0.0
  Std 0.0


In [22]:
# Variable keeping track of the number of generations
g = 0

# Begin the evolution: stop when the best fitness reaches MAX_VALUE or after NUM_GENERATIONS
# NOTE(review): no random seed is set in the visible cells, so runs are not reproducible.
while max(fits) < MAX_VALUE and g < NUM_GENERATIONS:
    # A new generation
    g = g + 1

    # Select the next generation individuals (as many as the current population)
    offspring = toolbox.select(pop, len(pop))

    # Clone the selected individuals so the operators below do not modify members of pop
    offspring = list(map(toolbox.clone, offspring))

    # Apply crossover on consecutive pairs (even-positioned with odd-positioned offspring)
    for child1, child2 in zip(offspring[::2], offspring[1::2]):
        if random.random() < CXPB:
            toolbox.mate(child1, child2)
            # The children changed, so their stored fitness is no longer valid
            del child1.fitness.values
            del child2.fitness.values

    # Apply mutation to each offspring with probability MUTPB
    for mutant in offspring:
        if random.random() < MUTPB:
            toolbox.mutate(mutant)
            del mutant.fitness.values

    # Evaluate only the individuals whose fitness was invalidated above
    # (builtin map: evaluation runs sequentially, not through toolbox.map)
    invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
    fitnesses = map(toolbox.evaluate, invalid_ind)
    for ind, fit in zip(invalid_ind, fitnesses):
        ind.fitness.values = fit

    # Replace the whole population with the offspring (no individual is carried over unchanged on purpose)
    pop[:] = offspring
    # Gather all the fitnesses in one list and print the stats
    fits = [ind.fitness.values[0] for ind in pop]
    calculate_statistics(fits, g)

-- Generation 1 --
  Min 0.0
  Max 6.799532100159175
  Avg 0.24481950637794198
  Std 0.9218799934290677
-- Generation 2 --
  Min 0.0
  Max 7.859313742841409
  Avg 0.7272498286408177
  Std 1.425997775116984
-- Generation 3 --
  Min 0.0
  Max 8.085098552494307
  Avg 1.531317172717559
  Std 1.7479094598819829
-- Generation 4 --
  Min 0.0
  Max 8.085098552494307
  Avg 2.320364557299773
  Std 2.0453418454892796
-- Generation 5 --
  Min 0.0
  Max 8.361887762222404
  Avg 2.825692057504132
  Std 2.269343778476485
-- Generation 6 --
  Min 0.0
  Max 8.665201133971456
  Avg 3.406470281538461
  Std 2.463083687845451
-- Generation 7 --
  Min 0.0
  Max 10.06968003375116
  Avg 3.623958621060933
  Std 2.7914702064459793
-- Generation 8 --
  Min 0.0
  Max 10.127406203069974
  Avg 4.061983880262568
  Std 3.035773308487398
-- Generation 9 --
  Min 0.0
  Max 11.391096849250145
  Avg 4.31534989552207
  Std 3.2229953965431255
-- Generation 10 --
  Min 0.0
  Max 11.391096849250145
  Avg 4.575068901137177
  S

In [23]:
# Find and print the best individual of the last population.
# fits is in the same order as pop; index() returns the first position with the maximum fitness.
best_index = fits.index(max(fits))
best_partial_individual = pop[best_index]
print(best_partial_individual)

[0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]


In [25]:
# Print code, opening time and closing time of every resource selected by the genetic algorithm.
# Gene i corresponds to the i-th row of EVENTO (the short-duration resources).
for i in range(0,len(best_partial_individual)):
    if best_partial_individual[i] == 1:
        print(EVENTO.Código[EVENTO.index[i]], EVENTO.Hora_apertura_1[EVENTO.index[i]], EVENTO.Hora_cierre_1[EVENTO.index[i]])

3 1900-01-01 11:15:00 1900-01-01 11:45:00
6 1900-01-01 10:00:00 1900-01-01 11:00:00
17 1900-01-01 16:00:00 1900-01-01 16:30:00
25 1900-01-01 12:00:00 1900-01-01 12:30:00
26 1900-01-01 12:45:00 1900-01-01 13:15:00
37 1900-01-01 00:00:00 1900-01-01 00:00:00
41 1900-01-01 13:30:00 1900-01-01 14:00:00
43 1900-01-01 17:30:00 1900-01-01 19:00:00
51 1900-01-01 19:00:00 1900-01-01 19:30:00
52 1900-01-01 19:30:00 1900-01-01 21:00:00
69 1900-01-01 15:30:00 1900-01-01 16:00:00


In [27]:
# Add the large duration resources: build one gene per row of ORIGINAL_EVENTO.
# Long-duration rows are always selected (gene 1); every other row takes the next gene
# of the GA result, which is aligned with EVENTO (ORIGINAL_EVENTO minus the long rows,
# in the same order). Walking the rows avoids relying on 'Código' being ascending and
# equal to the row position, which positional insert() calls would require.
assert len(best_partial_individual) + len(EVENTO_HOURLESS) == len(ORIGINAL_EVENTO), \
    "GA result is not aligned with the short-duration resources"

long_duration_codes = set(EVENTO_HOURLESS['Código'])
short_duration_genes = iter(best_partial_individual)

# Start from a copy so the result keeps the type of the GA individual.
best_individual = copy.copy(best_partial_individual)
best_individual[:] = [
    1 if code in long_duration_codes else next(short_duration_genes)
    for code in ORIGINAL_EVENTO['Código']
]

In [28]:
# Print code, opening time, closing time and name of every selected resource,
# short and long duration alike. Gene i corresponds to the i-th row of ORIGINAL_EVENTO.
for i in range(0,len(best_individual)):
    if best_individual[i] == 1:
        print(ORIGINAL_EVENTO.Código[ORIGINAL_EVENTO.index[i]], ORIGINAL_EVENTO.Hora_apertura_1[ORIGINAL_EVENTO.index[i]], ORIGINAL_EVENTO.Hora_cierre_1[ORIGINAL_EVENTO.index[i]], ORIGINAL_EVENTO.Nombre[ORIGINAL_EVENTO.index[i]])

3 1900-01-01 11:15:00 1900-01-01 11:45:00 Freeplay + Inscripciones Smash
5 1900-01-01 15:00:00 1900-01-01 21:00:00 Road to Gamergy: Smash Bros Ultimate Open
6 1900-01-01 10:00:00 1900-01-01 11:00:00 Inscripciones Pokémon Unite
17 1900-01-01 16:00:00 1900-01-01 16:30:00 Inscripciones Death Match
24 1900-01-01 12:00:00 1900-01-01 20:00:00 National League CSGO
25 1900-01-01 12:00:00 1900-01-01 12:30:00 Valorant (Fiebre de la Spike) 1v1 Treintadosavos de Final Bo1
26 1900-01-01 12:45:00 1900-01-01 13:15:00 Valorant (Fiebre de la Spike) 1v1 Dieciseisavos de Final Bo1
37 1900-01-01 00:00:00 1900-01-01 00:00:00 Open Fortnite 2
41 1900-01-01 13:30:00 1900-01-01 14:00:00 Lucha de Sables
43 1900-01-01 17:30:00 1900-01-01 19:00:00 Cosplay Concurso
51 1900-01-01 19:00:00 1900-01-01 19:30:00 Concierto Delaossa Amazon Music
52 1900-01-01 19:30:00 1900-01-01 21:00:00 Concierto Amazon Music
69 1900-01-01 15:30:00 1900-01-01 16:00:00 Presentación C1b3r Wall Policia Nacional
73 1900-01-01 10:00:00 1900-