In [1]:
import random
import copy
import multiprocessing
import pandas as pd
import numpy as np
from deap import base
from deap import creator
from deap import tools
from datetime import datetime, timedelta

In [2]:
# Load files
file_name='RECINTO.csv'
RECINTO = pd.read_csv('./Data/'+file_name)
file_name='EVENTO.csv'
ORIGINAL_EVENTO = pd.read_csv('./Data/'+file_name)
file_name='PERFILES.csv'
PERFILES = pd.read_csv('./Data/'+file_name)
file_name='PREFERENCIAS_US1.csv'
PREFERENCIAS = pd.read_csv('./Data/'+file_name)

In [3]:
# Transform to date and hour formats
ORIGINAL_EVENTO['Hora_apertura_1'] = pd.to_datetime(ORIGINAL_EVENTO['Hora_apertura_1'], format='%H:%M')
ORIGINAL_EVENTO['Hora_cierre_1'] = pd.to_datetime(ORIGINAL_EVENTO['Hora_cierre_1'], format='%H:%M')
ORIGINAL_EVENTO['Fecha_1'] = pd.to_datetime(ORIGINAL_EVENTO['Fecha_1'], format='%m/%d/%Y')

drop_list = []

# Exclude resources of more than 6 hours of duration
for i in range(0, max(ORIGINAL_EVENTO['Id_Recurso'])):
    
    # Get open time for the first resource.
    ha = ORIGINAL_EVENTO.Hora_apertura_1[i]
                        
    # Get close time for the first resource.
    hc = ORIGINAL_EVENTO.Hora_cierre_1[i]
                        
    diff_hour = (hc - ha)
    diff_hour = diff_hour / np.timedelta64(1, 'm')
    if diff_hour >= 360:
        drop_list.append(i)

# Get the two datasets (first with short durations and second with large duration)
EVENTO = ORIGINAL_EVENTO[~ORIGINAL_EVENTO['Id_Recurso'].isin(drop_list)]
EVENTO_HOURLESS = ORIGINAL_EVENTO[ORIGINAL_EVENTO['Id_Recurso'].isin(drop_list)]

In [4]:
# Size of the individual
SIZE_INDIVIDUAL = EVENTO.shape[0]

# Number of generations
NUM_GENERATIONS = 20

# Optimal value to obtain (not reached)
MAX_VALUE = SIZE_INDIVIDUAL + 10 * SIZE_INDIVIDUAL

# Poblation members
SIZE_POBLATION = 1000

# CXPB  is the probability with which two individuals are crossed
CXPB = 0.5

# MUTPB is the probability for mutating an individual
MUTPB = 0.2

In [5]:
# Define the fitness function for maximizing the result
creator.create("FitnessMax", base.Fitness, weights=(1.0,))

# Creation of the individual with the fitness function
creator.create("Individual", list, fitness=creator.FitnessMax)

In [6]:
toolbox = base.Toolbox()

# Attribute generator having only 0 or 1 in the body of the individual (no day or first day)
toolbox.register("attr_bool", random.randint, 0, 1)

# Structure initializers
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_bool, SIZE_INDIVIDUAL)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

In [7]:
# Enable multiprocessing
pool = multiprocessing.Pool()
toolbox.register("map", pool.map)

In [8]:
# Overlapping function to detect time problems in resources
def find_overlapping(individual):
    found = False
    i = 0
    while i < len(individual) and not found:
        # If the resource is selected for the algorithm.
        if individual[i] == 1:
            j = 0
            while j < len(individual)  and not found:
                if i != j and individual[j] == 1:
                    # Get date for the first resource.
                    date_1 = EVENTO.Fecha_1[EVENTO.index[i]]
                
                    # Get date for the second resource.
                    date_2 = EVENTO.Fecha_1[EVENTO.index[j]]

                    # Evaluate if the resource is in the same day.
                    if (date_1 - date_2).days == 0:
                        # Get open time for the first resource.
                        ha1 = EVENTO.Hora_apertura_1[EVENTO.index[i]]
            
                        # Get close time for the first resource.
                        hc1 = EVENTO.Hora_cierre_1[EVENTO.index[i]]

                        # Get open time for the second resource.
                        ha2 = EVENTO.Hora_apertura_1[EVENTO.index[j]]

                        # Get close time for the second resource.
                        hc2 = EVENTO.Hora_cierre_1[EVENTO.index[j]]
                        if (ha1 == ha2) or (ha2 < ha1 < hc2) or (ha1 < ha2 < hc1) or (ha2 < ha1 < hc1 < hc2) \
                            or (ha1 < ha2 < hc2 < hc1):
                            found = True
                j = j + 1
        i = i + 1

    return found

In [9]:
# Fitness function
def evalOneMax(individual):
    summa = 0
    if not find_overlapping(individual):
        summa = sum(individual)
        for i in range(0, len(individual)):
            if individual[i] == 1:
                summa = summa + PREFERENCIAS[PREFERENCIAS['Id_Recurso'] == EVENTO.index[i]].Preferencia.values[0]
    return summa,

In [10]:
best = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
print(evalOneMax(best))

(49.0,)


In [11]:
best = [0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1]
if find_overlapping(best):
    print("Overlapping")
else:
    print("No overlapping")
    
evalOneMax(best)

Overlapping


(0,)

In [12]:
# Record the allowed operations
toolbox.register("evaluate", evalOneMax)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)

In [13]:
# Initial population
pop = toolbox.population(n=SIZE_POBLATION)
for i in range(0, SIZE_POBLATION):
    pop[i] =  creator.Individual(SIZE_INDIVIDUAL * [0])

In [14]:
# Evaluate the entire population
fitnesses = list(map(toolbox.evaluate, pop))

In [15]:
# Store the values of the fitness function for each individual
for ind, fit in zip(pop, fitnesses):
    ind.fitness.values = fit

In [16]:
# Function for calculating the stats each generation
def calculate_statistics(fits, g=0):
    print("-- Generation %i --" % g)   
    length = len(pop)
    mean = sum(fits) / length
    sum2 = sum(x*x for x in fits)
    std = abs(sum2 / length - mean**2)**0.5
    print("  Min %s" % min(fits))
    print("  Max %s" % max(fits))
    print("  Avg %s" % mean)
    print("  Std %s" % std)

In [17]:
# Gather all the fitnesses in one list and print the stats
fits = [ind.fitness.values[0] for ind in pop]
calculate_statistics(fits)

-- Generation 0 --
  Min 0.0
  Max 0.0
  Avg 0.0
  Std 0.0


In [18]:
# Variable keeping track of the number of generations
g = 0

# Begin the evolution
while max(fits) < MAX_VALUE and g < NUM_GENERATIONS:
    # A new generation
    g = g + 1
    
    # Select the next generation individuals
    offspring = toolbox.select(pop, len(pop))
    
    # Clone the selected individuals
    offspring = list(map(toolbox.clone, offspring))
    
    # Apply crossover and mutation on the offspring
    for child1, child2 in zip(offspring[::2], offspring[1::2]):
        if random.random() < CXPB:
            toolbox.mate(child1, child2)
            del child1.fitness.values
            del child2.fitness.values

    for mutant in offspring:
        if random.random() < MUTPB:
            toolbox.mutate(mutant)
            del mutant.fitness.values
            
    # Evaluate the individuals with an invalid fitness
    invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
    fitnesses = map(toolbox.evaluate, invalid_ind)
    for ind, fit in zip(invalid_ind, fitnesses):
        ind.fitness.values = fit
        
    pop[:] = offspring
    # Gather all the fitnesses in one list and print the stats
    fits = [ind.fitness.values[0] for ind in pop]
    calculate_statistics(fits, g)

-- Generation 1 --
  Min 0.0
  Max 38.0
  Avg 1.302
  Std 4.772085078872756
-- Generation 2 --
  Min 0.0
  Max 48.0
  Avg 4.074
  Std 7.7449676564850805
-- Generation 3 --
  Min 0.0
  Max 49.0
  Avg 8.563
  Std 9.837379274989857
-- Generation 4 --
  Min 0.0
  Max 53.0
  Avg 13.929
  Std 11.340456736833838
-- Generation 5 --
  Min 0.0
  Max 52.0
  Avg 17.44
  Std 12.856220284360406
-- Generation 6 --
  Min 0.0
  Max 52.0
  Avg 18.814
  Std 14.677241021390909
-- Generation 7 --
  Min 0.0
  Max 54.0
  Avg 20.631
  Std 16.386117264318596
-- Generation 8 --
  Min 0.0
  Max 58.0
  Avg 21.318
  Std 17.96393264293762
-- Generation 9 --
  Min 0.0
  Max 62.0
  Avg 23.528
  Std 19.402093082964015
-- Generation 10 --
  Min 0.0
  Max 62.0
  Avg 23.882
  Std 20.944881856911962
-- Generation 11 --
  Min 0.0
  Max 62.0
  Avg 26.483
  Std 21.93389411390508
-- Generation 12 --
  Min 0.0
  Max 65.0
  Avg 28.92
  Std 22.928837737661276
-- Generation 13 --
  Min 0.0
  Max 65.0
  Avg 29.749
  Std 24.0659094

In [19]:
# Find and print best individual
best_index = fits.index(max(fits))
best_partial_individual = pop[best_index]
print(best_partial_individual)

[0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]


In [20]:
# Print the selected resources by the genetic algorithm for one day
for i in range(0,len(best_partial_individual)):
    if best_partial_individual[i] == 1:
        print(EVENTO.Id_Recurso[EVENTO.index[i]], EVENTO.Hora_apertura_1[EVENTO.index[i]], EVENTO.Hora_cierre_1[EVENTO.index[i]])

8 1900-01-01 16:00:00 1900-01-01 16:30:00
9 1900-01-01 17:00:00 1900-01-01 18:00:00
17 1900-01-01 19:00:00 1900-01-01 20:00:00
22 1900-01-01 11:00:00 1900-01-01 12:00:00
46 1900-01-01 18:00:00 1900-01-01 18:30:00
54 1900-01-01 20:00:00 1900-01-01 21:00:00
55 1900-01-01 21:00:00 1900-01-01 22:00:00
91 1900-01-01 12:00:00 1900-01-01 16:00:00
104 1900-01-01 10:00:00 1900-01-01 10:30:00


In [21]:
# Add the large duration resources
best_individual = copy.copy(best_partial_individual)
for value in list(EVENTO_HOURLESS.Id_Recurso):
    best_individual.insert(value,1)

In [22]:
# Print the selected resources including short and long duration ones
for i in range(0,len(best_individual)):
    if best_individual[i] == 1:
        print(ORIGINAL_EVENTO.Id_Recurso[ORIGINAL_EVENTO.index[i]], ORIGINAL_EVENTO.Hora_apertura_1[ORIGINAL_EVENTO.index[i]], ORIGINAL_EVENTO.Hora_cierre_1[ORIGINAL_EVENTO.index[i]], ORIGINAL_EVENTO.Nombre[ORIGINAL_EVENTO.index[i]])

5 1900-01-01 16:00:00 1900-01-01 22:00:00 Merchandisig Sabina
8 1900-01-01 16:00:00 1900-01-01 16:30:00 Firma de discos de Bombay
9 1900-01-01 17:00:00 1900-01-01 18:00:00 Cocierto Bombay
17 1900-01-01 19:00:00 1900-01-01 20:00:00 Concierto de Arnau Griso
18 1900-01-01 08:00:00 1900-01-01 20:00:00 Puesto de venta de merchandising pop español
19 1900-01-01 08:00:00 1900-01-01 20:00:00 Expositor historia del pop español
22 1900-01-01 11:00:00 1900-01-01 12:00:00 Concierto DAMY SOUL
27 1900-01-01 08:00:00 1900-01-01 20:00:00 Expositor jazz es futuro
34 1900-01-01 08:00:00 1900-01-01 20:00:00 Mechandising Reggaeton
41 1900-01-01 08:00:00 1900-01-01 20:00:00 Rap y sus barrios
46 1900-01-01 18:00:00 1900-01-01 18:30:00 Firma Sevendust
48 1900-01-01 08:00:00 1900-01-01 20:00:00 Musica alternativa 80s
54 1900-01-01 20:00:00 1900-01-01 21:00:00 Concierto sinfonico Real Conservatorio Superior de Música de Madrid
55 1900-01-01 21:00:00 1900-01-01 22:00:00 Concierto coro Escuela Superior de Música