In [3]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
import pyproj 
from tqdm import tqdm
import reverse_geocoder as rg

from shapely.geometry import Point
from shapely.geometry import LineString

from preprocessing.pre_process_stations import *
from preprocessing.pre_process_traffic import *
from preprocessing.helping_functions import *

from features.config import *
from features.financials_part_2 import *
from features.question_1 import *
from features.question_2 import *

In [4]:
import warnings

warnings.filterwarnings("ignore")

In [5]:
class Config:
    PATH = '../../data/'

class Params:
    PARAM = 0
config = Config()
p = Params()

## Load data

In [7]:
##### Load new coordinates
df_new_points = gpd.read_file(config.PATH+'new_coordinates/new_coordinates.shp')

##### Load hub data 
df_hub_dense = gpd.read_file(config.PATH+'F-aire-logistiques-donnees-detaillees/Aires_logistiques_denses.shp')
df_hub_enlarged = gpd.read_file(config.PATH+'F-aire-logistiques-donnees-detaillees/Aires_logistiques_elargies.shp')

DriverError: ../../data/new_coordinates/new_coordinates.shp: No such file or directory

In [6]:
##### Load traffic data
df_traffic = gpd.read_file(config.PATH+'E-tmja2019-shp/TMJA2019.shp')
df_traffic = preprocess_data(df_traffic)
df_traffic = fix_tmja(df_traffic)

Loading formatted geocoded file...


In [7]:
df_new_points = distance_to_hub(df_hub_dense, df_hub_enlarged, df_new_points)

100%|██████████| 13978/13978 [01:40<00:00, 139.07it/s]


In [8]:
df_new_points = station_distances_all(df_new_points)

100%|██████████| 13978/13978 [04:44<00:00, 49.17it/s]


## Genetic algorithm deap library


In [9]:
df_stations = df_new_points.merge(df_traffic, how='left', on='route')

: 

: 

In [None]:
df_stations = sales(df_stations,2030)

In [None]:
import random
import numpy as np
from deap import base, creator, tools

# Define the fitness function that takes a pandas DataFrame as input and returns a fitness score
def fitness(X,data: pd.DataFrame=df_stations):
    index = X[0]

    if type(index)==int:
        print('error')
        index = np.random.choice(list(range(13978)), size=376, replace=False)

    columns_distance = [c for c in data.columns if c.startswith('distance_to_point_')==True]
    print(len(columns_distance))
    if data.iloc[index,:].shape[0]>0:
        columns_distance_drop = [columns_distance[i] for i in index]
        data_sub = get_closer_station(data.iloc[index,:].drop(columns=columns_distance_drop))
        data_sub = data_sub[data_sub['distance_closer_station'] < 150000*0.8]
        # constraint profitability: make sure the station is profitable 
        fit = - (data_sub['distance_to_closest_large_hub'].mean() + data_sub['distance_to_closest_dense_hub'].mean() - data_sub['distance_closer_station'].mean())/100 + data_sub['Revenues_day'].mean()

    else:
        fit = 10e10
        
    return fit

# Define the evaluation function
def evaluate(individual):
    return fitness(individual),

# Set up the DEAP toolbox
creator.create("FitnessMax", base.Fitness, weights=[1.0,])
creator.create("Individual", list, fitness=creator.FitnessMax)
toolbox = base.Toolbox()

# Register the genetic operators
toolbox.register("individual", tools.initRepeat, creator.Individual, lambda: np.random.choice(list(range(3089)), size=376, replace=False),n=1)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutUniformInt, low=0, up=3, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("evaluate", evaluate)

# Set the number of generations and the population size
num_generations = 50
population_size = 50

# Create the initial population
population = toolbox.population(n=population_size)

# Evaluate the initial population
fitnesses = list(map(toolbox.evaluate, population))
for ind, fit in zip(population, fitnesses):        
    ind.fitness.values = fit

# Set up the hall of fame and statistics objects
hof = tools.HallOfFame(1)
stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register("avg", np.mean)
stats.register("max", np.max)
stats.register("min", np.min)
stats.register("argmin", np.argmin)

avg_fitness = []
min_fitness = []
max_fitness = []
arg_min_fitness = []

# Start the evolution process
for generation in range(num_generations):
    
    population = [x for x in population if type(x)!=int]

    # Select the next generation individuals
    offspring = toolbox.select(population, len(population))

    # Clone the selected individuals
    offspring = list(map(toolbox.clone, offspring))

    # Apply crossover and mutation on the offspring
    for child1, child2 in zip(offspring[::2], offspring[1::2]):
        if len(child1[0])==1:
            child1[0] = np.random.choice(list(range(3089)), size=376, replace=False)
        elif len(child1[0])==1:
            child2[0] = np.random.choice(list(range(3089)), size=376, replace=False)
        else:
            toolbox.mate(child1[0], child2[0])
        del child1.fitness.values
        del child2.fitness.values

    for mutant in offspring:
        toolbox.mutate(mutant)
        del mutant.fitness.values

    # Evaluate the new individuals
    fresh_individuals = [ind for ind in offspring if not ind.fitness.valid]
    fitnesses = map(toolbox.evaluate, fresh_individuals)
    for ind, fit in zip(fresh_individuals, fitnesses):
        ind.fitness.values = fit

    # Add the new individuals to the population
    population[:] = offspring

    # Update the hall of fame and statistics
    #hof.update(population)
    record = stats.compile(population)
    print("Generation {}:".format(generation + 1))
    avg_fitness.append(record["avg"])
    min_fitness.append(record["min"])
    max_fitness.append(record["max"])
    arg_min_fitness.append(record["argmin"])

In [11]:
fig = plt.figure(figsize=(10,6))
plt.plot(np.arange(len(avg_fitness)),avg_fitness,label='mean')
plt.xlabel('Generations')
plt.ylabel('Average fitness function')
plt.legend()
plt.title('Evolution of fitness function')
plt.show()

NameError: name 'avg_fitness' is not defined

<Figure size 720x432 with 0 Axes>

In [427]:
results = pd.DataFrame(population) # the best individual per generation

In [428]:
results

Unnamed: 0,0
0,"[625, 1820, 2820, 2110, 2458, 2556, 2018, 2354..."
1,3
2,"[625, 1820, 2820, 2110, 2458, 2556, 2018, 2354..."
3,"[625, 1820, 2820, 2110, 2458, 2556, 2018, 2354..."
4,1
5,"[625, 1820, 2820, 2110, 2458, 2556, 2018, 2354..."
6,"[625, 1820, 2820, 2110, 2458, 2556, 2018, 2354..."
7,"[625, 1820, 2820, 2110, 2458, 2556, 2018, 2354..."
8,"[625, 1820, 2820, 2110, 2458, 2556, 2018, 2354..."
9,"[625, 1820, 2820, 2110, 2458, 2556, 2018, 2354..."


In [429]:
results.to_csv(config.PATH+'results_part_2.csv')

In [431]:
best_gen = np.argmin(min_fitness)
best_gen

In [375]:
arg_min_fitness[best_gen]

7

In [None]:
list_of_station_points = pd.Series(results.iloc[best_gen].values[0].replace('[ ', '').replace(']', '').replace('\n', '').replace('   ', ' ').replace('  ', ' ').split(' ')).astype(int)

In [None]:
### NEXT STEPS

# optimisation sur les valeurs des coordonnées des routes qu'on a choisi 
# (pour optimiser sur ces valeurs et non sur les stations existantes)

# fonction qui mesure distance entre deux stations sur reseaux routier (et non a vol d'oiseau)
