In [1]:
import numpy as np
import pandas as pd 
import geneticalgorithm
from geneticalgorithm import geneticalgorithm as ga
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
import pyproj 
from tqdm import tqdm

from shapely.geometry import Point
from shapely.geometry import LineString

from preprocessing.pre_process_stations import *

In [2]:
import warnings

warnings.filterwarnings("ignore")

In [3]:
class Config:
    PATH = '../../data/'

class Params:
    PARAM = 0
config = Config()
p = Params()

## Load data

In [4]:
##### Load and preprocess station data 

df_stations = pd.read_csv(config.PATH+'I-Données de stations TE_DV.xlsx - export_data_te.csv')
df_stations = create_coordinate_column_station(df_stations)
df_stations = transform_station_coordinates(df_stations)

In [5]:
##### Load hub data 

df_hub = gpd.read_file(config.PATH+'F-aire-logistiques-donnees-detaillees/Aires_logistiques_denses.shp')

In [6]:
##### Load traffic data

df_traffic = gpd.read_file(config.PATH+'E-tmja2019-shp/TMJA2019.shp')

### Distance to closer station

In [7]:
### THIS ONE SHOULD WORK because coordinates of stations in same projection

def station_distances(
        df_stations: pd.DataFrame
):
    '''
    Input:
    - df_stations: dataframe of stations with 'Point_coordinate' column in pseudo Lambert-93 projection

    Returns: 
    df_stations dataframe with the new columns:
    - Distance_closer_station: distance to closest other station for each station (using the distance geopandas function)
    - Closer_station_by_index: the index of the closest station (can check its id later)

    The geopandas distance function computes the shortest distance between Point and Point
    '''

    nb_stations = df_stations.shape[0]
    df_stations['Point_coordinate'] = df_stations['Coordinate_transform'].apply(lambda x: Point(x))
    df_stations['Distance_closer_station'] = np.zeros(nb_stations)
    df_stations['Closer_station'] = np.zeros(nb_stations)
    for j in tqdm(range(nb_stations)):
        distance_point_point = []
        for i in range(nb_stations):
            if i != j:
                point = df_stations.loc[i,'Point_coordinate']
                distance_point_point.append(df_stations.loc[j,'Point_coordinate'].distance(point))
            # exclude the distance to its own station
            else:
                distance_point_point.append(1e10)
        df_stations.loc[j,'Distance_closer_station'] = np.min(np.array(distance_point_point))
        df_stations.loc[j,'Closer_station_by_index'] = np.argmin(np.array(distance_point_point))

    return df_stations

In [8]:
df_stations = station_distances(df_stations)

100%|██████████| 3728/3728 [16:15<00:00,  3.82it/s]  


In [15]:
df_stations['Closer_station_by_index'].value_counts

<bound method IndexOpsMixin.value_counts of 0       3569.0
1        623.0
2        285.0
3       3375.0
4         47.0
         ...  
3723    3720.0
3724    3327.0
3725    3495.0
3726    1438.0
3727    2525.0
Name: Closer_station_by_index, Length: 3728, dtype: float64>

### Distance to closer route

In [13]:
### TO DO: GUS
# make sure the stations coordinates and the routes' AND hubs' geometry column are in the same projection
# transform the distances in projection Lambert-93 into KILOMETERS

def compute_route_hub_distance(
        df_stations: pd.DataFrame,
        df_geometry:  pd.DataFrame,
        route: bool
        ):
    
    '''
    Input:
    - df_stations: dataframe of stations 
    - df_geometry: dataframe with 'geometry' column which is a geopandas object 
                   Polygon for hub, Linestring or Multistring for routes

    Returns: 
    df_stations dataframe with the new columns:
    - Distance_closer_route: distance to closest route for each station (using the distance geopandas function)
    - Closer_route_by_index: the index of the closest route (can check its id later)

    The geopandas distance function computes the shortest distance between point and Linestring, Polygon, etc.
    '''

    nb_stations = df_stations.shape[0]
    if route:
        distance_column_name = 'Distance_closer_route'
        route_column_name = 'Closer_route_by_index'
    else:
        distance_column_name = 'Distance_closer_hub'
        route_column_name = 'Closer_hub_by_index'

    df_stations['Point_coordinate'] = df_stations['Coordinate_transform'].apply(lambda x: Point(x))
    df_stations[distance_column_name] = np.zeros(nb_stations)
    df_stations[route_column_name] = np.zeros(nb_stations)

    for j in tqdm(range(nb_stations)):
        distance_point_line = []
        for i in range(df_geometry.shape[0]):
            route = df_geometry.loc[i,'geometry']
            distance_point_line.append(df_stations.loc[j,'Point_coordinate'].distance(route))
        df_stations.loc[j,distance_column_name] = min(distance_point_line)
        df_stations.loc[j,route_column_name] = np.argmin(np.array(distance_point_line))

    return df_stations

In [11]:
# For routes 
df_stations = compute_route_hub_distance(df_stations,df_traffic,route=True)
df_stations['Closer_route_by_index'].value_counts()

100%|██████████| 3728/3728 [15:34<00:00,  3.99it/s]


322.0     3657
347.0       63
3765.0       7
2674.0       1
Name: Closer_route_by_index, dtype: int64

In [14]:
# For hubs 
df_stations = compute_route_hub_distance(df_stations,df_hub,route=False)
df_stations['Closer_hub_by_index'].value_counts()

100%|██████████| 3728/3728 [01:10<00:00, 52.98it/s]


24.0     3657
236.0      70
295.0       1
Name: Closer_hub_by_index, dtype: int64

### Quantity sold per day

In [16]:
### Data of operations from slides 

df_g = pd.read_csv(config.PATH+'G-operation-of-stations.csv')
df_g.head()

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Small - 1 tpd,Medium - 2 tpd,Large - 4 tpd
0,Capex (installed equipment),m€,3,5,8
1,Depreciation,y,15,15,15
2,Yearly Opex,% of Capex,10,8,7
3,Storage Onsite,tH2,2,3,4
4,Construction time,y,1,1,1


In [None]:
df_stations['Quantity_sold_per_day(in kg)'] = np.zeros(df_stations.shape[0])

### TO DO 

# Estimation of number of trucks passing per station 
# HOW ? same model as region but per route and then per station

# Transform number of trucks into quantity of H2 sold based on:
# reservoir size (based on brands)
# revervoir persentage filled --> take ... % of reservoir size 

### Capacity 

In [None]:
df_stations['Capacity_1'] = (df_stations['Quantity_sold_per_day(in kg)'])/1000 
df_stations['Capacity_2'] = (df_stations['Quantity_sold_per_day(in kg)'])/2000 
df_stations['Capacity_3'] = (df_stations['Quantity_sold_per_day(in kg)'])/4000 

### Profitability

In [None]:
## based on threshold : 0 or 1
df_stations['Profitability_1'] = np.zeros(df_stations.shape[0])
df_stations[df_stations['Capacity_1'] > 0.9]['Profitability_1'] = 1

df_stations['Profitability_2'] = np.zeros(df_stations.shape[0])
df_stations[df_stations['Capacity_2'] > 0.8]['Profitability_2'] = 1

df_stations['Profitability_3'] = np.zeros(df_stations.shape[0])
v[df_stations['Capacity_3'] > 0.6]['Profitability_3'] = 1

In [None]:
## based on revenue and cost: in m euros 
# check online or ask their teams
price_per_kg_h2 = ### TO DO : CES 

df_stations['Profitability_1_net'] = 3 - df_stations['Quantity_sold_per_day(in kg)']*price_per_kg_h2

df_stations['Profitability_2_net'] = 5 - df_stations['Quantity_sold_per_day(in kg)']*price_per_kg_h2

df_stations['Profitability_3_net'] = 8 - df_stations['Quantity_sold_per_day(in kg)']*price_per_kg_h2

## Define the optimization problem

In [None]:
# Add constraint: 
max_nb_station = ### EXTRACT FROM PART 1


def fitness(X,max_nb_station,max_km):
    
    fit = 0

    # constraint 1: number stations == result from part 1
    if np.sum(X)==max_nb_station:
        
        # constraint 2: distance between stations < min(autonomy)*0.8
        if  < max_km*0.8:
        
            # constraint 3: make sure the station is profitable 
            if profit > 0:
            # end constraint 2


        # end constraint 2
        else:
            fit = 10e10000

    # end constraint 1
    else:
        fit = 10e10000
        
    return fit

varbound = np.array([[1,2,4]]*nb_stations) # binary variables for existing stations

model=ga(function=fitness,dimension=nb_stations,variable_type='int',variable_boundaries=varbound)

model.run()

In [None]:
# parameters to tune 

algorithm_param = {'max_num_iteration': None,\
                   'population_size':100,\
                   'mutation_probability':0.1,\
                   'elit_ratio': 0.01,\
                   'crossover_probability': 0.5,\
                   'parents_portion': 0.3,\
                   'crossover_type':'uniform',\
                   'max_iteration_without_improv':None}

In [None]:
### NEXT STEPS

# optimisation sur les valeurs des coordonnées des routes qu'on a choisi 
# (pour optimiser sur ces valeurs et non sur les stations existantes)

# fonction qui mesure distance entre deux stations sur reseaux routier (et non a vol d'oiseau)
