# Distance-based index to find the best mutualisations for itineraries
To see it rendered, visit https://nbviewer.org/github/Universite-Gustave-Eiffel/geodev-logicout/blob/Thiago/notebooks/index_distance.ipynb

In [52]:
import sys  
sys.path.insert(0, '../scripts')
import indexes, use_data, IsInclude
from shapely import wkt
import geopandas as gpd
import pandas as pd
import numpy as np
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')


### To complement the similarity index built from the convex hulls, we will use an index based on the distance between the two itineraries

This index is as follows:


$$
  \frac{\text{Distance between starting points} \times \text{Distance between farthest points}}
  {\text{Normalization parameter}}
$$

Where the normalization parameter is: $$ \pi \times (\text{legal radius})^2 $$


We will also use the Jaccard index to weight the distance index:

Index with Jaccard = Distance index * (1 - Jaccard index)

We will take the same itinerary that was used as an example for the convex hull, and use the same parameters for the algorithm

In [53]:
# Parameters forwarded (via **parameters) to mutualisations_with_index
parameters = {
    'radius': 100000,     # legal radius used to search for candidates (unit not stated here - TODO confirm)
    'buffer_hull': 1000,  # buffer handed to the Jaccard index computation
    'type': 1,            # forwarded as-is to IsInclude.IsIn_tournee_gdf
}
# Normalization parameter of the distance index: pi * (legal radius)^2.
# Derived from 'radius' so that changing the radius cannot leave a stale area.
parameters['area'] = np.pi * parameters['radius'] ** 2

# Load the simulations dataset into the notebook-level dataframe used by the cells below
geo_df= use_data.create_gdf('simulations_reel_gdf.csv','cheflieu')

We will take the best-ranked mutualisations according to the above index

In [72]:

def mutualisations_with_index(sample_itineraire,geodataframe,radius,buffer_hull,type,area):
    """Compute the distance index for every candidate of one itinerary.

    Args:
        sample_itineraire: identifier of the reference itinerary, resolved
            through ``use_data.get_itineraire``.
        geodataframe: table of itineraries in which candidates are searched.
        radius: forwarded to ``IsInclude.IsIn_tournee_gdf``.
        buffer_hull: forwarded to ``indexes.jacaard_index``.
        type: forwarded to ``IsInclude.IsIn_tournee_gdf`` (shadows the
            builtin; kept because callers pass it by keyword).
        area: normalization parameter forwarded to ``indexes.index``.

    Returns:
        The table produced by ``indexes.index``; the notebook reads the
        ``id_simulation_right``, ``index`` and ``index_with_jaacard``
        columns from it.
    """
    reference = use_data.get_itineraire(sample_itineraire, geodataframe)

    # every itinerary that can be mutualised with the reference
    candidates = IsInclude.IsIn_tournee_gdf(reference, geodataframe, radius, type)

    # Jaccard index between the reference and each candidate
    with_jaccard = indexes.jacaard_index(reference, candidates, buffer_hull)

    # distance between the starting points
    with_start_distance = indexes.dist_start(reference, with_jaccard)

    # maximum distance between the reference and each candidate
    with_max_distance = indexes.max_distance(reference, with_start_distance)

    # final distance index, normalised by the area
    return indexes.index(with_max_distance, area)

# Compute the indexes of every mutualisation candidate of simulation 473
sample_gdf= mutualisations_with_index(473,geo_df,**parameters)


Let's have a look at the five best-ranked mutualisations

In [73]:
# Sort by the Jaccard-weighted index (ascending) and keep the first five rows
best_mutualisations = (
    sample_gdf[['id_simulation_right', 'index', 'index_with_jaacard']]
    .sort_values(by='index_with_jaacard')
    .head(5)
)
best_mutualisations



Unnamed: 0,id_simulation_right,index,index_with_jaacard
2,17099,0.047572,0.047559
2,9931,0.081706,0.054081
2,9935,0.081706,0.054081
2,3585,0.137131,0.116532
2,5825,0.125408,0.125408


We can plot those mutualisations to check visually whether they are coherent

In [90]:
def plot_mutualisations(id, dataframe,**parameters):
    """Map the five best-ranked mutualisation candidates of one simulation.

    Args:
        id: identifier of the simulation to inspect (the name shadows the
            builtin ``id``; kept so existing callers are unaffected).
        dataframe: table of simulations. It must hold an ``id_simulation``
            column plus ``start`` and ``itineraire`` columns containing WKT
            strings.
        **parameters: forwarded unchanged to ``mutualisations_with_index``.

    Returns:
        A ``(table, m)`` tuple. ``table`` holds the five candidates with the
        lowest ``index_with_jaacard``, ordered by ``id_simulation_right``.
        ``m`` is the map object returned by ``GeoDataFrame.explore`` with two
        layers: the starting points and the itineraries.
    """
    sample = use_data.get_itineraire(id, dataframe)
    ranked = mutualisations_with_index(id, dataframe, **parameters)

    # Lowest Jaccard-weighted index first; keep five, then order the table by id
    best_mutualisations = (
        ranked[['id_simulation_right', 'index', 'index_with_jaacard']]
        .sort_values(by='index_with_jaacard')
        .head()
        .sort_values(by='id_simulation_right')
    )
    index_by_id = dict(zip(best_mutualisations['id_simulation_right'],
                           best_mutualisations['index_with_jaacard']))

    # Select the candidates from the dataframe that was passed in, not from
    # the notebook-level geo_df, so the function works on any dataframe.
    itineraires_to_plot = best_mutualisations['id_simulation_right'].values
    candidates = dataframe[dataframe['id_simulation'].isin(itineraires_to_plot)]
    combined = gpd.GeoDataFrame(pd.concat([candidates, sample], ignore_index=True))

    # Attach each index to its row by id instead of by position: the rows
    # keep the order of `dataframe`, which is not guaranteed to be sorted by
    # id. The rows contributed by the sample itself get an index of 0.
    final_index = candidates['id_simulation'].map(index_by_id).tolist()
    final_index.extend([0] * (len(combined) - len(final_index)))
    combined['final_index'] = final_index

    # First layer: starting points
    start_points = gpd.GeoDataFrame(combined, geometry=combined['start'].map(wkt.loads))
    m = start_points.explore(tiles='CartoDB positron', cmap="plasma", column='final_index',
                             style_kwds=dict(fill=False, stroke=True, weight=4))

    # Second layer, drawn on the same map: the itineraries themselves
    routes = gpd.GeoDataFrame(start_points, geometry=start_points['itineraire'].map(wkt.loads))
    m = routes.explore(m=m, tiles='CartoDB positron', cmap="plasma", column='final_index',
                       categorical=True, style_kwds=dict(fill=False, stroke=True, weight=3))

    return best_mutualisations, m

In [91]:
# NOTE(review): the name `map` rebinds the builtin `map` for the rest of the session.
table,map = plot_mutualisations(10826,geo_df,**parameters) # change the number to inspect any other simulation id

With this function we can choose any simulation in the database and see its best mutualisation candidates according to the presented index
    


In [92]:

# Display the candidates table returned by plot_mutualisations for simulation 10826
table

Unnamed: 0,id_simulation_right,index,index_with_jaacard
542,9314,0.0,0.0
542,9322,0.0,0.0
542,9328,0.0,0.0
542,9339,0.0,0.0
542,9394,0.005817,0.004884


In [93]:
# Display the map returned by plot_mutualisations for simulation 10826
map

Because the formula multiplies the starting distance by the maximum distance, the index is equal to 0 whenever the starting points of two itineraries are the same. The next function therefore leaves out the candidates whose index is 0.

In [94]:
def plot_mutualisations_non_negative_index(id, dataframe,**parameters):
    """Map the five best-ranked candidates whose weighted index is not zero.

    Despite the name, the filter removes the candidates whose
    ``index_with_jaacard`` is exactly 0; it does not test the sign.

    Args:
        id: identifier of the simulation to inspect (the name shadows the
            builtin ``id``; kept so existing callers are unaffected).
        dataframe: table of simulations. It must hold an ``id_simulation``
            column plus ``start`` and ``itineraire`` columns containing WKT
            strings.
        **parameters: forwarded unchanged to ``mutualisations_with_index``.

    Returns:
        A ``(table, m)`` tuple. ``table`` holds the five remaining candidates
        with the lowest ``index_with_jaacard``, ordered by
        ``id_simulation_right``. ``m`` is the map object returned by
        ``GeoDataFrame.explore`` with two layers: the starting points and the
        itineraries.
    """
    sample = use_data.get_itineraire(id, dataframe)
    ranked = mutualisations_with_index(id, dataframe, **parameters)

    # Drop the candidates whose weighted index is exactly 0
    ranked = ranked[ranked['index_with_jaacard'] != 0]

    # Lowest Jaccard-weighted index first; keep five, then order the table by id
    best_mutualisations = (
        ranked[['id_simulation_right', 'index', 'index_with_jaacard']]
        .sort_values(by='index_with_jaacard')
        .head()
        .sort_values(by='id_simulation_right')
    )
    index_by_id = dict(zip(best_mutualisations['id_simulation_right'],
                           best_mutualisations['index_with_jaacard']))

    # Select the candidates from the dataframe that was passed in, not from
    # the notebook-level geo_df, so the function works on any dataframe.
    itineraires_to_plot = best_mutualisations['id_simulation_right'].values
    candidates = dataframe[dataframe['id_simulation'].isin(itineraires_to_plot)]
    combined = gpd.GeoDataFrame(pd.concat([candidates, sample], ignore_index=True))

    # Attach each index to its row by id instead of by position: the rows
    # keep the order of `dataframe`, which is not guaranteed to be sorted by
    # id. The rows contributed by the sample itself get an index of 0.
    final_index = candidates['id_simulation'].map(index_by_id).tolist()
    final_index.extend([0] * (len(combined) - len(final_index)))
    combined['final_index'] = final_index

    # First layer: starting points
    start_points = gpd.GeoDataFrame(combined, geometry=combined['start'].map(wkt.loads))
    m = start_points.explore(tiles='CartoDB positron', cmap="plasma", column='final_index',
                             style_kwds=dict(fill=False, stroke=True, weight=4))

    # Second layer, drawn on the same map: the itineraries themselves
    routes = gpd.GeoDataFrame(start_points, geometry=start_points['itineraire'].map(wkt.loads))
    m = routes.explore(m=m, tiles='CartoDB positron', cmap="plasma", column='final_index',
                       categorical=True, style_kwds=dict(fill=False, stroke=True, weight=3))

    return best_mutualisations, m

In [95]:
# Same simulation as before (10826), this time leaving out the candidates whose index is 0
table_not_null, map_not_null = plot_mutualisations_non_negative_index(10826,geo_df,**parameters)

In [96]:
# Display the candidates table with the zero-index rows left out
table_not_null

Unnamed: 0,id_simulation_right,index,index_with_jaacard
542,9316,0.01226,0.010226
542,9394,0.005817,0.004884
542,9397,0.005491,0.005491
542,9401,0.005014,0.00501
542,10737,0.01226,0.010226


In [97]:
# Display the map built from the non-zero-index candidates
map_not_null

In [None]:
# To save the map to a file, run this cell with the filename you want
map_not_null.save('10826.html')