# Version initiale pour l'Agglo de Montpellier basé sur la v4 de Mulhouse

### Parameters

In [1]:

import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.options.mode.chained_assignment = None
import geopandas as gpd
from pathlib import Path
import folium
from shapely.geometry import Polygon, Point, LineString
import warnings #to ignore parket for gdf warning 
warnings.filterwarnings('ignore', message='.*initial implementation of Parquet.*')

#Parameters 
overwrite = True
showmap = True



In [15]:

version = 1
out_filename = "best_stations_" + str(version)+ ".csv"

data_sources_paths_fr =  {
    "populationDataset_fr": Path(r"data/in/base-ic-evol-struct-pop-2017.csv"),
    "shapes_IRIS_2013" : Path(r"data/in/iris-2013-01-01.zip"),
    "bemo_stations": Path(r"data/in/bemo2112_fr.csv"),
    "sirene": Path(r"C:\Users\J0102113\Documents\a-SIGetWeb\AO\mul\datas\in\m2a_base-sirene-v3-consolidee-insee.geojson"),
    "m2a_communes" : Path(r"C:\Users\J0102113\Documents\a-SIGetWeb\ao\mul\datas\m2a_communes-sur-le-territoire-m2a.geojson"),
    
    "bemo_stations1":Path(r"C:\Users\J0102113\Documents\a-SIGetWeb\ao\mul\datas\in\stations_supp_aomul220506.csv" ),
    "bemo_stations2":Path(r"C:\Users\J0102113\Documents\a-SIGetWeb\ao\mul\datas\interim\Gagnantsfiltres220510.csv" ),
    "bemo_stations3":Path(r"C:\Users\J0102113\Documents\a-SIGetWeb\ao\mul\datas\interim\gagnants_filtres220517.csv" ),
    
    "results":Path(r"C:\Users\J0102113\Documents\a-SIGetWeb\ao\mul\datas\interim\gagnants_consolides220517.csv" ),
     
 }

data_sources_paths_fr_interim = {
    "shapes_path_fr" : Path(r"data/interim/shapes_fr.pq"),
    "iris_p17": Path(r"data/interim/iris_p17_interim.pq"),
    "sirene": Path(r"C:\Users\J0102113\Documents\a-SIGetWeb\AO\mul\datas\interim\sirene_interim.pq"),
}

top_cities  = {'Mulhouse' : ( 7.335908, 47.7508), 'Montpellier' : ( 3.876716, 43.610769)}
city =  'Montpellier'
points = 42 #Number of samples for each simulation run

radius = 0.01 #buffers size
x_range, y_range = 0.3, 0.2 #limits of the bounding box (lon , lat)
#crs = 4326 # Degrees 
crs_proj = 2154 #2 d projections france
crs_joins = 4326 #For joining geodatagrames its better to use same crs
#crs = 3035
country_col_name = {"FR" : "IRIS" } #we have to change data based on the country

country = 'FR'


logs_path = Path(r"C:\Users\J0102113\Documents\a-SIGetWeb\AO\mul\datas\interim\logs.pq")
output_path = Path(r"C:\Users\J0102113\Documents\a-SIGetWeb\AO\mul\datas\interim\best_stations4.csv")
output_path_comm = Path(r"C:\Users\J0102113\Documents\a-SIGetWeb\AO\mul\datas\interim\gagnantsgen4.csv")


In [3]:

class attraction(): 
  ''' Generate points and shapes for the simulations'''

  def generate_rectangle(rect_center: list, x_range: float  , y_range :float )-> dict: 
    '''We genereate a rectangle around the point '''
    limits = {'xmin' : rect_center[0]-x_range, 'xmax' : rect_center[0]+ x_range, 
              'ymin' : rect_center[1]-y_range,  'ymax' : rect_center[1]+ y_range}
    print(f"Limits : {limits}")          
    return limits

  def convert_to_gdf(limits: dict, crs : int)-> gpd.GeoDataFrame(): 
    '''We generate a one line geodataframe to facilitate geo intersection with other gdfs.
    Limits must possess xmin, xmax, ymin, ymax keys''' 
    polys1 = gpd.GeoSeries([Polygon([(limits['xmin'],limits['ymin']), (limits['xmax'],limits['ymin']), 
                                  (limits['xmax'],limits['ymax']), (limits['xmin'],limits['ymax'])])])
    gpd_ = gpd.GeoDataFrame({'geometry': polys1, 'not_used':[1]}, crs=crs)

    return gpd_

  def generate_points(limits: dict , crs : int , points : int = 3 )-> gpd.GeoDataFrame(): 
    '''We genereate a random number of points in the area defined in limits
      CRS is a 2  d projection so a distance and not degrees
    '''
    xc = (limits['xmax'] - limits['xmin']) * np.random.random(points) + limits['xmin']
    yc = (limits['ymax'] - limits['ymin']) * np.random.random(points) + limits['ymin']

    points_list = [Point(x, y) for x, y in zip(xc, yc)]
    default_index = [i for i in range(len(points_list))]

    gpd_ = gpd.GeoDataFrame({'nameofsample': default_index, 'geometry' : points_list,} , crs=crs)

    return gpd_

  def generate_buffer(gpd_: gpd.GeoDataFrame() , distance: float  , crs : int, resolution: int = 4  ):
    '''we  generate a buffer based on distance '''
    gpd_['buffer'] = gpd_["geometry"].buffer(distance = distance, resolution = 4).to_crs(crs)
    gpd_.rename(columns={'geometry':'geom_points', 'buffer':'geometry'}, inplace=True) #buffer as geometry

    return gpd_

  def generate_buffers(gdf_ : gpd.GeoDataFrame, point1 : str = 'sample_point', point2 : str = 'neighborhood_centroid' ) -> gpd.GeoDataFrame : 
    "Generate buffers with distances from sample to centroids using geopandas"
    gdf_['distances'] = gdf_[point1].distance(gdf_[point2], align=True)
    print('ok')
    gdf_['buffers'] = gdf_[point2].buffer(distance = gdf_['distances'], resolution = 4) #.to_crs(crs)

    return gdf_

######

class prepareShapes:
    def __init__(self, data_sources_paths: dict ):
        self.data_sources_paths = data_sources_paths

    def load_gdf(self, key: str, joining_col : str )-> gpd.GeoDataFrame :
      """helper to load a geodataframe - works with geojson and shapefiles"""
      gdf = gpd.read_file(self.data_sources_paths[key], 
                              dtype={joining_col: str})
      return gdf

    def load_df(self, key: str, joining_col : str, zipped : bool = False) -> pd.DataFrame :
        """Enables to load the attributes dataframe specific to France & Germany"""
        if not zipped : 
          df = pd.read_csv(self.data_sources_paths[key], 
                                    sep=',', dtype={joining_col: str})
        else :
          zf = zipfile.ZipFile(self.data_sources_paths[key])

        return df


class mergeShapes:
  """class to regroup geographical joins
  """
  def prepare_osm_gdf(gdf: gpd.GeoDataFrame, keep_cols: list ) -> gpd.GeoDataFrame : 
      """Prepare the geodataset that enables to have competition:  filter colums, identify own installations, and creating a joining col"""

      #1- filter colums : keep only columns that are not too empty
      gdf_ = gdf[keep_cols] 

      #2- try to identify Total stations based on two other columns
      gdf_["brand"] = gdf_["brand"].fillna("Unknown")
      gdf_["name"] = gdf_["name"].fillna("Unknown")
      
      gdf_['is_mine_brand'] = gdf_["brand"].str.contains("total", case=False) 
      gdf_['is_mine_name'] = gdf_["name"].str.contains("total", case=False)
      gdf_['is_mine'] = (gdf_['is_mine_brand']) | (gdf_['is_mine_name'])
      gdf_.drop(['is_mine_brand', 'is_mine_name'], axis=1, inplace=True)

      #3- copy the geometry for later geographical joins
      gdf_['node_coord'] = gdf_['geometry'] 

      return gdf_

  def geofilter_rectangle(gdf_bbox, gdf_comp) : 
      '''geofilters to the bounding box and we rename a new geometry columns for osm points'''
      gdf_ = gpd.sjoin(gdf_bbox, gdf_comp, how="left", predicate='intersects')
      gdf_.drop(['geometry', 'index_right'], axis=1, inplace=True)

      return gdf_

  def geofilter_plz(gdf, gdf2, osm_cols) : 
      """geofilters competition within the plz shapes"""
      gdf = gpd.sjoin(gdf, gdf2, how="inner", predicate='intersects')
      gdf.drop(['index_right', 'not_used'], axis=1, inplace=True)

      gdf = gdf[osm_cols]
      return gdf

  def geofilter_buffers(gpd1, gpd2, osm_cols): 
    """geofilters competition within buffer circle
    gpd1 contains the buffers gpd2 contains the points"""

    gpd2['competitor_geom'] = gpd2['geometry']
    gpd1 = gpd1.loc[:, ["buffers",  "nameofsample", "plz"] ]
    gpd1.rename(columns={'buffers':  'geometry'}, inplace=True) 

    gpd_ = gpd.sjoin(gpd1, gpd2, how="inner", predicate='intersects')
    gpd_ = gpd_[osm_cols]

    return gpd_


#### Helpers for visualisations

def prepare_points(gdf, geometry_col, ):
    """Helper function to plot folium points """
    gdf = gdf.set_geometry(geometry_col)
    points = [[point.xy[1][0], point.xy[0][0]] for point in gdf.geometry ]
    return points

def prepare_attributes(gdf, attrib_cols):
    """Helper function to plot folium points """
    attributes = {}
    for col_name in attrib_cols : 
        attributes[col_name] = list(gdf[col_name])
    return attributes

def html_popup(i, attributes):
    """Helper function to plot folium points """
    popup_html = str()
    for key, value in attributes.items() :
        popup_html += f"{key}: {str(value[i])} " +  '<br> '
    return popup_html


In [4]:
#Generate samples geodataframe (geometry set on buffer polygons)
#We create and a bounding rectangle around it.
limits_dict = attraction.generate_rectangle(top_cities[city], x_range = x_range , y_range = y_range)

#We place (p) points chosen randomly within the boundaries of the rectangle.
samples_gdf = attraction.generate_points(limits_dict, crs = crs_proj, points  = points )
#From each of these points we create a buffer circle of radius (r) 
samples_gdf = attraction.generate_buffer(samples_gdf, distance = radius, crs = crs_proj)
samples_gdf.rename(columns={'geometry':'sample_buffer', 'geom_points':'sample_point'}, inplace=True)  

Limits : {'xmin': 3.5767160000000002, 'xmax': 4.176716, 'ymin': 43.410768999999995, 'ymax': 43.810769}


## Map1 : points randomly and created with influence area around

In [5]:
if showmap: 
    m = folium.Map(location=[top_cities[city][1],top_cities[city][0]] )
    #####
    _gdf = samples_gdf
    geometry_col = "sample_point"
    attrib_cols = ["nameofsample"]

    fg = folium.FeatureGroup("samples")

    points = prepare_points(_gdf, geometry_col  )
    attributes = prepare_attributes(_gdf,attrib_cols )

    for i , coordinates in enumerate(points):
        fg.add_child(folium.Marker(
            location = [coordinates[0],coordinates[1]],
            popup = html_popup(i ,attributes ),
            icon=folium.Icon(color='blue', icon='question', prefix='fa'))
        )
    fg.add_to(m)
    ##### Buffer
    _gdf = samples_gdf
    geometry_col = "sample_buffer"

    fg = folium.FeatureGroup(geometry_col)

    _gdf = _gdf.set_geometry(geometry_col)

    for _, r in _gdf.iterrows():
        sim_geo = gpd.GeoSeries(r[geometry_col])
        geo_j = sim_geo.to_json()
        geo_j = folium.GeoJson(data=geo_j,
                            style_function=lambda x: {'fillColor': 'orange'})
        geo_j.add_to(fg)
    fg.add_to(m)
    #####
    folium.LayerControl().add_to(m)
    
    display(m)

In [None]:
#We prepare country geodataframe - We only need to run it once - after we store an intermediary data

# # Instanciation
# shapes = prepareShapes(data_sources_paths_fr)

# # load shapes of neighborhoods
# shape_gdf = shapes.load_gdf('shapes_IRIS_2013', country_col_name[country])
# shape_gdf.drop(['ORIGINE'], axis=1, inplace=True) #We do not know its purpose
# shape_gdf = shape_gdf.drop_duplicates(subset = ['DCOMIRIS']) #rows are repeated 10 times
# shape_gdf = shape_gdf.sort_values(by="DEPCOM")

# # After anaysis we learned that we need to remove zero padded DCOM to merge with the population
# shape_gdf["DCOMIRIS"] = shape_gdf["DCOMIRIS"].str.lstrip('0')

# shape_gdf.to_parquet(data_sources_paths_fr_interim["shapes_path_fr"])



In [None]:
#We prepare country geodataframe - We only need to run it once - after we store an intermediary data

# shape_gdf = gpd.read_parquet(data_sources_paths_fr_interim["shapes_path_fr"])

# #we load the places population > can be saved to increase simulations speed
# pop_df = pd.read_csv(data_sources_paths_fr["populationDataset_fr"], sep=';')
# cols_to_keep = ['IRIS', 'COM', 'LAB_IRIS' ,  'P17_POP']
# pop_df = pop_df[cols_to_keep]
# pop_df['P17_POP'] = pop_df['P17_POP'].astype('int')
# pop_df['IRIS'] = pop_df['IRIS'].astype('str')

# #We merge shapes and population by district
# country_gdf = pd.merge(left=shape_gdf, right=pop_df, left_on='DCOMIRIS',right_on = 'IRIS', how='left')
# # Quality assesment Some places will not have matched with a population 
# country_gdf.dropna(subset=['P17_POP'], inplace = True) # We should probably do a fill na by sorting by NOM_COM 
# country_gdf['P17_POP'] = country_gdf['P17_POP'].astype('int')
# country_gdf= country_gdf.drop(columns=["IRIS_x", "IRIS_y"])
# print(f" Populations in districts without a shape that were deleted: {(1 - len(country_gdf)/ len(pop_df)):.1%}")
# #print(f"% populations with no corresponding shapes: {(1 - len(pop_df)/ len(country_gdf)):.0%}")

# #We select columns
# cols_to_keep = ['DEPCOM', 'NOM_COM', 'DCOMIRIS', 'NOM_IRIS', 'TYP_IRIS', 'LAB_IRIS', 'P17_POP', 'geometry']
# country_gdf = country_gdf.loc[:, cols_to_keep]

# def obj_to_string(df, colu_s):
#     #to solve a not implemented error on Parquet whe we want to save
#     for colu in colu_s: 
#         if df[colu].dtype == 'O':
#             df[colu] = df[colu].astype("str")
#     return df

# country_gdf = obj_to_string(country_gdf, cols_to_keep)

# #we store the merged shapes and population df
# country_gdf.to_parquet(data_sources_paths_fr_interim["iris_p17"])

In [6]:
country_gdf = gpd.read_parquet(data_sources_paths_fr_interim["iris_p17"])
country_gdf.info()

In [7]:
#Spatially join neighborhoods with the sample points 
#attention dependency on rtree
cake_gdf = gpd.sjoin(country_gdf, samples_gdf.set_geometry('sample_buffer'), how="inner", predicate='intersects')
cake_gdf.rename(columns={'geometry':'neighborhood_shape'}, inplace=True)
cake_gdf.drop(['index_right'], axis=1, inplace=True)

Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:4326
Right CRS: EPSG:2154

  cake_gdf = gpd.sjoin(country_gdf, samples_gdf.set_geometry('sample_buffer'), how="inner", predicate='intersects')


In [8]:

#add centroids
cake_gdf['neighborhood_centroid'] = cake_gdf.set_geometry('neighborhood_shape').centroid
#add buffer circle around the centroids. the radius is based on distance from centroid to sample
cake_gdf = attraction.generate_buffers(cake_gdf, 'sample_point','neighborhood_centroid' )
#add a line between nodes and centroids
#cake_gdf['line_to_sample'] = cake_gdf.apply(lambda x: LineString([x['neighborhood_centroid'], x['sample_point']]),axis=1)

print(f"Districts and Samples : {len(cake_gdf)}")

ok
Districts and Samples : 98



  cake_gdf['neighborhood_centroid'] = cake_gdf.set_geometry('neighborhood_shape').centroid
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:2154
Right CRS: EPSG:4326

  gdf_['distances'] = gdf_[point1].distance(gdf_[point2], align=True)

  gdf_['buffers'] = gdf_[point2].buffer(distance = gdf_['distances'], resolution = 4) #.to_crs(crs)


In [10]:
#probably to be deleted 
# iris_json = cake_gdf[['DCOMIRIS','neighborhood_shape']].sort_values(by='DCOMIRIS')#.head(2)
# iris_json = iris_json.drop_duplicates()
# iris_json = iris_json.rename(columns={"neighborhood_shape": "geometry"})

## 2-Map : Samples merged with Iris population

In [12]:
if showmap: 
    m = folium.Map(location=[top_cities[city][1],top_cities[city][0]] )

    ##### Points Layer
    _gdf = samples_gdf
    geometry_col = "sample_point"
    attrib_cols = ["nameofsample"]

    fg = folium.FeatureGroup("samples")

    points = prepare_points(_gdf, geometry_col  )
    attributes = prepare_attributes(_gdf,attrib_cols )

    for i , coordinates in enumerate(points):
        fg.add_child(folium.Marker(
            location = [coordinates[0],coordinates[1]],
            popup = html_popup(i ,attributes ),
            icon=folium.Icon(color='blue', icon='question', prefix='fa'))
        )
    fg.add_to(m)

    ##### Buffer Layer
    geometry_col = "sample_buffer"

    fg = folium.FeatureGroup(geometry_col)

    _gdf = _gdf.set_geometry(geometry_col)

    for _, r in _gdf.iterrows():
        sim_geo = gpd.GeoSeries(r[geometry_col])
        geo_j = sim_geo.to_json()
        geo_j = folium.GeoJson(data=geo_j,
                            style_function=lambda x: {'fillColor': 'orange'})
        geo_j.add_to(fg)
    fg.add_to(m)

    ##### IRIS

    ###### IRIS Cloropleth Layer

    #geo_data is in a json format
    rename_geom_col = "neighborhood_shape"
    property_cols = ["DCOMIRIS","NOM_IRIS","P17_POP"]

    cp_json = cake_gdf[property_cols + [rename_geom_col]]

    cp_json = cp_json.drop_duplicates()
    cp_json = cp_json.rename(columns={"neighborhood_shape": "geometry"})
    cp_json = gpd.GeoDataFrame(cp_json, crs=crs_joins)
    cp_json = cp_json.to_json()


    cp = folium.Choropleth(
    geo_data=cp_json,
    data = cake_gdf,
    name='Population',
    columns=['DCOMIRIS','P17_POP'],
    key_on="feature.properties.DCOMIRIS",
    fill_color='YlGnBu',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="Population",
    highlight = True
    ).add_to(m)

    # and finally adding a tooltip/hover to the choropleth's geojson
    folium.GeoJsonTooltip(["NOM_IRIS","P17_POP"]).add_to(cp.geojson)


    ##### IRIS Centroid Layer
    _gdf = cake_gdf
    geometry_col = "neighborhood_centroid"
    attrib_cols = ["NOM_IRIS","P17_POP" ]

    fg = folium.FeatureGroup("IRIS Centroids")

    points = prepare_points(_gdf, geometry_col  )
    attributes = prepare_attributes(_gdf,attrib_cols )

    for i , coordinates in enumerate(points):
        fg.add_child(folium.CircleMarker(
            location = [coordinates[0],coordinates[1]],
            popup = html_popup(i ,attributes ),
            radius = 1,
            color = "black")
        )
    fg.add_to(m)

    folium.LayerControl().add_to(m)

    display(m)

## 3- Other players in the area

In [16]:
def prep_bmo(df, bmo_cols, type): 
    "We make sure we have homogenuous colums and we add a column with a number to the dataframe "
    df = df.loc[:,bmo_cols]
    df["type"] = type
    return df

def geoparse(_df, _crs):
    "we parse and add a backup colum because we lose geometry later in the joins "
    _gdf = gpd.GeoDataFrame(
        _df, geometry=gpd.points_from_xy(_df['longitude'], _df['latitude']), crs = _crs)
    _gdf["geometry_bkp"] = _gdf["geometry"]
    return _gdf

bmo_cols = ["name", "latitude", "longitude"]

bmo_df = pd.read_csv(data_sources_paths_fr["bemo_stations"], sep = ";")

bmo_df = prep_bmo(bmo_df, bmo_cols, 0)

#bmo_df_1 = pd.read_csv(data_sources_paths_fr["bemo_stations1"], sep = ";", encoding = "ISO-8859-1")
#bmo_df_2 = bmo_df2 = pd.read_csv(data_sources_paths_fr["bemo_stations2"], sep = ";", encoding = "ISO-8859-1")
#we add name col to be homogenous
#bmo_df_2["name"] = bmo_df_2["com_nom"] +'-' + bmo_df_2["nameofsample"].astype("str")

#Ajout du precedent gagnant
#bmo_df_3 = pd.read_csv(data_sources_paths_fr["bemo_stations3"], sep = ";", encoding = "ISO-8859-1")
#we add name col to be homogenous
#bmo_df_3["name"] = bmo_df_3["com_nom"] +'-' + bmo_df_3["nameofsample"].astype("str")


#we will prepare and  combine the different competitors

# bmo_df_1 = prep_bmo(bmo_df_1, bmo_cols, 1)
# bmo_df_2 = prep_bmo(bmo_df_2, bmo_cols, 2)
# bmo_df_3 = prep_bmo(bmo_df_3, bmo_cols, 3)

# bmo_df_x = pd.concat([bmo_df, bmo_df_1, bmo_df_2, bmo_df_3])


#We transform into à gdf
# bmo_gdf =geoparse(bmo_df_x, crs_proj )

bmo_gdf =geoparse(bmo_df, crs_proj )

#we generate a larger gdf for the bounding box in order to include competitors
limits_dict_bigger = attraction.generate_rectangle(top_cities[city], x_range = x_range * 1.5 , y_range = y_range * 1.5)
gdf_bbox_bigger = attraction.convert_to_gdf(limits_dict_bigger, crs_proj)
#we create a geofadaframe only for competition inside the bounding box using another spatial join 
bbox_bmo_gdf = mergeShapes.geofilter_rectangle(gdf_bbox_bigger, bmo_gdf) 
#we add a random quality to other players
bbox_bmo_gdf['quality'] = np.random.uniform(1,2, len(bbox_bmo_gdf))

Limits : {'xmin': 3.426716, 'xmax': 4.326716, 'ymin': 43.310769, 'ymax': 43.910768999999995}


## Map3 : Other "players" in the area

In [17]:
if showmap: 
    m = folium.Map(location=[top_cities[city][1],top_cities[city][0]] )

    ##### Sample Points Layer
    _gdf = samples_gdf
    geometry_col = "sample_point"
    attrib_cols = ["nameofsample"]

    fg = folium.FeatureGroup("samples")

    points = prepare_points(_gdf, geometry_col  )
    attributes = prepare_attributes(_gdf,attrib_cols )

    for i , coordinates in enumerate(points):
        fg.add_child(folium.Marker(
            location = [coordinates[0],coordinates[1]],
            popup = html_popup(i ,attributes ),
            icon=folium.Icon(color='blue', icon='question', prefix='fa'))
        )
    fg.add_to(m)

    ##### Sample points Buffer Layer
    geometry_col = "sample_buffer"

    fg = folium.FeatureGroup(geometry_col)

    _gdf = _gdf.set_geometry(geometry_col)

    for _, r in _gdf.iterrows():
        sim_geo = gpd.GeoSeries(r[geometry_col])
        geo_j = sim_geo.to_json()
        geo_j = folium.GeoJson(data=geo_j,
                            style_function=lambda x: {'fillColor': 'orange'})
        geo_j.add_to(fg)
    fg.add_to(m)
    #####


    ###### IRIS Cloropleth Layer

    #geo_data is in a json format
    rename_geom_col = "neighborhood_shape"
    property_cols = ["DCOMIRIS","NOM_IRIS","P17_POP"]

    cp_json = cake_gdf[property_cols + [rename_geom_col]]

    cp_json = cp_json.drop_duplicates()
    cp_json = cp_json.rename(columns={"neighborhood_shape": "geometry"})
    cp_json = gpd.GeoDataFrame(cp_json, crs=crs_joins)
    cp_json = cp_json.to_json()


    cp = folium.Choropleth(
    geo_data=cp_json,
    data = cake_gdf,
    name='Population',
    columns=['DCOMIRIS','P17_POP'],
    key_on="feature.properties.DCOMIRIS",
    fill_color='YlGnBu',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="Population",
    highlight = True
    ).add_to(m)

    # and finally adding a tooltip/hover to the choropleth's geojson
    folium.GeoJsonTooltip(["NOM_IRIS","P17_POP"]).add_to(cp.geojson)





    # ###### Iris Cloropleth Layer
    # #Prepare json file for chloropleth map
    # #https://www.roelpeters.be/plot-choropleth-map-geopandas-folium/
    # iris_json = cake_gdf[['DCOMIRIS','neighborhood_shape']].sort_values(by='DCOMIRIS')#.head(2)
    # iris_json = iris_json.drop_duplicates()
    # iris_json = iris_json.rename(columns={"neighborhood_shape": "geometry"})
    # iris_json = gpd.GeoSeries(iris_json.set_index('DCOMIRIS')['geometry']).to_json()


    # folium.Choropleth(
    # geo_data=iris_json,
    # data = cake_gdf,
    # name='Iris',
    # columns=['DCOMIRIS','P17_POP'],
    # key_on='id',
    # fill_color='YlGnBu',
    # fill_opacity=0.7,
    # line_opacity=0.2,
    # legend_name="Population"
    # ).add_to(m)

    ##### INitial Charging Points Layer
    _gdf = bbox_bmo_gdf
    geometry_col = "geometry_bkp"
    attrib_cols = ["name" ]

    fg = folium.FeatureGroup("ChargingStations")

    points = prepare_points(_gdf, geometry_col  )
    attributes = prepare_attributes(_gdf,attrib_cols )

    for i , coordinates in enumerate(points):
        fg.add_child(folium.Marker(
            location = [coordinates[0],coordinates[1]],
            popup = html_popup(i ,attributes ),
            icon=folium.Icon(color='green', icon='glyphicon-flash'))
        )
    fg.add_to(m)

    folium.LayerControl().add_to(m)

    display(m)

In [18]:
#we will want to keep the points coordinates
#bbox_osm_gdf['node_coord'] = bbox_osm_gdf['osm_poi_geom']

cake_gdf.set_geometry('buffers', inplace = True)
bbox_bmo_gdf.set_geometry('geometry_bkp', inplace = True)

#change the order of the dfs 

#Logic : for each sample , one or more districts > buffers , that intersect with one or more competitors  
gdf_comp = gpd.sjoin(bbox_bmo_gdf, cake_gdf,  how="inner", predicate='intersects')

#gdf_comp_cols = ['nameofsample', 'DCOMIRIS', 'P17_POP', 'quality', 'NOM_COM']
#gdf_comp= gdf_comp.loc[: , gdf_comp_cols] #we lose the geodataframe format if we do not apply .loc ! 
#gdf_comp.set_geometry('node_coord', inplace = True)
len(gdf_comp)

Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:2154
Right CRS: EPSG:4326

  gdf_comp = gpd.sjoin(bbox_bmo_gdf, cake_gdf,  how="inner", predicate='intersects')


631

## Map 4 : We keep stations where distance of iriscenter-to-station is less than iriscenter-to-sample 

In [19]:
if showmap: 
    m = folium.Map(location=[top_cities[city][1],top_cities[city][0]] )

    ##### Sample Points Layer
    _gdf = samples_gdf
    geometry_col = "sample_point"
    attrib_cols = ["nameofsample"]

    fg = folium.FeatureGroup("samples")

    points = prepare_points(_gdf, geometry_col  )
    attributes = prepare_attributes(_gdf,attrib_cols )

    for i , coordinates in enumerate(points):
        fg.add_child(folium.Marker(
            location = [coordinates[0],coordinates[1]],
            popup = html_popup(i ,attributes ),
            icon=folium.Icon(color='blue', icon='question', prefix='fa'))
        )
    fg.add_to(m)

    ##### Sample points Buffer Layer
    geometry_col = "sample_buffer"

    fg = folium.FeatureGroup(geometry_col)

    _gdf = _gdf.set_geometry(geometry_col)

    for _, r in _gdf.iterrows():
        sim_geo = gpd.GeoSeries(r[geometry_col])
        geo_j = sim_geo.to_json()
        geo_j = folium.GeoJson(data=geo_j,
                            style_function=lambda x: {'fillColor': 'orange'})
        geo_j.add_to(fg)
    fg.add_to(m)
    #####

    ###### Iris Cloropleth Layer
    #Prepare json file for chloropleth map
    #https://www.roelpeters.be/plot-choropleth-map-geopandas-folium/
    iris_json = cake_gdf[['DCOMIRIS','neighborhood_shape']].sort_values(by='DCOMIRIS')#.head(2)
    iris_json = iris_json.drop_duplicates()
    iris_json = iris_json.rename(columns={"neighborhood_shape": "geometry"})
    iris_json = gpd.GeoSeries(iris_json.set_index('DCOMIRIS')['geometry']).to_json()


    folium.Choropleth(
    geo_data=iris_json,
    data = cake_gdf,
    name='Iris',
    columns=['DCOMIRIS','P17_POP'],
    key_on='id',
    fill_color='YlGnBu',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="Population"
    ).add_to(m)

    ##### Intersected Charging Points Layer
    _gdf = gdf_comp
    geometry_col = "geometry_bkp"
    attrib_cols = ["name","type" ]

    fg = folium.FeatureGroup("ChargingStations")

    points = prepare_points(_gdf, geometry_col  )
    attributes = prepare_attributes(_gdf,attrib_cols )

    for i , coordinates in enumerate(points):
        fg.add_child(folium.Marker(
            location = [coordinates[0],coordinates[1]],
            popup = html_popup(i ,attributes ),
            icon=folium.Icon(color='green', icon='glyphicon-flash'))
        )
    fg.add_to(m)

    folium.LayerControl().add_to(m)

    display(m)

In [20]:
len(cake_gdf)

98

In [21]:
#we only need to keep the shapes of the irises for joining with the shops in these irises
cake_gdf_iris = cake_gdf[['DCOMIRIS','neighborhood_shape']].sort_values(by='DCOMIRIS')#.head(2)
cake_gdf_iris = cake_gdf_iris.drop_duplicates()
cake_gdf_iris = gpd.GeoDataFrame(
    cake_gdf_iris, geometry= 'neighborhood_shape', crs= crs_proj)

cake_gdf_iris.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 85 entries, 14107 to 17330
Data columns (total 2 columns):
 #   Column              Non-Null Count  Dtype   
---  ------              --------------  -----   
 0   DCOMIRIS            85 non-null     object  
 1   neighborhood_shape  85 non-null     geometry
dtypes: geometry(1), object(1)
memory usage: 2.0+ KB


In [22]:
len(cake_gdf_iris)

85

# 4- SIRET Points

In [None]:
#We add siret data

# filename = data_sources_paths_fr["sirene"]
# file = open(filename)
# gdf_sir = gpd.read_file(file)
# #We drop all colums with some null values
# #gdf_sir = gdf_sir.dropna(thresh=gdf_sir.shape[0]*0.99,how='all',axis=1)
# gdf_sir = gdf_sir.loc[gdf_sir['geometry'].notna(),:]
# #we keep just a few columns
# keep_cols = ["naturejuridiqueunitelegale", "groupeunitelegale","sectionetablissement",
#             "libellecommuneetablissement", "codecommuneetablissement", "libellevoieetablissement","geometry"]
# gdf_sir = gdf_sir[keep_cols]

# #gdf_sir.set_geometry('geometry', inplace = True)

# gdf_sir.to_parquet(data_sources_paths_fr_interim["sirene"])
gdf_sir = gpd.read_parquet(data_sources_paths_fr_interim["sirene"])

In [None]:
#

gdf_sir.set_geometry('geometry', inplace = True)
samples_gdf.set_geometry('sample_buffer', inplace = True)
#change the order of the dfs 

#Logic : for each sample , one or more districts > buffers , that intersect with one or more competitors  
gdf_comp2 = gpd.sjoin( gdf_sir, samples_gdf,  how="inner",predicate='intersects')

#gdf_comp_cols = ['nameofsample', 'DCOMIRIS', 'P17_POP', 'quality', 'NOM_COM']
#gdf_comp= gdf_comp.loc[: , gdf_comp_cols] #we lose the geodataframe format if we do not apply .loc ! 
#gdf_comp.set_geometry('node_coord', inplace = True)
len(gdf_comp2)

## Map5 : Siret Data for business presence

In [None]:
if len(gdf_comp2) < 1260:
    print("ok")

    m = folium.Map(location=[top_cities[city][1],top_cities[city][0]] )
    #####
    _gdf = samples_gdf
    geometry_col = "sample_point"
    _gdf = _gdf.set_geometry(geometry_col)

    attrib_cols = ["nameofsample"]

    fg = folium.FeatureGroup("samples")

    points = prepare_points(_gdf, geometry_col  )
    attributes = prepare_attributes(_gdf,attrib_cols )

    for i , coordinates in enumerate(points):
        fg.add_child(folium.Marker(
            location = [coordinates[0],coordinates[1]],
            popup = html_popup(i ,attributes ),
            icon=folium.Icon(color='blue', icon='question', prefix='fa'))
        )
    fg.add_to(m)
    ##### Buffer
    _gdf = samples_gdf
    geometry_col = "sample_buffer"

    fg = folium.FeatureGroup(geometry_col)

    _gdf = _gdf.set_geometry(geometry_col)

    for _, r in _gdf.iterrows():
        sim_geo = gpd.GeoSeries(r[geometry_col])
        geo_j = sim_geo.to_json()
        geo_j = folium.GeoJson(data=geo_j,
                            style_function=lambda x: {'fillColor': 'orange'})
        geo_j.add_to(fg)
    fg.add_to(m)
    
    ##### Shops
    _gdf = gdf_comp2
    geometry_col = "geometry"
    attrib_cols = ["naturejuridiqueunitelegale"]

    fg = folium.FeatureGroup("shops")

    points = prepare_points(_gdf, geometry_col  )
    attributes = prepare_attributes(_gdf,attrib_cols )

    for i , coordinates in enumerate(points):
        fg.add_child(folium.CircleMarker(
            location = [coordinates[0],coordinates[1]],
            popup = html_popup(i ,attributes ),
            radius = 1,
            color = "blue")
        )
    fg.add_to(m)

    folium.LayerControl().add_to(m)
    display(m)

In [None]:
gdf_comp2 = gdf_comp2.groupby('nameofsample')["codecommuneetablissement"].agg("count").reset_index()
gdf_comp2 = gdf_comp2.rename(columns={"codecommuneetablissement": "nb_commerces"})
gdf_comp2.head()

In [None]:
#Consolidation of parts 1 and 2 : we work by districts where we consolidate samples and the competitors

######################
######################
new_cake = cake_gdf.append(gdf_comp)
######################
######################

keep_cols = ['NOM_COM', 'DCOMIRIS', 'NOM_IRIS', 'TYP_IRIS', 'neighborhood_shape' , 'P17_POP', 'nameofsample', 'quality']
new_cake = new_cake.loc[:, keep_cols]

#We set a score to our samples 
new_cake['quality'] = new_cake['quality'].fillna(value=1.5)

new_cake.head()

In [None]:
len(new_cake.nameofsample.unique())

In [None]:
# We now dissolve the competition by aggregating them by sample and district 
score_df = new_cake.groupby(['nameofsample', 'DCOMIRIS' ], as_index=False).agg({'NOM_COM': 'count', 'P17_POP': 'mean'})
score_df.rename(columns={"DCOMIRIS": "Districts", "NOM_COM": "Players", "P17_POP": "Population"}, errors="raise", inplace = True)
score_df["Population"] = score_df["Population"].astype("int")
#score_df["Commerces"] = score_df["Commerces"].astype("int")


In [None]:
#And we aggregate by sample : 
score_df_gb = score_df.groupby(['nameofsample'], as_index = False).agg({'Districts': 'count' , 'Population' : 'sum',  'Players': 'sum'})

score_df_gb = score_df_gb.merge(gdf_comp2, on = 'nameofsample', how = 'left')
score_df_gb['nb_commerces'] = score_df_gb['nb_commerces'].fillna(0).astype("int")

## Scoring Function

In [None]:
#we multiply each row by the mean quality and divide by the number of competitors

########################
########################
score_df_gb['Score1'] = (score_df_gb['Population'] * (score_df_gb['nb_commerces']+1)) 
score_df_gb['Score'] = (score_df_gb['Score1'] / (score_df_gb['Players'])).astype('int')
########################
########################


score_df_gb.sample(7)

In [None]:
#And we aggregate by sample : 
#score_df_gb = score_df.groupby(['nameofsample'], as_index = False).agg({'Districts': 'count' , 'Population' : 'sum', 'Commerces' : 'sum', 'Players': 'sum', 'Score': 'sum'})

#we sort 
score_df_gb.sort_values(by='Score', ascending = False , inplace = True)


In [None]:
from datetime import datetime

#We add the coordinates back
score_df_gb = score_df_gb.merge(samples_gdf.loc[:, ['nameofsample', 'sample_point']], on = 'nameofsample', how = "left" )

#we add metadata to be able to analyse later and run additional simulations 
score_df_gb['Simul_utc_time'] = datetime.today().strftime('%Y-%m-%d-%H:%M:%S')
score_df_gb['country'] = country
score_df_gb['city'] = city
score_df_gb['buffer_size'] = radius
score_df_gb['QualityProcess'] = 'random'
score_df_gb['algorithm'] = 'ABFAB'
score_df_gb['compet_source'] = "bemoandprospects"


#We convert to a geodataframe
score_gdf =  gpd.GeoDataFrame(score_df_gb, crs=crs_joins, geometry = 'sample_point')



## Plots 1-2-3 : Scores explained

In [None]:
#Graph : Score by competitors vs poulation
score_gdf.plot.scatter(y='Population',
                     x='Players',
                      c= 'Score',
                      s= 50, #size of points
                     colormap='jet', sharex=False, title = 'Scores by Competitor and Population amounts')

In [None]:
#Graph : Score by competitors vs population
score_gdf.plot.scatter(y='Population',
                     x='nb_commerces',
                      c= 'Score',
                      s= 50, #size of points
                     colormap='jet', sharex=False, title = 'Scores by Competitor and Population amounts')

In [None]:
#Graph : Score by competitors vs poulation
score_gdf.plot.scatter(y='nb_commerces',
                     x='Players',
                      c= 'Score',
                      s= 50, #size of points
                     colormap='jet', sharex=False, title = 'Scores by Competitor and Commerces amounts')

## Map6 : Scores explained

In [None]:
if showmap: 

    m = folium.Map(location=[top_cities[city][1],top_cities[city][0]] )


    ##### Sample Points Layer
    _gdf = samples_gdf
    geometry_col = "sample_point"
    attrib_cols = ["nameofsample"]

    fg = folium.FeatureGroup("samples")

    points = prepare_points(_gdf, geometry_col  )
    attributes = prepare_attributes(_gdf,attrib_cols )

    for i , coordinates in enumerate(points):
        fg.add_child(folium.Marker(
            location = [coordinates[0],coordinates[1]],
            popup = html_popup(i ,attributes ),
            icon=folium.Icon(color='grey', icon='question', prefix='fa'))
        )
    fg.add_to(m)

    ##### Sample points Buffer Layer
    geometry_col = "sample_buffer"

    fg = folium.FeatureGroup(geometry_col)

    _gdf = _gdf.set_geometry(geometry_col)

    for _, r in _gdf.iterrows():
        sim_geo = gpd.GeoSeries(r[geometry_col])
        geo_j = sim_geo.to_json()
        geo_j = folium.GeoJson(data=geo_j,
                            style_function=lambda x: {'fillColor': 'orange'})
        geo_j.add_to(fg)
    fg.add_to(m)
    #####

    ###### Iris Cloropleth Layer
    #Prepare json file for chloropleth map
    #https://www.roelpeters.be/plot-choropleth-map-geopandas-folium/
    iris_json = cake_gdf[['DCOMIRIS','neighborhood_shape']].sort_values(by='DCOMIRIS')#.head(2)
    iris_json = iris_json.drop_duplicates()
    iris_json = iris_json.rename(columns={"neighborhood_shape": "geometry"})
    iris_json = gpd.GeoSeries(iris_json.set_index('DCOMIRIS')['geometry']).to_json()


    folium.Choropleth(
    geo_data=iris_json,
    data = cake_gdf,
    name='Iris',
    columns=['DCOMIRIS','P17_POP'],
    key_on='id',
    fill_color='YlGnBu',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="Population"
    ).add_to(m)

    ##### INitial Charging Points Layer
    _gdf = bbox_bmo_gdf
    geometry_col = "geometry_bkp"
    attrib_cols = ["name","type" ]

    fg = folium.FeatureGroup("ChargingStations")

    points = prepare_points(_gdf, geometry_col  )
    attributes = prepare_attributes(_gdf,attrib_cols )

    for i , coordinates in enumerate(points):
        fg.add_child(folium.Marker(
            location = [coordinates[0],coordinates[1]],
            popup = html_popup(i ,attributes ),
            icon=folium.Icon(color='green', icon='glyphicon-flash'))
        )
    fg.add_to(m)


    ##### Scored Points Layer
    _gdf = score_gdf
    geometry_col = "sample_point"
    attrib_cols = ["nameofsample", "Population", "nb_commerces", "Players","Score"]

    fg = folium.FeatureGroup("Resultats")

    points = prepare_points(_gdf, geometry_col  )
    attributes = prepare_attributes(_gdf,attrib_cols )

    for i , coordinates in enumerate(points):
        fg.add_child(folium.Marker(
            location = [coordinates[0],coordinates[1]],
            popup = html_popup(i ,attributes ),
            icon=folium.Icon(color='blue', icon='exclamation', prefix='fa'))
        )
    fg.add_to(m)


    folium.LayerControl().add_to(m)

    display(m)

In [None]:
import warnings; warnings.filterwarnings('ignore', message='.*initial implementation of Parquet.*')

#logs_path
if overwrite : 
    score_gdf.to_parquet(logs_path) #COMMENT OUT if you want to overwrite
gdfx = gpd.read_parquet(logs_path)
gdfx = gdfx.append(score_gdf , ignore_index=True)
gdfx = gdfx.drop_duplicates() #we could save the same simulation twice
gdfx.to_parquet(logs_path)

## Map7: Overall Simulations

In [None]:
if showmap: 

    m = folium.Map(location=[top_cities[city][1],top_cities[city][0]] )

    ##### Scored Points Layer
    _gdf = gdfx
    geometry_col = "sample_point"
    attrib_cols = ["nameofsample", "Population", "nb_commerces", "Players","Score"]

    fg = folium.FeatureGroup("Resultats")

    points = prepare_points(_gdf, geometry_col  )
    attributes = prepare_attributes(_gdf,attrib_cols )

    for i , coordinates in enumerate(points):
        fg.add_child(folium.Marker(
            location = [coordinates[0],coordinates[1]],
            popup = html_popup(i ,attributes ),
            icon=folium.Icon(color='blue', icon='exclamation', prefix='fa'))
        )
    fg.add_to(m)

    folium.LayerControl().add_to(m)

    display(m)

In [None]:
#At the end
filename = data_sources_paths_fr["m2a_communes"]
file = open(filename)
gdf_comm = gpd.read_file(file)
#gdf_comm = gdf_comm.loc[-gdf_comm["canton"].isna(),:] #si pas de canton alors c'est toute la de Mulhouse 
assert len(list(gdf_comm["code_insee"].unique())) == 39
gdf_comm.head(2)

In [None]:
#Get points inside the polygons : 

#we will want to keep the points coordinates
#bbox_osm_gdf['node_coord'] = bbox_osm_gdf['osm_poi_geom']

gdfx.set_geometry('sample_point', inplace = True)
gdf_comm.set_geometry('geometry', inplace = True)

#change the order of the dfs 

#Logic : for each sample , one or more districts > buffers , that intersect with one or more competitors  
gdf_list = gpd.sjoin( gdfx, gdf_comm,  how="inner", predicate='intersects')

In [None]:
keep_cols = ['nameofsample', 'com_nom','Districts', 'Population', 'Players', 'nb_commerces',
       'Score1', 'Score', 'sample_point', 'Simul_utc_time', 'country', 'city',
       'buffer_size', 'compet_source', 'popu_munic',  'surface',
       'arrondisse', 'cant_num', 'code_posta', 'nom', 'arrd_num', 'code_insee', 'popu_a_par', 'popu_total']
gdf_list = gdf_list.loc[:,keep_cols]

In [None]:
def keep_bests(df, sort_list, filter_list, num_keep ): 
    """Function to take only the highest value for each group 
    """
    df  = df.sort_values(  sort_list , ascending = [True, False])
    df_gb = df.groupby(sort_list[0]).head(num_keep).reset_index(drop=True)

    filter_upper = [elt.upper() for elt in filter_list]
    df_gb = df_gb.loc[df_gb[sort_list[0]].isin(filter_upper), : ]

    return df_gb

In [None]:
#communes avec une seule station #un nom a été modifié manuellement 
sort_list = ["com_nom", "Score"]
num_keep = 1

filter_list = ["Bantzenheim","Battenheim","Berrwiller","Bollwiller","Bruebach","Chalampé",
"Dietwiller","Eschentzwiller","Feldkirch","Flaxlanden","Galfingue","Heimsbrunn","Hombourg",
"Lutterbach","Morschwiller-le-Bas","Niffer","Ottmarsheim","Petit-Landau","Pulversheim","Reiningue",
"Richwiller","Ruelisheim","Staffelfelden","Steinbrunn-le-Bas","Ungersheim","Zillisheim","Zimmersheim"]

bests_1 = keep_bests(gdf_list, sort_list, filter_list, num_keep)
len(bests_1) == len(filter_list)* num_keep


In [None]:
num_keep = 2
filter_list = ['Baldersheim',  'Habsheim', 'Sausheim']
bests_2 = keep_bests(gdf_list, sort_list, filter_list, num_keep)
len(bests_2) == len(filter_list)* num_keep

In [None]:
num_keep = 3
filter_list = ["Illzach", "Kingersheim", "Pfastatt", "Riedisheim","Rixheim", "Wittelsheim", "Wittenheim", "BRUNSTATT-DIDENHEIM"]
bests_3 = keep_bests(gdf_list, sort_list, filter_list, num_keep)
len(bests_3) == len(filter_list)* num_keep


In [None]:
num_keep = 35
filter_list = ["Mulhouse" ]
bests_35 = keep_bests(gdf_list, sort_list, filter_list, num_keep)
len(bests_35) == len(filter_list)* num_keep

In [None]:
bests_locations = bests_1.append(bests_2).append(bests_3).append(bests_35)

In [None]:
bests_locations.info()

In [None]:
if showmap: 

    m = folium.Map(location=[top_cities[city][1],top_cities[city][0]] )

    ##### Scored Points Layer
    _gdf = gdf_list
    geometry_col = "sample_point"
    attrib_cols = ["nameofsample", "Population", "nb_commerces", "Players","Score"]

    fg = folium.FeatureGroup("Resultats")

    points = prepare_points(_gdf, geometry_col  )
    attributes = prepare_attributes(_gdf,attrib_cols )

    for i , coordinates in enumerate(points):
        fg.add_child(folium.Marker(
            location = [coordinates[0],coordinates[1]],
            popup = html_popup(i ,attributes ),
            icon=folium.Icon(color='blue', icon='exclamation', prefix='fa'))
        )
    fg.add_to(m)

    display(m)

In [None]:
def json_chloropleth(_gdf, key_col, geom_col): #returns a JSON
    """Prepare json file for chloropleth map
    https://www.roelpeters.be/plot-choropleth-map-geopandas-folium/"""

    file_json = _gdf.drop_duplicates()
    assert len(_gdf[key_col].unique()) == len(_gdf)

    file_json = file_json.rename(columns={"geom_col": "geometry"})
    file_json = gpd.GeoSeries(file_json.set_index(key_col)['geometry']).to_json()
    return file_json

json_file = json_chloropleth(gdf_comm, "code_insee", "geometry" )


In [None]:
#Get the best station per code_insee

def add_best(gdf, group_col, max_col): 
    _df_gb = gdf.groupby(group_col).agg({max_col : "max"})
    col_name = "best_" + max_col
    _df_gb = _df_gb.rename(columns={'Score': col_name})

    gdf_2 = gdf.merge(_df_gb, on = "code_insee" , how = "left" )

    return gdf_2

def extract_bests(_gdf, max_col):
    col_name = "best_" + max_col
    _gdf['besty'] = np.where(_gdf[max_col] == _gdf[col_name], True, False)
    _gdf_2 = _gdf.loc[_gdf["besty"] == True, :]
    return _gdf_2

gdf = gdf_list
group_col = "code_insee"
max_col = "Score"

_gdf = add_best(gdf, group_col, max_col)
gdf_bests = extract_bests(_gdf, max_col)


## MAP8: Best Scores for each Commune

In [None]:
m = folium.Map(location=[top_cities[city][1],top_cities[city][0]] )

##### Scored Points Layer
_gdf = bests_locations
geometry_col = "sample_point"
attrib_cols = ["nameofsample", "Population", "nb_commerces", "Players","Score"]

fg = folium.FeatureGroup("Resultats")

points = prepare_points(_gdf, geometry_col  )
attributes = prepare_attributes(_gdf,attrib_cols )

for i , coordinates in enumerate(points):
    fg.add_child(folium.Marker(
        location = [coordinates[0],coordinates[1]],
        popup = html_popup(i ,attributes ),
        icon=folium.Icon(color='red', icon='exclamation', prefix='fa'))
    )
fg.add_to(m)

# ###### Iris Cloropleth Layer
#Prepare json file for chloropleth map
_gdf =  gdf_comm
key_col = "com_nom"

json_file = json_chloropleth(gdf_comm, key_col, "geometry" )

folium.Choropleth(
    geo_data=json_file,
    data = _gdf,
    name='Communes',
    columns=[key_col,"popu_total"],
    key_on='id',
    fill_color='YlGnBu',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="popu_total"
    ).add_to(m)

folium.LayerControl().add_to(m)

#m  

In [None]:
def add_lat_lon(df): 
    df['longitude'] = df["sample_point"].x
    df['latitude'] = df["sample_point"].y
    return df

bests_locations = add_lat_lon(bests_locations)
bests_locations.head()

In [None]:
bests_locations.to_csv(output_path, index = False , sep = ',')

# Meilleurs emplacements

In [None]:
#Fichier source réalisé manuellement:
#concatenation du résultat de la premieère , deuxième et troisieme simulation
#les résultats ont été triés par date de la dernière simulation : donc attition à l'ordre d'exectution
#pour les comunnes à 2 stations prise en compte des 2 premières simul
#pour les communes à 3 ou plus stations prise en compte des 3 simul , tri par lat et élimination à chaque fois des stations proches avec les scores les plus bas
df_result =  pd.read_csv(data_sources_paths_fr["results"], sep = ";", encoding = "ISO-8859-1")
df_result = df_result.loc[df_result["manualdelete"].isna(),: ]

In [None]:
#Differentiate colors in simulations

simuls_list = list(df_result["Simul_utc_time"].unique()) 
color_palette = ["orange", "red",  "darkblue"]

df_result ["color"] = "darkblue" #initialize
df_result.loc[df_result["Simul_utc_time"] == simuls_list[0], ["color"]] = color_palette[0]
df_result.loc[df_result["Simul_utc_time"] == simuls_list[1], ["color"]] = color_palette[1]
df_result.loc[df_result["Simul_utc_time"] == simuls_list[2], ["color"]] = color_palette[2]

#We convert to a geodataframe
df_result =geoparse(df_result, crs_proj )

#We need to clean col name if we did manual job in the source file on excel
df_result.columns  = df_result.columns.str.replace(' ', '')

In [None]:
#Buffer layer
df_result = attraction.generate_buffer(df_result.set_geometry('geometry'), distance = radius, crs = crs_proj)
df_result = df_result.rename(columns = {"geometry":"buffer"})
df_result = df_result.drop(columns= ["manualdelete"])

In [None]:
#Fichier  transmis au métier
#####################################
cols_to_keep = [ 'color', 'com_nom', "Score",
        'Players', 'Score1', 'Districts', 'Population', 'nb_commerces',  'nameofsample',   'longitude', 'latitude',
        'surface', 'arrondisse', 'cant_num', 'code_posta', 'nom', 'arrd_num', 'code_insee',  'popu_total',  'Simul_utc_time']
df_result = df_result.sort_values(by = ["color", "com_nom", "Score"], ascending = False)

#save file
df_result[cols_to_keep].to_csv(output_path_comm, encoding = "utf-8", index = False, sep = ";")

df_result.sample(2)

In [None]:
types = [0, 1]
compet_visu_gdf = bbox_bmo_gdf.loc[bbox_bmo_gdf["type"].isin(types), : ]
compet_visu_gdf = compet_visu_gdf.drop(columns= ["quality"])
compet_visu_gdf= compet_visu_gdf.drop_duplicates()
#compet_visu_gdf.head()
#compet_visu_gdf.sample(10)

In [None]:
df_result.head(3)

In [None]:
#intersect buffers with the shops in the area 
gdf_sho = gpd.sjoin( gdf_sir, df_result.set_geometry('buffer'),  how="inner",predicate='intersects')

cols_to_keep = ["naturejuridiqueunitelegale","geometry"]

gdf_sho = gdf_sho.loc[:, cols_to_keep ]
gdf_sho = gdf_sho.drop_duplicates()

gdf_sho_slim = gdf_sho.sample(int(len(gdf_sho)/3))
gdf_sho_slim.info()

In [None]:
#intersect buffers with the Iris in the area 


In [None]:
m = folium.Map(location=[top_cities[city][1],top_cities[city][0]] )


##### Initial Charging Points Layer

_gdf = compet_visu_gdf
geometry_col = "geometry_bkp"
attrib_cols = ["name","type" ]

fg = folium.FeatureGroup("ChargingStations")

points = prepare_points(_gdf, geometry_col  )
attributes = prepare_attributes(_gdf,attrib_cols )

for i , coordinates in enumerate(points):
    fg.add_child(folium.Marker(
        location = [coordinates[0],coordinates[1]],
        popup = html_popup(i ,attributes ),
        icon=folium.Icon(color='green', icon='glyphicon-flash'))
    )
fg.add_to(m)



##### Scored Points Layer
_gdf = df_result
geometry_col = "geom_points"
attrib_cols = ["nameofsample", "Score", "Population", "nb_commerces","Districts", "Players"]

fg = folium.FeatureGroup("Resultats")

colors = df_result["color"].values

points = prepare_points(_gdf, geometry_col  )
attributes = prepare_attributes(_gdf,attrib_cols )


for i , coordinates in enumerate(points):
    fg.add_child(folium.Marker(
        location = [coordinates[0],coordinates[1]],
        popup = html_popup(i ,attributes ),
        icon=folium.Icon(color=str(colors[i]), icon='exclamation', prefix='fa'))
    )
fg.add_to(m)


##### Sample points Buffer Layer
_gdf = df_result
geometry_col = "buffer"

fg = folium.FeatureGroup(geometry_col)

_gdf = _gdf.set_geometry(geometry_col)

for _, r in _gdf.iterrows():
    sim_geo = gpd.GeoSeries(r[geometry_col])
    geo_j = sim_geo.to_json()
    geo_j = folium.GeoJson(data=geo_j,
                        style_function=lambda x: {'fillColor': 'orange'})
    geo_j.add_to(fg)
fg.add_to(m)

#####

##### Shops
_gdf = gdf_sho_slim
geometry_col = "geometry"
#attrib_cols = ["naturejuridiqueunitelegale"]

fg = folium.FeatureGroup("shops")

points = prepare_points(_gdf, geometry_col  )
#attributes = prepare_attributes(_gdf,attrib_cols )

for i , coordinates in enumerate(points):
    fg.add_child(folium.CircleMarker(
        location = [coordinates[0],coordinates[1]],
        #popup = html_popup(i ,attributes ),
        radius = 0.1,
        color = "blue")
    )
fg.add_to(m)

####### Communes Cloropleth Layer
#Prepare json file for chloropleth map
_gdf =  gdf_comm
key_col = "com_nom"

json_file = json_chloropleth(gdf_comm, key_col, "geometry" )

folium.Choropleth(
    geo_data=json_file,
    data = _gdf,
    name='Communes',
    columns=[key_col,"popu_total"],
    key_on='id',
    fill_color='YlGnBu',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="popu_total"
    ).add_to(m)

folium.LayerControl().add_to(m)

#m  


In [None]:
m.save("map_x.html")