#### Calcul de différentes relations "plus proche" entre les différents objets

Objectifs :

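- Génération de relations "plus proche point géospatial" entre différents objets, pour le calcul des scores :
    - NATIONAL_ROAD_POINT / ROAD_POINT : tous les points routes à moins de 100 m
    - NATIONAL_ROAD_POINT restés sans relation après cette première passe : les 2 points routes les plus proches à moins de 400 m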

Imports :

In [1]:
import geopandas as gpd
import pandas as pd
import geopy.distance
import os
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import copy
import joblib

from neo4j import GraphDatabase, basic_auth

Neo4j session :

In [2]:
# Neo4j password: read from the NEO4J_PASSWORD environment variable when it is
# set, so the credential does not have to be stored in the notebook; otherwise
# fall back to the original development default.
PASSWORD_NEO4J = os.environ.get('NEO4J_PASSWORD', 'passwordneo4j')

In [3]:
# Build the credentials first, then open the Bolt driver on the local Neo4j
# instance; every session in this notebook is created from this driver.
neo4j_credentials = basic_auth("neo4j", PASSWORD_NEO4J)
driver = GraphDatabase.driver("bolt://localhost:7687", auth=neo4j_credentials)

Neo4j requests :

In [5]:
def get_all_national_rp(tx):
    """Fetch the x/y properties of every NATIONAL_ROAD_POINT node.

    Args:
        tx: Transaction-like object exposing ``run(query)``.

    Returns:
        The records as returned by ``result.data()``, one entry per node
        with the keys ``x`` and ``y``.
    """
    cypher = "MATCH (n:NATIONAL_ROAD_POINT) RETURN n.x as x, n.y as y"
    return tx.run(cypher).data()


In [4]:
def get_all_national_rp_with_edges_count(tx):
    """Count, per NATIONAL_ROAD_POINT, its relationships to ROAD_POINT nodes.

    The pattern is undirected and not restricted to a relationship type.
    Points without any such relationship do not appear in the output.

    Args:
        tx: Transaction-like object exposing ``run(query)``.

    Returns:
        The records as returned by ``result.data()``, with the keys ``x``,
        ``y`` and ``nb_edges``.
    """
    cypher = "MATCH (n:NATIONAL_ROAD_POINT)-[r]-(p:ROAD_POINT) RETURN n.x as x, n.y as y, count(r) as nb_edges"
    return tx.run(cypher).data()

In [6]:
def get_all_communes_with_no_nearly_road(tx):
    """Fetch the NATIONAL_ROAD_POINT nodes that have no NEARLY_TO relationship.

    NOTE(review): the function name mentions "communes", but the query
    matches NATIONAL_ROAD_POINT nodes.

    Args:
        tx: Transaction-like object exposing ``run(query)``.

    Returns:
        The records as returned by ``result.data()``, with the keys ``x``
        and ``y``.
    """
    cypher = "MATCH (n:NATIONAL_ROAD_POINT) WHERE NOT EXISTS ((n)-[:NEARLY_TO]-()) RETURN n.x as x, n.y as y"
    return tx.run(cypher).data()


In [4]:
def delete_all_edges_NEARLY_TO(tx, min_distance=500):
    """Delete the NEARLY_TO relationships whose ``distance`` exceeds a threshold.

    Despite the name, not every NEARLY_TO relationship is removed: only those
    with ``r.distance > min_distance`` are deleted.

    Args:
        tx: Transaction-like object exposing ``run(query, **params)``.
        min_distance: Relationships with a ``distance`` property strictly
            greater than this value are deleted. Defaults to 500, the value
            that was previously hard-coded in the query.

    Returns:
        The records as returned by ``result.data()``; a single entry whose
        ``COUNT`` key holds the number of matched (deleted) relationships.
    """
    # The pattern is directed on purpose: an undirected ()-[r]-() pattern
    # matches every relationship once per direction, which doubled COUNT.
    query = (
        "MATCH ()-[r:NEARLY_TO]->() "
        "WHERE r.distance > $min_distance "
        "DELETE r "
        "RETURN count(*) AS COUNT"
    )

    result = tx.run(query, min_distance=min_distance)
    return result.data()

In [8]:
def create_dist_edge_per_batch_NP(tx, props_list, max_distance=100):
    """Link each NATIONAL_ROAD_POINT of a batch to the ROAD_POINTs close to it.

    For every ``{x, y}`` entry of ``props_list`` the matching
    NATIONAL_ROAD_POINT is looked up, and a ``(p)-[:NEARLY_TO]->(n)``
    relationship carrying a ``distance`` property is created for every
    ROAD_POINT ``p`` located strictly closer than ``max_distance``.

    The relationships are created with CREATE, so running the same batch
    twice produces duplicate relationships.

    Args:
        tx: Transaction-like object exposing ``run(query, **params)``.
        props_list: List of dicts with the keys ``x`` and ``y``.
        max_distance: Search radius, compared against
            ``point.distance(n.location, p.location)``. Defaults to 100, the
            value that was previously hard-coded in the query.

    Returns:
        The records as returned by ``result.data()``, with the keys ``x``,
        ``y`` and ``edge_created``. Points for which nothing was created are
        absent from the output.
    """
    query = (
        "UNWIND $props_list AS map "
        "MATCH (n:NATIONAL_ROAD_POINT) WHERE n.x = map.x AND n.y = map.y "
        "MATCH (p:ROAD_POINT) WHERE point.distance(n.location, p.location) < $max_distance "
        "CREATE (p)-[r:NEARLY_TO]->(n) "
        "SET r.distance = point.distance(n.location, p.location) "
        "RETURN n.x as x, n.y as y, count(*) as edge_created"
    )

    result = tx.run(query, props_list=props_list, max_distance=max_distance)
    return result.data()

In [39]:
def create_dist_edge_per_batch_NP_400m(tx, props_list, max_distance=400, max_edges=2):
    """Link each NATIONAL_ROAD_POINT of a batch to its few closest ROAD_POINTs.

    For every ``{x, y}`` entry of ``props_list``, a subquery selects the
    (NATIONAL_ROAD_POINT, ROAD_POINT) pairs located strictly closer than
    ``max_distance``, ordered by increasing distance and limited to
    ``max_edges`` pairs. A ``(p)-[:NEARLY_TO]->(n)`` relationship carrying a
    ``distance`` property is then created for each selected pair.

    The relationships are created with CREATE, so running the same batch
    twice produces duplicate relationships.

    Args:
        tx: Transaction-like object exposing ``run(query, **params)``.
        props_list: List of dicts with the keys ``x`` and ``y``.
        max_distance: Search radius, compared against
            ``point.distance(n.location, p.location)``. Defaults to 400, the
            value that was previously hard-coded in the query.
        max_edges: Maximum number of pairs kept per entry of ``props_list``
            (must be a non-negative integer, as it feeds ``LIMIT``).
            Defaults to 2, the value that was previously hard-coded.

    Returns:
        The records as returned by ``result.data()``, with the keys ``x``,
        ``y`` and ``edge_created``.
    """
    query = (
        "UNWIND $props_list AS map "
        "CALL { "
        "WITH map "
        "MATCH (n:NATIONAL_ROAD_POINT) WHERE n.x = map.x AND n.y = map.y "
        "MATCH (p:ROAD_POINT) WHERE point.distance(n.location, p.location) < $max_distance "
        "RETURN n, p "
        "ORDER BY point.distance(n.location, p.location) "
        "LIMIT $max_edges } "
        "CREATE (p)-[r:NEARLY_TO]->(n) "
        "SET r.distance = point.distance(n.location, p.location) "
        "RETURN n.x as x, n.y as y, count(*) as edge_created"
    )

    result = tx.run(
        query,
        props_list=props_list,
        max_distance=max_distance,
        max_edges=max_edges,
    )
    return result.data()

---

EDGES NATIONAL_ROAD_POINT / ROAD_POINT

Ardoise : pour effacer toutes les anciennes relations "NEARLY_TO"

In [15]:
# Disabled scratch cell, kept as a bare string literal so that it is never
# executed. If re-enabled, it would run delete_all_edges_NEARLY_TO in a write
# transaction, close the driver and print the returned records.
'''with driver.session() as session:

    count_list = []

    result = session.execute_write(delete_all_edges_NEARLY_TO)

driver.close()

print(result) '''

NameError: name 'delete_all_edges_NEARLY_TO' is not defined

Récupération de tous les points NATIONAL_ROAD_POINT (coordonnées x, y) dans le graphe :

In [16]:
# The query only reads from the graph, so it runs in a managed *read*
# transaction instead of a write transaction.
with driver.session() as session:
    result = session.execute_read(get_all_national_rp)

# The driver is intentionally left open here: the batch-writing cells further
# down open new sessions on this same driver object.

In [17]:
# Turn the list of {x, y} records returned by the query into a DataFrame.
df = pd.DataFrame(result)

In [18]:
# Print a summary of df (columns, non-null counts, dtypes, memory usage).
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6309 entries, 0 to 6308
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   x       6309 non-null   float64
 1   y       6309 non-null   float64
dtypes: float64(2)
memory usage: 98.7 KB


In [15]:
#df_communes = df_communes.iloc[0:100]

Transformation en liste "records", pour batching :

In [19]:
# Announce the step, then convert every row of df into a {column: value} dict;
# the resulting list is what gets sent to Neo4j as the $props_list parameter.
print("conversion of objects to parameters list for neo4j...")
props_list = df.to_dict(orient="records")

conversion of objects to parameters list for neo4j...


In [21]:
# Number of records passed to batch() per chunk, i.e. per write transaction
# in the batch-writing loops of this notebook.
BATCH_SIZE = 1000

In [22]:
# --- Batch function ---

def batch(iterable, n=1):
    """Yield successive slices of ``iterable`` holding at most ``n`` items.

    Args:
        iterable: Object supporting ``len()`` and slicing (list, tuple,
            str, ...).
        n: Maximum size of each slice.

    Yields:
        Consecutive slices ``iterable[start:stop]``; the last one may be
        shorter than ``n``.
    """
    total = len(iterable)
    yield from (
        iterable[start:min(start + n, total)]
        for start in range(0, total, n)
    )

In [23]:
# First pass: create the NEARLY_TO relationships batch by batch, one write
# transaction per batch of BATCH_SIZE points.
with driver.session() as session:

    all_result = []      # raw records returned by each batch
    batch_frames = []    # one DataFrame per non-empty batch result
    nb_done = 0          # number of points actually sent so far

    for i, props_batch in enumerate(batch(props_list, BATCH_SIZE)):
        result = session.execute_write(create_dist_edge_per_batch_NP, props_batch)
        all_result.append(result)
        if result:
            batch_frames.append(pd.DataFrame(result))

        # Report the real cumulative count: the last batch is usually
        # smaller than BATCH_SIZE.
        nb_done += len(props_batch)
        print(f'batch {i + 1} done : {nb_done} cities done')

# Concatenate once, after the loop, instead of re-copying the accumulated
# frame at every iteration. The per-batch index is kept, as before.
df_result = pd.concat(batch_frames) if batch_frames else pd.DataFrame()

# The driver is intentionally not closed here: the second pass further down
# opens another session on it.


batch 1 done : 1000 cities done
batch 2 done : 1000 cities done
batch 3 done : 1000 cities done
batch 4 done : 1000 cities done
batch 5 done : 1000 cities done
batch 6 done : 1000 cities done
batch 7 done : 1000 cities done


Résultat de la première passe :

In [24]:
# Display the per-point edge counts (x, y, edge_created) of the first pass.
df_result

Unnamed: 0,x,y,edge_created
0,2.39006,48.82701,9
1,2.39014,48.82684,9
2,2.35897,48.90162,10
3,2.35720,48.92348,3
4,2.36095,48.92914,5
...,...,...,...
252,3.48594,44.52644,8
253,5.22061,48.73650,9
254,0.10242,45.69187,10
255,1.43098,48.43707,8


On récupère les communes qui n'ont eu aucune route...

In [30]:
# Outer-join all points with the first-pass results on their coordinates;
# points that got no edge end up with a missing edge_created value.
df_2 = pd.merge(df, df_result, on=['x', 'y'], how='outer')

In [32]:
# Flag the points that received at least one edge. The vectorized comparison
# replaces a per-row lambda; missing values (NaN) compare as False, exactly as
# they did with `True if n > 0 else False`.
df_2['edge_exist'] = df_2['edge_created'] > 0

In [33]:
# Display the merged frame with the edge_created and edge_exist columns.
df_2

Unnamed: 0,x,y,edge_created,edge_exist
0,4.85057,45.68163,,False
1,4.89002,45.78515,,False
2,4.86635,45.78915,,False
3,2.39006,48.82701,9.0,True
4,2.39014,48.82684,9.0,True
...,...,...,...,...
6304,5.37742,43.33153,5.0,True
6305,1.29926,43.67032,,False
6306,-0.25457,44.28522,,False
6307,-0.00623,43.96352,,False


In [35]:
# Plot every point on an OpenStreetMap background, coloured by whether the
# first pass created at least one edge for it.
fig = px.scatter_mapbox(
    df_2,
    lat='y',
    lon='x',
    color='edge_exist',
    mapbox_style='open-street-map',
)
fig.show()

On décide d'élargir encore un peu, car des "sorties" du réseau n'ont pas été suffisamment prises en compte

In [36]:
# Keep only the coordinates of the points for which the first pass created
# no edge (missing edge_created after the outer merge).
no_edge_mask = df_2['edge_created'].isna()
df_2 = df_2.loc[no_edge_mask, ['x', 'y']]

In [37]:
# Display the points left without any edge after the first pass.
df_2

Unnamed: 0,x,y
0,4.85057,45.68163
1,4.89002,45.78515
2,4.86635,45.78915
12,2.40932,48.93693
13,2.41062,48.93724
...,...,...
6300,-0.54395,42.79280
6305,1.29926,43.67032
6306,-0.25457,44.28522
6307,-0.00623,43.96352


2ème passe : Pour ces communes, on agrandit le rayon à 400m et on limite à 2 routes

In [38]:
# Announce the step, then convert every remaining row of df_2 into a
# {column: value} dict for the $props_list parameter of the second pass.
print("conversion of objects to parameters list for neo4j...")
props_list = df_2.to_dict(orient="records")

conversion of objects to parameters list for neo4j...


In [40]:
# Second pass: for the points left without edges, create NEARLY_TO
# relationships with the wider-radius query, one write transaction per batch.
with driver.session() as session:

    all_result = []      # raw records returned by each batch
    batch_frames = []    # one DataFrame per non-empty batch result
    nb_done = 0          # number of points actually sent so far

    for i, props_batch in enumerate(batch(props_list, BATCH_SIZE)):
        result = session.execute_write(create_dist_edge_per_batch_NP_400m, props_batch)
        all_result.append(result)
        if result:
            batch_frames.append(pd.DataFrame(result))

        # BATCH_SIZE * (i + 1) over-reported the total whenever the last
        # batch was partial; count what was really sent instead.
        nb_done += len(props_batch)
        print(f'batch {i + 1} done : {nb_done} cities done')

# Concatenate once, after the loop, instead of re-copying the accumulated
# frame at every iteration. The per-batch index is kept, as before.
df_result = pd.concat(batch_frames) if batch_frames else pd.DataFrame()

# The driver is intentionally not closed inside the session block; close it
# once, when no further cell needs it.


batch 1 done : 1000 cities done
batch 2 done : 2000 cities done
batch 3 done : 3000 cities done


In [None]:
# Display the per-point edge counts (x, y, edge_created) of the second pass.
df_result

---

In [8]:
# The query only reads from the graph, so it runs in a managed *read*
# transaction instead of a write transaction.
with driver.session() as session:
    result = session.execute_read(get_all_national_rp)

# The driver is intentionally left open: the edge-count query further down
# opens another session on this same driver object.

df = pd.DataFrame(result)

In [9]:
# Display the coordinates of all NATIONAL_ROAD_POINT nodes.
df

Unnamed: 0,x,y
0,4.85057,45.68163
1,4.89002,45.78515
2,4.86635,45.78915
3,2.39006,48.82701
4,2.39014,48.82684
...,...,...
6304,5.37742,43.33153
6305,1.29926,43.67032
6306,-0.25457,44.28522
6307,-0.00623,43.96352


In [10]:
# The query only reads from the graph, so it runs in a managed *read*
# transaction instead of a write transaction.
with driver.session() as session:
    result = session.execute_read(get_all_national_rp_with_edges_count)

# Release the driver's connections once the query has completed.
driver.close()

df_edges = pd.DataFrame(result)

In [11]:
# Display the number of ROAD_POINT relationships per NATIONAL_ROAD_POINT.
df_edges

Unnamed: 0,x,y,nb_edges
0,2.39006,48.82701,9
1,2.39014,48.82684,9
2,2.35897,48.90162,10
3,2.35720,48.92348,3
4,2.36095,48.92914,5
...,...,...,...
5298,0.10242,45.69187,10
5299,1.43098,48.43707,8
5300,5.37742,43.33153,5
5301,1.29926,43.67032,1


In [12]:
# Left-join all points with their edge counts on the coordinates; points
# without any relationship keep a missing nb_edges value.
df_tot = pd.merge(df, df_edges, on=['x', 'y'], how='left')

In [14]:
# Flag the points that have at least one edge. The vectorized comparison
# replaces a per-row lambda; missing values (NaN) compare as False, exactly as
# they did with `True if nb > 0 else False`.
df_tot['have_edges'] = df_tot['nb_edges'] > 0

In [15]:
# Display all points with their nb_edges and have_edges columns.
df_tot

Unnamed: 0,x,y,nb_edges,have_edges
0,4.85057,45.68163,,False
1,4.89002,45.78515,,False
2,4.86635,45.78915,,False
3,2.39006,48.82701,9.0,True
4,2.39014,48.82684,9.0,True
...,...,...,...,...
6304,5.37742,43.33153,5.0,True
6305,1.29926,43.67032,1.0,True
6306,-0.25457,44.28522,,False
6307,-0.00623,43.96352,,False


In [17]:
# Plot every point on an OpenStreetMap background, coloured by whether it
# has at least one edge after both passes.
fig = px.scatter_mapbox(
    df_tot,
    lat='y',
    lon='x',
    color='have_edges',
    mapbox_style='open-street-map',
)
fig.show()