#### Computation de différentes relation "plus proche" entre les différents objets

---- PROCESSUS À REVOIR ! ----

Objectifs :

- Génération de relations "plus proche point géospatial" entre différents objets, pour la computation des scores :
    - CITY / ROAD_POINT : tous les points routes à moins de 500m (correction : à moins de 5km)
    - OU, si impossible, les maximun 2 points routes à moins de 5km (correction : à moins de 7km)
    - OU, si impossible, les maximun 1 points routes à moins de 30km

Imports :

In [1]:
import geopandas as gpd
import pandas as pd

import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px


from neo4j import GraphDatabase, basic_auth

Neo4j session :

In [2]:
PASSWORD_NEO4J = 'passwordneo4j'

In [3]:
driver = GraphDatabase.driver(
  "bolt://localhost:7687",
  auth=basic_auth("neo4j", PASSWORD_NEO4J))

In [12]:
DISTANCE_NEARLY_TO = 5000 # 5km

LIMIT_DISTANCE_FOR_2_EDGES = 7000 #7km

LIMIT_DISTANCE_FOR_1_EDGE = 30000 #30km

----

##### Neo4j requests :

Requête ardoise :

In [4]:
""" def delete_all_edges_NEARLY_TO(tx):

    query = "MATCH ()-[r:NEARLY_TO]-() \
                WHERE r.distance > 500 \
            DELETE r \
            RETURN count(*) AS COUNT"

    result = tx.run(query)
    return result.data() """ 

In [5]:
def get_all_communes(tx):

    query = "MATCH (n:CITY) RETURN n.insee as insee"

    result = tx.run(query)
    return result.data()


In [18]:
def get_all_communes_with_no_nearly_road(tx):

    query = "MATCH (n:CITY) WHERE NOT EXISTS ((n)-[:NEARLY_TO]-()) RETURN n.insee as insee"

    result = tx.run(query)
    return result.data()


In [13]:
def create_dist_edge_per_batch_CITY(tx, props_list, DISTANCE_NEARLY_TO):

    query = "   UNWIND $props_list AS map \
                MATCH (n:CITY) WHERE n.insee = map.insee \
                MATCH (p:ROAD_POINT) WHERE point.distance(n.location, p.location) < " + DISTANCE_NEARLY_TO + " \
                CREATE (p)-[r:NEARLY_TO]->(n) \
                SET r.distance = point.distance(n.location, p.location) \
                RETURN n.insee as insee, count(*) as edge_created"

    result = tx.run(query, props_list=props_list)
    return result.data()

In [16]:
def create_dist_edge_per_batch_CITY_2_edges_limit(tx, props_list, LIMIT_DISTANCE_FOR_2_EDGES):

    query = "  UNWIND $props_list AS map \
                    CALL { \
                    WITH map \
                    MATCH (n:CITY) WHERE n.insee = map.insee \
                    MATCH (p:ROAD_POINT) WHERE point.distance(n.location, p.location) < " + LIMIT_DISTANCE_FOR_2_EDGES + " \
                    RETURN n, p \
                    ORDER BY point.distance(n.location, p.location) \
                    LIMIT 2 } \
                CREATE (p)-[r:NEARLY_TO]->(n) \
                SET r.distance = point.distance(n.location, p.location) \
                RETURN n.insee as insee, count(*) as edge_created "

    result = tx.run(query, props_list=props_list)
    return result.data()

In [17]:
def create_dist_edge_per_batch_CITY_1_edge_limit(tx, props_list, LIMIT_DISTANCE_FOR_1_EDGE):

    query = "  UNWIND $props_list AS map \
                    CALL { \
                    WITH map \
                    MATCH (n:CITY) WHERE n.insee = map.insee \
                    MATCH (p:ROAD_POINT) WHERE point.distance(n.location, p.location) < " + LIMIT_DISTANCE_FOR_1_EDGE + " \
                    RETURN n, p \
                    ORDER BY point.distance(n.location, p.location) \
                    LIMIT 1 } \
                CREATE (p)-[r:NEARLY_TO]->(n) \
                SET r.distance = point.distance(n.location, p.location) \
                RETURN n.insee as insee, count(*) as edge_created "

    result = tx.run(query, props_list=props_list)
    return result.data()

---

EDGES COMMUNES / ROAD_POINT

Ardoise : pour effacer toute relation "NEARLY_TO" anciennes

In [5]:
with driver.session() as session:

    count_list = []

    result = session.execute_write(delete_all_edges_NEARLY_TO)

driver.close()

print(result) 

[{'COUNT': 12082}]


---

Récupération de tous les points CITY et leur code insee dans le graph :

In [6]:
with driver.session() as session:

    count_list = []

    result = session.execute_write(get_all_communes)

driver.close()

In [7]:
df_communes = pd.DataFrame(result)

In [8]:
df_communes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34460 entries, 0 to 34459
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   insee   34460 non-null  int64
dtypes: int64(1)
memory usage: 269.3 KB


In [15]:
#df_communes = df_communes.iloc[0:100]

Transformation en liste "records", pour batching :

In [9]:
print("conversion of objects to parameters list for neo4j...")

props_list = df_communes.to_dict("records")

conversion of objects to parameters list for neo4j...


In [10]:
BATCH_SIZE = 1000

In [11]:
# --- Batch function ---

def batch(iterable, n=1):
    l = len(iterable)
    for ndx in range(0, l, n):
        yield iterable[ndx:min(ndx + n, l)]

In [35]:
with driver.session() as session:

    all_result = []

    df_result = pd.DataFrame()

    # Batch the writing session :

    for i, props_batch in enumerate(batch(props_list, BATCH_SIZE)):
        result = session.execute_write(create_dist_edge_per_batch_CITY, props_batch)
        all_result.append(result)

        print(f'batch {i + 1} done. {BATCH_SIZE*(i+1)} cities processed...')

        df_result = pd.concat([df_result, pd.DataFrame(result)])

    driver.close()


batch 1 done : 1000 cities done
batch 2 done : 1000 cities done
batch 3 done : 1000 cities done
batch 4 done : 1000 cities done
batch 5 done : 1000 cities done
batch 6 done : 1000 cities done
batch 7 done : 1000 cities done
batch 8 done : 1000 cities done
batch 9 done : 1000 cities done
batch 10 done : 1000 cities done
batch 11 done : 1000 cities done
batch 12 done : 1000 cities done
batch 13 done : 1000 cities done
batch 14 done : 1000 cities done
batch 15 done : 1000 cities done
batch 16 done : 1000 cities done
batch 17 done : 1000 cities done
batch 18 done : 1000 cities done
batch 19 done : 1000 cities done
batch 20 done : 1000 cities done
batch 21 done : 1000 cities done
batch 22 done : 1000 cities done
batch 23 done : 1000 cities done
batch 24 done : 1000 cities done
batch 25 done : 1000 cities done
batch 26 done : 1000 cities done
batch 27 done : 1000 cities done
batch 28 done : 1000 cities done
batch 29 done : 1000 cities done
batch 30 done : 1000 cities done
batch 31 done : 100

Résultat de la première passe :

In [36]:
df_result

Unnamed: 0,insee,edge_created
0,62053,1
1,31290,2
2,28400,1
3,42333,1
4,25393,1
...,...,...
326,32101,2
327,03170,1
328,14394,2
329,49308,5


On récupère les communes qui n'ont eu aucune route...

In [47]:
df_communes_2 = df_communes.merge(df_result, how='outer', on='insee')

In [68]:
df_communes_2 = df_communes_2.loc[df_communes_2['edge_created'].isna(),['insee']]

In [69]:
df_communes_2

Unnamed: 0,insee
1,37220
2,40180
3,92051
4,04040
5,62395
...,...
9898,37022
9899,54001
9900,54083
9901,11101


2ème passe : Pour ces communes, on agrandit le rayon à 2km et on limite à 5 routes

In [79]:
print("conversion of objects to parameters list for neo4j...")

props_list = df_communes_2.to_dict("records")

conversion of objects to parameters list for neo4j...


In [15]:
with driver.session() as session:

    all_result = []

    df_result = pd.DataFrame()

    # Batch the writing session :

    for i, props_batch in enumerate(batch(props_list, BATCH_SIZE)):
        result = session.execute_write(create_dist_edge_per_batch_CITY_2km, props_batch)
        all_result.append(result)

        print(f'batch {i + 1} done : {BATCH_SIZE*(i+1)} cities done')

        df_result = pd.concat([df_result, pd.DataFrame(result)])

    driver.close()


batch 1 done : 1000 cities done
batch 2 done : 2000 cities done
batch 3 done : 3000 cities done
batch 4 done : 4000 cities done
batch 5 done : 5000 cities done
batch 6 done : 6000 cities done
batch 7 done : 7000 cities done
batch 8 done : 8000 cities done
batch 9 done : 9000 cities done
batch 10 done : 10000 cities done


---

Si Echec du process... Clause de rattrapage ici, pour retrouver les communes orphelines (sans relation "NEARLY_TO")

In [16]:
with driver.session() as session:

    count_list = []

    result = session.execute_write(get_all_communes_with_no_nearly_road)

driver.close()

df = pd.DataFrame(result)

In [17]:
df

Unnamed: 0,insee
0,04148
1,23255
2,79243
3,80698
4,21108
...,...
1407,41013
1408,24004
1409,81187
1410,14244


In [18]:
print("conversion of objects to parameters list for neo4j...")

props_list = df.to_dict("records")

conversion of objects to parameters list for neo4j...


In [15]:
with driver.session() as session:

    all_result = []

    df_result = pd.DataFrame()

    # Batch the writing session :

    for i, props_batch in enumerate(batch(props_list, BATCH_SIZE)):
        result = session.execute_write(create_dist_edge_per_batch_CITY_2km, props_batch)
        all_result.append(result)

        print(f'batch {i + 1} done : {BATCH_SIZE*(i+1)} cities done')

        df_result = pd.concat([df_result, pd.DataFrame(result)])

    driver.close()


CypherSyntaxError: {code: Neo.ClientError.Statement.SyntaxError} {message: Variable `p` not defined (line 1, column 198 (offset: 197))
"  UNWIND $props_list AS map                     CALL {                     WITH map                     MATCH (n:CITY) WHERE n.insee = map.insee                     WITH point.distance(n.location, p.location) as dist                     MATCH (p:ROAD_POINT) WHERE dist < 2000                     RETURN n, p, dist                     ORDER BY dist                     LIMIT 5 }                 CREATE (p)-[r:NEARLY_TO]->(n)                 SET r.distance = dist                 RETURN n.insee as insee, count(*) as edge_created"
                                                                                                                                                                                                      ^}

In [23]:
df_communes_3 = df.merge(df_result, how='outer', on='insee')

In [24]:
df_communes_3

Unnamed: 0,insee,edge_created
0,04148,
1,23255,
2,79243,
3,80698,
4,21108,
...,...,...
7312,37022,5.0
7313,54001,4.0
7314,54083,1.0
7315,11101,


3ème passe : On récupère les communes toujours orphelines malgré tout... Cette fois_ci , distance à 5km limité à 2 résultats.

In [25]:
df_communes_3 = df_communes_3.loc[df_communes_3['edge_created'].isna(),['insee']]

In [None]:
print("conversion of objects to parameters list for neo4j...")

props_list = df.to_dict("records")

In [19]:
with driver.session() as session:

    count_list = []

    result = session.execute_write(get_all_communes_with_no_nearly_road)

driver.close()

df = pd.DataFrame(result)

In [20]:
print("conversion of objects to parameters list for neo4j...")

props_list = df.to_dict("records")

conversion of objects to parameters list for neo4j...


In [23]:
with driver.session() as session:

    all_result = []

    df_result = pd.DataFrame()

    # Batch the writing session :

    for i, props_batch in enumerate(batch(props_list, BATCH_SIZE)):
        result = session.execute_write(create_dist_edge_per_batch_CITY_5km, props_batch)
        all_result.append(result)

        print(f'batch {i + 1} done : {BATCH_SIZE*(i+1)} cities done')

        df_result = pd.concat([df_result, pd.DataFrame(result)])

    driver.close()

batch 1 done : 1000 cities done
batch 2 done : 2000 cities done


In [25]:
df_communes_4 = df.merge(df_result, how='outer', on='insee')

In [26]:
df_communes_4 = df_communes_4.loc[df_communes_4['edge_created'].isna(),['insee']]

In [27]:
print("conversion of objects to parameters list for neo4j...")

props_list = df_communes_4.to_dict("records")

conversion of objects to parameters list for neo4j...


In [30]:
with driver.session() as session:

    all_result = []

    df_result = pd.DataFrame()

    # Batch the writing session :

    for i, props_batch in enumerate(batch(props_list, BATCH_SIZE)):
        result = session.execute_write(create_dist_edge_per_batch_CITY_30km, props_batch)
        all_result.append(result)

        print(f'batch {i + 1} done : {BATCH_SIZE*(i+1)} cities done')

        df_result = pd.concat([df_result, pd.DataFrame(result)])

    driver.close()


batch 1 done : 1000 cities done


In [31]:
df_result

Unnamed: 0,insee,edge_created
0,56152,1
1,17323,1
2,73047,1
3,29084,1
4,17486,1
5,48054,1
6,5064,1
7,38073,1
8,40081,1
9,56086,1


In [32]:
with driver.session() as session:

    count_list = []

    result = session.execute_write(get_all_communes_with_no_nearly_road)

driver.close()

df = pd.DataFrame(result)

In [33]:
df