In [1]:
from neo4j import GraphDatabase
from pyproj import CRS, Transformer
from shapely.ops import nearest_points
from shapely.geometry import Point

# heuristic for score

# Closest existing charging location + density + avg home value + number of vehicles + population density

# Connection settings for the local Neo4j instance.
# NOTE(review): the password is hardcoded here; consider loading it from the
# environment before sharing this notebook.
URI = "bolt://localhost:7687"
AUTH = ("neo4j", "12345678")
# No module-level driver is opened here: the scoring code further down opens
# its own driver inside a `with` block, so an extra driver created at this point
# would never be closed and would only hold on to an idle connection pool.


# create a score for the candidate locations
# based on the closest existing charging location + density + avg home value + number of vehicles + population density
def calculate_score(candidate, pc4, municipality):
    """Combine five inputs into one weighted score for a candidate location.

    Args:
        candidate: mapping providing ``"distance_to_nearest"`` (weight 0.1).
        pc4: mapping providing ``"density"`` (weight 0.2).
        municipality: mapping providing ``"home_value"`` (weight 0.05),
            ``"vehicles"`` (weight 0.1) and ``"population_density"``
            (weight 0.05).

    Returns:
        The sum of each input multiplied by its weight.

    Raises:
        KeyError: if one of the keys above is absent from its mapping.
        TypeError: if one of the values cannot be multiplied by a float
            (for example ``None``).
    """
    # Read every input up front, in a fixed order, so a missing key is
    # reported before any arithmetic is attempted.
    weighted_inputs = (
        (candidate["distance_to_nearest"], 0.1),
        (pc4["density"], 0.2),
        (municipality["home_value"], 0.05),
        (municipality["vehicles"], 0.1),
        (municipality["population_density"], 0.05),
    )

    # Accumulate left to right, starting from the first term.
    (first_value, first_weight), *remaining = weighted_inputs
    total = first_value * first_weight
    for value, weight in remaining:
        total = total + value * weight
    return total

# Score every candidate location, municipality by municipality and PC4 area by
# PC4 area, and store each score on the matching CandidateLocation node.
with GraphDatabase.driver(URI, auth=AUTH) as driver:
    driver.verify_connectivity()
    with driver.session() as session:

        # get all the municipalities by name
        query = """
        MATCH (m:Municipality)
        RETURN m.name AS name
        """
        result = session.run(query)
        municipalities = [record["name"] for record in result]
        if municipalities:
            print(f"Found {len(municipalities)} municipalities in the database.")
        else:
            # Nothing to do: the loop below is a no-op for an empty list, so
            # there is no need to call exit() (which would also stop the kernel).
            print("No municipalities found in the database.")

        for municipality in municipalities:

            # get the pc4 that are in the municipality
            query = """
            MATCH (p:PC4Area) - [:IS_LOCATED_IN] -> (m:Municipality {name: $municipality})
            RETURN p
            """
            result = session.run(query, municipality=municipality)
            # Materialise the areas before issuing further queries on this session.
            pc4_areas = [record["p"] for record in result]
            if not pc4_areas:
                # Skip only this municipality; the remaining ones are still scored.
                print(f"No PC4 areas found in the municipality {municipality}.")
                continue
            print(f"Found {len(pc4_areas)} PC4 areas in the municipality {municipality}.")

            # get the candidate locations in the pc4 areas individually,
            # score them, and collect them per municipality
            candidate_locations = []
            for pc4 in pc4_areas:
                query = """
                MATCH (c:CandidateLocation) - [:IS_LOCATED_IN] -> (p:PC4Area {pc4_code: $pc4_code})
                WITH c, p
                MATCH (c) - [:IS_LOCATED_IN] -> (m:Municipality {name: $municipality})
                RETURN properties(c) AS c, properties(p) AS p, properties(m) AS m
                """
                result = session.run(query, pc4_code=pc4["pc4_code"], municipality=municipality)
                candidates = [(record["c"], record["p"], record["m"]) for record in result]

                # The empty check must come before candidates[0] is touched,
                # otherwise a PC4 area without candidates raises IndexError.
                if not candidates:
                    print(f"No candidates found for the PC4 code {pc4['pc4_code']}.")
                    continue
                # Every row was matched against the same municipality, so the
                # first row is representative. .get() tolerates the key being
                # absent as well as being None.
                if candidates[0][2].get("home_value") is None:
                    print(f"Municipality {municipality} has no home value data, skipping.")
                    continue

                # Score the candidates of THIS PC4 area (not the ones collected
                # from earlier areas, which have already been written).
                skipped = 0
                for candidate in candidates:
                    if candidate[0] is None or candidate[1] is None or candidate[2] is None:
                        print(f"Candidate {candidate} has missing data, skipping.")
                        continue
                    # calculate the score for each candidate location
                    try:
                        score = calculate_score(candidate[0], candidate[1], candidate[2])
                        lon, lat = candidate[0]["lon"], candidate[0]["lat"]
                    except (KeyError, TypeError):
                        # An input property is absent or not numeric.
                        skipped += 1
                        continue
                    if score is None:
                        # Never write a null score back to the node.
                        skipped += 1
                        continue
                    candidate[0]["score"] = score
                    # add the score to the candidate location and update neo4j
                    update_query = """
                    MATCH (c:CandidateLocation {lon: $lon, lat: $lat})
                    SET c.score = $score
                    """
                    session.run(update_query, lon=lon, lat=lat, score=score)
                if skipped:
                    print(f"Skipped {skipped} candidates with incomplete data for PC4 code {pc4['pc4_code']}.")
                print(f"Found {len(candidates)} candidates for PC4 code {pc4['pc4_code']}.")
                candidate_locations.extend(candidates)

            if not candidate_locations:
                # Report and move on to the next municipality instead of exiting.
                print(f"No candidates found in the municipality {municipality}.")

# The driver is closed by the `with` block above; no explicit close() is needed.


Found 50 municipalities in the database.
Found 18 PC4 areas in the municipality Hoeksche Waard.
Found 7 candidates for PC4 code 3273.
Found 274 candidates for PC4 code 3261.
Found 27 candidates for PC4 code 3291.
Found 40 candidates for PC4 code 3271.
Found 60 candidates for PC4 code 3263.
Found 154 candidates for PC4 code 3286.
Found 65 candidates for PC4 code 3262.
Found 44 candidates for PC4 code 3267.
Found 23 candidates for PC4 code 3284.
Found 7 candidates for PC4 code 3292.
Found 130 candidates for PC4 code 3297.
Found 9 candidates for PC4 code 3265.
Found 33 candidates for PC4 code 3274.
Found 65 candidates for PC4 code 3281.
Found 1 candidates for PC4 code 3293.
Found 53 candidates for PC4 code 3299.
Found 38 candidates for PC4 code 3264.
Found 40 candidates for PC4 code 3295.
Found 11 PC4 areas in the municipality Dordrecht.
Found 121 candidates for PC4 code 3311.
Found 53 candidates for PC4 code 3319.
Found 20 candidates for PC4 code 3318.
Found 36 candidates for PC4 code 33

KeyboardInterrupt: 

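Batched and parallel version: municipalities are scored concurrently in a thread pool, and the scores are written back to Neo4j in batches.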

In [None]:
from neo4j import GraphDatabase
from concurrent.futures import ThreadPoolExecutor
from itertools import islice

URI = "bolt://localhost:7687"
AUTH = ("neo4j", "12345678")

BATCH_SIZE = 100
MAX_THREADS = 4  # For ThreadPoolExecutor


# Score calculation function
def calculate_score(candidate, pc4, municipality):
    """Return the weighted score for one candidate location, or None.

    Args:
        candidate: mapping providing ``"distance_to_nearest"`` (weight 0.1).
        pc4: mapping providing ``"density"`` (weight 0.2).
        municipality: mapping providing ``"home_value"`` (weight 0.05),
            ``"vehicles"`` (weight 0.1) and ``"population_density"``
            (weight 0.05).

    Returns:
        The weighted sum of the five inputs, or ``None`` when the score cannot
        be computed because an input is absent or not numeric. Callers are
        expected to skip candidates for which ``None`` is returned.
    """
    try:
        return (
            candidate["distance_to_nearest"] * 0.1 +
            pc4["density"] * 0.2 +
            municipality["home_value"] * 0.05 +
            municipality["vehicles"] * 0.1 +
            municipality["population_density"] * 0.05
        )
    except (KeyError, TypeError):
        # KeyError: the property is absent from its mapping.
        # TypeError: the value (or the mapping itself) is None / non-numeric.
        # Both mean "not enough data to score", so report that uniformly.
        return None


# Helper to split into batches
def batched(iterable, n):
    """Yield consecutive lists of up to ``n`` items taken from ``iterable``.

    The last list may be shorter than ``n``. Nothing is yielded for an empty
    iterable.
    """
    source = iter(iterable)
    while True:
        chunk = list(islice(source, n))
        if not chunk:
            # The source is exhausted (or n is 0): stop generating.
            return
        yield chunk


def process_municipality(municipality_name):
    """Score the candidate locations of one municipality and store the scores.

    Opens its own driver and session, loads the PC4 areas linked to the
    municipality, scores the candidates of each area with ``calculate_score``
    and finally writes the scores back in batches of ``BATCH_SIZE``.

    A candidate is left out when any of its three property maps is empty,
    when the municipality has no ``home_value``, or when ``calculate_score``
    returns ``None``.

    Args:
        municipality_name: value matched against ``Municipality.name``.

    Returns:
        None. Progress is reported with ``print``.
    """
    with GraphDatabase.driver(URI, auth=AUTH) as driver:
        with driver.session() as session:
            area_records = session.run("""
                MATCH (p:PC4Area) - [:IS_LOCATED_IN] -> (m:Municipality {name: $municipality})
                RETURN p
            """, municipality=municipality_name)

            # Pull all areas into memory before running more queries on this session.
            areas = []
            for area_record in area_records:
                areas.append(area_record["p"])
            if len(areas) == 0:
                print(f"No PC4 areas found in {municipality_name}")
                return

            all_candidates = []
            for area in areas:
                area_code = area["pc4_code"]
                rows = session.run("""
                    MATCH (c:CandidateLocation) -[:IS_LOCATED_IN]-> (p:PC4Area {pc4_code: $pc4_code})
                    WITH c, p
                    MATCH (c)-[:IS_LOCATED_IN]->(m:Municipality {name: $municipality})
                    RETURN properties(c) AS c, properties(p) AS p, properties(m) AS m
                """, pc4_code=area_code, municipality=municipality_name)

                for row in rows:
                    candidate_props = row["c"]
                    area_props = row["p"]
                    municipality_props = row["m"]
                    # Guard clauses: drop rows that cannot be scored.
                    if not (candidate_props and area_props and municipality_props):
                        continue
                    if municipality_props.get("home_value") is None:
                        continue
                    value = calculate_score(candidate_props, area_props, municipality_props)
                    if value is not None:
                        candidate_props["score"] = value
                        all_candidates.append(candidate_props)

            print(f"[{municipality_name}] Scored {len(all_candidates)} candidates.")

            # Write the scores back in chunks, one UNWIND query per chunk.
            for chunk in batched(all_candidates, BATCH_SIZE):
                session.run("""
                    UNWIND $candidates AS candidate
                    MATCH (c:CandidateLocation {lon: candidate.lon, lat: candidate.lat})
                    SET c.score = candidate.score
                """, candidates=chunk)


def main():
    """Load all municipality names and score them in parallel worker threads.

    The names are read with a short-lived driver and session; each name is then
    handed to ``process_municipality`` through a thread pool limited to
    ``MAX_THREADS`` workers.
    """
    municipalities = []
    with GraphDatabase.driver(URI, auth=AUTH) as driver, driver.session() as session:
        for row in session.run("MATCH (m:Municipality) RETURN m.name AS name"):
            municipalities.append(row["name"])

    print(f"Found {len(municipalities)} municipalities. Starting parallel scoring...")

    with ThreadPoolExecutor(max_workers=MAX_THREADS) as pool:
        # Drain the result iterator so that an exception raised in a worker
        # is re-raised here instead of being silently dropped.
        for _ in pool.map(process_municipality, municipalities):
            pass

    print("✅ Done updating all candidate scores in batches.")


# Start the scoring run only when this code is executed as the main module.
if __name__ == "__main__":
    main()


Found 50 municipalities. Starting parallel scoring...
[Lansingerland] Scored 736 candidates.
[Dordrecht] Scored 1483 candidates.
[Hoeksche Waard] Scored 1070 candidates.
[Den Haag] Scored 10660 candidates.
[Nieuwkoop] Scored 201 candidates.
[Katwijk] Scored 604 candidates.
[Voorschoten] Scored 1687 candidates.
[Leidschendam-Voorburg] Scored 3115 candidates.
[Leiden] Scored 1480 candidates.
[Zoetermeer] Scored 1272 candidates.
[Waddinxveen] Scored 1813 candidates.
[Gouda] Scored 673 candidates.
[Krimpenerwaard] Scored 242 candidates.
[Alblasserdam] Scored 109 candidates.
[Ridderkerk] Scored 197 candidates.
[Schiedam] Scored 827 candidates.
[Rotterdam] Scored 3888 candidates.
[Nissewaard] Scored 444 candidates.
[Goeree-Overflakkee] Scored 511 candidates.
[Hendrik-Ido-Ambacht] Scored 149 candidates.
[Bodegraven-Reeuwijk] Scored 194 candidates.
[Gorinchem] Scored 975 candidates.
[Voorne aan Zee] Scored 633 candidates.
[Alphen aan den Rijn] Scored 2022 candidates.
[Delft] Scored 1557 candid