## Run this Cypher query to reset the database (deletes all nodes and their relationships)
```
MATCH (n)
DETACH DELETE n
```

## We imported the 4 main data sets here

This version batches rows with Cypher `UNWIND` and inserts the chunks in parallel.

In [None]:
import os
from concurrent.futures import ThreadPoolExecutor, as_completed

import geopandas as gpd
import pandas as pd
from neo4j import GraphDatabase
from pyproj import Transformer, CRS
from shapely.geometry import Point
from shapely.ops import nearest_points
from tqdm import tqdm  # optional progress bar

# Connection settings. Each value can be overridden through an environment
# variable so real credentials do not have to live in the notebook; the
# fallbacks are the original local-development defaults.
URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
AUTH = (
    os.environ.get("NEO4J_USER", "neo4j"),
    os.environ.get("NEO4J_PASSWORD", "12345678"),
)
driver = GraphDatabase.driver(URI, auth=AUTH)

# Upper bound on worker threads used by every thread pool in this cell.
MAX_WORKERS = 16  # Adjust based on your system's capabilities

with driver:
    # Check that the server is reachable before loading anything.
    driver.verify_connectivity()

    # --- Read the four input data sets ---
    pc4_muni_density = gpd.read_file(
        "data/zh_pc4_gdf_with_municipality_and_ev_charger_density.geojson"
    )
    # Rename two municipalities; presumably so they match the names used in
    # the municipality data set loaded below -- confirm.
    municipality_renames = {
        "Government of Rotterdam": "Rotterdam",
        "The Hague": "Den Haag",
    }
    pc4_muni_density['municipality'] = (
        pc4_muni_density['municipality'].replace(municipality_renames)
    )

    combined_charging_points = gpd.read_file(
        "data/combined-charging-points-zuid-holland.geojson"
    )
    muni_translated = pd.read_json(
        "data/municipalities_data_translated.json"
    )
    parking_candidates = gpd.read_file(
        "data/candidates.gpkg"
    )

    def generate_closest_points_parallel(gdf_source, gdf_target, n_threads=MAX_WORKERS):
        """Annotate each geometry in ``gdf_source`` with its nearest point on ``gdf_target``.

        Three columns are written onto ``gdf_source`` in place: ``nearest_lon``,
        ``nearest_lat`` (coordinates of the closest point on the union of all
        target geometries) and ``distance_to_nearest`` (distance between the
        two points after projecting both to EPSG:3857).

        Args:
            gdf_source: Frame whose geometries expose ``.x``/``.y``
                (assumes points in EPSG:4326 lon/lat -- TODO confirm).
            gdf_target: Frame whose geometries are searched for the nearest point.
            n_threads: Number of worker threads used for the lookup.

        Returns:
            The same ``gdf_source`` object, with the three columns added.

        NOTE(review): the nearest point is chosen in the source coordinate
        space, and EPSG:3857 distances are scale-distorted away from the
        equator, so ``distance_to_nearest`` is not an exact ground distance.
        """
        geometries = list(gdf_source.geometry)
        if not geometries:
            # With no rows there is nothing to unpack from zip(*results);
            # create the (empty) output columns explicitly instead of failing.
            for column in ('nearest_lon', 'nearest_lat', 'distance_to_nearest'):
                gdf_source[column] = pd.Series(dtype="float64")
            return gdf_source

        # Prepare projection
        transformer = Transformer.from_crs(CRS("EPSG:4326"), CRS("EPSG:3857"), always_xy=True)

        # ``unary_union`` is deprecated in newer GeoPandas in favour of
        # ``union_all()``; keep the old attribute as a fallback for older versions.
        if hasattr(gdf_target, "union_all"):
            target_union = gdf_target.union_all()
        else:
            target_union = gdf_target.unary_union

        def find_nearest_info(point):
            # nearest_points returns (point on first geom, point on second geom);
            # index 1 is the closest location on the target union.
            nearest_geom = nearest_points(point, target_union)[1]
            p1 = Point(transformer.transform(point.x, point.y))
            p2 = Point(transformer.transform(nearest_geom.x, nearest_geom.y))
            distance = p1.distance(p2)
            return nearest_geom.x, nearest_geom.y, distance

        # Apply in parallel; executor.map preserves input order, so results
        # line up with the rows of gdf_source.
        with ThreadPoolExecutor(max_workers=n_threads) as executor:
            results = list(tqdm(executor.map(find_nearest_info, geometries), total=len(geometries)))

        # Unpack results
        gdf_source['nearest_lon'], gdf_source['nearest_lat'], gdf_source['distance_to_nearest'] = zip(*results)
        return gdf_source
    # Attach the nearest existing charging point (and the distance to it)
    # to every parking candidate.
    parking_candidates = generate_closest_points_parallel(
        gdf_source=parking_candidates,
        gdf_target=combined_charging_points,
    )

    # --- Bulk insertion functions ---

    # Rows sent to Neo4j per UNWIND query; adjust depending on performance/memory.
    CHUNK_SIZE = 1000

    def add_pc4_muni_density(df):
        """Insert PC4 areas, their municipalities and the link between them.

        For every row a ``Municipality`` (keyed by name) and a ``PC4Area``
        (keyed by ``pc4_code``) are merged, the area's ``density`` and ``name``
        are set, and a ``(PC4Area)-[:IS_LOCATED_IN]->(Municipality)``
        relationship is merged. Rows are sent in chunks of ``CHUNK_SIZE``.

        Args:
            df: Table with ``municipality``, ``pc4_code`` and ``density`` columns.

        Raises:
            Any driver error raised while running or consuming a chunk.
        """
        data = df[['municipality', 'pc4_code', 'density']].to_dict("records")
        query = """
        UNWIND $data AS row
        MERGE (m:Municipality {name: row.municipality})
        MERGE (p:PC4Area {pc4_code: row.pc4_code})
        SET p.density = row.density, p.name = row.pc4_code
        MERGE (p)-[:IS_LOCATED_IN]->(m)
        """
        with driver.session() as session:
            for i in tqdm(range(0, len(data), CHUNK_SIZE), desc="PC4 + Municipality"):
                chunk = data[i:i + CHUNK_SIZE]
                # consume() forces the auto-commit query to finish here, so a
                # server-side failure is raised for this chunk instead of being
                # left pending until the session is closed.
                session.run(query, data=chunk).consume()

    def insert_ev_charger_chunk(chunk):
        """Insert one chunk of EV charging stations in a session of its own.

        Each row merges its ``PC4Area`` and an ``EVChargingStation`` node
        identified by all five properties (``lat``, ``lon``, ``nearest_lat``,
        ``nearest_lon``, ``distance_to_nearest``), then links the station to
        the area. The station is additionally linked to a ``Municipality``
        only when the area already has an ``IS_LOCATED_IN`` relationship to
        one (the municipality is MATCHed, not created).

        Args:
            chunk: List of row dicts with ``pc4_code`` plus the five
                properties listed above.

        Raises:
            Any driver error raised while running or consuming the query.
        """
        query = """
        UNWIND $data AS row
        MERGE (p:PC4Area {pc4_code: row.pc4_code})
        MERGE (ev:EVChargingStation {
            lat: row.lat,
            lon: row.lon,
            nearest_lat: row.nearest_lat,
            nearest_lon: row.nearest_lon,
            distance_to_nearest: row.distance_to_nearest
        })
        MERGE (ev)-[:IS_LOCATED_IN]->(p)
        WITH p, ev
        MATCH (m:Municipality)<-[:IS_LOCATED_IN]-(p)
        MERGE (ev)-[:IS_LOCATED_IN]->(m)
        """
        with driver.session() as session:
            # consume() makes the query run to completion inside this call so
            # that a failure surfaces to the caller (and its future) rather
            # than being left pending when the session closes.
            session.run(query, data=chunk).consume()

    def add_combined_charging_points(df):
        """Insert all EV charging stations, one chunk per worker task.

        Adds ``lat``/``lon`` columns to ``df`` in place (taken from the
        geometry's y/x), splits the rows into chunks of ``CHUNK_SIZE`` and
        hands each chunk to ``insert_ev_charger_chunk`` on a thread pool.

        Args:
            df: Frame with point geometries and ``pc4_code``, ``nearest_lat``,
                ``nearest_lon`` and ``distance_to_nearest`` columns.

        Raises:
            KeyError: If one of the required columns is missing.
            Any exception raised by a chunk insert is re-raised here.
        """
        df['lat'] = df.geometry.y
        df['lon'] = df.geometry.x
        data = df[['pc4_code', 'lat', 'lon', 'nearest_lat', 'nearest_lon', 'distance_to_nearest']].to_dict("records")
        chunks = [data[i:i + CHUNK_SIZE] for i in range(0, len(data), CHUNK_SIZE)]

        with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
            futures = [executor.submit(insert_ev_charger_chunk, chunk) for chunk in chunks]
            for future in tqdm(as_completed(futures), total=len(futures), desc="EV Chargers"):
                # A worker's exception is stored on its future; result()
                # re-raises it so a failed chunk is not silently dropped.
                future.result()

    def add_municipality_data(df):
        """Merge municipalities by name and set their statistics.

        Every row is sent as-is; the query reads the ``Municipality``,
        ``HomeValue``, ``Households``, ``Vehicles``, ``PassengerCars`` and
        ``PopulationDensity`` fields and stores them as ``home_value``,
        ``households``, ``vehicles``, ``passenger_cars`` and
        ``population_density`` on the ``Municipality`` node.

        Args:
            df: Table providing the fields listed above.

        Raises:
            Any driver error raised while running or consuming a chunk.
        """
        data = df.to_dict("records")
        query = """
        UNWIND $data AS row
        MERGE (m:Municipality {name: row.Municipality})
        SET m.home_value = row.HomeValue,
            m.households = row.Households,
            m.vehicles = row.Vehicles,
            m.passenger_cars = row.PassengerCars,
            m.population_density = row.PopulationDensity
        """
        with driver.session() as session:
            for i in tqdm(range(0, len(data), CHUNK_SIZE), desc="Municipality Data"):
                chunk = data[i:i + CHUNK_SIZE]
                # consume() forces the auto-commit query to finish here, so a
                # server-side failure is raised for this chunk instead of being
                # left pending until the session is closed.
                session.run(query, data=chunk).consume()

    def insert_parking_chunk(chunk):
        """Insert one chunk of candidate locations in a session of its own.

        Each row merges its ``PC4Area`` and a ``CandidateLocation`` node
        identified by all five properties (``lat``, ``lon``, ``nearest_lat``,
        ``nearest_lon``, ``distance_to_nearest``), then links the candidate to
        the area. The candidate is additionally linked to a ``Municipality``
        only when the area already has an ``IS_LOCATED_IN`` relationship to
        one (the municipality is MATCHed, not created).

        Args:
            chunk: List of row dicts with ``pc4_code`` plus the five
                properties listed above.

        Raises:
            Any driver error raised while running or consuming the query.
        """
        query = """
        UNWIND $data AS row
        MERGE (p:PC4Area {pc4_code: row.pc4_code})
        MERGE (c:CandidateLocation {
            lat: row.lat,
            lon: row.lon,
            nearest_lat: row.nearest_lat,
            nearest_lon: row.nearest_lon,
            distance_to_nearest: row.distance_to_nearest
        })
        MERGE (c)-[:IS_LOCATED_IN]->(p)
        WITH p, c
        MATCH (m:Municipality)<-[:IS_LOCATED_IN]-(p)
        MERGE (c)-[:IS_LOCATED_IN]->(m)
        """
        with driver.session() as session:
            # consume() makes the query run to completion inside this call so
            # that a failure surfaces to the caller (and its future) rather
            # than being left pending when the session closes.
            session.run(query, data=chunk).consume()

    def add_parking_candidates(df):
        """Insert all parking candidate locations, one chunk per worker task.

        Adds ``lat``/``lon`` columns to ``df`` in place (taken from the
        geometry's y/x), splits the rows into chunks of ``CHUNK_SIZE`` and
        hands each chunk to ``insert_parking_chunk`` on a thread pool.

        Args:
            df: Frame with point geometries and ``pc4_code``, ``nearest_lat``,
                ``nearest_lon`` and ``distance_to_nearest`` columns.

        Raises:
            KeyError: If one of the required columns is missing.
            Any exception raised by a chunk insert is re-raised here.
        """
        df['lat'] = df.geometry.y
        df['lon'] = df.geometry.x
        data = df[['pc4_code', 'lat', 'lon', 'nearest_lat', 'nearest_lon', 'distance_to_nearest']].to_dict("records")
        chunks = [data[i:i + CHUNK_SIZE] for i in range(0, len(data), CHUNK_SIZE)]

        with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
            futures = [executor.submit(insert_parking_chunk, chunk) for chunk in chunks]
            for future in tqdm(as_completed(futures), total=len(futures), desc="Parking Candidates"):
                # A worker's exception is stored on its future; result()
                # re-raises it so a failed chunk is not silently dropped.
                future.result()
    # --- Execute data loading ---

    # PC4 areas and municipalities are inserted first: the charger and
    # candidate queries MATCH a municipality through the
    # PC4Area -> Municipality relationship created by this step.
    add_pc4_muni_density(pc4_muni_density)
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        load_jobs = [
            executor.submit(add_combined_charging_points, combined_charging_points),
            executor.submit(add_municipality_data, muni_translated),
            executor.submit(add_parking_candidates, parking_candidates),
        ]
        # An exception raised inside a submitted loader is only stored on its
        # future; result() re-raises it so a failed load is reported instead
        # of passing unnoticed.
        for job in as_completed(load_jobs):
            job.result()


  target_union = gdf_target.unary_union
100%|██████████| 52953/52953 [00:07<00:00, 7457.80it/s]
PC4 + Municipality: 100%|██████████| 1/1 [00:00<00:00,  9.24it/s]
Municipality Data: 100%|██████████| 1/1 [00:00<00:00,  7.29it/s]
EV Chargers: 100%|██████████| 10/10 [00:04<00:00,  2.50it/s]71it/s]
Parking Candidates: 100%|██████████| 53/53 [00:27<00:00,  1.96it/s]
