# **Stage 3 — Booking.com Grid Selection + Overpass (OSM) Enrichment**

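This notebook enriches Booking.com listings with neighborhood context: it selects dense 1 km grid cells, queries OpenStreetMap POIs around each selected cell through the public Overpass API, and saves grid-level POI counts and minimum distances.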

In [0]:
from pyspark.sql import functions as F
from pyspark.sql.window import Window

import os, glob, json, time, math
import requests
import pandas as pd

# -------------------------
# Input
# -------------------------
# Parquet dataset of cleaned Booking listings; read once by the load cell.
BOOKING_PATH = "dbfs:/tmp/booking_clean/booking_clean.parquet"

# -------------------------
# Output (DBFS)
# -------------------------
# The batch directory is referenced under two spellings: the "dbfs:/" URI is
# used with spark.read, the "/dbfs/" path is used with plain Python file I/O
# (os.makedirs / glob / open).
BATCH_DIR_DBFS  = "dbfs:/tmp/booking_enrich_batches"
BATCH_DIR_LOCAL = "/dbfs/tmp/booking_enrich_batches"
# NOTE(review): not referenced by any cell visible in this notebook; the final
# cell writes to its own FINAL_GRID_ENRICH_PATH instead — confirm whether this
# constant is still needed.
FINAL_ENRICH_PATH = "dbfs:/tmp/booking_scoped_enriched_stage3"

# -------------------------
# Spatial params
# -------------------------
# Scale factor applied to latitude (degrees) to obtain metres when building grids.
METERS_PER_DEG_LAT = 111320.0
# Edge length, in metres, of the fine grid used to pick scrape locations (1 km).
GRID_M = 1000.0
# Edge length, in metres, of the coarse regions used for scope reduction (50 km).
COARSE_GRID_M = 50000.0
# Search radius, in metres, passed to the Overpass `around` filter.
RADIUS_M = 800

# -------------------------
# FUTURE-PROOF SWITCHES (ONLY CHANGE THESE LATER)
# -------------------------
# True: keep only listings in the TOP_COARSE_K most populated coarse regions.
# False: keep every listing that has coordinates.
USE_SCOPE = True
TOP_COARSE_K = 400

# A 1 km grid is "dense" when it holds at least this many listings.
DENSE_GRID_MIN = 20
# Upper bound on the number of dense grids to scrape; None selects all dense grids.
MAX_SCRAPE_GRIDS = 2000

# -------------------------
# Overpass endpoint
# -------------------------
OVERPASS_URL = "https://overpass.kumi.systems/api/interpreter"

# -------------------------
# Scraping controls
# -------------------------
# Number of grids written to each batch_NNN.jsonl file.
BATCH_SIZE = 100
# Pause, in seconds, after every grid request.
SLEEP_BETWEEN_REQUESTS = 2.0
# Maximum number of attempts per Overpass request.
MAX_RETRIES = 6

# -------------------------
# SAFETY (Run-All friendly)
# -------------------------
# When False, the scraping cells only print a "skipping" message.
RUN_SCRAPING = False  # set True to actually call Overpass

1) Load cleaned Booking dataset

In [0]:
# Read the cleaned Booking listings and show a small preview of the columns
# the rest of the notebook relies on.
booking_df = spark.read.parquet(BOOKING_PATH)

booking_row_count = booking_df.count()
print("Booking rows:", booking_row_count)

preview_columns = ["hotel_id", "lat", "lon", "country"]
display(booking_df.select(*preview_columns).limit(5))


Booking rows: 3239391


hotel_id,lat,lon,country
8908679,50.3330617,18.7037022,Poland
2246827,23.973931951127646,121.61510512232768,Taiwan
12275653,42.460103063041,18.509718935529,Montenegro
7877735,53.011607650726,18.617122400664,Poland
1985847,-18.2022428262032,144.572267532349,Australia


2) Keep only rows with coordinates

In [0]:
# Listings without a latitude or longitude cannot be placed on a grid, so
# they are dropped here; the result is cached because several later cells reuse it.
has_coordinates = F.col("lat").isNotNull() & F.col("lon").isNotNull()
booking_geo = booking_df.where(has_coordinates).cache()

geo_row_count = booking_geo.count()
print("Geo rows:", geo_row_count)


Geo rows: 3236299


3) Build coarse 50km regions (for optional scope reduction)

In [0]:
# Coarse cell indices. Latitude is scaled by a constant number of cells per
# degree; longitude is additionally scaled by cos(latitude) so that a cell
# spans roughly COARSE_GRID_M metres east-west at that latitude.
coarse_y_expr = F.floor(F.col("lat") * (METERS_PER_DEG_LAT / COARSE_GRID_M)).cast("long")
coarse_x_expr = F.floor(
    F.col("lon") * (METERS_PER_DEG_LAT * F.cos(F.col("lat_rad")) / COARSE_GRID_M)
).cast("long")

booking_coarse = (
    booking_geo
    .withColumn("lat_rad", F.radians("lat"))  # helper column, dropped below
    .withColumn("coarse_y", coarse_y_expr)
    .withColumn("coarse_x", coarse_x_expr)
    .withColumn("coarse_id", F.concat_ws("_", "coarse_x", "coarse_y"))
    .drop("lat_rad")
).cache()

# Number of listings per coarse region, most populated first.
coarse_counts = (
    booking_coarse
    .groupBy("coarse_id")
    .agg(F.count("*").alias("listings_in_region"))
    .orderBy(F.col("listings_in_region").desc())
).cache()

n_coarse_regions = coarse_counts.count()
print("Unique coarse regions:", n_coarse_regions)
display(coarse_counts.limit(10))


Unique coarse regions: 17072


coarse_id,listings_in_region
3_108,25299
-1_114,19440
20_93,19136
26_96,16026
-9_81,14888
14_101,12864
253_-20,12088
24_90,11522
21_100,11311
24_98,11234


In [0]:
# Optional scope reduction: keep only listings that fall inside the
# TOP_COARSE_K most populated coarse regions.
if USE_SCOPE:
    # Sort explicitly in the same plan as the limit. A bare `limit()` on the
    # cached `coarse_counts` relies on incidental row order, and regions tied on
    # `listings_in_region` would be picked arbitrarily, so the scope could
    # differ between runs. `coarse_id` is a deterministic tie-breaker.
    top_regions = (
        coarse_counts
        .orderBy(F.desc("listings_in_region"), F.asc("coarse_id"))
        .limit(TOP_COARSE_K)
        .select("coarse_id")
    )
    booking_scoped = booking_coarse.join(top_regions, on="coarse_id", how="inner").cache()
else:
    booking_scoped = booking_coarse.cache()

total_geo = booking_geo.count()
scoped_geo = booking_scoped.count()

print("Total geo:", total_geo)
print("Scoped geo:", scoped_geo)
# Guard the ratio so an empty input prints NaN instead of raising ZeroDivisionError.
print("Scope coverage:", scoped_geo / total_geo if total_geo else float("nan"))


Total geo: 3236299
Scoped geo: 407547
Scope coverage: 0.12592995888204397


5) Build 1km grids inside the (scoped) dataset

In [0]:
# Fine (GRID_M-sized) cell indices, built the same way as the coarse regions:
# latitude uses a constant scale, longitude is scaled by cos(latitude).
grid_y_expr = F.floor(F.col("lat") * (METERS_PER_DEG_LAT / GRID_M)).cast("long")
grid_x_expr = F.floor(
    F.col("lon") * (METERS_PER_DEG_LAT * F.cos(F.col("lat_rad")) / GRID_M)
).cast("long")

booking_scoped_1km = (
    booking_scoped
    .withColumn("lat_rad", F.radians("lat"))  # helper column, dropped below
    .withColumn("grid_y", grid_y_expr)
    .withColumn("grid_x", grid_x_expr)
    .withColumn("grid_id", F.concat_ws("_", "grid_x", "grid_y"))
    .drop("lat_rad")
).cache()

scoped_row_count = booking_scoped_1km.count()
unique_grid_count = booking_scoped_1km.select("grid_id").distinct().count()
print("Scoped rows:", scoped_row_count)
print("Unique 1km grids:", unique_grid_count)

grid_preview_columns = ["hotel_id", "lat", "lon", "grid_id"]
display(booking_scoped_1km.select(*grid_preview_columns).limit(5))


Scoped rows: 407547
Unique 1km grids: 78964


hotel_id,lat,lon,grid_id
2246827,23.973931951127646,121.61510512232768,12370_2668
7877735,53.011607650726,18.617122400664,1246_5901
2660076,49.6053849,11.7150806,845_5522
6828136,47.937397316651,19.034303254805,1419_5336
7609557,35.165633,136.91033,12459_3914


6) Select scrape grids (configurable)

In [0]:
# Listings per 1 km grid.
grid_counts = (
    booking_scoped_1km
    .groupBy("grid_id")
    .count()
    .withColumnRenamed("count", "listings_per_grid")
).cache()

# Only grids with at least DENSE_GRID_MIN listings are candidates for scraping.
dense_grids = grid_counts.filter(F.col("listings_per_grid") >= DENSE_GRID_MIN).cache()
print(f"Dense grids (>= {DENSE_GRID_MIN}):", dense_grids.count())

if MAX_SCRAPE_GRIDS is None:
    # No cap: scrape every dense grid.
    scrape_grids = dense_grids.select("grid_id","listings_per_grid").cache()
else:
    # Keep the MAX_SCRAPE_GRIDS densest grids. `grid_id` is a secondary sort key
    # so that grids tied on `listings_per_grid` at the cut-off are ranked the
    # same way on every run; without it the selected set can change between
    # runs, which breaks resuming a partially completed scrape.
    w = Window.orderBy(F.desc("listings_per_grid"), F.asc("grid_id"))
    scrape_grids = (
        dense_grids
        .withColumn("rank", F.row_number().over(w))
        .filter(F.col("rank") <= MAX_SCRAPE_GRIDS)
        .select("grid_id","listings_per_grid")
    ).cache()

print("Scrape grids selected:", scrape_grids.count())
display(scrape_grids.orderBy(F.desc("listings_per_grid")).limit(10))


Dense grids (>= 20): 3770
Scrape grids selected: 2000


grid_id,listings_per_grid
1454_4558,681
170_5146,625
786_4365,519
900_4953,511
703_4514,476
891_6049,473
-8002_3151,462
-5350_-3853,453
-9072_2296,446
440_5965,442


7) Coverage inside scope (how many scoped listings fall inside scraped grids)

In [0]:
# Share of scoped listings that sit in a grid selected for scraping.
selected_grid_ids = scrape_grids.select("grid_id")
listings_in_selected_grids = booking_scoped_1km.join(selected_grid_ids, "grid_id", "inner")

total_scoped = booking_scoped_1km.count()
covered = listings_in_selected_grids.count()

print("Listings in scope:", total_scoped)
print("Covered by scrape grids:", covered)
print("Coverage in scope:", covered / total_scoped)


Listings in scope: 407547
Covered by scrape grids: 153400
Coverage in scope: 0.3763983049807752


8) Compute grid centers (mean lat/lon of the listings in each selected grid) for Overpass queries

In [0]:
# One query point per selected grid: the mean coordinates of the listings in
# that grid (not the geometric centre of the cell).
listings_in_scrape_grids = booking_scoped_1km.join(scrape_grids, "grid_id", "inner")

center_aggregates = [
    F.avg("lat").alias("grid_lat"),
    F.avg("lon").alias("grid_lon"),
    # listings_per_grid is constant within a grid, so max() just carries it through.
    F.max("listings_per_grid").alias("listings_per_grid"),
]

grid_centers = (
    listings_in_scrape_grids
    .groupBy("grid_id")
    .agg(*center_aggregates)
).cache()

n_grid_centers = grid_centers.count()
print("Grid centers:", n_grid_centers)
display(grid_centers.orderBy(F.col("listings_per_grid").desc()).limit(10))


Grid centers: 2000


grid_id,grid_lat,grid_lon,listings_per_grid
1454_4558,40.95157113953731,17.302022585170405,681
170_5146,46.22763900787096,2.2137562731785487,625
786_4365,39.216382993653056,9.11713156924274,519
900_4953,44.49727032885344,11.342044367761932,511
703_4514,40.55591523575706,8.317451060520304,476
891_6049,54.34213812432793,13.740105912558285,473
-8002_3151,28.31073597736645,-81.64483362633104,462
-5350_-3853,-34.60545702454899,-58.38299035054474,453
-9072_2296,20.629270263517064,-87.07263028017611,446
440_5965,53.58825071956469,6.665674954996709,442


9) Overpass scraping: POI categories and features

In [0]:
# POI category names. Each one is a possible return value of classify() and
# yields two features per grid in scrape_grid(): `count_<cat>` and
# `min_dist_<cat>`. The list order determines the key order of the feature dict.
CATS = [
    "parks_playgrounds",
    "supermarkets",
    "pharmacies",
    "cafes",
    "restaurants",
    "nightlife",
    "attractions",
    "museums",
    "coworking",
]

def haversine_m(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in metres between two points.

    All four arguments are in decimal degrees. The haversine formula is
    evaluated on a sphere of radius 6,371,000 m.
    """
    earth_radius_m = 6371000.0
    phi_a = math.radians(lat1)
    phi_b = math.radians(lat2)
    delta_phi = math.radians(lat2 - lat1)
    delta_lambda = math.radians(lon2 - lon1)

    lat_term = math.sin(delta_phi/2)**2
    lon_term = math.cos(phi_a)*math.cos(phi_b)*math.sin(delta_lambda/2)**2
    hav = lat_term + lon_term
    return 2 * earth_radius_m * math.asin(math.sqrt(hav))

def extract_latlon(el):
    """Return ``(lat, lon)`` for an Overpass element, or ``(None, None)``.

    Coordinates are read from the element itself when it has both ``lat`` and
    ``lon`` keys; otherwise from its ``center`` sub-dict when that has both.
    """
    # Candidates in priority order: the element first, then its "center".
    for holder in (el, el.get("center")):
        if holder and "lat" in holder and "lon" in holder:
            return holder["lat"], holder["lon"]
    return None, None

def classify(tags):
    """Map an OSM tag dict to one of the POI category names, or ``None``.

    Rules are checked top to bottom and the first match wins, so an element
    carrying several relevant tags is counted once, under the earliest rule.
    """
    # (tag key, accepted values, category) in priority order.
    rules = (
        ("leisure", ("park", "playground"), "parks_playgrounds"),
        ("shop", ("supermarket", "convenience"), "supermarkets"),
        ("amenity", ("pharmacy",), "pharmacies"),
        ("amenity", ("cafe",), "cafes"),
        ("amenity", ("restaurant",), "restaurants"),
        ("amenity", ("bar", "nightclub"), "nightlife"),
        ("tourism", ("attraction",), "attractions"),
        ("tourism", ("museum",), "museums"),
        ("amenity", ("coworking_space",), "coworking"),
    )
    for tag_key, accepted_values, category in rules:
        if tags.get(tag_key) in accepted_values:
            return category
    return None

def build_query(lat, lon, radius_m=800):
    """Build the Overpass QL query for POIs around one point.

    Parameters
    ----------
    lat, lon : float
        Query point in decimal degrees.
    radius_m : int or float, default 800
        Radius of the ``around`` filter, in metres.

    Returns
    -------
    str
        A query requesting JSON output with tags and a ``center`` coordinate
        for every matching node, way and relation.

    Notes
    -----
    The regex tag filters are anchored (``^...$``). Overpass regexes otherwise
    match substrings, so e.g. ``leisure=water_park`` or ``amenity=internet_cafe``
    would be downloaded only to be discarded by ``classify``, which accepts
    exact values. The set of elements ``classify`` accepts is unchanged.
    ``nwr`` is the Overpass shorthand for node + way + relation.
    """
    around = f"(around:{radius_m},{lat},{lon})"
    tag_filters = (
        '[leisure~"^(park|playground)$"]',
        '[shop~"^(supermarket|convenience)$"]',
        '[amenity~"^(pharmacy|cafe|restaurant|bar|nightclub|coworking_space)$"]',
        '[tourism~"^(attraction|museum)$"]',
    )
    statements = "\n".join(f"      nwr{around}{flt};" for flt in tag_filters)
    return f"""
    [out:json][timeout:180];
    (
{statements}
    );
    out center tags;
    """

def post_overpass(q, retries=MAX_RETRIES):
    """POST a query to the Overpass endpoint, retrying on failure.

    An attempt succeeds when the response has HTTP status 200 and its body
    starts with ``{``. Both unsuccessful responses and network-level errors
    (timeouts, connection failures) are retried, with a pause that grows
    linearly between attempts.

    Parameters
    ----------
    q : str
        Overpass QL query text.
    retries : int
        Maximum number of attempts; values below 1 are treated as 1.

    Returns
    -------
    requests.Response
        The first successful response, or the last unsuccessful one when
        every attempt failed (callers must check it).

    Raises
    ------
    requests.RequestException
        If the final attempt failed at the network level.
    """
    attempts = max(1, retries)  # always try at least once so a result exists
    r = None
    last_error = None

    for i in range(attempts):
        try:
            r = requests.post(OVERPASS_URL, data=q.encode("utf-8"), timeout=180)
        except requests.RequestException as exc:
            # Previously a timeout or connection error escaped the retry loop.
            last_error = exc
            print(f"retry {i+1}/{attempts}: request error {exc!r}")
        else:
            last_error = None
            ok_json = (r.status_code == 200 and r.text.strip().startswith("{"))
            if ok_json:
                return r
            print(f"retry {i+1}/{attempts}: HTTP {r.status_code}, first80={r.text[:80]!r}")

        # No point in sleeping after the last attempt.
        if i < attempts - 1:
            time.sleep(2 * (i + 1))

    if last_error is not None:
        raise last_error
    return r

def scrape_grid(grid_id, lat, lon):
    """Query Overpass around one grid point and summarise nearby POIs.

    Returns a dict with, for every category in ``CATS``, ``count_<cat>``
    (number of classified POIs) and ``min_dist_<cat>`` (distance in metres
    from the query point to the nearest one, or ``None`` when there is none),
    plus ``grid_id``, ``grid_lat`` and ``grid_lon``.

    Raises ``RuntimeError`` when the final Overpass response is not an
    HTTP 200 whose body starts with ``{``.
    """
    response = post_overpass(build_query(lat, lon, RADIUS_M), retries=MAX_RETRIES)

    looks_like_json = response.status_code == 200 and response.text.strip().startswith("{")
    if not looks_like_json:
        raise RuntimeError(f"Overpass failed grid={grid_id}, HTTP={response.status_code}")

    payload = response.json()

    # Key order matters for the serialised output: counts, then minimum
    # distances, then the grid identification fields.
    feat = {}
    for cat in CATS:
        feat[f"count_{cat}"] = 0
    for cat in CATS:
        feat[f"min_dist_{cat}"] = None
    feat["grid_id"] = grid_id
    feat["grid_lat"] = lat
    feat["grid_lon"] = lon

    for element in payload.get("elements", []):
        category = classify(element.get("tags", {}))
        if category is None:
            continue  # not one of the tracked POI types

        poi_lat, poi_lon = extract_latlon(element)
        if poi_lat is None:
            continue  # element carries no usable coordinate

        distance = haversine_m(lat, lon, poi_lat, poi_lon)

        feat[f"count_{category}"] += 1
        nearest_so_far = feat[f"min_dist_{category}"]
        if nearest_so_far is None or distance < nearest_so_far:
            feat[f"min_dist_{category}"] = distance

    return feat


10) Sanity test: scrape 20 grids 

In [0]:
# Smoke test: scrape the 20 densest grids one by one and report successes and
# failures before committing to the full run.
if not RUN_SCRAPING:
    print("RUN_SCRAPING=False → skipping sanity scrape.")
else:
    test_centers = grid_centers.orderBy(F.desc("listings_per_grid")).limit(20).toPandas()
    results = []
    ok = fail = 0

    for i, row in enumerate(test_centers.to_dict("records")):
        gid = row["grid_id"]
        glat, glon = float(row["grid_lat"]), float(row["grid_lon"])
        try:
            feat = scrape_grid(gid, glat, glon)
        except Exception as e:
            fail += 1
            print(f"[FAIL] {i+1}/20 {gid} err={e}")
        else:
            results.append(feat)
            ok += 1
            print(f"[OK] {i+1}/20 {gid}")

        # Pause between requests to stay polite to the public endpoint.
        time.sleep(SLEEP_BETWEEN_REQUESTS)

    print("OK:", ok, "FAIL:", fail)
    if results:
        print("Example result:", results[0])


[OK] 1/20 1433_5287
[OK] 2/20 531_6112
[OK] 3/20 566_4848
[OK] 4/20 -395_4088
[OK] 5/20 904_4872
[OK] 6/20 5567_2804
[OK] 7/20 1327_4843
[OK] 8/20 13036_1618
[OK] 9/20 3461_4635
[OK] 10/20 -722_4580
[OK] 11/20 1200_4547
[OK] 12/20 -794_4309
[OK] 13/20 2082_4227
[OK] 14/20 1033_4663
[OK] 15/20 -4427_-2558
[OK] 16/20 1210_6050
[OK] 17/20 -4426_-2557
[OK] 18/20 -795_4309
[OK] 19/20 172_5439
[OK] 20/20 -4427_-2559
OK: 20 FAIL: 0
Example result: {'count_parks_playgrounds': 23, 'count_supermarkets': 60, 'count_pharmacies': 19, 'count_cafes': 186, 'count_restaurants': 324, 'count_nightlife': 136, 'count_attractions': 28, 'count_museums': 14, 'count_coworking': 4, 'min_dist_parks_playgrounds': 142.0169582112484, 'min_dist_supermarkets': 171.27872321682688, 'min_dist_pharmacies': 319.61342595364016, 'min_dist_cafes': 21.722258732886818, 'min_dist_restaurants': 32.867714545665635, 'min_dist_nightlife': 30.611696124329683, 'min_dist_attractions': 101.86460607591951, 'min_dist_museums': 141.541733

11) Full scraping (all selected grids) — resumable batching

In [0]:
# Full scrape of every selected grid, written as batch_NNN.jsonl files so that
# an interrupted run can be resumed by simply re-running this cell.
if RUN_SCRAPING:
    os.makedirs(BATCH_DIR_LOCAL, exist_ok=True)

    def list_batch_files():
        """Return the paths of all batch files currently in BATCH_DIR_LOCAL."""
        return glob.glob(os.path.join(BATCH_DIR_LOCAL, "batch_*.jsonl"))

    def already_scraped_ids():
        """Return the set of grid_ids found in existing batch files."""
        ids = set()
        for fp in list_batch_files():
            with open(fp, "r") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        ids.add(json.loads(line)["grid_id"])
                    except (ValueError, KeyError, TypeError):
                        # Truncated or malformed record: skip it so that the
                        # grid is scraped again. (json.JSONDecodeError is a
                        # ValueError.) Other errors are no longer swallowed.
                        continue
        return ids

    def next_batch_num():
        """Return one more than the highest batch number on disk (0 if none).

        Counting files is not enough: if numbering has gaps, the count can
        equal the number of an existing file, which would then be overwritten.
        """
        highest = -1
        for fp in list_batch_files():
            stem = os.path.basename(fp)[len("batch_"):-len(".jsonl")]
            if stem.isdigit():
                highest = max(highest, int(stem))
        return highest + 1

    centers_pdf = (
        grid_centers
        .select("grid_id", "grid_lat", "grid_lon", "listings_per_grid")
        .orderBy(F.desc("listings_per_grid"))
        .toPandas()
    )

    print("Total centers:", len(centers_pdf))

    def run_batches():
        """Scrape all not-yet-scraped grid centers in batches of BATCH_SIZE."""
        done = already_scraped_ids()
        remaining = centers_pdf[~centers_pdf["grid_id"].isin(done)].reset_index(drop=True)

        print("Already scraped:", len(done))
        print("Remaining:", len(remaining))

        # start next batch number AFTER existing files (prevents overwrite)
        batch_num = next_batch_num()

        idx = 0
        while idx < len(remaining):
            batch = remaining.iloc[idx: idx + BATCH_SIZE]
            out_local = os.path.join(BATCH_DIR_LOCAL, f"batch_{batch_num:03d}.jsonl")
            out_dbfs = f"{BATCH_DIR_DBFS}/batch_{batch_num:03d}.jsonl"

            ok = 0
            fail = 0
            batch_results = []

            print(f"\n=== Batch {batch_num:03d} | rows {idx}..{min(idx+BATCH_SIZE-1, len(remaining)-1)} ===")

            try:
                for _, row in batch.iterrows():
                    gid = row["grid_id"]
                    glat = float(row["grid_lat"])
                    glon = float(row["grid_lon"])

                    try:
                        batch_results.append(scrape_grid(gid, glat, glon))
                        ok += 1
                    except Exception as e:
                        # A failed grid is left out of the file and is picked
                        # up again on the next run.
                        fail += 1
                        print(f"[FAIL] grid={gid} err={e}")

                    time.sleep(SLEEP_BETWEEN_REQUESTS)
            finally:
                # Runs on normal completion AND when the loop is interrupted
                # (e.g. the command is cancelled), so results collected so far
                # in this batch are not lost.
                if batch_results:
                    with open(out_local, "w") as f:
                        for r in batch_results:
                            f.write(json.dumps(r) + "\n")
                    print(f"Saved: {out_dbfs} | OK={ok} FAIL={fail}")
                else:
                    # Do not create an empty batch file.
                    print(f"Nothing to save for batch {batch_num:03d} | OK={ok} FAIL={fail}")

            idx += BATCH_SIZE
            batch_num += 1

    run_batches()
else:
    print("RUN_SCRAPING=False → skipping full scraping.")

Total centers: 2000
Already scraped: 2100
Remaining: 1298

=== Batch 021 | rows 0..99 ===


com.databricks.backend.common.rpc.CommandCancelledException
	at com.databricks.spark.chauffeur.SequenceExecutionState.$anonfun$cancel$5(SequenceExecutionState.scala:136)
	at scala.Option.getOrElse(Option.scala:189)
	at com.databricks.spark.chauffeur.SequenceExecutionState.$anonfun$cancel$3(SequenceExecutionState.scala:136)
	at com.databricks.spark.chauffeur.SequenceExecutionState.$anonfun$cancel$3$adapted(SequenceExecutionState.scala:133)
	at scala.collection.immutable.Range.foreach(Range.scala:158)
	at com.databricks.spark.chauffeur.SequenceExecutionState.cancel(SequenceExecutionState.scala:133)
	at com.databricks.spark.chauffeur.ExecContextState.cancelRunningSequence(ExecContextState.scala:728)
	at com.databricks.spark.chauffeur.ExecContextState.$anonfun$cancel$1(ExecContextState.scala:446)
	at scala.Option.getOrElse(Option.scala:189)
	at com.databricks.spark.chauffeur.ExecContextState.cancel(ExecContextState.scala:446)
	at com.databricks.spark.chauffeur.ExecutionContextManagerV1.can

12) Load all enrichment batches into Spark + QA

In [0]:
# Read every batch file of the main scrape run into one DataFrame and take a
# quick look at it.
batch_glob = f"{BATCH_DIR_DBFS}/*.jsonl"
enrich_all = spark.read.json(batch_glob)

enriched_grid_count = enrich_all.count()
print("Enriched grids:", enriched_grid_count)
display(enrich_all.limit(10))


Enriched grids: 2100


count_attractions,count_cafes,count_coworking,count_museums,count_nightlife,count_parks_playgrounds,count_pharmacies,count_restaurants,count_supermarkets,grid_id,grid_lat,grid_lon,min_dist_attractions,min_dist_cafes,min_dist_coworking,min_dist_museums,min_dist_nightlife,min_dist_parks_playgrounds,min_dist_pharmacies,min_dist_restaurants,min_dist_supermarkets
28,186,4,14,136,23,19,324,60,1433_5287,47.49860900914899,19.06132900685622,101.86460607591953,21.722258732886814,235.0231995473736,141.54173350805576,30.611696124329683,142.0169582112484,319.61342595364016,32.867714545665635,171.27872321682688
2,12,0,1,12,9,3,43,6,531_6112,54.90884210566215,8.303513261491052,250.18017856463825,15.89462229269765,,268.52687349785094,33.1248494521227,65.49944493868594,150.34212345814873,21.285695674464897,116.77494280082097
2,25,0,0,11,18,15,126,26,566_4848,43.55357407438292,7.021906724117657,420.40316882414857,43.35349802596985,,,126.43072004941192,89.51895020933476,110.838069456422,27.70365396790698,76.33157251962707
19,82,3,21,60,37,28,221,50,-395_4088,36.725884255414094,-4.42115411085854,50.21630838392494,89.99752362201772,289.4391558045063,15.524348059372992,63.94024499548909,193.5276072398808,62.70862132583462,48.91810740040138,150.9046357916805
22,160,0,60,79,6,34,442,44,904_4872,43.7701886143384,11.252334342626552,138.88499687154473,76.49356191175129,,48.26347653132763,71.60131325425408,594.2985440651253,31.81514401895898,9.230938963128905,122.3117646603174
3,15,0,0,6,8,2,66,8,5567_2804,25.19380039603807,55.27126263961148,466.3834081519112,287.8371596968099,,,413.9626112193566,221.5807910374245,612.100607607202,205.69256932824163,220.52946327125545
7,93,0,12,33,33,14,140,43,1327_4843,43.509414520270965,16.44198611865546,120.32571113548828,65.05140882646745,,104.9662775355349,101.4149729812359,49.154817638618496,179.42509153612565,87.03159155532616,59.27854378124091
1,56,0,2,2,13,4,190,31,13036_1618,14.539282920022442,120.98523167464674,769.5725860391055,113.3533055037905,,614.1466150476607,15.292860108469831,333.4687254806079,199.7627715204932,21.602726725233364,91.79393047210336
2,45,0,0,14,31,21,38,74,3461_4635,41.63911787437805,41.612337713007285,202.79224777917264,111.72977015861407,,,141.0184641241197,119.5538298434388,27.913300588372863,123.98372649107247,33.66484203501917
50,220,2,20,115,5,18,412,43,-722_4580,41.14730470692734,-8.607182799143024,109.6910565748138,16.618240609508327,575.8889581392167,122.59183511817442,79.82433745995804,536.7089643658749,42.17585860142364,51.36557861685544,72.28796034976061


### Finalize Stage 3 Enrichment
This cell loads all scraped grid-level enrichment data, merges results from all scraping runs,
deduplicates by grid_id, and saves a single canonical dataset for downstream modeling.


In [0]:
from functools import reduce

# -------------------------
# Paths of all scrape runs
# -------------------------
SCRAPE_PATHS = [
    # Main run: derived from BATCH_DIR_DBFS so it cannot drift from the
    # directory the scraping cell writes to.
    f"{BATCH_DIR_DBFS}/*.jsonl",
    "dbfs:/tmp/booking_enrich_batches_top2000_missing/*.jsonl"  # may or may not exist
]

# -------------------------
# Load all existing batches
# -------------------------
dfs = []
for p in SCRAPE_PATHS:
    try:
        df = spark.read.json(p)
        n_rows = df.count()  # counted once; also forces the read inside the try
    except Exception as e:
        # Best effort: a missing run is expected, but report the actual error
        # so that a genuine read failure is not mistaken for "not found".
        print(f"Skipping {p} (not found or unreadable): {type(e).__name__}: {str(e)[:200]}")
        continue

    if n_rows > 0:
        print(f"Loaded {n_rows} rows from {p}")
        dfs.append(df)
    else:
        print(f"Skipping {p} (empty)")

if not dfs:
    raise RuntimeError("No scraped data found. Nothing to save.")

# -------------------------
# Merge + deduplicate
# -------------------------
# Runs may have different column sets, hence allowMissingColumns=True.
# NOTE(review): when a grid_id appears in more than one run, dropDuplicates
# keeps one of the rows without a defined preference — confirm this is acceptable.
enrich_all = reduce(lambda a, b: a.unionByName(b, allowMissingColumns=True), dfs) \
    .dropDuplicates(["grid_id"]) \
    .cache()

print("Total unique enriched grids:", enrich_all.count())
display(enrich_all.limit(10))

# -------------------------
# Save canonical enrichment
# -------------------------
FINAL_GRID_ENRICH_PATH = "dbfs:/tmp/booking_grid_enrichment_final"

enrich_all.write.mode("overwrite").parquet(FINAL_GRID_ENRICH_PATH)

print("Final grid-level enrichment saved to:")
print(FINAL_GRID_ENRICH_PATH)


Loaded 2100 rows from dbfs:/tmp/booking_enrich_batches/*.jsonl
Loaded 800 rows from dbfs:/tmp/booking_enrich_batches_top2000_missing/*.jsonl
Total unique enriched grids: 2198


count_attractions,count_cafes,count_coworking,count_museums,count_nightlife,count_parks_playgrounds,count_pharmacies,count_restaurants,count_supermarkets,grid_id,grid_lat,grid_lon,min_dist_attractions,min_dist_cafes,min_dist_coworking,min_dist_museums,min_dist_nightlife,min_dist_parks_playgrounds,min_dist_pharmacies,min_dist_restaurants,min_dist_supermarkets
5,55,0,14,15,58,14,110,38,1031_4665,41.90994845673136,12.452050966503547,407.0389752309149,41.74892695746815,,411.8914551385195,23.3515835881438,143.04158551495254,119.41777498035307,11.362537115685626,79.06444594041352
4,29,0,0,1,4,1,64,4,5568_2804,25.19202328360665,55.280871035420496,389.86028782307466,169.85382741875907,,,297.72696380310526,392.51041626647367,127.2910866153603,133.52186396782363,141.05261177859663
2,20,0,0,10,5,13,43,33,1708_4438,39.87196281696668,19.99811947860087,391.87738653868365,177.3337686150511,,,176.5283513279118,421.3363820704234,269.2073670749508,79.5968919570971,207.15845287604589
0,0,0,0,0,0,0,0,0,-8295_3381,30.37694154221444,-86.3670273163289,,,,,,,,,
0,25,0,1,3,29,4,11,20,1217_4911,44.1198711150295,15.235810953063543,,164.09622910411272,,794.3900253445852,308.47120290637906,299.0474921693966,269.10129820425766,61.075687884002924,133.1685934156732
0,2,0,0,0,1,0,3,1,-8276_3374,30.312922527859254,-86.11228670326778,,224.4165790519222,,,,184.50927710554092,,30.85810282576695,14.743794938612275
9,21,0,15,21,7,3,95,10,1483_4746,42.641330310393485,18.114636356787837,280.6595713388492,50.31881592451582,,247.3548302280968,55.59007039629853,139.3062068676229,353.5142020290032,86.40454711327315,73.39898402027045
2,103,0,12,25,29,12,82,35,4227_4494,40.37395593293424,49.84708066111036,461.0172586967726,147.63353966966207,,141.60788898882316,163.69719626744802,144.00193217652856,287.16410961195226,29.902896447213813,168.2611304900511
0,0,0,0,1,2,0,2,1,-5157_-1973,-17.72214718388298,-48.62823053405295,,,,,143.49610482717702,618.6436373136153,,429.7600116677075,709.5627328599389
13,20,0,3,10,10,7,65,9,1454_4558,40.951571139537336,17.30202258517041,28.565425670813436,63.36558478800463,,130.80090261903968,35.732555544162764,235.02280804838543,67.32894118224058,126.15253549192366,305.28296269708284


Final grid-level enrichment saved to:
dbfs:/tmp/booking_grid_enrichment_final
