In [1]:
import csv
import os
from batid.services.guess_bdg_new import Guesser, GeocodeNameHandler


os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"


guesses_path = "guesses.json"


In [2]:
def row_to_input(row):
    
    return {
        "ext_id": row['\ufeff"Id Unique"'],
        "name": row["Nom"],
        "lat": float(row["Latitude"]),
        "lng": float(row["Longitude"])
    }

In [3]:
file_path = "places_20260126171515.csv"

inputs = []

ext_ids = set()

with open(file_path, "r") as f:
    reader = csv.DictReader(f, delimiter=";")
    
    for row in reader:
        
        # It seems there are some duplicates in the original file
        if row['\ufeff"Id Unique"'] in ext_ids:
            continue
        
        ext_ids.add(row['\ufeff"Id Unique"'])
        
        input = row_to_input(row)
        inputs.append(input)

 
print(f"There are {len(ext_ids)} unique id in the original file")

guesser = Guesser()
guesser.create_work_file(inputs, guesses_path)

There are 36968 unique id in the original file


In [4]:
guesser = Guesser(batch_size=100)
guesser.handlers = [GeocodeNameHandler(sleep_time=0, bbox_apothem_in_meters=500, photon_url="http://host.docker.internal:2322/api/")]
guesser.guess_work_file(guesses_path)

  0%|          | 0/370 [00:00<?, ?it/s]

In [5]:
from IPython.display import display, HTML

guesser = Guesser()
guesser.load_work_file(guesses_path)
guesser.report()

sample = guesser.matched_sample("found_name_in_osm", 20, ["input_ext_id", "matches", "match_reason", "input_name", "input_lat", "input_lng"],)

display(HTML(sample.to_html()))

-- Report --
Number of rows: 36968
Number of match: 13405 (36.26%)

-- finished_steps --
Rows with finished_steps closest_from_point: 0 (0.00%)
Rows with finished_steps geocode_address: 0 (0.00%)
Rows with finished_steps geocode_name: 36968 (100.00%)
Rows with empty finished_steps: 0 (0.00%)

-- match_reasons : absolute --
match_reason
found_name_in_osm_point_on_bdg            13084
found_name_in_osm_isolated_closest_bdg      321
Name: count, dtype: int64

-- match_reasons : % --
match_reason
found_name_in_osm_point_on_bdg            35.392772
found_name_in_osm_isolated_closest_bdg     0.868319
Name: count, dtype: float64

-- Inputs --


ValueError: a must be greater than 0 unless no samples are taken

In [6]:
guesser = Guesser()
guesser.load_work_file(guesses_path)
guesser.to_csv("ministere-culture-rnb.csv")