# Geographical Evaluation using Open Street Maps

In [None]:
# !pip install googlemaps

In [126]:
import os
import json
import polars as pl
import numpy as np
from pathlib import Path
import googlemaps
from geopy.distance import geodesic
from typing import List, Tuple, Dict, Union
from geopy.geocoders import Nominatim
from pprint import pprint

print(f'polars version: {pl.__version__}')

project_dir = Path(os.getcwd()).parent
data_dir = project_dir / 'Data'

geolocator = Nominatim(user_agent="geocoder_llm_project", timeout=300)

polars version: 1.9.0


In [46]:
df = pl.read_parquet(data_dir / 'new_formatted_addresses.parquet')

In [47]:
idx = 100
print(df[idx]['FormattedFullAddress'].item())
print(df[idx]['Latitude'].item(), df[idx]['Longitude'].item())
# df[idx].to_dict(as_series=False)

4524, Old Caldwell Mill Road, Shelby County, Alabama, 35242
33.41236637208968 -86.73952124099591


The GeoEvaluation class is designed to evaluate the accuracy of predicted geographic locations by comparing them with ground truth coordinates. It utilizes the OpenStreetMap's `Nominatim` geocoding service to convert textual addresses into geographic coordinates (latitude and longitude) and then calculates the geodesic distance between the predicted and actual locations.

A radius threshold of 0.2 kilometers (200 meters) to determine if two locations are considered the same

Methods:<br>
* `geocode_address(address: str) -> Union[Tuple[float, float], Tuple[None, None]]`<br>
This method takes a textual address as input and returns its geographic coordinates.

* `compute_geographic_distance(latitude1: float, longitude1: float, latitude2: float, longitude2: float) -> float`<br>
This method calculates the geodesic distance between two geographic points.

* `are_same(predicted_address: str, groundtruth_latitude: float, groundtruth_longitude: float) -> bool`<br>
This method determines whether a predicted address corresponds to the same location as the ground truth coordinates.

Process:
* * Geocodes the predicted address to obtain its coordinates
* * Calculates the distance between the predicted and ground truth coordinates
* * Prints the distance for debugging/information purposes
* * Returns True if the distance is less than or equal to the radius threshold (0.2 km)

Evaluation Logic
The class considers two locations to be the same if they are within 200 meters (0.2 kilometers) of each other. This threshold accounts for:

Geocoding Imprecision: Different geocoding services might return slightly different coordinates for the same address
Address Ambiguity: Addresses can sometimes refer to large buildings or areas rather than precise points
Practical Equivalence: For many applications, locations within 200 meters are functionally equivalent

The threshold can be adjusted by changing the radius_threshold property depending on the specific requirements of your evaluation task. A smaller threshold would increase precision requirements, while a larger threshold would be more lenient in considering locations as matches.

In [58]:
class GeoEvaluation:
    def __init__(self):
        self.timeout = 300
        self.geocoder = Nominatim(user_agent="geocoder_llm_evaluation_agent", timeout=self.timeout)
        self.radius_threshold = 0.2  # in kilometers

    def geocode_address(self, address: str) -> Union[Tuple[float, float], Tuple[None, None]]:
        try:
            location = self.geocoder.geocode(address)
            if location:
                return (location.latitude, location.longitude)
            print(f"Did not find location for: {address}")
        except Exception as e:
            print(f"Geocoding error for address '{address}': {e}")
        return (None, None)

    def compute_geographic_distance(self, latitude1: float, longitude1: float, latitude2: float, longitude2: float) -> float:
        """
        Compute the geodesic distance between two lat/lon pairs in kilometers using geopy.
        """
        return geodesic((latitude1, longitude1), (latitude2, longitude2)).kilometers

    def are_same(self, predicted_address: str, groundtruth_latitude: float, groundtruth_longitude: float) -> bool:
        # Geocode predicted address
        pred_latitude, pred_longitude = self.geocode_address(predicted_address)
        if pred_latitude is None or pred_longitude is None:
            return False

        # Compute the geographical distance
        distance = self.compute_geographic_distance(
            float(pred_latitude), float(pred_longitude),
            float(groundtruth_latitude), float(groundtruth_longitude)
        )

        print(f'Distance from ground truth: {round(distance, 3)} KM')

        return distance <= self.radius_threshold

In [59]:
evaluator = GeoEvaluation()
groundtruth_lat, groundtruth_lon = 32.79596540137694, -85.6535596907001
predicted_address = "south main st, Camp Hill, Alabama, 36850"

is_match = evaluator.are_same(predicted_address, groundtruth_lat, groundtruth_lon)
print("Match:", is_match)

Distance from ground truth: 0.195 KM
Match: True


# Inference Analysis:

In [101]:
import re
from sklearn.metrics import precision_score, recall_score, f1_score

In [226]:
df = pl.read_parquet(data_dir / 'address_with_instructions.parquet')

In [157]:
df.head(3)

OID_,AddNum_Pre,Add_Number,AddNum_Suf,AddNo_Full,St_PreMod,St_PreDir,St_PreTyp,St_PreSep,St_Name,St_PosTyp,St_PosDir,St_PosMod,StNam_Full,Building,Floor,Unit,Room,Seat,Addtl_Loc,SubAddress,LandmkName,County,Inc_Muni,Post_City,Census_Plc,Uninc_Comm,Nbrhd_Comm,NatAmArea,NatAmSub,Urbnztn_PR,PlaceOther,PlaceNmTyp,State,Zip_Code,Plus_4,UUID,AddAuth,AddrRefSys,Longitude,Latitude,NatGrid,Elevation,Placement,AddrPoint,Related_ID,RelateType,ParcelSrc,Parcel_ID,AddrClass,Lifecycle,Effective,Expire,DateUpdate,AnomStatus,LocatnDesc,Addr_Type,DeliverTyp,NAD_Source,DataSet_ID,StreetAddress,SecondaryAddress,CityStateZip,FullAddress,FormattedFullAddress,task1_instruction,task1_groundtruth,task2_instruction,task2_groundtruth,noise_level,variant_idx
i64,str,i64,str,i64,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,i64,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,i64
62241576,"""""",1187,"""""",1187,"""""","""north""","""""","""""","""pownal""","""road""","""""","""""","""north pownal road""","""""","""""","""""","""""","""""","""""","""""","""""","""bennington""","""NaN""","""pownal""","""""","""""","""""","""""","""""","""""","""""","""""","""vt""",5260,,"""{2adfa62f-3b16-475b-96d3-726f7…",,"""""","""-73.2482712649639""","""42.8039371512875""","""18txn4323540530""","""""","""NaN""","""-73.2482712649639 42.803937151…","""""","""""","""""","""""","""numbered thoroughfare address""","""""","""""","""""","""5/27/2016 9:33:29""","""""","""""","""residential""","""""","""vermontenhanced911board""","""134413""","""1187 north pownal road""","""""","""pownal, vt, 5260""","""1187 north pownal road pownal…","""1187, north Pownal Road, Benni…","""Parse the following address in…","""{  ""AddNum_Pre"": """",  ""Add_N…","""Fix the formatting, structure,…","""{  ""AddNum_Pre"": """",  ""Add_N…","""medium""",0
19781905,"""""",1124,"""""",1124,"""""","""""","""""","""""","""judah bear""","""boulevard""","""""","""""","""judah bear boulevard""","""""","""""","""""","""""","""""","""""","""""","""""","""madison""","""richmond""","""richmond""","""""","""""","""""","""""","""""","""""","""""","""""","""ky""",40475,,"""{fbbed738-c83d-41c2-b4ca-bf33c…",,"""""","""-84.25537236725299""","""37.754618638845024""","""16sgg4179582137""","""""","""NaN""","""-84.25537236725299 37.75461863…","""""","""""","""""","""""","""numbered thoroughfare address""","""""","""1/31/2024 0:00:00""","""12/31/2024 0:00:00""","""11/3/2022 0:00:00""","""""","""""","""""","""""","""stateofkentucky""","""ssap_94076madisoncountyky.us""","""1124 judah bear boulevard""","""""","""richmond, ky, 40475""","""1124 judah bear boulevard ric…","""1124, Judah Bear Boulevard, Ri…","""Parse the following address in…","""{  ""AddNum_Pre"": """",  ""Add_N…","""Fix the formatting, structure,…","""{  ""AddNum_Pre"": """",  ""Add_N…","""extreme""",2
21790301,"""""",27,"""""",27,"""""","""""","""""","""""","""captain bellamy""","""lane""","""""","""""","""captain bellamy lane""","""""","""""","""""","""""","""""","""""","""""","""""","""barnstable""","""barnstable""","""""","""""","""centerville""","""""","""""","""""","""""","""""","""""","""ma""",2632,,"""{8ad0ef9e-2733-7335-7ae0-03faa…",,"""uninc_comm""","""-70.330290313""","""41.6600585620001""","""19tcg8924212888""","""""","""structure - rooftop""","""-70.330290313 41.6600585620001""","""""","""""","""""","""""","""numbered thoroughfare address""","""""","""""","""""","""1/20/2014 0:00:00""","""""","""""","""residential""","""""","""massgismassachusetts""","""43420""","""27 captain bellamy lane""","""""","""ma, 2632""","""27 captain bellamy lane ma, 2…","""27, Captain Bellamy Lane, Cent…","""Parse the following address in…","""{  ""AddNum_Pre"": """",  ""Add_N…","""Fix the formatting, structure,…","""{  ""AddNum_Pre"": """",  ""Add_N…","""medium""",0


In [159]:
df.select(['Add_Number', 'AddNo_Full', 'St_PreDir', 'St_Name', 'St_PosTyp', 'County', 'Inc_Muni', 'Post_City', 'State', 'Zip_Code', 'Latitude', 'Longitude']).head(3)

Add_Number,AddNo_Full,St_PreDir,St_Name,St_PosTyp,County,Inc_Muni,Post_City,State,Zip_Code,Latitude,Longitude
i64,i64,str,str,str,str,str,str,str,i64,str,str
1187,1187,"""north""","""pownal""","""road""","""bennington""","""NaN""","""pownal""","""vt""",5260,"""42.8039371512875""","""-73.2482712649639"""
1124,1124,"""""","""judah bear""","""boulevard""","""madison""","""richmond""","""richmond""","""ky""",40475,"""37.754618638845024""","""-84.25537236725299"""
27,27,"""""","""captain bellamy""","""lane""","""barnstable""","""barnstable""","""""","""ma""",2632,"""41.6600585620001""","""-70.330290313"""


In [84]:
def get_record(oid: int, data: pl.DataFrame = df) -> Dict:
    record = data.filter(
        pl.col('OID_') == oid
    )[0].select([
        'OID_', 
        'FormattedFullAddress', 
        'Latitude', 
        'Longitude',
        'noise_level',
        'variant_idx'
        ]).to_dict(as_series=False)
    return {k : v[0] for k, v in record.items()}

In [43]:
# State abbreviation to full name mapping
STATE_MAP = {
    'TX': 'Texas',
    'LA': 'Louisiana',
    'ME': 'Maine',
    'WY': 'Wyoming',
    'KY': 'Kentucky',
    'MI': 'Michigan',
    'WA': 'Washington',
    'VT': 'Vermont',
    'ND': 'North Dakota',
    'TN': 'Tennessee',
    'IN': 'Indiana',
    'WV': 'West Virginia',
    'MN': 'Minnesota',
    'RI': 'Rhode Island',
    'DE': 'Delaware',
    'IL': 'Illinois',
    'SD': 'South Dakota',
    'AK': 'Alaska',
    'MS': 'Mississippi',
    'OK': 'Oklahoma',
    'PA': 'Pennsylvania',
    'WI': 'Wisconsin',
    'NY': 'New York',
    'KS': 'Kansas',
    'NM': 'New Mexico',
    'AZ': 'Arizona',
    'SC': 'South Carolina',
    'FL': 'Florida',
    'NC': 'North Carolina',
    'MD': 'Maryland',
    'UT': 'Utah',
    'NE': 'Nebraska',
    'NH': 'New Hampshire',
    'VA': 'Virginia',
    'GA': 'Georgia',
    'AL': 'Alabama',
    'CA': 'California',
    'MA': 'Massachusetts',
    'CT': 'Connecticut',
    'AR': 'Arkansas',
    'CO': 'Colorado',
    'MT': 'Montana',
    'DC': 'District of Columbia',
    'ID': 'Idaho',
    'IA': 'Iowa',
    'OH': 'Ohio',
    'MO': 'Missouri'
}

In [44]:
def format_usdot_to_freeform_granular(data: dict, state_map: dict) -> str:
    # Custom null-like values to filter
    NULL_STRINGS = {"", None, "nan", "null"}

    def safe_get(key):
        val = data.get(key)
        if isinstance(val, str):
            val = val.lower()
        return None if (val in NULL_STRINGS or str(val).strip() in NULL_STRINGS) else str(val).strip()

    def safe_title(key):
        val = safe_get(key)
        return val.title() if val else None

    # House number
    number = " ".join(filter(None, [safe_get("AddNum_Pre"),
                                    safe_get("Add_Number"),
                                    safe_get("AddNum_Suf")]))

    # Street full
    street_parts = [
        safe_get("St_PreDir"),
        safe_title("St_Name"),
        safe_title("St_PosTyp"),
        safe_get("St_PosDir")
    ]
    street = " ".join(part for part in street_parts if part)

    # Unit/building details
    sub_parts = []
    if safe_get("Building"): sub_parts.append(f"Bldg {safe_get('Building')}")
    if safe_get("Floor"): sub_parts.append(f"Floor {safe_get('Floor')}")
    if safe_get("Unit"): sub_parts.append(f"Unit {safe_get('Unit')}")
    if safe_get("Room"): sub_parts.append(f"Room {safe_get('Room')}")

    sub_address = ", ".join(sub_parts)

    # Town/City
    town = safe_title("Uninc_Comm") or safe_title("Inc_Muni")

    # County
    county = safe_title("County")

    # State
    state_abbr = safe_get("State")
    state_full = state_map.get(state_abbr.upper(), state_abbr) if state_abbr else None

    # ZIP
    zip_raw = safe_get("Zip_Code")
    zip_code = zip_raw.zfill(5) if zip_raw and zip_raw.isdigit() else None

    # Compose full address
    components = [number, street]
    if sub_address:
        components.append(sub_address)
    components.extend([
        town,
        f"{county} County" if county else None,
        state_full,
        zip_code
    ])

    return ", ".join([c for c in components if c])

In [85]:
get_record(oid=23562552)


{'OID_': 23562552,
 'FormattedFullAddress': '1, Taunton Street, Somerville, Middlesex County, Massachusetts, 02143',
 'Latitude': '42.3769386750001',
 'Longitude': '-71.100590605',
 'noise_level': 'low',
 'variant_idx': 1}

### Task 1 Inference:

In [172]:
t1_inf_df = pl.read_csv(data_dir / 'task1_inference_results.csv')

In [173]:
def parse_address_json(model_output: str, gt: bool = False) -> Dict:
    if not gt:
        prefix = "System: "
        json_str = model_output[len(prefix):].strip()
    else:
        json_str = model_output
    address_json = json.loads(json_str)
    return address_json

In [174]:
record_idx = 2
record = t1_inf_df[record_idx]

t1_groundtruth = record['ground_truth'].item()
t1_model_output = record['model_output'].item()

grounttruth = parse_address_json(t1_groundtruth, gt=True)
modeloutput = parse_address_json(t1_model_output, gt=False)

In [175]:
print(f'GT Entities: {grounttruth}')
print(f'Generated Entities: {modeloutput}')

GT Entities: {'AddNum_Pre': '', 'Add_Number': '86', 'AddNum_Suf': '', 'St_PreDir': '', 'St_Name': 'p', 'St_PosTyp': 'street', 'St_PosDir': '', 'Building': '', 'Floor': '', 'Unit': '2', 'Room': '', 'Uninc_Comm': 'south boston', 'Inc_Muni': 'boston', 'County': 'suffolk', 'State': 'ma', 'Zip_Code': '2127'}
Generated Entities: {'AddNum_Pre': '', 'Add_Number': '86', 'AddNum_Suf': '', 'St_PreDir': '', 'St_Name': 'p', 'St_PosTyp': 'street', 'St_PosDir': '', 'Building': '', 'Floor': '', 'Unit': '2', 'Room': '', 'Uninc_Comm': 'south boston', 'Inc_Muni': 'boston', 'County': 'suffolk', 'State': 'ma', 'Zip_Code': '2127'}


In [None]:
def evaluate_address_parsing(gt: Dict[str, str], pred: Dict[str, str]) -> Dict[str, float]:
    assert gt.keys() == pred.keys(), "Mismatched keys between ground truth and prediction"

    total_fields = len(gt)
    correct_fields = 0

    total_gt_entities = 0
    total_pred_entities = 0
    correct_predicted_entities = 0

    total_noisy_fields = 0
    correctly_corrected_fields = 0

    for key in gt:
        gt_val = (gt[key] or "").strip().lower()
        pred_val = (pred[key] or "").strip().lower()

        # Field-level accuracy
        if gt_val == pred_val:
            correct_fields += 1

        # Precision/Recall entity presence
        if gt_val:
            total_gt_entities += 1
        if pred_val:
            total_pred_entities += 1
        if gt_val and pred_val and gt_val == pred_val:
            correct_predicted_entities += 1

        # Correction accuracy (only evaluate on noisy fields)
        if gt_val != pred_val and gt_val and (gt_val != (gt[key] or "").strip().lower()):
            total_noisy_fields += 1
            if pred_val == gt_val:
                correctly_corrected_fields += 1
        elif gt_val != (gt[key] or "").strip().lower():  # noisy field
            total_noisy_fields += 1
            if pred_val == gt_val:
                correctly_corrected_fields += 1

    # Final metrics
    precision = correct_predicted_entities / total_pred_entities if total_pred_entities > 0 else 0.0
    recall = correct_predicted_entities / total_gt_entities if total_gt_entities > 0 else 0.0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0
    field_accuracy = correct_fields / total_fields
    correction_accuracy = correctly_corrected_fields / total_noisy_fields if total_noisy_fields > 0 else 1.0

    return {
        "field_accuracy": field_accuracy,
        "precision": precision,
        "recall": recall,
        "f1_score": f1,
        "correction_accuracy": correction_accuracy,
        "num_noisy_fields": total_noisy_fields,
        "num_correctly_corrected": correctly_corrected_fields
    }

In [215]:
def evaluate_t1_dataset(file_path: str):
    df = pl.read_csv(file_path)
    acc_list, p_list, r_list, f1_list = [], [], [], []

    for row in df.rows(named=True):
        try:
            gt = parse_address_json(row['ground_truth'], gt=True)
            pred = parse_address_json(row['model_output'], gt=False)
            
            metrics = evaluate_address_parsing(gt, pred)
            
            acc_list.append(metrics['correction_accuracy'])
            p_list.append(metrics['precision'])
            r_list.append(metrics['recall'])
            f1_list.append(metrics['f1_score'])
        except:
            continue

    return {
        "avg_field_accuracy": sum(acc_list) / len(acc_list),
        "avg_precision": sum(p_list) / len(p_list),
        "avg_recall": sum(r_list) / len(r_list),
        "avg_f1_score": sum(f1_list) / len(f1_list)
    }

#### Evaluation Summary

In [216]:
summary = evaluate_t1_dataset(data_dir / 'task1_inference_results.csv')
print(summary)

{'avg_field_accuracy': 1.0, 'avg_precision': 1.0, 'avg_recall': 1.0, 'avg_f1_score': 1.0}


In [223]:
record = t1_inf_df[50]

gt = parse_address_json(record['ground_truth'].item(), gt=True)
pred = parse_address_json(record['model_output'].item(), gt=False)

metadata = get_record(oid=record['OID_'].item())

prompt = record['instruction'].item()

print(f'gt: {gt}')
print(f'pred: {pred}')
print(metadata)

gt: {'AddNum_Pre': '', 'Add_Number': '201', 'AddNum_Suf': '', 'St_PreDir': '', 'St_Name': 'sanders', 'St_PosTyp': 'street', 'St_PosDir': '', 'Building': '', 'Floor': '', 'Unit': '1', 'Room': '', 'Uninc_Comm': 'athol', 'Inc_Muni': 'athol', 'County': 'worcester', 'State': 'ma', 'Zip_Code': '1331'}
pred: {'AddNum_Pre': '', 'Add_Number': '201', 'AddNum_Suf': '', 'St_PreDir': '', 'St_Name': 'sanders', 'St_PosTyp': 'street', 'St_PosDir': '', 'Building': '', 'Floor': '', 'Unit': '1', 'Room': '', 'Uninc_Comm': 'athol', 'Inc_Muni': 'athol', 'County': 'worcester', 'State': 'ma', 'Zip_Code': '1331'}
{'OID_': 25146114, 'FormattedFullAddress': '201, Sanders Street, Unit 1, Athol, Worcester County, Massachusetts, 01331', 'Latitude': '42.588018678', 'Longitude': '-72.231960989', 'noise_level': 'high', 'variant_idx': 1}


In [222]:
print(prompt)

Parse the following address into a structured JSON with these fields: AddNum_Pre, Add_Number, AddNum_Suf, St_PreDir, St_Name, St_PosTyp, St_PosDir, Building, Floor, Unit, Room, Uninc_Comm, Inc_Muni, County, State, Zip_Code.
Address: 201, Sanders Street, Unit 1, Athol, Worcester County, Massachusetts, 01331


### Task 2 Inference:

In [136]:
t2_inf_df = pl.read_csv(data_dir / 'task2_inference_results.csv')
print(f'Number of task 2 inferences: {len(t2_inf_df)}')

secrets_dir = project_dir / 'Secrets'

with open (secrets_dir / 'gmaps.txt') as f:
    gmaps_api = f.read()

gmaps_client = googlemaps.Client(key=gmaps_api)

Number of task 2 inferences: 100


In [None]:
def extract_address_json_from_prompt(prompt: str) -> str:
    """
    Extracts the JSON address block from the prompt.
    Args:
        prompt: The input string containing a prompt and an address JSON.
    Returns:
        The JSON string portion from the prompt.
    """
    match = re.search(r'(\{\s*".*?\})$', prompt, re.DOTALL)
    return match.group(1) if match else ""

In [79]:
t2_inf_df.head(3)

OID_,instruction,ground_truth,model_output
i64,str,str,str
23562552,"""Fix the formatting, structure,…","""{  ""AddNum_Pre"": """",  ""Add_N…","""System: {  ""AddNum_Pre"": """", …"
25210416,"""Fix the formatting, structure,…","""{  ""AddNum_Pre"": """",  ""Add_N…","""System: {  ""AddNum_Pre"": """", …"
24579587,"""Fix the formatting, structure,…","""{  ""AddNum_Pre"": """",  ""Add_N…","""System: {  ""AddNum_Pre"": """", …"


In [133]:
t2_inf_df.schema

Schema([('OID_', Int64),
        ('instruction', String),
        ('ground_truth', String),
        ('model_output', String)])

In [139]:
def enrich_t2_df(data: pl.DataFrame, gmaps_client: str) -> pl.DataFrame:
    
    enriched_rows = []

    for record in data.rows(named=True):
        t2_groundtruth = record['ground_truth']
        t2_model_output = record['model_output']
        prompt = record['instruction']

        noisy_json = extract_address_json_from_prompt(prompt)
        noisy_json = parse_address_json(noisy_json, gt=True)
        grounttruth_json = parse_address_json(t2_groundtruth, gt=True)
        modeloutput_json = parse_address_json(t2_model_output, gt=False)

        noisy_address = format_usdot_to_freeform_granular(noisy_json, STATE_MAP)
        generated_address = format_usdot_to_freeform_granular(modeloutput_json, STATE_MAP)

        metadata = get_record(record['OID_'])

        predicted_geocoded = gmaps_client.geocode(generated_address)
        predicted_geocoded = predicted_geocoded[0]["geometry"]["location"]

        enriched_row = {
            "OID_": record['OID_'],
            "noise_level": metadata['noise_level'],
            "variant_idx": metadata['variant_idx'],
            "noisy_json": json.dumps(noisy_json),
            "ground_truth_json": json.dumps(grounttruth_json),
            "predicted_json": json.dumps(modeloutput_json),
            "groundtruth_address": metadata['FormattedFullAddress'],
            "groundtruth_latitude": metadata['Latitude'],
            "groundtruth_longitude": metadata['Longitude'],
            "noisy_address": noisy_address,
            "predicted_address": generated_address,
            "predicted_latitude": predicted_geocoded['lat'],
            "predicted_longitude": predicted_geocoded['lng'],
            }
        enriched_rows.append(enriched_row)

    return pl.DataFrame(enriched_rows)

In [None]:
# Do not run this again! Will cost Google Maps API
# enriched_t2_df = enrich_t2_df(t2_inf_df, gmaps_client)

In [145]:
def geocode_similarity(lat1: float, lon1: float, lat2: float, lon2: float, radius_threshold_km = 0.2):
    delta_distance_km = geodesic((lat1, lon1), (lat2, lon2)).kilometers
    return 1 if delta_distance_km <= radius_threshold_km else 0

In [149]:
def evaluate_json_corrections(noisy_json: Dict, ground_truth: Dict, generated: Dict) -> Dict:
    assert noisy_json.keys() == ground_truth.keys() == generated.keys()

    total_noisy_fields = 0
    corrected_fields = 0
    fieldwise_results = []

    for field in noisy_json:
        noisy_val = (noisy_json[field] or "").strip().lower()
        gt_val = (ground_truth[field] or "").strip().lower()
        pred_val = (generated[field] or "").strip().lower()

        # Skip if ground truth is empty — can't validate
        if not gt_val:
            continue

        # Consider field noisy if it mismatches GT
        if noisy_val != gt_val:
            total_noisy_fields += 1
            corrected_fields += int(pred_val == gt_val)
            fieldwise_results.append({
                "field": field,
                "noisy": noisy_val,
                "predicted": pred_val,
                "ground_truth": gt_val,
                "is_correct": pred_val == gt_val
            })

    accuracy = corrected_fields / total_noisy_fields if total_noisy_fields > 0 else 1.0

    return {
        "noisy_fields_total": total_noisy_fields,
        "corrected_correctly": corrected_fields,
        "correction_accuracy": accuracy,
        "fieldwise_analysis": fieldwise_results
    }

In [150]:
def evaluate_geosim_and_json_corrections(df: pl.DataFrame) -> Dict:
    total_records = len(df)
    geo_similar_count = 0
    correction_scores = []

    for row in df.iter_rows(named=True):
        try:
            # Geosimilarity
            lat1 = float(row['groundtruth_latitude'])
            lon1 = float(row['groundtruth_longitude'])
            lat2 = float(row['predicted_latitude'])
            lon2 = float(row['predicted_longitude'])

            geo_similar = geocode_similarity(lat1, lon1, lat2, lon2)
            geo_similar_count += geo_similar

            # JSON correction
            noisy_json = json.loads(row['noisy_json'])
            gt_json = json.loads(row['ground_truth_json'])
            pred_json = json.loads(row['predicted_json'])

            result = evaluate_json_corrections(noisy_json, gt_json, pred_json)
            correction_scores.append(result['correction_accuracy'])

        except Exception as e:
            print(f"Skipping record {row['OID_']} due to error: {e}")
            continue

    avg_correction_accuracy = sum(correction_scores) / len(correction_scores) if correction_scores else 0.0

    return {
        "geo_similar_count": geo_similar_count,
        "total_records": total_records,
        "geo_similarity_rate": geo_similar_count / total_records,
        "avg_correction_accuracy": avg_correction_accuracy
    }

#### Evaluation Summary

In [151]:
evaluate_geosim_and_json_corrections(enriched_t2_df)

{'geo_similar_count': 30,
 'total_records': 100,
 'geo_similarity_rate': 0.3,
 'avg_correction_accuracy': 0.5717103174603173}

In [None]:
# class GeoEvaluation:
#     def __init__(self, google_api_key: str = None):
#         self.radius_threshold = 0.2
#         self.gmaps = googlemaps.Client(key=google_api_key) if google_api_key else None

#     def geocode_address(self, address: str) -> Union[Tuple[float, float], Tuple[None, None]]:
#         try:
#             if self.gmaps:
#                 geocode_result = self.gmaps.geocode(address)
#                 if geocode_result:
#                     loc = geocode_result[0]['geometry']['location']
#                     return (loc['lat'], loc['lng'])
#             else:
#                 print("Google Maps client not initialized.")
#         except Exception as e:
#             print(f"Google API error for address '{address}': {e}")
#         return (None, None)

#     def compute_geographic_distance(self, lat1: float, lon1: float, lat2: float, lon2: float) -> float:
#         return geodesic((lat1, lon1), (lat2, lon2)).kilometers

#     def are_same(self, predicted_address: str, groundtruth_lat: float, groundtruth_lon: float) -> bool:
#         pred_lat, pred_lon = self.geocode_address(predicted_address)
#         if pred_lat is None or pred_lon is None:
#             return False
#         distance = self.compute_geographic_distance(pred_lat, pred_lon, groundtruth_lat, groundtruth_lon)
#         print(f"Distance from ground truth: {round(distance, 3)} KM")
#         return distance <= self.radius_threshold

In [132]:
evaluate_json_corrections(noisy_json, grounttruth_json, modeloutput_json)

{'noisy_fields_total': 4,
 'corrected_correctly': 2,
 'correction_accuracy': 0.5,
 'fieldwise_analysis': [{'field': 'Add_Number',
   'noisy': '74',
   'predicted': '47',
   'ground_truth': '47',
   'is_correct': True},
  {'field': 'St_Name',
   'noisy': 'ba9 vi4ew',
   'predicted': 'beach',
   'ground_truth': 'bay view',
   'is_correct': False},
  {'field': 'County',
   'noisy': '',
   'predicted': 'essex',
   'ground_truth': 'essex',
   'is_correct': True},
  {'field': 'Zip_Code',
   'noisy': '19',
   'predicted': '1905',
   'ground_truth': '1902',
   'is_correct': False}]}

In [242]:
x = df.filter(
    pl.col('OID_') == 21913155
)

for row in x.rows(named=True):
    p = row['task2_instruction']
    noisy_add_json = extract_address_json_from_prompt(p)
    noisy_add_json = parse_address_json(noisy_add_json, gt=True)
    noisy_add = format_usdot_to_freeform_granular(noisy_add_json, STATE_MAP)
    print(f'{noisy_add}\n')

Bay View Avenue, Unit 14, 2Y6N, Essex County, Massachusetts

471, Bay View P8En8Me, Unit 1, Essex County

74, Ba9 Vi4Ew Avenue, Unit 1, Lynn, Massachusetts, 00019

