In [None]:
import pandas as pd
import json

In [None]:
# Load the raw resale records and the pre-built geocode cache.
df = pd.read_csv("resale_original.csv")
# Pin the encoding: JSON is conventionally UTF-8, while open() otherwise falls
# back to the platform locale (e.g. cp1252 on Windows) and could mis-decode
# any non-ASCII text in the cache.
with open("geocode_cache_full.json", "r", encoding="utf-8") as f:
    coord_cache = json.load(f)

# Construct lookup key: "<block> <street_name>", the form used to index coord_cache
df["lookup_key"] = df["block"].astype(str) + " " + df["street_name"]

# Function to extract coordinates
def get_coords(key, cache=None):
    """Return the coordinates of the first geocode match for ``key``.

    Parameters
    ----------
    key : hashable
        Lookup key into the geocode cache.
    cache : mapping, optional
        Mapping of lookup key -> list of match dicts, each carrying "X", "Y",
        "LATITUDE" and "LONGITUDE" values convertible to float. Defaults to
        the notebook-level ``coord_cache`` so existing one-argument calls keep
        working.

    Returns
    -------
    pd.Series
        Four positional values ``[X, Y, LATITUDE, LONGITUDE]`` as floats, or
        four ``None`` values when the key is absent or has no matches.

    Raises
    ------
    KeyError, ValueError
        If the first match lacks one of the four fields or holds a value that
        cannot be converted to float.
    """
    if cache is None:
        cache = coord_cache

    # .get() collapses "key absent", "empty match list" and a null entry into
    # one falsy check, so a null cache value no longer crashes on len(None).
    matches = cache.get(key)
    if not matches:
        return pd.Series([None, None, None, None])

    # Only the first match is used; any further matches are ignored.
    first = matches[0]
    return pd.Series(
        [float(first[field]) for field in ("X", "Y", "LATITUDE", "LONGITUDE")]
    )

# Create columns
# Resolve each distinct lookup key once instead of once per row: returning a
# pd.Series from a row-wise apply is a slow path in pandas, and rows that share
# a key would otherwise repeat the identical cache lookup.
coord_columns = ["X", "Y", "LATITUDE", "LONGITUDE"]
unique_keys = df["lookup_key"].drop_duplicates()
coords_by_key = unique_keys.apply(get_coords)
coords_by_key.columns = coord_columns
coords_by_key.insert(0, "lookup_key", unique_keys)

n_rows_before = len(df)
# A left join keeps every resale row in its original order and appends the
# coordinate columns at the end; validate= guards against a duplicated key
# silently fanning rows out.
df = df.merge(coords_by_key, on="lookup_key", how="left", validate="many_to_one")
assert len(df) == n_rows_before, "coordinate join changed the row count"

# Add running ID column (1-based, in file order)
df.insert(0, "id", range(1, len(df) + 1))

# Drop helper column
df = df.drop(columns=["lookup_key"])

# Save
df.to_csv("resale_with_coordinates.csv", index=False)

In [4]:
df = pd.read_csv("resale_with_coordinates.csv")

# Keep every row that lacks at least one of the four coordinate fields
coord_fields = ["X", "Y", "LATITUDE", "LONGITUDE"]
missing = df[df[coord_fields].isna().any(axis=1)]
n_missing = len(missing)

# Summary
print("Total rows:", len(df))
print("Rows with missing coordinates:", n_missing)

if n_missing == 0:
    print("\nAll rows have full coordinate data!")
else:
    print("\nRows with missing coordinate values:")
    print(missing[["id", "block", "street_name"] + coord_fields])

Total rows: 219165
Rows with missing coordinates: 0

All rows have full coordinate data!
