# In [0]:
import numpy as np
import pandas as pd

def meters_to_deg_lat(m):
    """Convert a north-south distance in meters to degrees of latitude.

    Uses the approximation of 111,000 m per degree of latitude. ``m`` may be
    a scalar or any object that supports division by a float (for example a
    NumPy array), in which case the conversion is applied element-wise.
    """
    meters_per_degree = 111_000.0
    return m / meters_per_degree

def meters_to_deg_lon(m, lat):
    """Convert an east-west distance in meters to degrees of longitude.

    The length of one degree of longitude is taken as 111,000 m multiplied
    by cos(latitude), so the same distance spans more degrees further from
    the equator.

    Args:
        m: Distance in meters (scalar or array-like).
        lat: Latitude in degrees at which the conversion is evaluated
            (scalar or array-like; broadcast against ``m`` by NumPy).
    """
    lat_rad = np.deg2rad(lat)
    meters_per_degree = 111_000.0 * np.cos(lat_rad)
    return m / meters_per_degree

def jitter_duplicates(df, lat_col='lat', lon_col='lon', jitter_meters=30, *, seed=None):
    """Return a copy of ``df`` with repeated coordinates randomly nudged apart.

    Rows are grouped by their exact ``(lat_col, lon_col)`` pair. The first
    row of every group keeps its coordinates; each later row in the group is
    moved by a random offset whose length is drawn uniformly from
    ``[0, jitter_meters)`` and whose direction is drawn uniformly from
    ``[0, 2*pi)``.

    Args:
        df: Input frame; it is not modified.
        lat_col: Name of the latitude column (degrees).
        lon_col: Name of the longitude column (degrees).
        jitter_meters: Upper bound of the random offset length, in meters.
        seed: Optional seed (anything accepted by ``np.random.default_rng``,
            including an existing ``Generator``) for reproducible output.
            When ``None`` the global ``np.random`` state is used, so
            callers relying on ``np.random.seed`` see unchanged results.

    Returns:
        A new DataFrame with the jittered coordinates.
    """
    n_rows = len(df)

    # One radius and one angle are drawn for every row (not just duplicates)
    # so the random stream consumed does not depend on where duplicates sit.
    if seed is None:
        r = np.random.rand(n_rows) * jitter_meters
        theta = np.random.rand(n_rows) * 2 * np.pi
    else:
        rng = np.random.default_rng(seed)
        r = rng.random(n_rows) * jitter_meters
        theta = rng.random(n_rows) * 2 * np.pi

    # Convert meter offsets to degrees, using each row's own latitude for
    # the longitude scaling.
    dlat = meters_to_deg_lat(r * np.sin(theta))
    dlon = meters_to_deg_lon(r * np.cos(theta), df[lat_col].values)

    # cumcount() is 0 for the first occurrence of a coordinate pair and
    # 1, 2, ... for later occurrences, so > 0 selects only the repeats.
    mask = df.groupby([lat_col, lon_col]).cumcount() > 0
    mask_values = mask.to_numpy()

    df = df.copy()
    df.loc[mask, lat_col] = df.loc[mask, lat_col] + dlat[mask_values]
    df.loc[mask, lon_col] = df.loc[mask, lon_col] + dlon[mask_values]
    return df

# spiderfy test
def spiderfy_duplicates(df, lat_col='lat', lon_col='lon', radius_m=200):
    """Return a copy of ``df`` with repeated coordinates fanned out on a circle.

    Rows are grouped by their exact ``(lat_col, lon_col)`` pair (NaN keys are
    kept as their own group). A group with a single row is left untouched.
    A group of ``n > 1`` rows is placed at ``n`` evenly spaced angles on a
    circle of radius ``radius_m`` meters centered on the shared coordinate.

    Args:
        df: Input frame; it is not modified. Any index is supported.
        lat_col: Name of the latitude column (degrees).
        lon_col: Name of the longitude column (degrees).
        radius_m: Radius of the circle, in meters.

    Returns:
        A new DataFrame with the same index and the adjusted coordinates.
    """
    df = df.copy()
    lat_values = df[lat_col].to_numpy()
    lon_values = df[lon_col].to_numpy()
    dlat_offsets = np.zeros(len(df), dtype=float)
    dlon_offsets = np.zeros(len(df), dtype=float)

    # GroupBy.indices maps each key to *positional* row numbers, not index
    # labels, so the offsets are accumulated in positional arrays instead of
    # going through the label-based ``df.loc``.
    groups = df.groupby([lat_col, lon_col], dropna=False)
    for positions in groups.indices.values():
        n = len(positions)
        if n < 2:
            continue
        # Even angles around a circle.
        angles = np.linspace(0, 2 * np.pi, n, endpoint=False)
        north_m = radius_m * np.sin(angles)
        east_m = radius_m * np.cos(angles)
        dlat_offsets[positions] = meters_to_deg_lat(north_m)
        # Use the group's shared latitude for the longitude scaling.
        dlon_offsets[positions] = meters_to_deg_lon(east_m, lat_values[positions])

    # Assigning plain arrays writes row-by-row in order, keeping the index.
    df[lat_col] = lat_values + dlat_offsets
    df[lon_col] = lon_values + dlon_offsets
    return df