In [None]:
!pip install pandas --quiet
!pip install geopandas --quiet
!pip install shapely --quiet

In [None]:
import pandas as pd
import geopandas as gpd
from shapely import wkt
import math

In [None]:
# Load the merged dataset from the parquet file next to the notebook.
merged_data = pd.read_parquet('merged_data.parquet')

In [None]:
# Parse the WKT text stored in each coordinate column into shapely geometries.
# Only text values (str/bytes) are handed to wkt.loads; anything else -- for
# example geometries left behind by a previous run of this cell -- is passed
# through unchanged, so the cell can safely be re-executed.
convert_columns = ['original_coord', 'snapped_coord', 'osm_coord']
for col in convert_columns:
    merged_data[col] = merged_data[col].apply(
        lambda value: wkt.loads(value) if isinstance(value, (str, bytes)) else value
    )

In [None]:
# Following function provided by Jens Goossens
def expScore(distanceInMeters, maxDistanceCutOff, lambda_arg):
  """Map a distance to a score in [0, 1] using a normalised exponential decay.

  Args:
    distanceInMeters: Distance to score. Values <= 0 score 1.0.
    maxDistanceCutOff: Distance at or beyond which the score is 0.0.
    lambda_arg: Decay rate of the exponential between the two end points.
      When it is 0 (or so small that 1 - exp(-lambda_arg) rounds to 0.0) the
      curve degenerates to the straight line 1 - distance / cutoff.

  Returns:
    A float: 1.0 at distance <= 0, 0.0 at distance >= maxDistanceCutOff, and
    (exp(-lambda*u) - exp(-lambda)) / (1 - exp(-lambda)) in between, where
    u = distanceInMeters / maxDistanceCutOff.
  """
  if distanceInMeters <= 0:
    return 1.0
  if distanceInMeters >= maxDistanceCutOff:
    return 0.0
  u = distanceInMeters / maxDistanceCutOff
  eNegLambda = math.exp(-lambda_arg)
  normaliser = 1.0 - eNegLambda
  if normaliser == 0.0:
    # The general formula would divide by zero here; its limit as
    # lambda_arg -> 0 is the linear ramp 1 - u.
    return 1.0 - u
  eNegLambdaU = math.exp(-lambda_arg * u)
  return (eNegLambdaU - eNegLambda) / normaliser

In [None]:
# Wrap the merged data in a GeoDataFrame whose geometry column is the snapped
# coordinate, tagged with the EPSG:4326 coordinate reference system.
all_snap = gpd.GeoDataFrame(
    merged_data,
    geometry='snapped_coord',
    crs="EPSG:4326",
)
# Score every old_dist value with expScore (cut-off 250, lambda 4).
all_snap['old_score'] = all_snap['old_dist'].apply(expScore, args=(250, 4))

In [None]:
# Independent (deep, the default) copy, so edits to selective_snap never touch all_snap.
selective_snap = all_snap.copy()

In [None]:
# Snapping is undone for the categories below: matching rows get their
# original coordinate and their old distance back.
# NOTE(review): `in` is a substring test if `category` holds a string (so
# 'park' would also match e.g. 'parking') -- confirm this is intended.
categories_to_exclude = ['farm', 'lake', 'park', 'field', 'campground', 'river']


def _is_excluded(row):
    """Return True when any excluded category occurs in the row's category."""
    for excluded in categories_to_exclude:
        if excluded in row['category']:
            return True
    return False


def _coord_after_exclusion(row):
    """Pick the original coordinate for excluded rows, else the snapped one."""
    if _is_excluded(row):
        return row['original_coord']
    return row['snapped_coord']


def _dist_after_exclusion(row):
    """Pick the old distance for excluded rows, else the new one."""
    if _is_excluded(row):
        return row['old_dist']
    return row['new_dist']


selective_snap['snapped_coord'] = selective_snap.apply(_coord_after_exclusion, axis=1)
selective_snap['new_dist'] = selective_snap.apply(_dist_after_exclusion, axis=1)

In [None]:
# Score new_dist for both variants with the same expScore settings
# (cut-off 250, lambda 4) that were used for old_score.
for frame in (selective_snap, all_snap):
    frame['new_score'] = frame['new_dist'].apply(lambda dist: expScore(dist, 250, 4))

In [None]:
def _log_distance_ratio(row):
    """Natural log of (new_dist + 1) / (old_dist + 1) for a single row."""
    shifted_new = row['new_dist'] + 1
    shifted_old = row['old_dist'] + 1
    return math.log(shifted_new / shifted_old)


all_snap['log_ratio'] = all_snap.apply(_log_distance_ratio, axis=1)
selective_snap['log_ratio'] = selective_snap.apply(_log_distance_ratio, axis=1)

In [None]:
def _relative_distance_change(row):
    """Change from old_dist to new_dist, as a fraction of (old_dist + 1)."""
    delta = row['new_dist'] - row['old_dist']
    return delta / (row['old_dist'] + 1)


all_snap['percent_change'] = all_snap.apply(_relative_distance_change, axis=1)
selective_snap['percent_change'] = selective_snap.apply(_relative_distance_change, axis=1)

In [None]:
# Difference between mean new_dist and mean old_dist (lower is better).
# Both lines are measured against the mean old_dist taken from all_snap.
baseline_mean_dist = all_snap['old_dist'].mean()
print(all_snap['new_dist'].mean() - baseline_mean_dist)
print(selective_snap['new_dist'].mean() - baseline_mean_dist)

In [None]:
# Mean of the percent_change column (lower is better): all_snap first, then selective_snap.
for frame in (all_snap, selective_snap):
    print(frame['percent_change'].mean())

In [None]:
# Mean log_ratio, i.e. the log of the geometric mean of the ratios (lower is
# better): all_snap on the first line, selective_snap on the second.
print(
    all_snap['log_ratio'].mean(),
    selective_snap['log_ratio'].mean(),
    sep='\n',
)

In [None]:
# Mean 0-1 accuracy score (higher is better), printed in this order:
# old score, new score when snapping everything, new score with selective snapping.
score_means = [
    all_snap['old_score'].mean(),
    all_snap['new_score'].mean(),
    selective_snap['new_score'].mean(),
]
for score_mean in score_means:
    print(score_mean)