<h1 align="center"><img align="center" src="data/geoparse_logo.png" alt="GeoParse Logo" width="200"/></h1>
<h1 align="center">GeoParse</h1>
<h3 align="center">It's all about points <img src="data/point.png" width="10"/> lines <img src="data/line.png" width="40"/> and polygons <img src="data/polygon.png" width="30"/></h3>



#### [Website](http://geoparse.io)
***

# Geospatial Index

[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/geoparse/geoparse/main?labpath=tutorials%2F00_visualization.ipynb)

This notebook demonstrates how to convert coordinates into spatial index cells (H3, S2, and Geohash), convert those cells back into coordinates, and measure the resulting round-trip error.
***

In [1]:
# Put the parent directory on the import path (presumably so the local
# `geoparse` package can be imported without being installed).
import os
import sys

sys.path.append(os.path.abspath(os.pardir))

In [3]:
import warnings

import pandas as pd

from geoparse.gindex import GeomCell, CellGeom
from geoparse.karta import plp

# NOTE: this silences every warning category for the rest of the kernel session,
# including deprecation and runtime warnings raised by pandas or geoparse.
warnings.filterwarnings("ignore")

In [4]:
# Create the two converters used in the rest of the notebook:
#   geomcell -> GeomCell, used below to encode points into cell ids (ppointcell)
#   cellgeom -> CellGeom, used below to decode cell ids back into points (pcellpoint)
geomcell = GeomCell()
cellgeom = CellGeom()

## Read from CSV file

In [5]:
# Load the 2023 Great Britain road-casualty CSV and preview the first rows.
CSV_PATH = "data/great_britain_road_casualties-2023.csv"
df = pd.read_csv(CSV_PATH)
df.head()

Unnamed: 0,date,time,latitude,longitude,number_of_vehicles,number_of_casualties,speed_limit
0,03/01/2023,19:12,51.356551,-0.097759,1,1,30
1,07/01/2023,10:05,51.593701,0.022379,1,1,30
2,14/01/2023,16:15,51.466689,-0.011289,1,1,20
3,15/01/2023,19:51,51.671577,-0.037543,1,1,30
4,16/01/2023,19:22,51.447944,0.117279,2,1,30


In [6]:
# Number of rows loaded from the CSV.
df.shape[0]

1522

In [7]:
# Visualise the loaded records with geoparse's `plp` helper
# (presumably plots each latitude/longitude pair on a map — confirm in geoparse.karta).
plp(df)

In [8]:
%%time
df["h3"] = geomcell.ppointcell(df.latitude, df.longitude, cell_type="h3", res=15)
df.head()

CPU times: user 55 ms, sys: 112 ms, total: 167 ms
Wall time: 1.34 s


Unnamed: 0,date,time,latitude,longitude,number_of_vehicles,number_of_casualties,speed_limit,h3
0,03/01/2023,19:12,51.356551,-0.097759,1,1,30,8f194ac22370811
1,07/01/2023,10:05,51.593701,0.022379,1,1,30,8f194e68042a851
2,14/01/2023,16:15,51.466689,-0.011289,1,1,20,8f194ad058a4c04
3,15/01/2023,19:51,51.671577,-0.037543,1,1,30,8f195db6b2d6cca
4,16/01/2023,19:22,51.447944,0.117279,2,1,30,8f194e6deb25ac9


In [9]:
%%time
df[["lat", "lon"]] = cellgeom.pcellpoint(df.h3, cell_type="h3")
df.head()

CPU times: user 43.2 ms, sys: 102 ms, total: 145 ms
Wall time: 1.32 s


Unnamed: 0,date,time,latitude,longitude,number_of_vehicles,number_of_casualties,speed_limit,h3,lat,lon
0,03/01/2023,19:12,51.356551,-0.097759,1,1,30,8f194ac22370811,51.356552,-0.097765
1,07/01/2023,10:05,51.593701,0.022379,1,1,30,8f194e68042a851,51.593698,0.022381
2,14/01/2023,16:15,51.466689,-0.011289,1,1,20,8f194ad058a4c04,51.466687,-0.011286
3,15/01/2023,19:51,51.671577,-0.037543,1,1,30,8f195db6b2d6cca,51.671579,-0.03754
4,16/01/2023,19:22,51.447944,0.117279,2,1,30,8f194e6deb25ac9,51.447941,0.11728


In [10]:
# Worst-case round-trip error of the H3 encoding, as (latitude, longitude) in degrees.
# Vectorised pandas reductions replace the builtin max(), which iterates the Series
# element by element in Python and gives order-dependent results if a NaN is present.
# float() keeps the displayed tuple made of plain Python floats.
(
    float((df["latitude"] - df["lat"]).abs().max()),
    float((df["longitude"] - df["lon"]).abs().max()),
)

(4.954881163143909e-06, 8.070243062263671e-06)

In [11]:
%%time
df["s2"] = geomcell.ppointcell(df.latitude, df.longitude, cell_type="s2", res=30)
df.head()

CPU times: user 46 ms, sys: 98.9 ms, total: 145 ms
Wall time: 1.31 s


Unnamed: 0,date,time,latitude,longitude,number_of_vehicles,number_of_casualties,speed_limit,h3,lat,lon,s2
0,03/01/2023,19:12,51.356551,-0.097759,1,1,30,8f194ac22370811,51.356552,-0.097765,487607526cb67e75
1,07/01/2023,10:05,51.593701,0.022379,1,1,30,8f194e68042a851,51.593698,0.022381,47d8a749f4303a43
2,14/01/2023,16:15,51.466689,-0.011289,1,1,20,8f194ad058a4c04,51.466687,-0.011286,4876027b49457ecb
3,15/01/2023,19:51,51.671577,-0.037543,1,1,30,8f195db6b2d6cca,51.671579,-0.03754,48761f9ac2a128c7
4,16/01/2023,19:22,51.447944,0.117279,2,1,30,8f194e6deb25ac9,51.447941,0.11728,47d8ae95f533fc95


In [12]:
%%time
df[["lat", "lon"]] = cellgeom.pcellpoint(df.s2, cell_type="s2")
df.head()

CPU times: user 42.9 ms, sys: 101 ms, total: 144 ms
Wall time: 1.32 s


Unnamed: 0,date,time,latitude,longitude,number_of_vehicles,number_of_casualties,speed_limit,h3,lat,lon,s2
0,03/01/2023,19:12,51.356551,-0.097759,1,1,30,8f194ac22370811,51.356551,-0.097759,487607526cb67e75
1,07/01/2023,10:05,51.593701,0.022379,1,1,30,8f194e68042a851,51.593701,0.022379,47d8a749f4303a43
2,14/01/2023,16:15,51.466689,-0.011289,1,1,20,8f194ad058a4c04,51.466689,-0.011289,4876027b49457ecb
3,15/01/2023,19:51,51.671577,-0.037543,1,1,30,8f195db6b2d6cca,51.671577,-0.037543,48761f9ac2a128c7
4,16/01/2023,19:22,51.447944,0.117279,2,1,30,8f194e6deb25ac9,51.447944,0.117279,47d8ae95f533fc95


In [13]:
# Worst-case round-trip error of the S2 encoding, as (latitude, longitude) in degrees.
# Vectorised pandas reductions replace the builtin max(), which iterates the Series
# element by element in Python and gives order-dependent results if a NaN is present.
# float() keeps the displayed tuple made of plain Python floats.
(
    float((df["latitude"] - df["lat"]).abs().max()),
    float((df["longitude"] - df["lon"]).abs().max()),
)

(4.346317439285485e-08, 6.312256317642095e-08)

In [14]:
%%time
df["geohash"] = geomcell.ppointcell(df.latitude, df.longitude, cell_type="geohash", res=10)
df.head()

CPU times: user 46.1 ms, sys: 98.9 ms, total: 145 ms
Wall time: 1.35 s


Unnamed: 0,date,time,latitude,longitude,number_of_vehicles,number_of_casualties,speed_limit,h3,lat,lon,s2,geohash
0,03/01/2023,19:12,51.356551,-0.097759,1,1,30,8f194ac22370811,51.356551,-0.097759,487607526cb67e75,gcpujv0vd4
1,07/01/2023,10:05,51.593701,0.022379,1,1,30,8f194e68042a851,51.593701,0.022379,47d8a749f4303a43,u10j882716
2,14/01/2023,16:15,51.466689,-0.011289,1,1,20,8f194ad058a4c04,51.466689,-0.011289,4876027b49457ecb,gcpuz9pz0y
3,15/01/2023,19:51,51.671577,-0.037543,1,1,30,8f195db6b2d6cca,51.671577,-0.037543,48761f9ac2a128c7,gcpvzns8v0
4,16/01/2023,19:22,51.447944,0.117279,2,1,30,8f194e6deb25ac9,51.447944,0.117279,47d8ae95f533fc95,u10hdtv3uz


In [15]:
%%time
df[["lat", "lon"]] = cellgeom.pcellpoint(df.geohash, cell_type="geohash")
df.head()

CPU times: user 42 ms, sys: 100 ms, total: 142 ms
Wall time: 1.35 s


Unnamed: 0,date,time,latitude,longitude,number_of_vehicles,number_of_casualties,speed_limit,h3,lat,lon,s2,geohash
0,03/01/2023,19:12,51.356551,-0.097759,1,1,30,8f194ac22370811,51.35655,-0.0978,487607526cb67e75,gcpujv0vd4
1,07/01/2023,10:05,51.593701,0.022379,1,1,30,8f194e68042a851,51.5937,0.0224,47d8a749f4303a43,u10j882716
2,14/01/2023,16:15,51.466689,-0.011289,1,1,20,8f194ad058a4c04,51.46669,-0.0113,4876027b49457ecb,gcpuz9pz0y
3,15/01/2023,19:51,51.671577,-0.037543,1,1,30,8f195db6b2d6cca,51.67158,-0.0375,48761f9ac2a128c7,gcpvzns8v0
4,16/01/2023,19:22,51.447944,0.117279,2,1,30,8f194e6deb25ac9,51.44794,0.1173,47d8ae95f533fc95,u10hdtv3uz


In [16]:
# Worst-case round-trip error of the geohash encoding, as (latitude, longitude) in degrees.
# Vectorised pandas reductions replace the builtin max(), which iterates the Series
# element by element in Python and gives order-dependent results if a NaN is present.
# float() keeps the displayed tuple made of plain Python floats.
(
    float((df["latitude"] - df["lat"]).abs().max()),
    float((df["longitude"] - df["lon"]).abs().max()),
)

(7.000000003642981e-06, 5.500000000002725e-05)