<h1 align="center"><img align="center" src="https://geoparse.io/graphics/geoparse_logo.png" alt="GeoParse Logo" width="200"/></h1>
<h1 align="center">GeoParse</h1>
<h3 align="center">All About Points <img src="https://geoparse.io/graphics/point.png" width="10"/> Lines <img src="https://geoparse.io/graphics/line.png" width="40"/> and Polygons <img src="https://geoparse.io/graphics/polygon.png" width="30"/></h3>


#### [HTML](http://geoparse.io/tutorials/karta.html) 
***

# Data Visualization

[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/geoparse/geoparse/main?labpath=tutorials%2F00_visualization.ipynb)

This notebook demonstrates how to visualize large-scale geometries containing over 1 million points using the SnabbKarta class.

***

In [None]:
import os
import sys

# Put the ../geoparse/ directory (resolved to an absolute path) on the import path,
# presumably so the local GeoParse sources are importable from this notebook.
sys.path.append(os.path.abspath("../geoparse/"))
import warnings

import geopandas as gpd
import pandas as pd

from geoparse import SnabbKarta, SpatialIndex

# Do not truncate columns when a DataFrame is displayed.
pd.set_option("display.max_columns", None)
# Suppress every warning for the rest of the session to keep the cell output clean.
warnings.filterwarnings("ignore")

In [None]:
# Enable IPython's autoreload extension. Mode 2 reloads all imported modules
# before each execution, so edits to library code are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

# GeoDataFrame

In [None]:
gdf = gpd.read_parquet("../../open-data/data/os-open-usrn/osopenusrn_202510.parquet", bbox=(-0.5, 51.25, 0.5, 51.75))
gdf.head()

In [None]:
SnabbKarta.plp(gdf)

# DataFrame

In [None]:
df = pd.read_parquet("../../open-data/data/ons-postcode-directory/ons-postcode-directory.parquet")
df.head()

In [None]:
len(df)

In [None]:
SnabbKarta.plp(df.sample(10_000))

In [None]:
SnabbKarta.plp(df.sample(10_000), geom_col=["lon", "lat"])

## Geohash, S2, H3

In [None]:
df = pd.read_parquet("../../open-data/data/ons-postcode-directory/ons-postcode-directory.parquet")
df.head()

In [None]:
len(df)

In [None]:
df = df.sample(10_000).reset_index(drop=True)
df.head()

In [None]:
df.isnull().sum()

In [None]:
cells = SpatialIndex.point_cell(lats=df.lat.to_list(), lons=df.lon.to_list(), cell_type="geohash", res=4)
cdf = pd.DataFrame({"cell_id": cells})  # cell df
cdf.head()

In [None]:
cdf.isnull().sum()

In [None]:
SnabbKarta.plp(cdf, geom_col="cell_id", geom_type="geohash")

In [None]:
SnabbKarta.plp(set(cells), geom_type="geohash")

In [None]:
cells = SpatialIndex.point_cell(lats=df.lat.to_list(), lons=df.lon.to_list(), cell_type="s2", res=9)
cdf = pd.DataFrame({"cell_id": cells})  # cell df
cdf.head()

In [None]:
SnabbKarta.plp(cdf, geom_col="cell_id", geom_type="s2")

In [None]:
cells = SpatialIndex.point_cell(lats=df.lat.to_list(), lons=df.lon.to_list(), cell_type="s2_int", res=9)
cdf = pd.DataFrame({"cell_id": cells}, dtype="Int64")
cdf.head()

In [None]:
SnabbKarta.plp(cdf, geom_col="cell_id", geom_type="s2_int")

In [None]:
cells = SpatialIndex.point_cell(lats=df.lat.to_list(), lons=df.lon.to_list(), cell_type="h3", res=5)
cdf = pd.DataFrame({"cell_id": cells})  # cell df
cdf.head()

In [None]:
SnabbKarta.plp(cdf, geom_col="cell_id", geom_type="h3")

## UPRN, USRN, Postcode, OSM

### UPRN

In [None]:
%%time

# UPRN Prep (kept commented out): the steps below convert the raw UPRN directory
# from easting/northing (EPSG:27700) to lat/lon (EPSG:4326) and write the
# uprn_minimal.parquet file that the next cell reads. Uncomment to regenerate it.

# df = pd.read_parquet('~/repo/open-data/data/ons-uprn-directory/parq/')

# gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.easting, df.northing), crs=27700).to_crs(4326)
# gdf = gdf[['uprn', 'geometry']]

# gdf = gdf.sort_values(by=['uprn'])

# gdf['lat'] = gdf.geometry.y
# gdf['lon'] = gdf.geometry.x
# gdf.head()

# gdf[['uprn', 'lat', 'lon']].to_parquet('../../open-data/data/ons-uprn-directory/uprn_minimal.parquet', index=False)

In [None]:
%%time
# gdf = gpd.read_parquet('../../open-data/data/ons-uprn-directory/uprn_minimal_geom.parquet')  # gpd is slower than pd for reading parquet file.
df = pd.read_parquet("../../open-data/data/ons-uprn-directory/uprn_minimal.parquet")
df.head()

In [None]:
len(df)

In [None]:
udf = df[["uprn"]].sample(10_000).reset_index(drop=True)
udf.head()

In [None]:
%%time
lookup_gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.lon, df.lat), crs=4326)
lookup_gdf = lookup_gdf[["uprn", "geometry"]]
lookup_gdf.head()

In [None]:
len(lookup_gdf)

In [None]:
SnabbKarta.plp(udf, geom_type="uprn", geom_col="uprn", lookup_gdf=lookup_gdf, lookup_key="uprn")

### USRN

In [None]:
gdf = gpd.read_parquet("../../open-data/data/os-open-usrn/osopenusrn_202510.parquet", bbox=(-0.5, 51.25, 0.5, 51.75))
gdf.head()

In [None]:
len(gdf)

In [None]:
udf = gdf[["usrn"]].sample(10_000).reset_index(drop=True)
udf.head()

In [None]:
SnabbKarta.plp(udf, geom_type="usrn", geom_col="usrn", lookup_gdf=gdf, lookup_key="usrn")

### Postcode

In [None]:
%%time
df = pd.read_parquet("../../open-data/data/ons-postcode-directory/ons-postcode-directory.parquet")
df.head()

In [None]:
pdf = df[["postcode"]].sample(10_000).reset_index(drop=True)
pdf.head()

In [None]:
%%time
lookup_gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.lon, df.lat), crs=4326)
lookup_gdf = lookup_gdf[["postcode", "geometry"]]
lookup_gdf.head()

In [None]:
SnabbKarta.plp(pdf, geom_type="postcode", geom_col="postcode", lookup_gdf=lookup_gdf, lookup_key="postcode")

### OSM

In [None]:
%%time
gdf = gpd.read_parquet("../../open-data/data/geofabrik-osm/monaco/multipolygons.parquet")
gdf.head()

In [None]:
len(gdf)

In [None]:
lookup_gdf = gdf[["osm_way_id", "name", "geometry"]]
lookup_gdf.head()

In [None]:
odf = lookup_gdf[["osm_way_id"]].sample(1000).reset_index(drop=True)
odf.head()

In [None]:
SnabbKarta.plp(odf, geom_type="osm", geom_col="osm_way_id", lookup_gdf=lookup_gdf, lookup_key="osm_way_id")

# Set

## Geohash, S2, H3

In [None]:
df = pd.read_parquet("../../open-data/data/ons-postcode-directory/ons-postcode-directory.parquet")
df.head()

In [None]:
len(df)

In [None]:
df = df.sample(10_000).reset_index(drop=True)
df.head()

In [None]:
df.isnull().sum()

In [None]:
cells = SpatialIndex.point_cell(lats=df.lat.to_list(), lons=df.lon.to_list(), cell_type="geohash", res=4)
cells[:3]

In [None]:
SnabbKarta.plp(set(cells), geom_type="geohash")

In [None]:
cells = SpatialIndex.point_cell(lats=df.lat.to_list(), lons=df.lon.to_list(), cell_type="s2", res=9)
cells[:3]

In [None]:
SnabbKarta.plp(set(cells), geom_type="s2")

In [None]:
cells = SpatialIndex.point_cell(lats=df.lat.to_list(), lons=df.lon.to_list(), cell_type="s2_int", res=9)
cells[:3]

In [None]:
SnabbKarta.plp(set(cells), geom_type="s2_int")

In [None]:
cells = SpatialIndex.point_cell(lats=df.lat.to_list(), lons=df.lon.to_list(), cell_type="h3", res=5)
cells[:3]

In [None]:
SnabbKarta.plp(set(cells), geom_type="h3")

## UPRN, USRN, Postcode, OSM

### UPRN

In [None]:
%%time

# UPRN Prep (kept commented out): the steps below convert the raw UPRN directory
# from easting/northing (EPSG:27700) to lat/lon (EPSG:4326) and write the
# uprn_minimal.parquet file that the next cell reads. Uncomment to regenerate it.

# df = pd.read_parquet('~/repo/open-data/data/ons-uprn-directory/parq/')

# gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.easting, df.northing), crs=27700).to_crs(4326)
# gdf = gdf[['uprn', 'geometry']]

# gdf = gdf.sort_values(by=['uprn'])

# gdf['lat'] = gdf.geometry.y
# gdf['lon'] = gdf.geometry.x
# gdf.head()

# gdf[['uprn', 'lat', 'lon']].to_parquet('../../open-data/data/ons-uprn-directory/uprn_minimal.parquet', index=False)

In [None]:
%%time
# gdf = gpd.read_parquet('../../open-data/data/ons-uprn-directory/uprn_minimal_geom.parquet')  # gpd is slower than pd for reading parquet file.
df = pd.read_parquet("../../open-data/data/ons-uprn-directory/uprn_minimal.parquet")
df.head()

In [None]:
len(df)

In [None]:
uprn_list = df.sample(10_000).uprn.to_list()
uprn_list[:3]

In [None]:
%%time
lookup_gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.lon, df.lat), crs=4326)
lookup_gdf = lookup_gdf[["uprn", "geometry"]]
lookup_gdf.head()

In [None]:
SnabbKarta.plp(set(uprn_list), geom_type="uprn", lookup_gdf=lookup_gdf, lookup_key="uprn")

### USRN

In [None]:
gdf = gpd.read_parquet(
    "/Users/abbas/repo/open-data/data/os-open-usrn/osopenusrn_202510.parquet", bbox=(-0.5, 51.25, 0.5, 51.75)
)
gdf.head()

In [None]:
len(gdf)

In [None]:
usrn_list = gdf.sample(10_000).usrn.to_list()
usrn_list[:3]

In [None]:
SnabbKarta.plp(set(usrn_list), geom_type="usrn", lookup_gdf=gdf, lookup_key="usrn")

### Postcode

In [None]:
%%time
df = pd.read_parquet("../../open-data/data/ons-postcode-directory/ons-postcode-directory.parquet")
df.head()

In [None]:
postcode_list = df.sample(10_000).postcode.to_list()
postcode_list[:3]

In [None]:
%%time
lookup_gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.lon, df.lat), crs=4326)
lookup_gdf = lookup_gdf[["postcode", "geometry"]]
lookup_gdf.head()

In [None]:
SnabbKarta.plp(set(postcode_list), geom_type="postcode", lookup_gdf=lookup_gdf, lookup_key="postcode")

### OSM

In [None]:
%%time
gdf = gpd.read_parquet("../../open-data/data/geofabrik-osm/monaco/multipolygons.parquet")
gdf.head()

In [None]:
len(gdf)

In [None]:
lookup_gdf = gdf[["osm_way_id", "name", "geometry"]]
lookup_gdf.head()

In [None]:
osm_list = lookup_gdf.sample(1000).osm_way_id.to_list()
osm_list[:3]

In [None]:
SnabbKarta.plp(set(osm_list), geom_type="osm", lookup_gdf=lookup_gdf, lookup_key="osm_way_id")