# create input images for ML

From a GeoTIFF data source: the 10 cm raster data of Switzerland, currently "only" covering most parts of eastern Switzerland.

Steps:

1. Create Input Files for cutting (vrt)
1. cut geodata along paths
1. validate that each image is in the correct folder (containing a zebra crossing or not)
1. convert images to png for each band
1. Done: Use the images in ML


See Dockerfile for installed dependencies

Requirements:
* gdal (binaries suffice)

Conda:
* fastai (just for compatibility test)
* pyarrow (supporting the feather format)

Pip:
* rasterio
* turfpy
* fiona
* shapely
* pyrosm


## Constants

Run wherever needed; every section should be able to run on its own (well, at least that is the goal).

In [1]:
# import constants and helpers
# NOTE(review): the later sections of this notebook run `Constants.ipynb` (without the `000_` prefix);
# confirm which filename is current so that every section loads the same settings.
%run 000_Constants.ipynb

# see Constants.ipynb to change start values/settings

# extract crossings locations from osm

within the provided dataset

## load pbf

and restrict to area of interest

In [None]:
import pyrosm

# helper is not defined in this notebook; presumably it makes the OSM extract available at OSM_PBF_DEST -- TODO confirm
fetch_pbf()

# bounding polygon of the raster data, requested in CRS_4326, used to restrict what pyrosm reads from the pbf
data_bounds = get_bbox_polygon_for_image(INPUT_DATA_VRT, out_crs=CRS_4326)
osm = pyrosm.OSM(str(OSM_PBF_DEST.absolute()), bounding_box=data_bounds)

Create Area of interest bounding box

## filter nodes to get the locations

In [None]:
# select every OSM element tagged highway=crossing and keep only the tags needed later
tag_filter = {"highway": ["crossing"]}
CONFIG = {
    "custom_filter": tag_filter,
    "osm_keys_to_keep": ["highway", "crossing", "crossing_ref"],
    "keep_nodes": True,
    "keep_ways": True,
    "keep_relations": False,
}
gdf_nodes = osm.get_data_by_custom_criteria(**CONFIG)

In [None]:
# preview the first rows of the extracted crossings
gdf_nodes.head()

## save crossings to disk
and free memory

In [None]:
# NOTE(review): `json` is not used in this cell
import json
# persist the crossings twice: as feather (read back in the "create combined dataframe" section)
# and as GeoJSON
gdf_nodes.to_feather(GEOPANDAS_CROSSINGS_RESULT_FEATHER)
gdf_nodes.to_file(GEOPANDAS_CROSSINGS_RESULT_GEOJSON, driver='GeoJSON')

In [None]:
import gc

# drop the references to the crossings frame and the pyrosm reader, then collect garbage right away
del gdf_nodes
del osm
gc.collect()

# extract non-crossing locations from osm

More or less the same as above, but we use the street network nodes.

In [6]:
# import constants and helpers
%run Constants.ipynb

# see Constants.ipynb to change start values/settings

In [2]:
import pyrosm

# helper is not defined in this notebook; presumably it makes the OSM extract available at OSM_PBF_DEST -- TODO confirm
fetch_pbf()

# same area restriction as in the crossings section: bounding polygon of the raster data in CRS_4326
data_bounds = get_bbox_polygon_for_image(INPUT_DATA_VRT, out_crs=CRS_4326)
osm = pyrosm.OSM(str(OSM_PBF_DEST.absolute()), bounding_box=data_bounds)

In [3]:
# street network for network_type "driving+service"; with nodes=True the call yields the nodes and the edges.
# Only the nodes are used in the cells below.
gdf_nodes, gdf_edges = osm.get_network(network_type="driving+service", nodes=True)
gdf_nodes.head()

Unnamed: 0,lon,lat,tags,timestamp,version,changeset,id,geometry
0,8.495231,47.399632,,1475399106,12,42577489,249091984,POINT (8.49523 47.39963)
1,8.495218,47.399589,"{'crossing': 'traffic_signals', 'crossing_ref'...",1527602166,3,59369294,1264083863,POINT (8.49522 47.39959)
2,8.494893,47.398697,"{'crossing': 'no', 'highway': 'traffic_signals...",1531092005,5,60520366,1247641714,POINT (8.49489 47.39870)
3,8.494834,47.398527,,1529762079,18,60101624,92206459,POINT (8.49483 47.39853)
4,8.494775,47.398357,"{'crossing': 'traffic_signals', 'crossing_ref'...",1535973608,5,62244007,1770708062,POINT (8.49478 47.39836)


In [4]:
# remove pyrosm instance; the node and edge frames created above are kept
import gc
del osm
gc.collect()

63812676

## get all crossings and exclude points within a distance of 25m

In [9]:
# filter for crossing
def crosswalk_tag_filter(tags: dict):
    """Tell whether an OSM tag mapping describes a crossing.

    Only ``highway=crossing`` counts. The finer ``crossing=*`` and
    ``crossing_ref=*`` values (marked, zebra, ...) are not evaluated here,
    see https://wiki.openstreetmap.org/wiki/Key:crossing

    Args:
        tags: tag dictionary of a node; may be ``None`` or empty.

    Returns:
        ``True`` if the ``highway`` tag equals ``"crossing"``, else ``False``.
    """
    # nodes without any tags can never be crossings
    if not tags:
        return False
    return tags.get("highway") == "crossing"

# boolean mask over all network nodes: True where the node's tags mark a crossing
filter_condition = gdf_nodes.tags.apply(crosswalk_tag_filter)
crossing_locations = gdf_nodes[filter_condition]

In [10]:
# declare the crossings as CRS_4326, then reproject to CRS_3857 so the buffer distance is given in projected units
avoid_locations = crossing_locations.set_crs(CRS_4326)
avoid_locations = avoid_locations.to_crs(CRS_3857)
# NOTE(review): if CRS_3857 is Web Mercator, projected units are stretched away from the equator, so the
# buffer is smaller on the ground than AVOIDING_CROSSINGS_BUFFER_IN_METERS suggests -- confirm this is acceptable
avoid_buffered = avoid_locations.buffer(AVOIDING_CROSSINGS_BUFFER_IN_METERS)
avoid_buffered = avoid_buffered.to_crs(CRS_4326)
# attach the buffer polygons as an extra column and make that column the active geometry
crossing_locations = crossing_locations.assign(crossing_areas=avoid_buffered)
crossing_locations = crossing_locations.set_geometry('crossing_areas')

In [11]:
# this takes a "while" ;-)
# Overlay all network nodes with the buffered crossing areas.
# NOTE(review): presumably only the point features outside the buffers survive (the count printed by the
# following sanity check is lower than the node count); `how='difference'` may express that intent more
# directly -- confirm before changing.

no_crossing_nodes = gdf_nodes.overlay(crossing_locations, how='symmetric_difference')

  return geopandas.overlay(


In [12]:
# sanity check: all network nodes, the crossings among them, and the nodes left after the overlay
print(len(gdf_nodes), len(crossing_locations), len(no_crossing_nodes), sep="\n")

1484468
27015
1200779


In [13]:
import json
# Write to the *_FEATHER constant: the "create combined dataframe" section reads the non-crossings back
# from GEOPANDAS_NO_CROSSINGS_RESULT_FEATHER, so writer and reader must use the same path. This also
# matches the naming used when the crossings are saved (GEOPANDAS_CROSSINGS_RESULT_FEATHER).
no_crossing_nodes.to_feather(GEOPANDAS_NO_CROSSINGS_RESULT_FEATHER)
no_crossing_nodes.to_file(GEOPANDAS_NO_CROSSINGS_RESULT_GEOJSON, driver='GeoJSON')


This metadata specification does not yet make stability promises.  We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.

  no_crossing_nodes.to_feather(GEOPANDAS_NO_CROSSINGS_RESULT)


In [14]:
import gc

# both frames have been written to disk above; drop them and collect garbage right away
del gdf_nodes
del no_crossing_nodes
gc.collect()

6

# create combined dataframe

for a single source of truth and for better reuse

## combine and reduce datasets

In [33]:
# import constants and helpers
%run Constants.ipynb

# see Constants.ipynb to change start values/settings

In [29]:
import geopandas as gpd
import pandas as pd

# only the geometry is read from the stored results; the label column is added below
columns = ['geometry']
gdf_crossings = gpd.read_feather(GEOPANDAS_CROSSINGS_RESULT_FEATHER, columns=columns)
gdf_no_crossing = gpd.read_feather(GEOPANDAS_NO_CROSSINGS_RESULT_FEATHER, columns=columns)
# label the rows: 1 = crossing, 0 = no crossing
gdf_crossings = gdf_crossings.assign(is_crossing=1)
gdf_no_crossing = gdf_no_crossing.assign(is_crossing=0)
# remember the input sizes for the sanity check that follows
crossings_count = len(gdf_crossings)
crossings_absent = len(gdf_no_crossing)
# DataFrame.append was removed in pandas 2.0; pd.concat keeps the same row order and index values
gdf = pd.concat([gdf_no_crossing, gdf_crossings])

In [32]:
# sanity check: the combined frame must hold exactly the rows of both inputs
print("gdf_crossings", int((gdf['is_crossing'] == 1).sum()) == crossings_count)
print("gdf_no_crossing", int((gdf['is_crossing'] == 0).sum()) == crossings_absent)
print(gdf.head())

gdf_crossings True
gdf_no_crossing True
                   geometry  is_crossing
0  POINT (8.48187 47.37476)            0
1  POINT (8.48176 47.37487)            0
2  POINT (8.48167 47.37496)            0
3  POINT (8.48150 47.37515)            0
4  POINT (8.48120 47.37546)            0


In [34]:
# persist the labeled positions twice: as feather (read back in the "create tiles" section) and as GeoJSON
gdf.to_feather(GEOPANDAS_LABELED_POSITIONS_FEATHER)
gdf.to_file(GEOPANDAS_LABELED_POSITIONS_GEOJSON, driver='GeoJSON')


This metadata specification does not yet make stability promises.  We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.

  gdf.to_feather(GEOPANDAS_LABELED_POSITIONS_FEATHER)


In [35]:
# the combined frame is on disk now; drop all three frames
del gdf, gdf_no_crossing, gdf_crossings

# create tiles

In [77]:
# import constants and helpers
%run Constants.ipynb

# see Constants.ipynb to change start values/settings

## prepare geometries

Usage see next section

In [49]:
import geopandas as gpd
# labeled positions (geometry + is_crossing) as written by the "create combined dataframe" section
gdf = gpd.read_feather(GEOPANDAS_LABELED_POSITIONS_FEATHER)

In [75]:
from shapely.geometry import Point, Polygon, shape

def create_named_buffered_geoms(gdf):
    """Map a generated name to a buffered geometry for every row of ``gdf``.

    The name has the form ``"<x>_<y>_is_crossing_<label>"``, where ``x``/``y``
    are the coordinates of the row geometry's centroid and ``label`` is the
    row's ``is_crossing`` value. The geometry is whatever ``buffered_shape``
    returns for that centroid with ``IMAGE_BUFFER_RADIUS_IN_METERS``.

    Args:
        gdf: frame with a ``geometry`` and an ``is_crossing`` column.

    Returns:
        dict of name -> buffered geometry. Rows that produce the same name
        overwrite each other (the last one wins).
    """
    named_geoms = {}
    for geometry, label in zip(gdf["geometry"], gdf["is_crossing"]):
        midpoint = geometry.centroid
        key = f"{midpoint.x}_{midpoint.y}_is_crossing_{label}"
        named_geoms[key] = buffered_shape(
            shape=midpoint,
            radius_in_meters=(IMAGE_BUFFER_RADIUS_IN_METERS),
        )
    return named_geoms

In [76]:
# sanity check
create_named_buffered_geoms(gdf.sample(2))

{'8.5204836_47.3006597_is_crossing_0': <shapely.geometry.polygon.Polygon at 0x7fe44b5dbfa0>,
 '9.1019966_47.1463966_is_crossing_0': <shapely.geometry.polygon.Polygon at 0x7fe49c27bb20>}

## Create virtual concatenated TIF Layer

Using a VRT file.

Currently, the files are in `LV95`.

In [78]:
# skip this step if a vrt already exists
# NOTE(review): `-a_srs` assigns (overrides) the SRS of the output instead of reprojecting; the text of this
# section says the files are in LV95, so confirm that assigning CRS_4326 is intended.
if not INPUT_DATA_VRT.exists():
    # build one virtual raster that references all *.tif files inside INPUT_TIF_PATH
    !cd $INPUT_TIF_PATH && gdalbuildvrt -a_srs $CRS_4326 $INPUT_DATA_VRT *.tif

## create tifs

### crossings

In [84]:
# positives: every labeled position that is a crossing
selection = gdf.loc[gdf['is_crossing'] == 1]
filename_polygons = create_named_buffered_geoms(selection)

In [85]:
# this takes very very long!
# `cut_image` is not defined in this notebook; presumably it writes one image per named polygon into
# IMAGES_TIF_FOLDER_CROSSINGS -- TODO confirm. Polygons outside the raster show up as "out of bounds" in the output.
out_image, out_transform = cut_image(INPUT_DATA_VRT, filename_polygon_dict=filename_polygons, destination_folder=IMAGES_TIF_FOLDER_CROSSINGS)

out of bounds
Input shapes do not overlap raster.
out of bounds
Input shapes do not overlap raster.
out of bounds
Input shapes do not overlap raster.
out of bounds
Input shapes do not overlap raster.
out of bounds
Input shapes do not overlap raster.


### non-crossings

In [86]:
# Balance the classes: draw as many non-crossing positions as there are crossing positions.
# The previous `len(crossings)` relied on a name that no visible cell defines, so the cell only
# worked with leftover kernel state; the count is now derived from `gdf` itself.
crossings_count = int((gdf['is_crossing'] == 1).sum())
non_crossings = gdf[gdf['is_crossing'] == 0]
# never request more rows than exist, otherwise sample() raises a ValueError
selection = non_crossings.sample(n=min(crossings_count, len(non_crossings)))
filename_polygons = create_named_buffered_geoms(selection)

In [87]:
# this takes very very long!
# `cut_image` is not defined in this notebook; presumably it writes one image per named polygon into
# IMAGES_TIF_FOLDER_OTHER -- TODO confirm. Polygons outside the raster show up as "out of bounds" in the output.
out_image, out_transform = cut_image(INPUT_DATA_VRT, filename_polygon_dict=filename_polygons, destination_folder=IMAGES_TIF_FOLDER_OTHER)

out of bounds
Input shapes do not overlap raster.
out of bounds
Input shapes do not overlap raster.
out of bounds
Input shapes do not overlap raster.
out of bounds
Input shapes do not overlap raster.
out of bounds
Input shapes do not overlap raster.
out of bounds
Input shapes do not overlap raster.
out of bounds
Input shapes do not overlap raster.
out of bounds
Input shapes do not overlap raster.
out of bounds
Input shapes do not overlap raster.
out of bounds
Input shapes do not overlap raster.
out of bounds
Input shapes do not overlap raster.
out of bounds
Input shapes do not overlap raster.
out of bounds
Input shapes do not overlap raster.
out of bounds
Input shapes do not overlap raster.
out of bounds
Input shapes do not overlap raster.
out of bounds
Input shapes do not overlap raster.
out of bounds
Input shapes do not overlap raster.
out of bounds
Input shapes do not overlap raster.
out of bounds
Input shapes do not overlap raster.
out of bounds
Input shapes do not overlap raster.
