In [1]:
import neo4j
import osmnx as ox
import pandas as pd
from utils import simplify_to_centroid_if_small
from utils_osm import poidf2rows
from utils_neo4j import init, poi_insert_query

In [2]:
NEO4J_URI = "bolt://localhost:7689"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "busticket123"

driver = neo4j.GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
init(driver)

In [3]:
# Define the area of interest
place = "Hamburg, Deutschland"

# Define the tags for different POIs you're interested in
tags = {
    # Additional categories
    'building': ['civic', 'public'],
}

In [4]:
# Fetch the POIs
pois = ox.features_from_place(place, tags=tags)
pois = pois.reset_index()

In [5]:
# description based on: 'description:de', 'description' 
# tags based on: office, amenity, community_centre, operator, building, sport, dsa:criteria, official_name, operator:type, tourism
tagcols = ['office', 'amenity', 'community_centre', 'operator', 'building', 'sport', 'dsa:criteria', 'official_name', 'operator:type', 'tourism']
impcols = ['id', 'geometry', 'name', 'addr:street', 'addr:housenumber', 'addr:postcode', 'description:de', 'description', *tagcols]

In [6]:
pois_impcols = pois[impcols]

In [7]:
pois_impcols['geometry'] = pois_impcols['geometry'].apply(simplify_to_centroid_if_small)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [8]:
# step 1
# combine col description:de and description, use description:de 
# if available else description, if both are not available use empty string
pois_impcols['description'] = pois_impcols['description:de'].combine_first(pois_impcols['description'])
pois_impcols['description'] = pois_impcols['description']
pois_impcols.drop(columns=['description:de'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pois_impcols.drop(columns=['description:de'], inplace=True)


In [9]:
# step 2
# office, amenity, community_centre, operator, building, sport, dsa:criteria, official_name, operator:type, tourism 
# put values in a col called tags which is list of strings but only if the value is not null or empty string
# remove the columns from pois_impcols
pois_impcols['tags'] = pois_impcols[tagcols].apply(lambda x: [i for i in x if pd.notna(i) and i != ''], axis=1)
pois_impcols.drop(columns=tagcols, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pois_impcols.drop(columns=tagcols, inplace=True)


In [10]:
# drop columns where geometry is empty 
pois_impcols = pois_impcols[pois_impcols['geometry'].notna()]
# drop columns where name and description and tags is empty
mask = pois_impcols.tags.apply(lambda x: len(x) > 0) | pois_impcols.name.notna() | pois_impcols.description.notna()
pois_impcols = pois_impcols[mask]

In [11]:
# rename col id to osmid
pois_impcols.rename(columns={'id': 'osmid'}, inplace=True)

In [12]:
pois_impcols.head()

Unnamed: 0,osmid,geometry,name,addr:street,addr:housenumber,addr:postcode,description,tags
0,1017868272,POINT (9.97694 53.55304),Handwerkskammer Hamburg,Holstenwall,12.0,20355.0,,"[government, public]"
1,2737928311,POINT (9.95034 53.56575),Spielhaus Alsenpark,Alsenstraße,33.0,22769.0,Offene Freizeiteinrichtung für Kinder/ Offene ...,"[community_centre, youth_centre, Bezirksamt Al..."
2,6960079424,POINT (10.07123 53.61907),Bewegungsinsel Bramfelder See,,,,,"[public, gymnastics]"
3,11012804769,POINT (10.13497 53.62065),Elternschule Farmsen-Berne,Bekassinenau,126.0,22147.0,"Familienförderung, Erwachsenenbildung, Offener...","[community_centre, Bezirksamt Wandsbek, public]"
4,308234,POINT (9.98514 53.55695),Justizbehörde,,,,,"[government, public]"


In [13]:
# filter rows where geometry dont is point
notpois_impcols = pois_impcols[pois_impcols['geometry'].apply(lambda x: x.geom_type != 'Point')]
pois_impcols = pois_impcols[pois_impcols['geometry'].apply(lambda x: x.geom_type == 'Point')]

In [14]:
pois_impcols.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 348 entries, 0 to 357
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype   
---  ------            --------------  -----   
 0   osmid             348 non-null    int64   
 1   geometry          348 non-null    geometry
 2   name              232 non-null    object  
 3   addr:street       265 non-null    object  
 4   addr:housenumber  263 non-null    object  
 5   addr:postcode     232 non-null    object  
 6   description       56 non-null     object  
 7   tags              348 non-null    object  
dtypes: geometry(1), int64(1), object(6)
memory usage: 24.5+ KB


In [15]:
len(notpois_impcols)

10

In [16]:
rows = poidf2rows(pois_impcols)

In [17]:
rows

[{'osmid': 1017868272,
  'name': 0,
  'addr_street': 'Holstenwall',
  'addr_housenumber': '12',
  'addr_postcode': '20355',
  'description': None,
  'tags': ['government', 'public'],
  'longitude': 9.9769408,
  'latitude': 53.5530432},
 {'osmid': 2737928311,
  'name': 1,
  'addr_street': 'Alsenstraße',
  'addr_housenumber': '33',
  'addr_postcode': '22769',
  'description': 'Offene Freizeiteinrichtung für Kinder/ Offene Kinder- und Jugendarbeit',
  'tags': ['community_centre', 'youth_centre', 'Bezirksamt Altona', 'civic'],
  'longitude': 9.9503402,
  'latitude': 53.5657474},
 {'osmid': 6960079424,
  'name': 2,
  'addr_street': None,
  'addr_housenumber': None,
  'addr_postcode': None,
  'description': None,
  'tags': ['public', 'gymnastics'],
  'longitude': 10.0712285,
  'latitude': 53.6190671},
 {'osmid': 11012804769,
  'name': 3,
  'addr_street': 'Bekassinenau',
  'addr_housenumber': '126',
  'addr_postcode': '22147',
  'description': 'Familienförderung, Erwachsenenbildung, Offener T

In [18]:
# Execute the query
with driver.session() as session:
    result = session.run(poi_insert_query, rows=rows)
    print(f"Imported {result.single()['total']} POIs")

driver.close()

Imported 348 POIs
