In [1]:
import json
import logging
import os
from urllib.parse import quote_plus

import pandas as pd
import sqlalchemy

In [39]:
def describe_variable(variable: str, df=None):
    """Print a quick summary of one column of a DataFrame.

    Prints, in order: the number of null and non-null entries, the array of
    distinct values, and the frequency of each non-null value.

    Parameters
    ----------
    variable : str
        Name of the column to summarise.
    df : pandas.DataFrame, optional
        Frame holding the column. When omitted, the notebook-level
        ``osm_lines`` frame is used (looked up at call time), which keeps
        existing ``describe_variable("highway")`` calls working.

    Raises
    ------
    KeyError
        If ``variable`` is not a column of the frame.
    """
    if df is None:
        df = osm_lines
    column = df[variable]
    # Vectorised count; avoids iterating the Series element by element.
    null_values = int(column.isnull().sum())
    # Derive the total from the column itself rather than from the global
    # ``num_rows``, which other cells rebind for a different table.
    valid_values = len(column) - null_values
    print(f"""NULL values: {null_values}
    Valid values: {valid_values}""")
    print(column.unique())
    print(column.value_counts())

In [2]:
# Path variables
# Both locations default to the original absolute paths but can be overridden
# with environment variables, so the notebook can run on another machine
# without editing this cell.
logger = logging.getLogger('Crossmap - INSEE to geojson')

target_folder = os.environ.get(
    "CROSSMAP_OSM_FOLDER",
    "/Users/duccioa/CLOUD/01_Cloud/01_Work/04_Projects/0031_CrossMap/05_Data/OSM/",
)

credential_file_path = os.environ.get(
    "CROSSMAP_CREDENTIALS_FILE",
    '/Users/duccioa/CLOUD/01_Cloud/01_Work/04_Projects/0031_CrossMap/04_Admin/03_Credentials/crossmapDB_credentials.json',
)

In [3]:
# SQL connection
# Read the database credentials from the JSON file and build a SQLAlchemy
# engine from them.
with open(credential_file_path, encoding="utf-8") as data_file:
    credential_json = json.load(data_file)

server = 'localhost'
credentials = credential_json['crossmap_database_credentials'][server]
host = credentials['host']
# str() so that a numeric port in the JSON file does not break URL assembly.
port = str(credentials['port'])
database = credentials['database']
user = credentials['user']
password = credentials['password']
driver = credentials['driver']
# Percent-encode user and password: characters such as '@', ':' or '/' would
# otherwise be read as URL delimiters.
# NOTE(review): assumes the JSON stores the raw, un-encoded password - confirm.
sql_connection = (
    f"{driver}://{quote_plus(user)}:{quote_plus(password)}@{host}:{port}/{database}"
)
print(f"Creating connection on {host} with database {database} for user {user}")
engine = sqlalchemy.create_engine(sql_connection)


Creating connection on localhost with database crossmap for user duccioa


# osm_planet_line exploration

Exploration and cleaning of the variables in the table osm_planet_line

In [None]:
# Pull every row and column of paris.osm_line_idf_view into a DataFrame,
# then print a marker once the read has finished.
osm_lines = pd.read_sql(sql="SELECT * FROM paris.osm_line_idf_view", con=engine)
print("Done.")

In [None]:
# Keep the row and column counts of osm_lines, then show them as a tuple.
num_rows, num_columns = osm_lines.shape
print((num_rows, num_columns))

In [8]:
# List the column labels of the loaded osm_lines frame.
print(osm_lines.columns)

Index(['osm_id', 'access', 'addr:housename', 'addr:housenumber',
       'addr:interpolation', 'admin_level', 'aerialway', 'aeroway', 'amenity',
       'area', 'barrier', 'bicycle', 'brand', 'bridge', 'boundary', 'building',
       'construction', 'covered', 'culvert', 'cutting', 'denomination',
       'disused', 'embankment', 'foot', 'generator:source', 'harbour',
       'highway', 'historic', 'horse', 'intermittent', 'junction', 'landuse',
       'layer', 'leisure', 'lock', 'man_made', 'military', 'motorcar', 'name',
       'natural', 'office', 'oneway', 'operator', 'place', 'population',
       'power', 'power_source', 'public_transport', 'railway', 'ref',
       'religion', 'route', 'service', 'shop', 'sport', 'surface', 'toll',
       'tourism', 'tower:type', 'tracktype', 'tunnel', 'water', 'waterway',
       'wetland', 'width', 'wood', 'z_order', 'way_area', 'tags', 'way'],
      dtype='object')


In [49]:
# Show the dtype of each osm_lines column.
print(osm_lines.dtypes)

osm_id                 int64
access                object
addr:housename        object
addr:housenumber      object
addr:interpolation    object
admin_level           object
aerialway             object
aeroway               object
amenity               object
area                  object
barrier               object
bicycle               object
brand                 object
bridge                object
boundary              object
building              object
construction          object
covered               object
culvert               object
cutting               object
denomination          object
disused               object
embankment            object
foot                  object
generator:source      object
harbour               object
highway               object
historic              object
horse                 object
intermittent          object
                       ...  
office                object
oneway                object
operator              object
place         

In [6]:
# Preview the first 100 rows of osm_lines.
osm_lines.head(100)

Unnamed: 0,osm_id,access,addr:housename,addr:housenumber,addr:interpolation,admin_level,aerialway,aeroway,amenity,area,...,tunnel,water,waterway,wetland,width,wood,z_order,way_area,tags,way
0,62280423,,,,,,,,,,...,,,,,,,15,,{},0102000020110F0000060000008FC2F5280C8AE6C09A99...
1,62280421,,,,,,,,,,...,,,,,,,15,,{},0102000020110F000005000000F6285C8FC28CE6C052B8...
2,62280430,,,,,,,,,,...,,,,,,,15,,{},0102000020110F00000300000085EB51B89E89E6C00000...
3,62280432,,,,,,,,,,...,,,,,,,15,,{},0102000020110F00000300000048E17A14D688E6C0AE47...
4,16773143,,,,,,,,,,...,,,,,,,15,,{},0102000020110F000002000000CDCCCCCC5C7DE6C052B8...
5,62280433,,,,,,,,,,...,,,,,,,15,,{},0102000020110F00000200000048E17A14A67CE6C0A470...
6,62280398,,,,,,,,,,...,,,,,,,15,,{},0102000020110F000005000000D7A3703D1A81E6C01F85...
7,62280434,,,,,,,,,,...,,,,,,,15,,{},0102000020110F00000200000052B81E85A379E6C0EC51...
8,62280429,,,,,,,,,,...,,,,,,,15,,{},0102000020110F00000200000085EB51B85E8BE6C014AE...
9,62280431,,,,,,,,,,...,,,,,,,15,,{},0102000020110F000003000000D7A3703D4A89E6C06666...


In [40]:
# Null/valid counts, distinct values and value frequencies for "highway".
describe_variable("highway")

NULL values: 174988
    Valid values: 642686
['service' 'residential' None 'cycleway' 'trunk' 'tertiary' 'footway'
 'path' 'primary' 'primary_link' 'trunk_link' 'pedestrian' 'steps' 'track'
 'unclassified' 'motorway' 'road' 'tertiary_link' 'motorway_link'
 'secondary' 'bridleway' 'construction' 'elevator' 'raceway'
 'living_street' 'secondary_link' 'proposed' 'unsurfaced' 'no' 'corridor'
 'escalator' 'demolished' 'stepping_stones' 'disused' 'bus_guideway'
 'crossing' 'bus_stop' 'emergency_access_point' 'platform' 'access_ramp'
 'virtual' 'yes' 'abandoned' 'services' 'step' 'traffic_island' 'rest_area']
residential               175842
service                   117061
footway                   109252
track                      38575
unclassified               36226
path                       32376
tertiary                   30238
secondary                  23607
primary                    23405
steps                      15807
cycleway                   11214
trunk                      

In [41]:
# Null/valid counts, distinct values and value frequencies for "railway".
describe_variable("railway")

NULL values: 786218
    Valid values: 31456
[None 'subway' 'miniature' 'rail' 'disused' 'gantry' 'dismantled'
 'abandoned' 'platform' 'proposed' 'preserved' 'light_rail' 'tram'
 'platform_edge' 'crossing' 'station' 'approved' 'construction' 'switch'
 'narrow_gauge' 'funicular' 'historic' 'razed' 'turntable' 'monorail' 'no'
 'traverser' 'train_station_entrance' 'buffer_stop']
rail                      23728
subway                     3148
disused                    1401
platform                    956
abandoned                   691
tram                        599
light_rail                  490
proposed                     84
construction                 57
narrow_gauge                 55
monorail                     45
preserved                    36
dismantled                   36
miniature                    35
gantry                       31
razed                        28
turntable                     8
historic                      6
funicular                     5
station       

In [42]:
# Null/valid counts, distinct values and value frequencies for "public_transport".
describe_variable("public_transport")

NULL values: 817295
    Valid values: 379
[None 'platform' 'stop_position']
platform         371
stop_position      8
Name: public_transport, dtype: int64


In [43]:
# Null/valid counts, distinct values and value frequencies for "tunnel".
describe_variable("tunnel")

NULL values: 804003
    Valid values: 13671
[None 'yes' 'building_passage' 'no' 'culvert' 'passage' 'covered']
yes                 10999
building_passage     1303
culvert               992
no                    361
covered                14
passage                 2
Name: tunnel, dtype: int64


In [44]:
# Null/valid counts, distinct values and value frequencies for "water".
describe_variable("water")

NULL values: 817673
    Valid values: 1
[None 'pond']
pond    1
Name: water, dtype: int64


In [45]:
# Null/valid counts, distinct values and value frequencies for "waterway".
describe_variable("waterway")

NULL values: 808827
    Valid values: 8847
[None 'stream' 'river' 'canal' 'ditch' 'drain' 'weir' 'dam'
 'floating_barrier' 'lock_gate' 'abandoned_canal' 'waterfall' 'lock'
 'barrier' '2.5 m' 'dr' 'sluice' 'fish_pass' 'quay' 'wadi' 'hitch']
stream              4520
river               2185
drain                772
canal                622
ditch                589
weir                  92
lock_gate             23
dam                   22
waterfall              8
floating_barrier       3
fish_pass              2
2.5 m                  1
sluice                 1
wadi                   1
dr                     1
lock                   1
quay                   1
barrier                1
abandoned_canal        1
hitch                  1
Name: waterway, dtype: int64


In [50]:
# Null/valid counts, distinct values and value frequencies for "width".
# The column holds strings, so values with and without a unit suffix are
# counted as distinct entries.
describe_variable("width")

NULL values: 814463
    Valid values: 3211
[None '0.3' '3' '2' '2 m' '4 m' '0.5' '0.2' '1.5' '2.5' '1' '45' '1.2' '8'
 '4m' '2.5 m' '5' '1.8' '4' '0.3 m' '1 m' '0.5 m' '3 m' '10 m' '0.2 m' '16'
 '5 m' '2.13' '1.5 m' '0.8' '15' '8.3' '2.25' '3.75' '0' '2 metres'
 '1 metre' '0.4 m' '3 metres' '6\'6"' '12' '21' '4.5' '23' '0.7 m' '11.4'
 '20cm' '1.2 m' '0.8 m' '1.6' '7' '11' '6' '7.4' '6.5' '15.4' '12.5' '12.6'
 '11.7' '6.4' '14.6' '6 m' '2,5 m' '1,5 m' '10' '3.7' '4m-ish' '2.4' '2.3'
 '13' 'Variable' '15 m' '4 ' '0.6 m' '0.3m' '1.8 m' '1.5m' '2m' '20'
 '60 cm' '3.5 m' '0.5m' '2.2' '1m' '1.0' '3.5' '30' '0.75m' '2.0 m' '3m'
 '1,2 m' '0,5 m' 'c 1m' '4.7' '4.8' '0.4' '2.6 m' '2.3 m' '3.0' '1.7' '40'
 '-2' '2.20' 'narrow' '-1' '0.35' '2.50' '1.3' '0.6' '9' '1.50' '0.7'
 '1.20' '14' '0.30' '43' '2.1' '0.9' '5.5' '1.4' '3.4' '70 cm' '2.8' '-4'
 '0.70' '0.50' '0,9' '80' '60' '136' '100' '44' '50' '7.40' '8.5']
1           585
2           505
3           293
4           176
1 m         160
1.5  

In [51]:
# Null/valid counts, distinct values and value frequencies for "bicycle".
describe_variable("bicycle")

NULL values: 781319
    Valid values: 36355
[None 'designated' 'yes' 'no' 'dismount' 'permissive' 'discouraged'
 'private' 'destination' 'unsuitable' 'unknown' 'unofficial' 'allowed'
 'opposite' 'track' 'use_sidepath' 'dismounted' 'opposite_lane' 'official'
 'customers' 'limited']
yes              22343
no                6451
designated        2889
limited           1552
dismount          1366
permissive        1362
private            253
unknown             33
destination         31
discouraged         18
allowed             17
unsuitable          13
unofficial          11
use_sidepath         6
official             4
dismounted           2
opposite_lane        1
opposite             1
customers            1
track                1
Name: bicycle, dtype: int64


In [52]:
# Null/valid counts, distinct values and value frequencies for "bridge".
describe_variable("bridge")

NULL values: 798214
    Valid values: 19460
[None 'yes' 'aqueduct' 'viaduct' 'movable' 'boardwalk' 'no' '1' 'gangway'
 'covered' 'path']
yes          18320
viaduct       1040
no              38
aqueduct        29
movable         16
covered          5
1                4
boardwalk        4
gangway          3
path             1
Name: bridge, dtype: int64


# osm_planet_polygon

In [None]:
# Pull every row and column of paris.osm_polygon_idf_view into a DataFrame,
# then print a marker once the read has finished.
osm_poly = pd.read_sql(sql="SELECT * FROM paris.osm_polygon_idf_view", con=engine)
print("Done.")

In [None]:
# Keep the row and column counts of osm_poly, then show them as a tuple.
# This rebinds the num_rows / num_columns names set earlier for osm_lines.
num_rows, num_columns = osm_poly.shape
print((num_rows, num_columns))

In [None]:
# List the column labels of the loaded osm_poly frame.
print(osm_poly.columns)

In [None]:
# Show the dtype of each osm_poly column.
print(osm_poly.dtypes)

In [None]:
# Preview the first 100 rows of osm_poly.
osm_poly.head(100)