## Compare OSM Data with Carta Áreas Edificadas from DGT

In [1]:
# ---------------------------------------------------------------------
# Run parameters
#
# Every input/output path below is derived from these two values, so
# processing another grid cell (or running on another machine) only
# requires editing them. Plain f-strings are used instead of os.path
# because this cell runs before the imports cell.
# ---------------------------------------------------------------------
dgt_dir = '/home/jasp/mystuff/dgt'
cell_id = 185

# Spreadsheet relating OSM key/value tags to LULC classes, and the names
# of the columns read from it.
osm_ref_tags = {
    "TABLE"     : f'{dgt_dir}/osm_features_2021_v2.xlsx',
    "SHEET"     : 'osm_features',
    "LULC_COL"  : 'L4',
    "KEY_COL"   : "key",
    "VALUE_COL" : "value",
    "GEOM_COL"  : "geom"
}

# OSM extract for the cell, and the database/table it is loaded into.
osmdata = {
    "FILE"  : f'{dgt_dir}/osm_parts/osmpt_{cell_id}.xml',
    "DB"    : f'dgt_cell{cell_id}',
    "TABLE" : "multipolygons",
    "DBSET" : "local"
}

# LULC class codes selected when exporting OSM polygons from the database
# (the name suggests these are the built-up classes).
ref_edificado = [
    '1141', '1151', '1211', '1221',
    '1222', '1223', '1231', '1241',
    '1251', '1252', '1254', '1255',
    '1256', '1257', '1253', '1612',
    '1631', '1632', '1633', '1651',
    '16', '143', '1431', '1432', '165',
    '11', '13', '142', '1421', '161'
]


# NOTE(review): not referenced by the cells visible in this notebook, which
# spell 'lulc_cls' out literally - kept for backward compatibility.
lulccls = 'lulc_cls'

# CAE polygons for the cell
caeshp = f'{dgt_dir}/cae_parts/cae_{cell_id}.shp'

# Reference shapefile for the cell (rasterised later to set up GRASS)
refshp = f'{dgt_dir}/ref_parts/ref_parts_{cell_id}.shp'

# EPSG code the OSM geometries are transformed to on export
epsg = 3763

# OSM LULC result
lulcshp = f'{dgt_dir}/res_parts/res_{cell_id}.shp'

# OSM/CAE overlay (union) result
osmvscae = f'{dgt_dir}/res_parts/osmvscae_{cell_id}.shp'

In [2]:
import os
import pandas as pd
import numpy as np

from glass.g.it.db import osm_to_psql
from glass.g.it.shp import dbtbl_to_shp
from glass.g.wenv.grs import run_grass
from glass.g.dp.torst import shp_to_rst
from glass.g.rd.shp import shp_to_obj
from glass.g.wt.shp import df_to_shp

from glass.ng.prop.sql import cols_name
from glass.ng.rd import tbl_to_obj
from glass.ng.sql.q import exec_write_q
from glass.ng.sql.db import create_db
from glass.pys.oss import mkdir, fprop

In [3]:
# Prepare workspace
# The working folder sits next to the LULC result shapefile and is named
# "grs" + fprop(lulcshp, "fn").
# NOTE(review): overwrite=True presumably replaces an existing folder -
# confirm against glass.pys.oss.mkdir.
ws_parent = os.path.dirname(lulcshp)
ws_name   = f'grs{fprop(lulcshp, "fn")}'

ws = mkdir(os.path.join(ws_parent, ws_name), overwrite=True)

In [4]:
# Import data into a database
# Create the database for this cell (overwrite=True is passed, so a
# previous run's database is not reused).
create_db(osmdata["DB"], api='psql', overwrite=True, dbset=osmdata["DBSET"])

# Load the OSM XML extract into that database. The connection profile comes
# from the configuration, like every other database call in this notebook,
# instead of being hard-coded to 'local'.
osm_to_psql(osmdata["FILE"], osmdata["DB"], dbsetup=osmdata["DBSET"])

'dgt_cell185'

In [5]:
# Load the table relating OSM tags to LULC classes
osm_tags = tbl_to_obj(osm_ref_tags["TABLE"], sheet=osm_ref_tags["SHEET"])

# Keep only the tags flagged as 'Polygon' in the geometry column.
# .copy() turns the filtered rows into an independent DataFrame: the
# following cells add/overwrite columns on it, which on a plain slice
# raises pandas' SettingWithCopyWarning.
osm_tags = osm_tags[osm_tags[osm_ref_tags["GEOM_COL"]] == 'Polygon'].copy()

In [6]:
# Flag the rows whose LULC cell lists several classes separated by ';'.
# assign() returns a new DataFrame, so the column is never written onto a
# slice of the original table (avoids SettingWithCopyWarning).
osm_tags = osm_tags.assign(
    sevtags=osm_tags[osm_ref_tags["LULC_COL"]].str.contains(';')
)

# Drop the multi-class rows. The comparison is deliberately `!= True`
# rather than `~sevtags`: str.contains() yields NaN for cells that are not
# strings, and those rows must be kept.
# .copy() makes the result safe to modify in the following cells.
osm_tags = osm_tags[osm_tags.sevtags != True].copy()

In [7]:
# Create key/value column
# Each row becomes a SQL predicate of the form
#     <table>.<key>='<value>'
# which is written back into the value column.
# NOTE: the value column is overwritten with an expression built from
# itself, so this cell is not idempotent - re-run the cells that load
# osm_tags before running it a second time.
tag_key = osm_tags[osm_ref_tags["KEY_COL"]]
tag_val = osm_tags[osm_ref_tags["VALUE_COL"]]

osm_tags.loc[:, osm_ref_tags["VALUE_COL"]] = (
    osmdata["TABLE"] + "." + tag_key + "='" + tag_val + "'"
)

In [8]:
# Add new column to multipolygons table
# Update it adding an LULC class

cols = cols_name(osmdata["DB"], osmdata['TABLE'], dbset=osmdata["DBSET"])

# Only add the column when the table does not have it yet
qs = [] if "lulc_cls" in cols else [
    f"ALTER TABLE {osmdata['TABLE']} ADD COLUMN lulc_cls integer"
]

# One UPDATE per LULC class, in order of first appearance in the table.
# If a feature carries two tags that map to different classes, it ends up
# with the class of the last UPDATE executed. A multi-tag approach is
# meant to solve this limitation.
lulc_col = osm_ref_tags["LULC_COL"]

# dropna(): a row without class would otherwise yield
# "SET lulc_cls=nan WHERE " (no row ever equals NaN), which is invalid SQL.
for cls in osm_tags[lulc_col].dropna().unique():
    cls_tags = osm_tags[osm_tags[lulc_col] == cls]

    # Predicates built in the previous cell, OR-ed together; missing
    # values are skipped by str.cat.
    where = cls_tags[osm_ref_tags['VALUE_COL']].str.cat(sep=' OR ')

    if not where:
        # No usable predicate for this class - nothing to update
        continue

    qs.append(
        f"UPDATE {osmdata['TABLE']} SET lulc_cls={cls} WHERE {where}"
    )

In [9]:
# List the columns of the OSM table. The queries in `qs` are only executed
# in the next cell, so at this point 'lulc_cls' is not expected to be
# listed on a freshly created database.
cols = cols_name(osmdata["DB"], osmdata['TABLE'], dbset=osmdata["DBSET"])
print(cols)

['ogc_fid', 'osm_id', 'osm_way_id', 'name', 'type', 'aeroway', 'amenity', 'admin_level', 'barrier', 'boundary', 'building', 'craft', 'geological', 'historic', 'land_area', 'landuse', 'leisure', 'man_made', 'military', '"natural"', 'office', 'place', 'shop', 'sport', 'tourism', 'waterway', 'power', 'railway', 'healthcare', 'highway', 'water', 'landcover', 'industrial', 'other_tags', 'wkb_geometry']


In [10]:
# RUN queries
# Executes the statements collected in `qs`: the optional ALTER TABLE
# followed by one UPDATE per LULC class.
exec_write_q(osmdata["DB"], qs, api='psql', dbset=osmdata["DBSET"])

In [11]:
# Export shapefile with data
# Select only the features whose class is listed in ref_edificado
whr = " OR ".join("lulc_cls={}".format(c) for c in ref_edificado)

# Attribute columns exported alongside the reprojected geometry
select_cols = 'ogc_fid, osm_id, name, lulc_cls, building, amenity, landuse'

q = (
    f'SELECT {select_cols}, '
    f'ST_Transform(wkb_geometry, {epsg}) AS geom '
    f'FROM {osmdata["TABLE"]} WHERE {whr}'
)

osmlulc_path = os.path.join(ws, 'osmlulc.shp')

osmlulc = dbtbl_to_shp(
    osmdata["DB"], q, 'geom', osmlulc_path,
    tableIsQuery=True, api='pgsql2shp', epsg=epsg,
    dbset=osmdata["DBSET"]
)

**Dissolve the OSM polygons by LULC class and overlay (union) the result with CAE**

In [12]:
# Start GRASS GIS Session

bname = fprop(refshp, 'fn')

# Rasterise the reference shapefile; the raster is handed to run_grass as
# `srs`.
# NOTE(review): 10 and 0 are presumably the cell size and the nodata value -
# confirm against glass.g.dp.torst.shp_to_rst.
refrst_path = os.path.join(ws, f'rst{bname}.tif')
refrst = shp_to_rst(refshp, None, 10, 0, refrst_path)

loc = 'locwork'
gb = run_grass(ws, location=loc, srs=refrst)

# NOTE(review): imported only after run_grass() - presumably because the
# GRASS Python package is not importable before that call; keep this order
# unless verified otherwise.
import grass.script.setup as gsetup

gsetup.init(gb, ws, loc, 'PERMANENT')

'/tmp/tmp18c2t8tm'

In [13]:
# GRASS GIS Modules
from glass.g.it.shp import shp_to_grs, grs_to_shp
from glass.g.tbl.col import add_fields, cols_calc
from glass.g.tbl.grs import add_table
from glass.g.gp.gen import dissolve
from glass.g.gp.ovl import grsunion

In [14]:
# Import data
# Both shapefiles are imported into the GRASS session (OSM first, then
# CAE), each under the name returned by fprop(<path>, 'fn').
osmlulcgrs, caegrs = [
    shp_to_grs(shp, fprop(shp, 'fn')) for shp in (osmlulc, caeshp)
]

In [15]:
# Dissolve
# Merge the OSM polygons that share the same lulc_cls value
lulcdiss = dissolve(osmlulcgrs, 'osmlulcdiss', 'lulc_cls', api='grass')

# Attach an attribute table to the dissolved layer, keyed on lulc_cls
add_table(lulcdiss, None, lyrN=1, asCMD=True, keyp='lulc_cls')

# Round trip through a shapefile: export the dissolved areas and import
# them again. The exported file (disscat) is read back later as the OSM
# side of the comparison.
disscat_path = os.path.join(ws, 'osmlulcdiss.shp')
disscat = grs_to_shp(lulcdiss, disscat_path, 'area')

lulcdiss = shp_to_grs(disscat, fprop(disscat, 'fn'))

In [16]:
# Union OSM CAE
# Overlay the dissolved OSM layer (first input) with the CAE layer (second
# input). The exported result is later read through its a_cat / b_cat
# columns - presumably the categories of the first and second input.
osmcae = grsunion(lulcdiss, caegrs, 'lulcunion')

In [17]:
# Export
# Write the union layer to the OSM/CAE result shapefile (osmvscae)
osm_and_cae = grs_to_shp(osmcae, osmvscae, 'area')

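**Classify OSM polygons by how they appear in the OSM/CAE union (`existcae`):** `0` – a single piece with no CAE category; `1` – a single piece with a CAE category; `2` – split into more than one piece; `-1` – none of the previous cases, or polygon not found in the union.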

In [18]:
# Read both results back:
#   gdf   - pieces of the OSM/CAE union
#   osmdf - dissolved OSM LULC polygons
gdf, osmdf = [shp_to_obj(shp) for shp in (osm_and_cae, disscat)]

In [19]:
# Keep only the union pieces that carry an a_cat value.
# .copy() makes the filtered rows an independent frame, so the column
# assignments below do not write onto a slice (SettingWithCopyWarning).
gdf = gdf[gdf.a_cat.notna()].copy()

gdf['a_cat'] = gdf.a_cat.astype(int)

# Pieces without a b_cat value get 0, which allows the integer cast and is
# the "no b_cat" marker tested when the classes are assigned.
gdf['b_cat'] = gdf.b_cat.fillna(value=0).astype(int)

In [20]:
# Count how many times we have the same a_cat
# Result: one row per a_cat, with the key stored as 'acaty' and the number
# of union pieces as 'countcat'.
catcount = (
    gdf.groupby(['a_cat'])['a_cat']
    .count()
    .rename('countcat')
    .rename_axis('acaty')
    .reset_index()
)

In [21]:
# Join
# Bring the per-a_cat piece count (countcat) onto every union piece
gdf = gdf.merge(catcount, how='inner', left_on='a_cat', right_on='acaty')

In [22]:
# Get classes
#   0 -> a_cat appears once and the piece has no b_cat
#   1 -> a_cat appears once and the piece has a b_cat
#   2 -> a_cat appears in more than one piece
#  -1 -> none of the above
gdf['fcat'] = -1

single_piece = gdf.countcat == 1

# The three conditions are mutually exclusive; rows matching none of them
# keep their current fcat value (-1).
gdf['fcat'] = np.select(
    [
        single_piece & (gdf.b_cat == 0),
        single_piece & (gdf.b_cat > 0),
        gdf.countcat > 1,
    ],
    [0, 1, 2],
    default=gdf.fcat.to_numpy()
)

In [23]:
# One row per a_cat with the lowest fcat found among its pieces,
# stored as 'existcae'.
fdf = (
    gdf.groupby(["a_cat"])['fcat']
    .min()
    .rename('existcae')
    .reset_index()
)

In [24]:
# Number the dissolved OSM polygons from 1 (row position + 1) and use that
# number to attach the class computed from the union (matched on a_cat).
# NOTE(review): this assumes the row order of the shapefile follows the
# categories used in the union - confirm.
osmdf = (
    osmdf
    .assign(cat=osmdf.index + 1)
    .merge(fdf, how='left', left_on='cat', right_on='a_cat')
)

In [25]:
# Final column names
osmdf = osmdf.rename(columns={'cat': 'id_obj', 'lulc_cls': 'classuos'})

# Data source identifier
osmdf['fonte'] = 'osm'

osmdf['classuos'] = osmdf['classuos'].astype(str)

# Polygons with no match in the union summary get -1
osmdf['existcae'] = osmdf['existcae'].fillna(value=-1).astype(int)

# Area divided by 10000 (hectares if the CRS unit is the metre)
osmdf['areaha'] = osmdf.geometry.area / 10000

# Join key no longer needed
osmdf = osmdf.drop(columns=['a_cat'])

In [26]:
# Write the classified OSM polygons to the LULC result shapefile
df_to_shp(osmdf, lulcshp)

'/home/jasp/mystuff/dgt/res_parts/res_185.shp'