In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import psycopg2
from geoalchemy2 import Geometry, WKTElement
from sqlalchemy import *
from shapely.geometry import MultiPolygon
from zipfile import ZipFile
import requests 
import sys
from tqdm import tqdm

In [3]:
import yaml

# Database settings are read from a YAML file kept outside the notebook.
# The file must provide the keys used in the URL template below:
# username, password, host, port and dbname.
with open('../../config/postgres.yaml') as f:
    # NOTE(review): FullLoader is kept as in the original; yaml.safe_load would be
    # enough if the file only contains plain scalars/mappings -- confirm and switch.
    engine_configs = yaml.load(f, Loader=yaml.FullLoader)

try:
    engine = create_engine('postgresql://{username}:{password}@{host}:{port}/{dbname}'.format(**engine_configs))
    # create_engine() does not open a connection by itself, so wrong credentials or
    # an unreachable host would otherwise only show up in a later cell. Open one
    # connection (released immediately) to validate the settings right here.
    with engine.connect():
        pass
except Exception as e:
    print("Uh oh, can't connect. Invalid dbname, user or password?")
    print(e)
    # Re-raise: continuing without a working `engine` only defers the failure
    # to the first cell that touches the database.
    raise

In [4]:
def process_geometry_SQL_insert(gdf):
    """Prepare a GeoDataFrame for insertion into a PostGIS geometry column.

    A ``geom`` column is added in which every geometry is stored as a
    ``WKTElement`` tagged with SRID 4326 (the coordinates are not reprojected).
    Geometries whose ``geom_type`` is ``'Polygon'`` are wrapped in a
    ``MultiPolygon`` first; every other geometry type is passed through as is.
    The original ``geometry`` column is then dropped.

    Note that ``geom`` is added to the frame that was passed in, so the
    caller's object gains that column too.

    Parameters
    ----------
    gdf : frame with a ``geometry`` column of shapely geometries.

    Returns
    -------
    A new frame with the ``geom`` column and without ``geometry``.
    """
    def to_wkt_element(shape):
        # Promote single polygons to MultiPolygon before serialising to WKT.
        if shape.geom_type == 'Polygon':
            shape = MultiPolygon([shape])
        return WKTElement(shape.wkt, srid=4326)

    gdf['geom'] = gdf['geometry'].apply(to_wkt_element)
    # Use the keyword form: passing the axis positionally (`drop('geometry', 1)`)
    # is rejected by pandas >= 2.0.
    return gdf.drop(columns='geometry')

In [7]:
# City key: used to filter database rows by city and, lower-cased,
# as the suffix of the temporary table name.
CITY = 'bogota'

### Neighborhoods

In [22]:
# Create the 'core' spatial groups for the city: one row per distinct
# (core_id, core_geom, city) found among the existing 'ego' rows, with the core
# geometry (as a multi-geometry) used as approx_geom.
# NOTE(review): this is a plain INSERT, so re-running the cell will try to add
# the same rows again -- confirm whether a table constraint guards against that.
sql = """INSERT INTO spatial_groups (city, core_geom, core_id, lower_ids, spatial_name, approx_geom)
SELECT a.city, a.core_geom, a.core_id, array_agg(a.core_id), 'core', ST_multi(a.core_geom)
FROM spatial_groups a
where a.city=:city and a.spatial_name = 'ego'
GROUP BY a.core_id, a.core_geom, a.city;
"""

# The city is passed as a bound parameter instead of being spliced into the SQL
# text, and the statement runs in an explicit transaction that commits on
# success (Engine.execute() with implicit autocommit is gone in SQLAlchemy 2.0).
with engine.begin() as conn:
    result = conn.execute(text(sql), {'city': CITY})

## Land use

In [23]:
# Load the 2014 lot polygons, keeping only lots that have a lot code and only
# the two columns needed downstream.
# NOTE(review): no reprojection is applied here; the data is presumably already
# in EPSG:4326 (later cells tag the geometries with SRID 4326) -- confirm.
land_gdf = gpd.read_file('../../data/bogota/land_use/Lots_2014.gpkg')
land_gdf = land_gdf.loc[land_gdf['LotCodigo'].notnull(), ['LotCodigo', 'geometry']]

land_gdf.head()

Unnamed: 0,LotCodigo,geometry
0,4597039009,"MULTIPOLYGON (((-74.20334 4.60793, -74.20339 4..."
1,4593071010,"MULTIPOLYGON (((-74.18929 4.62773, -74.18935 4..."
2,4597039035,"MULTIPOLYGON (((-74.20295 4.60799, -74.20300 4..."
3,4597039020,"MULTIPOLYGON (((-74.20312 4.60792, -74.20315 4..."
4,1401046024,"MULTIPOLYGON (((-74.09880 4.57604, -74.09883 4..."


In [24]:
# Per-lot use records. Lot and use codes are read as strings so they are kept
# exactly as written in the file; the columns are renamed to the names used in
# the rest of the notebook.
land_use_df = (
    pd.read_csv(
        '../../data/bogota/land_use/uso.csv.zip',
        dtype={'UsoCLote': str, 'UsoTUso': str},
    )
    .rename(columns={
        'UsoCLote': 'LotCodigo',
        'UsoTUso': 'usecode',
        'UsoArea': 'sqftmain',
    })
)
land_use_df.head()

Unnamed: 0,OBJECTID,LotCodigo,usecode,sqftmain
0,1,5402013032,4,40.5
1,2,5626003014,10,1171.8
2,3,2205012067,1,129.8
3,4,9203069030,1,75.0
4,5,2430034026,22,91.61


In [25]:
# Attach the use records to the lots (left join: every lot is kept).
land_gdf = pd.merge(land_gdf, land_use_df, how='left', on='LotCodigo')
# In Bogota land use describes buildings, so lots without a use record tend to
# be parks and sport courts; give them the placeholder code '999'.
land_gdf['usecode'] = land_gdf['usecode'].fillna('999')

In [26]:
# Map each use code to its land-use class. Any code that is not listed below
# is classified as 'commercial'.
residential_codes = ['001', '002', '037', '038']
# '999' is the placeholder for lots without a use record.
recreational_codes = ['023', '029', '030', '031', '032', '036', '047', '052', '065', '999']
vacant_codes = ['090', '048']

landuse_by_usecode = {}
landuse_by_usecode.update(dict.fromkeys(residential_codes, 'residential'))
landuse_by_usecode.update(dict.fromkeys(recreational_codes, 'recreational'))
landuse_by_usecode.update(dict.fromkeys(vacant_codes, 'vacant'))

# Codes missing from the mapping come back as NaN and fall through to 'commercial'.
land_gdf['landuse'] = land_gdf['usecode'].map(landuse_by_usecode).fillna('commercial')

In [27]:
# Turn the current row labels into a regular 'index' column (kept, not dropped)
# and renumber the rows from 0.
land_gdf = land_gdf.reset_index(drop=False)
land_gdf.head()

Unnamed: 0,index,LotCodigo,geometry,OBJECTID,usecode,sqftmain,landuse
0,0,4597039009,"MULTIPOLYGON (((-74.20334 4.60793, -74.20339 4...",209953.0,1,145.0,residential
1,1,4593071010,"MULTIPOLYGON (((-74.18929 4.62773, -74.18935 4...",218802.0,1,196.09,residential
2,2,4597039035,"MULTIPOLYGON (((-74.20295 4.60799, -74.20300 4...",800131.0,1,796.0,residential
3,3,4597039020,"MULTIPOLYGON (((-74.20312 4.60792, -74.20315 4...",273787.0,1,204.0,residential
4,4,1401046024,"MULTIPOLYGON (((-74.09880 4.57604, -74.09883 4...",666530.0,1,3692.0,residential


In [28]:
# Keep a single row per lot location: rows whose geometry centroids have
# exactly the same (x, y) are treated as duplicates and only the first is kept.
lot_centroids = land_gdf.geometry.centroid
unique_land_gdf = (
    land_gdf
    .assign(x=lot_centroids.x, y=lot_centroids.y)
    .drop_duplicates(subset=['x', 'y'])
)

## Net area

In [29]:
# Convert the geometries to WKT elements and upload the de-duplicated lots to a
# per-city temporary table, replacing the table if it already exists.
land_gdf = process_geometry_SQL_insert(unique_land_gdf)
land_gdf.to_sql(
    name=f'temptable_u_{CITY.lower()}',
    con=engine,
    if_exists='replace',
    index=False,
    dtype={'geom': Geometry('MultiPolygon', srid=4326)},
)

In [30]:
# Repair invalid geometries in the temporary table: a zero-width buffer is
# applied to every row that fails ST_IsValid, and the result is stored back as
# a multi-geometry.
sql = """
UPDATE temptable_u_{tempname} p SET geom=ST_Multi(ST_buffer(p.geom, 0.0))
WHERE (NOT ST_IsValid(p.geom));
""".format(tempname=CITY.lower())

# Run in an explicit transaction that commits on success
# (Engine.execute() with implicit autocommit is gone in SQLAlchemy 2.0).
with engine.begin() as conn:
    result = conn.execute(text(sql))

In [31]:
# Remove lots that lie in the city's unused areas: a lot is deleted when it
# intersects an unused area (beyond merely touching it) and is either fully
# contained in it or has more than half of its own area inside it.
sql = """
DELETE
FROM temptable_u_{tempname} t
USING unused_areas u
WHERE u.city = :city AND ST_Intersects(u.geom, t.geom) AND (NOT ST_Touches(u.geom, t.geom))
AND (ST_Contains(u.geom, t.geom) OR ST_AREA(ST_Intersection(t.geom, u.geom))/ST_Area(t.geom) > 0.5);
""".format(tempname=CITY.lower())

# The table name has to be formatted into the text (identifiers cannot be bound),
# but the city value is passed as a bound parameter. The statement runs in an
# explicit transaction that commits on success (Engine.execute() with implicit
# autocommit is gone in SQLAlchemy 2.0).
with engine.begin() as conn:
    result = conn.execute(text(sql), {'city': CITY})

In [32]:
# Net (used) area per 'core' spatial group: sum the area of every lot that
# intersects the group's approx_geom (beyond merely touching it). Lots entirely
# within the group count in full; otherwise only the overlapping part counts.
# Areas are measured on the geography type (square metres) and divided by
# 1,000,000 to store square kilometres.
sql = """
INSERT INTO spatial_groups_net_area (sp_id, city, spatial_name, used_area)
SELECT sp_id, city, spatial_name, SUM(ST_Area((CASE WHEN ST_Within(t.geom, s.approx_geom) THEN t.geom ELSE ST_Intersection(s.approx_geom, t.geom) END)::geography))/1000000.
FROM temptable_u_{tempname} t
INNER JOIN spatial_groups s ON ST_Intersects(s.approx_geom, t.geom) AND NOT ST_Touches(s.approx_geom, t.geom)
WHERE s.city = :city AND s.spatial_name='core'
GROUP BY sp_id, city, spatial_name;
""".format(tempname=CITY.lower())

# The table name has to be formatted into the text (identifiers cannot be bound),
# but the city value is passed as a bound parameter. The statement runs in an
# explicit transaction that commits on success (Engine.execute() with implicit
# autocommit is gone in SQLAlchemy 2.0).
with engine.begin() as conn:
    result = conn.execute(text(sql), {'city': CITY})

We don't clean the blocks directly, because the land-use data itself is not clean.

In [33]:
# Recompute the materialized view so that it reflects the rows written above.
sql = """
REFRESH MATERIALIZED VIEW spatial_groups_unused_areas;
"""

# Run in an explicit transaction that commits on success, instead of relying on
# Engine.execute() recognising the statement for implicit autocommit
# (Engine.execute() is gone in SQLAlchemy 2.0).
with engine.begin() as conn:
    result = conn.execute(text(sql))