# Initialize

In [None]:
# Environment setup: installs the numeric stack from the conda-forge channel
# (`-y` skips the confirmation prompt).
# NOTE(review): no versions are pinned, so the installed packages depend on when this cell is run.
!conda install -y -c conda-forge nomkl numpy scipy scikit-learn numexpr

In [None]:
# List the contents of the `manual_aoi` directory relative to the notebook's working directory.
!ls manual_aoi

In [104]:
import geopandas as gpd
import psycopg2
import folium
import folium.plugins
import fiona
import osmnx as ox
import networkx as nx
import operator
import json
import matplotlib.cm as cmx
import matplotlib.colors as colors
from decimal import Decimal
from os.path import expanduser
import os  
from os.path import expanduser
from IPython.display import clear_output


In [105]:
class Query(object):
    """Builders for the PostGIS SQL statements used to derive areas of interest (AOIs).

    Every method only formats and returns SQL text; nothing is executed here.
    The builders are composable: the text returned by one builder is embedded
    as a CTE (``WITH ... AS (...)``) by the next one.

    NOTE: values are interpolated into the SQL text. GeoJSON payloads have
    their single quotes doubled so they stay inside their string literal, but
    the nested query arguments and ``eps``/``minpoints`` are inserted verbatim
    and must come from trusted code.
    """

    @staticmethod
    def _sql_literal(text):
        """Return ``text`` with single quotes doubled so it can sit inside a '...' SQL literal."""
        return str(text).replace("'", "''")

    @staticmethod
    def bbox(bbox_json):
        """Return a query yielding the bounding box as one ``geometry`` row.

        :param bbox_json: GeoJSON geometry (as text); it is tagged as SRID 4326
            and transformed to SRID 3857 by the generated SQL.
        """
        bbox_json = Query._sql_literal(bbox_json)
        return f"""
            SELECT ST_Transform(ST_SetSRid(ST_GeomFromGeoJSON('{bbox_json}'), 4326), 3857) AS geometry
        """

    @staticmethod
    def pois(bbox_query=None):
        """Return a query selecting POI geometries from the ``pois`` table.

        :param bbox_query: optional query text producing a ``geometry`` column
            (see :meth:`bbox`). When given, only POIs that ST_Intersects that
            geometry are selected; when omitted, all POIs are selected.
        """
        if bbox_query is None:
            return """
            SELECT geometry
            FROM pois
        """

        # ST_Intersects is used
        return f"""
            WITH bbox as ({bbox_query})
            SELECT pois.geometry
            FROM pois, bbox
            WHERE ST_Intersects(pois.geometry, bbox.geometry);
        """

    @staticmethod
    def manual_aois(manual_aois_json):
        """Return a query yielding one ``geometry`` row per feature of a GeoJSON FeatureCollection.

        :param manual_aois_json: FeatureCollection as JSON text; each feature
            geometry is tagged as SRID 4326 and transformed to SRID 3857.
        """
        manual_aois_json = Query._sql_literal(manual_aois_json)
        return f"""
            WITH feature_collection AS (
                SELECT '{manual_aois_json}'::json AS features
            )
            SELECT ST_Transform(ST_SetSRid(ST_GeomFromGeoJSON(feature_col->>'geometry'), 4326), 3857) AS geometry
            FROM (
                SELECT json_array_elements(features->'features') AS feature_col
                FROM feature_collection
            ) AS subquery;
        """

    @staticmethod
    def aois(bbox_query, eps, minpoints):
        """Return a query that clusters POIs with ST_ClusterDBSCAN and builds a hull per cluster.

        Only POIs inside ``preclusters`` hulls that intersect the bounding box
        are clustered. The result contains the clustered points themselves
        followed (UNION ALL) by one ST_ConcaveHull per cluster id.

        :param bbox_query: query text producing the bounding-box ``geometry``.
        :param eps: value passed as ``eps`` to ST_ClusterDBSCAN.
        :param minpoints: value passed as ``minpoints`` to ST_ClusterDBSCAN.
        """
        return f"""
            WITH bbox as ({bbox_query}),
            hulls AS (
                SELECT preclusters.hull as hull
                FROM bbox INNER JOIN preclusters ON ST_Intersects(preclusters.hull,bbox.geometry)
            ),
            smaller_clusters AS(
                SELECT pois.geometry, ST_ClusterDBSCAN(pois.geometry, eps := {eps}, minpoints := {minpoints}) over () AS cid
                FROM pois, hulls
                WHERE ST_Within(geometry, hulls.hull)
            )
            SELECT * FROM smaller_clusters WHERE cid IS NOT NULL
            UNION ALL
            SELECT ST_ConcaveHull(ST_Union(geometry),0.99), cid FROM smaller_clusters WHERE cid IS NOT NULL GROUP BY cid
        """

    @staticmethod
    def without_water(aois_query):
        """Return a query that subtracts water polygons from every AOI geometry.

        Water is taken from ``planet_osm_polygon`` rows where ``water`` or
        ``waterway`` is set and ``tunnel`` is NULL or 'no'. When no water
        intersects an AOI, an empty geometry collection is subtracted instead,
        which leaves the AOI unchanged.

        :param aois_query: query text producing a ``geometry`` column.
        """
        return f"""
            WITH aois AS ({aois_query})
            SELECT ST_Difference(aois.geometry, coalesce((
                SELECT ST_Union(way) AS geometry
                FROM planet_osm_polygon
                WHERE (
                    water IS NOT NULL OR waterway IS NOT NULL
                    )
                    AND (
                    tunnel IS NULL OR tunnel = 'no'
                    )
                AND ST_Intersects(way, aois.geometry)
            ), 'GEOMETRYCOLLECTION EMPTY'::geometry)) AS geometry
            FROM aois
        """

    @staticmethod
    def sanitized_aois(without_water):
        """Return a query that merges, splits, simplifies and filters the AOI geometries.

        All geometries are unioned, dumped into their individual parts and
        simplified with tolerance 5 (in the units of the geometry's CRS);
        invalid or empty results are dropped.

        :param without_water: query text producing a ``geometry`` column.
        """
        return f"""
            WITH aois AS ({without_water}),
            sanitized_aois AS(
                SELECT ST_Simplify((ST_Dump(ST_Union(geometry))).geom, 5) AS geometry FROM aois
            )
            SELECT * FROM sanitized_aois WHERE ST_IsValid(geometry) AND NOT ST_IsEmpty(geometry)
        """
    

        
    

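# Evaluation metrics, in order of importance:
* hit_rate_manual (percentage of manual AOI polygons that are matched by at least one calculated polygon)
* hit_rate_calculated (percentage of calculated AOI polygons that match at least one manual polygon)
  * a manual polygon and a calculated polygon count as a match when both conditions hold:
    * at most 40% of the calculated polygon lies outside the manual polygon (i.e. more than 60% of it intersects)
    * more than 30% of the manual polygon is covered by the intersection
* intersection (total intersection area as a percentage of the combined area: calculated area + manual area - intersection area)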


In [109]:
class Optimizer(object):
    """Grid-search the DBSCAN parameters (eps, minpoints) that best reproduce manually drawn AOIs.

    For every parameter combination the calculated AOI polygons are fetched
    from the database and compared with the manual polygons. The comparison
    yields three scores (see :meth:`calculate`), and :meth:`startAnalyzing`
    ranks all combinations by them and exports the best one.

    The manual AOI file name must have the form
    ``<something>/{country code}_{state code}_{city}.geojson``; the three parts
    determine where the results are stored.
    """

    # A manual polygon and a calculated polygon only count as a match when the
    # intersection covers more than MIN_MANUAL_OVERLAP of the manual polygon
    # (rejects small calculated polygons) and more than MIN_CALCULATED_OVERLAP
    # of the calculated polygon (rejects oversized calculated polygons).
    MIN_MANUAL_OVERLAP = 0.3
    MIN_CALCULATED_OVERLAP = 0.6

    def __init__(self, PATH_OF_MANUAL_AOI, PATH_OF_BBOX):
        """Load the manual AOIs and the POIs inside the bounding box, and prepare the output directory.

        :param PATH_OF_MANUAL_AOI: path of the GeoJSON file with the manual AOI polygons.
        :param PATH_OF_BBOX: path of the GeoJSON file whose first feature is the bounding box.
        :raises ValueError: if the manual AOI file name does not follow the
            ``{country}_{state}_{city}.geojson`` format.
        """
        self.PATH_OF_MANUAL_AOI = PATH_OF_MANUAL_AOI
        self.PATH_OF_BBOX = PATH_OF_BBOX
        self.result = []

        # Derive the export location from the file name before any file or
        # database access so a badly named file fails fast.
        self.country, self.state, self.city = self._parse_manual_aoi_path(PATH_OF_MANUAL_AOI)

        bbox_geodataframe = gpd.read_file(PATH_OF_BBOX)
        bbox_json = json.dumps(bbox_geodataframe.loc[0]['geometry'].__geo_interface__)
        self.bbox_query = Query.bbox(bbox_json)
        pois_query = Query.pois(self.bbox_query)

        manual_aois_geodataframe = gpd.read_file(PATH_OF_MANUAL_AOI)
        manual_aois_json = json.dumps(manual_aois_geodataframe.__geo_interface__)
        manual_aois_query = Query.manual_aois(manual_aois_json)
        self.manual_aois, self.pois = self._read_geometries(manual_aois_query, pois_query)

        # create <result> directory in home directory to store the results
        self.directory = os.path.join(os.path.expanduser('~'), 'result', self.country, self.state)
        os.makedirs(self.directory, exist_ok=True)

        # map style
        self.pois_color = lambda x: {'fillOpacity': '0.7', 'fillColor': 'green', 'color':''}
        self.manual_color = lambda x: {'fillOpacity': '0.5', 'fillColor': 'red', 'color':''}
        self.calculated_color = lambda x: {'fillOpacity': '0.5', 'fillColor': 'blue', 'color':''}
        # folium expects (lat, lon); the coordinate tuple is (x, y), hence the reversal
        self.default_view = list(manual_aois_geodataframe['geometry'].centroid.to_crs(epsg=4326).loc[0].coords)[0][::-1]

        self.manual_aois_area = sum(polygon.area for polygon in self.manual_aois['geometry'])

    @staticmethod
    def _parse_manual_aoi_path(path):
        """Split ``.../{country}_{state}_{city}.geojson`` into ``(country, state, city)``.

        Only the file name is inspected, so the directory may have any name,
        and the city itself may contain underscores.

        :raises ValueError: if the file name has fewer than three non-empty parts.
        """
        stem = os.path.basename(path).split('.geojson')[0]
        parts = stem.split('_', 2)
        if len(parts) != 3 or not all(parts):
            raise ValueError(
                f"Manual AOI file name must look like '{{country}}_{{state}}_{{city}}.geojson', got: {path!r}"
            )
        return tuple(parts)

    @staticmethod
    def _read_geometries(*queries):
        """Run each query on one database connection and return the resulting GeoDataFrames as a list.

        The connection is opened with an empty DSN (so connection settings come
        from the environment / libpq defaults) and is always closed afterwards.
        psycopg2's ``with connection`` block only ends the transaction and
        leaves the connection open, which is why it is closed explicitly here.
        """
        conn = psycopg2.connect("")
        try:
            return [gpd.read_postgis(query, conn, geom_col='geometry') for query in queries]
        finally:
            conn.close()

    @classmethod
    def _is_match(cls, overlap_area, manual_area, calculated_area):
        """Tell whether an overlap is large enough, relative to both polygons, to count as a hit."""
        if manual_area <= 0 or calculated_area <= 0:
            # degenerate geometry: the ratios are undefined, so it cannot be a hit
            return False
        return (overlap_area / manual_area > cls.MIN_MANUAL_OVERLAP
                and overlap_area / calculated_area > cls.MIN_CALCULATED_OVERLAP)

    def getCalculatedPolygon (self, eps, minpoints):
        """Fetch the sanitized, water-free AOI polygons calculated with the given DBSCAN parameters.

        :returns: GeoDataFrame with a ``geometry`` column (possibly empty).
        """
        aois_query = Query.aois(self.bbox_query, eps, minpoints)
        without_water_query = Query.without_water(aois_query)
        sanitized_aois_query = Query.sanitized_aois(without_water_query)
        return self._read_geometries(sanitized_aois_query)[0]

    def calculate(self, eps, minpoints):
        """Score one (eps, minpoints) combination against the manual AOIs.

        :returns: dict with the keys ``eps``, ``minpoints`` and

            * ``hit_rate_manual`` - percentage of manual polygons matched by a calculated polygon,
            * ``hit_rate_calculated`` - percentage of calculated polygons matching a manual polygon,
            * ``intersection`` - intersection area as a percentage of
              (calculated area + manual area - intersection area).

            All three scores are ``-1`` when no polygon was calculated.
        """
        sanitized_aois = self.getCalculatedPolygon(eps, minpoints)
        if len(sanitized_aois) == 0:
            return {'eps': eps, 'minpoints': minpoints, 'hit_rate_manual': -1,
                    'hit_rate_calculated': -1, 'intersection': -1}

        manual_polygons = list(self.manual_aois['geometry'])
        calculated_polygons = list(sanitized_aois['geometry'])

        intersection_area = 0.0
        num_hit = 0
        # matched[i] tells whether calculated polygon i (by position) matched any
        # manual polygon; initially every calculated polygon is assumed to be an outlier
        matched = [False] * len(calculated_polygons)

        for manual_polygon in manual_polygons:
            hit = False
            for position, calculated_polygon in enumerate(calculated_polygons):
                if not manual_polygon.intersects(calculated_polygon):
                    continue
                overlap_area = manual_polygon.intersection(calculated_polygon).area
                intersection_area += overlap_area
                if self._is_match(overlap_area, manual_polygon.area, calculated_polygon.area):
                    hit = True
                    matched[position] = True
            if hit:
                num_hit += 1

        calculated_area = sum(polygon.area for polygon in calculated_polygons)
        num_outlier = matched.count(False)

        # the percentage of number of manual aoi polygon that has been taken into account
        hit_rate_manual = num_hit / len(manual_polygons) * 100

        # percentage of number of calculated aoi polygon that has been taken into account
        hit_rate_calculated = 100.0 - num_outlier / len(calculated_polygons) * 100

        # percentage of intersection area
        union_area = calculated_area + self.manual_aois_area - intersection_area
        intersection = (intersection_area / union_area) * 100 if union_area else 0.0

        print(f"eps: {eps:{2}}, minpoints: {minpoints:{2}}, hit_rate_manual:{hit_rate_manual: {8}.3f}%, "
            + f"hit_rate_calculated:{hit_rate_calculated: {8}.3f}%, intersection:{Decimal(intersection):{8}.3f}%")

        return {'eps': eps, 'minpoints': minpoints, 'hit_rate_manual': round(hit_rate_manual, 3),
                'hit_rate_calculated': round(hit_rate_calculated, 3), 'intersection': round(intersection, 3)}

    def startAnalyzing(self, eps_range=range(10, 120), minpoints_range=range(1, 20)):
        """Score every parameter combination, rank them and export the best one.

        ``self.result`` is rebuilt on every call and ends up sorted from best
        to worst. The ranking, an HTML map and a GeoJSON file of the best
        combination are written to ``self.directory``.

        :param eps_range: iterable of ``eps`` values to try.
        :param minpoints_range: iterable of ``minpoints`` values to try.
        :raises ValueError: if either iterable is empty.
        """
        eps_values = list(eps_range)
        minpoints_values = list(minpoints_range)
        if not eps_values or not minpoints_values:
            raise ValueError("eps_range and minpoints_range must each contain at least one value")

        # start from scratch so that repeated runs do not accumulate duplicates
        self.result = []
        for eps in eps_values:
            for minpoints in minpoints_values:
                self.result.append(self.calculate(eps, minpoints))
        clear_output()

        # tentative way of selecting the most optimal parameter
        self.result.sort(key=lambda x: (x['hit_rate_manual'], x['hit_rate_calculated'], x['intersection']), reverse=True)
        best_eps = self.result[0]['eps']
        best_minpoints = self.result[0]['minpoints']

        # explicit UTF-8 because ensure_ascii=False may emit non-ASCII characters
        with open(f"{self.directory}/{self.city}_data.json", 'w', encoding='utf-8') as file:
            json.dump(self.result, file, sort_keys = True, indent = 4, ensure_ascii = False)

        # fetched once and reused for both the map and the GeoJSON export
        best_aois = self.getCalculatedPolygon(best_eps, best_minpoints)
        m = self.toFolium(best_eps, best_minpoints, sanitized_aois=best_aois)

        # save map as html because jupyter can have problem showing the output
        m.save(f"{self.directory}/{self.city}_map_eps({best_eps})_minpoints({best_minpoints}).html")

        # save map as geojson (public to_json instead of the private _to_geo helper)
        best_aois_wgs84 = best_aois.to_crs(epsg=4326)
        with open(f"{self.directory}/{self.city}_eps({best_eps})_minpoints({best_minpoints}).geojson", 'w', encoding='utf-8') as file:
            file.write(best_aois_wgs84.to_json(sort_keys = True, indent = 4, ensure_ascii = False))

        print(f"Results is saved in '{self.directory}'")

    # for generating the map with a specific eps minpoints
    def toFolium(self, eps, minpoints, sanitized_aois=None):
        """Build a folium map showing manual AOIs (red), calculated AOIs (blue) and POIs (green).

        :param sanitized_aois: optional, already fetched calculated polygons for
            these parameters; when omitted they are queried from the database.
        """
        if sanitized_aois is None:
            sanitized_aois = self.getCalculatedPolygon(eps, minpoints)
        m = folium.Map(location=self.default_view, zoom_start=15, tiles="cartodbpositron")
        folium.plugins.Fullscreen().add_to(m)
        folium.GeoJson(self.manual_aois, style_function=self.manual_color).add_to(m)
        folium.GeoJson(sanitized_aois, style_function=self.calculated_color).add_to(m)
        folium.GeoJson(self.pois, style_function=self.pois_color).add_to(m)

        return m

    def toJson(self, eps, minpoints):
        """Return the calculated AOI polygons for the given parameters as a GeoJSON string."""
        sanitized_aois = self.getCalculatedPolygon(eps, minpoints)
        return sanitized_aois.to_json()

# Optimizer(MANUAL_PATH, BBOX_PATH)

In [110]:
# One Optimizer per test city: (manual AOI GeoJSON, bounding-box GeoJSON).
# Constructing an Optimizer already reads both files and queries the database
# for the manual AOIs and the POIs inside the bounding box.
halle_optimizer = Optimizer("../../test-cases/manual_aoi/be_lsa_halle.geojson", "clip/Halle_AL8.GeoJson")
singapore_optimizer = Optimizer("../../test-cases/manual_aoi/sg_sg_singapore.geojson", "clip/Singapore_AL2.GeoJson")
witikon_optimizer = Optimizer("../../test-cases/manual_aoi/ch_zh_witikon.geojson", "clip/Witikon_AL10.GeoJson")
rischrotkreuz_optimizer = Optimizer("../../test-cases/manual_aoi/ch_zg_risch-rotkreuz.geojson", "clip/Risch-Rotkreuz_AL8.GeoJson")

In [111]:
# Run the parameter sweep for Halle, then show the best-ranked parameter sets.
# Slicing (instead of indexing 0..9) also works when fewer than ten results exist.
halle_optimizer.startAnalyzing()
for entry in halle_optimizer.result[:10]:
    print(entry)

Results is saved in '/home/jovyan/result/be/lsa'
{'eps': 62, 'minpoints': 9, 'hit_rate_manual': 66.667, 'hit_rate_calculated': 100.0, 'intersection': 55.047}
{'eps': 63, 'minpoints': 9, 'hit_rate_manual': 66.667, 'hit_rate_calculated': 100.0, 'intersection': 55.047}
{'eps': 64, 'minpoints': 9, 'hit_rate_manual': 66.667, 'hit_rate_calculated': 100.0, 'intersection': 55.047}
{'eps': 61, 'minpoints': 9, 'hit_rate_manual': 66.667, 'hit_rate_calculated': 100.0, 'intersection': 55.005}
{'eps': 71, 'minpoints': 11, 'hit_rate_manual': 66.667, 'hit_rate_calculated': 100.0, 'intersection': 54.961}
{'eps': 71, 'minpoints': 12, 'hit_rate_manual': 66.667, 'hit_rate_calculated': 100.0, 'intersection': 54.961}
{'eps': 72, 'minpoints': 11, 'hit_rate_manual': 66.667, 'hit_rate_calculated': 100.0, 'intersection': 54.961}
{'eps': 72, 'minpoints': 12, 'hit_rate_manual': 66.667, 'hit_rate_calculated': 100.0, 'intersection': 54.961}
{'eps': 73, 'minpoints': 12, 'hit_rate_manual': 66.667, 'hit_rate_calculate

In [112]:
# Run the parameter sweep for Witikon, then show the best-ranked parameter sets.
# Slicing (instead of indexing 0..9) also works when fewer than ten results exist.
witikon_optimizer.startAnalyzing()
for entry in witikon_optimizer.result[:10]:
    print(entry)

Results is saved in '/home/jovyan/result/ch/zh'
{'eps': 65, 'minpoints': 4, 'hit_rate_manual': 33.333, 'hit_rate_calculated': 100.0, 'intersection': 40.512}
{'eps': 68, 'minpoints': 6, 'hit_rate_manual': 33.333, 'hit_rate_calculated': 100.0, 'intersection': 40.512}
{'eps': 69, 'minpoints': 6, 'hit_rate_manual': 33.333, 'hit_rate_calculated': 100.0, 'intersection': 40.512}
{'eps': 70, 'minpoints': 6, 'hit_rate_manual': 33.333, 'hit_rate_calculated': 100.0, 'intersection': 40.512}
{'eps': 71, 'minpoints': 6, 'hit_rate_manual': 33.333, 'hit_rate_calculated': 100.0, 'intersection': 40.512}
{'eps': 72, 'minpoints': 6, 'hit_rate_manual': 33.333, 'hit_rate_calculated': 100.0, 'intersection': 40.512}
{'eps': 73, 'minpoints': 6, 'hit_rate_manual': 33.333, 'hit_rate_calculated': 100.0, 'intersection': 40.512}
{'eps': 74, 'minpoints': 6, 'hit_rate_manual': 33.333, 'hit_rate_calculated': 100.0, 'intersection': 40.512}
{'eps': 75, 'minpoints': 6, 'hit_rate_manual': 33.333, 'hit_rate_calculated': 10

In [113]:
# Run the parameter sweep for Risch-Rotkreuz, then show the best-ranked parameter sets.
# Slicing (instead of indexing 0..9) also works when fewer than ten results exist.
rischrotkreuz_optimizer.startAnalyzing()
for entry in rischrotkreuz_optimizer.result[:10]:
    print(entry)

Results is saved in '/home/jovyan/result/ch/zg'
{'eps': 66, 'minpoints': 3, 'hit_rate_manual': 20.0, 'hit_rate_calculated': 50.0, 'intersection': 25.538}
{'eps': 66, 'minpoints': 2, 'hit_rate_manual': 20.0, 'hit_rate_calculated': 40.0, 'intersection': 25.448}
{'eps': 66, 'minpoints': 1, 'hit_rate_manual': 20.0, 'hit_rate_calculated': 33.333, 'intersection': 25.518}
{'eps': 89, 'minpoints': 8, 'hit_rate_manual': 10.0, 'hit_rate_calculated': 100.0, 'intersection': 24.627}
{'eps': 90, 'minpoints': 8, 'hit_rate_manual': 10.0, 'hit_rate_calculated': 100.0, 'intersection': 24.627}
{'eps': 90, 'minpoints': 9, 'hit_rate_manual': 10.0, 'hit_rate_calculated': 100.0, 'intersection': 24.627}
{'eps': 91, 'minpoints': 8, 'hit_rate_manual': 10.0, 'hit_rate_calculated': 100.0, 'intersection': 24.627}
{'eps': 91, 'minpoints': 9, 'hit_rate_manual': 10.0, 'hit_rate_calculated': 100.0, 'intersection': 24.627}
{'eps': 92, 'minpoints': 8, 'hit_rate_manual': 10.0, 'hit_rate_calculated': 100.0, 'intersection'

In [117]:
# Run the parameter sweep for Singapore, then show the best-ranked parameter sets.
# Slicing (instead of indexing 0..9) also works when fewer than ten results exist.
singapore_optimizer.startAnalyzing()
for entry in singapore_optimizer.result[:10]:
    print(entry)

Results is saved in '/home/jovyan/result/sg/sg'
{'eps': 93, 'minpoints': 4, 'hit_rate_manual': 50.0, 'hit_rate_calculated': 16.372, 'intersection': 30.513}
{'eps': 90, 'minpoints': 4, 'hit_rate_manual': 50.0, 'hit_rate_calculated': 16.3, 'intersection': 30.818}
{'eps': 91, 'minpoints': 4, 'hit_rate_manual': 50.0, 'hit_rate_calculated': 16.228, 'intersection': 30.817}
{'eps': 89, 'minpoints': 4, 'hit_rate_manual': 50.0, 'hit_rate_calculated': 16.228, 'intersection': 30.813}
{'eps': 92, 'minpoints': 4, 'hit_rate_manual': 50.0, 'hit_rate_calculated': 16.228, 'intersection': 30.792}
{'eps': 66, 'minpoints': 4, 'hit_rate_manual': 50.0, 'hit_rate_calculated': 15.226, 'intersection': 30.254}
{'eps': 65, 'minpoints': 4, 'hit_rate_manual': 50.0, 'hit_rate_calculated': 15.102, 'intersection': 30.22}
{'eps': 89, 'minpoints': 3, 'hit_rate_manual': 50.0, 'hit_rate_calculated': 12.759, 'intersection': 30.307}
{'eps': 89, 'minpoints': 2, 'hit_rate_manual': 50.0, 'hit_rate_calculated': 12.252, 'inters