In [None]:
import os
import glob
import sys 
import json
import geojson
import re
from transliterate import translit, get_available_language_codes

In [None]:
# Notebook-wide configuration.

# Output locations: every generated artefact lives below RESULT_DIR.
RESULT_DIR = "/Users/denyskononenko/Documents/map/results"
MBTILES_DIR = "{}/mbtiles".format(RESULT_DIR)
MERGED_FILENAME = "{}/merged_pol.geojson".format(RESULT_DIR)
PROCESSED_JSON_FILENAME = "{}/lun-buildings.geojson".format(RESULT_DIR)

# Regions handed to getosm.py by start_pipeline_to_geojson().
REGIONS = [
    "Київ",
    "Київська область",
    "Львівська область",
    "Харківська область",
    "Дніпропетровська область",
    "Одеська область",
]
REGIONS_TEST = ["Київ"]

# Height handling in process_geojson():
# HEIGHT_OF_FLOOR is multiplied by the `building:levels` value,
# DEFAULT_VALUE_HEIGHT / DEFAULT_MIN_HEIGHT are used when no value is available.
HEIGHT_OF_FLOOR = 3
DEFAULT_VALUE_HEIGHT = 5
DEFAULT_MIN_HEIGHT = 0

UNUESED_FILEDS = [
    "roof:colour",
    "building",
    "roof:shape",
    "opening_hours",
    "fuel:lpg",
    "brand",
]
# Properties that process_geojson() copies over unchanged when present.
USED_FILEDS = ["id", "addr:housenumber", "addr:street"]

# Template of the report printed by process_geojson(); it takes six
# positional values. Assembled line by line so the trailing spaces of the
# template stay visible.
REPORT_SUMMARY = "\n".join([
    "",
    "Modification summary ",
    "",
    "Init number of features: {}",
    "Final number of features: {}",
    "",
    "Buildings without height: {} ",
    "Features with LineString type of geometry: {} ",
    "Deleted Points: {}",
    "Deleted MultiPoints: {}",
    "",
    "",
])

# Skeleton of the FeatureCollection written by process_geojson().
OUT_BASE = dict(type="FeatureCollection", features=[])

# Per-region base file names, filled in by start_pipeline_to_geojson().
filenames = []



In [None]:
# functions declaration
def update_progress(progress, maxl):
    """Draw a one-line text progress bar on stdout.

    Args:
        progress: zero-based index of the step being handled (callers pass
            the index produced by ``enumerate``).
        maxl: total number of steps.

    The line starts with a carriage return, so consecutive calls overwrite
    each other in the cell output. Both the bar and the percentage are
    derived from the same completed fraction ``(progress + 1) / maxl``.
    """
    bar_length = 20
    # An empty work list counts as finished instead of dividing by zero.
    fraction = (progress + 1) / maxl if maxl > 0 else 1.0
    # Clamp so an out-of-range index can never draw a bar wider than bar_length.
    fraction = min(max(fraction, 0.0), 1.0)
    block = int(round(bar_length * fraction))
    text = "\rProgress: [{0}] {1}% ".format(
        "#" * block + "-" * (bar_length - block),
        round(100 * fraction, 2),
    )
    sys.stdout.write(text)
    sys.stdout.flush()


def start_pipeline_to_geojson():
    """Download OSM data per region, convert it to GeoJSON and merge it.

    For every entry of ``REGIONS`` the function

    1. runs ``python3 getosm.py <region>`` (resolved against the current
       working directory) and stores its stdout in
       ``<RESULT_DIR>/<name>.xml``, where ``<name>`` is the region with
       spaces replaced by underscores;
    2. converts that XML with ``osmtogeojson`` (run through ``node`` with
       ``--max_old_space_size=4000``) into ``<RESULT_DIR>/<name>.geojson``;
    3. appends the features of all converted files to the first one and
       writes the result to ``MERGED_FILENAME``.

    Side effects: the module-level ``filenames`` list is replaced with the
    names of this run, and the working directory is changed to
    ``RESULT_DIR`` (as before).

    Returns:
        dict: the merged GeoJSON object.

    Raises:
        ValueError: ``REGIONS`` is empty.
        RuntimeError: ``osmtogeojson`` cannot be found on ``PATH``.
        subprocess.CalledProcessError: an external tool exited with a
            non-zero status.
    """
    import shutil
    import subprocess

    if not REGIONS:
        raise ValueError("REGIONS is empty, nothing to download")

    # Fail before the downloads start if the converter is missing.
    converter = shutil.which("osmtogeojson")
    if converter is None:
        raise RuntimeError("osmtogeojson executable was not found on PATH")

    # Rebuild the shared list from scratch: appending on every call made a
    # re-run of the cell process (and merge) each region twice.
    region_files = [region.replace(" ", "_") for region in REGIONS]
    filenames[:] = region_files

    # get xml files from osm
    print("XML obtaining")
    for index, (region, name) in enumerate(zip(REGIONS, region_files)):
        update_progress(index, len(REGIONS))
        xml_path = os.path.join(RESULT_DIR, name + ".xml")
        # Argument lists avoid shell quoting problems; check=True surfaces
        # a failed download instead of leaving an empty XML file behind.
        with open(xml_path, "wb") as xml_out:
            subprocess.run(["python3", "getosm.py", region], stdout=xml_out, check=True)

    # make geojson files from xml
    os.chdir(RESULT_DIR)
    print("\nGenerating of geojson:")
    for index, name in enumerate(region_files):
        update_progress(index, len(region_files))
        with open(name + ".geojson", "wb") as geojson_out:
            subprocess.run(
                ["node", "--max_old_space_size=4000", converter, name + ".xml"],
                stdout=geojson_out,
                check=True,
            )

    # open base file
    with open("{}.geojson".format(region_files[0]), encoding="utf-8") as base_file:
        poly_base_geojson = json.load(base_file)

    # merge features of the remaining files into the base file
    print("\n Merging")
    remaining = region_files[1:]
    for index, name in enumerate(remaining):
        with open("{}.geojson".format(name), encoding="utf-8") as part_file:
            part_geojson = json.load(part_file)
        poly_base_geojson["features"].extend(part_geojson["features"])
        update_progress(index, len(remaining))

    with open(MERGED_FILENAME, "w") as outfile:
        json.dump(poly_base_geojson, outfile, indent=3)

    return poly_base_geojson
    
def process_geojson(poly_base_geojson):
    """Build the cleaned building collection and write it to disk.

    Every feature of ``poly_base_geojson["features"]`` is handled as follows:

    * ``Point`` and ``MultiPoint`` features are dropped and counted;
    * ``properties`` is rebuilt with only ``height``, the fields listed in
      ``USED_FILEDS`` that are present, and ``min_height``;
    * ``height`` comes from the ``height`` tag, else from
      ``building:levels * HEIGHT_OF_FLOOR``, else ``DEFAULT_VALUE_HEIGHT``;
    * ``min_height`` comes from the ``min_height`` tag (negative values
      become 0), else ``DEFAULT_MIN_HEIGHT``;
    * ``LineString`` geometries are re-labelled as ``Polygon`` with the
      line used as the single ring.

    A feature that raises ``TypeError`` (for example a null geometry) is
    reported and skipped; the remaining features are still processed.

    The result is written to ``PROCESSED_JSON_FILENAME`` and a summary
    based on ``REPORT_SUMMARY`` is printed. "Buildings without height"
    counts every feature that received ``DEFAULT_VALUE_HEIGHT``.

    Args:
        poly_base_geojson: dict with a ``"features"`` list of GeoJSON
            features; it is not modified.
    """
    count_build_without_height = 0
    count_bug_linestring = 0
    count_deleted_points = 0
    count_deleted_multi_points = 0
    count_malformed = 0
    source_features = poly_base_geojson["features"]
    init_features_number = len(source_features)

    # OUT_BASE.copy() is shallow and would share (and grow) the template's
    # "features" list on every call, so start from a fresh list instead.
    out_json = dict(OUT_BASE, features=[])

    for item in source_features:
        try:
            geometry = item["geometry"]
            geom_type = geometry["type"]
            if geom_type == "Point":
                count_deleted_points += 1
                continue
            if geom_type == "MultiPoint":
                count_deleted_multi_points += 1
                continue

            properties = item["properties"]
            height, used_default = _process_height(properties)

            # Key order (height, copied fields, min_height) matches the
            # previous output format.
            new_properties = {"height": height}
            for field in USED_FILEDS:
                if field in properties:
                    new_properties[field] = properties[field]
            new_properties["min_height"] = _process_min_height(properties)

            # correct LineString geometry type to Polygon
            coordinates = geometry["coordinates"]
            is_linestring = geom_type == "LineString"
            new_geometry = {
                "coordinates": [coordinates] if is_linestring else coordinates,
                "type": "Polygon" if is_linestring else geom_type,
            }

            cleaned = {
                "type": item["type"],
                "id": item["id"],
                "properties": new_properties,
                "geometry": new_geometry,
            }
        except TypeError as error:
            # Skip only the offending feature; the old loop-wide handler
            # silently discarded everything after the first bad item.
            print(error)
            print(type(item))
            count_malformed += 1
        else:
            count_build_without_height += int(used_default)
            count_bug_linestring += int(is_linestring)
            out_json["features"].append(cleaned)

    final_features_number = len(out_json["features"])
    # save corrected json in the file
    with open(PROCESSED_JSON_FILENAME, 'w') as outfile:
        json.dump(out_json, outfile, indent=3)
    # print report
    print(REPORT_SUMMARY.format(init_features_number, final_features_number, count_build_without_height, count_bug_linestring, count_deleted_points, count_deleted_multi_points))
    if count_malformed:
        print("Skipped malformed features: {}".format(count_malformed))


def _process_to_float(text):
    """Return ``float(text)``, or ``None`` when ``text`` is not a number."""
    try:
        return float(text)
    except ValueError:
        return None


def _process_height(properties):
    """Return ``(height, used_default)`` for one feature's properties."""
    if "height" in properties:
        # Drop lower-case unit suffixes, then treat ';' and ',' as the
        # decimal point.
        raw = re.sub(r"[a-z'`]", "", str(properties["height"]))
        height = _process_to_float(re.sub(r"[;,]", ".", raw))
        if height is not None:
            return height, False
    if "building:levels" in properties:
        # Normalise the decimal separator BEFORE stripping non-numeric
        # characters; the reverse order turned "2,5" into 25 levels.
        raw = re.sub(r"[;,]", ".", str(properties["building:levels"]))
        levels = _process_to_float(re.sub(r"[^0-9.]", "", raw))
        if levels is not None:
            return HEIGHT_OF_FLOOR * levels, False
    return DEFAULT_VALUE_HEIGHT, True


def _process_min_height(properties):
    """Return the non-negative ``min_height`` for one feature's properties."""
    if "min_height" in properties:
        raw = re.sub(r"[a-z]", "", str(properties["min_height"]))
        value = _process_to_float(re.sub(r"[;,]", ".", raw))
        if value is not None:
            return max(value, 0)
    return DEFAULT_MIN_HEIGHT
    
def test_height_number():
    id_without_height = []
    
    with open(PROCESSED_JSON_FILENAME) as geojson_temp:
        proc = json.load(geojson_temp)

    for feature in proc["features"]:
        if "height" not in feature["properties"]:
            id_without_height.append(feature["id"])
    
    print("\n Buildings without height id: {} \n Total num {} \n Without height num {}".format(id_without_height, len(proc["features"]), len(id_without_height)))
   

In [None]:
# Reuse the merged GeoJSON when it already exists; the slow download and
# conversion pipeline only runs when the file is missing, so this cell also
# works on a fresh kernel with an empty results directory.
if os.path.exists(MERGED_FILENAME):
    with open(MERGED_FILENAME) as geojson_temp:
        base = json.load(geojson_temp)
else:
    base = start_pipeline_to_geojson()
process_geojson(base)

In [None]:
# make mbtile
# PROCESSED_JSON_FILENAME already ends with ".geojson"; appending the
# extension a second time pointed tippecanoe at a file that does not exist.
os.system('tippecanoe -o {}.mbtiles {}'.format("lnbuildings", PROCESSED_JSON_FILENAME))

In [None]:
# Sanity check: print the ids of processed features that have no "height" property.
test_height_number()

In [None]:
# Collect the GeoJSON files in RESULT_DIR that should be merged.
os.chdir(RESULT_DIR)
# The files written by this notebook also match "*.geojson"; merging them
# back in would duplicate every feature on a re-run, so leave them out.
generated = {os.path.abspath(MERGED_FILENAME), os.path.abspath(PROCESSED_JSON_FILENAME)}
# glob returns names in arbitrary order; sort for a reproducible merge.
files = sorted(
    name for name in glob.glob("*.geojson")
    if os.path.abspath(name) not in generated
)
for file in files:
    print(file)

In [None]:
# The first collected file serves as the accumulator for all features.
base_name, other_names = files[0], files[1:]
print(base_name)
with open(base_name) as base_handle:
    poly_base_geojson = json.load(base_handle)

# Append the features of every remaining file to the accumulator.
print("\n Merging")
others_total = len(other_names)
for position in range(others_total):
    current_name = other_names[position]
    with open(current_name) as part_handle:
        print(current_name)
        part_geojson = json.load(part_handle)
    poly_base_geojson['features'].extend(part_geojson['features'])
    update_progress(position, others_total)

# Store the merged collection.
with open(MERGED_FILENAME, 'w') as merged_handle:
    json.dump(poly_base_geojson, merged_handle, indent=3)