In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
import os

from shapely.geometry import Point

In [6]:
#%% Read data
# Input files, relative to the working directory of the notebook.
BUILDINGS_PATH = "DBSN/EDIF_MERGED_6875.gpkg"            # DBSN buildings
OMI_POLY_PATH = "OMI_2016/OMI_MERGED_6875.gpkg"          # OMI polygons
OMI_VALUES_PATH = "OMI_2016/PROV_ABDAC_201601_VALORI.csv"  # OMI values table

buildings = gpd.read_file(BUILDINGS_PATH)
omi_poly = gpd.read_file(OMI_POLY_PATH)
omi_values = pd.read_csv(OMI_VALUES_PATH)


In [None]:
#%%
# Derive the macro use category from "edifc_uso": each detailed code is
# collapsed onto the macro code it starts with.
def merge_category(cat):
    """
    Return the macro category that a detailed "edifc_uso" code belongs to.

    Parameters
    ----------
    cat : str or missing value
        Detailed use code. Anything that is not a string (None, NaN, ...)
        is treated as "no category" instead of raising.

    Returns
    -------
    str or None
        The first entry of the macro list that `cat` starts with, or None
        when `cat` is missing or matches none of the macro codes.
    """
    macro_cats = ('01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12', '95', '93')

    # Missing attributes reach this function as None/NaN, which have no
    # startswith(); report them as unmatched rather than crashing .apply().
    if not isinstance(cat, str):
        return None

    return next((macro for macro in macro_cats if cat.startswith(macro)), None)



# Spatial join of the buildings with the OMI polygons: every building should
# end up with the attributes (e.g. CODCOM, CODZONA) of exactly one polygon,
# the one it shares the largest intersection area with.
def largest_intersection(gdf_left, gdf_right, mode):
    """
    Join each left geometry to the right geometry it overlaps the most.

    Parameters
    ----------
    gdf_left : GeoDataFrame
        Features of which one row per index label is kept.
    gdf_right : GeoDataFrame
        Features whose attributes are attached to the left ones.
    mode : str
        Spatial predicate forwarded to `geopandas.sjoin` (e.g. 'intersects').

    Returns
    -------
    GeoDataFrame
        The joined rows, reduced to one row per left index label (the match
        with the largest intersection area) and sorted by that label.

    Notes
    -----
    * `dropna()` is applied to the whole joined frame, so a row is discarded
      when ANY of its columns is missing, not only when no right feature
      matched. NOTE(review): confirm this is intended; restricting it to
      `subset=['index_right']` would keep buildings with unrelated gaps.
    * The helper columns 'intersection' and 'index' are created and dropped
      here, so input columns with those names do not survive the call.
    """
    out_gdf = gpd.sjoin(gdf_left, gdf_right, how="left", predicate=mode).dropna()

    # Build the label -> geometry lookup once instead of masking the whole
    # right frame for every joined row. setdefault keeps the first geometry
    # when a label is repeated, which is what `.values[0]` on a mask returns.
    first_right_geom = {}
    for label, geom in zip(gdf_right.index, gdf_right.geometry.values):
        first_right_geom.setdefault(label, geom)

    # index_right may have been upcast to float by the left join; float and
    # int labels that compare equal hash alike, so the lookup still resolves.
    out_gdf['intersection'] = [
        left_geom.intersection(first_right_geom[right_label]).area
        for left_geom, right_label in zip(out_gdf.geometry.values, out_gdf.index_right)
    ]

    # Keep, for every left label, the candidate with the largest overlap.
    out_gdf['index'] = out_gdf.index
    out_gdf = out_gdf.sort_values(by='intersection')
    out_gdf = out_gdf.drop_duplicates(subset='index', keep='last')
    out_gdf = out_gdf.sort_values(by='index')
    out_gdf = out_gdf.drop(columns=['index_right', 'intersection', 'index'])

    return out_gdf



# Fill the missing values of a column (e.g. Compr_mean) type by type, using
# the k nearest features of the same type.
def fill_missing_values_by_type(gdf, col, col_type='VAL_Descr_Tipologia', k=3):
    """
    Impute missing values of `col` from nearby features of the same type.

    For every distinct value of `col_type` the matching rows are processed
    as a group. Each row with a null `col` receives the mean of `col` over
    the `k` features of the group closest to its centroid (the row itself
    is not a candidate). Values still null afterwards are replaced by the
    mean of `col` over the group.

    Parameters
    ----------
    gdf : GeoDataFrame
        Input features; it is not modified.
    col : str
        Name of the numeric column to fill.
    col_type : str, default 'VAL_Descr_Tipologia'
        Name of the column that defines the groups.
    k : int, default 3
        Number of nearest neighbours to average.

    Returns
    -------
    GeoDataFrame
        The filled groups concatenated in order of first appearance of each
        type. Rows whose `col_type` is NaN belong to no group and are
        therefore absent from the result.

    Notes
    -----
    Rows are filled one after the other inside a group, so a value imputed
    earlier in the loop can serve as a neighbour value for a later row.
    """
    filled_parts = []

    for t in gdf[col_type].unique():
        print(f'Filling {t}')

        # Explicit copy: the assignments below must not write into a view of `gdf`.
        filled_gdf_type = gdf.loc[gdf[col_type] == t].copy()
        if filled_gdf_type.empty:
            # Reached for a NaN type, because NaN == NaN selects nothing.
            continue

        # Rows of this type whose value is missing.
        null_rows = filled_gdf_type.loc[filled_gdf_type[col].isnull()]

        for idx, geom in zip(null_rows.index, null_rows.geometry):
            # Without a usable geometry there is no centroid to measure from;
            # such rows are left for the group-mean fallback below.
            if geom is None or geom.is_empty:
                continue

            distances = filled_gdf_type.geometry.distance(geom.centroid)

            # Remove the row itself BEFORE taking the k closest: at distance 0
            # it would otherwise use up one of the k slots.
            other_distances = distances[distances.index != idx]
            nearest_idx = other_distances.sort_values().index[:k]

            # mean() skips neighbours that are themselves null; it is NaN when
            # none of them carries a value.
            filled_gdf_type.loc[idx, col] = filled_gdf_type.loc[nearest_idx, col].mean()

        # Whatever is still null gets the mean of the whole group.
        mean_by_col_type = filled_gdf_type[col].mean()
        filled_gdf_type[col] = filled_gdf_type[col].fillna(mean_by_col_type)

        filled_parts.append(filled_gdf_type)

    if not filled_parts:
        # Nothing to fill: return an empty frame that keeps the input schema.
        return gdf.iloc[0:0].copy()

    # Single concatenation of real groups only (no empty seed frame).
    return pd.concat(filled_parts)

In [None]:
# Tag every building with its macro use category, then renumber the rows.
buildings['edifc_uso_macro'] = buildings['edifc_uso'].apply(merge_category)
# drop=True discards the old index instead of inserting it as a column, so
# re-running this cell does not pile up 'index' / 'level_0' columns (and
# does not raise once both already exist).
buildings = buildings.reset_index(drop=True)

In [None]:
# One-to-one spatial join: each building receives the attributes of the OMI
# polygon it overlaps the most; the columns listed below are then removed.
buildings_codzona = largest_intersection(
    gdf_left=buildings,
    gdf_right=omi_poly,
    mode='intersects',
)
buildings_codzona = buildings_codzona.drop(
    columns=["Name", "LINKZONA", "layer", "classid", "edifc_stat"]
)

# Optional checkpoint of the joined layer (disabled):
# buildings_codzona.to_file("DBSN/buildings_codzona.gpkg", driver='GPKG')


In [None]:
# Translate the macro use code of each building into the typology label
# ("Descr_Tipologia") used as a merge key with the OMI values table.
edifc_uso_map = {
    '01': 'Abitazioni civili',
    '02': 'Uffici',
    '03': 'Abitazioni di tipo economico',  # alternative considered: 'Uffici'
    '04': 'Uffici',
    '05': 'Culto',
    '06': 'Abitazioni di tipo economico',
    '07': 'Negozi',
    '08': 'Capannoni industriali',
    '09': 'Capannoni tipici',
    '10': 'Abitazioni di tipo economico',  # or 'Abitazioni civili'
    '11': 'Abitazioni di tipo economico',
    '12': 'Abitazioni di tipo economico',  # or 'Abitazioni civili'
    '95': 'Abitazioni civili',
    '93': 'Abitazioni civili',
}

# Codes without an entry in the mapping come out as missing values.
macro_codes = buildings_codzona['edifc_uso_macro']
buildings_codzona['Descr_Tipologia'] = macro_codes.map(edifc_uso_map)

In [None]:
# Prepare the OMI values table:
#   * Compr_mean is the midpoint between Compr_min and Compr_max;
#   * Comune_amm / Zona are renamed to CODCOM / CODZONA, the names used as
#     merge keys further down;
#   * only the columns needed afterwards are kept.
new_cols = {"Comune_amm": "CODCOM", "Zona": "CODZONA"}
cols_to_keep = ['Regione', 'Prov', 'CODCOM', 'CODZONA', 'Descr_Tipologia', 'Cod_Tip', 'Compr_mean']

omi_values = omi_values.assign(
    Compr_mean=(omi_values['Compr_max'] + omi_values['Compr_min']) / 2
).rename(columns=new_cols)

omi_values_keep = omi_values.loc[:, cols_to_keep]

In [None]:
# Attach the OMI values to the buildings, matching on
# [CODCOM, CODZONA, Descr_Tipologia].

# Unique building ID, used below to collapse one-to-many matches.
buildings_codzona['ID'] = range(1, len(buildings_codzona) + 1)

buildings_w_val = buildings_codzona.merge(
    omi_values_keep,
    on=['CODCOM', 'CODZONA', 'Descr_Tipologia'],
    how='left',
)

# Keep a single row per building: the match with the highest Compr_mean.
# NaN sorts last by default, which would make keep='last' prefer a missing
# value over a real one; na_position='first' puts missing values ahead so
# they are only kept when the building has no valued match at all.
buildings_w_val = (
    buildings_w_val
    .sort_values(by=['ID', 'Compr_mean'], na_position='first')
    .drop_duplicates(subset=['ID'], keep='last')
)


In [None]:
# Fill the buildings still lacking a Compr_mean, typology by typology,
# from their 5 nearest neighbours.
buildings_w_val_filled = fill_missing_values_by_type(
    gdf=buildings_w_val,
    col='Compr_mean',
    col_type='Descr_Tipologia',
    k=5,
)

In [None]:
# Save both layers: the merged one (before gap filling) and the filled one.
# Create the output folder first so the writes do not fail when it is missing.
os.makedirs("RESULTS", exist_ok=True)
buildings_w_val.to_file("RESULTS/buildings_w_values.gpkg", driver='GPKG')
buildings_w_val_filled.to_file("RESULTS/buildings_w_values_filled.gpkg", driver='GPKG')