In [None]:
import json
import os
import sys

import geopandas as gpd
import pandas as pd
import osmnx as ox
import numpy as np
import matplotlib.pyplot as plt
from pyproj import Geod

# Geodesic calculator on the WGS84 ellipsoid; the cells below use it to measure
# the area and perimeter of building footprints.
geod = Geod(ellps='WGS84')


In [None]:
# Mapping of group key -> list of city names; it drives every per-city loop below.
# The context manager closes the file handle, and the explicit encoding keeps
# non-ASCII city names intact regardless of the platform's default codec.
with open('data_index/bldg/cities.json', encoding='utf-8') as cities_file:
    cities = json.load(cities_file)

In [None]:

# LOAD = True reads the cached per-city table from disk; LOAD = False rebuilds
# it from the raw building footprints.
# NOTE(review): nothing in this cell writes data_interface/city_IoD.csv, so the
# cache has to be produced elsewhere -- confirm where.
LOAD = True


# The helpers are defined unconditionally: the joyplot cell further down calls
# calculate_CPLX even when the cached table is loaded.
def calculate_CPLX(polygon):
    """Return the complexity index of a footprint: |perimeter| / |area| ** 0.25.

    Area and perimeter are the geodesic values reported by the global ``geod``.
    """
    area, perimeter = geod.geometry_area_perimeter(polygon)
    return abs(perimeter) / np.sqrt(np.sqrt(abs(area)))


def calculate_AREA(polygon):
    """Return the absolute geodesic area of a footprint, as reported by ``geod``."""
    # geometry_area_perimeter returns (area, perimeter); abs() drops the sign.
    area = geod.geometry_area_perimeter(polygon)[0]
    return abs(area)


def calculate_IoD(series):
    """Return the index of dispersion (variance divided by mean) of ``series``."""
    return series.var() / series.mean()


if LOAD:
    city_IoD = pd.read_csv('data_interface/city_IoD.csv', index_col=0)
else:
    # city name -> [IoD of CPLX, IoD of AREA]
    iod_records = {}

    for key, cities_list in cities.items():
        for i, city in enumerate(cities_list):
            print(f"{key}({i+1}/{len(cities_list)}):{city}")
            folder = f"./data/bldg/{key}/"
            os.makedirs(folder, exist_ok=True)

            file = folder + f"buildings_{city}.geojson"
            if not os.path.exists(file):
                print(f"File {file} not found")
                continue

            gdf_building = gpd.read_file(file)

            gdf_building['CPLX'] = gdf_building['geometry'].apply(calculate_CPLX)
            gdf_building['AREA'] = gdf_building['geometry'].apply(calculate_AREA)

            iod_records[city] = [
                calculate_IoD(gdf_building['CPLX']),
                calculate_IoD(gdf_building['AREA']),
            ]

    # One row per city; also yields a well-formed empty table when no footprint
    # file was found.
    city_IoD = pd.DataFrame.from_dict(iod_records, orient='index', columns=['CPLX', 'AREA'])

print(city_IoD.head())

In [None]:
# Cluster the cities on their two indices of dispersion. Each axis is divided
# by its maximum before clustering so both are on a comparable scale.
X = city_IoD['AREA'].values
Y = city_IoD['CPLX'].values
X_ = X / X.max()
Y_ = Y / Y.max()

# K-means
from sklearn.cluster import KMeans
kmeans = KMeans(n_clusters=4, random_state=0).fit(np.array([X_, Y_]).T)
city_IoD['label'] = kmeans.labels_

# Manual corrections applied on top of the k-means assignment.
# NOTE(review): k-means cluster ids are arbitrary, so these hard-coded ids are
# only meaningful for the clustering this notebook was developed against --
# re-check them whenever the data or the scikit-learn version changes.
city_IoD.loc['Beijing', 'label'] = 1
city_IoD.loc['Changsha', 'label'] = 1
city_IoD.loc['Sydney', 'label'] = 0

plt.rcParams['font.sans-serif'] = ['Times New Roman']

# Explicit figure/axes so the cell never draws onto a figure left over from
# another cell.
fig, ax = plt.subplots()

# (cluster id, scatter keyword arguments). 'Type 2' deliberately has no
# explicit colour and takes matplotlib's default.
cluster_styles = [
    (0, dict(marker='s', label='Type 1', c='b')),
    (1, dict(marker='o', label='Type 2')),
    (2, dict(marker='^', label='Type 3', c='g')),
    (3, dict(marker='*', label='Type 4', c='r')),
]
for cluster_id, style in cluster_styles:
    members = city_IoD[city_IoD['label'] == cluster_id]
    ax.scatter(members['AREA'], members['CPLX'], **style)

# Per-city (dx, dy) offsets, in data units, that keep the text labels from
# overlapping; every other labelled city uses the default offset.
label_offsets = {
    'Kiev': (500, -0.15),
    'Changsha': (-2000, -0.5),
    'Hong Kong': (-2000, -0.5),
    'Shanghai': (-3500, 0.25),
    'Tianjin': (-2500, 0.25),
    'Sydney': (-1000, 0.25),
}
default_offset = (-2000, 0.25)

# Iterate over explicit values rather than `series[i]`: integer-position
# lookups on a string-labelled Series are deprecated in pandas.
for name, area, cplx in zip(city_IoD.index,
                            city_IoD['AREA'].to_numpy(),
                            city_IoD['CPLX'].to_numpy()):
    # Cities in the crowded lower-left corner are left unlabelled here.
    if area < 6000 and cplx < 5:
        continue
    dx, dy = label_offsets.get(name, default_offset)
    ax.annotate(name, (area + dx, cplx + dy))

ax.set_xlabel('IoD AREA', weight='bold')
ax.set_ylabel('IoD CPLX', weight='bold')
ax.set_xlim(0, 35000)
ax.set_ylim(0, 12)

ax.legend()
fig.savefig('IoD_big.png', dpi=300)
fig.savefig('IoD big.pdf', dpi=300)

In [None]:
import matplotlib.pyplot as plt

# Zoomed-in view of the lower-left corner (AREA < 10000, CPLX < 5) of the
# AREA/CPLX scatter.
# NOTE(review): every city except Sydney is drawn as 'Type 4' here, regardless
# of the `label` column -- confirm that no city of another type falls inside
# this window.
fig, ax = plt.subplots()

ax.scatter(city_IoD.loc['Sydney', 'AREA'], city_IoD.loc['Sydney', 'CPLX'],
           marker='s', label='Type 1', c='b')

ax.scatter(city_IoD['AREA'].drop('Sydney'), city_IoD['CPLX'].drop('Sydney'),
           marker='*', label='Type 4', c='r')

# Per-city (dx, dy) offsets, in data units, that keep the text labels from
# overlapping; every other city uses the default offset.
label_offsets = {
    'San Francisco': (-300, -0.2),
    'Istanbul': (-300, -0.2),
    'Bangkok': (-300, -0.2),
    'Amsterdam': (-1000, -0.2),
    'Buenos Aires': (-400, -0.2),
    'Seoul': (100, 0),
    'Warsaw': (-200, 0.08),
    'Sydney': (-500, 0.25),
}
default_offset = (-350, 0.08)

# Iterate over explicit values rather than `series[i]`: integer-position
# lookups on a string-labelled Series are deprecated in pandas.
for name, area, cplx in zip(city_IoD.index,
                            city_IoD['AREA'].to_numpy(),
                            city_IoD['CPLX'].to_numpy()):
    dx, dy = label_offsets.get(name, default_offset)
    ax.annotate(name, (area + dx, cplx + dy))

ax.set_xlabel('IoD AREA')
ax.set_ylabel('IoD CPLX')
ax.set_xlim(0, 10000)
ax.set_ylim(0, 5)
ax.legend()

fig.savefig('IoD_small.png', dpi=300)
fig.savefig('IoD small.pdf', dpi=300)

In [None]:
# Gather the per-building complexity index of every city into one long table
# and draw one ridge per city.
# calculate_CPLX comes from the IoD cell above, which must have been run first.
gdfs = []
for key, cities_list in cities.items():
    n_cities = len(cities_list)
    for position, city in enumerate(cities_list, start=1):
        print(f"{key}({position}/{n_cities}):{city}")
        folder = f"./data/bldg/{key}/"
        os.makedirs(folder, exist_ok=True)
        file = f"{folder}buildings_{city}.geojson"

        if os.path.exists(file):
            gdf_building = gpd.read_file(file)
            gdf_building['CPLX'] = gdf_building['geometry'].apply(calculate_CPLX)
            gdf_building['city'] = city
            # Keep only the two columns the ridge plot needs.
            gdfs.append(gdf_building[['CPLX', 'city']])
        else:
            print(f"File {file} not found")

gdf = pd.concat(gdfs)
import joypy

fig, axes = joypy.joyplot(
    gdf,
    by="city",
    column="CPLX",
    figsize=(10, 10),
    legend=False,
    x_range=[0, 50],
    overlap=0.5,
    linewidth=1,
)

In [None]:
# Histogram bin edges 0, 2, ..., 50 (25 bins); identical for every city.
coverage_bins = list(range(0, 51, 2))

# city name -> normalised histogram (fractions that sum to 1)
cov_by_city = {}
for key, cities_list in cities.items():
    for city in cities_list:
        folder = f"./data/bldg/{key}/"
        os.makedirs(folder, exist_ok=True)
        buildings_meta_file = folder + f"agg_cell_buildings_area_{city}.npy"
        if not os.path.exists(buildings_meta_file):
            print(f"File {buildings_meta_file} not found")
            continue
        # NOTE(security): allow_pickle=True makes np.load unpickle object arrays,
        # which can execute arbitrary code -- only load files you produced yourself.
        buildings_meta = np.load(buildings_meta_file, allow_pickle=True)
        test = buildings_meta.flatten()
        test = test[test > 1]  # drop entries that are not above 1
        print(test.shape)
        # NOTE(review): dividing by 10000 is presumably meant to express the
        # stored areas as percent coverage of a cell -- confirm the cell size.
        ratio = test / 10000
        coverage = np.histogram(ratio, bins=coverage_bins)[0]
        coverage = coverage / np.sum(coverage)
        cov_by_city[city] = coverage

# One row per city, one column per histogram bin.
cov = pd.DataFrame.from_dict(cov_by_city, orient='index')

import seaborn as sns
import matplotlib.pyplot as plt

plt.figure(figsize=(10, 10))
# `cov` holds fractions in [0, 1] while the colorbar ticks are given in percent,
# so the values are scaled by 100 for plotting (`cov` itself is left unchanged).
sns.heatmap(cov * 100, cmap='inferno',
            cbar_kws={'ticks': [0, 5, 10, 15], 'label': 'Share (%)'},
            linewidths=0.5, linecolor='white')
plt.title('Coverage of building footprints')
plt.xlabel('Coverage (%)')
# 25 columns of width 2: a tick every 5 columns marks the bin edges 0, 10, ..., 50.
plt.xticks(np.arange(0, 26, 5), labels=[f'{i}' for i in range(0, 51, 10)])
plt.ylabel('City')
plt.tight_layout()
plt.savefig('coverage.png', dpi=300)