# This notebook summarizes additional features we can extract from images
1. Exposure = sum(any indicator of exposure: people, house, building, sidewalk)/total_num_image
2. Motorcycle = count()
3. Sidewalk Coverage = sum(indicator of sidewalk)/total_num_image
4. Crosswalk Coverage (later)
5. Bikelane Coverage (later)

In [1]:
import pandas as pd
import numpy as np
import os
from glob import glob
from sklearn.preprocessing import StandardScaler
import h3
from tqdm import tqdm


# Root of the project data tree. Every other location below is derived from it,
# so relocating the data only requires changing this one line.
ROOT = "/lustre1/g/geog_pyloo/05_timemachine"
CURATED_FOLDER = f"{ROOT}/_curated"
EXPORT_FOLDER = f"{CURATED_FOLDER}/c_analysis"

IMG_SAMPLE_FOLDER = f"{EXPORT_FOLDER}/img_sample"

# PANO_PATH / META_PATH are templates: fill them with
# str.format(ROOT=..., cityabbr=...) before use.
PANO_PATH = "{ROOT}/GSV/gsv_rgb/{cityabbr}/gsvmeta/gsv_pano.csv"
CURATED_TARGET = f"{CURATED_FOLDER}/c_seg_hex"
META_PATH = "{ROOT}/GSV/gsv_rgb/{cityabbr}/gsvmeta/{cityabbr}_meta.csv"
OBJECT_DET = os.path.join(CURATED_FOLDER, "c_object_longitudinal")

# NOTE(review): N is not referenced by the functions in this notebook section;
# presumably the cluster count behind the "cluster_8" column -- confirm.
N = 8
# H3 resolutions to index panoramas at; `res` is the one used for aggregation.
H3_RES = [9]
res = H3_RES[0]

In [None]:

def get_std(df_seg_update, variables_remain):
    """Standardize the selected columns with scikit-learn's StandardScaler.

    Args:
        df_seg_update: DataFrame holding the columns to scale.
        variables_remain: Column labels to select from ``df_seg_update``.

    Returns:
        The scaler's transformed output for the selected columns. The scaler
        is fitted on the same data it transforms.
    """
    selected = df_seg_update[variables_remain]
    # fit + transform on the same frame is exactly what fit_transform does.
    return StandardScaler().fit_transform(selected)

def load_class(
    serviceaccount="/home/yuanzf/uvi-time-machine/google_drive_personal.json",
    url="https://docs.google.com/spreadsheets/d/1o5gFmZPUoDwrrbfE6M26uJF3HnEZll02ivnOxP6K6Xw/edit?usp=sharing",
    sheet_name="object150",
):
    """Download the object-class metadata worksheet from Google Sheets.

    Args:
        serviceaccount: Path to the Google service-account JSON key file.
            The default is a user-specific absolute path; pass your own key
            file when running under another account.
        url: Sharing URL of the spreadsheet. The spreadsheet key is taken
            from the sixth "/"-separated component of the URL.
        sheet_name: Name of the worksheet to read.

    Returns:
        pandas.DataFrame with one row per worksheet record. Other code in this
        notebook reads its "id" and "category" columns.

    Raises:
        IndexError: If ``url`` has fewer than six "/"-separated components.
    """
    # Imported here so gspread is only required when the sheet is actually fetched.
    import gspread

    client = gspread.service_account(filename=serviceaccount)

    sheet_id = url.split("/")[5]
    worksheet = client.open_by_key(sheet_id).worksheet(sheet_name)
    return pd.DataFrame(worksheet.get_all_records())

def get_result(cityabbr, curated_folder, f_suffixes = "*panoptic.csv"):
    """Read and concatenate every result CSV of one city.

    Args:
        cityabbr: City folder name under ``curated_folder``.
        curated_folder: Folder that contains one sub-folder per city.
        f_suffixes: Glob pattern selecting the CSV files inside the city folder.

    Returns:
        pandas.DataFrame with the rows of all matching files and a fresh
        0..n-1 index. Files are read in sorted path order so the row order
        does not depend on the filesystem's listing order.

    Raises:
        ValueError: If no file matches the pattern.
    """
    outfolder = f"{curated_folder}/{cityabbr}"
    pattern = os.path.join(outfolder, f_suffixes)
    seg_files = sorted(glob(pattern))
    if not seg_files:
        # pd.concat([]) would raise the same exception type, but with a
        # message that gives no hint about which city/pattern was empty.
        raise ValueError(f"No files matching {pattern!r}; nothing to concatenate.")
    return pd.concat([pd.read_csv(path) for path in seg_files], ignore_index = True)

def clean_seg(seg_df, pano_df, meta_df):
    """Filter segmentation rows and reshape them to one row per image.

    Steps, in order:
      1. Inner-join ``seg_df`` with ``meta_df`` on "img" and keep rows whose
         "size" is at least 10000.
      2. Drop images that have fewer than 3 distinct "labels".
      3. Sum "areas" per (img, labels) and derive "panoid" as the first 22
         characters of "img".
      4. Inner-join with ``pano_df`` on "panoid", drop duplicate
         (img, year, h3_9, labels) rows and pivot labels into columns.

    Args:
        seg_df: Segmentation rows with at least "img", "labels", "areas".
        pano_df: Panorama table with at least "panoid", "year", "h3_9".
        meta_df: Image table with at least "img" and "size".

    Returns:
        DataFrame with columns "img", "year", "h3_9" followed by one column
        per label holding the summed area (0 where the label is absent).
    """
    label_cols = ["labels"]
    key_cols = ["img", "year", "h3_9"]

    # Step 1: attach image metadata and apply the minimum-size filter.
    with_meta = seg_df.merge(meta_df, on = 'img')
    sized = with_meta[with_meta['size'] >= 10000].reset_index(drop = True)
    print("Segmentation shape after filter: ", sized.shape[0])

    # Step 2: images with fewer than 3 distinct labels are treated as faulty.
    sized['label_c'] = sized.groupby('img')['labels'].transform('nunique')
    valid = sized[sized['label_c'] >= 3].reset_index(drop = True)
    print("Segmentation shape after filter images with problems: ", valid.shape[0])

    # Step 3: total area per image and label, plus the panorama id prefix.
    area_by_label = valid.groupby(["img", "labels"]).agg({'areas':'sum'}).reset_index()
    area_by_label['panoid'] = area_by_label['img'].apply(lambda name: name[:22])

    # Step 4: bring in year / hexagon from the panorama table.
    joined = area_by_label.merge(pano_df, on = ['panoid'])

    rows_before = area_by_label.shape[0]
    rows_after = joined.shape[0]
    if rows_after < rows_before:
        print("data missing after data join.")
        print("Before join: ", rows_before)
        print("After join: ", rows_after)
    else:
        print("data consistent")

    deduped = joined.drop_duplicates(key_cols + label_cols)
    print("Segmentation shape: ", deduped.shape[0])

    wide = deduped.pivot(
        columns = label_cols,
        index = key_cols,
        values = "areas"
    )
    return wide.reset_index().fillna(0)
def load_raw(city):
    """Load the raw segmentation, image-metadata and panorama tables of a city.

    Args:
        city: City name; it is lower-cased and stripped of spaces to form the
            folder abbreviation used in all paths.

    Returns:
        Tuple ``(seg_df, meta_df, pano_df)``:
          * seg_df: concatenation of the city's "*seg.csv" files.
          * meta_df: columns "img", "size", "path", where "img" is the file
            name from "path" cut at its first ".".
          * pano_df: columns "panoid", "lat", "lon", "year", "month" plus one
            "h3_<res>" column per resolution listed in ``H3_RES``.
    """
    cityabbr = city.lower().replace(" ", "")

    seg_df = get_result(cityabbr, CURATED_FOLDER, f_suffixes = "*seg.csv")

    pano_df = pd.read_csv(PANO_PATH.format(
        ROOT = ROOT,
        cityabbr = cityabbr
    ))[['panoid', 'lat', 'lon', 'year', 'month']]

    # h3-py v4 renamed geo_to_h3 to latlng_to_cell; accept whichever exists.
    to_cell = getattr(h3, "latlng_to_cell", None) or h3.geo_to_h3
    for h3_res in H3_RES:
        # Iterating the two columns avoids building a Series per row, which is
        # what DataFrame.apply(axis=1) does, and also works on an empty table.
        pano_df[f'h3_{h3_res}'] = [
            to_cell(lat, lon, h3_res)
            for lat, lon in zip(pano_df['lat'], pano_df['lon'])
        ]

    meta_df = pd.read_csv(META_PATH.format(
        ROOT = ROOT,
        cityabbr = cityabbr
    ))
    # Image id = file name without directory and without anything after the first "."
    meta_df['img'] = meta_df['path'].apply(lambda x: x.split("/")[-1].split(".")[0])
    # Keep only the columns the downstream cleaning step needs.
    meta_df = meta_df[['img','size','path']]
    return seg_df, meta_df, pano_df

def get_seg_data(city, hex_detail_cluster):
    """Load a city's segmentation and return per-image area totals by category.

    Args:
        city: City name, passed to ``load_raw``.
        hex_detail_cluster: Optional DataFrame with columns "city_lower",
            "hex_id" and "cluster_8". When given, images are inner-joined to
            the rows of this city on the hexagon id and "cluster_8" is carried
            into the result. Pass ``None`` to skip the join.

    Returns:
        Tuple ``(per_image, meta_df)``. ``per_image`` has the key columns
        "img", "year", "h3_<res>" (plus "cluster_8" when joined) followed by
        one column per category name with the summed area; the "other"
        category is removed. ``meta_df`` is the image metadata from
        ``load_raw``.
    """
    cityabbr = city.lower().replace(" ", "")
    seg_df, meta_df, pano_df = load_raw(city)
    seg_df_pivot = clean_seg(seg_df, pano_df, meta_df)
    # Nothing is written to disk here, so report a load rather than a save.
    print(f"city {cityabbr} loaded")
    print("*"*50)

    hex_col = f"h3_{res}"
    index_cols = ["img", "year", hex_col]
    if hex_detail_cluster is not None:
        # Only index by cluster when the cluster column is actually attached;
        # indexing by a column that was never merged raises KeyError.
        city_clusters = hex_detail_cluster.loc[
            hex_detail_cluster['city_lower'] == cityabbr, ['hex_id', 'cluster_8']
        ]
        seg_df_pivot = seg_df_pivot.merge(city_clusters, left_on = hex_col, right_on = 'hex_id')\
            .drop(columns = ['hex_id'])
        index_cols.append("cluster_8")

    # Translate class ids to category names; several ids may share a category,
    # so the renamed frame can contain repeated column names.
    known_ids = set(obj_meta["id"].values)
    new_cols = [
        ADE_CATEGORIES_DICT[str(x)] if str(x) in known_ids else str(x)
        for x in seg_df_pivot.columns
    ]
    seg_df_pivot_sel = seg_df_pivot.set_axis(new_cols, axis = 1)

    categories = set(obj_meta["category"].unique())
    variables = {v for v in seg_df_pivot_sel.columns if v in categories}
    print("Variables original: ", len(variables))
    to_drop = ["other"]
    variables_remain = [v for v in variables if v not in to_drop]
    print("Variables kept: ", len(variables_remain))

    # Sum the columns that share a name, one category at a time. Raw arrays
    # are used so no index alignment is attempted on possibly repeated keys.
    values = seg_df_pivot_sel.set_index(index_cols)
    names = values.columns
    summed = pd.DataFrame(
        {
            name: values.loc[:, names == name].sum(axis = 1).to_numpy()
            for name in sorted(names.unique())
        },
        index = values.index,
    )
    # Collapse repeated key rows, then drop the catch-all category if present.
    seg_df_pivot_sel_stack = summed.groupby(level = index_cols).sum().reset_index()\
        .drop(columns = to_drop, errors = 'ignore')
    return seg_df_pivot_sel_stack, meta_df

def extract_seg(seg_df_pivot_sel):
    """Summarize sidewalk and exposure presence per H3 hexagon.

    An image counts as having a sidewalk when its "sidewalk" category area is
    positive, and as "exposed" when the combined area of the exposure
    categories (building, house, table+chair, wall, window, person, bike,
    motorcycle, car) is positive. A category with no column in the input
    contributes zero area.

    Args:
        seg_df_pivot_sel: Per-image wide table with key columns "img", "year",
            "h3_<res>" and one area column per class id. The frame is not
            modified.

    Returns:
        DataFrame with one row per hexagon and the columns
        "h3_<res>", "sidewalk_indicator" and "exposure_indicator" (number of
        images with the indicator set), "img" (number of images), "panoid"
        (number of distinct 22-character image-name prefixes),
        "sidewalk_presence" and "exposure_presence" (indicator count divided
        by image count).
    """
    hex_col = f"h3_{res}"
    index_cols = ["img", "year", hex_col]
    exposure_categories = ['building','house','table+chair','wall', 'window',
                           'person','bike','motorcycle','car']

    # Category name for every input column, computed on the side so the
    # caller's frame keeps its own column labels.
    known_ids = set(obj_meta["id"].values)
    new_cols = pd.Index([
        ADE_CATEGORIES_DICT[str(x)] if str(x) in known_ids else str(x)
        for x in seg_df_pivot_sel.columns
    ])

    categories = set(obj_meta["category"].unique())
    variables = {v for v in new_cols if v in categories}
    print("Variables original: ", len(variables))
    to_drop = ["other"]
    variables_remain = [v for v in variables if v not in to_drop]
    print("Variables kept: ", len(variables_remain))

    def _area_of(category_names):
        # Row-wise total over every column mapped to one of the categories;
        # an empty selection sums to 0 for each row.
        selected = seg_df_pivot_sel.loc[:, new_cols.isin(category_names)]
        return selected.sum(axis = 1).to_numpy()

    # Only two totals are needed for the indicators, so the full
    # stack / pivot of every category is skipped.
    per_image = seg_df_pivot_sel[index_cols].copy()
    per_image['sidewalk'] = _area_of(['sidewalk'])
    per_image['exposure'] = _area_of(exposure_categories)
    # Collapse repeated (img, year, hexagon) rows before thresholding.
    per_image = per_image.groupby(index_cols)[['sidewalk', 'exposure']].sum().reset_index()

    per_image['sidewalk_indicator'] = per_image['sidewalk'] > 0
    per_image['exposure_indicator'] = per_image['exposure'] > 0
    per_image['panoid'] = per_image['img'].astype(str).str[:22]

    seg_df_summary = per_image\
        .groupby(hex_col)\
            .agg({'sidewalk_indicator':'sum',
                'exposure_indicator':'sum',
                'img':'count',
                'panoid':'nunique'})\
                .reset_index()
    seg_df_summary['sidewalk_presence'] = seg_df_summary['sidewalk_indicator'] / seg_df_summary['img']
    seg_df_summary['exposure_presence'] = seg_df_summary['exposure_indicator'] / seg_df_summary['img']
    return seg_df_summary

def get_object_result(cityabbr, seg_df_summary):
    """Append per-panorama object counts to the hexagon summary and save it.

    Reads ``{OBJECT_DET}/c_object_res={res}_{cityabbr}.csv`` (columns used:
    "year", "hex_id", "panoid_n", "object_name", "n"), converts the counts to
    objects per panorama for every hexagon, joins them to ``seg_df_summary``
    and writes the result to ``{EXPORT_FOLDER}/{cityabbr}_seg_summary.csv``.

    Args:
        cityabbr: City abbreviation used in the input and output file names.
        seg_df_summary: Hexagon-level summary with an "h3_<res>" column.

    Returns:
        ``seg_df_summary`` with one extra "obj_<object_name>" column per
        object. Hexagons without any detection rows get NaN in those columns.
    """
    # Same resolution as the detection file that is read below.
    hex_col = f"h3_{res}"
    object_detect = pd.read_csv(f"{OBJECT_DET}/c_object_res={res}_{cityabbr}.csv")
    # calculate the total number of panoid per hex_id across all years
    object_detect_sum = object_detect.drop_duplicates(['year', 'hex_id','panoid_n']).groupby(['hex_id'])\
        .agg({'panoid_n':'sum'}).reset_index()
    object_detect_group = object_detect.groupby(['hex_id','object_name']).agg({
        'n':'sum'}).reset_index()\
            .merge(object_detect_sum, on = 'hex_id', how = 'left')
    object_detect_group['n_per_pano'] = object_detect_group['n'] / object_detect_group['panoid_n']
    # Rows whose object_name is the string '0' are excluded from the wide table.
    object_detect_group_wide = object_detect_group[object_detect_group['object_name']!='0'].pivot(
        columns = 'object_name',
        index = 'hex_id',
        values = 'n_per_pano'
    ).fillna(0)
    object_detect_group_wide.columns = ['obj_'+str(x) for x in object_detect_group_wide.columns]
    seg_df_summary_update = seg_df_summary.merge(object_detect_group_wide,
                                                left_on = hex_col,
                                                right_index = True,
                                                how = 'left')
    # Create the export folder on first use instead of failing in to_csv.
    os.makedirs(EXPORT_FOLDER, exist_ok = True)
    seg_df_summary_update.to_csv(f"{EXPORT_FOLDER}/{cityabbr}_seg_summary.csv", index = False)
    print(f"city {cityabbr} saved")
    return seg_df_summary_update

In [55]:
# Fetch the class metadata and store the ids as strings, because the
# segmentation column labels are compared to them via str().
obj_meta = load_class()
obj_meta = obj_meta.assign(id = obj_meta["id"].astype(str))
# Lookup table: class id (string) -> category name.
ADE_CATEGORIES_DICT = {
    class_id: category
    for class_id, category in zip(obj_meta["id"].values, obj_meta["category"].values)
}

In [None]:
# Run the full pipeline for one city: load raw tables, clean and pivot the
# segmentation, summarize per hexagon, then add object counts and export.
city = "New York"
# Folder abbreviation: lower-case, spaces removed ("New York" -> "newyork").
cityabbr = city.lower().replace(" ", "")
seg_df, meta_df, pano_df = load_raw(city)
seg_df_pivot = clean_seg(seg_df, pano_df, meta_df)
seg_summary = extract_seg(seg_df_pivot)
# Also writes {EXPORT_FOLDER}/{cityabbr}_seg_summary.csv as a side effect.
seg_summary_update = get_object_result(cityabbr, seg_summary)

Segmentation shape after filter:  7943563
Segmentation shape after filter images with problems:  7942738
data consistent
Segmentation shape:  7942738


In [58]:
# loop through all cities
city_ls = pd.read_csv("../city_meta.csv")["City"].unique().tolist()
# city -> repr of the exception, so failures can be reviewed after the run
failed_cities = {}
for city in tqdm(city_ls):
    if city == "New York":
        # New York is handled by its own single-city cell.
        continue
    print(f"Processing {city}")
    try:
        cityabbr = city.lower().replace(" ", "")
        seg_df, meta_df, pano_df = load_raw(city)
        seg_df_pivot = clean_seg(seg_df, pano_df, meta_df)
        seg_summary = extract_seg(seg_df_pivot)
        seg_summary_update = get_object_result(cityabbr, seg_summary)
    except Exception as e:
        # Best effort: one bad city must not stop the batch. repr() keeps the
        # exception type, which str() drops (a KeyError prints only its key).
        failed_cities[city] = repr(e)
        print(city + " failed")
        print(f"Error: {e!r}")
        continue

if failed_cities:
    print(f"{len(failed_cities)} of {len(city_ls)} cities failed:")
    for failed_city, reason in failed_cities.items():
        print(f"  {failed_city}: {reason}")


  0%|          | 0/127 [00:00<?, ?it/s]

Processing Buenos Aires
Segmentation shape after filter:  2544226
Segmentation shape after filter images with problems:  2543977
data consistent
Segmentation shape:  2543977
Variables original:  31
Variables kept:  30


  1%|          | 1/127 [00:51<1:47:22, 51.13s/it]

city buenosaires saved
Processing Sydney
Segmentation shape after filter:  3246927
Segmentation shape after filter images with problems:  3246647
data consistent
Segmentation shape:  3155091
Variables original:  31
Variables kept:  30


  2%|▏         | 2/127 [02:49<3:09:20, 90.88s/it]

city sydney saved
Processing Vienna
Segmentation shape after filter:  4833171
Segmentation shape after filter images with problems:  4832696
data consistent
Segmentation shape:  3124569
Variables original:  31
Variables kept:  30


  2%|▏         | 3/127 [04:11<2:58:44, 86.49s/it]

city vienna saved
Processing Saidpur
Segmentation shape after filter:  191647
Segmentation shape after filter images with problems:  191635
data consistent
Segmentation shape:  119103
Variables original:  30
Variables kept:  29


  3%|▎         | 4/127 [04:14<1:49:41, 53.51s/it]

city saidpur saved
Processing Rajshahi
Segmentation shape after filter:  2039068
Segmentation shape after filter images with problems:  2038901
data consistent
Segmentation shape:  1598302
Variables original:  30
Variables kept:  29


  4%|▍         | 5/127 [04:54<1:39:17, 48.83s/it]

city rajshahi saved
Processing Dhaka
Segmentation shape after filter:  3966430
Segmentation shape after filter images with problems:  3964273
data consistent
Segmentation shape:  3964123
Variables original:  31
Variables kept:  30


  5%|▍         | 6/127 [06:47<2:22:40, 70.75s/it]

city dhaka saved
Processing Antwerp
Segmentation shape after filter:  2473824
Segmentation shape after filter images with problems:  2473423
data consistent
Segmentation shape:  1397228
Variables original:  31
Variables kept:  30


  6%|▌         | 7/127 [07:23<1:58:19, 59.16s/it]

city antwerp saved
Processing Brussels
Segmentation shape after filter:  11140084
Segmentation shape after filter images with problems:  11138322
data consistent
Segmentation shape:  10477750
Variables original:  31
Variables kept:  30


  6%|▋         | 8/127 [14:27<5:47:58, 175.45s/it]

city brussels saved
Processing Cochabamba
Segmentation shape after filter:  1641534
Segmentation shape after filter images with problems:  1641489
data consistent
Segmentation shape:  550393
Variables original:  31
Variables kept:  30


  7%|▋         | 9/127 [14:40<4:05:15, 124.71s/it]

city cochabamba saved
Processing Gaborone
Segmentation shape after filter:  1114504
Segmentation shape after filter images with problems:  1114460
data consistent
Segmentation shape:  1114460
Variables original:  30
Variables kept:  29


  8%|▊         | 10/127 [15:08<3:04:49, 94.79s/it]

city gaborone saved
Processing Ilheus
Segmentation shape after filter:  501854
Segmentation shape after filter images with problems:  501794
data consistent
Segmentation shape:  350007
Variables original:  31
Variables kept:  30


  9%|▊         | 11/127 [15:16<2:12:05, 68.33s/it]

city ilheus saved
Processing Jequie
Segmentation shape after filter:  598417
Segmentation shape after filter images with problems:  598391
data consistent
Segmentation shape:  452963
Variables original:  31
Variables kept:  30


  9%|▉         | 12/127 [15:26<1:36:33, 50.38s/it]

city jequie saved
Processing Palmas
Segmentation shape after filter:  1478402
Segmentation shape after filter images with problems:  1478378
data consistent
Segmentation shape:  1232482
Variables original:  31
Variables kept:  30


 10%|█         | 13/127 [15:54<1:22:58, 43.67s/it]

city palmas saved
Processing Florianopolis
Segmentation shape after filter:  2372020
Segmentation shape after filter images with problems:  2371854
data consistent
Segmentation shape:  2004279
Variables original:  31
Variables kept:  30


 11%|█         | 14/127 [16:36<1:21:13, 43.13s/it]

city florianopolis saved
Processing Ribeirao Preto
Segmentation shape after filter:  1716434
Segmentation shape after filter images with problems:  1715864
data consistent
Segmentation shape:  1715759
Variables original:  31
Variables kept:  30


 12%|█▏        | 15/127 [17:13<1:17:18, 41.41s/it]

city ribeiraopreto saved
Processing Belo Horizonte
Segmentation shape after filter:  2662143
Segmentation shape after filter images with problems:  2662037
data consistent
Segmentation shape:  2662013
Variables original:  31
Variables kept:  30


 13%|█▎        | 16/127 [18:12<1:26:23, 46.69s/it]

city belohorizonte saved
Processing Curitiba
Segmentation shape after filter:  4991510
Segmentation shape after filter images with problems:  4991244
data consistent
Segmentation shape:  4179965
Variables original:  31
Variables kept:  30


 13%|█▎        | 17/127 [19:42<1:49:36, 59.78s/it]

city curitiba saved
Processing Sao Paulo
Segmentation shape after filter:  3160720
Segmentation shape after filter images with problems:  3160452
data consistent
Segmentation shape:  3160452
Variables original:  31
Variables kept:  30


 14%|█▍        | 18/127 [22:20<2:41:53, 89.12s/it]

city saopaulo saved
Processing Rio de Janeiro
Segmentation shape after filter:  3307951
Segmentation shape after filter images with problems:  3306577
data consistent
Segmentation shape:  3306577
Variables original:  31
Variables kept:  30


 15%|█▍        | 19/127 [23:35<2:32:48, 84.89s/it]

city riodejaneiro saved
Processing Victoria
Segmentation shape after filter:  492539
Segmentation shape after filter images with problems:  492503
data consistent
Segmentation shape:  246925
Variables original:  31
Variables kept:  30


 16%|█▌        | 20/127 [23:42<1:49:34, 61.44s/it]

city victoria saved
Processing Montreal
Segmentation shape after filter:  4927106
Segmentation shape after filter images with problems:  4926930
data consistent
Segmentation shape:  3545678
Variables original:  31
Variables kept:  30


 17%|█▋        | 21/127 [25:30<2:13:15, 75.43s/it]

city montreal saved
Processing Toronto
Segmentation shape after filter:  1156620
Segmentation shape after filter images with problems:  1156550
data consistent
Segmentation shape:  1156550
Variables original:  31
Variables kept:  30


 17%|█▋        | 22/127 [25:58<1:47:12, 61.26s/it]

city toronto saved
Processing Santiago
Segmentation shape after filter:  376849
Segmentation shape after filter images with problems:  376746
data consistent
Segmentation shape:  275303
Variables original:  30
Variables kept:  29


 18%|█▊        | 23/127 [26:04<1:17:38, 44.79s/it]

city santiago saved
Processing Taipei
Segmentation shape after filter:  2562250
Segmentation shape after filter images with problems:  2560597
data consistent
Segmentation shape:  2560597
Variables original:  31
Variables kept:  30


 19%|█▉        | 24/127 [27:04<1:24:27, 49.20s/it]

city taipei saved
Processing Hong Kong
Segmentation shape after filter:  4377966
Segmentation shape after filter images with problems:  4373827
data consistent
Segmentation shape:  4373827
Variables original:  31
Variables kept:  30


 20%|█▉        | 25/127 [28:49<1:52:05, 65.93s/it]

city hongkong saved
Processing Valledupar
Segmentation shape after filter:  383795
Segmentation shape after filter images with problems:  383785
data consistent
Segmentation shape:  277217
Variables original:  31
Variables kept:  30


 20%|██        | 26/127 [28:55<1:21:01, 48.14s/it]

city valledupar saved
Processing Bogotá
Segmentation shape after filter:  680122
Segmentation shape after filter images with problems:  680052
data consistent
Segmentation shape:  680052
Variables original:  31
Variables kept:  30


 21%|██▏       | 27/127 [29:45<1:21:10, 48.71s/it]

city bogotá saved
Processing Copenhagen
Segmentation shape after filter:  799592
Segmentation shape after filter images with problems:  799545
data consistent
Segmentation shape:  799545
Variables original:  31
Variables kept:  30


 22%|██▏       | 28/127 [30:19<1:12:38, 44.03s/it]

city copenhagen saved
Processing Quito
Segmentation shape after filter:  4676093
Segmentation shape after filter images with problems:  4674679
data consistent
Segmentation shape:  2994061
Variables original:  31
Variables kept:  30


 23%|██▎       | 29/127 [31:28<1:24:09, 51.52s/it]

city quito saved
Processing Le Mans
Segmentation shape after filter:  533103
Segmentation shape after filter images with problems:  533047
data consistent
Segmentation shape:  533005
Variables original:  31
Variables kept:  30


 24%|██▎       | 30/127 [31:40<1:04:23, 39.83s/it]

city lemans saved
Processing Paris
Segmentation shape after filter:  203202
Segmentation shape after filter images with problems:  203154
data consistent
Segmentation shape:  202827
Variables original:  31
Variables kept:  30


 24%|██▍       | 31/127 [32:04<56:01, 35.01s/it]  

city paris saved
Processing Munich
Segmentation shape after filter:  1655495
Segmentation shape after filter images with problems:  1655395
data consistent
Segmentation shape:  1655395
Variables original:  31
Variables kept:  30


 25%|██▌       | 32/127 [32:38<54:50, 34.64s/it]

city munich saved
Processing Berlin
Segmentation shape after filter:  6212030
Segmentation shape after filter images with problems:  6211463
data consistent
Segmentation shape:  6211463
Variables original:  31
Variables kept:  30


 26%|██▌       | 33/127 [34:47<1:38:53, 63.12s/it]

city berlin saved
Processing Accra
Segmentation shape after filter:  1556663
Segmentation shape after filter images with problems:  1556653
data consistent
Segmentation shape:  1556487
Variables original:  31
Variables kept:  30


 27%|██▋       | 34/127 [35:15<1:21:12, 52.39s/it]

city accra saved
Processing Thessaloniki
Segmentation shape after filter:  2848365
Segmentation shape after filter images with problems:  2847879
data consistent
Segmentation shape:  2375507
Variables original:  31
Variables kept:  30


 28%|██▊       | 35/127 [36:13<1:23:19, 54.35s/it]

city thessaloniki saved
Processing Athens
Segmentation shape after filter:  4658181
Segmentation shape after filter images with problems:  4657745
data consistent
Segmentation shape:  4105579
Variables original:  31
Variables kept:  30


 28%|██▊       | 36/127 [37:51<1:41:56, 67.21s/it]

city athens saved
Processing Guatemala City
Segmentation shape after filter:  516987
Segmentation shape after filter images with problems:  516969
data consistent
Segmentation shape:  516669
Variables original:  31
Variables kept:  30


 29%|██▉       | 37/127 [38:00<1:14:44, 49.83s/it]

city guatemalacity saved
Processing Budapest
Segmentation shape after filter:  5096103
Segmentation shape after filter images with problems:  5095789
data consistent
Segmentation shape:  4014627
Variables original:  31
Variables kept:  30


 30%|██▉       | 38/127 [39:29<1:31:18, 61.56s/it]

city budapest saved
Processing Hindupur
Segmentation shape after filter:  125634
Segmentation shape after filter images with problems:  125628
data consistent
Segmentation shape:  78327
Variables original:  30
Variables kept:  29


 31%|███       | 39/127 [39:31<1:04:04, 43.69s/it]

city hindupur saved
Processing Vijayawada
Segmentation shape after filter:  178671
Segmentation shape after filter images with problems:  178665
data consistent
Segmentation shape:  116545
Variables original:  30
Variables kept:  29


 31%|███▏      | 40/127 [39:33<45:24, 31.31s/it]  

city vijayawada saved
Processing Delhi
Segmentation shape after filter:  4531955
Segmentation shape after filter images with problems:  4529020
data consistent
Segmentation shape:  4528772
Variables original:  31
Variables kept:  30


 32%|███▏      | 41/127 [41:23<1:18:37, 54.86s/it]

city delhi saved
Processing Bangalore
Segmentation shape after filter:  4223593
Segmentation shape after filter images with problems:  4222807
data consistent
Segmentation shape:  4218616
Variables original:  31
Variables kept:  30


 33%|███▎      | 42/127 [42:49<1:30:43, 64.05s/it]

city bangalore saved
Processing Kozhikode
Segmentation shape after filter:  2916760
Segmentation shape after filter images with problems:  2916401
data consistent
Segmentation shape:  1685349
Variables original:  31
Variables kept:  30


 34%|███▍      | 43/127 [43:27<1:18:53, 56.36s/it]

city kozhikode saved
Processing Mumbai
Segmentation shape after filter:  1042345
Segmentation shape after filter images with problems:  1042190
data consistent
Segmentation shape:  510844
Variables original:  31
Variables kept:  30


 35%|███▍      | 44/127 [43:38<59:07, 42.74s/it]  

city mumbai saved
Processing Malegaon
Segmentation shape after filter:  2142546
Segmentation shape after filter images with problems:  2141394
data consistent
Segmentation shape:  1235342
Variables original:  30
Variables kept:  29


 35%|███▌      | 45/127 [44:09<53:37, 39.24s/it]

city malegaon saved
Processing Pune
Segmentation shape after filter:  3819373
Segmentation shape after filter images with problems:  3818492
data consistent
Segmentation shape:  1769693
Variables original:  31
Variables kept:  30


 36%|███▌      | 46/127 [44:50<53:37, 39.72s/it]

city pune saved
Processing Parbhani
Segmentation shape after filter:  1935978
Segmentation shape after filter images with problems:  1935465
data consistent
Segmentation shape:  1423409
Variables original:  30
Variables kept:  29


 37%|███▋      | 47/127 [45:28<52:07, 39.09s/it]

city parbhani saved
Processing Jalna
Segmentation shape after filter:  132755
Segmentation shape after filter images with problems:  132716
data consistent
Segmentation shape:  119451
Variables original:  30
Variables kept:  29


 38%|███▊      | 48/127 [45:50<44:45, 34.00s/it]

city jalna saved
Processing Jaipur
Segmentation shape after filter:  2971314
Segmentation shape after filter images with problems:  2970648
data consistent
Segmentation shape:  2623085
Variables original:  30
Variables kept:  29


 39%|███▊      | 49/127 [46:50<54:19, 41.79s/it]

city jaipur saved
Processing Hyderabad
Segmentation shape after filter:  4061804
Segmentation shape after filter images with problems:  4061031
data consistent
Segmentation shape:  4060650
Variables original:  31
Variables kept:  30


 39%|███▉      | 50/127 [48:17<1:11:11, 55.47s/it]

city hyderabad saved
Processing Sitapur
Segmentation shape after filter:  957428
Segmentation shape after filter images with problems:  957005
data consistent
Segmentation shape:  523061
Variables original:  31
Variables kept:  30


 40%|████      | 51/127 [48:29<53:41, 42.39s/it]  

city sitapur saved
Processing Kanpur
Segmentation shape after filter:  3051027
Segmentation shape after filter images with problems:  3050621
data consistent
Segmentation shape:  2320799
Variables original:  31
Variables kept:  30


 41%|████      | 52/127 [49:19<56:01, 44.83s/it]

city kanpur saved
Processing Parepare
Segmentation shape after filter:  401223
Segmentation shape after filter images with problems:  401199
data consistent
Segmentation shape:  339921
Variables original:  31
Variables kept:  30


 42%|████▏     | 53/127 [49:27<41:27, 33.62s/it]

city parepare saved
Processing Cirebon
Segmentation shape after filter:  512915
Segmentation shape after filter images with problems:  512844
data consistent
Segmentation shape:  393945
Variables original:  30
Variables kept:  29


 43%|████▎     | 54/127 [49:35<31:32, 25.92s/it]

city cirebon saved
Processing Medan
Segmentation shape after filter:  2878026
Segmentation shape after filter images with problems:  2877165
data consistent
Segmentation shape:  2225972
Variables original:  31
Variables kept:  30


 43%|████▎     | 55/127 [50:21<38:31, 32.10s/it]

city medan saved
Processing Palembang
Segmentation shape after filter:  3057571
Segmentation shape after filter images with problems:  3057283
data consistent
Segmentation shape:  2243419
Variables original:  31
Variables kept:  30


 44%|████▍     | 56/127 [51:12<44:23, 37.52s/it]

city palembang saved
Processing Jakarta
Segmentation shape after filter:  5325105
Segmentation shape after filter images with problems:  5323605
data consistent
Segmentation shape:  5299667
Variables original:  31
Variables kept:  30


 45%|████▍     | 57/127 [53:19<1:15:24, 64.64s/it]

city jakarta saved
Processing Tel Aviv
Segmentation shape after filter:  462703
Segmentation shape after filter images with problems:  462680
data consistent
Segmentation shape:  462491
Variables original:  31
Variables kept:  30


 46%|████▌     | 58/127 [53:28<55:02, 47.87s/it]  

city telaviv saved
Processing Jerusalem
Segmentation shape after filter:  2277930
Segmentation shape after filter images with problems:  2277471
data consistent
Segmentation shape:  2277471
Variables original:  31
Variables kept:  30


 46%|████▋     | 59/127 [54:09<51:57, 45.85s/it]

city jerusalem saved
Processing Palermo
Segmentation shape after filter:  1314365
Segmentation shape after filter images with problems:  1314214
data consistent
Segmentation shape:  1033484
Variables original:  31
Variables kept:  30


 47%|████▋     | 60/127 [54:33<43:36, 39.06s/it]

city palermo saved
Processing Rome
Segmentation shape after filter:  4622444
Segmentation shape after filter images with problems:  4621679
data consistent
Segmentation shape:  4621679
Variables original:  31
Variables kept:  30


 48%|████▊     | 61/127 [57:30<1:28:32, 80.49s/it]

city rome saved
Processing Milan
Segmentation shape after filter:  1809439
Segmentation shape after filter images with problems:  1809204
data consistent
Segmentation shape:  1798767
Variables original:  31
Variables kept:  30


 49%|████▉     | 62/127 [58:16<1:15:59, 70.15s/it]

city milan saved
Processing Yamaguchi
Segmentation shape after filter:  3116688
Segmentation shape after filter images with problems:  3114508
data consistent
Segmentation shape:  2897948
Variables original:  31
Variables kept:  30


 50%|████▉     | 63/127 [59:20<1:12:49, 68.27s/it]

city yamaguchi saved
Processing Tokyo
Segmentation shape after filter:  5064465
Segmentation shape after filter images with problems:  5063708
data consistent
Segmentation shape:  4976211
Variables original:  31
Variables kept:  30


 50%|█████     | 64/127 [1:02:12<1:44:29, 99.52s/it]

city tokyo saved
Processing Okayama
Segmentation shape after filter:  3070633
Segmentation shape after filter images with problems:  3069391
data consistent
Segmentation shape:  2232387
Variables original:  31
Variables kept:  30


 51%|█████     | 65/127 [1:03:21<1:33:16, 90.26s/it]

city okayama saved
Processing Fukuoka
Segmentation shape after filter:  5772709
Segmentation shape after filter images with problems:  5771367
data consistent
Segmentation shape:  4555754
Variables original:  31
Variables kept:  30


 52%|█████▏    | 66/127 [1:05:26<1:42:34, 100.89s/it]

city fukuoka saved
Processing Nagoya
Segmentation shape after filter:  4174259
Segmentation shape after filter images with problems:  4173675
data consistent
Segmentation shape:  4173675
Variables original:  31
Variables kept:  30


 53%|█████▎    | 67/127 [1:07:20<1:44:51, 104.86s/it]

city nagoya saved
Processing Nairobi
Segmentation shape after filter:  6781470
Segmentation shape after filter images with problems:  6780119
data consistent
Segmentation shape:  6780119
Variables original:  31
Variables kept:  30


 54%|█████▎    | 68/127 [1:10:13<2:03:01, 125.12s/it]

city nairobi saved
Processing Kaunas
Segmentation shape after filter:  2220050
Segmentation shape after filter images with problems:  2219839
data consistent
Segmentation shape:  1235059
Variables original:  31
Variables kept:  30


 54%|█████▍    | 69/127 [1:10:42<1:33:14, 96.46s/it] 

city kaunas saved
Processing Kuala Lumpur
Segmentation shape after filter:  760417
Segmentation shape after filter images with problems:  760361
data consistent
Segmentation shape:  760361
Variables original:  31
Variables kept:  30


 55%|█████▌    | 70/127 [1:11:02<1:09:37, 73.30s/it]

city kualalumpur saved
Processing Guadalajara
Segmentation shape after filter:  1530365
Segmentation shape after filter images with problems:  1530307
data consistent
Segmentation shape:  1374233
Variables original:  31
Variables kept:  30


 56%|█████▌    | 71/127 [1:11:34<56:57, 61.02s/it]  

city guadalajara saved
Processing Reynosa
Segmentation shape after filter:  2189409
Segmentation shape after filter images with problems:  2189357
data consistent
Segmentation shape:  1211139
Variables original:  31
Variables kept:  30


 57%|█████▋    | 72/127 [1:12:04<47:30, 51.82s/it]

city reynosa saved
Processing Culiacan
Segmentation shape after filter:  1948226
Segmentation shape after filter images with problems:  1948144
data consistent
Segmentation shape:  1452794
Variables original:  31
Variables kept:  30


 57%|█████▋    | 73/127 [1:12:35<41:01, 45.58s/it]

city culiacan saved
Processing Mexico City
Segmentation shape after filter:  7425145
Segmentation shape after filter images with problems:  7423209
data consistent
Segmentation shape:  7423209
Variables original:  31
Variables kept:  30


 58%|█████▊    | 74/127 [1:16:08<1:24:26, 95.60s/it]

city mexicocity saved
Processing Zwolle
Segmentation shape after filter:  1123088
Segmentation shape after filter images with problems:  1123006
data consistent
Segmentation shape:  756600
Variables original:  31
Variables kept:  30


 59%|█████▉    | 75/127 [1:16:27<1:02:59, 72.69s/it]

city zwolle saved
Processing Amsterdam
Segmentation shape after filter:  4719004
Segmentation shape after filter images with problems:  4718461
data consistent
Segmentation shape:  4718352
Variables original:  31
Variables kept:  30


 60%|█████▉    | 76/127 [1:19:15<1:26:12, 101.43s/it]

city amsterdam saved
Processing Wellington
Segmentation shape after filter:  3203200
Segmentation shape after filter images with problems:  3202592
data consistent
Segmentation shape:  3202592
Variables original:  31
Variables kept:  30


 61%|██████    | 77/127 [1:20:18<1:14:46, 89.74s/it] 

city wellington saved
Processing Auckland
Segmentation shape after filter:  5766361
Segmentation shape after filter images with problems:  5766188
data consistent
Segmentation shape:  4582108
Variables original:  31
Variables kept:  30


 61%|██████▏   | 78/127 [1:22:07<1:18:05, 95.61s/it]

city auckland saved
Processing Gombe
Segmentation shape after filter:  449672
Segmentation shape after filter images with problems:  449577
data consistent
Segmentation shape:  362838
Variables original:  30
Variables kept:  29


 62%|██████▏   | 79/127 [1:22:15<55:28, 69.34s/it]  

city gombe saved
Processing Lagos
Segmentation shape after filter:  4676460
Segmentation shape after filter images with problems:  4676357
data consistent
Segmentation shape:  4676357
Variables original:  31
Variables kept:  30


 63%|██████▎   | 80/127 [1:23:44<58:52, 75.16s/it]

city lagos saved
Processing Lima
Segmentation shape after filter:  4851403
Segmentation shape after filter images with problems:  4850923
data consistent
Segmentation shape:  4850923
Variables original:  31
Variables kept:  30


 64%|██████▍   | 81/127 [1:25:30<1:04:46, 84.48s/it]

city lima saved
Processing Manila
Segmentation shape after filter:  542549
Segmentation shape after filter images with problems:  542480
data consistent
Segmentation shape:  471941
Variables original:  31
Variables kept:  30


 65%|██████▍   | 82/127 [1:25:39<46:19, 61.77s/it]  

city manila saved
Processing Bacolod
Segmentation shape after filter:  1022292
Segmentation shape after filter images with problems:  1021982
data consistent
Segmentation shape:  897583
Variables original:  30
Variables kept:  29


 65%|██████▌   | 83/127 [1:25:57<35:33, 48.49s/it]

city bacolod saved
Processing Cebu City
Segmentation shape after filter:  952647
Segmentation shape after filter images with problems:  952421
data consistent
Segmentation shape:  952421
Variables original:  31
Variables kept:  30


 66%|██████▌   | 84/127 [1:26:17<28:41, 40.04s/it]

city cebucity saved
Processing Metro Manila
Segmentation shape after filter:  5390018
Segmentation shape after filter images with problems:  5389427
data consistent
Segmentation shape:  5389427
Variables original:  31
Variables kept:  30


 67%|██████▋   | 85/127 [1:28:14<44:09, 63.09s/it]

city metromanila saved
Processing Warsaw
Segmentation shape after filter:  7853922
Segmentation shape after filter images with problems:  7852605
data consistent
Segmentation shape:  3768925
Variables original:  31
Variables kept:  30


 68%|██████▊   | 86/127 [1:29:52<50:24, 73.77s/it]

city warsaw saved
Processing Seoul
Segmentation shape after filter:  6084830
Segmentation shape after filter images with problems:  6083655
data consistent
Segmentation shape:  6081802
Variables original:  31
Variables kept:  30


 69%|██████▊   | 87/127 [1:32:00<59:58, 89.96s/it]

city seoul saved
Processing Berezniki
Segmentation shape after filter:  806511
Segmentation shape after filter images with problems:  806481
data consistent
Segmentation shape:  283668
Variables original:  31
Variables kept:  30


 69%|██████▉   | 88/127 [1:32:07<42:20, 65.13s/it]

city berezniki saved
Processing Dzerzhinsk
Segmentation shape after filter:  1073048
Segmentation shape after filter images with problems:  1072894
data consistent
Segmentation shape:  539923
Variables original:  31
Variables kept:  30


 70%|███████   | 89/127 [1:32:20<31:20, 49.48s/it]

city dzerzhinsk saved
Processing Astrakhan
Segmentation shape after filter:  1284947
Segmentation shape after filter images with problems:  1284523
data consistent
Segmentation shape:  704534
Variables original:  31
Variables kept:  30


 71%|███████   | 90/127 [1:32:37<24:22, 39.53s/it]

city astrakhan saved
Processing Tyumen
Segmentation shape after filter:  1955965
Segmentation shape after filter images with problems:  1955925
data consistent
Segmentation shape:  819983
Variables original:  31
Variables kept:  30


 72%|███████▏  | 91/127 [1:32:55<19:56, 33.24s/it]

city tyumen saved
Processing Saint Petersburg
Segmentation shape after filter:  4730966
Segmentation shape after filter images with problems:  4730619
data consistent
Segmentation shape:  4730586
Variables original:  31
Variables kept:  30


 72%|███████▏  | 92/127 [1:35:00<35:21, 60.62s/it]

city saintpetersburg saved
Processing Moscow
Segmentation shape after filter:  9036352
Segmentation shape after filter images with problems:  9035871
data consistent
Segmentation shape:  9035871
Variables original:  31
Variables kept:  30


 73%|███████▎  | 93/127 [1:38:45<1:02:22, 110.06s/it]

city moscow saved
Processing Kigali
Segmentation shape after filter:  1098656
Segmentation shape after filter images with problems:  1098634
data consistent
Segmentation shape:  913738
Variables original:  30
Variables kept:  29


 74%|███████▍  | 94/127 [1:39:05<45:43, 83.13s/it]   

city kigali saved
Processing Belgrade
Segmentation shape after filter:  2239146
Segmentation shape after filter images with problems:  2238860
data consistent
Segmentation shape:  1374581
Variables original:  31
Variables kept:  30


 75%|███████▍  | 95/127 [1:39:34<35:35, 66.75s/it]

city belgrade saved
Processing Singapore
Segmentation shape after filter:  7624184
Segmentation shape after filter images with problems:  7623821
data consistent
Segmentation shape:  7623821
Variables original:  31
Variables kept:  30


 76%|███████▌  | 96/127 [1:42:12<48:33, 93.99s/it]

city singapore saved
Processing Capetown
Segmentation shape after filter:  3454380
Segmentation shape after filter images with problems:  3453676
data consistent
Segmentation shape:  3453676
Variables original:  31
Variables kept:  30


 76%|███████▋  | 97/127 [1:43:42<46:24, 92.81s/it]

city capetown saved
Processing Johannesburg
Segmentation shape after filter:  10973546
Segmentation shape after filter images with problems:  10973139
data consistent
Segmentation shape:  10970866
Variables original:  31
Variables kept:  30


 77%|███████▋  | 98/127 [1:48:08<1:10:04, 144.97s/it]

city johannesburg saved
Processing Madrid
Segmentation shape after filter:  2749038
Segmentation shape after filter images with problems:  2748611
data consistent
Segmentation shape:  2748535
Variables original:  31
Variables kept:  30


 78%|███████▊  | 99/127 [1:49:00<54:32, 116.86s/it]  

city madrid saved
Processing Stockholm
Segmentation shape after filter:  2344922
Segmentation shape after filter images with problems:  2344493
data consistent
Segmentation shape:  2344406
Variables original:  31
Variables kept:  30


 79%|███████▊  | 100/127 [1:49:50<43:35, 96.86s/it]

city stockholm saved
Processing Bangkok
Segmentation shape after filter:  5171084
Segmentation shape after filter images with problems:  5170397
data consistent
Segmentation shape:  5169578
Variables original:  31
Variables kept:  30


 80%|███████▉  | 101/127 [1:52:01<46:24, 107.08s/it]

city bangkok saved
Processing Istanbul
Segmentation shape after filter:  10550243
Segmentation shape after filter images with problems:  10548839
data consistent
Segmentation shape:  10544073
Variables original:  31
Variables kept:  30


 80%|████████  | 102/127 [1:56:29<1:04:49, 155.59s/it]

city istanbul saved
Processing Kampala
Segmentation shape after filter:  1213931
Segmentation shape after filter images with problems:  1213914
data consistent
Segmentation shape:  1213849
Variables original:  31
Variables kept:  30


 81%|████████  | 103/127 [1:56:50<46:03, 115.17s/it]  

city kampala saved
Processing Rovno
Segmentation shape after filter:  872696
Segmentation shape after filter images with problems:  872448
data consistent
Segmentation shape:  332486
Variables original:  31
Variables kept:  30


 82%|████████▏ | 104/127 [1:56:58<31:44, 82.80s/it] 

city rovno saved
Processing Kyiv
Segmentation shape after filter:  3121008
Segmentation shape after filter images with problems:  3120858
data consistent
Segmentation shape:  3120858
Variables original:  31
Variables kept:  30


 83%|████████▎ | 105/127 [1:57:55<27:34, 75.21s/it]

city kyiv saved
Processing Dubai
Segmentation shape after filter:  4037138
Segmentation shape after filter images with problems:  4036697
data consistent
Segmentation shape:  4036697
Variables original:  31
Variables kept:  30


 83%|████████▎ | 106/127 [1:59:13<26:34, 75.92s/it]

city dubai saved
Processing Manchester
Segmentation shape after filter:  1913992
Segmentation shape after filter images with problems:  1913918
data consistent
Segmentation shape:  1338161
Variables original:  31
Variables kept:  30


 84%|████████▍ | 107/127 [1:59:46<21:00, 63.02s/it]

city manchester saved
Processing Sheffield
Segmentation shape after filter:  2844080
Segmentation shape after filter images with problems:  2843906
data consistent
Segmentation shape:  1990806
Variables original:  31
Variables kept:  30


 85%|████████▌ | 108/127 [2:00:28<18:01, 56.93s/it]

city sheffield saved
Processing London
Segmentation shape after filter:  15982042
Segmentation shape after filter images with problems:  15980853
data consistent
Segmentation shape:  15980618
Variables original:  31
Variables kept:  30


 86%|████████▌ | 109/127 [2:07:11<48:14, 160.81s/it]

city london saved
Processing Killeen
Segmentation shape after filter:  1198134
Segmentation shape after filter images with problems:  1198134
data consistent
Segmentation shape:  856116
Variables original:  30
Variables kept:  29


 87%|████████▋ | 110/127 [2:07:29<33:21, 117.75s/it]

city killeen saved
Processing Modesto
Segmentation shape after filter:  1190067
Segmentation shape after filter images with problems:  1190057
data consistent
Segmentation shape:  947753
Variables original:  30
Variables kept:  29


 87%|████████▋ | 111/127 [2:07:49<23:33, 88.36s/it] 

city modesto saved
Processing Gainesville, FL
Segmentation shape after filter:  964153
Segmentation shape after filter images with problems:  964123
data consistent
Segmentation shape:  964102
Variables original:  31
Variables kept:  30


 88%|████████▊ | 112/127 [2:08:11<17:07, 68.50s/it]

city gainesville,fl saved
Processing Minneapolis
Segmentation shape after filter:  1773970
Segmentation shape after filter images with problems:  1773916
data consistent
Segmentation shape:  1514176
Variables original:  31
Variables kept:  30


 89%|████████▉ | 113/127 [2:08:44<13:30, 57.87s/it]

city minneapolis saved
Processing Toledo
Segmentation shape after filter:  2471817
Segmentation shape after filter images with problems:  2471793
data consistent
Segmentation shape:  1827523
Variables original:  31
Variables kept:  30


 90%|████████▉ | 114/127 [2:09:21<11:13, 51.82s/it]

city toledo saved
Processing Cleveland
Segmentation shape after filter:  2406930
Segmentation shape after filter images with problems:  2406831
data consistent
Segmentation shape:  2049202
Variables original:  31
Variables kept:  30


 91%|█████████ | 115/127 [2:10:04<09:46, 48.91s/it]

city cleveland saved
Processing Portland, OR
Segmentation shape after filter:  3295830
Segmentation shape after filter images with problems:  3295480
data consistent
Segmentation shape:  3295480
Variables original:  31
Variables kept:  30


 91%|█████████▏| 116/127 [2:11:10<09:55, 54.18s/it]

city portland,or saved
Processing Raleigh
Segmentation shape after filter:  4658351
Segmentation shape after filter images with problems:  4658088
data consistent
Segmentation shape:  3120413
Variables original:  31
Variables kept:  30


 92%|█████████▏| 117/127 [2:12:23<09:56, 59.67s/it]

city raleigh saved
Processing Miami
Segmentation shape after filter:  1387559
Segmentation shape after filter images with problems:  1387372
data consistent
Segmentation shape:  1352920
Variables original:  31
Variables kept:  30


 93%|█████████▎| 118/127 [2:12:54<07:40, 51.20s/it]

city miami saved
Processing San Francisco
Segmentation shape after filter:  1311181
Segmentation shape after filter images with problems:  1311069
data consistent
Segmentation shape:  1279693
Variables original:  31
Variables kept:  30


 94%|█████████▎| 119/127 [2:13:32<06:17, 47.23s/it]

city sanfrancisco saved
Processing Boston
Segmentation shape after filter:  441140
Segmentation shape after filter images with problems:  441120
data consistent
Segmentation shape:  436799
Variables original:  31
Variables kept:  30


 94%|█████████▍| 120/127 [2:13:43<04:13, 36.25s/it]

city boston saved
Processing Denver
Segmentation shape after filter:  3230259
Segmentation shape after filter images with problems:  3229997
data consistent
Segmentation shape:  3108131
Variables original:  31
Variables kept:  30


 95%|█████████▌| 121/127 [2:14:51<04:35, 45.90s/it]

city denver saved
Processing Detroit
Segmentation shape after filter:  3895144
Segmentation shape after filter images with problems:  3894982
data consistent
Segmentation shape:  3894952
Variables original:  31
Variables kept:  30


 96%|█████████▌| 122/127 [2:16:19<04:51, 58.40s/it]

city detroit saved
Processing Los Angeles
Segmentation shape after filter:  10108054
Segmentation shape after filter images with problems:  10107621
data consistent
Segmentation shape:  10107621
Variables original:  31
Variables kept:  30


 98%|█████████▊| 124/127 [2:19:52<04:01, 80.65s/it]

city losangeles saved
Processing Houston
Segmentation shape after filter:  3269415
Segmentation shape after filter images with problems:  3269202
data consistent
Segmentation shape:  2604887
Variables original:  31
Variables kept:  30


 98%|█████████▊| 125/127 [2:21:53<03:01, 90.78s/it]

city houston saved
Processing Philadelphia
Segmentation shape after filter:  5177748
Segmentation shape after filter images with problems:  5177328
data consistent
Segmentation shape:  4964552
Variables original:  31
Variables kept:  30


 99%|█████████▉| 126/127 [2:23:39<01:34, 94.60s/it]

city philadelphia saved
Processing Chicago
Segmentation shape after filter:  1632887
Segmentation shape after filter images with problems:  1632603
data consistent
Segmentation shape:  1159541
Variables original:  31
Variables kept:  30


100%|██████████| 127/127 [2:25:44<00:00, 68.85s/it] 

city chicago saved





In [2]:
# Sanity check: read one city's exported summary back in to confirm the file loads.
cityabbr = "hongkong"
summary_path = f"{EXPORT_FOLDER}/{cityabbr}_seg_summary.csv"
seg_df_summary_update = pd.read_csv(summary_path)

In [2]:
# Build one table covering every city: read each per-city summary CSV,
# label its rows with the city's abbreviation, then stack the pieces.
city_ls = pd.read_csv("../city_meta.csv")["City"].unique().tolist()
city_frames = []
for city in tqdm(city_ls):
    # File names use the lower-cased city name with spaces removed.
    cityabbr = city.lower().replace(" ", "")
    summary_path = f"{EXPORT_FOLDER}/{cityabbr}_seg_summary.csv"
    city_frames.append(pd.read_csv(summary_path).assign(city_lower=cityabbr))
# ignore_index=True renumbers the rows 0..n-1 across all cities.
seg_df_all_h3 = pd.concat(city_frames, ignore_index=True)
seg_df_all_h3.head()

  0%|          | 0/127 [00:00<?, ?it/s]

100%|██████████| 127/127 [00:08<00:00, 15.49it/s]


Unnamed: 0,h3_9,sidewalk_indicator,exposure_indicator,img,panoid,sidewalk_presence,exposure_presence,obj_bench,obj_bicycle,obj_bus,obj_car,obj_chair,obj_fire hydrant,obj_motorcycle,obj_person,obj_stop sign,obj_traffic light,obj_train,obj_truck,city_lower
0,89c2e302003ffff,87,92,92,23,0.945652,1.0,0.0,0.142857,1.309524,25.404762,0.0,0.071429,0.428571,2.309524,0.0,1.47619,0.0,1.904762,buenosaires
1,89c2e302007ffff,107,108,108,27,0.990741,1.0,0.013289,0.172757,0.833887,26.524917,0.0,0.016611,0.750831,3.392027,0.0,0.674419,0.006645,0.973422,buenosaires
2,89c2e30200bffff,109,112,112,28,0.973214,1.0,0.0,0.018349,0.633028,21.06422,0.0,0.009174,0.137615,2.559633,0.055046,0.614679,0.018349,0.853211,buenosaires
3,89c2e30200fffff,118,120,120,30,0.983333,1.0,0.0,0.0,0.418182,26.490909,0.0,0.018182,0.163636,1.836364,0.0,0.0,0.0,0.872727,buenosaires
4,89c2e302013ffff,106,108,108,27,0.981481,1.0,0.02934,0.112469,0.687042,24.848411,0.0,0.01956,0.616137,2.948655,0.00978,0.787286,0.017115,1.178484,buenosaires


In [5]:
# Write the combined all-city table to the export folder, without the row index.
combined_csv_path = os.path.join(EXPORT_FOLDER, 'c_exposure_sidewalk_h3.csv')
seg_df_all_h3.to_csv(combined_csv_path, index=False)

In [4]:
# Mean of exposure_presence over each city's rows, indexed by city_lower.
(
    seg_df_all_h3
    .groupby('city_lower')['exposure_presence']
    .mean()
)

city_lower
accra         0.978613
amsterdam     0.834003
antwerp       0.906148
astrakhan     0.915536
athens        0.771027
                ...   
vijayawada    0.855779
warsaw        0.843687
wellington    0.653854
yamaguchi     0.668904
zwolle        0.829262
Name: exposure_presence, Length: 127, dtype: float64