In [76]:
import warnings
warnings.filterwarnings('ignore')
import json
import requests
import os
import pandas as pd
from tqdm import trange

# Load published map and download datasets

In [2]:
with open('published_map.json', 'r') as js_f:
    published_map = js_f.read()

published_map = json.loads(published_map)

In [10]:
datasets = published_map['item']['datasets']
datasets

[{'id': '33f5fd80-ec17-4d8d-b99e-94fbb9e896ea',
  'url': 'https://cdn.published.unfolded.ai/maps/300f66f4-fc9a-4680-8b5a-ba8905629427/published/f17abf10-91e9-45f0-8adc-abea8c43c182/dataset-33f5fd80-ec17-4d8d-b99e-94fbb9e896ea.json',
  'name': 'Needs _demolished.geojson',
  'type': 'managed'},
 {'id': '4f92a025-9e69-4215-9298-349419156687',
  'url': 'https://cdn.published.unfolded.ai/maps/300f66f4-fc9a-4680-8b5a-ba8905629427/published/f17abf10-91e9-45f0-8adc-abea8c43c182/dataset-4f92a025-9e69-4215-9298-349419156687.json',
  'name': 'Heavily_damaged.geojson',
  'type': 'managed'},
 {'id': 'a0334259-aaf4-4a73-8dc0-a6e64af2547b',
  'url': 'https://cdn.published.unfolded.ai/maps/300f66f4-fc9a-4680-8b5a-ba8905629427/published/f17abf10-91e9-45f0-8adc-abea8c43c182/dataset-a0334259-aaf4-4a73-8dc0-a6e64af2547b.json',
  'name': 'Slightly.geojson',
  'type': 'managed'},
 {'id': 'ea49e697-e702-426f-8a8c-6d4feb25d32f',
  'url': 'https://cdn.published.unfolded.ai/maps/300f66f4-fc9a-4680-8b5a-ba890562

In [12]:
for dataset in datasets:
    response = requests.get(dataset['url'])
    if response.status_code == 200:
        with open(os.path.join('datasets', dataset['name']), 'w') as f:
            f.write(response.text)
    else:
        print(f'Failed to download the file. Status code: {response.status_code}')

# Process each dataset and save lat, lng and dmg type for each row

In [77]:
# main df to concat outputs
df = pd.DataFrame()

In [78]:
len_dict = {}
ds_names = os.listdir('datasets')
for i in trange(len(ds_names), desc='Process dataset'):
    
    ds_name = ds_names[i]
    ds_type = ds_name.split('.')[0]
    ds_path = os.path.join('datasets', ds_name)
    
    with open(ds_path, 'r') as f:
        ds = f.read()
    ds = json.loads(ds)
    
    sub_df = pd.DataFrame([ds_type]*len(ds['data']['allData']), columns=['dmg_state'])
    sub_df['coordinate'] = sub_df.apply(lambda row: ds['data']['allData'][row.name][0]['geometry']['coordinates'], axis=1)
    sub_df['lat'] = sub_df['coordinate'].apply(lambda x: x[0])
    sub_df['lng'] = sub_df['coordinate'].apply(lambda x: x[1])
    sub_df = sub_df.drop(columns=['coordinate'])
    
    len_dict[ds_type] = len(sub_df)
    
    if df.empty:
        df = sub_df
    else:
        df = pd.concat([df, sub_df], axis=0)

for ds_type, len_ds in len_dict.items():
    print(f'{ds_type}: {len_ds} rows')

Process dataset: 100%|██████████| 4/4 [00:02<00:00,  1.62it/s]

Slightly: 140200 rows
Collapsed: 10237 rows
Needs _demolished: 5714 rows
Heavily_damaged: 42482 rows





![Screenshot](Screenshot_dmg_state_len.png)

In [79]:
df

Unnamed: 0,dmg_state,lat,lng
0,Slightly,36.230874,37.076459
1,Slightly,38.825763,37.160653
2,Slightly,36.869738,37.581015
3,Slightly,37.409086,37.048067
4,Slightly,38.312896,37.767825
...,...,...,...
42477,Heavily_damaged,38.791988,37.211223
42478,Heavily_damaged,36.221066,37.064872
42479,Heavily_damaged,38.485977,37.581812
42480,Heavily_damaged,38.254576,37.756840


# Save df

In [69]:
df.to_parquet('Building_dmg_w_geo.parquet', index=False)