In [1]:
import os
import pandas as pd
import uuid
from utils import xroad

In [2]:
# preliminaries
pref_code = '07' # two-digit num
city_code = '075035' # code list : https://www.soumu.go.jp/main_content/000925834.pdf
fetch_limit = 200
url = f'https://road-structures-db-bridge.mlit.go.jp/xROAD/api/v1/bridges?pref={pref_code}&city={city_code}&limit={fetch_limit}'

dacs_dataset_dir = './outputs/hiratamura_dacs_dataset'
if not os.path.exists(dacs_dataset_dir):
    os.makedirs(dacs_dataset_dir)

In [3]:
response_json = xroad.fetch_by_url(url)
total_bridges_num = response_json['resultset']['count']
result = response_json['result']
print('total # of bridges in the city: ', total_bridges_num)
if total_bridges_num > fetch_limit:
    print('fetch limit is too low, increase it to get all the bridges')

total # of bridges in the city:  114


In [4]:
success_dict = {}

for i in range(len(result)):
    shisetsu_id = result[i]['shisetsu_id']
    inspection_nendo = result[i]['tenken']['nendo']
    name = result[i]['syogen']['shisetsu']['meisyou']
    lat, lng = result[i]['syogen']['ichi'].values()
    point = f'("{lng}","{lat}")'
    data_id = f'i1_bridge_excel_{i}'

    save_file_name = f'{shisetsu_id}_{inspection_nendo}.xlsx'
    save_path = os.path.join(dacs_dataset_dir, data_id, save_file_name)
    if not os.path.exists(os.path.join(dacs_dataset_dir, data_id)):
        os.makedirs(os.path.join(dacs_dataset_dir, data_id))
    
    success = xroad.save_77_excel(shisetsu_id, inspection_nendo, save_path)
    if success:
        print(f'{shisetsu_id}_{inspection_nendo} saved')
        success_dict[str(i)] = {
            'root_ja':{
                'data_id': data_id,
                'title': name,            
                'point': point,
            },
            'files_ja':{
                'data_id': data_id,
                'file_id': uuid.uuid1(),
                'file': save_file_name
            }
        }
    else:
        print(f'{shisetsu_id}_{inspection_nendo} failed')
        # delete the directory
        os.rmdir(os.path.join(dacs_dataset_dir, data_id))

37.17417,140.59528_2020 saved
37.16639,140.59333_2022 saved
37.20389,140.52611_2019 saved
37.22667,140.54750_2019 saved
37.23694,140.57111_2020 saved
37.23667,140.57111_2022 saved
37.22917,140.55278_2019 saved
37.17417,140.54111_2021 saved
37.18947,140.57292_2023 saved
37.20556,140.59056_2022 saved
37.17554,140.54461_2023 saved
37.16889,140.54194_2022 saved
37.18778,140.58556_2022 saved
37.21861,140.56889_2021 saved
37.21008,140.58154_2023 saved
37.20167,140.58972_2019 saved
37.20932,140.58204_2023 saved
37.17806,140.59333_2020 saved
37.21417,140.57556_2022 saved
37.23259,140.56854_2023 saved
37.20364,140.51138_2020 failed
37.20469,140.51633_2020 saved
37.20591,140.52205_2020 saved
37.20679,140.52185_2022 failed
37.21009,140.53764_2020 saved
37.22612,140.54635_2020 saved
37.22974,140.55218_2020 saved
37.23498,140.55620_2020 saved
37.23194,140.55472_2021 saved
37.21861,140.56861_2022 saved
37.23944,140.55583_2021 saved
37.25500,140.56361_2021 saved
37.26111,140.56694_2021 saved
37.24972

In [5]:
success_dict

{'0': {'root_ja': {'data_id': 'i1_bridge_excel_0',
   'title': '蕨平ボックスカルバート',
   'point': '("37.174170","140.595280")'},
  'files_ja': {'data_id': 'i1_bridge_excel_0',
   'file_id': UUID('13e37566-60f9-11ef-871a-a08069f5749f'),
   'file': '37.17417,140.59528_2020.xlsx'}},
 '1': {'root_ja': {'data_id': 'i1_bridge_excel_1',
   'title': '楢坂第１ボックスカルバート',
   'point': '("37.166390","140.593330")'},
  'files_ja': {'data_id': 'i1_bridge_excel_1',
   'file_id': UUID('1828a984-60f9-11ef-871a-a08069f5749f'),
   'file': '37.16639,140.59333_2022.xlsx'}},
 '2': {'root_ja': {'data_id': 'i1_bridge_excel_2',
   'title': '横川橋',
   'point': '("37.203890","140.526110")'},
  'files_ja': {'data_id': 'i1_bridge_excel_2',
   'file_id': UUID('1c092664-60f9-11ef-871a-a08069f5749f'),
   'file': '37.20389,140.52611_2019.xlsx'}},
 '3': {'root_ja': {'data_id': 'i1_bridge_excel_3',
   'title': '空釜橋',
   'point': '("37.226670","140.547500")'},
  'files_ja': {'data_id': 'i1_bridge_excel_3',
   'file_id': UUID('20adf65

In [6]:
# データフレームの作成
root_ja_df = pd.DataFrame(columns=["data_id", "title", "summary", "point", "archive_file"])
files_ja_df = pd.DataFrame(columns=["data_id", "file_id", "file"])

for key in success_dict.keys():
    root_ja_df.loc[key] = success_dict[key]['root_ja']
    files_ja_df.loc[key] = success_dict[key]['files_ja']
root_ja_df.fillna('')  # NaNを空文字に置換

# save meta data
with pd.ExcelWriter(os.path.join(dacs_dataset_dir, 'meta.xlsx'), engine='openpyxl') as writer:
    root_ja_df.to_excel(writer, sheet_name='root_ja', index=False)
    files_ja_df.to_excel(writer, sheet_name='files_ja', index=False)