In [1]:
import os
import pandas as pd
import uuid
from utils import xroad

In [2]:
# Root directory that receives the downloaded images and the metadata workbook.
dacs_dataset_dir = './outputs/ishikawa_dacs_dataset'
# exist_ok=True makes the cell safe to re-run and avoids the check-then-create race
# of testing os.path.exists() before calling os.makedirs().
os.makedirs(dacs_dataset_dir, exist_ok=True)

In [3]:
# Search filters and page size for the inspection ("tenken") list endpoint.
prefecture_name = '石川'
office_name = '金沢河川国道'
fetch_limit = 200

# Maps each year to the 'result' list returned by the list endpoint for that year.
bridge_locs = {}

# Five years (2019-2023). Only 2019 onward is used because the item labels of
# tenken/report differ from fiscal 2019 on.
for year in range(2019, 2024):
    url = f'https://road-structures-db-bridge.mlit.go.jp/xROAD/api/v1/bridges/tenken/list/{year}'

    # One filter per (key, value) pair; both use operator code 7.
    # NOTE(review): the meaning of op=7 is not visible here — confirm against the API docs.
    query_filters = [
        {"key": filter_key, "value": filter_value, "op": 7}
        for filter_key, filter_value in (
            ("start_addr", prefecture_name),
            ("office_value", office_name),
        )
    ]

    # NOTE(review): a single request with offset 0 is issued per year, so at most
    # fetch_limit entries are requested — confirm no year exceeds that count.
    params = {
        "querys": query_filters,
        "limit": fetch_limit,
        "offset": 0,
    }

    response_json = xroad.fetch_by_url(url, params)
    bridge_locs[year] = response_json['result']

In [4]:
# Fetching images is slow, so keep only the first 10 entries for a trial run
# (skip this cell to process everything).
# NOTE(review): only the 2019 entry is trimmed; the other years keep their full lists — confirm this is intended.
bridge_locs[2019] = bridge_locs[2019][:10]

In [5]:
# Collect the damage-figure names for every listed bridge.
# NOTE(review): the dict is keyed by loc alone, so a loc that appears under more
# than one year keeps only the entry from the last year processed.
img_names = {}
for year, locs in bridge_locs.items():
    for loc in locs:
        # Report sheet 9 ("part 9", the form used from 2004 onward).
        url_obtaining_names = (
            'https://road-structures-db-bridge.mlit.go.jp/xROAD/api/v1/bridges'
            f'/tenken/report/{year}/{loc}/9/'
        )
        response_json = xroad.fetch_by_url(url_obtaining_names)
        # Only the first element of 'result' is read; its 'c9' field holds the names.
        first_result = response_json['result'][0]
        names_in_diams = first_result['c9']
        img_names[loc] = names_in_diams

In [6]:
# Create the two empty metadata tables that the download loop fills in.
root_ja_columns = ["data_id", "title", "summary", "point", "archive_file"]
files_ja_columns = ["data_id", "file_id", "file"]

root_ja_df = pd.DataFrame(columns=root_ja_columns)
files_ja_df = pd.DataFrame(columns=files_ja_columns)

In [7]:
# Download every damage figure and record one files_ja row per image,
# plus one root_ja row per bridge location.
counter = 0  # running row label for files_ja_df
for year, locs in bridge_locs.items():
    for loc in locs:
        diam_name_dicts = img_names[loc]

        # presumably loc is a "lat,lng" string — TODO confirm against the list endpoint
        lat, lng = loc.split(',')
        # Forcibly add one more digit to each coordinate by appending "0".
        point = f'("{lng}0","{lat}0")'

        data_id = f'i1_ishikawa_data_{loc}'
        data_dir = os.path.join(dacs_dataset_dir, data_id)

        for diam_name_dict in diam_name_dicts:
            diam = diam_name_dict['diameter']
            damage_figs = diam_name_dict['damage_figs']

            # Record the bridge in the root table.
            # NOTE(review): the row label is loc, so each pass of this loop overwrites
            # the previous one and only the last 'diameter' ends up in the summary.
            root_row = {
                "data_id": data_id,
                "title": loc,
                "summary": f'diameter: {diam}',
                "point": point,
                "archive_file": '',
            }
            root_ja_df.loc[loc] = root_row

            for damage_fig in damage_figs:
                # Fetch the image bytes.
                url_obtaining_img = (
                    'https://road-structures-db-bridge.mlit.go.jp/xROAD/api/v1/bridges'
                    f'/tenken/image/{year}/{loc}/4/{damage_fig}'
                )
                content = xroad.fetch_img(url_obtaining_img)

                # The per-bridge directory is created lazily, only once an image exists for it.
                save_img_path = os.path.join(data_dir, damage_fig)
                if not os.path.exists(data_dir):
                    os.makedirs(data_dir)
                with open(save_img_path, 'wb') as img_file:
                    img_file.write(content)

                # Record the saved file in the files table.
                file_row = {
                    "data_id": data_id,
                    "file_id": uuid.uuid4(),
                    "file": f'{damage_fig}',
                }
                files_ja_df.loc[counter] = file_row
                counter += 1


In [8]:
# Write both metadata tables to one workbook, one sheet per table, without the index column.
meta_xlsx_path = os.path.join(dacs_dataset_dir, 'meta.xlsx')
with pd.ExcelWriter(meta_xlsx_path, engine='openpyxl') as writer:
    for sheet_name, sheet_df in (('root_ja', root_ja_df), ('files_ja', files_ja_df)):
        sheet_df.to_excel(writer, sheet_name=sheet_name, index=False)