<a href="https://colab.research.google.com/github/imabari/covid19-data/blob/master/hyougo/hyougo202104_json_isoformat.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [80]:
!pip install jsonschema



In [81]:
# Remote sources published by Hyogo Prefecture.
PCR_XLSX = "https://web.pref.hyogo.lg.jp/kf16/documents/pcr.xlsx"  # workbook loaded into df_pcr
YOUSEI_XLSX = "https://web.pref.hyogo.lg.jp/kf16/documents/yousei.xlsx"  # workbook loaded into df_yousei
KANJA_HTML = "https://web.pref.hyogo.lg.jp/kk03/corona_hasseijyokyo.html"  # page searched by get_kanja() for the current patient workbook link

# Directory passed to fetch_file() for the downloaded workbooks.
DOWNLOAD_DIR = "download"
# Default output directory of dumps_json().
DATA_DIR = "data"

In [82]:
import datetime
import json
import pathlib
import re

In [83]:
import jsonschema
import pandas as pd
import requests

from bs4 import BeautifulSoup
from urllib.parse import urljoin

# ダウンロード

In [84]:
def fetch_soup(url, parser="html.parser", timeout=30):
    """Download ``url`` and parse the response body with BeautifulSoup.

    Args:
        url: Address of the HTML page to fetch.
        parser: Parser name handed to ``BeautifulSoup``.
        timeout: Seconds to wait for the server. ``requests`` has no default
            timeout, so without this argument a stalled connection would
            block the notebook indefinitely.

    Returns:
        The parsed ``BeautifulSoup`` document.

    Raises:
        requests.HTTPError: The server answered with an error status.
        requests.Timeout: The server did not answer within ``timeout``.
    """
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()

    return BeautifulSoup(r.content, parser)

In [85]:
def fetch_file(url, dir=".", timeout=30):
    """Download ``url`` into ``dir`` unless a file of that name already exists.

    The local file name is the last path component of ``url``.

    Args:
        url: Address of the file to download.
        dir: Target directory; created (with parents) when missing.
        timeout: Seconds to wait for the server. ``requests`` has no default
            timeout, so without this argument a stalled connection would
            block the notebook indefinitely.

    Returns:
        ``pathlib.Path`` of the local file.

    Raises:
        requests.HTTPError: The server answered with an error status.
        requests.Timeout: The server did not answer within ``timeout``.
    """
    p = pathlib.Path(dir, pathlib.PurePath(url).name)
    p.parent.mkdir(parents=True, exist_ok=True)

    # An existing file counts as a cache hit and is never refreshed.
    if not p.exists():
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
        p.write_bytes(r.content)

    return p

In [86]:
# Date conversion
def excel2date(data):
    """Convert a column mixing Excel serial numbers and date values to datetimes.

    Values whose string form consists only of digits are read as Excel serial
    day numbers (days counted from 1899-12-30). Every other value is handed to
    ``pd.to_datetime`` and becomes ``NaT`` when it cannot be parsed.

    Args:
        data: ``pd.Series`` holding the raw cell values.

    Returns:
        ``pd.Series`` of datetimes with the same index, row order and name as
        ``data``.
    """
    # Work on a positional index so the two partial results can be merged back
    # into the caller's row order, even when the original index is unsorted or
    # contains duplicates.
    values = data.reset_index(drop=True)

    is_serial = values.astype("str").str.isdigit()

    from_serial = pd.to_datetime(values[is_serial].astype(float), unit="D", origin=pd.Timestamp("1899/12/30"))
    from_string = pd.to_datetime(values[~is_serial], errors="coerce")

    # concat yields "strings first, serials second"; sorting the positional
    # index restores the input order before the original index is put back.
    result = pd.concat([from_string, from_serial]).sort_index()
    result.index = data.index

    return result

In [87]:
def fetch_patients(
    url,
    skip=5,
    header=[
        "番号",
        "発表日",
        "年代",
        "性別",
        "管轄",
        "居住地",
        "職業",
        "発症日",
        "接触歴_有",
        "接触歴_無",
        "接触歴_調査中",
        "備考欄",
    ],
    usecols="B:M",
):
    """Download one patient workbook and return its cleaned core columns.

    Args:
        url: Address of the Excel workbook (downloaded through ``fetch_file``).
        skip: Number of leading rows passed to ``read_excel`` as ``skiprows``.
        header: Column names assigned to the selected columns, in order. The
            default list is only read, never modified.
        usecols: Excel column range passed to ``read_excel``.

    Returns:
        DataFrame with the columns 番号, 発表日, 居住地, 年代, 性別, 備考欄.
        Rows without a 番号 are dropped.

    Raises:
        ValueError: ``header`` does not match the number of columns read.
    """
    p = fetch_file(url)

    # The sheet rows are reversed and then renumbered from 0.
    df = pd.read_excel(p, skiprows=skip, usecols=usecols, header=None).sort_index(ascending=False).reset_index(drop=True)

    # Plain assignment instead of set_axis(..., inplace=True): the ``inplace``
    # argument of DataFrame.set_axis was removed in pandas 2.0.
    df.columns = list(header)

    df.dropna(subset=["番号"], inplace=True)

    df["番号"] = df["番号"].astype(int)
    df["発表日"] = excel2date(df["発表日"])
    df["備考欄"] = df["備考欄"].str.strip()

    return df.loc[:, ["番号", "発表日", "居住地", "年代", "性別", "備考欄"]]

In [88]:
def get_kanja():
    """Build the combined patient table from all published workbooks.

    Three workbooks are read from fixed URLs; the address of the fourth one
    is taken from the Excel link found on ``KANJA_HTML``.

    Returns:
        DataFrame indexed by 番号 (sorted ascending) holding the columns
        returned by ``fetch_patients``.

    Raises:
        RuntimeError: No matching Excel link was found on ``KANJA_HTML``.
    """
    df1 = fetch_patients(
        "https://web.pref.hyogo.lg.jp/kk03/documents/corona-kanjajokyou1.xlsx",
        header=["番号", "発表日", "年代", "性別", "管轄", "居住地", "職業", "発症日", "渡航歴", "備考欄"],
        usecols="B:K",
    )

    df2 = fetch_patients("https://web.pref.hyogo.lg.jp/kk03/documents/corona-kanjajokyou2.xlsx", skip=3)

    df3 = fetch_patients("https://web.pref.hyogo.lg.jp/kk03/documents/corona-kanjajokyou3.xlsx")

    soup = fetch_soup(KANJA_HTML)
    # ``string=`` is the current name of the BeautifulSoup argument formerly
    # spelled ``text=``.
    tag = soup.find("a", class_="icon_excel", string=re.compile("^新型コロナウイルスに感染した患者の状況"))
    # Fail with a clear message instead of an AttributeError on None (or a
    # silent download of the HTML page itself when href is empty).
    if tag is None or not tag.get("href"):
        raise RuntimeError(f"patient workbook link not found on {KANJA_HTML}")
    url = urljoin(KANJA_HTML, tag.get("href"))

    df4 = fetch_patients(url)

    # Renumber 21045-21050 to 21145-21150: only the rows that are duplicates
    # of an earlier 番号 AND fall in 21045-21050 get +100.
    df4["番号"] = df4["番号"].mask(df4.duplicated(subset=["番号"]) & df4["番号"].isin([21045, 21046, 21047, 21048, 21049, 21050]), df4["番号"] + 100)

    # Drop any remaining duplicated 番号 (first occurrence is kept).
    df4.drop_duplicates(subset="番号", inplace=True)

    df = pd.concat([df1, df2, df3, df4]).set_index("番号").sort_index()

    return df

# SCHEMA

In [89]:
# JSON Schema for the age payload: {"data": {<key>: <integer>}, "last_update": <string>}.
AGE_SCHEMA = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "required": ["data", "last_update"],
    "properties": {
        # Any key is accepted under "data"; every value must be an integer.
        "data": {
            "type": "object",
            "additionalProperties": {"type": "integer", "default": 0},
        },
        "last_update": {"type": "string", "format": "date-time"},
    },
}

In [90]:
# JSON Schema for the age_summary payload: integer series per key plus date labels.
AGE_SUMMARY_SCHEMA = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "required": ["data", "labels", "last_update"],
    "additionalProperties": False,
    "properties": {
        # Each key under "data" maps to a list of integers.
        "data": {
            "type": "object",
            "additionalProperties": {
                "type": "array",
                "items": {"type": "integer", "default": 0},
            },
        },
        "labels": {"type": "array", "items": {"type": "string", "format": "date"}},
        "last_update": {"type": "string", "format": "date-time"},
    },
}

In [91]:
# JSON Schema for the inspections payload: one record per 判明日.
INSPECTIONS_SCHEMA = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "required": ["data", "last_update"],
    "additionalProperties": False,
    "properties": {
        "last_update": {"type": "string", "format": "date-time"},
        "data": {
            "type": "array",
            "items": {
                "type": "object",
                "required": ["判明日", "地方衛生研究所等", "民間検査機関等", "陽性確認"],
                "additionalProperties": False,
                "properties": {
                    "判明日": {"type": "string", "format": "date"},
                    "地方衛生研究所等": {"type": "integer"},
                    # Nested object: any key, integer values.
                    "民間検査機関等": {
                        "type": "object",
                        "additionalProperties": {"type": "integer"},
                    },
                    "陽性確認": {"type": "integer"},
                },
            },
        },
    },
}

In [92]:
# JSON Schema for the inspections_summary payload: integer series per key plus date labels.
INSPECTIONS_SUMMARY_SCHEMA = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "required": ["data", "labels", "last_update"],
    "additionalProperties": False,
    "properties": {
        # Each key under "data" maps to a list of integers.
        "data": {
            "type": "object",
            "additionalProperties": {
                "type": "array",
                "items": {"type": "integer", "default": 0},
            },
        },
        "labels": {"type": "array", "items": {"type": "string", "format": "date"}},
        "last_update": {"type": "string", "format": "date-time"},
    },
}

In [93]:
# JSON Schema for the main_summary payload: a tree of {"attr", "value"} nodes,
# nested Main -> Inspections -> Patients -> Symptoms through "children".
MAIN_SUMMARY_SCHEMA = {
    "$schema": "http://json-schema.org/draft-06/schema#",
    "$ref": "#/definitions/Main",
    "definitions": {
        # Root node; the only level that carries "last_update".
        "Main": {
            "title": "Main",
            "type": "object",
            "required": ["attr", "children", "last_update", "value"],
            "additionalProperties": False,
            "properties": {
                "attr": {"type": "string"},
                "value": {"type": "integer", "default": 0},
                "children": {"type": "array", "items": {"$ref": "#/definitions/Inspections"}},
                "last_update": {"type": "string", "format": "date-time"},
            },
        },
        # Second level; "children" is mandatory here.
        "Inspections": {
            "title": "Inspections",
            "type": "object",
            "required": ["attr", "children", "value"],
            "additionalProperties": False,
            "properties": {
                "attr": {"type": "string"},
                "value": {"type": "integer", "default": 0},
                "children": {"type": "array", "items": {"$ref": "#/definitions/Patients"}},
            },
        },
        # Third level; "children" is optional here.
        "Patients": {
            "title": "Patients",
            "type": "object",
            "required": ["attr", "value"],
            "additionalProperties": False,
            "properties": {
                "attr": {"type": "string"},
                "value": {"type": "integer", "default": 0},
                "children": {"type": "array", "items": {"$ref": "#/definitions/Symptoms"}},
            },
        },
        # Leaf level; no "children" allowed.
        "Symptoms": {
            "title": "Symptoms",
            "type": "object",
            "required": ["attr", "value"],
            "additionalProperties": False,
            "properties": {
                "attr": {"type": "string"},
                "value": {"type": "integer", "default": 0},
            },
        },
    },
}

In [94]:
# JSON Schema for the patients payload: one "Datum" record per patient.
PATIENTS_SCHEMA = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "$ref": "#/definitions/Main",
    "definitions": {
        "Main": {
            "type": "object",
            "additionalProperties": False,
            "properties": {
                "data": {"type": "array", "items": {"$ref": "#/definitions/Datum"}},
                # Declared as date-time like "last_update" in the other schemas.
                "last_update": {"type": "string", "format": "date-time"},
                "exclude_patients": {"type": "array", "items": {"type": "integer"}},
            },
            "required": ["data", "exclude_patients", "last_update"],
            "title": "Main",
        },
        "Datum": {
            "type": "object",
            "additionalProperties": False,
            "properties": {
                "No": {"type": "integer"},
                "居住地": {"type": "string"},
                "年代": {"$ref": "#/definitions/Age"},
                "性別": {"$ref": "#/definitions/Sex"},
                "備考": {"type": "string"},
                "退院": {"type": "null"},
                "date": {"type": "string", "format": "date"},
                # リリース日 is written as a full ISO-8601 timestamp with offset
                # (e.g. 2020-03-01T00:00:00+09:00), so the format is
                # "date-time"; "date" would reject it once format checking
                # is enabled.
                "リリース日": {"type": "string", "format": "date-time"},
                "曜日": {"$ref": "#/definitions/Week"},
            },
            "required": ["date", "リリース日", "備考", "居住地", "年代", "性別", "曜日", "No", "退院"],
            "title": "Datum",
        },
        "Age": {
            "type": "string",
            "enum": [
                "10歳未満",
                "10代",
                "20代",
                "30代",
                "40代",
                "50代",
                "60代",
                "70代",
                "80代",
                "90歳以上",
                "非公表",
            ],
            "title": "Age",
        },
        "Sex": {"type": "string", "enum": ["男性", "女性", "非公表"], "title": "Sex"},
        "Week": {
            "type": "string",
            "enum": ["月", "火", "水", "木", "金", "土", "日"],
            "title": "Week",
        },
    },
}

In [95]:
# JSON Schema shared by the patients_summary and current_patients payloads:
# a list of {"日付", "小計"} records.
PATIENTS_SUMMARY_SCHEMA = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "additionalProperties": False,
    "properties": {
        "data": {
            "type": "array",
            "items": {
                "type": "object",
                "additionalProperties": False,
                "properties": {
                    # 日付 is written as a full ISO-8601 timestamp with offset
                    # (e.g. 2021-05-01T00:00:00+09:00), so the format is
                    # "date-time"; "date" would reject it once format
                    # checking is enabled.
                    "日付": {"type": "string", "format": "date-time"},
                    "小計": {"default": 0, "type": "integer"},
                },
                "required": ["小計", "日付"],
            },
        },
        "last_update": {
            "format": "date-time",
            "type": "string",
        },
    },
    "required": ["data", "last_update"],
}

# データラングリング

In [96]:
def dumps_json(file_name, json_data, dir=DATA_DIR):
    """Write ``json_data`` to ``dir/file_name`` as indented UTF-8 JSON.

    Args:
        file_name: Name of the output file.
        json_data: JSON-serialisable object to write.
        dir: Output directory; created (with parents) when missing.
    """
    p = pathlib.Path(dir, file_name)
    p.parent.mkdir(parents=True, exist_ok=True)

    # ensure_ascii=False emits Japanese text verbatim, so the encoding is
    # pinned: the platform default is not guaranteed to be UTF-8.
    with p.open(mode="w", encoding="utf-8") as fw:
        json.dump(json_data, fw, ensure_ascii=False, indent=4)

In [97]:
# Last-update timestamp: current time in a fixed UTC+9 zone
JST = datetime.timezone(datetime.timedelta(hours=9))
dt_now = datetime.datetime.now(tz=JST)

In [98]:
# Midnight of the current day, keeping dt_now's timezone.
last_update = datetime.datetime.combine(dt_now.date(), datetime.time(0, 0), tzinfo=dt_now.tzinfo)
# Manual switch (disabled): shift the stamp back by one day.
# last_update -= datetime.timedelta(days=1)

## pcr.xlsx

In [99]:
# Local copy of pcr.xlsx inside DOWNLOAD_DIR.
pcr_path = fetch_file(url=PCR_XLSX, dir=DOWNLOAD_DIR)

In [100]:
# Read the sheet indexed by 年月日, treat blank cells as 0 and cast everything to int.
pcr_raw = pd.read_excel(pcr_path, index_col="年月日")
df_pcr = pcr_raw.fillna(0).astype(int)

In [101]:
# Replace the spreadsheet's column titles with the short names used below.
pcr_column_names = {
    "検査件数（合計）": "合計",
    "うち地方衛生研究所等によるPCR検査件数": "地方衛生研究所等",
    "うち民間検査機関等によるPCR検査件数": "民間検査機関等_PCR検査",
    "うち民間検査機関等による抗原検査件数": "民間検査機関等_抗原検査",
    "陽性件数": "陽性確認",
}
df_pcr = df_pcr.rename(columns=pcr_column_names)

In [102]:
# Row-wise total of the two 民間検査機関等 columns.
df_pcr["民間検査機関等"] = df_pcr[["民間検査機関等_PCR検査", "民間検査機関等_抗原検査"]].sum(axis=1)

In [103]:
# ISO-8601 string of each index date, localised to Asia/Tokyo.
df_pcr["日付"] = [pd.Timestamp(day, tz="Asia/Tokyo").isoformat() for day in df_pcr.index]

In [104]:
# Write the prepared table to pcr.tsv (tab-separated) in the working directory.
df_pcr.to_csv("pcr.tsv", sep="\t")

### inspections_summary

In [105]:
# Two count columns for the summary, plus the index dates as YYYY-MM-DD labels.
summary_columns = ["地方衛生研究所等", "民間検査機関等"]
df_insp_sum = df_pcr[summary_columns].copy()

labels = df_insp_sum.index.strftime("%Y-%m-%d")

In [106]:
# Payload: one list per column, the date labels, and the update stamp.
inspections_summary = dict(
    data=df_insp_sum.to_dict(orient="list"),
    labels=labels.tolist(),
    last_update=last_update.isoformat(),
)

In [107]:
# Check the payload against its schema, then write it (dumps_json defaults to DATA_DIR).
jsonschema.validate(inspections_summary, INSPECTIONS_SUMMARY_SCHEMA)

dumps_json("inspections_summary.json", inspections_summary)

### inspections

In [108]:
# Columns needed for the per-day records, plus 判明日 as a YYYY-MM-DD string.
insp_columns = ["地方衛生研究所等", "民間検査機関等_PCR検査", "民間検査機関等_抗原検査", "陽性確認"]
df_insp = df_pcr[insp_columns].copy()
df_insp["判明日"] = [day.strftime("%Y-%m-%d") for day in df_insp.index]

In [109]:
# Order the records by index date, ascending.
df_insp = df_insp.sort_index()

In [110]:
# One record per row; the two 民間検査機関等 columns become a nested object.
insp_dict = []
for _, day in df_insp.iterrows():
    private_labs = {"PCR検査": day["民間検査機関等_PCR検査"], "抗原検査": day["民間検査機関等_抗原検査"]}
    insp_dict.append(
        {
            "判明日": day["判明日"],
            "地方衛生研究所等": day["地方衛生研究所等"],
            "民間検査機関等": private_labs,
            "陽性確認": day["陽性確認"],
        }
    )

In [111]:
# Payload: the per-day records and the update stamp.
inspections = dict(data=insp_dict, last_update=last_update.isoformat())

In [112]:
# NOTE(review): schema validation is switched off for this payload; the reason
# is not recorded here — confirm before re-enabling.
# jsonschema.validate(inspections, INSPECTIONS_SCHEMA)
dumps_json("inspections.json", inspections)

### patients_summary

In [113]:
# Date string and daily positives; 陽性確認 is exposed under the name 小計.
df_pts = df_pcr[["日付", "陽性確認"]].rename(columns={"陽性確認": "小計"})

In [114]:
# Payload: one {"日付", "小計"} record per row and the update stamp.
patients_summary = dict(
    data=df_pts.to_dict(orient="records"),
    last_update=last_update.isoformat(),
)

In [115]:
# Check the payload against its schema, then write it (dumps_json defaults to DATA_DIR).
jsonschema.validate(patients_summary, PATIENTS_SUMMARY_SCHEMA)

dumps_json("patients_summary.json", patients_summary)

## kanjya.xlsx

### patients

In [116]:
# Combined patient table from all workbooks, indexed by 番号.
df_kanja = get_kanja()

In [117]:
# Column dtypes and non-null counts of the combined table.
df_kanja.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 32169 entries, 1 to 32169
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   発表日     32042 non-null  datetime64[ns]
 1   居住地     32042 non-null  object        
 2   年代      32042 non-null  object        
 3   性別      32042 non-null  object        
 4   備考欄     14095 non-null  object        
dtypes: datetime64[ns](1), object(4)
memory usage: 1.5+ MB


In [118]:
# 番号 values whose row is empty in every column.
is_blank_row = df_kanja.isna().all(axis=1)
exclude_lst = df_kanja.index[is_blank_row.to_numpy()].to_list()

In [119]:
# Remove the rows that are empty in every column.
df_kanja.dropna(how="all", inplace=True)

# Normalise the age labels; the patterns are applied as regular expressions.
age_patterns = {"10?[歳代]未満": "10歳未満", "90(歳以上)?": "90歳以上", "([1-8]0$)": r"\1代"}
df_kanja["年代"] = df_kanja["年代"].astype(str).replace(age_patterns, regex=True)

# Line breaks inside cells: removed from 居住地, turned into a space in 備考欄.
for text_col, filler in (("居住地", ""), ("備考欄", " ")):
    df_kanja[text_col] = df_kanja[text_col].str.replace("\n", filler)

In [120]:
# Write the cleaned patient table to kanja.tsv (tab-separated) in the working directory.
df_kanja.to_csv("kanja.tsv", sep="\t")

In [121]:
# Positive-patient records

df_pt = df_kanja.reset_index()

df_pt["退院"] = None

df_pt["date"] = df_pt["発表日"].dt.strftime("%Y-%m-%d")
df_pt["リリース日"] = df_pt["発表日"].apply(lambda d: pd.Timestamp(d, tz='Asia/Tokyo').isoformat())

df_pt["曜日"] = df_pt["発表日"].dt.dayofweek.map({0: "月", 1: "火", 2: "水", 3: "木", 4: "金", 5: "土", 6: "日"})

# Normalise the variants "NO." / "N0." / "NO," / "N0," / "No," to "No.".
# regex=True is explicit because pandas 2.0 switched the default to literal
# matching, and the separator is a character class so that "." no longer acts
# as a wildcard that swallows the character following "NO"/"N0".
df_pt["備考欄"] = df_pt["備考欄"].str.replace(r"N[O0][.,]|No,", "No.", regex=True)
df_pt["備考欄"] = df_pt["備考欄"].str.replace("・", "、", regex=False)
df_pt["備考欄"] = df_pt["備考欄"].fillna("")
df_pt.rename(columns={"番号": "No", "備考欄": "備考"}, inplace=True)

# Raw string: "\s" in a plain string literal is an invalid escape sequence.
df_pt["性別"] = df_pt["性別"].str.replace(r"\s", "", regex=True).replace({"男": "男性", "女": "女性"})

df_pt.drop("発表日", axis=1, inplace=True)

df_pt

Unnamed: 0,No,居住地,年代,性別,備考,退院,date,リリース日,曜日
0,1,西宮市,40代,男性,特定できず,,2020-03-01,2020-03-01T00:00:00+09:00,日
1,2,神戸市,40代,男性,感染経路確認済,,2020-03-03,2020-03-03T00:00:00+09:00,火
2,3,神戸市,40代,女性,2/15、16に大阪のライブハウスArcに参加,,2020-03-03,2020-03-03T00:00:00+09:00,火
3,4,福崎町,50代,女性,2/15、16に大阪のライブハウスArcに参加,,2020-03-05,2020-03-05T00:00:00+09:00,木
4,5,姫路市,40代,男性,2/19に大阪のSoap operaライブに参加,,2020-03-06,2020-03-06T00:00:00+09:00,金
...,...,...,...,...,...,...,...,...,...
32037,32165,洲本健康福祉事務所管内,80代,女性,,,2021-05-01,2021-05-01T00:00:00+09:00,土
32038,32166,洲本健康福祉事務所管内,50代,女性,,,2021-05-01,2021-05-01T00:00:00+09:00,土
32039,32167,洲本健康福祉事務所管内,50代,女性,,,2021-05-01,2021-05-01T00:00:00+09:00,土
32040,32168,洲本健康福祉事務所管内,80代,女性,,,2021-05-01,2021-05-01T00:00:00+09:00,土


In [122]:
# Write the cleaned patient table to kanja.csv (comma-separated) in the working directory.
df_kanja.to_csv("kanja.csv")

In [123]:
# Payload: one record per patient, the update stamp, and the empty 番号 values.
patients = dict(
    data=df_pt.to_dict(orient="records"),
    last_update=last_update.isoformat(),
    exclude_patients=exclude_lst,
)

In [124]:
# Check the payload against its schema, then write it (dumps_json defaults to DATA_DIR).
jsonschema.validate(patients, PATIENTS_SCHEMA)

dumps_json("patients.json", patients)

### age

In [125]:
# Count patients per age band

age_list = ["10歳未満","10代", "20代", "30代", "40代", "50代", "60代", "70代", "80代", "90歳以上", "非公表"]

# Bands missing from the data are reported as 0; the result follows age_list order.
df_age = (
    df_kanja["年代"]
    .value_counts()
    .sort_index()
    .reindex(age_list, fill_value=0)
    .astype(int)
)

In [126]:
# Payload: count per age band and the update stamp.
age = dict(data=df_age.to_dict(), last_update=last_update.isoformat())

In [127]:
# Check the payload against its schema, then write it (dumps_json defaults to DATA_DIR).
jsonschema.validate(age, AGE_SCHEMA)

dumps_json("age.json", age)

### age_summary

In [128]:
# Cases per 発表日 (rows) and age band (columns), with the columns in age_list order.
daily_by_age = pd.crosstab(df_kanja["発表日"], df_kanja["年代"])
df_ages = daily_by_age.reindex(columns=age_list, fill_value=0).astype(int)

# Fill in the missing dates: from the first 発表日 to the last pcr.xlsx date, absent days count 0.
dt_range = pd.date_range(start=df_ages.index[0], end=df_pcr.index[-1])
df_agesum = df_ages.reindex(index=dt_range, fill_value=0)

df_agesum

年代,10歳未満,10代,20代,30代,40代,50代,60代,70代,80代,90歳以上,非公表
2020-03-01,0,0,0,0,1,0,0,0,0,0,0
2020-03-02,0,0,0,0,0,0,0,0,0,0,0
2020-03-03,0,0,0,0,2,0,0,0,0,0,0
2020-03-04,0,0,0,0,0,0,0,0,0,0,0
2020-03-05,0,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
2021-04-27,16,66,86,63,77,57,43,51,31,13,0
2021-04-28,25,62,97,78,79,86,64,60,36,11,1
2021-04-29,16,72,64,76,88,77,50,52,31,8,0
2021-04-30,13,41,56,33,40,46,23,31,30,20,0


In [129]:
# Index dates of df_agesum as YYYY-MM-DD strings (rebinds `labels`).
labels = df_agesum.index.strftime("%Y-%m-%d")

In [130]:
# Payload: one list per age band, the date labels, and the update stamp.
age_summary = dict(
    data=df_agesum.to_dict(orient="list"),
    labels=labels.tolist(),
    last_update=last_update.isoformat(),
)

In [131]:
# Check the payload against its schema, then write it (dumps_json defaults to DATA_DIR).
jsonschema.validate(age_summary, AGE_SUMMARY_SCHEMA)

dumps_json("age_summary.json", age_summary)

## yousei.xlsx

### main_summary

In [132]:
# Local copy of yousei.xlsx inside DOWNLOAD_DIR.
yousei_path = fetch_file(url=YOUSEI_XLSX, dir=DOWNLOAD_DIR)

In [133]:
# Read the sheet indexed by 発表年月日; "-" cells are treated as missing.
df_yousei = pd.read_excel(yousei_path, index_col="発表年月日", na_values="-")

# Remove the "（累計）" marker and surrounding whitespace from every column title.
df_yousei.columns = [title.replace("（累計）", "").strip() for title in df_yousei.columns]

# Manual switch (disabled): shift the index forward by one day.
# df_yousei.index += pd.to_timedelta("1 days")

yousei_column_names = {
    "入院中（合計）": "入院中",
    "入院中（中等症以下）": "軽症・中等症",
    "入院中（重症）": "重症",
    "陽性者数": "陽性患者数",
    "その他医療機関\n福祉施設等": "その他医療機関福祉施設等",
}
# 発表時間 is discarded; the remaining columns get their short names.
df_yousei = df_yousei.drop("発表時間", axis=1).rename(columns=yousei_column_names)

In [134]:
# Write the prepared table to yousei.tsv (tab-separated) in the working directory.
df_yousei.to_csv("yousei.tsv", sep="\t")

In [135]:
# Last row of the table as {column: value}; the main_summary cell reads its figures from it.
d = df_yousei.iloc[-1].to_dict()

In [136]:
# Display the extracted row for a visual check.
d

{'うち入院調整': 1642.0,
 'その他医療機関福祉施設等': 281.0,
 '入院・宿泊療養調整等': 1866.0,
 '入院中': 741.0,
 '宿泊療養': 502.0,
 '検査実施人数': 381462.0,
 '死亡': 677.0,
 '自宅療養': 1404.0,
 '軽症・中等症': 647.0,
 '退院': 26033.0,
 '重症': 94.0,
 '陽性患者数': 31504.0}

In [137]:
def _count(column):
    """Return the value of ``column`` in ``d`` as an int."""
    return int(d[column])


def _leaf(attr, column=None):
    """Build an {"attr", "value"} node; ``column`` defaults to ``attr``."""
    return {"attr": attr, "value": _count(column or attr)}


# Nodes that carry children are assembled first, then nested from the inside out.
hospitalised = dict(_leaf("入院中"), children=[_leaf("軽症・中等症"), _leaf("重症")])
awaiting_placement = dict(_leaf("入院・宿泊療養調整等"), children=[_leaf("入院調整", "うち入院調整")])

positives = dict(
    _leaf("陽性患者数"),
    children=[
        hospitalised,
        _leaf("宿泊療養"),
        awaiting_placement,
        _leaf("自宅療養"),
        _leaf("その他医療機関福祉施設等"),
        _leaf("死亡"),
        _leaf("退院"),
    ],
)

main_summary = dict(
    _leaf("検査実施人数"),
    children=[positives],
    last_update=last_update.isoformat(),
)



In [138]:
# Validate first so that a payload violating the schema is never written to
# disk; the file is then written once (dumps_json defaults to DATA_DIR).
jsonschema.validate(main_summary, MAIN_SUMMARY_SCHEMA)

dumps_json("main_summary.json", main_summary)

### current_patients

In [140]:
# 治療中 = 陽性患者数 minus 死亡 minus 退院.
df_yousei["治療中"] = df_yousei["陽性患者数"].sub(df_yousei["死亡"]).sub(df_yousei["退院"])

In [141]:
# Side by side: running total of 陽性確認 (pcr.xlsx) and 陽性患者数 (yousei.xlsx, 0 where the date is absent).
cumulative_positive = df_pcr["陽性確認"].cumsum()
reported_positive = df_yousei["陽性患者数"].reindex(index=df_pcr.index, fill_value=0)
df_test = pd.concat([cumulative_positive, reported_positive], axis=1)

In [142]:
# Show the dates on which the two totals disagree.
df_test[df_test["陽性確認"] != df_test["陽性患者数"]]

Unnamed: 0_level_0,陽性確認,陽性患者数
年月日,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-03-01,1,0
2020-03-02,1,0
2020-03-03,3,0
2020-03-04,3,0
2020-03-05,4,0
2020-03-06,8,0
2020-03-07,10,0
2020-03-08,12,0
2020-03-09,16,0
2020-05-04,667,668


In [143]:
# 治療中 aligned to the pcr.xlsx dates; dates absent from yousei.xlsx become NaN.
ser_cur = df_yousei["治療中"].reindex(df_pcr.index)

In [144]:
# Use 治療中 where available, else the running total of 陽性確認; then take the
# day-to-day difference (the first row has no predecessor and becomes 0).
daily_level = ser_cur.combine_first(df_pcr["陽性確認"].cumsum())
df_current = daily_level.to_frame(name="小計").diff().fillna(0).astype(int)

In [145]:
# ISO-8601 string of each index date, localised to Asia/Tokyo.
df_current["日付"] = [pd.Timestamp(day, tz="Asia/Tokyo").isoformat() for day in df_current.index]

In [146]:
# Column order used in the records: 日付 first, then 小計.
df_cur_pts = df_current[["日付", "小計"]].copy()

In [147]:
# Payload: one {"日付", "小計"} record per row and the update stamp.
current_patients = dict(
    data=df_cur_pts.to_dict(orient="records"),
    last_update=last_update.isoformat(),
)

In [148]:
# Same record layout as patients_summary, hence the shared schema; then write
# the file (dumps_json defaults to DATA_DIR).
jsonschema.validate(current_patients, PATIENTS_SUMMARY_SCHEMA)

dumps_json("current_patients.json", current_patients)

# チェック

In [149]:
!pip install dictdiffer



In [150]:
from dictdiffer import diff, patch, swap, revert

In [151]:
import pprint

In [152]:
def json_check(fn, d):
    """Compare ``d`` with the JSON file ``fn`` published upstream.

    Prints the file name and the list of differences when the two objects
    are not equal; prints nothing when they match.

    Args:
        fn: File name appended to the upstream gh-pages base URL.
        d: Locally built payload to compare.

    Raises:
        requests.HTTPError: The upstream file could not be fetched.
        requests.Timeout: The server did not answer within 30 seconds.
    """
    r = requests.get(
        "https://raw.githubusercontent.com/stop-covid19-hyogo/covid19-scraping/gh-pages/"
        + fn,
        timeout=30,
    )
    # Fail loudly on an error status instead of trying to parse an error page.
    r.raise_for_status()

    # Decode the response body once and reuse it for both the check and the diff.
    published = r.json()

    if d != published:

        print(fn)
        pprint.pprint(list(diff(d, published)))

In [153]:
# Compare both inspection payloads with the upstream files; differences are printed.
json_check("inspections.json", inspections)
json_check("inspections_summary.json", inspections_summary)

inspections.json
[('change',
  'last_update',
  ('2021-05-03T00:00:00+09:00', '2021-05-02T00:00:00+09:00'))]
inspections_summary.json
[('change',
  'last_update',
  ('2021-05-03T00:00:00+09:00', '2021-05-02T00:00:00+09:00'))]


In [154]:
# Compare both age payloads with the upstream files; differences are printed.
json_check("age.json", age)
json_check("age_summary.json", age_summary)

age.json
[('change', 'data.10歳未満', (1069, 1072)),
 ('change', 'data.10代', (2642, 2646)),
 ('change', 'data.20代', (5543, 5549)),
 ('change', 'data.30代', (3760, 3769)),
 ('change', 'data.40代', (4525, 4535)),
 ('change', 'data.50代', (4697, 4702)),
 ('change', 'data.60代', (3178, 3186)),
 ('change', 'data.70代', (3152, 3164)),
 ('change', 'data.80代', (2392, 2406)),
 ('change', 'data.90歳以上', (1044, 1047)),
 ('change', 'data.非公表', (40, 41)),
 ('change',
  'last_update',
  ('2021-05-03T00:00:00+09:00', '2021-05-02T00:00:00+09:00'))]
age_summary.json
[('change', ['data', '10歳未満', 365], (1, 2)),
 ('change', ['data', '10歳未満', 366], (2, 4)),
 ('change', ['data', '10代', 365], (1, 2)),
 ('change', ['data', '10代', 366], (1, 2)),
 ('change', ['data', '10代', 367], (4, 6)),
 ('change', ['data', '20代', 365], (1, 2)),
 ('change', ['data', '20代', 366], (5, 10)),
 ('change', ['data', '30代', 365], (2, 4)),
 ('change', ['data', '30代', 366], (5, 10)),
 ('change', ['data', '30代', 367], (8, 10)),
 ('change', ['da

In [155]:
# Compare the patients_summary payload with the upstream file; differences are printed.
json_check("patients_summary.json", patients_summary)

patients_summary.json
[('change',
  'last_update',
  ('2021-05-03T00:00:00+09:00', '2021-05-02T00:00:00+09:00'))]


In [156]:
# Compare the patients payload with the upstream file; differences are printed.
json_check("patients.json", patients)

[1;30;43mストリーミング出力は最後の 5000 行に切り捨てられました。[0m
 ('change', ['data', 31207, '居住地'], ('調査中', '加東健康福祉事務所管内')),
 ('change', ['data', 31207, '年代'], ('40代', '60代')),
 ('change', ['data', 31207, '性別'], ('女性', '男性')),
 ('change', ['data', 31207, '備考'], ('', '陽性患者の濃厚接触者')),
 ('change', ['data', 31207, 'date'], ('2021-04-30', '2021-04-29')),
 ('change',
  ['data', 31207, 'リリース日'],
  ('2021-04-30T00:00:00+09:00', '2021-04-29T00:00:00+09:00')),
 ('change', ['data', 31207, '曜日'], ('金', '木')),
 ('change', ['data', 31208, 'No'], (31336, 31261)),
 ('change', ['data', 31208, '居住地'], ('調査中', '加東健康福祉事務所管内')),
 ('change', ['data', 31208, '年代'], ('10代', '30代')),
 ('change', ['data', 31208, '備考'], ('', '陽性患者の濃厚接触者')),
 ('change', ['data', 31208, 'date'], ('2021-04-30', '2021-04-29')),
 ('change',
  ['data', 31208, 'リリース日'],
  ('2021-04-30T00:00:00+09:00', '2021-04-29T00:00:00+09:00')),
 ('change', ['data', 31208, '曜日'], ('金', '木')),
 ('change', ['data', 31209, 'No'], (31337, 31262)),
 ('change', ['data', 312

In [157]:
# Compare the current_patients payload with the upstream file; differences are printed.
json_check("current_patients.json", current_patients)

current_patients.json
[('remove', 'data', [(456, {'小計': 27248, '日付': '2021-05-01T00:00:00+09:00'})]),
 ('change',
  'last_update',
  ('2021-05-03T00:00:00+09:00', '2021-05-02T00:00:00+09:00'))]


In [158]:
# Compare the main_summary payload with the upstream file; differences are printed.
json_check("main_summary.json", main_summary)

main_summary.json
[('change',
  'last_update',
  ('2021-05-03T00:00:00+09:00', '2021-05-01T00:00:00+09:00'))]
