<a href="https://colab.research.google.com/github/imabari/covid19-data/blob/master/hyougo/hyougo_json_isoformat.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install jsonschema



In [2]:
!pip install pycurl
!pip install retry

Collecting pycurl
[?25l  Downloading https://files.pythonhosted.org/packages/50/1a/35b1d8b8e4e23a234f1b17a8a40299fd550940b16866c9a1f2d47a04b969/pycurl-7.43.0.6.tar.gz (222kB)
[K     |█▌                              | 10kB 15.2MB/s eta 0:00:01[K     |███                             | 20kB 3.0MB/s eta 0:00:01[K     |████▍                           | 30kB 3.7MB/s eta 0:00:01[K     |█████▉                          | 40kB 4.3MB/s eta 0:00:01[K     |███████▍                        | 51kB 3.4MB/s eta 0:00:01[K     |████████▉                       | 61kB 3.8MB/s eta 0:00:01[K     |██████████▎                     | 71kB 4.0MB/s eta 0:00:01[K     |███████████▊                    | 81kB 4.2MB/s eta 0:00:01[K     |█████████████▎                  | 92kB 4.4MB/s eta 0:00:01[K     |██████████████▊                 | 102kB 4.3MB/s eta 0:00:01[K     |████████████████▏               | 112kB 4.3MB/s eta 0:00:01[K     |█████████████████▋              | 122kB 4.3MB/s eta 0:00:01[K

In [3]:
PCR_XLSX = "https://web.pref.hyogo.lg.jp/kk03/documents/pcr.xlsx"
YOUSEI_XLSX = "https://web.pref.hyogo.lg.jp/kk03/documents/yousei.xlsx"
KANJA_HTML = "https://web.pref.hyogo.lg.jp/kk03/corona_kanjyajyokyo.html"

DOWNLOAD_DIR = "download"
DATA_DIR = "data"

In [4]:
from retry import retry
import pathlib

# ダウンロード

In [5]:
@retry(tries=5, delay=5, backoff=3)
def get_file(url, dir=".", timeout=30):
    """Download ``url`` into ``dir`` and return the path of the saved file.

    Args:
        url: Address of the file to fetch. The last path component of the URL
            is used as the local file name.
        dir: Destination directory; created (with parents) when missing.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block forever and the retry
            decorator would never get a chance to run.

    Returns:
        pathlib.Path of the file that was written.

    Raises:
        requests.HTTPError: If the server answers with an error status.
            Exceptions are re-raised once the ``retry`` decorator
            (tries=5, delay=5, backoff=3) has exhausted its attempts.
    """
    # NOTE: ``requests`` is imported in a later cell of this notebook, so that
    # cell has to run before the first call to this function.
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()

    p = pathlib.Path(dir, pathlib.PurePath(url).name)
    p.parent.mkdir(parents=True, exist_ok=True)

    with p.open(mode="wb") as fw:
        fw.write(r.content)

    return p

# SCHEMA

In [6]:
# JSON Schema for age.json: integer counts keyed by age bracket plus the
# update timestamp. Unknown top-level keys are rejected, matching the other
# schemas in this notebook.
AGE_SCHEMA = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "additionalProperties": False,
    "properties": {
        "data": {
            "type": "object",
            "additionalProperties": {
                "default": 0,
                "type": "integer"
            }
        },
        "last_update": {
            "format": "date-time",
            "type": "string"
        }
    },
    "required": [
        "data",
        "last_update"
    ],
}

In [7]:
AGE_SUMMARY_SCHEMA = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "additionalProperties": False,
    "properties": {
        "data": {
            "type": "object",
            "additionalProperties": {
                "type": "array",
                "items": {"default": 0, "type": "integer"},
            },
        },
        "labels": {
            "type": "array",
            "items": {"pattern": "^[0-9]{1,2}/[0-9]{1,2}$", "type": "string"},
        },
        "last_update": {
            "format": "date-time",
            "type": "string",
        },
    },
    "required": ["data", "labels", "last_update"],
}

In [8]:
# JSON Schema for clusters.json: a list of per-date records under "data" plus
# the "last_update" timestamp.
CLUSTERS_SCHEMA = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "additionalProperties": False,
    "properties": {
        "data": {
            "type": "array",
            "items": {
                "type": "object",
                # NOTE(review): "oneOf" passes when exactly one branch matches.
                # The first branch only constrains "日付" and accepts any other
                # property, so a record can pass without the integer rule of
                # the second branch ever being enforced. Presumably the intent
                # was a single subschema combining both rules - confirm.
                "oneOf": [
                    {
                        "properties": {"日付": {"type": "string", "format": "date-time"}},
                    },
                    {
                        "additionalProperties": {"type": "integer"},
                    }
                ],
            },
        },
        "last_update": {"type": "string", "format": "date-time"},
    },
    "required": ["data", "last_update"],
}

In [9]:
CLUSTERS_SUMMARY_SCHEMA = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "additionalProperties": False,
    "properties": {
        "data": {
            "type": "object",
            "additionalProperties": {"default": 0, "type": "integer"},
        },
        "last_update": {
            "format": "date-time",
            "type": "string",
        },
    },
    "required": ["data", "last_update"],
}

In [10]:
INSPECTIONS_SCHEMA = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "additionalProperties": False,
    "properties": {
        "data": {
            "type": "array",
            "items": {
                "type": "object",
                "additionalProperties": False,
                "properties": {
                    "判明日": {"type": "string", "format": "date"},
                    "地方衛生研究所等": {"type": "integer"},
                    "民間検査機関等": {
                        "type": "object",
                        "additionalProperties": {"type": "integer"},
                    },
                    "陽性確認": {"type": "integer"},
                },
                "required": ["判明日", "地方衛生研究所等", "民間検査機関等", "陽性確認"],
            },
        },
        "last_update": {"type": "string", "format": "date-time"},
    },
    "required": ["data", "last_update"],
}

In [11]:
INSPECTIONS_SUMMARY_SCHEMA = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "additionalProperties": False,
    "properties": {
        "data": {
            "type": "object",
            "additionalProperties": {
                "type": "array",
                "items": {"default": 0, "type": "integer"},
            },
        },
        "labels": {
            "type": "array",
            "items": {"pattern": r"^[0-9]{1,2}/[0-9]{1,2}$", "type": "string"},
        },
        "last_update": {
            "format": "date-time",
            "type": "string",
        },
    },
    "required": ["data", "labels", "last_update"],
}

In [12]:
MAIN_SUMMARY_SCHEMA = {
    "$schema": "http://json-schema.org/draft-06/schema#",
    "$ref": "#/definitions/Main",
    "definitions": {
        "Main": {
            "type": "object",
            "additionalProperties": False,
            "properties": {
                "attr": {"type": "string"},
                "value": {"type": "integer", "default": 0},
                "children": {
                    "type": "array",
                    "items": {"$ref": "#/definitions/Inspections"},
                },
                "last_update": {
                    "format": "date-time",
                    "type": "string",
                },
            },
            "required": ["attr", "children", "last_update", "value"],
            "title": "Main",
        },
        "Inspections": {
            "type": "object",
            "additionalProperties": False,
            "properties": {
                "attr": {"type": "string"},
                "value": {"type": "integer", "default": 0},
                "children": {
                    "type": "array",
                    "items": {"$ref": "#/definitions/Patients"},
                },
            },
            "required": ["attr", "children", "value"],
            "title": "Inspections",
        },
        "Patients": {
            "type": "object",
            "additionalProperties": False,
            "properties": {
                "attr": {"type": "string"},
                "value": {"type": "integer", "default": 0},
                "children": {
                    "type": "array",
                    "items": {"$ref": "#/definitions/Symptoms"},
                },
            },
            "required": ["attr", "value"],
            "title": "Patients",
        },
        "Symptoms": {
            "type": "object",
            "additionalProperties": False,
            "properties": {
                "attr": {"type": "string"},
                "value": {"type": "integer", "default": 0},
            },
            "required": ["attr", "value"],
            "title": "Symptoms",
        },
    },
}

In [13]:
# JSON Schema for patients.json: one "Datum" record per confirmed case plus
# the update timestamp.
PATIENTS_SCHEMA = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "$ref": "#/definitions/Main",
    "definitions": {
        "Main": {
            "type": "object",
            "additionalProperties": False,
            "properties": {
                "data": {"type": "array", "items": {"$ref": "#/definitions/Datum"}},
                # Written with datetime.isoformat(), like every other output.
                "last_update": {"type": "string", "format": "date-time"},
            },
            "required": ["data", "last_update"],
            "title": "Main",
        },
        "Datum": {
            "type": "object",
            "additionalProperties": False,
            "properties": {
                "No": {"type": "integer"},
                "居住地": {"type": "string"},
                "年代": {"$ref": "#/definitions/Age"},
                "性別": {"$ref": "#/definitions/Sex"},
                "備考": {"type": "string"},
                "退院": {"type": "null"},
                # "date" is a plain YYYY-MM-DD string ...
                "date": {"type": "string", "format": "date"},
                # ... whereas "リリース日" is a full ISO 8601 timestamp with a
                # +09:00 offset, so it is declared as date-time.
                "リリース日": {"type": "string", "format": "date-time"},
                "曜日": {"$ref": "#/definitions/Week"},
            },
            "required": ["date", "リリース日", "備考", "居住地", "年代", "性別", "曜日", "No", "退院"],
            "title": "Datum",
        },
        "Age": {
            "type": "string",
            "enum": [
                "10歳未満",
                "10代",
                "20代",
                "30代",
                "40代",
                "50代",
                "60代",
                "70代",
                "80代",
                "90歳以上",
                "非公表",
            ],
            "title": "Age",
        },
        "Sex": {"type": "string", "enum": ["男性", "女性", "非公表"], "title": "Sex"},
        "Week": {
            "type": "string",
            "enum": ["月", "火", "水", "木", "金", "土", "日"],
            "title": "Week",
        },
    },
}

In [14]:
# JSON Schema for the daily-count outputs (patients_summary.json and
# current_patients.json): a list of {"日付", "小計"} records plus the update
# timestamp.
PATIENTS_SUMMARY_SCHEMA = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "additionalProperties": False,
    "properties": {
        "data": {
            "type": "array",
            "items": {
                "type": "object",
                "additionalProperties": False,
                "properties": {
                    # The notebook writes "日付" as a timezone-aware ISO 8601
                    # timestamp (pd.Timestamp(...).isoformat()), not a bare
                    # date, so the declared format is date-time.
                    "日付": {"type": "string", "format": "date-time"},
                    "小計": {"default": 0, "type": "integer"},
                },
                "required": ["小計", "日付"],
            },
        },
        "last_update": {
            "format": "date-time",
            "type": "string",
        },
    },
    "required": ["data", "last_update"],
}

# データラングリング

In [15]:
import datetime
import json

import jsonschema
import pandas as pd
import requests

In [16]:
from bs4 import BeautifulSoup
from urllib.parse import urljoin

In [17]:
def dumps_json(file_name, json_data, dir=DATA_DIR):
    """Serialize ``json_data`` to ``dir/file_name`` as indented JSON.

    Args:
        file_name: Name of the output file, relative to ``dir``.
        json_data: JSON-serializable object to write.
        dir: Output directory; created (with parents) when missing.
    """
    p = pathlib.Path(dir, file_name)

    p.parent.mkdir(parents=True, exist_ok=True)

    # ensure_ascii=False emits the Japanese keys and values verbatim, so the
    # encoding is pinned to UTF-8 instead of relying on the platform default.
    with p.open(mode="w", encoding="utf-8") as fw:
        json.dump(json_data, fw, ensure_ascii=False, indent=4)

In [18]:
# 最終更新日
JST = datetime.timezone(datetime.timedelta(hours=+9))
dt_now = datetime.datetime.now(JST)

In [19]:
last_update = dt_now.replace(hour=0, minute=0, second=0, microsecond=0)
# last_update -= datetime.timedelta(days=1)

## pcr.xlsx

In [20]:
pcr_path = get_file(PCR_XLSX, DOWNLOAD_DIR)

In [21]:
df_pcr = pd.read_excel(pcr_path, index_col="年月日").fillna(0).astype(int)

In [22]:
# Map the sheet's column headers to the shorter names used in the rest of
# the notebook.
pcr_column_names = {
    "検査件数（合計）": "合計",
    "うち地方衛生研究所等によるPCR検査件数": "地方衛生研究所等",
    "うち民間検査機関等によるPCR検査件数": "民間検査機関等_PCR検査",
    "うち民間検査機関等による抗原検査件数": "民間検査機関等_抗原検査",
    "陽性件数": "陽性確認",
}
df_pcr = df_pcr.rename(columns=pcr_column_names)

In [23]:
df_pcr["民間検査機関等"] = df_pcr["民間検査機関等_PCR検査"] + df_pcr["民間検査機関等_抗原検査"] 

In [24]:
df_pcr["日付"] = df_pcr.index.map(lambda d: pd.Timestamp(d, tz='Asia/Tokyo').isoformat())

In [25]:
df_pcr.to_csv("pcr.tsv", sep="\t")

In [26]:
# inspections_summary

df_insp_sum = df_pcr.loc[:, ["地方衛生研究所等", "民間検査機関等"]].copy()

labels = df_insp_sum.index.map(lambda x: f"{x.month}/{x.day}")

In [27]:
inspections_summary = {
    "data": df_insp_sum.to_dict(orient="list"),
    "labels": labels.tolist(),
    "last_update": last_update.isoformat(),
}

In [28]:
jsonschema.validate(inspections_summary, INSPECTIONS_SUMMARY_SCHEMA)

dumps_json("inspections_summary.json", inspections_summary)

In [29]:
# inspections

df_insp = df_pcr.loc[:, ["地方衛生研究所等", "民間検査機関等_PCR検査", "民間検査機関等_抗原検査", "陽性確認"]].copy()
df_insp["判明日"] = df_insp.index.strftime("%Y-%m-%d")

In [30]:
df_insp.sort_index(inplace=True)

In [31]:
insp_dict = [
    {
        "判明日": row["判明日"],
        "地方衛生研究所等": row["地方衛生研究所等"],
        "民間検査機関等": {"PCR検査": row["民間検査機関等_PCR検査"], "抗原検査": row["民間検査機関等_抗原検査"]},
        "陽性確認": row["陽性確認"],
    }
    for _, row in df_insp.iterrows()
]

In [32]:
inspections = {
    "data": insp_dict,
    "last_update": last_update.isoformat(),
}

In [33]:
# jsonschema.validate(inspections, INSPECTIONS_SCHEMA)
dumps_json("inspections.json", inspections)

In [34]:
# patients_summary: per-day positive counts taken from the PCR sheet

df_pts = df_pcr.loc[:, ["日付", "陽性確認"]].copy()

df_pts.rename(columns={"陽性確認": "小計"}, inplace=True)

In [35]:
patients_summary = {
    "data": df_pts.to_dict(orient="records"),
    "last_update": last_update.isoformat(),
}

In [36]:
jsonschema.validate(patients_summary, PATIENTS_SUMMARY_SCHEMA)

dumps_json("patients_summary.json", patients_summary)

## kanjya.xlsx

In [37]:
p = get_file(KANJA_HTML, DOWNLOAD_DIR)

soup = BeautifulSoup(p.open(encoding='utf-8'), "html.parser")

In [38]:
import re

In [39]:
# Alternative selector (disabled):
# tag = soup.find("a", class_="icon_excel")

# Pick the first anchor whose href ends in .xls / .xlsx / .xlsm. The dot is
# escaped so that only a real file extension matches, not any character
# followed by "xls".
tag = soup.find("a", href=re.compile(r"\.xls[mx]?$"))

# Fail with a clear message when the page layout changed and no link is found,
# instead of an AttributeError on None below.
if tag is None:
    raise ValueError(f"No Excel link found on {KANJA_HTML}")

link = urljoin(KANJA_HTML, tag.get("href"))

In [40]:
kanja_path = get_file(link, DOWNLOAD_DIR)

In [41]:
df_head = pd.read_excel(kanja_path, header=None, skiprows=4).dropna(how="all", axis=1)

In [42]:
# Build the column names by concatenating the first two rows of each column
# (blank cells count as empty strings) and stripping surrounding whitespace.
df_head.columns = ["".join(i).strip() for i in df_head.head(2).fillna("").T.values]
# Everything after those two header rows is data.
df_tmp = df_head.iloc[2:, :].copy().reset_index(drop=True)

# Keep only rows that carry a case number.
df_kanja = df_tmp[df_tmp["番号"].notnull()].copy()

# Drop columns that are entirely empty.
df_kanja.dropna(how="all", axis=1, inplace=True)

# Drop rows that have fewer than two non-null cells.
df_kanja.dropna(thresh=2, inplace=True)

# Remove line breaks embedded in the column names.
df_kanja.columns = df_kanja.columns.map(lambda s: s.replace("\n", ""))

df_kanja["番号"] = df_kanja["番号"].astype(int)
df_kanja["年代"] = df_kanja["年代"].astype(str)
# Normalize the age labels to the spellings listed in PATIENTS_SCHEMA's "Age"
# enum: under-10, 90-and-over, and bare decade numbers (10..80) get "代" appended.
df_kanja["年代"] = df_kanja["年代"].replace({"10?[歳代]未満": "10歳未満", "90(歳以上)?": "90歳以上", "([1-8]0$)": r"\1代"}, regex=True)

# Rows whose announcement date is all digits are converted below as a day
# count from 1899-12-30; every other row is parsed as a date value/string.
flg_is_serial = df_kanja["発表日"].astype("str").str.isdigit()

fromSerial = pd.to_datetime(df_kanja.loc[flg_is_serial, "発表日"].astype(float), unit="D", origin=pd.Timestamp("1899/12/30"))
fromString = pd.to_datetime(df_kanja.loc[~flg_is_serial, "発表日"])

# The two partial series keep their original index labels, so the assignment
# puts each converted date back on its own row.
df_kanja["発表日"] = pd.concat([fromString, fromSerial])

# Line breaks inside cells: removed for the residence, replaced by a space
# for the remarks.
df_kanja["居住地"] = df_kanja["居住地"].str.replace("\n", "")
df_kanja["備考欄"] = df_kanja["備考欄"].str.replace("\n", " ")

df_kanja.set_index("番号", inplace=True)

In [44]:
df_kanja.to_csv("kanja.tsv", sep="\t")

In [45]:
# 陽性患者数（日別）
"""
df_pts = (
    df_kanja["発表日"]
    .value_counts()
    .sort_index()
    .asfreq("D", fill_value=0)
    .reset_index()
)

df_pts["日付"] = df_pts["index"].dt.strftime("%Y-%m-%d")

df_pts.rename(columns={"発表日": "小計"}, inplace=True)

df_pts.drop("index", axis=1, inplace=True)

df_pts
"""

'\ndf_pts = (\n    df_kanja["発表日"]\n    .value_counts()\n    .sort_index()\n    .asfreq("D", fill_value=0)\n    .reset_index()\n)\n\ndf_pts["日付"] = df_pts["index"].dt.strftime("%Y-%m-%d")\n\ndf_pts.rename(columns={"発表日": "小計"}, inplace=True)\n\ndf_pts.drop("index", axis=1, inplace=True)\n\ndf_pts\n'

In [46]:
"""
patients_summary = {
    "data": df_pts.to_dict(orient="records"),
    "last_update": last_update.strftime("%Y-%m-%d %H:%M"),
}
"""

'\npatients_summary = {\n    "data": df_pts.to_dict(orient="records"),\n    "last_update": last_update.strftime("%Y-%m-%d %H:%M"),\n}\n'

In [47]:
jsonschema.validate(patients_summary, PATIENTS_SUMMARY_SCHEMA)

dumps_json("patients_summary.json", patients_summary)

In [48]:
# 陽性患者情報

df_pt = df_kanja.loc[:, ["発表日", "居住地", "年代", "性別", "備考欄"]].sort_index().reset_index()
df_pt.head(10)

df_pt["退院"] = None

df_pt["date"] = df_pt["発表日"].dt.strftime("%Y-%m-%d")
df_pt["リリース日"] = df_pt["発表日"].apply(lambda d: pd.Timestamp(d, tz='Asia/Tokyo').isoformat())

week = ["月", "火", "水", "木", "金", "土", "日"]

df_pt["曜日"] = df_pt["発表日"].dt.dayofweek.apply(lambda x: week[x])

df_pt["備考欄"] = df_pt["備考欄"].str.replace("NO.|N0.|NO,|N0,|No,", "No.")
df_pt["備考欄"] = df_pt["備考欄"].str.replace("・", "、")
df_pt["備考欄"] = df_pt["備考欄"].fillna("")
df_pt.rename(columns={"番号": "No", "備考欄": "備考"}, inplace=True)

df_pt.drop("発表日", axis=1, inplace=True)

df_pt

Unnamed: 0,No,居住地,年代,性別,備考,退院,date,リリース日,曜日
0,1,西宮市,40代,男性,特定できず,,2020-03-01,2020-03-01T00:00:00+09:00,日
1,2,神戸市,40代,男性,感染経路確認済,,2020-03-03,2020-03-03T00:00:00+09:00,火
2,3,神戸市,40代,女性,2/15、16に大阪のライブハウスArcに参加,,2020-03-03,2020-03-03T00:00:00+09:00,火
3,4,福崎町,50代,女性,2/15、16に大阪のライブハウスArcに参加,,2020-03-05,2020-03-05T00:00:00+09:00,木
4,5,姫路市,40代,男性,2/19に大阪のSoap operaライブに参加,,2020-03-06,2020-03-06T00:00:00+09:00,金
...,...,...,...,...,...,...,...,...,...
2516,2528,宝塚市,90歳以上,男性,陽性患者の濃厚接触者,,2020-09-16,2020-09-16T00:00:00+09:00,水
2517,2529,宝塚市,20代,女性,陽性患者の濃厚接触者,,2020-09-16,2020-09-16T00:00:00+09:00,水
2518,2530,宝塚市,30代,女性,No2445の濃厚接触者,,2020-09-16,2020-09-16T00:00:00+09:00,水
2519,2531,宝塚健康福祉事務所管内,20代,男性,行動歴調査中,,2020-09-16,2020-09-16T00:00:00+09:00,水


In [49]:
patients = {
    "data": df_pt.to_dict(orient="records"),
    "last_update": last_update.isoformat(),
}

In [50]:
jsonschema.validate(patients, PATIENTS_SCHEMA)

dumps_json("patients.json", patients)

In [51]:
# 年代集計

age_list = ["10歳未満","10代", "20代", "30代", "40代", "50代", "60代", "70代", "80代", "90歳以上", "非公表"]

df_age = df_kanja["年代"].value_counts().sort_index().reindex(age_list, fill_value=0)

df_age = df_age.astype(int)

In [52]:
age = {
    "data": df_age.to_dict(),
    "last_update": last_update.isoformat(),
}

In [53]:
jsonschema.validate(age, AGE_SCHEMA)

dumps_json("age.json", age)

In [54]:
df_ages = pd.crosstab(df_kanja["発表日"], df_kanja["年代"]).reindex(
    age_list, axis=1, fill_value=0
)

if df_pcr.index[-1] not in df_ages.index:
    df_ages.loc[df_pcr.index[-1]] = 0

df_ages = df_ages.astype(int)
df_ages.sort_index(inplace=True)

df_agesum = df_ages.asfreq("D", fill_value=0)

df_agesum

年代,10歳未満,10代,20代,30代,40代,50代,60代,70代,80代,90歳以上,非公表
発表日,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-03-01,0,0,0,0,1,0,0,0,0,0,0
2020-03-02,0,0,0,0,0,0,0,0,0,0,0
2020-03-03,0,0,0,0,2,0,0,0,0,0,0
2020-03-04,0,0,0,0,0,0,0,0,0,0,0
2020-03-05,0,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
2020-09-12,2,0,4,3,3,3,2,0,3,0,0
2020-09-13,1,3,3,3,1,2,0,1,1,0,0
2020-09-14,1,0,2,1,0,1,2,1,0,0,0
2020-09-15,0,1,1,5,5,0,3,3,5,0,0


In [55]:
labels = df_agesum.index.map(lambda d: f"{d.month}/{d.day}")

In [56]:
age_summary = {
    "data": df_agesum.to_dict(orient="list"),
    "labels": labels.tolist(),
    "last_update": last_update.isoformat(),
}

In [57]:
jsonschema.validate(age_summary, AGE_SUMMARY_SCHEMA)

dumps_json("age_summary.json", age_summary)

In [58]:
# クラスタ概要

df_cluster_sum = df_kanja.loc[:, "認定こども園":"特定できず"].copy().notnull().sum()

In [59]:
clusters_summary = {
    "data": df_cluster_sum.to_dict(),
    "last_update": last_update.isoformat(),
}

In [60]:
jsonschema.validate(clusters_summary, CLUSTERS_SUMMARY_SCHEMA)

dumps_json("clusters_summary.json", clusters_summary)

In [61]:
# クラスタ

df_clusters = df_kanja.loc[:, "認定こども園":"特定できず"].copy().fillna(0)

In [62]:
df_clusters[df_clusters != 0] = 1

In [63]:
df_clusters["発表日"] = df_kanja["発表日"]

In [64]:
pv_clusters = df_clusters.pivot_table(index="発表日", aggfunc="sum")

In [65]:
if df_pcr.index[-1] not in pv_clusters.index:
    pv_clusters.loc[df_pcr.index[-1]] = 0

pv_clusters.sort_index(inplace=True)

In [66]:
pv_clusters = pv_clusters.asfreq("D", fill_value=0)

pv_clusters["日付"] = pv_clusters.index.map(lambda d: pd.Timestamp(d, tz='Asia/Tokyo').isoformat())

In [67]:
# One dict per date. The orient value must be spelled "records": the original
# misspelling only worked through pandas' legacy prefix matching of orient
# names and is rejected with a ValueError by current pandas.
clusters = {
    "data": pv_clusters.to_dict(orient="records"),
    "last_update": last_update.isoformat(),
}

In [68]:
jsonschema.validate(clusters, CLUSTERS_SCHEMA)
dumps_json("clusters.json", clusters)

In [69]:
# 重複者

(df_kanja.loc[:, "認定こども園":].copy().notnull().sum(axis=1) > 1).sum()

6

## yousei.xlsx

In [70]:
yousei_path = get_file(YOUSEI_XLSX, DOWNLOAD_DIR)

In [71]:
df_yousei = pd.read_excel(yousei_path, index_col="発表年月日")

df_yousei.columns = df_yousei.columns.map(lambda s: s.replace("（累計）", "").strip())

# df_yousei.index += pd.to_timedelta("1 days")

df_yousei.rename(columns={"入院中（合計）": "入院中", "入院中（中等症以下）": "軽症・中等症", "入院中（重症）": "重症", "陽性者数": "陽性患者数"}, inplace=True)

df_yousei.drop("発表時間", axis=1, inplace=True)

In [72]:
df_yousei.to_csv("yousei.tsv", sep="\t")

In [73]:
d = df_yousei.iloc[-1].to_dict()

In [74]:
d

{'入院中': 121,
 '宿泊療養': 16,
 '検査実施人数': 51144,
 '死亡': 55,
 '軽症・中等症': 110,
 '退院': 2329,
 '重症': 11,
 '陽性患者数': 2521}

In [75]:
main_summary = {
    "attr": "検査実施人数",
    "value": d["検査実施人数"],
    "children": [
        {
            "attr": "陽性患者数",
            "value": d["陽性患者数"],
            "children": [
                {
                    "attr": "入院中",
                    "value": d["入院中"],
                    "children": [
                        {"attr": "軽症・中等症", "value": d["軽症・中等症"]},
                        {"attr": "重症", "value": d["重症"]},
                    ],
                },
                {"attr": "宿泊療養", "value": d["宿泊療養"]},
                {"attr": "死亡", "value": d["死亡"]},
                {"attr": "退院", "value": d["退院"]},
            ],
        }
    ],
    "last_update": last_update.isoformat(),
}

In [76]:
jsonschema.validate(main_summary, MAIN_SUMMARY_SCHEMA)

dumps_json("main_summary.json", main_summary)

In [77]:
df_yousei["治療中"] = df_yousei["入院中"] + df_yousei["宿泊療養"]

In [78]:
ser_cur = df_yousei["治療中"].reindex(df_pcr.index)

In [79]:
df_current = pd.DataFrame({"小計": ser_cur.combine_first(df_pcr["陽性確認"].cumsum())}).diff().fillna(0).astype(int)

In [80]:
df_current["日付"] = df_current.index.map(lambda d: pd.Timestamp(d, tz='Asia/Tokyo').isoformat())

In [81]:
df_cur_pts = df_current.loc[:, ["日付", "小計"]].copy()

In [82]:
current_patients = {
    "data": df_cur_pts.to_dict(orient="records"),
    "last_update": last_update.isoformat(),
}

In [83]:
jsonschema.validate(current_patients, PATIENTS_SUMMARY_SCHEMA)

dumps_json("current_patients.json", current_patients)

# チェック

In [84]:
!pip install dictdiffer

Collecting dictdiffer
  Downloading https://files.pythonhosted.org/packages/97/92/350b6b6ec39c5f87d98d04c91a50c498518716a05368e6dea88b5c69b590/dictdiffer-0.8.1-py2.py3-none-any.whl
Installing collected packages: dictdiffer
Successfully installed dictdiffer-0.8.1


In [85]:
from dictdiffer import diff, patch, swap, revert

In [86]:
import pprint

In [87]:
def json_check(fn, d):
    """Compare ``d`` with the JSON file ``fn`` published on the gh-pages branch.

    Prints the file name and a dictdiffer diff when the two differ; prints
    nothing when they are equal.

    Args:
        fn: File name below the gh-pages root of
            stop-covid19-hyogo/covid19-scraping.
        d: Locally generated object to compare against the published one.

    Raises:
        requests.HTTPError: If the published file cannot be fetched.
    """
    r = requests.get(
        "https://raw.githubusercontent.com/stop-covid19-hyogo/covid19-scraping/gh-pages/"
        + fn,
        timeout=30,
    )
    # Surface a missing or unavailable file as an HTTP error rather than as a
    # JSON decoding failure on the error page.
    r.raise_for_status()

    # Parse the body once and reuse it for both the comparison and the diff.
    published = r.json()

    if d != published:

        print(fn)
        pprint.pprint(list(diff(d, published)))

In [88]:
json_check("inspections.json", inspections)
json_check("inspections_summary.json", inspections_summary)

inspections.json
[('change', ['data', 228, '陽性確認'], (23, 24)),
 ('remove',
  'data',
  [(229,
    {'判明日': '2020-09-16',
     '地方衛生研究所等': 153,
     '民間検査機関等': {'PCR検査': 174, '抗原検査': 80},
     '陽性確認': 20})]),
 ('change',
  'last_update',
  ('2020-09-17T00:00:00+09:00', '2020-09-16T00:00:00+09:00'))]
inspections_summary.json
[('remove', 'data.地方衛生研究所等', [(229, 153)]),
 ('remove', 'data.民間検査機関等', [(229, 254)]),
 ('remove', 'labels', [(229, '9/16')]),
 ('change',
  'last_update',
  ('2020-09-17T00:00:00+09:00', '2020-09-16T00:00:00+09:00'))]


In [89]:
json_check("age.json", age)
json_check("age_summary.json", age_summary)

age.json
[('change', 'data.10歳未満', (83, 84)),
 ('change', 'data.20代', (607, 603)),
 ('change', 'data.30代', (350, 348)),
 ('change', 'data.40代', (391, 390)),
 ('change', 'data.50代', (379, 375)),
 ('change', 'data.60代', (212, 210)),
 ('change', 'data.70代', (182, 178)),
 ('change', 'data.80代', (112, 110)),
 ('change', 'data.90歳以上', (43, 42)),
 ('change',
  'last_update',
  ('2020-09-17T00:00:00+09:00', '2020-09-16T00:00:00+09:00'))]
age_summary.json
[('change', ['data', '10歳未満', 198], (0, 1)),
 ('remove', 'data.10歳未満', [(199, 0)]),
 ('remove', 'data.10代', [(199, 0)]),
 ('remove', 'data.20代', [(199, 4)]),
 ('remove', 'data.30代', [(199, 2)]),
 ('remove', 'data.40代', [(199, 1)]),
 ('remove', 'data.50代', [(199, 4)]),
 ('remove', 'data.60代', [(199, 2)]),
 ('remove', 'data.70代', [(199, 4)]),
 ('remove', 'data.80代', [(199, 2)]),
 ('remove', 'data.90歳以上', [(199, 1)]),
 ('remove', 'data.非公表', [(199, 0)]),
 ('remove', 'labels', [(199, '9/16')]),
 ('change',
  'last_update',
  ('2020-09-17T00:00:00+

In [90]:
json_check("patients_summary.json", patients_summary)
json_check("patients.json", patients)
json_check("current_patients.json", current_patients)

patients_summary.json
[('change', ['data', 228, '小計'], (23, 24)),
 ('remove', 'data', [(229, {'小計': 20, '日付': '2020-09-16T00:00:00+09:00'})]),
 ('change',
  'last_update',
  ('2020-09-17T00:00:00+09:00', '2020-09-16T00:00:00+09:00'))]
patients.json
[('change', ['data', 22, '備考'], ('', None)),
 ('change', ['data', 2215, '居住地'], ('神戸市', '調査中')),
 ('change', ['data', 2215, '備考'], ('非公表', '行動歴調査中')),
 ('change', ['data', 2236, '居住地'], ('神戸市', '調査中')),
 ('change', ['data', 2237, '居住地'], ('神戸市', '調査中')),
 ('change', ['data', 2254, '居住地'], ('神戸市外', '調査中')),
 ('change', ['data', 2258, '居住地'], ('神戸市', '調査中')),
 ('change', ['data', 2258, '備考'], ('神戸市外の感染患者の濃厚接触者', '行動歴調査中')),
 ('change', ['data', 2259, '居住地'], ('神戸市', '調査中')),
 ('change', ['data', 2267, '居住地'], ('神戸市', '調査中')),
 ('change', ['data', 2267, '備考'], ('神戸市外の感染患者の濃厚接触者', '行動歴調査中')),
 ('change', ['data', 2269, '居住地'], ('神戸市', '調査中')),
 ('change', ['data', 2270, '居住地'], ('神戸市', '調査中')),
 ('change', ['data', 2270, '備考'], ('神戸市内の感染患者の濃厚接触者

In [91]:
json_check("clusters.json", clusters)
json_check("clusters_summary.json", clusters_summary)

clusters.json
[('change', ['data', 182, 'その他'], (6, 5)),
 ('change', ['data', 182, '行動歴調査中'], (7, 8)),
 ('change', ['data', 183, 'その他'], (7, 2)),
 ('change', ['data', 183, '行動歴調査中'], (1, 6)),
 ('change', ['data', 184, 'その他'], (10, 8)),
 ('change', ['data', 184, '行動歴調査中'], (11, 13)),
 ('change', ['data', 185, 'その他'], (2, 1)),
 ('change', ['data', 185, '行動歴調査中'], (5, 6)),
 ('change', ['data', 198, '行動歴調査中'], (9, 10)),
 ('remove',
  'data',
  [(199,
    {'その他': 13,
     'グリーンアルス関係': 0,
     'ライブ関係': 0,
     '仁恵病院': 0,
     '介護保険通所事業所': 0,
     '北播磨総合医療センター': 0,
     '宝塚第一病院': 0,
     '日付': '2020-09-16T00:00:00+09:00',
     '海外渡航関係': 0,
     '特定できず': 0,
     '神戸市中央市民病院': 0,
     '神戸市環境局': 0,
     '神戸西警察署': 0,
     '神戸赤十字病院': 0,
     '行動歴調査中': 7,
     '認定こども園': 0})]),
 ('change',
  'last_update',
  ('2020-09-17T00:00:00+09:00', '2020-09-16T00:00:00+09:00'))]
clusters_summary.json
[('change', 'data.その他', (1093, 1071)),
 ('change', 'data.行動歴調査中', (1028, 1031)),
 ('change',
  'last_update',
  

In [92]:
json_check("main_summary.json", main_summary)

main_summary.json
[('change', 'value', (51144, 50737)),
 ('change', ['children', 0, 'value'], (2521, 2502)),
 ('change', ['children', 0, 'children', 0, 'value'], (121, 113)),
 ('change', ['children', 0, 'children', 0, 'children', 0, 'value'], (110, 103)),
 ('change', ['children', 0, 'children', 0, 'children', 1, 'value'], (11, 10)),
 ('change', ['children', 0, 'children', 1, 'value'], (16, 13)),
 ('change', ['children', 0, 'children', 3, 'value'], (2329, 2321)),
 ('change',
  'last_update',
  ('2020-09-17T00:00:00+09:00', '2020-09-16T00:00:00+09:00'))]
