<a href="https://colab.research.google.com/github/imabari/covid19-data/blob/master/hyougo/hyougo_json_isoformat.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [111]:
!pip install jsonschema



In [112]:
!pip install pycurl
!pip install retry



In [113]:
# Source documents published on the Hyogo Prefecture web site.
PCR_XLSX = (
    "https://web.pref.hyogo.lg.jp"
    "/kk03/documents/pcr.xlsx"
)
YOUSEI_XLSX = (
    "https://web.pref.hyogo.lg.jp"
    "/kk03/documents/yousei.xlsx"
)
KANJA_HTML = (
    "https://web.pref.hyogo.lg.jp"
    "/kk03/corona_kanjyajyokyo.html"
)

# Local folders: downloaded source files and generated JSON output.
DOWNLOAD_DIR, DATA_DIR = "download", "data"

In [114]:
from retry import retry
import pathlib

# ダウンロード

In [115]:
@retry(tries=5, delay=5, backoff=3)
def get_file(url, dir=".", timeout=60):
    """Download ``url`` and save it under ``dir``.

    The local file name is the last path component of ``url``. The target
    directory is created if it does not exist. Any exception (HTTP error
    status, connection failure, timeout) triggers the ``retry`` decorator.

    Args:
        url: Address of the file to fetch.
        dir: Directory the file is written to.
        timeout: Seconds passed to ``requests.get``. Without a timeout a
            stalled connection would block forever and never be retried.

    Returns:
        ``pathlib.Path`` of the saved file.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (re-raised once the retries are exhausted).
    """
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()

    p = pathlib.Path(dir, pathlib.PurePath(url).name)
    p.parent.mkdir(parents=True, exist_ok=True)

    # write_bytes opens, writes and closes the file in one step.
    p.write_bytes(r.content)

    return p

# SCHEMA

In [116]:
# JSON Schema used to validate the age.json payload.
AGE_SCHEMA = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "properties": {
        # Arbitrary keys are allowed under "data"; every value must be an integer.
        "data": {
            "type": "object",
            "additionalProperties": {"default": 0, "type": "integer"},
        },
        "last_update": {"format": "date-time", "type": "string"},
    },
    "required": ["data", "last_update"],
}

In [117]:
# JSON Schema used to validate the age_summary.json payload.
AGE_SUMMARY_SCHEMA = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "additionalProperties": False,
    "properties": {
        # One integer array per key.
        "data": {
            "type": "object",
            "additionalProperties": {
                "type": "array",
                "items": {"default": 0, "type": "integer"},
            },
        },
        # Labels look like "3/1" (one or two digits on each side of the slash).
        "labels": {
            "type": "array",
            "items": {
                "pattern": "^[0-9]{1,2}/[0-9]{1,2}$",
                "type": "string",
            },
        },
        "last_update": {"format": "date-time", "type": "string"},
    },
    "required": ["data", "labels", "last_update"],
}

In [118]:
# JSON Schema used to validate the clusters.json payload.
CLUSTERS_SCHEMA = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "additionalProperties": False,
    "properties": {
        "data": {
            "type": "array",
            "items": {
                "type": "object",
                # oneOf: an item must satisfy exactly one of the two sub-schemas.
                "oneOf": [
                    {"properties": {"日付": {"type": "string", "format": "date-time"}}},
                    {"additionalProperties": {"type": "integer"}},
                ],
            },
        },
        "last_update": {"type": "string", "format": "date-time"},
    },
    "required": ["data", "last_update"],
}

In [119]:
# JSON Schema used to validate the clusters_summary.json payload.
CLUSTERS_SUMMARY_SCHEMA = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "additionalProperties": False,
    "properties": {
        # Arbitrary keys, integer values.
        "data": {
            "type": "object",
            "additionalProperties": {"default": 0, "type": "integer"},
        },
        "last_update": {"format": "date-time", "type": "string"},
    },
    "required": ["data", "last_update"],
}

In [120]:
# JSON Schema describing the inspections.json payload.
INSPECTIONS_SCHEMA = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "additionalProperties": False,
    "properties": {
        "data": {
            "type": "array",
            # Each record carries exactly these four fields.
            "items": {
                "type": "object",
                "additionalProperties": False,
                "properties": {
                    "判明日": {"type": "string", "format": "date"},
                    "地方衛生研究所等": {"type": "integer"},
                    "民間検査機関等": {"type": "object", "additionalProperties": {"type": "integer"}},
                    "陽性確認": {"type": "integer"},
                },
                "required": ["判明日", "地方衛生研究所等", "民間検査機関等", "陽性確認"],
            },
        },
        "last_update": {"type": "string", "format": "date-time"},
    },
    "required": ["data", "last_update"],
}

In [121]:
# JSON Schema used to validate the inspections_summary.json payload.
INSPECTIONS_SUMMARY_SCHEMA = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "additionalProperties": False,
    "properties": {
        # One integer array per key.
        "data": {
            "type": "object",
            "additionalProperties": {
                "type": "array",
                "items": {"default": 0, "type": "integer"},
            },
        },
        # Labels look like "3/1" (one or two digits on each side of the slash).
        "labels": {
            "type": "array",
            "items": {
                "pattern": r"^[0-9]{1,2}/[0-9]{1,2}$",
                "type": "string",
            },
        },
        "last_update": {"format": "date-time", "type": "string"},
    },
    "required": ["data", "labels", "last_update"],
}

In [122]:
# JSON Schema for the nested main_summary.json tree.
# Nesting: Main -> Inspections -> Patients -> Symptoms, each level holding
# an "attr" label and an integer "value".
MAIN_SUMMARY_SCHEMA = {
    "$schema": "http://json-schema.org/draft-06/schema#",
    "$ref": "#/definitions/Main",
    "definitions": {
        # Root node: the only level that carries "last_update".
        "Main": {
            "type": "object",
            "additionalProperties": False,
            "properties": {
                "attr": {"type": "string"},
                "value": {"type": "integer", "default": 0},
                "children": {"type": "array", "items": {"$ref": "#/definitions/Inspections"}},
                "last_update": {"format": "date-time", "type": "string"},
            },
            "required": ["attr", "children", "last_update", "value"],
            "title": "Main",
        },
        "Inspections": {
            "type": "object",
            "additionalProperties": False,
            "properties": {
                "attr": {"type": "string"},
                "value": {"type": "integer", "default": 0},
                "children": {"type": "array", "items": {"$ref": "#/definitions/Patients"}},
            },
            "required": ["attr", "children", "value"],
            "title": "Inspections",
        },
        # "children" is optional at this level.
        "Patients": {
            "type": "object",
            "additionalProperties": False,
            "properties": {
                "attr": {"type": "string"},
                "value": {"type": "integer", "default": 0},
                "children": {"type": "array", "items": {"$ref": "#/definitions/Symptoms"}},
            },
            "required": ["attr", "value"],
            "title": "Patients",
        },
        # Leaf node: no "children" property is allowed.
        "Symptoms": {
            "type": "object",
            "additionalProperties": False,
            "properties": {
                "attr": {"type": "string"},
                "value": {"type": "integer", "default": 0},
            },
            "required": ["attr", "value"],
            "title": "Symptoms",
        },
    },
}

In [123]:
# JSON Schema used to validate the patients.json payload.
PATIENTS_SCHEMA = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "$ref": "#/definitions/Main",
    "definitions": {
        "Main": {
            "type": "object",
            "additionalProperties": False,
            "properties": {
                "data": {"type": "array", "items": {"$ref": "#/definitions/Datum"}},
                # Written with datetime.isoformat(), like in the other schemas.
                "last_update": {"type": "string", "format": "date-time"},
            },
            "required": ["data", "last_update"],
            "title": "Main",
        },
        # One record per patient; every field is mandatory.
        "Datum": {
            "type": "object",
            "additionalProperties": False,
            "properties": {
                "No": {"type": "integer"},
                # The residence must end with one of the listed suffixes.
                "居住地": {"type": "string", "pattern": "(都|道|府|県|市|区|町|村|市内|市外|県外|事務所管内|調査中)$"},
                "年代": {"$ref": "#/definitions/Age"},
                "性別": {"$ref": "#/definitions/Sex"},
                "備考": {"type": "string"},
                "退院": {"type": "null"},
                # "date" is a plain YYYY-MM-DD day ...
                "date": {"type": "string", "format": "date"},
                # ... whereas "リリース日" is emitted as a full ISO-8601 timestamp
                # with a +09:00 offset, so its format is date-time, not date.
                "リリース日": {"type": "string", "format": "date-time"},
                "曜日": {"$ref": "#/definitions/Week"},
            },
            "required": ["date", "リリース日", "備考", "居住地", "年代", "性別", "曜日", "No", "退院"],
            "title": "Datum",
        },
        "Age": {
            "type": "string",
            "enum": [
                "10歳未満",
                "10代",
                "20代",
                "30代",
                "40代",
                "50代",
                "60代",
                "70代",
                "80代",
                "90歳以上",
                "非公表",
            ],
            "title": "Age",
        },
        "Sex": {"type": "string", "enum": ["男性", "女性", "非公表"], "title": "Sex"},
        "Week": {
            "type": "string",
            "enum": ["月", "火", "水", "木", "金", "土", "日"],
            "title": "Week",
        },
    },
}

In [124]:
# JSON Schema shared by patients_summary.json and current_patients.json.
PATIENTS_SUMMARY_SCHEMA = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "additionalProperties": False,
    "properties": {
        "data": {
            "type": "array",
            "items": {
                "type": "object",
                "additionalProperties": False,
                "properties": {
                    # The notebook writes "日付" as an ISO-8601 timestamp with a
                    # +09:00 offset (pd.Timestamp(..., tz="Asia/Tokyo").isoformat()),
                    # so the declared format is date-time rather than date.
                    "日付": {"type": "string", "format": "date-time"},
                    "小計": {"default": 0, "type": "integer"},
                },
                "required": ["小計", "日付"],
            },
        },
        "last_update": {
            "format": "date-time",
            "type": "string",
        },
    },
    "required": ["data", "last_update"],
}

# データラングリング

In [125]:
import datetime
import json

import jsonschema
import pandas as pd
import requests

In [126]:
from bs4 import BeautifulSoup
from urllib.parse import urljoin

In [127]:
def dumps_json(file_name, json_data, dir=DATA_DIR):
    """Serialise ``json_data`` to ``dir/file_name`` as indented JSON.

    The target directory is created when missing. Non-ASCII characters are
    written verbatim (``ensure_ascii=False``), so the file is opened
    explicitly as UTF-8 instead of relying on the platform's default
    encoding.

    Args:
        file_name: Name of the JSON file to create.
        json_data: JSON-serialisable object to write.
        dir: Output directory (defaults to ``DATA_DIR``).
    """
    p = pathlib.Path(dir, file_name)

    p.parent.mkdir(parents=True, exist_ok=True)

    with p.open(mode="w", encoding="utf-8") as fw:
        json.dump(json_data, fw, ensure_ascii=False, indent=4)

In [128]:
# Last-update timestamp: current wall-clock time at a fixed UTC+9 offset.
JST = datetime.timezone(datetime.timedelta(hours=9))
dt_now = datetime.datetime.now(tz=JST)

In [129]:
# Truncate the current timestamp to midnight of the same day (timezone kept).
last_update = dt_now.replace(hour=0, minute=0, second=0, microsecond=0)
# Disabled alternative: report the previous day instead.
# last_update -= datetime.timedelta(days=1)

## pcr.xlsx

In [130]:
# Download the PCR workbook into DOWNLOAD_DIR and keep the local path.
pcr_path = get_file(PCR_XLSX, DOWNLOAD_DIR)

In [131]:
# Index by the "年月日" column; empty cells become 0 and every column is cast to int.
df_pcr = pd.read_excel(pcr_path, index_col="年月日").fillna(0).astype(int)

In [132]:
# Map the workbook's verbose column headers to the short names used below.
df_pcr = df_pcr.rename(
    columns={
        "検査件数（合計）": "合計",
        "うち地方衛生研究所等によるPCR検査件数": "地方衛生研究所等",
        "うち民間検査機関等によるPCR検査件数": "民間検査機関等_PCR検査",
        "うち民間検査機関等による抗原検査件数": "民間検査機関等_抗原検査",
        "陽性件数": "陽性確認",
    }
)

In [133]:
# Private-lab total = private-lab PCR tests + private-lab antigen tests.
df_pcr["民間検査機関等"] = df_pcr["民間検査機関等_PCR検査"].add(df_pcr["民間検査機関等_抗原検査"])

In [134]:
# ISO-8601 timestamp (Asia/Tokyo) for each index date.
df_pcr["日付"] = [pd.Timestamp(day, tz="Asia/Tokyo").isoformat() for day in df_pcr.index]

In [135]:
# Dump the cleaned PCR table as a tab-separated file in the working directory.
df_pcr.to_csv("pcr.tsv", sep="\t")

In [136]:
# inspections_summary: daily counts for the two laboratory categories.
df_insp_sum = df_pcr[["地方衛生研究所等", "民間検査機関等"]].copy()

# Axis labels in "month/day" form, without zero padding.
labels = df_insp_sum.index.map(lambda day: f"{day.month}/{day.day}")

In [137]:
# Column-oriented payload: one list of counts per column plus the shared labels.
inspections_summary = dict(
    data=df_insp_sum.to_dict(orient="list"),
    labels=labels.tolist(),
    last_update=last_update.isoformat(),
)

In [138]:
# Validate against the schema first; the file is written only if validation passes.
jsonschema.validate(inspections_summary, INSPECTIONS_SUMMARY_SCHEMA)

dumps_json("inspections_summary.json", inspections_summary)

In [139]:
# inspections: per-day test counts with the private-lab figures kept separate.
df_insp = df_pcr[["地方衛生研究所等", "民間検査機関等_PCR検査", "民間検査機関等_抗原検査", "陽性確認"]].copy()

# Plain YYYY-MM-DD string for each index date.
df_insp["判明日"] = [day.strftime("%Y-%m-%d") for day in df_insp.index]

In [140]:
# Order the rows by date.
df_insp = df_insp.sort_index()

In [141]:
def _inspection_record(row):
    """Convert one df_insp row into the nested record written to inspections.json."""
    private_labs = {
        "PCR検査": row["民間検査機関等_PCR検査"],
        "抗原検査": row["民間検査機関等_抗原検査"],
    }
    return {
        "判明日": row["判明日"],
        "地方衛生研究所等": row["地方衛生研究所等"],
        "民間検査機関等": private_labs,
        "陽性確認": row["陽性確認"],
    }


insp_dict = [_inspection_record(row) for _, row in df_insp.iterrows()]

In [142]:
# Payload for inspections.json.
inspections = dict(
    data=insp_dict,
    last_update=last_update.isoformat(),
)

In [143]:
# NOTE(review): schema validation is disabled for this output, unlike the other
# files written in this notebook; the reason is not recorded here.
# jsonschema.validate(inspections, INSPECTIONS_SCHEMA)
dumps_json("inspections.json", inspections)

In [144]:
# patients_summary: daily confirmed positives, exposed under the key "小計".
df_pts = df_pcr[["日付", "陽性確認"]].rename(columns={"陽性確認": "小計"})

In [145]:
# Row-oriented payload: one {"日付", "小計"} record per day.
patients_summary = dict(
    data=df_pts.to_dict(orient="records"),
    last_update=last_update.isoformat(),
)

In [146]:
# Validate against the schema first; the file is written only if validation passes.
jsonschema.validate(patients_summary, PATIENTS_SUMMARY_SCHEMA)

dumps_json("patients_summary.json", patients_summary)

## kanjya.xlsx

In [147]:
# Download the patient-status page so the link to the workbook can be extracted.
p = get_file(KANJA_HTML, DOWNLOAD_DIR)

# read_text() opens and closes the file itself; handing BeautifulSoup an open
# file object (p.open(...)) left the handle unclosed.
soup = BeautifulSoup(p.read_text(encoding="utf-8"), "html.parser")

In [148]:
import re

In [149]:
# Disabled alternative: select the link by its CSS class.
# tag = soup.find("a", class_="icon_excel")

# First anchor whose href ends in .xls, .xlsx or .xlsm. The dot is escaped so
# that it only matches a literal "." (unescaped it matched any character).
tag = soup.find("a", href=re.compile(r"\.xls[mx]?$"))

# Fail with a clear message instead of an AttributeError on None.
if tag is None:
    raise ValueError(f"No Excel link found on {KANJA_HTML}")

# Resolve a possibly relative href against the page URL.
link = urljoin(KANJA_HTML, tag.get("href"))

In [150]:
# Download the workbook found on the page into DOWNLOAD_DIR.
kanja_path = get_file(link, DOWNLOAD_DIR)

In [151]:
# Read the sheet without a header, skipping the first three rows, and drop
# columns that are completely empty.
df_head = pd.read_excel(kanja_path, header=None, skiprows=3).dropna(how="all", axis=1)

# The header spans two rows: join both cells of each column (blank for NaN)
# to form the column name.
df_head.columns = ["".join(i).strip() for i in df_head.head(2).fillna("").T.values]
# Everything after the two header rows is data.
df_tmp = df_head.iloc[2:, :].copy().reset_index(drop=True)

# Keep only rows that have a value in "番号".
df_kanja = df_tmp[df_tmp["番号"].notnull()].copy()

# Drop columns that are empty for all remaining rows.
df_kanja.dropna(how="all", axis=1, inplace=True)

# Column names may contain line breaks; remove them.
df_kanja.columns = df_kanja.columns.map(lambda s: s.replace("\n", ""))

df_kanja["番号"] = df_kanja["番号"].astype(int)
df_kanja["年代"] = df_kanja["年代"].astype(str)
# Normalise the age labels with regex replacements: "1(0)歳未満"/"1(0)代未満"
# -> "10歳未満", "90" (optionally followed by "歳以上") -> "90歳以上", and a
# value ending in a bare 10..80 gets the suffix "代".
df_kanja["年代"] = df_kanja["年代"].replace({"10?[歳代]未満": "10歳未満", "90(歳以上)?": "90歳以上", "([1-8]0$)": r"\1代"}, regex=True)

# Rows whose "発表日" consists only of digits are handled as day counts
# (presumably Excel serial dates — the origin used below is 1899/12/30).
flg_is_serial = df_kanja["発表日"].astype("str").str.isdigit()

# Digit-only values: days since 1899/12/30. All other values: parsed as dates.
fromSerial = pd.to_datetime(df_kanja.loc[flg_is_serial, "発表日"].astype(float), unit="D", origin=pd.Timestamp("1899/12/30"))
fromString = pd.to_datetime(df_kanja.loc[~flg_is_serial, "発表日"])

# Both parts keep their original row labels, so the assignment realigns them.
df_kanja["発表日"] = pd.concat([fromString, fromSerial])

# Remove line breaks inside the remarks column.
df_kanja["備考欄"] = df_kanja["備考欄"].str.replace("\n", "")

df_kanja.set_index("番号", inplace=True)

In [152]:
# Missing number: row 738 is removed from the patient table.
# errors="ignore" keeps the cell re-runnable and tolerant of a workbook that
# no longer contains this row (a bare drop(738) raises KeyError in that case).
df_kanja = df_kanja.drop(738, errors="ignore")

In [153]:
# Dump the cleaned patient table as a tab-separated file in the working directory.
df_kanja.to_csv("kanja.tsv", sep="\t")

In [154]:
# Number of positive patients (per day)
# NOTE: the code below is wrapped in a string literal and therefore never runs;
# df_pts is built from the PCR workbook earlier in the notebook instead.
"""
df_pts = (
    df_kanja["発表日"]
    .value_counts()
    .sort_index()
    .asfreq("D", fill_value=0)
    .reset_index()
)

df_pts["日付"] = df_pts["index"].dt.strftime("%Y-%m-%d")

df_pts.rename(columns={"発表日": "小計"}, inplace=True)

df_pts.drop("index", axis=1, inplace=True)

df_pts
"""

'\ndf_pts = (\n    df_kanja["発表日"]\n    .value_counts()\n    .sort_index()\n    .asfreq("D", fill_value=0)\n    .reset_index()\n)\n\ndf_pts["日付"] = df_pts["index"].dt.strftime("%Y-%m-%d")\n\ndf_pts.rename(columns={"発表日": "小計"}, inplace=True)\n\ndf_pts.drop("index", axis=1, inplace=True)\n\ndf_pts\n'

In [155]:
"""
patients_summary = {
    "data": df_pts.to_dict(orient="records"),
    "last_update": last_update.strftime("%Y-%m-%d %H:%M"),
}
"""

'\npatients_summary = {\n    "data": df_pts.to_dict(orient="records"),\n    "last_update": last_update.strftime("%Y-%m-%d %H:%M"),\n}\n'

In [156]:
# NOTE(review): patients_summary has not changed since it was validated and
# written in the pcr.xlsx section (the cells in between are disabled string
# literals), so this repeats the same validation and rewrites the same file.
jsonschema.validate(patients_summary, PATIENTS_SUMMARY_SCHEMA)

dumps_json("patients_summary.json", patients_summary)

In [157]:
# Positive-patient details: one record per patient, ordered by patient number.

df_pt = df_kanja.loc[:, ["発表日", "居住地", "年代", "性別", "備考欄"]].sort_index().reset_index()

# Every record carries a null "退院" value.
df_pt["退院"] = None

# "date" is the plain day; "リリース日" is the same day as an ISO-8601 timestamp (Asia/Tokyo).
df_pt["date"] = df_pt["発表日"].dt.strftime("%Y-%m-%d")
df_pt["リリース日"] = df_pt["発表日"].apply(lambda d: pd.Timestamp(d, tz='Asia/Tokyo').isoformat())

# dayofweek is 0 for Monday, matching the order of this list.
week = ["月", "火", "水", "木", "金", "土", "日"]

df_pt["曜日"] = df_pt["発表日"].dt.dayofweek.apply(lambda x: week[x])

# Normalise the "No." prefix variants in the remarks. The dots are escaped so
# they only match a literal "."; unescaped, "NO." also matched "NO" followed by
# a digit and replaced that digit. regex=True is explicit because the default
# of str.replace differs between pandas versions.
df_pt["備考欄"] = df_pt["備考欄"].str.replace(r"NO\.|N0\.|NO,|N0,|No,", "No.", regex=True)
# Plain character substitution, no regex needed.
df_pt["備考欄"] = df_pt["備考欄"].str.replace("・", "、", regex=False)
df_pt["備考欄"] = df_pt["備考欄"].fillna("")
df_pt.rename(columns={"番号": "No", "備考欄": "備考"}, inplace=True)

# The raw timestamp column is not part of the output.
df_pt.drop("発表日", axis=1, inplace=True)

df_pt

Unnamed: 0,No,居住地,年代,性別,備考,退院,date,リリース日,曜日
0,1,西宮市,40代,男性,特定できず,,2020-03-01,2020-03-01T00:00:00+09:00,日
1,2,神戸市,40代,男性,感染経路確認済,,2020-03-03,2020-03-03T00:00:00+09:00,火
2,3,神戸市,40代,女性,2/15、16に大阪のライブハウスArcに参加,,2020-03-03,2020-03-03T00:00:00+09:00,火
3,4,福崎町,50代,女性,2/15、16に大阪のライブハウスArcに参加,,2020-03-05,2020-03-05T00:00:00+09:00,木
4,5,姫路市,40代,男性,2/19に大阪のSoap operaライブに参加,,2020-03-06,2020-03-06T00:00:00+09:00,金
...,...,...,...,...,...,...,...,...,...
1858,1860,川西市,60代,男性,No1728の濃厚接触者,,2020-08-15,2020-08-15T00:00:00+09:00,土
1859,1861,伊丹健康福祉事務所管内,40代,男性,No1728の濃厚接触者,,2020-08-15,2020-08-15T00:00:00+09:00,土
1860,1862,加古川市,40代,男性,行動歴調査中,,2020-08-15,2020-08-15T00:00:00+09:00,土
1861,1863,加東健康福祉事務所管内,70代,女性,行動歴調査中,,2020-08-15,2020-08-15T00:00:00+09:00,土


In [158]:
# Row-oriented payload: one record per patient.
patients = dict(
    data=df_pt.to_dict(orient="records"),
    last_update=last_update.isoformat(),
)

In [159]:
# Validate against the schema first; the file is written only if validation passes.
jsonschema.validate(patients, PATIENTS_SCHEMA)

dumps_json("patients.json", patients)

In [160]:
# Totals per age bracket.

# Fixed bracket order; brackets with no patients are reported as 0.
age_list = [
    "10歳未満",
    "10代",
    "20代",
    "30代",
    "40代",
    "50代",
    "60代",
    "70代",
    "80代",
    "90歳以上",
    "非公表",
]

df_age = (
    df_kanja["年代"]
    .value_counts()
    .sort_index()
    .reindex(age_list, fill_value=0)
    .astype(int)
)

In [161]:
# Payload for age.json: bracket name -> patient count.
age = dict(
    data=df_age.to_dict(),
    last_update=last_update.isoformat(),
)

In [162]:
# Validate against the schema first; the file is written only if validation passes.
jsonschema.validate(age, AGE_SCHEMA)

dumps_json("age.json", age)

In [163]:
# Announcement date x age bracket counts, with columns in age_list order.
df_ages = pd.crosstab(df_kanja["発表日"], df_kanja["年代"]).reindex(columns=age_list, fill_value=0)

# Add a zero row for the last PCR date when no patient was announced on it.
if df_pcr.index[-1] not in df_ages.index:
    df_ages.loc[df_pcr.index[-1]] = 0

df_ages = df_ages.astype(int)
df_ages.sort_index(inplace=True)

# Daily frequency: days without any row are filled with zeros.
df_agesum = df_ages.asfreq("D", fill_value=0)

df_agesum

年代,10歳未満,10代,20代,30代,40代,50代,60代,70代,80代,90歳以上,非公表
発表日,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-03-01,0,0,0,0,1,0,0,0,0,0,0
2020-03-02,0,0,0,0,0,0,0,0,0,0,0
2020-03-03,0,0,0,0,2,0,0,0,0,0,0
2020-03-04,0,0,0,0,0,0,0,0,0,0,0
2020-03-05,0,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
2020-08-11,5,2,7,5,5,3,3,0,0,0,0
2020-08-12,0,2,2,6,6,5,3,4,2,6,0
2020-08-13,2,4,13,4,7,7,6,2,1,2,0
2020-08-14,0,1,7,2,5,8,5,2,2,2,0


In [164]:
# Axis labels in "month/day" form, without zero padding.
labels = df_agesum.index.map(lambda day: "{}/{}".format(day.month, day.day))

In [165]:
# Column-oriented payload: one list of daily counts per age bracket.
age_summary = dict(
    data=df_agesum.to_dict(orient="list"),
    labels=labels.tolist(),
    last_update=last_update.isoformat(),
)

In [166]:
# Validate against the schema first; the file is written only if validation passes.
jsonschema.validate(age_summary, AGE_SUMMARY_SCHEMA)

dumps_json("age_summary.json", age_summary)

In [167]:
# Cluster summary: per column from "認定こども園" through "特定できず", count
# the patients that have any value in it.
cluster_flags = df_kanja.loc[:, "認定こども園":"特定できず"].notna()
df_cluster_sum = cluster_flags.sum()

In [168]:
# Payload for clusters_summary.json: column name -> patient count.
clusters_summary = dict(
    data=df_cluster_sum.to_dict(),
    last_update=last_update.isoformat(),
)

In [169]:
# Validate against the schema first; the file is written only if validation passes.
jsonschema.validate(clusters_summary, CLUSTERS_SUMMARY_SCHEMA)

dumps_json("clusters_summary.json", clusters_summary)

In [170]:
# Clusters: same column range as the summary, with empty cells replaced by 0.

df_clusters = df_kanja.loc[:, "認定こども園":"特定できず"].copy().fillna(0)

In [171]:
# Turn every non-zero cell into 1 so the columns become 0/1 flags.
df_clusters[df_clusters != 0] = 1

In [172]:
# Attach the announcement date (aligned on the shared patient-number index).
df_clusters["発表日"] = df_kanja["発表日"]

In [173]:
# Sum the flags per announcement date.
pv_clusters = df_clusters.pivot_table(index="発表日", aggfunc="sum")

In [174]:
# Add a zero row for the last PCR date when it has no cluster data,
# then restore chronological order.
if df_pcr.index[-1] not in pv_clusters.index:
    pv_clusters.loc[df_pcr.index[-1]] = 0

pv_clusters.sort_index(inplace=True)

In [175]:
# Daily frequency: days without any row are filled with zeros.
pv_clusters = pv_clusters.asfreq("D", fill_value=0)

# ISO-8601 timestamp (Asia/Tokyo) for each index date.
pv_clusters["日付"] = pv_clusters.index.map(lambda d: pd.Timestamp(d, tz='Asia/Tokyo').isoformat())

In [176]:
# Row-oriented payload: one record per day.
# The orient value is spelled "records"; the previous "recodes" only worked
# through prefix matching in old pandas and raises ValueError in current releases.
clusters = {
    "data": pv_clusters.to_dict(orient="records"),
    "last_update": last_update.isoformat(),
}

In [177]:
# Validate against the schema first; the file is written only if validation passes.
jsonschema.validate(clusters, CLUSTERS_SCHEMA)
dumps_json("clusters.json", clusters)

In [178]:
# Duplicates: number of patients that have a value in more than one column.
# NOTE(review): this slice is open-ended ("認定こども園" to the last column),
# whereas the cluster cells stop at "特定できず" — confirm which is intended.

(df_kanja.loc[:, "認定こども園":].copy().notnull().sum(axis=1) > 1).sum()

4

## yousei.xlsx

In [179]:
# Download the yousei.xlsx workbook into DOWNLOAD_DIR.
yousei_path = get_file(YOUSEI_XLSX, DOWNLOAD_DIR)

In [180]:
# Load the workbook indexed by announcement date.
df_yousei = pd.read_excel(yousei_path, index_col="発表年月日")

# Strip the "（累計）" marker and surrounding whitespace from the headers.
df_yousei.columns = df_yousei.columns.map(lambda name: name.replace("（累計）", "").strip())

# Disabled: shift the index forward by one day.
# df_yousei.index += pd.to_timedelta("1 days")

# Shorten the headers to the attribute names used in main_summary, then
# discard the announcement-time column.
df_yousei = df_yousei.rename(
    columns={
        "入院中（合計）": "入院中",
        "入院中（中等症以下）": "軽症・中等症",
        "入院中（重症）": "重症",
        "陽性者数": "陽性患者数",
    }
).drop(columns="発表時間")

In [181]:
# Dump the cleaned table as a tab-separated file in the working directory.
df_yousei.to_csv("yousei.tsv", sep="\t")

In [182]:
# Values of the last row, keyed by column name.
d = df_yousei.iloc[-1].to_dict()

In [183]:
d

{'入院中': 219,
 '宿泊療養': 60,
 '検査実施人数': 35836,
 '死亡': 48,
 '軽症・中等症': 205,
 '退院': 1536,
 '重症': 14,
 '陽性患者数': 1863}

In [184]:
def _node(attr, children=None):
    """Build one tree node: the label, its value looked up in ``d``, and optional children."""
    entry = {"attr": attr, "value": d[attr]}
    if children is not None:
        entry["children"] = children
    return entry


# Tree: tested -> positive -> (hospitalised -> severity split, lodging care, deaths, discharged).
main_summary = _node(
    "検査実施人数",
    [
        _node(
            "陽性患者数",
            [
                _node("入院中", [_node("軽症・中等症"), _node("重症")]),
                _node("宿泊療養"),
                _node("死亡"),
                _node("退院"),
            ],
        )
    ],
)
main_summary["last_update"] = last_update.isoformat()

In [185]:
# Validate against the schema first; the file is written only if validation passes.
jsonschema.validate(main_summary, MAIN_SUMMARY_SCHEMA)

dumps_json("main_summary.json", main_summary)

In [186]:
# Under treatment = hospitalised + lodging care.
df_yousei["治療中"] = df_yousei["入院中"] + df_yousei["宿泊療養"]

In [187]:
# Align the series to the PCR table's dates; dates absent from yousei.xlsx become NaN.
ser_cur = df_yousei["治療中"].reindex(df_pcr.index)

In [188]:
# Where the aligned series is NaN, fall back to the cumulative sum of confirmed
# positives; then take the day-over-day difference. The first row has no
# predecessor, so its NaN is replaced by 0 before casting to int.
df_current = pd.DataFrame({"小計": ser_cur.combine_first(df_pcr["陽性確認"].cumsum())}).diff().fillna(0).astype(int)

In [189]:
df_current

Unnamed: 0_level_0,小計
年月日,Unnamed: 1_level_1
2020-01-31,0
2020-02-01,0
2020-02-02,0
2020-02-03,0
2020-02-04,0
...,...
2020-08-11,-12
2020-08-12,-1
2020-08-13,-10
2020-08-14,7


In [190]:
# ISO-8601 timestamp (Asia/Tokyo) for each index date.
df_current["日付"] = [pd.Timestamp(day, tz="Asia/Tokyo").isoformat() for day in df_current.index]

In [191]:
# Put the date column first, as in the other row-oriented outputs.
df_cur_pts = df_current[["日付", "小計"]].copy()

In [192]:
# Row-oriented payload: one {"日付", "小計"} record per day.
current_patients = dict(
    data=df_cur_pts.to_dict(orient="records"),
    last_update=last_update.isoformat(),
)

In [193]:
# Same record shape as patients_summary, so the same schema is reused.
jsonschema.validate(current_patients, PATIENTS_SUMMARY_SCHEMA)

dumps_json("current_patients.json", current_patients)

# チェック

In [194]:
!pip install dictdiffer



In [195]:
from dictdiffer import diff, patch, swap, revert

In [196]:
import pprint

In [197]:
def json_check(fn, d, timeout=60):
    """Compare a generated payload with the published JSON file of the same name.

    The reference file is fetched from the ``gh-pages`` branch of
    ``stop-covid19-hyogo/covid19-scraping``. Nothing is printed when both
    are equal; otherwise the file name and the list of differences
    (as produced by ``dictdiffer.diff``) are printed.

    Args:
        fn: File name of the published JSON, e.g. ``"age.json"``.
        d: Locally generated object to compare against it.
        timeout: Seconds passed to ``requests.get`` so a stalled
            connection cannot block the notebook indefinitely.

    Raises:
        requests.HTTPError: If the reference file cannot be fetched.
    """
    r = requests.get(
        "https://raw.githubusercontent.com/stop-covid19-hyogo/covid19-scraping/gh-pages/"
        + fn,
        timeout=timeout,
    )
    # Surface a missing or unavailable file as an HTTP error rather than a
    # confusing JSON decoding failure.
    r.raise_for_status()

    # Parse the body once and reuse it for both the comparison and the diff.
    published = r.json()

    if d != published:

        print(fn)
        pprint.pprint(list(diff(d, published)))

In [198]:
# Compare the inspection outputs with the published files (prints only on mismatch).
json_check("inspections.json", inspections)
json_check("inspections_summary.json", inspections_summary)

In [199]:
# Compare the age outputs with the published files (prints only on mismatch).
json_check("age.json", age)
json_check("age_summary.json", age_summary)

In [200]:
# Compare the patient outputs with the published files (prints only on mismatch).
json_check("patients_summary.json", patients_summary)
json_check("patients.json", patients)
json_check("current_patients.json", current_patients)

patients.json
[('change', ['data', 22, '備考'], ('', 'None')),
 ('change',
  ['data', 26, '備考'],
  ('介護老人保健施設グリーンアルス伊丹デイケアを利用宝塚第一病院の入院者',
   '介護老人保健施設グリーンアルス伊丹デイケアを利用\n宝塚第一病院の入院者')),
 ('change',
  ['data', 47, '備考'],
  ('2/19に大阪のSoap operaライブに参加\u3000No.49、69と同居',
   '2/19に大阪のSoap operaライブに参加\u3000\nNo.49、69と同居')),
 ('change',
  ['data', 49, '備考'],
  ('神戸市内の介護保険通所事業所の利用者（5/20再発、神戸No.284)',
   '神戸市内の介護保険通所事業所の利用者\n（5/20再発、神戸No.284)')),
 ('change',
  ['data', 68, '備考'],
  ('神戸市内の介護保険通所事業所の利用者No.48、49と同居', '神戸市内の介護保険通所事業所の利用者\nNo.48、49と同居')),
 ('change',
  ['data', 266, '備考'],
  ('No.268の同居人大阪府発表369例目患者と接触歴あり', 'No.268の同居人\n大阪府発表369例目患者と接触歴あり')),
 ('change',
  ['data', 288, '備考'],
  ('No.213の濃厚接触者（神戸西署）（5/16再発、神戸No.283)',
   'No.213の濃厚接触者（神戸西署）\n（5/16再発、神戸No.283)')),
 ('change',
  ['data', 595, '備考'],
  ('市内の介護施設に勤務する職員No.567の同居者', '市内の介護施設に勤務する職員\nNo.567の同居者')),
 ('change',
  ['data', 620, '備考'],
  ('市立医療センター中央市民病院に勤務する医師（5/20再発、神戸No.285)',
   '市立医療センター中央市民病院に勤務する医師\n（5/20再発、神戸No.285)')),


In [201]:
# Compare the cluster outputs with the published files (prints only on mismatch).
json_check("clusters.json", clusters)
json_check("clusters_summary.json", clusters_summary)

In [202]:
# Compare the main summary with the published file (prints only on mismatch).
json_check("main_summary.json", main_summary)