<a href="https://colab.research.google.com/github/imabari/toyama/blob/master/toyama_covid19.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ダウンロード

In [0]:
# Local file names under which the two downloaded CSV files are stored
# (daily counts and per-patient records, in that order).
COUNTS_FILE, PATIENTS_FILE = "toyama_counts.csv", "toyama_patients.csv"

In [0]:
!wget http://opendata.pref.toyama.jp/files/covid19/20200403/toyama_counts.csv -O $COUNTS_FILE

In [0]:
!wget http://opendata.pref.toyama.jp/files/covid19/20200403/toyama_patients.csv -O $PATIENTS_FILE

# データラングリング

In [0]:
import datetime
import pandas as pd
import json

In [0]:
def dumps_json(file_name, json_data):
    """Write ``json_data`` to ``file_name`` as indented, UTF-8 encoded JSON.

    Args:
        file_name: Path of the file to create or overwrite.
        json_data: Any object ``json.dump`` can serialize.

    Raises:
        TypeError: If ``json_data`` contains a value ``json.dump`` cannot serialize.
        OSError: If the file cannot be opened for writing.
    """
    # ensure_ascii=False writes non-ASCII characters as-is, so the file
    # encoding is fixed to UTF-8 instead of depending on the locale default.
    with open(file_name, "w", encoding="utf-8") as fw:
        json.dump(json_data, fw, ensure_ascii=False, indent=2)

In [0]:
# Fixed UTC+09:00 offset labelled "JST"
JST = datetime.timezone(offset=datetime.timedelta(hours=9), name="JST")

# Current time in JST, formatted as "YYYY/MM/DD HH:MM"
dt_now = format(datetime.datetime.now(tz=JST), "%Y/%m/%d %H:%M")

In [0]:
# Root object of the exported JSON; later cells add one key per dataset.
data = dict(lastUpdate=dt_now)

# 集計結果

In [0]:
# Load the counts CSV, using the "年月日" column as the index;
# parse_dates=True asks pandas to parse that index as dates.
df = pd.read_csv(
    COUNTS_FILE,
    index_col="年月日",
    parse_dates=True,
)

In [0]:
# Preview the first 10 rows of the counts table
df.iloc[:10]

In [0]:
# Add the index dates as "YYYY-MM-DD" strings; this column is what gets exported to JSON.
df["日付"] = df.index.strftime(date_format="%Y-%m-%d")

In [0]:
# Number of people tested per day: keep the date string and the count,
# exposing the count under the key "小計".
df_insp = df[["日付", "検査実施人数"]].rename(columns={"検査実施人数": "小計"})

# orient="records" yields a list with one {"日付": ..., "小計": ...} dict per row.
data["inspection_persons"] = {"date": dt_now, "data": df_insp.to_dict(orient="records")}

In [0]:
# Number of positive cases per day: keep the date string and the count,
# exposing the count under the key "小計".
df_pats = df[["日付", "陽性人数"]].rename(columns={"陽性人数": "小計"})

# orient="records" yields a list with one {"日付": ..., "小計": ...} dict per row.
data["patients_summary"] = {"date": dt_now, "data": df_pats.to_dict(orient="records")}

In [0]:
# Number of general consultations per day: keep the date string and the count,
# exposing the count under the key "小計".
df_contacts = df[["日付", "一般相談件数"]].rename(columns={"一般相談件数": "小計"})

# orient="records" yields a list with one {"日付": ..., "小計": ...} dict per row.
data["contacts"] = {"date": dt_now, "data": df_contacts.to_dict(orient="records")}

In [0]:
# Number of returnee/contact consultations per day: keep the date string and
# the count, exposing the count under the key "小計".
df_querents = df[["日付", "帰国者相談件数"]].rename(columns={"帰国者相談件数": "小計"})

# orient="records" yields a list with one {"日付": ..., "小計": ...} dict per row.
data["querents"] = {"date": dt_now, "data": df_querents.to_dict(orient="records")}

# 患者情報

In [0]:
# Load the per-patient attributes, indexed by the "No" column.
# "年代" is forced to object dtype so the age-bracket labels stay as text.
df_kanja = pd.read_csv(
    PATIENTS_FILE,
    index_col="No",
    dtype={"年代": "object"},
)

In [0]:
# Expose the publication date column under the JSON key name "date".
df_kanja = df_kanja.rename(columns={"公表年月日": "date"})

In [0]:
# Convert the age-bracket labels to integers by stripping the trailing "代";
# the two open-ended labels are first rewritten to "0代" and "90代".
df_kanja["age"] = (
    df_kanja["年代"]
    .replace({"10代未満": "0代", "90代以上": "90代"})
    .str.rstrip("代")
    .astype(int)
)

In [0]:
# Independent copy holding only the columns that are exported for each patient.
df_patients = df_kanja[["date", "居住地", "年代", "性別"]].copy()

In [0]:
# orient="records" yields a list with one {column: value} dict per patient row.
data["patients"] = {"date": dt_now, "data": df_patients.to_dict(orient="records")}

In [0]:
# Patient count per status, always in the order 入院中 / 退院 / 死亡,
# with 0 for a status that does not occur. Other status values are dropped here.
s_stat = (
    df_kanja["状態"]
    .value_counts()
    .reindex(["入院中", "退院", "死亡"], fill_value=0)
    .astype(int)
)

In [0]:
# Sanity check: the three status counts should add up to the number of patient rows.
if s_stat.sum() != df_kanja.shape[0]:
    print("Warning:患者人数と状態の合計が違います")

In [0]:
# Keep the symptom only for rows whose status is "入院中"; every other row becomes missing.
df_kanja["症状"] = df_kanja["症状"].where(df_kanja["状態"].eq("入院中"))

In [0]:
# Patient count per symptom level, always in the order 無症状 / 軽症・中等症 / 重症,
# with 0 for a level that does not occur. Missing symptoms are not counted.
s_symp = (
    df_kanja["症状"]
    .value_counts()
    .reindex(["無症状", "軽症・中等症", "重症"], fill_value=0)
    .astype(int)
)

In [0]:
# Sanity check: the symptom counts should add up to the number of hospitalized patients.
if s_symp.sum() != s_stat.loc["入院中"]:
    print("Warning:入院中と症状の合計が違います")

In [0]:
# Flat lookup of every headline figure: the two totals, then the per-symptom
# counts, then the per-status counts (same insertion order as successive updates).
main_sum = {
    "陽性患者数": len(df_kanja),
    "検査実施人数": int(df_insp["小計"].sum()),
    **s_symp.to_dict(),
    **s_stat.to_dict(),
}

In [0]:
# Nested summary tree: tested -> positive -> (hospitalized -> symptom levels, discharged, died).
# Every node is {"attr": label, "value": count}; inner nodes also carry "children".
data["main_summary"] = {
    "attr": "検査実施人数",
    "value": main_sum["検査実施人数"],
    "children": [
        {
            "attr": "陽性患者数",
            "value": main_sum["陽性患者数"],
            "children": [
                {
                    "attr": "入院中",
                    "value": main_sum["入院中"],
                    "children": [
                        {"attr": symptom, "value": main_sum[symptom]}
                        for symptom in ("無症状", "軽症・中等症", "重症")
                    ],
                },
                *(
                    {"attr": outcome, "value": main_sum[outcome]}
                    for outcome in ("退院", "死亡")
                ),
            ],
        }
    ],
}

In [0]:
# Dump the assembled dict as plain text to eyeball it before it is written to disk.
print(data)

# JSON出力

In [0]:
# Serialize the assembled dict to data.json
dumps_json(file_name="data.json", json_data=data)

# ダウンロード

In [0]:
from google.colab import files

In [0]:
# Hand data.json to the google.colab `files` helper.
# NOTE(review): presumably triggers a browser download and only works inside Colab - confirm.
files.download("data.json")

# 可視化

In [0]:
!pip install japanize-matplotlib

In [0]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

In [0]:
import japanize_matplotlib

## 陽性者一覧

In [0]:
# Show the patient table as this cell's output.
df_kanja

## 陽性者概要

In [0]:
# Headline figures as a Series, in the order they should appear in the chart.
s_main = pd.Series(
    {
        label: main_sum[label]
        for label in ("検査実施人数", "陽性患者数", "入院中", "無症状", "軽症・中等症", "重症", "死亡", "退院")
    }
)
s_main

In [0]:
# Horizontal bar chart of the headline figures
s_main.plot(kind="barh")

## 陽性患者数

In [0]:
# Bar chart of "陽性人数" over the date index.
# The tick locator/formatter are installed before plotting, as in every chart of this notebook.
fig, ax = plt.subplots()
date_locator = mdates.AutoDateLocator()
ax.xaxis.set_major_locator(date_locator)
ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(date_locator))

ax.bar(x=df.index, height=df["陽性人数"])

In [0]:
# Last 10 rows of the positive-case counts
df["陽性人数"].iloc[-10:]

In [0]:
# Last 10 rows of the running total of positive cases
df["陽性人数"].cumsum().iloc[-10:]

## 検査実施人数

In [0]:
# Bar chart of "検査実施人数" over the date index.
# The tick locator/formatter are installed before plotting, as in every chart of this notebook.
fig, ax = plt.subplots()
date_locator = mdates.AutoDateLocator()
ax.xaxis.set_major_locator(date_locator)
ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(date_locator))

ax.bar(x=df.index, height=df["検査実施人数"])

In [0]:
# Last 10 rows of the tested-person counts
df["検査実施人数"].iloc[-10:]

In [0]:
# Last 10 rows of the running total of tested persons
df["検査実施人数"].cumsum().iloc[-10:]

## 一般相談件数

In [0]:
# Bar chart of "一般相談件数" over the date index.
# The tick locator/formatter are installed before plotting, as in every chart of this notebook.
fig, ax = plt.subplots()
date_locator = mdates.AutoDateLocator()
ax.xaxis.set_major_locator(date_locator)
ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(date_locator))

ax.bar(x=df.index, height=df["一般相談件数"])

In [0]:
# Last 10 rows of the general consultation counts
df["一般相談件数"].iloc[-10:]

In [0]:
# Last 10 rows of the running total of general consultations
df["一般相談件数"].cumsum().iloc[-10:]

## 帰国者・接触者相談センター相談件数

In [0]:
# Bar chart of "帰国者相談件数" over the date index.
# The tick locator/formatter are installed before plotting, as in every chart of this notebook.
fig, ax = plt.subplots()
date_locator = mdates.AutoDateLocator()
ax.xaxis.set_major_locator(date_locator)
ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(date_locator))

ax.bar(x=df.index, height=df["帰国者相談件数"])

In [0]:
# Last 10 rows of the returnee/contact consultation counts
df["帰国者相談件数"].iloc[-10:]

In [0]:
# Last 10 rows of the running total of returnee/contact consultations
df["帰国者相談件数"].cumsum().iloc[-10:]

## 年代別

In [0]:
# Patient count per age bracket (0, 10, ..., 90), in ascending order, with 0 for empty brackets.
s_age = (
    df_kanja["age"]
    .value_counts()
    .reindex([0, 10, 20, 30, 40, 50, 60, 70, 80, 90], fill_value=0)
    .astype(int)
)

In [0]:
# Horizontal bar chart of patients per age bracket
s_age.plot(kind="barh")

## 性別

In [0]:
# Patient count per value of the "性別" column
s_sex = df_kanja.loc[:, "性別"].value_counts()

In [0]:
# Horizontal bar chart of patients per sex
s_sex.plot(kind="barh")

## 年代別性別

In [0]:
# Cross-tabulate age bracket (rows) against sex (columns); rows are fixed to
# 0, 10, ..., 90 and brackets with no patients are filled with 0.
pv_age = (
    pd.crosstab(df_kanja["age"], df_kanja["性別"])
    .reindex([0, 10, 20, 30, 40, 50, 60, 70, 80, 90], fill_value=0)
    .astype(int)
)

In [0]:
# Horizontal bars per age bracket, one bar per sex column.
# NOTE(review): two colors are given - presumably "性別" has exactly two values; confirm.
pv_age.plot(kind="barh", color=["#e41a1c", "#377eb8"])

## 市町村別

In [0]:
# Horizontal bar chart of patient counts per value of the "居住地" column
df_kanja["居住地"].value_counts().plot(kind="barh")