<a href="https://colab.research.google.com/github/imabari/covid19-data/blob/master/toyama/toyama_patient.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import datetime
import json
import pathlib
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [0]:
# Toyama Prefecture web page that is fetched and searched for the link to the patient list.
url = "http://www.pref.toyama.jp/cms_sec/1205/kj00021798.html"

# HTTP headers for the request: a browser-style User-Agent string.
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"}

In [0]:
# Download the page. requests applies no timeout unless one is given, so an
# explicit limit (in seconds) keeps the notebook from hanging indefinitely
# when the server does not respond.
r = requests.get(url, headers=headers, timeout=30)

# Stop here on an HTTP 4xx/5xx response instead of parsing an error page.
r.raise_for_status()

# Parse the raw response bytes with Python's built-in HTML parser.
soup = BeautifulSoup(r.content, "html.parser")

In [0]:
# Locate the <div id="file"> element that is searched for the download link.
file_list = soup.find("div", id="file")
if file_list is None:
    raise RuntimeError('<div id="file"> was not found; the page layout may have changed')

# Find the anchor whose text is exactly the patient-list title. `string=` is
# the current name of the filter that older Beautiful Soup code spells `text=`.
anchor = file_list.find("a", string="富山県内における新型コロナウイルス感染症の発生状況一覧")
if anchor is None or not anchor.get("href"):
    raise RuntimeError("the link to the patient list was not found on the page")

# Resolve the href against the page URL so that a relative link still becomes
# a complete URL; an href that is already absolute is returned unchanged.
link = urljoin(url, anchor.get("href"))

In [0]:
# Current date and time as "YYYY-MM-DD HH:MM", pinned to Japan Standard Time
# (UTC+9) so the stamp does not depend on the time zone of the machine that
# runs the notebook (e.g. a hosted runtime configured for UTC).
dt_now = datetime.datetime.now(
    datetime.timezone(datetime.timedelta(hours=9), "JST")
).strftime("%Y-%m-%d %H:%M")

In [0]:
# Read the Excel file at `link` into a DataFrame. skiprows=2 skips the first
# two rows of the sheet, so the third row supplies the column names
# (presumably the first two rows are title/notes lines — confirm against the file).
df_kanja = pd.read_excel(link, skiprows=2)

In [0]:
# Convert the column from day counts relative to 1899-12-30 (the values are
# treated as Excel-style serial day numbers) into "YYYY-MM-DD" strings.
# The conversion is done on the whole column at once: a missing value becomes
# NaT and then a missing string, whereas a per-row NaT.strftime() call raises.
df_kanja["検査結果判明日"] = pd.to_datetime(
    df_kanja["検査結果判明日"], unit="D", origin=pd.Timestamp("1899/12/30")
).dt.strftime("%Y-%m-%d")

In [0]:
# Rename the "県番号" column to "No"; every other column keeps its name.
df_kanja = df_kanja.rename(columns={"県番号": "No"})

In [0]:
# Keep only the columns that go into the exported patient records, in this order.
df_patients = df_kanja[
    [
        "No",
        "検査結果判明日",
        "居住地",
        "年代",
        "性別",
        "職業",
    ]
]

In [0]:
# Turn the frame into a list of per-row dicts. Missing values are replaced by
# None first so they are written as JSON null; json.dump would otherwise emit
# the bare token NaN, which is not valid JSON. The cast to object dtype is
# what lets None be stored in columns that are numeric.
records = (
    df_patients.astype(object)
    .where(df_patients.notna(), None)
    .to_dict(orient="records")
)

# Payload: the patient records plus the timestamp of this run.
data = {"patients": {"data": records, "date": dt_now}}

# Output path data/patients.json; create the directory if it does not exist.
p = pathlib.Path("data", "patients.json")
p.parent.mkdir(parents=True, exist_ok=True)

# ensure_ascii=False writes the Japanese text as-is (UTF-8) instead of \u escapes.
with p.open(mode="w", encoding="utf-8") as fw:
    json.dump(data, fw, ensure_ascii=False, indent=4)