<a href="https://colab.research.google.com/github/imabari/covid19-data/blob/master/toyama/toyama_positive_rate.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd

# オープンデータ

In [2]:
# Published Google Sheets tab exported as CSV (`output=csv`); loaded below as the daily counts table.
COUNTS_URL = "https://docs.google.com/spreadsheets/d/e/2PACX-1vSJuQThafLPC7OPqUC9TbLV1DmSU0x2Co8VZi2Q2ZZCKLJCTayDl6IoXKyK676mzBgpkoKMgpNK1VML/pub?gid=574469870&single=true&output=csv"

In [3]:
# Load the selected columns with the first column as a parsed date index,
# keep rows that have at least 5 non-missing values, and treat the remaining
# gaps as zero so every column can be stored as int.
df1 = (
    pd.read_csv(
        COUNTS_URL,
        usecols=[0, 1, 2, 3, 4, 7, 8],
        index_col=0,
        parse_dates=True,
    )
    .dropna(thresh=5)
    .fillna(0)
    .astype(int)
)

In [4]:
# Show the loaded daily counts.
df1

Unnamed: 0_level_0,PCR検査数,抗原検査数,陰性人数,陽性人数,退院者数,死亡者数
年月日,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-02-27,24,0,24,0,0,0
2020-02-28,0,0,0,0,0,0
2020-02-29,0,0,0,0,0,0
2020-03-01,1,0,1,0,0,0
2020-03-02,1,0,1,0,0,0
...,...,...,...,...,...,...
2021-01-02,37,29,31,6,2,0
2021-01-03,28,31,21,7,8,0
2021-01-04,152,41,143,9,5,0
2021-01-05,238,88,219,19,14,0


In [5]:
# Total tests per day = PCR tests + antigen tests.
df1["検査数"] = df1["PCR検査数"].add(df1["抗原検査数"])

In [6]:
# Running totals of the daily positive, discharged and death counts.
for daily_col, total_col in {
    "陽性人数": "陽性者累計",
    "退院者数": "退院者累計",
    "死亡者数": "死亡者累計",
}.items():
    df1[total_col] = df1[daily_col].cumsum()

In [7]:
# Currently hospitalised = cumulative positives - cumulative discharges - cumulative deaths.
df1["入院者数"] = df1["陽性者累計"].sub(df1["退院者累計"]).sub(df1["死亡者累計"])

In [8]:
# 7-row rolling mean of the hospitalised count; the first 6 rows have no
# full window and are reported as 0.
df1["入院者数_週平均"] = (
    df1["入院者数"]
    .rolling(7)
    .mean()
    .fillna(0)
)

In [9]:
# Positive rate (%) = 7-row mean of positives / 7-row mean of all tests,
# rounded to one decimal; undefined values (NaN) are reported as 0.
df1["陽性率"] = (
    df1["陽性人数"].rolling(window=7).mean()
    .div(df1["検査数"].rolling(window=7).mean())
    .mul(100)
    .round(1)
    .fillna(0)
)

In [10]:
# Same calculation as the overall positive rate, but with PCR tests only
# in the denominator.
df1["PCR陽性率"] = (
    df1["陽性人数"].rolling(window=7).mean()
    .div(df1["PCR検査数"].rolling(window=7).mean())
    .mul(100)
    .round(1)
    .fillna(0)
)

In [11]:
# Inspect the 10 most recent rows, including the derived columns.
df1.tail(n=10)

Unnamed: 0_level_0,PCR検査数,抗原検査数,陰性人数,陽性人数,退院者数,死亡者数,検査数,陽性者累計,退院者累計,死亡者累計,入院者数,入院者数_週平均,陽性率,PCR陽性率
年月日,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2020-12-28,110,28,105,5,3,0,138,548,462,26,60,52.285714,2.7,3.4
2020-12-29,132,83,129,3,6,0,215,551,468,26,57,54.857143,2.5,3.3
2020-12-30,41,41,38,3,7,0,82,554,475,26,53,57.142857,2.8,3.6
2020-12-31,44,34,36,8,3,0,78,562,478,26,58,57.571429,2.1,2.9
2021-01-01,25,44,18,7,3,0,69,569,481,26,62,58.142857,3.0,4.4
2021-01-02,37,29,31,6,2,0,66,575,483,26,66,59.142857,4.7,7.5
2021-01-03,28,31,21,7,8,0,59,582,491,26,65,60.142857,5.5,9.4
2021-01-04,152,41,143,9,5,0,193,591,496,26,69,61.428571,5.6,9.4
2021-01-05,238,88,219,19,14,0,326,610,510,26,74,63.857143,6.8,10.4
2021-01-06,179,79,152,27,9,0,258,637,519,26,92,69.428571,7.9,11.8


# 富山県

In [12]:
import requests
from bs4 import BeautifulSoup

In [13]:
import datetime

In [14]:
# Current wall-clock time in Japan (UTC+9), kept timezone-naive so it can be
# compared with the naive timestamps built from the scraped month/day labels.
# A plain datetime.now() uses the kernel's local zone; on a UTC kernel
# (NOTE(review): hosted runtimes such as Colab are presumably UTC — confirm)
# today's date would compare as "in the future" until 09:00 JST and be
# shifted back a year by the year-rollover check.
dt_now = datetime.datetime.now(
    datetime.timezone(datetime.timedelta(hours=9))
).replace(tzinfo=None)

In [15]:
# Page that is fetched and searched for the link to the indicator workbook.
url = "http://www.pref.toyama.jp/cms_sec/1205/kj00022038.html"

# Browser-like User-Agent sent with the request.
# NOTE(review): presumably the site treats the default client UA differently — confirm.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
}

In [16]:
# Fetch the page. Without a timeout, requests waits forever on a stalled
# server, which would hang a full notebook run.
r = requests.get(url, headers=headers, timeout=30)

In [17]:
from urllib.parse import urljoin

# Stop early on HTTP errors instead of parsing an error page.
r.raise_for_status()
soup = BeautifulSoup(r.content, "html.parser")

# Locate the download anchor by its exact link text inside <div id="file">.
# `string=` is the current name of the deprecated `text=` argument.
file_div = soup.find("div", id="file")
anchor = (
    file_div.find("a", string="強化・緩和の判断指標（直近１週間平均）の推移")
    if file_div is not None
    else None
)
if anchor is None or not anchor.get("href"):
    # Fail with a clear message rather than an AttributeError on None.
    raise ValueError("indicator workbook link not found on the page; layout may have changed")

# Resolve a relative href against the final page URL; an absolute href is returned unchanged.
link = urljoin(r.url, anchor.get("href"))

In [18]:
# Read the workbook with no header row, skipping the first 2 and last 4 rows;
# the first three columns become the row index.
raw_sheet = pd.read_excel(
    link,
    header=None,
    skiprows=2,
    skipfooter=4,
    index_col=[0, 1, 2],
)
# Transpose so each original column becomes a row, then replace the
# leftover column labels with a fresh 0..n-1 index.
df2 = raw_sheet.T.reset_index(drop=True)

In [19]:
# Give the transposed sheet flat column names. Assigning to `.columns`
# relabels in place on every pandas version; `set_axis(..., inplace=True)`
# raises TypeError on pandas >= 2.0, where the `inplace` argument was removed.
df2.columns = [
    "年月日",
    "入院者数",
    "重症病床稼働率",
    "新規陽性者数",
    "感染経路不明の患者数",
    "陽性率",
    "比較",
    "達成状況",
]

In [20]:
# Extract month and day from the "M/D" part of each label. NFKC normalisation
# maps full-width characters to their ASCII equivalents so the pattern can
# match. The pattern is a raw string: "\d" in a normal string literal is an
# invalid escape sequence (warning today, an error in a future Python).
# Labels with no match become month=0/day=0, which to_datetime(errors="coerce")
# turns into NaT below.
df_date = (
    df2["年月日"]
    .str.normalize("NFKC")
    .str.extract(r"(\d{1,2})/(\d{1,2})")
    .rename(columns={0: "month", 1: "day"})
    .fillna(0)
    .astype(int)
)

# Only month/day are extracted, so start by assuming the current year ...
df_date["year"] = dt_now.year
tmp = pd.to_datetime(df_date, errors="coerce")
# ... and move any date that would fall after "now" back by one year.
df_date["year"] = df_date["year"].mask(tmp > dt_now, df_date["year"] - 1)

df2["年月日"] = pd.to_datetime(df_date, errors="coerce")

In [21]:
# For each count column: NFKC-normalise the text, strip the trailing "人"
# suffix and convert to float.
for count_col in ("入院者数", "新規陽性者数", "感染経路不明の患者数"):
    df2[count_col] = (
        df2[count_col]
        .str.normalize("NFKC")
        .str.rstrip("人")
        .astype(float)
    )

In [22]:
# Scale the two ratio columns by 100 and round to one decimal place.
for ratio_col in ("重症病床稼働率", "陽性率"):
    df2[ratio_col] = df2[ratio_col].astype(float).mul(100).round(1)

In [23]:
# Use the parsed date column as the index so it can be joined on date.
df2 = df2.set_index("年月日")

In [24]:
# Check the column dtypes after the conversions above.
df2.dtypes

入院者数          float64
重症病床稼働率       float64
新規陽性者数        float64
感染経路不明の患者数    float64
陽性率           float64
比較             object
達成状況           object
dtype: object

In [25]:
# Inspect the 10 most recent rows of the scraped indicator table.
df2.tail(n=10)

Unnamed: 0_level_0,入院者数,重症病床稼働率,新規陽性者数,感染経路不明の患者数,陽性率,比較,達成状況
年月日,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-12-29,34.4,4.1,5.8,1.1,3.5,1.4,-
2020-12-30,35.0,4.5,5.7,1.4,4.0,1.3,-
2020-12-31,34.3,4.9,3.6,1.8,3.3,0.5,-
2021-01-01,33.4,5.3,4.0,2.3,5.5,0.6,-
2021-01-02,33.1,5.7,4.3,2.6,13.2,0.6,-
2021-01-03,33.0,5.7,4.7,3.0,17.7,0.7,-
2021-01-04,34.3,5.7,5.1,3.2,8.3,0.8,-
2021-01-05,38.0,5.7,6.6,3.6,10.4,1.1,-
2021-01-06,45.0,5.7,11.3,5.8,11.7,2.0,-
2021-01-07,51.0,5.7,13.1,5.5,11.2,3.7,-


# 結合

In [26]:
# Left-join the scraped table onto the daily counts by index; columns of df2
# whose names clash with df1 get the "（県）" suffix.
df = df1.join(other=df2, how="left", rsuffix="（県）")

In [27]:
# Daily calendar covering the comparison window (both endpoints included).
dt_range = pd.date_range(start="2020-05-12", end="2021-01-06", freq="D")

In [28]:
# Restrict the joined table to the comparison window; dates in the window
# that are missing from the data appear as all-NaN rows.
df = df.reindex(dt_range)

In [29]:
# Show the joined table restricted to the comparison window.
df

Unnamed: 0,PCR検査数,抗原検査数,陰性人数,陽性人数,退院者数,死亡者数,検査数,陽性者累計,退院者累計,死亡者累計,入院者数,入院者数_週平均,陽性率,PCR陽性率,入院者数（県）,重症病床稼働率,新規陽性者数,感染経路不明の患者数,陽性率（県）,比較,達成状況
2020-05-12,39,0,38,1,5,0,39,222,110,16,96,114.857143,2.1,2.1,77.9,7.9,0.8,0.14,2.1,-,-
2020-05-13,25,0,25,0,12,2,25,222,122,18,82,106.000000,2.1,2.1,74.9,7.9,0.8,0.14,2.1,-,-
2020-05-14,28,0,26,2,4,0,28,224,126,18,80,99.428571,2.6,2.6,70.6,8.6,1.0,0.14,2.6,-,-
2020-05-15,21,0,21,0,11,0,21,224,137,18,69,92.285714,2.7,2.7,66.6,9.3,0.8,0.14,2.7,-,-
2020-05-16,50,0,49,1,19,1,50,225,156,19,50,83.571429,2.2,2.2,62.9,10.0,0.5,0.14,2.2,-,-
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-01-02,37,29,31,6,2,0,66,575,483,26,66,59.142857,4.7,7.5,33.1,5.7,4.3,2.60,13.2,0.6,-
2021-01-03,28,31,21,7,8,0,59,582,491,26,65,60.142857,5.5,9.4,33.0,5.7,4.7,3.00,17.7,0.7,-
2021-01-04,152,41,143,9,5,0,193,591,496,26,69,61.428571,5.6,9.4,34.3,5.7,5.1,3.20,8.3,0.8,-
2021-01-05,238,88,219,19,14,0,326,610,510,26,74,63.857143,6.8,10.4,38.0,5.7,6.6,3.60,10.4,1.1,-


In [30]:
# Write the joined table to data.csv in the working directory.
df.to_csv(path_or_buf="data.csv")

In [31]:
# Number of days on which the computed PCR positive rate equals the scraped rate.
df["PCR陽性率"].eq(df["陽性率（県）"]).sum()

88

In [32]:
# Number of days on which the two rates differ; rows where either value is
# NaN also count as different.
df["PCR陽性率"].ne(df["陽性率（県）"]).sum()

152

In [33]:
# Keep only the rows where the two rates disagree.
df_false = df.loc[df["PCR陽性率"].ne(df["陽性率（県）"])]

In [34]:
# Show the two rate columns side by side for the mismatching days.
df_false[["PCR陽性率", "陽性率（県）"]]

Unnamed: 0,PCR陽性率,陽性率（県）
2020-07-02,0.8,1.0
2020-07-03,0.6,1.0
2020-07-04,0.5,0.7
2020-07-05,0.5,0.7
2020-07-06,0.4,0.7
...,...,...
2021-01-01,4.4,5.5
2021-01-02,7.5,13.2
2021-01-03,9.4,17.7
2021-01-04,9.4,8.3


In [35]:
# Write the mismatching rows to false.csv in the working directory.
df_false.to_csv(path_or_buf="false.csv")