<a href="https://colab.research.google.com/github/imabari/covid19-data/blob/master/ehime/matsuyama_covid.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install japanize-matplotlib

In [None]:
import datetime
import pandas as pd

In [None]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

In [None]:
import japanize_matplotlib

In [None]:
import matplotlib as mpl
# Render every figure at 200 dpi by default
mpl.rcParams.update({"figure.dpi": 200})

In [None]:
def str2date(ser):
    """Convert Japanese Reiwa-era date strings to pandas timestamps.

    Each element is searched for the pattern ``令和<Y>年[度]<M>月<D>日``.
    The era year is converted to the Gregorian calendar as ``Y + 2018``
    (Reiwa 1 = 2019), so every Reiwa year is handled, not only years 2 and 3.

    Parameters
    ----------
    ser : pandas.Series
        Series of strings containing the dates; missing values are allowed.

    Returns
    -------
    pandas.Series
        ``datetime64`` Series with the same index as ``ser``. Elements that
        do not contain the pattern, or that describe an impossible calendar
        date, become ``NaT`` instead of raising.
    """
    reiwa_offset = 2018  # Reiwa year N corresponds to Gregorian year N + 2018

    parts = ser.str.extract(r"令和(\d{1,2})年度?(\d{1,2})月(\d{1,2})日")
    parts.columns = ["year", "month", "day"]

    # Rows without a match are all-NaN; remember them so they can be blanked afterwards
    matched = parts.notna().all(axis=1).to_numpy()

    # The placeholder "1" only exists so the integer cast cannot fail on unmatched rows
    numbers = parts.fillna("1").astype(int)
    numbers["year"] += reiwa_offset

    # errors="coerce" turns impossible dates (e.g. Feb 30) into NaT
    return pd.to_datetime(numbers, errors="coerce").where(matched)

In [None]:
def str2normalize(df):
    """Clean every object-dtype column of ``df`` in place and return ``df``.

    For each such column the values are stripped of surrounding whitespace,
    NFKC-normalized (full-width characters become their ASCII forms), and
    finally have every remaining space character removed.
    """
    text_columns = df.select_dtypes(include=object).columns

    for name in text_columns:
        cleaned = df[name].str.strip()
        cleaned = cleaned.str.normalize("NFKC")
        # Must run after NFKC so that former full-width spaces are removed too
        df[name] = cleaned.str.replace(" ", "")

    return df

In [None]:
def str2data(ser):
    """Expand ``<label>:<count>人`` summaries into one integer column per label.

    Every occurrence of ``<label>:<digits>人`` in each element is extracted and
    the results are pivoted so that each distinct label becomes a column.
    Counts of any number of digits are accepted.

    Parameters
    ----------
    ser : pandas.Series
        Named Series of strings. ``ser.name`` is used as the name of the
        resulting column axis.

    Returns
    -------
    pandas.DataFrame
        Integer frame indexed by the original row labels of ``ser``; labels
        missing from a row are filled with 0. Rows with no match at all do
        not appear in the result.

    Raises
    ------
    ValueError
        Raised by ``pivot`` when the same label occurs twice in one element.

    Notes
    -----
    The label is matched with ``.+?``, so any characters between the end of
    one entry and the next colon become part of the following label.
    """
    matches = (
        ser.str.extractall(r"(.+?):(\d+)人")
        # Drop the per-row match counter so only the original row label remains
        .droplevel(1)
        .rename(columns={0: ser.name, 1: "人数"})
    )
    # Convert the counts while they are still a single column
    matches["人数"] = matches["人数"].astype(int)

    df = (
        matches.pivot(columns=ser.name, values="人数")
        .fillna(0)
        .astype(int)
    )
    return df

# 2020/03～2021/01/04

In [None]:
# Matsuyama city pages to scrape: the first HTML table of each page is read
# and the tables are concatenated in this order.
# NOTE(review): the file names look like Reiwa year/month archives — confirm the periods they cover.
urls = [
    "https://www.city.matsuyama.ehime.jp/kurashi/iryo/hokenyobo/kansensho/tyuui/mcovid_R0210made.html",
    "https://www.city.matsuyama.ehime.jp/kurashi/iryo/hokenyobo/kansensho/tyuui/mcovid_R0211.html",
    "https://www.city.matsuyama.ehime.jp/kurashi/iryo/hokenyobo/kansensho/tyuui/mcovid_R0212-01.html",
]

In [None]:
# Read the first table of every page and put its rows in descending index order
# (presumably the pages list the newest cases first — confirm)
dfs = [
    pd.read_html(page_url)[0].sort_index(ascending=False)
    for page_url in urls
]

In [None]:
# Stack the per-page tables under a fresh 0..n-1 index, then clean the text columns
df1 = pd.concat(dfs, ignore_index=True).pipe(str2normalize)

In [None]:
# Parse the era-formatted announcement date into a real datetime column
df1 = df1.assign(date=str2date(df1["発表日"]))

In [None]:
# Drop rows whose case column mentions "陰性".
# na=False treats missing cells as "no match" so the mask stays boolean and `~` cannot fail;
# .copy() detaches the result so later column assignments do not act on a view.
df1 = df1[~df1["市内事例（県内）"].str.contains("陰性", na=False)].copy()

In [None]:
df1

# 2021/01/05～

## 削除する日付

+ 2021/01/28　陽性者が1人のみで、1/29のセルと結合されているため、行が重複してしまう

In [None]:
# Announcement dates whose rows are removed from df2
del_date = [
    datetime.datetime(year=2021, month=1, day=28),
]

In [None]:
# Third table on the current page: reverse the row order, keep rows with at least
# three non-missing cells, remove exact duplicate rows, renumber, then clean the text.
# NOTE(review): the table position [2] depends on the page layout — confirm it is still correct.
df2 = (
    pd.read_html(
        "https://www.city.matsuyama.ehime.jp/kurashi/iryo/hokenyobo/kansensho/tyuui/sinngatakorona.html"
    )[2]
    .sort_index(ascending=False)
    .dropna(thresh=3)
    .drop_duplicates()
    .reset_index(drop=True)
    .pipe(str2normalize)
)

In [None]:
# Parse the era-formatted announcement date into a real datetime column
df2 = df2.assign(date=str2date(df2["発表日"]))

In [None]:
# Keep only the rows whose date is not listed in del_date
df2 = df2.loc[~df2["date"].isin(del_date)]

In [None]:
df2

# 日付

In [None]:
# Per-label case counts for each df2 row, re-indexed by announcement date
df_case = (
    str2data(df2["事例"])
    .join(df2["date"])
    .set_index("date")
)
df_case

In [None]:
# df1 has one row per case, so counting rows per date gives the daily total
s1_date = df1["date"].value_counts(sort=False).sort_index()

In [None]:
# Add up all label columns of each row to get the daily total
s2_date = df_case.sum(axis="columns")

In [None]:
# Daily totals from both sources on one chronological axis
s_date = pd.concat(
    (s1_date, s2_date),
    axis=0,
).sort_index()

In [None]:
# s_date.index = s_date.index - datetime.timedelta(days=1)

In [None]:
s_date

In [None]:
s_date.sum()

In [None]:
# Monthly totals labelled by month end. An explicit offset object is used because
# the "M" alias string is deprecated for resampling in recent pandas releases.
s_date.resample(pd.offsets.MonthEnd()).sum()

In [None]:
fig, ax = plt.subplots()

# Concise date labels on the x axis
locator = mdates.AutoDateLocator()
formatter = mdates.ConciseDateFormatter(locator)
ax.xaxis.set_major_locator(locator)
ax.xaxis.set_major_formatter(formatter)

# One bar per date; on a date axis width=1 is one day
ax.bar(s_date.index, s_date.values, width=1)

fig.savefig("01.png", dpi=200, bbox_inches="tight")
plt.show()

# 陽性者累計

In [None]:
s_date.sum()

In [None]:
# Write one row per calendar day (days without data become 0) as tab-separated text
(
    s_date
    .asfreq("D", fill_value=0)
    .to_csv("matsuyama.tsv", sep="\t")
)

# 年代

In [None]:
# df1: one row per case, so cross-tabulate date against age group
df1_ages = pd.crosstab(index=df1["date"], columns=df1["年代"])

# df2: counts are embedded in text, so expand them and index by date
df2_ages = (
    str2data(df2["年代"])
    .join(df2["date"])
    .set_index("date")
)

# Age groups missing from one source are filled with 0 after stacking
df_ages = (
    pd.concat([df1_ages, df2_ages])
    .fillna(0)
    .astype(int)
    .sort_index()
)

In [None]:
df_ages

In [None]:
# Horizontal bars of the total per age group
df_ages.sum().plot(kind="barh")

plt.savefig("02.png", bbox_inches="tight", dpi=200)
plt.show()

# 性別

In [None]:
# df1: one row per case, so cross-tabulate date against sex
df1_sexs = pd.crosstab(df1["date"], df1["性別"])

df2_sexs = (
    str2data(df2["性別"])
    .join(df2["date"])
    .set_index("date")
    # Fold the short labels into the long ones; rename ignores labels that are absent,
    # so this no longer fails when only one spelling occurs in the data
    .rename(columns={"男": "男性", "女": "女性"})
    # Renaming can leave two columns with the same label; add them together
    .T.groupby(level=0)
    .sum()
    .T
)

df_sexs = pd.concat([df1_sexs, df2_sexs]).fillna(0).astype(int).sort_index()

In [None]:
df_sexs

In [None]:
# Horizontal bars of the total per sex
df_sexs.sum().plot(kind="barh")

plt.savefig("03.png", bbox_inches="tight", dpi=200)
plt.show()

# 職業等

In [None]:
# Collapse annotated occupation labels into their base category.
# assign() builds a new frame instead of writing a column into the filtered df1,
# which avoids pandas' SettingWithCopyWarning.
df1 = df1.assign(
    **{
        "職業等": df1["職業等"].replace(
            {
                "入院患者等": "入院患者",
                "学校関係者※学校での活動なし": "学校関係者",
                "看護職員※居住地は松前町": "看護職員",
                "医療機関Aに勤務する職員": "医療機関職員",
                "医療機関職員※居住地は東温市": "医療機関職員",
                "医療機関Aに勤務する介護職員": "介護職員",
            }
        )
    }
)

In [None]:
# df1: one row per case, so cross-tabulate date against occupation
df1_works = pd.crosstab(index=df1["date"], columns=df1["職業等"])

# df2: counts are embedded in text, so expand them and index by date
df2_works = (
    str2data(df2["職業等"])
    .join(df2["date"])
    .set_index("date")
)

# Occupations missing from one source are filled with 0 after stacking
df_works = (
    pd.concat([df1_works, df2_works])
    .fillna(0)
    .astype(int)
    .sort_index()
)

In [None]:
df_works.sum()

In [None]:
# Horizontal bars of the total per occupation
df_works.sum().plot(kind="barh")

plt.savefig("04.png", bbox_inches="tight", dpi=200)
plt.show()

In [None]:
df_works