In [None]:
#!/usr/bin/env python
# coding: utf-8

In [None]:
import codecs
from datetime import datetime as dt
import sys
import numpy as np
import os
import pandas as pd
import plotly
from plotly import subplots
import plotly.express as px
import plotly.tools as tls
import plotly.graph_objects as go
import plotly.io as pio
import plotly.offline as offline
import sys
# Switch Plotly's offline mode to notebook output only when the launcher path
# in argv[0] contains "ipy" (presumably an IPython/Jupyter kernel - confirm);
# plain script runs skip this call.
if "ipy" in sys.argv[0]:
    offline.init_notebook_mode()
from cov19utils import create_basic_plot_figure, \
    show_and_clear, moving_average, \
    blank2zero, csv2array, \
    get_twitter, tweet_with_image, \
    get_gpr_predict, FONT_NAME, DT_OFFSET, \
    download_if_needed, json2nparr, code2int, age2int, \
    get_populations, get_os_idx_of_arr, dump_val_in_arr, \
    calc_last1w2w_dif, create_basic_scatter_figure, \
    show_and_save_plotly
import re
import requests
from sklearn.decomposition import PCA
from sklearn import manifold, cluster
from sklearn.linear_model import LinearRegression
import xlrd

In [None]:
# Base URL and file name of the ministry page that links to the workbook.
# NOTE(review): download_if_needed is assumed to store the page locally under
# the name in `file`, because a later cell opens `file` from disk - confirm
# against cov19utils.
uri = "https://www.mhlw.go.jp/stf/seisakunitsuite/"
file = "newpage_00023.html"
download_if_needed(uri, file)

In [None]:
# Name of the workbook handled by the previous run, as recorded in the marker
# file "np00023.tmp". On the very first run the marker does not exist yet, so
# a missing file simply means "no previous run" instead of an error.
try:
    with open("np00023.tmp") as f:
        prev = f.read().strip()
except FileNotFoundError:
    prev = None
print(prev)

In [None]:
# Scan the downloaded page for the first anchor whose link text starts with
# "エクセル版" and keep its href; `xlsx` stays None when no line matches.
ptn = re.compile(r"^.*\<a href=\"(.*?)\"\>エクセル版.*$")
xlsx = None
with codecs.open(file, encoding="utf-8") as f:
    for line in f:
        # str.strip returns a new string, so the result has to be kept;
        # the original call discarded it and was a no-op.
        line = line.strip('\r\n')
        m = ptn.match(line)
        if m:
            xlsx = m.group(1)
            break

In [None]:
# Resolve the workbook link found on the page, skip the run when the same file
# name was already processed, then record the name and download the workbook.
if not xlsx:
    # Without a link, `file` would keep naming the HTML page and the Excel
    # reader further down would be handed that page; stop with a clear reason.
    raise RuntimeError("no Excel link found in " + file)

uri = "https://www.mhlw.go.jp" + xlsx
# Split into "<directory>/" and "<file name>", the two arguments that
# download_if_needed takes.
(url, file) = os.path.split(uri)
url = url + '/'
print(url, file)
if file == prev:
    print("maybe the same data, nothing to do.")
    if "ipy" in sys.argv[0]:
        # Interactive session: keep going so the remaining cells can be re-run.
        pass#exit()
    else:
        sys.exit()
# Remember the workbook name so the next run can detect unchanged data.
with open("np00023.tmp", "wt") as f:
    f.write(file)
download_if_needed(url, file)

In [None]:
# Column labels applied to the 20 spreadsheet columns read by pd.read_excel.
# (A placeholder list of "0".."19" used to be built first and was overwritten
# immediately; that dead assignment is gone.)
names = [
    "都道府県名",
    "陽性者数",
    "入院者数",
    "フェーズ",
    "病床数",
    "使用率",
    "即応病床",
    "重症者数",
    "重症フェーズ",
    "重症病床数",
    "重症病床使用率",
    "即応重症病床",
    "宿泊療養者数",
    "療養フェーズ",
    "居室数",
    "居室使用率",
    "計画居室数",
    "自宅療養者数",
    "社会福祉施設等療養者数",
    "確認中"
]
#print(names)

In [None]:
# Load the workbook: spreadsheet columns C..V (20 columns, relabelled with
# `names`), skipping the first 6 rows and reading 47 data rows
# (presumably one row per prefecture - confirm against the sheet layout).
df = pd.read_excel(file, names=names, usecols="C:V", skiprows=6, nrows=47)

In [None]:
# Placeholder columns: 'コード' is overwritten per row later on, '値' is a constant 1.
for placeholder, initial in (('コード', 0), ('値', 1)):
    df[placeholder] = initial
# People not in hospital: welfare-facility care + home care + status being confirmed.
df['非入院数'] = df['社会福祉施設等療養者数'].add(df['自宅療養者数']).add(df['確認中'])
#df

In [None]:
populations = get_populations()
# Map each entry's 'code' to its 'total'; the keys of `populations` are not needed.
totals = {entry['code']: entry['total'] for entry in populations.values()}
#totals

In [None]:
# Normalise the table with whole-column operations. The previous cell-by-cell
# `.at` writes stored strings into the integer 'コード' placeholder and floats
# into count columns that may be integers; pandas deprecates that kind of
# dtype-incompatible scalar assignment, and the result depended on how a given
# pandas version coerced the value.

# Labels have the form "<code> <name>"; split each one once.
label_parts = [label.split(' ') for label in df['都道府県名']]
raw_codes = [parts[0] for parts in label_parts]
df['都道府県名'] = [parts[1] for parts in label_parts]

# NOTE(review): the key type of `totals` is decided by get_populations(), which
# is not visible here. Use the code as written when it is a key, otherwise its
# integer form - confirm which one cov19utils actually provides.
codes = [code if code in totals else int(code) for code in raw_codes]
# Replace the whole column so its dtype follows the codes.
df['コード'] = codes

# Occupancy ratios -> percentages.
for col in ('使用率', '重症病床使用率', '居室使用率'):
    df[col] = df[col] * 100.0

# Head counts -> persons per one million residents of the prefecture.
mp_multi = 1000000
population = pd.Series([totals[code] for code in codes], index=df.index, dtype=float)
for col in (
    '陽性者数',
    '入院者数',
    '重症者数',
    '宿泊療養者数',
    '自宅療養者数',
    '社会福祉施設等療養者数',
    '確認中',
    '非入院数',
):
    # Convert to float first so fractional results are stored as-is.
    df[col] = (df[col].astype(float) / population) * mp_multi

In [None]:
#df

In [None]:
# Current local date as 'YYYY-MM-DD', used in chart titles and tweet text.
today = dt.now().date().isoformat()

In [None]:
# Bubble chart: bed occupancy (x) against non-hospitalised people per million
# (y, log scale); bubble size and colour both follow the positive-case column.
title = '病床使用率と非入院者数'
imgname = "newpage_00023.jpg"
fig = px.scatter(
    df,
    x='使用率',
    y='非入院数',
    size='陽性者数',
    color='陽性者数',
    text='都道府県名',
    log_y=True,
    width=700,
    height=700,
    title=title + today,
)
# One layout update carrying the margin and both axis titles.
fig.update_layout(
    margin=dict(t=50, l=0, r=0, b=0),
    xaxis=dict(title='病床使用率 [%]'),
    yaxis=dict(title='非入院者数 / 100万人'),
)
show_and_save_plotly(fig, imgname, js=False, show=True, image=True, html=False)

In [None]:
# Handle returned by cov19utils.get_twitter(); it is passed to tweet_with_image
# as the first argument when the charts are posted.
tw = get_twitter()

In [None]:
# Tweet text: chart title and date, the definition of the non-hospitalised
# figure, then the data-source credit.
tw_body = "".join([
    title,
    'のグラフ (',
    today,
    ")\n非入院者数＝施設療養者＋自宅療養者＋確認中。",
    "\n厚生労働省「新型コロナウイルス感染症患者の療養状況、病床数等に関する調査結果」を加工。",
])
print(tw_body)

In [None]:
# Post the text in tw_body with the image file "docs/images/<imgname>".
# NOTE(review): this assumes show_and_save_plotly wrote the image under
# docs/images/ - confirm in cov19utils.
tweet_with_image(tw, "docs/images/" + imgname, tw_body)

In [None]:
# x使用率 重症病床使用率 陽性者数 入院者数 重症者数 x非入院数
# Bubble chart on log-log axes: severe cases (x) against hospitalised patients
# (y), both per million; bubble size and colour follow the positive-case column.
title = '重症者数と入院者数'
imgname = "newpage_00023b.jpg"
fig = px.scatter(
    df,
    x='重症者数',
    y='入院者数',
    size='陽性者数',
    color='陽性者数',
    text='都道府県名',
    log_x=True,
    log_y=True,
    width=700,
    height=700,
    title=title + today,
)
# One layout update carrying the margin and both axis titles.
fig.update_layout(
    margin=dict(t=50, l=0, r=0, b=0),
    xaxis=dict(title='重症者数 / 100万人'),
    yaxis=dict(title='入院者数 / 100万人'),
)
show_and_save_plotly(fig, imgname, js=False, show=True, image=True, html=False)

In [None]:
# Tweet text: chart title and date in parentheses, then the data-source credit.
# The parenthesis opened before the date is now closed after it; the closing
# ")" was previously missing from this message.
tw_body = title + 'のグラフ (' + today + \
    ")\n厚生労働省「新型コロナウイルス感染症患者の療養状況、病床数等に関する調査結果」を加工。"
print(tw_body)

In [None]:
# Post the text in tw_body with the image file "docs/images/<imgname>".
# NOTE(review): this assumes show_and_save_plotly wrote the image under
# docs/images/ - confirm in cov19utils.
tweet_with_image(tw, "docs/images/" + imgname, tw_body)

In [None]:
#'使用率''重症病床使用率''居室使用率''陽性者数''入院者数'
#'重症者数''宿泊療養者数''自宅療養者数''社会福祉施設等療養者数'
#'確認中''非入院数'

In [None]:
#features = ["使用率", "重症病床使用率", "居室使用率",
#            "陽性者数", "入院者数", "重症者数", "宿泊療養者数", "自宅療養者数",
#            '社会福祉施設等療養者数', '確認中', "非入院数"]
#X = df[features]
#pca = PCA(n_components=2)
#components = pca.fit_transform(X)
#fig = px.scatter(components, x=0, y=1, color=df['陽性者数'])
#fig.show()
#X_reduced = manifold.TSNE(n_components=2).fit_transform(X)

In [None]:
#title = '次元削減後の病床状況'
#fig = px.scatter(
#    X_reduced, x=0, y=1, size=df['陽性者数'], color=df['陽性者数'], text=df['都道府県名'],
#    width=700, height=700, title=title + today)
#fig.update_layout(margin=dict(t=50, l=0, r=0, b=0))
#fig.update_layout(xaxis=dict(title='', ticks=''))
#fig.update_layout(yaxis=dict(title='', ticks=''))
#imgname = "newpage_00023c.jpg"
#show_and_save_plotly(fig, imgname, js=False, show=True, image=True, html=False)