In [None]:
#!/usr/bin/env python
# coding: utf-8

In [None]:
import codecs
from datetime import datetime as dt
import sys
import numpy as np
import os
import pandas as pd
import plotly
from plotly import subplots
import plotly.express as px
import plotly.tools as tls
import plotly.graph_objects as go
import plotly.io as pio
import plotly.offline as offline
import sys
if "ipy" in sys.argv[0]:
    offline.init_notebook_mode()
from cov19utils import create_basic_plot_figure, \
    show_and_clear, moving_average, \
    blank2zero, csv2array, \
    get_twitter, tweet_with_image, \
    get_gpr_predict, FONT_NAME, DT_OFFSET, \
    download_if_needed, json2nparr, code2int, age2int, \
    get_populations, get_os_idx_of_arr, dump_val_in_arr, \
    calc_last1w2w_dif, create_basic_scatter_figure, \
    show_and_save_plotly
import re
import requests
from sklearn.decomposition import PCA
from sklearn import manifold, cluster
from sklearn.linear_model import LinearRegression
import pdfplumber
from nhk_lib import get_nhk_df, get_nhk_keys, get_lr_col, \
    add_pref2fig, update_axes, update_layout, add_trace2fig, \
    make_prefs_subplots, get_template_fig

In [None]:
# Current local date as an ISO "YYYY-MM-DD" string; appended to every chart title.
today = dt.now().date().isoformat()

In [None]:
# Load the NHK data set and narrow it to the columns this notebook uses.
nhk_df = get_nhk_df("nhk.csv")
nk = get_nhk_keys()
keys = [nk.ts, nk.pref_code, nk.pref_name, nk.total_case, nk.total_death]
nhk_df = nhk_df.loc[:, keys]

# Only the most recent day is analysed: take the timestamp of the final row.
last_day = nhk_df[nk.ts].values[-1]
print(last_day)
is_last_day = nhk_df[nk.ts] == last_day

# Convert both cumulative counters from int to float, then append the derived
# columns. They all start at 0.0 and are filled in by later cells.
last_df = (
    nhk_df.loc[is_last_day]
    .astype({nk.total_case: float, nk.total_death: float})
    .assign(
        pcr=0.0,   # number of PCR tests
        dpm=0.0,   # deaths per million people
        cpm=0.0,   # cases per million people
        dr=0.0,    # death rate
        pop=0.0,   # population
    )
)

In [None]:
# "pcr-lr.tmp" remembers the data date handled by the previous run so that the
# same data set is not processed (and tweeted) twice.
prev = None
try:
    with open("pcr-lr.tmp") as f:
        prev = f.read().strip()
except FileNotFoundError:
    # First run: there is no state file yet, so there is nothing to compare with.
    pass
print(prev)

if last_day == prev:
    print("maybe the same data, nothing to do.")
    # Inside a notebook kernel ("ipy" in argv[0]) execution deliberately
    # continues; when run as a plain script the process stops here.
    if "ipy" not in sys.argv[0]:
        sys.exit()

# Record the date now being processed.
# NOTE(review): this is written before the charts are produced, so a failure
# in a later cell still marks this date as done — confirm that is intended.
with open("pcr-lr.tmp", "wt") as f:
    f.write(last_day)

In [None]:
# Fetch the per-prefecture population records and index them three ways.
populations = get_populations()
pref_records = list(populations.values())
# prefecture code -> total population
totals = {rec['code']: rec['total'] for rec in pref_records}
# Japanese prefecture name -> prefecture code
pcodes = {rec['ja']: rec['code'] for rec in pref_records}
# Japanese prefecture name -> total population
ppops = {rec['ja']: rec['total'] for rec in pref_records}

In [None]:
#last_df

In [None]:
# Normalise the cumulative counters to "per one million people" and compute
# the death rate for every prefecture of the latest day.
ppm = 1000000
for index, row in last_df.iterrows():
    # The population tables are keyed by the name without its 県/都/府 suffix.
    # The three rstrip() calls must stay separate: a single rstrip('県都府')
    # would also eat the 都 of 京都 after removing 府.
    pname = row[nk.pref_name].rstrip('県').rstrip('都').rstrip('府')
    cases = float(row[nk.total_case])
    deaths = float(row[nk.total_death])
    population = ppops[pname]

    last_df.at[index, nk.pref_code] = pcodes[pname]
    last_df.at[index, 'cpm'] = (cases / population) * ppm
    last_df.at[index, 'dpm'] = (deaths / population) * ppm
    # A prefecture with no cases yet has no meaningful death rate. Keep 0.0
    # instead of letting the float division raise ZeroDivisionError.
    if cases > 0:
        last_df.at[index, 'dr'] = (deaths / cases) * 100.0
    else:
        last_df.at[index, 'dr'] = 0.0
    last_df.at[index, 'pop'] = totals[pcodes[pname]]

In [None]:
#last_df

In [None]:
# Chart 1: cases per million (x) against deaths per million (y), both on log
# axes. Marker size follows cumulative cases, colour follows cumulative deaths.
title = '全国新型コロナ 感染者数/死者数 ' + today
fig = px.scatter(
    last_df,
    x='cpm',
    y='dpm',
    size=nk.total_case,
    color=nk.total_death,
    text=nk.pref_name,
    log_x=True,
    log_y=True,
    width=700,
    height=700,
    title=title,
)
fig.update_layout(
    margin=dict(t=50, l=0, r=0, b=0),
    xaxis=dict(title=nk.total_case + ' / 100万人'),
    yaxis=dict(title=nk.total_death + ' / 100万人'),
)
imgname = "pcr-lr-1.jpg"
show_and_save_plotly(
    fig, imgname, js=False, show=True, image=True, html=False)

In [None]:
# Twitter handle that every tweet_with_image() call below receives.
# NOTE(review): presumably get_twitter() authenticates inside cov19utils — confirm.
tw = get_twitter()

In [None]:
# Caption for chart 1: the title plus the data attribution. Only NHK is
# credited here; the "厚生労働省データ及び" (MHLW) prefix used by the later
# charts is left out.
tw_body = f"{title}\nNHKデータを独自に加工。"
print(tw_body)
tweet_with_image(tw, f"docs/images/{imgname}", tw_body)

In [None]:
# MHLW (mhlw.go.jp) page that is scanned in the next cell for the link to the
# per-prefecture PCR test PDF. `file` is the local name that cell opens.
uri = "https://www.mhlw.go.jp/stf/seisakunitsuite/"
file = "newpage_00016.html"
# NOTE(review): presumably fetches uri + file into `file` unless a local copy
# already exists — confirm in cov19utils.
download_if_needed(uri, file)

In [None]:
# Scan the downloaded HTML for the anchor whose text starts with
# "PCR検査等の検査実施人数の推移（都道府県別・各日）" and capture its href.
# pdfname stays None when no line matches.
ptn = re.compile(r"^.*\<a href=\"(.*?)\"\>PCR検査等の検査実施人数の推移（都道府県別・各日）.*$")
pdfname = None
with codecs.open(file, encoding="utf-8") as f:
    for line in f:
        # str.strip() returns a new string, so the stripped value has to be
        # used; the result was previously thrown away.
        m = ptn.match(line.strip('\r\n'))
        if m:
            pdfname = m.group(1)
            break

In [None]:
# Download the PDF whose link was found on the MHLW page.
# Without a link, `file` would still name the HTML page and the PDF parsing
# cell would fail on it with a confusing error, so stop here with a clear one.
if not pdfname:
    raise RuntimeError(
        "link to the per-prefecture PCR test PDF was not found in " + file)

uri = "https://www.mhlw.go.jp" + pdfname
# Split into directory URL and file name; from here on `file` names the PDF.
(url, file) = os.path.split(uri)
url = url + '/'
print(url, file)
download_if_needed(url, file)

In [None]:
# Pull the table out of the downloaded PDF. When several pages carry a table,
# the last one wins, as before.
pcr_df = None
with pdfplumber.open(file) as pdf:
    for page in pdf.pages:
        data = page.extract_table()
        if not data:
            # extract_table() gives None for a page without a table;
            # slicing it would raise TypeError.
            continue
        # The first three rows are skipped (presumably header rows — TODO confirm).
        pcr_df = pd.DataFrame(data[3:])

if pcr_df is None:
    raise RuntimeError("no table found in " + file)

# Keep a CSV copy next to the PDF. Only the extension is swapped, so a "pdf"
# substring elsewhere in the file name is left alone.
pcr_df.to_csv(os.path.splitext(file)[0] + '.csv')

In [None]:
#pcr_df

In [None]:
# Drop columns that are entirely empty, then keep just the first column
# (prefecture) and the last column (the most recent value).
non_empty_cols = pcr_df.dropna(axis='columns', how='all')
last_col = non_empty_cols.shape[1] - 1
pcr_df = non_empty_cols.iloc[:, [0, last_col]]
pcr_df.columns = ['pref', 'latest']

In [None]:
#pcr_df

In [None]:
# Build {prefecture: PCR test count} from the 'latest' column.
# Only cells that contain a space are used; the number before the first space
# (with thousands separators removed) is taken as the count.
pcrs = {}
for pnm, latest in zip(pcr_df['pref'], pcr_df['latest']):
    # Cells of an extracted PDF table can be None; `' ' in None` would raise
    # TypeError, so anything that is not text is skipped.
    if not isinstance(latest, str) or ' ' not in latest:
        continue
    vals = latest.split(' ')
    pcrs[pnm] = float(vals[0].replace(',', ''))

In [None]:
# Copy the PCR test counts into last_df. The counts are keyed by the
# prefecture name without its 都/県/府 suffix; prefectures that have no entry
# keep their current 'pcr' value.
for row_label, full_name in last_df[nk.pref_name].items():
    # The rstrip() calls are chained on purpose: a single rstrip('都県府')
    # would reduce 京都府 to 京 instead of 京都.
    short_name = full_name.rstrip('都').rstrip('県').rstrip('府')
    if short_name in pcrs:
        last_df.at[row_label, 'pcr'] = pcrs[short_name]

In [None]:
# Overall positive rate [%]: cumulative cases divided by the PCR test count.
# A prefecture whose 'pcr' is still 0.0 (no figure was matched) would divide
# by zero and produce inf. Mask those rows so the rate is NaN instead.
pcr_tested = last_df['pcr'].where(last_df['pcr'] > 0)
last_df['rate'] = (last_df[nk.total_case] / pcr_tested) * 100.0

In [None]:
# Tests per case (tpc) and tests per death (tpd).
# A zero denominator (no cases / no deaths yet) is masked to NaN so the ratio
# becomes NaN rather than inf.
case_count = last_df[nk.total_case].where(last_df[nk.total_case] > 0)
death_count = last_df[nk.total_death].where(last_df[nk.total_death] > 0)
last_df['tpc'] = last_df['pcr'] / case_count
last_df['tpd'] = last_df['pcr'] / death_count
#last_df

In [None]:
#lr = LinearRegression()
#X = last_df[nk.total_death].values
#Y = last_df['rate'].values
#lr.fit(X.reshape(-1, 1), Y.reshape(-1, 1))
#Y_pred = lr.predict(X.reshape(-1, 1)).reshape(1, -1)[0]
#a = lr.coef_[0][0]
#print(a)

In [None]:
# Chart 2: overall positive rate (y) against cumulative deaths (x, log scale).
# Marker size follows cumulative cases, colour follows cumulative deaths.
title = '全国新型コロナ 全期間陽性率-累計死者数 ' + today
fig = px.scatter(
    last_df,
    x=nk.total_death,
    y='rate',
    size=nk.total_case, color=nk.total_death,
    # Use the key object for the label column, as chart 1 does, instead of a
    # hard-coded column name.
    text=nk.pref_name,
    log_x=True,
    width=700, height=700, title=title)
fig.update_layout(
    margin=dict(t=50, l=0, r=0, b=0),
    xaxis=dict(title='累計死者数 (total deaths) [対数 (log-scale)]'),
    # The y axis is pinned to the 0-9 % window.
    yaxis=dict(title='全期間陽性率 (total positive rate) [%]', range=[0.0, 9.0]))
# A linear-regression overlay used to be added here; it is currently disabled.
imgname = "pcr-lr-2.jpg"
show_and_save_plotly(fig, imgname, js=False, show=True, image=True, html=False)

In [None]:
# Caption for chart 2: the title followed by the MHLW + NHK data attribution.
tw_body = "\n".join([title, "厚生労働省データ及びNHKデータを独自に加工。"])
print(tw_body)
tweet_with_image(tw, f"docs/images/{imgname}", tw_body)

In [None]:
# Chart 3: overall positive rate (y) against cumulative cases (x, log scale).
# Marker size follows cumulative cases, colour follows cumulative deaths.
title = '全国新型コロナ 全期間陽性率-累計感染者数 ' + today
fig = px.scatter(
    last_df,
    x=nk.total_case,
    y='rate',
    size=nk.total_case, color=nk.total_death,
    # Use the key object for the label column, as chart 1 does, instead of a
    # hard-coded column name.
    text=nk.pref_name,
    log_x=True,
    width=700, height=700, title=title)
fig.update_layout(
    margin=dict(t=50, l=0, r=0, b=0),
    xaxis=dict(title='累計感染者数 (total cases) [対数 (log-scale)]'),
    # The y axis is pinned to the 0-9 % window.
    yaxis=dict(title='全期間陽性率 (total positive rate) [%]', range=[0.0, 9.0]))
# A linear-regression overlay used to be added here; it is currently disabled.
imgname = "pcr-lr-3.jpg"
show_and_save_plotly(fig, imgname, js=False, show=True, image=True, html=False)

In [None]:
# Caption for chart 3: the title followed by the MHLW + NHK data attribution.
attribution = "厚生労働省データ及びNHKデータを独自に加工。"
tw_body = f"{title}\n{attribution}"
print(tw_body)
image_path = "docs/images/" + imgname
tweet_with_image(tw, image_path, tw_body)

In [None]:
# Chart 4: deaths per million (y) against tests per death (x), both on log
# axes. Marker size and colour both follow the death rate ('dr').
title = '全国新型コロナ tpd-dpm-致死率 ' + today
fig = px.scatter(
    last_df, x='tpd', y='dpm', size='dr', color='dr',
    # Use the key object for the label column, as chart 1 does, instead of a
    # hard-coded column name.
    text=nk.pref_name,
    labels={'dr':'致死率'},
    log_x=True, log_y=True, width=700, height=700, title=title)
fig.update_layout(
    margin=dict(t=50, l=0, r=0, b=0),
    xaxis=dict(title='検査数/死者 (tests per death) [対数 (log-scale)]'),
    yaxis=dict(title='死者/100万人 (deaths / million people) [対数 (log-scale)]'))
imgname = "pcr-lr-4.jpg"
show_and_save_plotly(fig, imgname, js=False, show=True, image=True, html=False)

In [None]:
# Caption for chart 4: the title followed by the MHLW + NHK data attribution.
tw_body = "%s\n厚生労働省データ及びNHKデータを独自に加工。" % title
print(tw_body)
tweet_with_image(tw, "docs/images/%s" % imgname, tw_body)

In [None]:
#last_df

In [None]:
#ave_dpm = ppm * last_df['各地の死者数_累計'].sum() / last_df['pop'].sum()
#ave_dpm

In [None]:
#ave_tpd = last_df['pcr'].sum() / last_df['各地の死者数_累計'].sum()
#ave_tpd