In [None]:
#!/usr/bin/python3
# coding: utf-8
# prefectures

In [None]:
import codecs
from datetime import datetime as dt
from datetime import timedelta as td
import json
import matplotlib
import numpy as np
import os
import pandas as pd
import sys
if "ipy" not in sys.argv[0]:
    matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import plotly
import plotly.express as px
import plotly.tools as tls
import plotly.graph_objects as go
import plotly.io as pio
import plotly.offline as offline
from plotly.subplots import make_subplots
if "ipy" in sys.argv[0]:
    offline.init_notebook_mode()
from sklearn.neighbors import LocalOutlierFactor
from cov19utils import create_basic_plot_figure, \
    show_and_clear, moving_average, \
    blank2zero, csv2array, \
    get_twitter, tweet_with_image, \
    get_gpr_predict, FONT_NAME, DT_OFFSET, \
    download_if_needed, json2nparr, code2int, age2int, \
    get_populations, get_os_idx_of_arr, dump_val_in_arr, \
    calc_last1w2w_dif, create_basic_scatter_figure, \
    show_and_save_plotly, make_japan_heatmap, \
    make_japan_choropleth

In [None]:
# Per-prefecture lookup table from the project helper. Later cells key it by
# the English prefecture name and read each entry's 'code', 'total', 'region'
# and 'ja' fields.
populations = get_populations()
#print(populations)

In [None]:
# "YYYY-MM-DD HH:MM" timestamp (from dt.now()) embedded in chart titles and tweet text.
today_str = dt.now().isoformat()[:16].replace('T', ' ')
# Nationwide data
ogiwara_uri = "https://toyokeizai.net/sp/visual/tko/covid19/csv/"
pref_file = "prefectures.csv"
# NOTE(review): presumably fetches pref_file from ogiwara_uri only when the
# local copy is missing or stale — confirm in cov19utils.
download_if_needed(ogiwara_uri, pref_file)

In [None]:
# Column name -> zero-based column position within one row of prefectures.csv.
offsets = {
    column: position
    for position, column in enumerate((
        "year",   # year
        "month",  # month
        "day",    # day
        "st_ja",  # prefecture
        "st_en",  # state
        "posis",  # positive cases
        "tests",  # tests performed
        "hospi",  # hospitalized
        "serio",  # serious
        "disch",  # discharged
        "death",  # deaths
        "ern",    # Effective Reproduction Number
    ))
}

In [None]:
# Daily tables: one row per whole day elapsed since the first day of the statistics.
dt_start = dt.strptime("2020/2/8", "%Y/%m/%d")  # first day of the statistics
dt_now = dt.now()                               # current date and time
dt_delta = (dt_now - dt_start).days             # whole days between the two
n_states = 47                                   # number of prefectures
# effective reproduction number / positive cases / tests performed
erns, poss, test = [], [], []
# Each row has n_states + 1 slots: slot 0 carries the day index, the rest start at zero.
for i in np.arange(dt_delta):
    for _table in (erns, poss, test):
        _row = [0.0] * (n_states + 1)
        _row[0] = i
        _table.append(_row)

In [None]:
# Parse prefectures.csv into the erns / poss / test tables.
# Each data row is stored at [day index since dt_start][prefecture code].
cnt = 0  # number of data rows stored; compared with the previous run's count later
with codecs.open("prefectures.csv", encoding='utf-8') as f:
    f.readline()  # skip the header row
    # Iterate over the file itself so that a blank line in the middle of the
    # data is skipped instead of terminating the whole parse.
    for line in f:
        elems = line.replace("\r\n", "").rstrip().split(',')
        if len(elems) < 2:
            continue
        dt_idx  = (dt(int(elems[0]), int(elems[1]), int(elems[2])) - dt_start).days
        if not 0 <= dt_idx < dt_delta:
            # A date before dt_start would wrap around to the end of the tables
            # (negative index) and a date at or after the last allocated day
            # would raise IndexError, so such rows are reported and skipped.
            print("skip out-of-range date: {}".format(",".join(elems[:3])))
            continue
        st_name = elems[offsets['st_en']]
        st_idx  = populations[st_name]['code']
        erns[dt_idx][st_idx] = blank2zero(elems[offsets['ern']])
        poss[dt_idx][st_idx] = blank2zero(elems[offsets['posis']])
        tested = blank2zero(elems[offsets['tests']])
        # Keep the initial zero when no positive test count is reported.
        if tested > 0:
            test[dt_idx][st_idx] = tested
        cnt += 1

In [None]:
# Compare the number of rows loaded with the count stored by the previous run;
# an identical count is taken to mean the upstream data has not changed.
try:
    with open("prefectures.prev.tmp", "rt") as f:
        prev = int(f.read().rstrip())
except (FileNotFoundError, ValueError):
    # First run (no marker file yet) or an empty/corrupt marker:
    # use a count that can never match so processing continues.
    prev = -1
print("Load {} lines, prev: {}.".format(cnt, prev))
if cnt == prev:
    print("maybe the same data, nothing to do.")
    if "ipy" in sys.argv[0]:
        # Under IPython/Jupyter keep going so the notebook can still be run interactively.
        pass#exit()
    else:
        sys.exit()

# Remember this run's row count for the next comparison.
with open("prefectures.prev.tmp", "wt") as f:
    f.write("{}".format(cnt))

In [None]:
# Convert the three per-day list-of-lists tables into NumPy arrays so later
# cells can index them as arrays.
erns, poss, test = (np.array(_table) for _table in (erns, poss, test))

In [None]:
# NOTE(review): get_os_idx_of_arr presumably returns the row index of the most
# recent day that has data in `erns` — confirm in cov19utils.
os_idx = get_os_idx_of_arr(erns, dt_delta)
latest_erns = erns[os_idx] # latest effective reproduction number (one row of erns)
dump_val_in_arr(populations, latest_erns, "最新の実効再生産数")

In [None]:
# Same lookup as for the reproduction numbers, applied to the positives table.
os_idx = get_os_idx_of_arr(poss, dt_delta)
latest_poss = poss[os_idx] # latest number of positive cases (one row of poss)
dump_val_in_arr(populations, latest_poss, "最新の感染者数", 0)

In [None]:
# NOTE(review): calc_last1w2w_dif presumably returns the per-prefecture change in
# `poss` over the last 1 week and last 2 weeks — confirm in cov19utils.
diff1w_poss, diff2w_poss = calc_last1w2w_dif(poss, dt_delta)
dump_val_in_arr(populations, diff1w_poss, "1週間陽性者数", 0)
dump_val_in_arr(populations, diff2w_poss, "2週間陽性者数", 0)

In [None]:
# Incidence rate per prefecture, expressed as a percentage of the prefecture's
# 'total' population figure. Lists are indexed by prefecture code; slot 0 is unused.
incid1w_rate = [0.0] * (n_states + 1)    # incidence rate (last 1 week)
incid2w_rate = [0.0] * (n_states + 1)    # incidence rate (last 2 weeks)
incid_all_rate = [0.0] * (n_states + 1)  # incidence rate (whole period)
for pref in populations.values():
    code, total = pref['code'], pref['total']
    incid1w_rate[code] = (diff1w_poss[code] / total) * 100
    incid2w_rate[code] = (diff2w_poss[code] / total) * 100
    incid_all_rate[code] = (latest_poss[code] / total) * 100
dump_val_in_arr(populations, incid_all_rate, "全期間罹患率", 3)

In [None]:
diff1w_test, diff2w_test = calc_last1w2w_dif(test, dt_delta)
# Ibaraki's open data only covers tests performed by the public health
# institute and the Mito City health center. Tests by private labs and medical
# institutions come on top of that, and the prefecture's website states that
# tallying them takes time, so as a stopgap the counts are scaled by a ratio.
multi_ibaraki = 6
for _counts in (diff1w_test, diff2w_test):
    # slot 8 is the entry being scaled — presumably Ibaraki's prefecture code; confirm
    _counts[8] *= multi_ibaraki
dump_val_in_arr(populations, diff1w_test, "1週間検査数", 0)
dump_val_in_arr(populations, diff2w_test, "2週間検査数", 0)

In [None]:
# Test positivity rate per prefecture, as a percentage clamped to [0, 100].
test1w_rate = [0.0] * (n_states + 1)  # positivity rate (last 1 week)
test2w_rate = [0.0] * (n_states + 1)  # positivity rate (last 2 weeks)
for pref in populations.values():
    code = pref['code']
    for _rates, _positives, _tested in (
            (test1w_rate, diff1w_poss, diff1w_test),
            (test2w_rate, diff2w_poss, diff2w_test)):
        # Leave the rate at zero when no tests were counted (avoids dividing by zero).
        if _tested[code] > 0:
            # The positive count can decrease when patients are reclassified as
            # re-positive, so the lower bound is clamped with max(0, a).
            _rates[code] = min(100, max(0, (_positives[code] / _tested[code]) * 100))
dump_val_in_arr(populations, test1w_rate, "1週間陽性率", 1)
dump_val_in_arr(populations, test2w_rate, "2週間陽性率", 1)

In [None]:
# Disabled (`if False`): matplotlib version of the 1-week scatter of effective
# reproduction number vs. 1-week incidence rate. Never executed as written.
if False:
    fig, ax = create_basic_scatter_figure("実効再生産数(感染拡大しているか？)", "直近1週間罹患率(県内で自分が罹患する確率)[%]")
    # Defaults, overwritten per prefecture below (list position = code - 1).
    cols = [ "#ff0000" for i in range(n_states)]
    texts = [ "?" for i in range(n_states)]
    for k, v in populations.items():
        # Colour by the entry's 'region' value mapped onto the HSV colormap.
        cols[v['code'] - 1]  = cm.hsv(v['region'] / 11)
        texts[v['code'] - 1] = v['ja']
    # Index 0 of the value arrays is skipped; data starts at index 1.
    plt.scatter(latest_erns[1:], incid1w_rate[1:], s=50, c=cols)
    for i in range(n_states):
        ax.annotate(texts[i], xy=(latest_erns[1 + i], incid1w_rate[1 + i]), size=15, fontname=FONT_NAME)
    tw_body_1w = '全国 新型コロナ情報 都道府県別拡大傾向(直近1週間)'
    plt.title(tw_body_1w, fontname=FONT_NAME)
    show_and_clear(fig, "docs/images/ern-1w_poss.jpg")

In [None]:
# Plotly scatter: latest effective reproduction number (x) against the 1-week
# incidence rate (y), one labelled marker per prefecture.
fig = go.Figure()
# Defaults, replaced below for every prefecture (list position = code - 1).
cols = ["#ff0000"] * n_states
texts = ["?"] * n_states

for pref in populations.values():
    slot = pref['code'] - 1
    # Marker colour is derived from the entry's 'region' value.
    cols[slot] = "hsv({},100%,100%)".format(int((pref['region'] * 255) / 11))
    texts[slot] = pref['ja']

# Index 0 of the value arrays is skipped; data starts at index 1.
fig.add_trace(
    go.Scatter(
        x=latest_erns[1:],
        y=incid1w_rate[1:],
        mode='markers+text',
        marker={"size": 9, "color": cols},
        text=texts,
        textposition='top center',
    )
)
tw_body_1w = '全国 新型コロナ 県別 拡大傾向[直近1週間]({})'.format(today_str)
fig.update_layout(
    title=tw_body_1w,
    xaxis={"title": "実効再生産数(感染拡大しているか？)"},
    yaxis={"title": "直近1週間罹患率(県内で自分が罹患する確率)[%]"},
)
show_and_save_plotly(fig, "ern-1w_poss.jpg", js=False)

In [None]:
def show_zoomed_ern_scatter(
    xbins, ybins, colors, xytexts, nweek, filename, title, xlabel, ylabel,
    n_neighbors=30, n_states=47):
    """Draw a matplotlib scatter of the points Local Outlier Factor keeps as inliers.

    The (x, y) pairs from index 1 onward are passed to LocalOutlierFactor;
    only the points it labels 1 are plotted and annotated, and the axes run
    from 0 to the largest remaining value, which zooms in on the main cluster.

    Args:
        xbins: sequence of x values; index 0 is ignored.
        ybins: sequence of y values; index 0 is ignored.
        colors: one colour per data point (aligned with xbins[1:]).
        xytexts: one label per data point (aligned with xbins[1:]).
        nweek: unused; kept for interface compatibility.
        filename: passed to show_and_clear.
        title: figure title.
        xlabel: x-axis label.
        ylabel: y-axis label.
        n_neighbors: n_neighbors for LocalOutlierFactor.
        n_states: unused; kept for interface compatibility.
    """
    # Convert both inputs up front: boolean-mask indexing below only works on
    # arrays, and callers pass plain lists as well as NumPy arrays.
    xbins = np.asarray(xbins)
    ybins = np.asarray(ybins)
    fig, ax = create_basic_scatter_figure(xlabel, ylabel)
    # LOF
    # NOTE(review): metric='mahalanobis' is given without metric_params —
    # confirm the installed scikit-learn version accepts this.
    clf = LocalOutlierFactor(
        n_neighbors=n_neighbors, contamination='auto',
        algorithm='auto', metric='mahalanobis')
    X = np.stack([xbins[1:], ybins[1:]], 1)
    y_pred = clf.fit_predict(X)
    selected_cols = np.array(colors)[y_pred == 1]
    selected_text = np.array(xytexts)[y_pred == 1]
    # Prepend a non-inlier label so the mask lines up with the full-length
    # arrays, whose index 0 is not a data point.
    y_pred = np.append(-1, y_pred)
    selected_xbins = xbins[y_pred == 1]
    selected_ybins = ybins[y_pred == 1]
    # np.max raises on an empty selection, so only set limits when points remain.
    if selected_xbins.size > 0:
        plt.xlim(0, np.max(selected_xbins))
        plt.ylim(0, np.max(selected_ybins))
    plt.scatter(selected_xbins, selected_ybins, s=50, c=selected_cols)
    for i in np.arange(len(selected_xbins)):
        ax.annotate(selected_text[i], xy=(selected_xbins[i], selected_ybins[i]), size=15, fontname=FONT_NAME)
    plt.title(title, fontname=FONT_NAME)
    show_and_clear(fig, filename)

In [None]:
def show_zoomed_ern_plotly(
    xbins, ybins, colors, xytexts, nweek, filename, title, xlabel, ylabel,
    n_neighbors=30, n_states=47):
    """Draw a plotly scatter of the points Local Outlier Factor keeps as inliers.

    The (x, y) pairs from index 1 onward are passed to LocalOutlierFactor;
    only the points it labels 1 are plotted, which zooms in on the main cluster.

    Args:
        xbins: sequence of x values; index 0 is ignored.
        ybins: sequence of y values; index 0 is ignored.
        colors: one colour per data point (aligned with xbins[1:]).
        xytexts: one label per data point (aligned with xbins[1:]).
        nweek: unused; kept for interface compatibility.
        filename: passed to show_and_save_plotly.
        title: figure title.
        xlabel: x-axis title.
        ylabel: y-axis title.
        n_neighbors: n_neighbors for LocalOutlierFactor.
        n_states: unused; kept for interface compatibility.
    """
    # Convert both inputs up front: boolean-mask indexing below only works on
    # arrays, and callers pass plain lists as well as NumPy arrays.
    xbins = np.asarray(xbins)
    ybins = np.asarray(ybins)
    fig = go.Figure()
    # LOF
    # NOTE(review): metric='mahalanobis' is given without metric_params —
    # confirm the installed scikit-learn version accepts this.
    clf = LocalOutlierFactor(n_neighbors=n_neighbors, contamination='auto', algorithm='auto', metric='mahalanobis')
    X = np.stack([xbins[1:], ybins[1:]], 1)
    y_pred = clf.fit_predict(X)
    selected_cols = np.array(colors)[y_pred == 1]
    selected_text = np.array(xytexts)[y_pred == 1]
    # Prepend a non-inlier label so the mask lines up with the full-length
    # arrays, whose index 0 is not a data point.
    y_pred = np.append(-1, y_pred)
    selected_xbins = xbins[y_pred == 1]
    selected_ybins = ybins[y_pred == 1]
    fig.add_trace(go.Scatter(x=selected_xbins, y=selected_ybins,
                         mode='markers+text', marker=dict(size=9, color=selected_cols),
                         text=selected_text, textposition='top center'))
    fig.update_layout(title=title, xaxis=dict(title=xlabel), yaxis=dict(title=ylabel))
    show_and_save_plotly(fig, filename, js=False)

In [None]:
# Zoomed (outliers removed) version of the 1-week scatter; the title doubles
# as the tweet text later on. The literal 1 is the `nweek` argument.
tw_body_1w_zoom = '全国 新型コロナ 県別拡大傾向[直近1週間]ズーム表示({})'.format(today_str)
show_zoomed_ern_plotly(
    latest_erns, incid1w_rate, cols, texts, 1, "ern-1w_poss-zoom.jpg",
    tw_body_1w_zoom,
    "実効再生産数(感染拡大しているか？)",
    "直近1週間罹患率(県内で自分が罹患する確率)[%]")

In [None]:
# Disabled (`if False`): matplotlib version of the 2-week scatter of effective
# reproduction number vs. 2-week incidence rate. Never executed as written.
if False:
    # Bind the new figure to `fig`, the name handed to show_and_clear below;
    # binding it to a different name would make show_and_clear save whatever
    # figure `fig` referred to before this cell.
    fig, ax = create_basic_scatter_figure("実効再生産数(感染拡大しているか？)", "直近2週間罹患率(県内で自分が罹患する確率)[%]")
    # Index 0 of the value arrays is skipped; data starts at index 1.
    plt.scatter(latest_erns[1:], incid2w_rate[1:], s=50, c=cols)
    tw_body_2w = '全国 新型コロナ 府県別拡大傾向(直近2週間)'
    plt.title(tw_body_2w, fontname=FONT_NAME)
    for i in range(n_states):
        ax.annotate(texts[i], xy=(latest_erns[1 + i], incid2w_rate[1 + i]), size=15, fontname=FONT_NAME)
    show_and_clear(fig, "docs/images/ern-2w_poss.jpg")

In [None]:
# Plotly scatter: latest effective reproduction number (x) against the 2-week
# incidence rate (y), reusing the per-prefecture colours and labels in
# `cols` / `texts`.
fig = go.Figure()
# Index 0 of the value arrays is skipped; data starts at index 1.
fig.add_trace(
    go.Scatter(
        x=latest_erns[1:],
        y=incid2w_rate[1:],
        mode='markers+text',
        marker={"size": 9, "color": cols},
        text=texts,
        textposition='top center',
    )
)
tw_body_2w = '全国 新型コロナ 県別 拡大傾向[直近2週間] ({})'.format(today_str)
fig.update_layout(
    title=tw_body_2w,
    xaxis={"title": "実効再生産数(感染拡大しているか？)"},
    yaxis={"title": "直近2週間罹患率(県内で自分が罹患する確率)[%]"},
)
show_and_save_plotly(fig, "ern-2w_poss.jpg", js=False)

In [None]:
# Zoomed (outliers removed) version of the 2-week scatter; the title doubles
# as the tweet text later on. The literal 2 is the `nweek` argument.
tw_body_2w_zoom = '全国 新型コロナ 県別拡大傾向[直近2週間]ズーム表示 ({})'.format(today_str)
show_zoomed_ern_plotly(
    latest_erns, incid2w_rate, cols, texts, 2, "ern-2w_poss-zoom.jpg",
    tw_body_2w_zoom,
    "実効再生産数(感染拡大しているか？)",
    "直近2週間罹患率(県内で自分が罹患する確率)[%]")

In [None]:
# Japan maps of the latest effective reproduction number per prefecture.
# The title is reused as tweet text later on.
tw_body_map_erns = "全国 新型コロナ 県別 実効再生産数 ({})".format(today_str)
imgname = 'japan-erns.jpg'
# The choropleth receives the values without slot 0; the heatmap receives the
# full array plus `populations`.
# NOTE(review): both helpers get the same image name — confirm where each writes its output.
make_japan_choropleth(imgname, tw_body_map_erns, latest_erns[1:])
make_japan_heatmap(imgname, tw_body_map_erns, latest_erns, populations)

In [None]:
#tw_body_map_erns = "全国 新型コロナ情報 都道府県別 実効再生産数"
#mak_japan_heatmap('docs/images/japan-erns.jpg', tw_body_map_erns, latest_erns, populations)

In [None]:
# Japan maps of the 1-week incidence rate per prefecture.
# The title is reused as tweet text later on.
tw_body_map_1w = "全国 新型コロナ 県別 直近1週間罹患率 ({})".format(today_str)
imgname = 'japan-1w.jpg'
# The choropleth receives the values without slot 0; the heatmap receives the
# full list plus `populations`.
make_japan_choropleth(imgname, tw_body_map_1w, incid1w_rate[1:])
make_japan_heatmap(imgname, tw_body_map_1w, incid1w_rate, populations)

In [None]:
# Japan maps of the 2-week incidence rate per prefecture.
# The title is reused as tweet text later on.
tw_body_map_2w = "全国 新型コロナ 県別 直近2週間罹患率 ({})".format(today_str)
imgname = 'japan-2w.jpg'
# The choropleth receives the values without slot 0; the heatmap receives the
# full list plus `populations`.
make_japan_choropleth(imgname, tw_body_map_2w, incid2w_rate[1:])
make_japan_heatmap(imgname, tw_body_map_2w, incid2w_rate, populations)

In [None]:
# Japan maps of the whole-period incidence rate per prefecture.
# The title is reused as tweet text later on.
tw_body_map_all = "全国 新型コロナ 県別 罹患率(全期間) ({})".format(today_str)
imgname = 'japan-all.jpg'
# The choropleth receives the values without slot 0; the heatmap receives the
# full list plus `populations`.
make_japan_choropleth(imgname, tw_body_map_all, incid_all_rate[1:])
make_japan_heatmap(imgname, tw_body_map_all, incid_all_rate, populations)

In [None]:
# Japan maps of the 1-week test positivity rate per prefecture.
# The title is reused as tweet text later on.
tw_body_map_rate1w = "全国 新型コロナ 県別 直近1週間陽性率 ({})".format(today_str)
imgname = 'japan-rate1w.jpg'
# The choropleth receives the values without slot 0; the heatmap receives the
# full list plus `populations`.
make_japan_choropleth(imgname, tw_body_map_rate1w, test1w_rate[1:])
make_japan_heatmap(imgname, tw_body_map_rate1w, test1w_rate, populations)

In [None]:
# Japan maps of the 2-week test positivity rate per prefecture.
# The title is reused as tweet text later on.
tw_body_map_rate2w = "全国 新型コロナ 県別 直近2週間陽性率 ({})".format(today_str)
imgname = 'japan-rate2w.jpg'
# The choropleth receives the values without slot 0; the heatmap receives the
# full list plus `populations`.
make_japan_choropleth(imgname, tw_body_map_rate2w, test2w_rate[1:])
make_japan_heatmap(imgname, tw_body_map_rate2w, test2w_rate, populations)

In [None]:
# Tweet the four scatter charts; each tweet text is the chart title followed
# by a link to the prefecture page.
# NOTE(review): the images are read from docs/images/ — presumably where
# show_and_save_plotly wrote them; confirm in cov19utils.
tw = get_twitter()
tw_body_postfix = " https://geneasyura.github.io/cov19-hm/pref.html"
tweet_with_image(tw, "docs/images/ern-1w_poss.jpg", tw_body_1w + tw_body_postfix)
tweet_with_image(tw, "docs/images/ern-1w_poss-zoom.jpg", tw_body_1w_zoom + tw_body_postfix)
tweet_with_image(tw, "docs/images/ern-2w_poss.jpg", tw_body_2w + tw_body_postfix)
tweet_with_image(tw, "docs/images/ern-2w_poss-zoom.jpg", tw_body_2w_zoom + tw_body_postfix)

In [None]:
# Tweet the reproduction-number and incidence-rate maps, linking to the heatmap
# page. Reuses the `tw` client created in the preceding tweet cell.
tw_body_postfix = " https://geneasyura.github.io/cov19-hm/pref-hm.html"
tweet_with_image(tw, 'docs/images/japan-erns.jpg', tw_body_map_erns + tw_body_postfix)
tweet_with_image(tw, 'docs/images/japan-1w.jpg', tw_body_map_1w + tw_body_postfix)
tweet_with_image(tw, 'docs/images/japan-2w.jpg', tw_body_map_2w + tw_body_postfix)
tweet_with_image(tw, 'docs/images/japan-all.jpg', tw_body_map_all + tw_body_postfix)

In [None]:
# Tweet the two positivity-rate maps. Relies on `tw` and `tw_body_postfix`
# (the heatmap-page link) set in the earlier tweet cells.
tweet_with_image(tw, 'docs/images/japan-rate1w.jpg', tw_body_map_rate1w + tw_body_postfix)
tweet_with_image(tw, 'docs/images/japan-rate2w.jpg', tw_body_map_rate2w + tw_body_postfix)

In [None]:
#poss / test