<a href="https://colab.research.google.com/github/imabari/covid19-data/blob/master/nagano/nagano_covid19.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!apt install python3-tk ghostscript
!pip install camelot-py[cv]

Reading package lists... Done
Building dependency tree       
Reading state information... Done
python3-tk is already the newest version (3.6.9-1~18.04).
The following package was automatically installed and is no longer required:
  libnvidia-common-440
Use 'apt autoremove' to remove it.
The following additional packages will be installed:
  fonts-droid-fallback fonts-noto-mono gsfonts libcupsfilters1 libcupsimage2
  libgs9 libgs9-common libijs-0.35 libjbig2dec0 poppler-data
Suggested packages:
  fonts-noto ghostscript-x poppler-utils fonts-japanese-mincho
  | fonts-ipafont-mincho fonts-japanese-gothic | fonts-ipafont-gothic
  fonts-arphic-ukai fonts-arphic-uming fonts-nanum
The following NEW packages will be installed:
  fonts-droid-fallback fonts-noto-mono ghostscript gsfonts libcupsfilters1
  libcupsimage2 libgs9 libgs9-common libijs-0.35 libjbig2dec0 poppler-data
0 upgraded, 11 newly installed, 0 to remove and 39 not upgraded.
Need to get 14.1 MB of archives.
After this operation, 

In [2]:
!pip install jaconv

Collecting jaconv
  Downloading https://files.pythonhosted.org/packages/b0/9e/cf1353fb3e81a177bb52ca59a0ebee425f084b7298039a7965c5414d2d62/jaconv-0.2.4.tar.gz
Building wheels for collected packages: jaconv
  Building wheel for jaconv (setup.py) ... [?25l[?25hdone
  Created wheel for jaconv: filename=jaconv-0.2.4-cp36-none-any.whl size=12284 sha256=686b0b9294b14e1dea58bcd2fd311171adf2a69b6c5f64413fc62b6ca3cfd889
  Stored in directory: /root/.cache/pip/wheels/e1/46/f7/85a7f89bd3263423c8530dfed16083f9a142cc0fc78c81ff32
Successfully built jaconv
Installing collected packages: jaconv
Successfully installed jaconv-0.2.4


In [3]:
import datetime
import pathlib
import re
from urllib.parse import urljoin

In [4]:
import camelot
import jaconv
import requests
from bs4 import BeautifulSoup

In [5]:
def fetch_file(url, dir=".", timeout=30):
    """Download ``url`` and save it under ``dir``, returning the saved path.

    Args:
        url: Address of the file to download.
        dir: Directory the file is written to; created if it does not exist.
        timeout: Seconds passed to ``requests.get`` so a stalled server
            cannot block the notebook indefinitely.

    Returns:
        ``pathlib.Path`` of the file that was written.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``raise_for_status``).
    """
    from urllib.parse import urlsplit

    r = requests.get(url, timeout=timeout)
    r.raise_for_status()

    # Name the file after the URL *path* only, so a query string or fragment
    # never leaks into the filename. If the path has no final component,
    # fall back to the last component of the whole URL.
    name = pathlib.PurePosixPath(urlsplit(url).path).name or pathlib.PurePath(url).name

    p = pathlib.Path(dir, name)
    p.parent.mkdir(parents=True, exist_ok=True)

    with p.open(mode="wb") as fw:
        fw.write(r.content)
    return p

In [6]:
# Page on pref.nagano.lg.jp that is fetched and searched for the PDF link.
url = "https://www.pref.nagano.lg.jp/hoken-shippei/kenko/kenko/kansensho/joho/corona-doko.html"

# Browser-like User-Agent header sent with the page request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
}

In [7]:
# Fetch the status page. The timeout stops a stalled connection from
# hanging the notebook, and any HTTP error status aborts the run here.
r = requests.get(url, headers=headers, timeout=30)
r.raise_for_status()

# Parse the raw bytes so BeautifulSoup can detect the page encoding itself.
soup = BeautifulSoup(r.content, "html.parser")

In [8]:
# Find the anchor whose text starts with "グラフPDFデータ" and whose href
# contains a literal ".pdf" (the dot is escaped so it no longer matches any
# character). `string=` is the current name of the old `text=` argument.
tag = soup.find("a", string=re.compile("^グラフPDFデータ"), href=re.compile(r"\.pdf"))

# Fail with a clear message if the page layout changed and no link matched,
# instead of an AttributeError on None below.
if tag is None:
    raise ValueError("PDF link not found on page: " + url)

# The href may be relative, so resolve it against the page URL.
# The download itself is done by the next cell, so it is not repeated here.
link = urljoin(url, tag.get("href"))

In [9]:
# Download the linked PDF into the current directory; p is the saved path.
p = fetch_file(link)

In [10]:
# Extract tables from page 1 of the PDF with camelot's "stream" flavor,
# then keep the first table as a DataFrame.
pdf_tables = camelot.read_pdf(str(p), pages="1", flavor="stream")
df = pdf_tables[0].df

In [11]:
# Show the raw table extracted from the PDF for inspection.
df

Unnamed: 0,0,1,2,3,4,5,6
0,新型コロナウイルス感染症の状況,,,,,,
1,,,,,,９月８日 17時現在,
2,,,陽性者数,,,,
3,検査実施,陰性,,,,,
4,,,（累積）,,,,
5,,,,入院中,,退院等,
6,人数,,,,,,
7,,,,,重症,,死亡
8,,,292人,,,,
9,,,,39人,,258人,


In [12]:
# Flag rows whose first two cells, joined together, begin with the "・"
# bullet, and keep every other row.
is_bullet_row = (df[0] + df[1]).str.startswith("・")
df1 = df[~is_bullet_row]

In [13]:
# Flatten the table body (rows from position 2 onward) into one string,
# column by column: join each column's cells, strip all whitespace, and
# apply jaconv.z2h with its default settings.
# DataFrame.iteritems() was removed in pandas 2.0; items() is the equivalent
# that works on both old and new pandas.
temp = [
    jaconv.z2h("".join(item.str.cat(sep="").split()))
    for _, item in df1.iloc[2:].items()
]

text = "".join(temp)

In [14]:
# Each match is a known label immediately followed by a number and "人".
count_pattern = re.compile(r"(検査実施人数|陰性|陽性者数（累積）|入院中|重症|退院等|死亡)([0-9,]+)人")

# Map label -> integer count, dropping thousands separators. If a label
# matches more than once, the last match wins.
data = {
    label: int(number.replace(",", ""))
    for label, number in count_pattern.findall(text)
}

In [15]:
# Look for a "うち<number>名" fragment in the flattened text.
m = re.search("うち([0-9,]+)名", text)

# When present, store the number (thousands separators removed) under the
# "無症状病原体保有者" key.
if m is not None:
    digits_only = m.group(1).replace(",", "")
    data["無症状病原体保有者"] = int(digits_only)

In [16]:
# Row 1 of the raw table carries the "as of" stamp (e.g. "９月８日 17時現在").
# Convert full-width digits and ASCII to half-width, leaving kana untouched.
txt = jaconv.z2h(df.iloc[1].str.cat(sep=""), kana=False, digit=True, ascii=True)

# Raw string avoids the invalid "\d" escape warning; \s* also accepts a
# full-width space between the date and the hour.
m_up = re.search(r"(\d{1,2})月(\d{1,2})日\s*(\d{1,2})時現在", txt)

now = datetime.datetime.now()

if m_up:
    month, day, hour = map(int, m_up.groups())
    # The PDF omits the year, so start from the current one. Adding the hour
    # as a timedelta also accepts "24時" (rolls over to the next day).
    dt_now = datetime.datetime(now.year, month, day) + datetime.timedelta(hours=hour)
    # A stamp more than a day in the future means the data is from late last
    # year. The one-day slack allows for the kernel clock being in another
    # time zone than the published timestamp.
    if dt_now - now > datetime.timedelta(days=1):
        dt_now = dt_now.replace(year=dt_now.year - 1)
else:
    # No stamp found: fall back to the time of this run.
    dt_now = now

data["更新日時"] = dt_now.isoformat()

In [17]:
# Show the parsed counts and the update timestamp.
data

{'入院中': 39,
 '更新日時': '2020-09-08T17:00:00',
 '検査実施人数': 16526,
 '無症状病原体保有者': 38,
 '退院等': 258,
 '重症': 0,
 '陰性': 16234,
 '陽性者数（累積）': 292}