<a href="https://colab.research.google.com/github/imabari/covid19-data/blob/master/toyama/toyama_opendata.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Upgrade pandas (-U) before it is imported further down.
# NOTE(review): the version is not pinned; the recorded run resolved to pandas 1.0.3.
!pip install -U pandas

Requirement already up-to-date: pandas in /usr/local/lib/python3.6/dist-packages (1.0.3)


In [0]:
import datetime
import json
import os

In [0]:
import pandas as pd

# 設定

In [0]:
# Identity of the publishing local government; these three values are written
# into every row of the exported tables.
# NOTE(review): the exported file names use the prefix 160001 rather than this
# code -- confirm which one is intended.
PREF_CODE, PREF_NAME, CITY_NAME = (
    "160008",  # local government code column
    "富山県",  # prefecture name column
    "",  # municipality name column (left blank)
)

In [0]:
# Directory that receives both the downloaded source sheets and the generated CSVs.
OUT_DIR = "./data"

# Local copies of the two source sheets, kept inside OUT_DIR.
COUNTS_FILE = f"{OUT_DIR}/toyama_counts.csv"
PATIENTS_FILE = f"{OUT_DIR}/toyama_patients.csv"

In [0]:
# Create the output directory together with any missing parents; this is a
# no-op when the directory already exists. Done in Python instead of a shell
# `mkdir -p` so the step needs no POSIX shell and no shell interpolation of
# OUT_DIR.
os.makedirs(OUT_DIR, exist_ok=True)

# 元データのダウンロード

In [7]:
# Fetch two tabs of the same published Google Sheet as CSV (output=csv).
# The gid=0 tab is saved as the patients file ...
!wget "https://docs.google.com/spreadsheets/d/e/2PACX-1vSJuQThafLPC7OPqUC9TbLV1DmSU0x2Co8VZi2Q2ZZCKLJCTayDl6IoXKyK676mzBgpkoKMgpNK1VML/pub?gid=0&single=true&output=csv" -O $PATIENTS_FILE
# ... and the gid=574469870 tab is saved as the daily counts file.
!wget "https://docs.google.com/spreadsheets/d/e/2PACX-1vSJuQThafLPC7OPqUC9TbLV1DmSU0x2Co8VZi2Q2ZZCKLJCTayDl6IoXKyK676mzBgpkoKMgpNK1VML/pub?gid=574469870&single=true&output=csv" -O $COUNTS_FILE

--2020-04-27 00:59:02--  https://docs.google.com/spreadsheets/d/e/2PACX-1vSJuQThafLPC7OPqUC9TbLV1DmSU0x2Co8VZi2Q2ZZCKLJCTayDl6IoXKyK676mzBgpkoKMgpNK1VML/pub?gid=0&single=true&output=csv
Resolving docs.google.com (docs.google.com)... 173.194.69.113, 173.194.69.138, 173.194.69.102, ...
Connecting to docs.google.com (docs.google.com)|173.194.69.113|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/csv]
Saving to: ‘./data/toyama_patients.csv’

./data/toyama_patie     [ <=>                ]  14.75K  --.-KB/s    in 0s      

2020-04-27 00:59:03 (83.4 MB/s) - ‘./data/toyama_patients.csv’ saved [15109]

--2020-04-27 00:59:04--  https://docs.google.com/spreadsheets/d/e/2PACX-1vSJuQThafLPC7OPqUC9TbLV1DmSU0x2Co8VZi2Q2ZZCKLJCTayDl6IoXKyK676mzBgpkoKMgpNK1VML/pub?gid=574469870&single=true&output=csv
Resolving docs.google.com (docs.google.com)... 173.194.69.139, 173.194.69.138, 173.194.69.102, ...
Connecting to docs.google.com (docs.google.com)|173.194.69.139

# オープンデータ作成

In [0]:
# Load the daily counts sheet. The date column becomes the index and is parsed
# as dates (parse_dates=True applies to the index); remarks are read as
# object dtype.
counts_read_options = dict(
    index_col="年月日",
    parse_dates=True,
    dtype={"備考": "object"},
)
df_counts = pd.read_csv(COUNTS_FILE, **counts_read_options)

## 前処理

In [0]:
# Replace missing remarks with empty strings.
df_counts["備考"] = df_counts["備考"].fillna("")

# Each output table names its date column differently, but every one of them
# holds the same YYYY-MM-DD rendering of the date index.
iso_dates = df_counts.index.strftime("%Y-%m-%d")
for date_column in (
    "実施_年月日",  # number of people tested
    "完了_年月日",  # negative confirmations
    "受付_年月日",  # call-center consultations
):
    df_counts[date_column] = iso_dates

In [0]:
# Stamp every counts row with the constant local-government identity columns.
counts_identity = {
    "全国地方公共団体コード": PREF_CODE,
    "都道府県名": PREF_NAME,
    "市区町村名": CITY_NAME,
}
for identity_column, identity_value in counts_identity.items():
    df_counts[identity_column] = identity_value

## 検査実施人数

In [0]:
# Give the tested-people count the column title used in the exported table.
df_counts = df_counts.rename(columns={"検査実施人数": "検査実施_人数"})

# Columns of the tested-people table, in output order.
test_people_columns = [
    "実施_年月日",
    "全国地方公共団体コード",
    "都道府県名",
    "市区町村名",
    "検査実施_人数",
    "備考",
]
test_people = df_counts.loc[:, test_people_columns].copy()

# Written without the date index; the date is already present as a column.
test_people_path = os.path.join(OUT_DIR, "160001_toyama_covid19_test_people.csv")
test_people.to_csv(test_people_path, index=False, encoding="utf-8")

## 陰性確認数

In [0]:

# Give the negative-result count the column title used in the exported table.
df_counts = df_counts.rename(columns={"陰性人数": "陰性確認_件数"})

# Columns of the negative-confirmation table, in output order.
confirm_negative_columns = [
    "完了_年月日",
    "全国地方公共団体コード",
    "都道府県名",
    "市区町村名",
    "陰性確認_件数",
    "備考",
]
confirm_negative = df_counts.loc[:, confirm_negative_columns].copy()

# Written without the date index; the date is already present as a column.
confirm_negative_path = os.path.join(
    OUT_DIR, "160001_toyama_covid19_confirm_negative.csv"
)
confirm_negative.to_csv(confirm_negative_path, index=False, encoding="utf-8")

## 一般相談件数

In [0]:

# Columns of the general-consultation table, in output order.
call_center_columns = [
    "受付_年月日",
    "全国地方公共団体コード",
    "都道府県名",
    "市区町村名",
    "一般相談件数",
]

# Select the columns and publish the count under the generic consultation
# title; rename() returns a new frame, so df_counts keeps its own column name.
call_center = df_counts.loc[:, call_center_columns].rename(
    columns={"一般相談件数": "相談件数"}
)

call_center_path = os.path.join(OUT_DIR, "160001_toyama_covid19_call_center.csv")
call_center.to_csv(call_center_path, index=False, encoding="utf-8")

## 帰国者・接触者相談センター相談件数

In [0]:
# Columns of the returnee/contact consultation table, in output order.
hot_line_columns = [
    "受付_年月日",
    "全国地方公共団体コード",
    "都道府県名",
    "市区町村名",
    "帰国者相談件数",
]

# Select the columns and publish the count under the generic consultation
# title; rename() returns a new frame, so df_counts keeps its own column name.
hot_line = df_counts.loc[:, hot_line_columns].rename(
    columns={"帰国者相談件数": "相談件数"}
)

hot_line_path = os.path.join(OUT_DIR, "160001_toyama_covid19_hot_line.csv")
hot_line.to_csv(hot_line_path, index=False, encoding="utf-8")

## 陽性患者属性

In [0]:
# Load the patient sheet, indexed by its "No" column. The onset date, age band
# and remarks columns are forced to object dtype.
patient_text_columns = ("発症日", "年代", "備考")
df_kanja = pd.read_csv(
    PATIENTS_FILE,
    index_col="No",
    dtype={name: "object" for name in patient_text_columns},
)

In [0]:
# Rename the source sheet's column titles to the titles used in the exported
# patients table (source title -> output title).
patient_column_titles = {
    "検査結果判明日": "公表_年月日",
    "発症日": "発症_年月日",
    "居住地": "患者_居住地",
    "年代": "患者_年代",
    "性別": "患者_性別",
    "職業": "患者_職業",
    "症状": "患者_状態",
    "渡航歴の有無": "患者_渡航歴の有無フラグ",
    "状態": "患者_退院済フラグ",
}
df_kanja = df_kanja.rename(columns=patient_column_titles)

In [0]:
# Stamp every patient row with the constant local-government identity columns.
patient_identity = {
    "全国地方公共団体コード": PREF_CODE,
    "都道府県名": PREF_NAME,
    "市区町村名": CITY_NAME,
}
for identity_column, identity_value in patient_identity.items():
    df_kanja[identity_column] = identity_value

In [0]:
# Encode the status text as a 0/1 flag: hospitalised -> 0, discharged or
# deceased -> 1. The nullable Int64 dtype lets missing entries stay missing.
discharge_codes = {"入院中": 0, "退院": 1, "死亡": 1}
discharged = df_kanja["患者_退院済フラグ"].replace(discharge_codes)
df_kanja["患者_退院済フラグ"] = discharged.astype("Int64")

In [0]:
# Encode the travel-history marker as a 0/1 flag: "x" -> 0, "o" -> 1.
# The nullable Int64 dtype lets missing entries stay missing.
travel_codes = {"x": 0, "o": 1}
travelled = df_kanja["患者_渡航歴の有無フラグ"].replace(travel_codes)
df_kanja["患者_渡航歴の有無フラグ"] = travelled.astype("Int64")

In [0]:
# The exported table has a symptom column; it is filled with empty strings here.
blank_symptom = ""
df_kanja["患者_症状"] = blank_symptom

In [0]:
# Relabel the top age band: entries equal to "90代以上" become "90歳以上";
# all other values are left as they are.
age_band = df_kanja["患者_年代"]
df_kanja["患者_年代"] = age_band.replace("90代以上", "90歳以上")

In [0]:
# Columns of the exported patients table, in output order.
patient_columns = [
    "全国地方公共団体コード",
    "都道府県名",
    "市区町村名",
    "公表_年月日",
    "発症_年月日",
    "患者_居住地",
    "患者_年代",
    "患者_性別",
    "患者_職業",
    "患者_状態",
    "患者_症状",
    "患者_渡航歴の有無フラグ",
    "患者_退院済フラグ",
    "備考",
]
patients = df_kanja.loc[:, patient_columns]

In [0]:
# Write the patients table as UTF-8 CSV; index=False leaves the "No" index out
# of the file.
patients_path = os.path.join(OUT_DIR, "160001_toyama_covid19_patients.csv")
patients.to_csv(patients_path, index=False, encoding="utf-8")

# ファイル圧縮

In [24]:
# Pack the data directory recursively into the "opendata" archive. The recorded
# output shows that the two downloaded source CSVs are included alongside the
# generated files.
# NOTE(review): the directory name "data" is hard-coded here instead of using
# OUT_DIR -- keep the two in sync.
!zip opendata -r data

  adding: data/ (stored 0%)
  adding: data/160001_toyama_covid19_call_center.csv (deflated 80%)
  adding: data/160001_toyama_covid19_confirm_negative.csv (deflated 80%)
  adding: data/160001_toyama_covid19_patients.csv (deflated 90%)
  adding: data/160001_toyama_covid19_hot_line.csv (deflated 81%)
  adding: data/toyama_patients.csv (deflated 87%)
  adding: data/160001_toyama_covid19_test_people.csv (deflated 80%)
  adding: data/toyama_counts.csv (deflated 66%)


# ZIPファイルのダウンロード

In [0]:
from google.colab import files

In [0]:
# Hand the zip archive built in the compression step to Colab's
# files.download helper.
files.download("opendata.zip")