# 実行環境の作成

## Google Drive

Google Driveをマウント

In [1]:
# Mount Google Drive at /content/drive so later cells can read files stored on Drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Google Sheets

ライブラリのインストール

In [2]:
!pip install --upgrade -q gspread

[?25l[K     |████████                        | 10 kB 21.7 MB/s eta 0:00:01[K     |████████████████▏               | 20 kB 25.7 MB/s eta 0:00:01[K     |████████████████████████▎       | 30 kB 30.7 MB/s eta 0:00:01[K     |████████████████████████████████| 40 kB 1.5 MB/s 
[?25h

Google Sheetsの認証

In [3]:
from google.colab import auth
from google.auth import default
import gspread

# Authenticate the Colab user, obtain the default Google credentials and use
# them to authorize a gspread client.
# NOTE(review): `gc` is not referenced again in the visible part of this notebook.
auth.authenticate_user()
creds, _ = default()
gc = gspread.authorize(creds)

## Google Cloud Storage

下記コードでGCPに接続

In [4]:
# Authenticate the Colab user before accessing Google Cloud Storage.
# NOTE(review): the Google Sheets cell above already imports `auth` and calls
# authenticate_user(); presumably repeated so this section can run on its own.
from google.colab import auth
auth.authenticate_user()

認証に成功したらgcsfuseをインストール

In [None]:
# Register the gcsfuse apt repository for this distribution release, add the
# repository signing key, refresh the package index and install gcsfuse.
# NOTE(review): `apt-key add` is deprecated on recent Debian/Ubuntu releases —
# confirm this still works on the current Colab image.
!echo "deb http://packages.cloud.google.com/apt gcsfuse-`lsb_release -c -s` main" | sudo tee /etc/apt/sources.list.d/gcsfuse.list
!curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -
!apt-get -y -q update
!apt-get -y -q install gcsfuse

バケット「statistics-hyogo」をディレクトリ「statistics-hyogo」にマウント

In [None]:
# Create the local mount point (if missing) and mount the bucket on it with gcsfuse.
# NOTE(review): the -1 values for --limit-bytes-per-sec / --limit-ops-per-sec
# presumably disable rate limiting — confirm against the gcsfuse documentation.
! mkdir -p statistics-hyogo
! gcsfuse --implicit-dirs --limit-bytes-per-sec -1 --limit-ops-per-sec -1 statistics-hyogo statistics-hyogo

# estatレスポンスをDataFrameに格納

estat-APIのレスポンスを取得し、DataFrameに変換・整形する関数

In [7]:
import os

# NOTE(security): API credentials should not be committed in a notebook.
# The e-Stat application ID is read from the ESTAT_APPID environment variable
# when it is set. The literal is kept only as a backward-compatible fallback
# and should be rotated and removed.
ESTAT_APPID = os.environ.get('ESTAT_APPID', '724e5b90772a3e9289f41a253e4e7e32438f4fff')

import urllib.parse
import urllib.request
import json
import pandas as pd

"""
estat-APIのレスポンスを整形してDataFrameに変換
"""

def get_estat_dataframe(params):
  """Fetch e-Stat statistics data and return it as a formatted DataFrame.

  Runs the three steps defined in this notebook in order: request the API
  response, convert it to a DataFrame, then format that DataFrame.

  Args:
    params: Query parameters passed on to get_estat_response().

  Returns:
    The DataFrame produced by format_estat_dataframe().
  """
  response = get_estat_response(params)
  df_raw = conv_estat_response_to_dataframe(response)
  return format_estat_dataframe(df_raw)

"""
estat-APIのレスポンスを取得する関数
"""

def get_estat_response(params):
  """Call the e-Stat getStatsData API and return the decoded JSON response.

  Args:
    params: Mapping of API query parameters. It is copied before the
      application ID is added, so the caller's object is not modified.

  Returns:
    The JSON response body parsed into Python objects.

  Raises:
    urllib.error.URLError: If the request fails (raised by urlopen).
  """
  query = dict(params)
  query['appId'] = ESTAT_APPID

  # HTTPS keeps the application ID out of clear-text traffic.
  url = 'https://api.e-stat.go.jp/rest/2.1/app/json/getStatsData?'
  url += urllib.parse.urlencode(query)

  with urllib.request.urlopen(url) as response:
    return json.loads(response.read().decode('utf-8'))


"""
estat-APIのレスポンスをDataFrameに変換する
"""

def conv_estat_response_to_dataframe(response):
  """Convert an e-Stat getStatsData JSON response into a DataFrame.

  The VALUE records become the rows. For every CLASS_OBJ entry, its
  '@code'/'@name' table is outer-joined on the column '@<id>', adding a
  '@<id>_name' column. Because the join is an outer join, class codes without
  any value show up as rows whose value columns are missing.

  Args:
    response: Parsed JSON response (nested dicts/lists) containing
      GET_STATS_DATA -> STATISTICAL_DATA with DATA_INF, CLASS_INF and
      TABLE_INF.

  Returns:
    DataFrame with the value columns, the class name columns, and the
    constant columns 'statsDataId' and 'statsDataName'.
  """
  statistical_data = response['GET_STATS_DATA']['STATISTICAL_DATA']

  # One row per VALUE record.
  df = pd.json_normalize(statistical_data['DATA_INF']['VALUE'])

  class_objs = statistical_data['CLASS_INF']['CLASS_OBJ']
  # A lone CLASS_OBJ given as a dict (not wrapped in a list) is accepted too;
  # iterating the dict directly would walk its keys and fail.
  if isinstance(class_objs, dict):
    class_objs = [class_objs]

  for class_obj in class_objs:
    key_name = '@{}'.format(class_obj['@id'])

    # Keep only code and name, renamed to '@<id>' and '@<id>_name'.
    df_class = pd.json_normalize(class_obj['CLASS'])[['@code', '@name']]
    df_class = df_class.rename(columns={'@code': key_name, '@name': key_name + '_name'})

    df = pd.merge(df, df_class, on=key_name, how='outer')

  # Attach the table-level metadata to every row.
  table_inf = statistical_data['TABLE_INF']
  df['statsDataId'] = table_inf['@id']
  df['statsDataName'] = table_inf['STAT_NAME']['$']

  return df

"""
DataFrameを整形する
"""

def format_estat_dataframe(df_arg, arealist_path='drive/MyDrive/statistics-hyogo/resas/arealist.json'):
  """Select, rename and clean the e-Stat columns and attach area information.

  Steps: keep the statistics/category/time/area/value/unit columns under
  friendlier names, drop rows without a category code, strip the
  '<categoryCode>_' prefix from the category name, cut the time code to its
  first four characters, join the area list on 'areaCode', and replace the
  placeholder values '-' and 'X' with '0'.

  Args:
    df_arg: DataFrame holding the columns 'statsDataId', 'statsDataName',
      '@cat01', '@cat01_name', '@time', '@time_name', '@area', '$', '@unit'.
    arealist_path: Path of the area list JSON file. The default is the
      original relative path, which resolves against the current working
      directory.

  Returns:
    DataFrame containing only rows whose area code exists in the area list,
    extended with the area list's columns.
  """
  df = df_arg[['statsDataId','statsDataName','@cat01','@cat01_name','@time','@time_name','@area','$','@unit']]
  df = df.rename(columns={
      '@cat01': 'categoryCode',
      '@cat01_name': 'categoryName',
      '@time': 'timeCode',
      '@time_name': 'timeName',
      '@area': 'areaCode',
      '$': 'value',
      '@unit': 'unit',
  })

  # Rows created by outer joins upstream carry no category; drop them.
  df = df.dropna(subset=['categoryCode'])

  # Strip the '<categoryCode>_' prefix. A comprehension (unlike a row-wise
  # DataFrame.apply) also works when the frame is empty.
  df['categoryName'] = [
      name.replace(code + '_', '')
      for name, code in zip(df['categoryName'], df['categoryCode'])
  ]

  # Keep only the first four characters of the time code.
  df['timeCode'] = df['timeCode'].astype(str).str[:4]

  # Attach area information. The encoding is explicit so the file is read the
  # same way regardless of the platform default.
  with open(arealist_path, encoding='utf-8') as j:
    df_arealist = pd.json_normalize(json.load(j))
  df = pd.merge(df, df_arealist, on='areaCode', how='right')

  # The right join keeps area-list entries without data; remove them again.
  df = df.dropna(subset=['categoryCode'])

  # Replace the placeholder values '-' and 'X' with '0'.
  df = df.replace({'value': {'-': '0', 'X': '0'}})

  return df

# 全国・都道府県ランキングを付与

## 都道府県データ

全国ランキングを付与

In [8]:
"""
都道府県のランキングを付与する関数
"""
def withrank_prefecture(df_arg):
  """Add a nationwide rank column ('rankJapan') to prefecture data.

  Values are ranked in descending order within each (timeCode, categoryCode)
  group. The rank is stored as a string. As in the previous implementation,
  tied values receive the truncated average of their positions.

  Compared with the earlier per-group filter/concat loop, this version ranks
  only the 'value' column, keeps the rows in their input order (the loop
  emitted groups in arbitrary set order) and avoids the misaligned chained
  boolean indexing `df[mask1][mask2]`.

  Args:
    df_arg: DataFrame with 'timeCode', 'categoryCode' and a 'value' column
      convertible to float. It is not modified.

  Returns:
    A copy of df_arg with 'value' converted to float and a 'rankJapan' column
    added. Rows whose value or group key is missing get a missing rank.
  """
  df_res = df_arg.astype({'value': float})

  ranks = df_res.groupby(['timeCode', 'categoryCode'])['value'].rank(ascending=False)

  # int() truncates fractional (tied) ranks exactly like the former astype(int).
  df_res['rankJapan'] = ranks.map(lambda r: str(int(r)), na_action='ignore')

  return df_res

## 市区町村データ

全国ランキングと都道府県内ランキングを付与

政令指定都市の扱い：`bigCityFlag` が `'1'` の行はランキング対象から除外し、算出したランキングを元のDataFrameに left join で結合する（対象外の行のランキングは欠損値になる）

In [9]:
"""
市区町村のランキングを付与する関数
"""
def withrank_city(df_arg):
  """Add nationwide and per-prefecture rank columns to municipality data.

  Rows whose 'bigCityFlag' equals '1' are excluded from ranking and end up
  with missing ranks. All other rows are ranked by descending value:
    * 'rankJapan': within each (timeCode, categoryCode) group
    * 'rankPref' : within each (timeCode, categoryCode, prefCode) group
  Ranks are stored as strings. Tied values receive the truncated average of
  their positions, as in the previous implementation.

  This replaces two copies of a filter/rank/concat/merge loop that used
  misaligned chained boolean indexing (`df[mask1][mask2]`) and ranked every
  column of the frame.

  Args:
    df_arg: DataFrame with 'timeCode', 'categoryCode', 'prefCode',
      'bigCityFlag' and a 'value' column convertible to float. It is not
      modified.

  Returns:
    A copy of df_arg with a fresh 0..n-1 index (as the former merge produced)
    plus the 'rankJapan' and 'rankPref' columns. 'value' keeps its original
    dtype.
  """
  print(f'基のDataFrameは{len(df_arg)}行')

  # A fresh RangeIndex guarantees unique labels for the index-aligned column
  # assignments below.
  df_res = df_arg.reset_index(drop=True)

  eligible = df_res[df_res['bigCityFlag'] != '1']
  df_target = eligible[['timeCode', 'categoryCode', 'prefCode']].assign(
      value=eligible['value'].astype(float)
  )

  # Assignment aligns on the index, so excluded rows receive missing values.
  df_res['rankJapan'] = _rank_within_groups(df_target, ['timeCode', 'categoryCode'])
  df_res['rankPref'] = _rank_within_groups(df_target, ['timeCode', 'categoryCode', 'prefCode'])

  return df_res


def _rank_within_groups(df, keys):
  """Return the descending rank of 'value' within each group of `keys`, as strings."""
  ranks = df.groupby(keys)['value'].rank(ascending=False)
  # int() truncates fractional (tied) ranks exactly like the former astype(int).
  return ranks.map(lambda r: str(int(r)), na_action='ignore')

# BigQueryに格納

pandas-gbqのインストール

In [None]:
!pip install pandas-gbq

統計カード一覧を取得

In [11]:
def get_statistics_cardlist(path='/content/drive/MyDrive/statistics-hyogo/contents/statistics-cards.json'):
  """Load the statistics card definitions from a JSON file.

  Args:
    path: Location of the JSON file. Defaults to the file on the mounted
      Google Drive.

  Returns:
    The parsed JSON content.
  """
  # The encoding is explicit so the Japanese text is decoded the same way
  # regardless of the platform default.
  with open(path, encoding='utf-8') as j:
    return json.load(j)

In [12]:
# Load the statistics card definitions and print them.
cardlist = get_statistics_cardlist()
print(cardlist)

[{'index': 0, 'cardId': 'total-population-prefecture', 'cardTitle': '都道府県の総人口', 'cardIndex': 1, 'governmentType': 'prefecture', 'menuId': 'population', 'menuTitle': '人口', 'fieldId': 'population', 'fieldTitle': '人口・世帯', 'chartComponent': 'TimeChart', 'categories': [{'categoryCode': 'A1101', 'categoryName': '総人口', 'isSelect': 'TRUE', 'type': 'column', 'yAxis': 0}, {'categoryCode': 'A110101', 'categoryName': '総人口（男）', 'isSelect': '', 'type': 'column', 'yAxis': 0}, {'categoryCode': 'A110102', 'categoryName': '総人口（女）', 'isSelect': '', 'type': 'column', 'yAxis': 0}], 'estatParams': {'statsDataId': '0000010101', 'cdCat01': 'A1101,A110101,A110102'}}, {'index': 1, 'cardId': 'japanese-population-prefecture', 'cardTitle': '都道府県の日本人人口', 'cardIndex': '', 'governmentType': 'prefecture', 'menuId': 'population', 'menuTitle': '人口', 'fieldId': 'population', 'fieldTitle': '人口・世帯', 'chartComponent': 'TimeChart', 'categories': [{'categoryCode': 'A1102', 'categoryName': '日本人人口', 'isSelect': 'TRUE', 'type': 

統計カードのデータをBigQueryに保存

In [13]:
from google.cloud import bigquery
from google.cloud.exceptions import NotFound

def save_statistics_card_to_gbq(cardlist):
  """Upload the data of every statistics card that has no BigQuery table yet.

  For each card, the table '<project>.<dataset>.<cardId>' is looked up. If it
  exists the card is skipped. Otherwise the card's e-Stat data is fetched,
  ranked (prefecture or city variant depending on 'governmentType') and
  written to BigQuery.

  Args:
    cardlist: Iterable of card dicts with the keys 'cardId', 'estatParams'
      and 'governmentType'.
  """
  project_id = 'primal-buttress-342908'
  dataset_id = 'contents'
  # One client serves every card; it used to be re-created on each iteration.
  client = bigquery.Client()

  for card in cardlist:
    table_id = card['cardId']

    try:
      client.get_table(f'{project_id}.{dataset_id}.{table_id}')
    except NotFound:
      # Table is missing: fall through and create it. Doing the work outside
      # the handler keeps later errors from being chained to NotFound.
      pass
    else:
      print("Table {} already exists.".format(table_id))
      continue

    df = get_estat_dataframe(card['estatParams'])

    # Add the ranking columns that match the card's government type.
    if card['governmentType'] == 'prefecture':
      df = withrank_prefecture(df)
    else:
      df = withrank_city(df)

    df.to_gbq(f'{dataset_id}.{table_id}', project_id=project_id, if_exists="replace")
    print(f'{table_id}を保存しました')

In [14]:
# Upload every card in cardlist whose BigQuery table does not exist yet.
save_statistics_card_to_gbq(cardlist)

Table total-population-prefecture already exists.
Table japanese-population-prefecture already exists.
Table population-pyramid-prefecture already exists.
Table median-age-prefecture already exists.
Table population-age-prefecture  already exists.
Table unmarried-prefecture already exists.
Table spouse-prefecture already exists.
Table bereavement-prefecture already exists.
Table separated-prefecture already exists.
Table unmarried-rate-pretecture already exists.
Table foreigner-population-prefecture already exists.
Table densely-populated-area-population-prefecture already exists.
Table densely-populated-area-prefecture already exists.
Table foreigner-prefecture already exists.
Table foreign-resident-prefecture already exists.
Table birth-prefecture already exists.
Table total-fertility-rate-prefecture already exists.
Table death-prefecture already exists.
Table death-pyramid-prefecture already exists.
Table age-adjusted-mortality-prefecture already exists.
Table stillbirth-prefecture 

  app.launch_new_instance()
100%|██████████| 1/1 [00:00<00:00, 1524.65it/s]


worker-prefectureを保存しました
Table unemployed-prefecture already exists.
Table non-labor-population-prefecture already exists.
Table co-worker-household-prefecture already exists.
Table employees-pyramid-prefecture already exists.
Table working-days-prefecture already exists.
Table starting-salary-prefecture already exists.
Table guests-prefecture already exists.
Table foreigner-guests-prefecture already exists.
Table garbage-discharged-prefecture already exists.
Table waste-processed-prefecture already exists.
Table city-planning-area-prefecture already exists.
Table use-area-prefecture already exists.
Table doctors-prefecture already exists.
Table dentist-prefecture already exists.
Table pharmacist-prefecture already exists.
Table nurse-prefecture already exists.
Table associate-nurse-prefecture already exists.
Table pharmacy-prefecture already exists.
Table welfare-facilities-elderly-prefecture already exists.
Table national-medical-expenses-prefecture already exists.
Table national-hea

# JSONに保存

## BigQueryからデータ取得

In [None]:
import pandas as pd

def get_gbq(table_id):
  """Read every row of one table of the 'contents' dataset into a DataFrame.

  Args:
    table_id: Name of the table inside the dataset.

  Returns:
    DataFrame returned by pd.read_gbq for `SELECT *` on that table.
  """
  dataset_id = 'contents'
  project_id = 'primal-buttress-342908'
  sql = 'SELECT * FROM `{}.{}.{}`'.format(project_id, dataset_id, table_id)

  return pd.read_gbq(sql, project_id, dialect='standard')

In [None]:
# Smoke check: load one city-level card table and display it.
df_test = get_gbq('total-population-city')
df_test

Unnamed: 0,statsDataId,statsDataName,categoryCode,categoryName,timeCode,timeName,areaCode,value,unit,index,prefCode,prefName,cityCode,cityName,bigCityFlag,areaName,governmentType,rankJapan,rankPref
0,0000020201,社会・人口統計体系,A1101,総人口,1980,1980年度,01100,1401757,人,0,1,北海道,01100,札幌市,2,札幌市,city,5,1
1,0000020201,社会・人口統計体系,A1101,総人口,1985,1985年度,01100,1542979,人,0,1,北海道,01100,札幌市,2,札幌市,city,4,1
2,0000020201,社会・人口統計体系,A1101,総人口,1990,1990年度,01100,1671742,人,0,1,北海道,01100,札幌市,2,札幌市,city,4,1
3,0000020201,社会・人口統計体系,A1101,総人口,1995,1995年度,01100,1757025,人,0,1,北海道,01100,札幌市,2,札幌市,city,4,1
4,0000020201,社会・人口統計体系,A1101,総人口,2000,2000年度,01100,1822368,人,0,1,北海道,01100,札幌市,2,札幌市,city,4,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30628,0000020201,社会・人口統計体系,A110101,総人口（男）,2015,2015年度,47382,1147,人,1921,47,沖縄県,47382,与那国町,0,与那国町,city,1638,31
30629,0000020201,社会・人口統計体系,A110102,総人口（女）,2000,2000年度,47382,895,人,1921,47,沖縄県,47382,与那国町,0,与那国町,city,1694,32
30630,0000020201,社会・人口統計体系,A110102,総人口（女）,2005,2005年度,47382,857,人,1921,47,沖縄県,47382,与那国町,0,与那国町,city,1690,31
30631,0000020201,社会・人口統計体系,A110102,総人口（女）,2010,2010年度,47382,816,人,1921,47,沖縄県,47382,与那国町,0,与那国町,city,1689,31


## 統計カード一覧

In [None]:
# NOTE(review): this repeats the get_statistics_cardlist definition from the
# BigQuery section; the later definition silently replaces the earlier one.
def get_statistics_cardlist(path='/content/drive/MyDrive/statistics-hyogo/contents/statistics-cards.json'):
  """Load the statistics card definitions from a JSON file.

  Args:
    path: Location of the JSON file. Defaults to the file on the mounted
      Google Drive.

  Returns:
    The parsed JSON content.
  """
  # The encoding is explicit so the Japanese text is decoded the same way
  # regardless of the platform default.
  with open(path, encoding='utf-8') as j:
    return json.load(j)

In [None]:
# Load the statistics card definitions and print them.
cardlist = get_statistics_cardlist()
print(cardlist)

[{'index': 0, 'cardId': 'total-population-prefecture', 'cardTitle': '都道府県の総人口', 'cardIndex': 1, 'governmentType': 'prefecture', 'menuId': 'population', 'menuTitle': '人口', 'fieldId': 'population', 'fieldTitle': '人口・世帯', 'chartComponent': 'TimeChart', 'categories': [{'categoryCode': 'A1101', 'categoryName': '総人口', 'isSelect': 'TRUE', 'type': 'column', 'yAxis': 0}, {'categoryCode': 'A110101', 'categoryName': '総人口（男）', 'isSelect': '', 'type': 'column', 'yAxis': 0}, {'categoryCode': 'A110102', 'categoryName': '総人口（女）', 'isSelect': '', 'type': 'column', 'yAxis': 0}], 'estatParams': {'statsDataId': '0000010101', 'cdCat01': 'A1101,A110101,A110102'}}, {'index': 1, 'cardId': 'japanese-population-prefecture', 'cardTitle': '都道府県の日本人人口', 'cardIndex': '', 'governmentType': 'prefecture', 'menuId': 'population', 'menuTitle': '人口', 'fieldId': 'population', 'fieldTitle': '人口・世帯', 'chartComponent': 'TimeChart', 'categories': [{'categoryCode': 'A1102', 'categoryName': '日本人人口', 'isSelect': 'TRUE', 'type': 

## 地域一覧

In [None]:
def get_arealist(path='drive/MyDrive/statistics-hyogo/resas/arealist.json'):
  """Load the area list from a JSON file.

  Args:
    path: Location of the JSON file. The default is a relative path, so it
      resolves against the current working directory.

  Returns:
    The parsed JSON content.
  """
  # The encoding is explicit so the Japanese text is decoded the same way
  # regardless of the platform default.
  with open(path, encoding='utf-8') as j:
    return json.load(j)

In [None]:
# Load the area list and keep only the prefecture-level entries.
arealist = get_arealist()
preflist = [area for area in arealist if area['governmentType'] == 'prefecture']
print(preflist)

[{'index': 0, 'prefCode': 1, 'prefName': '北海道', 'cityCode': None, 'cityName': None, 'bigCityFlag': None, 'areaCode': '01000', 'areaName': '北海道', 'governmentType': 'prefecture'}, {'index': 1, 'prefCode': 2, 'prefName': '青森県', 'cityCode': None, 'cityName': None, 'bigCityFlag': None, 'areaCode': '02000', 'areaName': '青森県', 'governmentType': 'prefecture'}, {'index': 2, 'prefCode': 3, 'prefName': '岩手県', 'cityCode': None, 'cityName': None, 'bigCityFlag': None, 'areaCode': '03000', 'areaName': '岩手県', 'governmentType': 'prefecture'}, {'index': 3, 'prefCode': 4, 'prefName': '宮城県', 'cityCode': None, 'cityName': None, 'bigCityFlag': None, 'areaCode': '04000', 'areaName': '宮城県', 'governmentType': 'prefecture'}, {'index': 4, 'prefCode': 5, 'prefName': '秋田県', 'cityCode': None, 'cityName': None, 'bigCityFlag': None, 'areaCode': '05000', 'areaName': '秋田県', 'governmentType': 'prefecture'}, {'index': 5, 'prefCode': 6, 'prefName': '山形県', 'cityCode': None, 'cityName': None, 'bigCityFlag': None, 'areaCode'

## jsonに保存

In [None]:
# Export every card table from BigQuery to JSON files under the mounted bucket:
# one file per prefecture-level card, and one file per prefecture for the other
# (city-level) cards.
for card in cardlist:
  cardId = card['cardId']
  governmentType = card['governmentType']

  df = get_gbq(cardId)

  if governmentType == 'prefecture':

    gcs_path = "statistics-hyogo/card-data/{}.json".format(cardId)
    df.to_json(gcs_path, orient='records', force_ascii=False)

    print(f'{gcs_path}を保存しました')

  else:
    # Compare prefecture codes as strings on both sides, so the filter matches
    # whether the column comes back from BigQuery as integers or as strings.
    pref_codes = df['prefCode'].astype(str)

    for pref in preflist:
      df_city = df[pref_codes == str(pref['prefCode'])]

      areacode = pref['areaCode']
      gcs_path = f'statistics-hyogo/card-data/{cardId}_{areacode}.json'
      # Write the per-prefecture subset; the unfiltered `df` used to be
      # written here, so every file contained the whole table.
      df_city.to_json(gcs_path, orient='records', force_ascii=False)
      print(f'{gcs_path}を保存しました')


statistics-hyogo/card-data/total-population-prefecture.jsonを保存しました
statistics-hyogo/card-data/japanese-population-prefecture.jsonを保存しました
statistics-hyogo/card-data/population-pyramid-prefecture.jsonを保存しました
statistics-hyogo/card-data/median-age-prefecture.jsonを保存しました
statistics-hyogo/card-data/population-age-prefecture .jsonを保存しました
statistics-hyogo/card-data/unmarried-prefecture.jsonを保存しました
statistics-hyogo/card-data/spouse-prefecture.jsonを保存しました
statistics-hyogo/card-data/bereavement-prefecture.jsonを保存しました
statistics-hyogo/card-data/separated-prefecture.jsonを保存しました
statistics-hyogo/card-data/unmarried-rate-pretecture.jsonを保存しました
statistics-hyogo/card-data/foreigner-population-prefecture.jsonを保存しました
statistics-hyogo/card-data/densely-populated-area-population-prefecture.jsonを保存しました
statistics-hyogo/card-data/densely-populated-area-prefecture.jsonを保存しました
statistics-hyogo/card-data/foreigner-prefecture.jsonを保存しました
statistics-hyogo/card-data/foreign-resident-prefecture.jsonを保存しました
statisti