# 実行環境の作成

## Google Drive

Google Driveをマウント

In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Google Sheets

ライブラリのインストール

In [2]:
# Install (or upgrade to the latest) gspread; -q keeps pip's output quiet.
!pip install --upgrade -q gspread

Google Sheetsの認証

In [3]:
from google.colab import auth
from google.auth import default
import gspread

# Authenticate the Colab user, then build a gspread client from the
# default credentials. The client is kept in the notebook-global `gc`.
auth.authenticate_user()
creds, _ = default()  # second element of the returned tuple is discarded
gc = gspread.authorize(creds)

## Google Cloud Storage

下記コードでGCPに接続

In [4]:
# Authenticate the Colab user for Google Cloud access
# (the same call the Google Sheets cell makes).
from google.colab import auth
auth.authenticate_user()

認証に成功したらgcsfuseをインストール

In [None]:
# Register the gcsfuse apt repository for this distribution's codename
# (taken from `lsb_release -c -s`) in /etc/apt/sources.list.d/gcsfuse.list.
# NOTE(review): the repository URL below uses plain http, and `apt-key` is
# reported as deprecated on newer Debian/Ubuntu releases — confirm against the
# current gcsfuse installation guide.
!echo "deb http://packages.cloud.google.com/apt gcsfuse-`lsb_release -c -s` main" | sudo tee /etc/apt/sources.list.d/gcsfuse.list
# Download the repository signing key and add it to apt's trusted keys.
!curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -
# Refresh the package lists, then install gcsfuse non-interactively (-y, -q).
!apt-get -y -q update
!apt-get -y -q install gcsfuse

バケット「statistics-hyogo」をディレクトリ「statistics-hyogo」にマウント

In [None]:
# Create the local mount point (no error if it already exists).
! mkdir -p statistics-hyogo
# Mount the bucket `statistics-hyogo` (first argument) onto the local
# directory `statistics-hyogo` (second argument).
# NOTE(review): -1 for both --limit-* flags presumably disables rate
# limiting — confirm against the gcsfuse documentation.
! gcsfuse --implicit-dirs --limit-bytes-per-sec -1 --limit-ops-per-sec -1 statistics-hyogo statistics-hyogo

# 単体テスト

## BigQueryからテーブル取得

In [7]:
import pandas as pd

def get_gbq(table_id):
  """Load every row of one table of the `contents` dataset into a DataFrame.

  Args:
    table_id: Name of the table inside `primal-buttress-342908.contents`.

  Returns:
    The result of `SELECT *` on that table, as returned by `pd.read_gbq`.
  """
  project_id, dataset_id = 'primal-buttress-342908', 'contents'

  # Back-ticks quote the fully qualified table name.
  query = f'SELECT * FROM `{project_id}.{dataset_id}.{table_id}`'

  return pd.read_gbq(query, project_id=project_id, dialect='standard')

In [8]:
# Smoke test: load the `total-population-city` table and display it.
df = get_gbq('total-population-city')
df

Unnamed: 0,statsDataId,statsDataName,categoryCode,categoryName,timeCode,timeName,areaCode,value,unit,index,prefCode,prefName,cityCode,cityName,bigCityFlag,areaName,governmentType,rankJapan,rankPref
0,0000020201,社会・人口統計体系,A1101,総人口,1980,1980年度,01100,1401757,人,0,1,北海道,01100,札幌市,2,札幌市,city,5,1
1,0000020201,社会・人口統計体系,A1101,総人口,1985,1985年度,01100,1542979,人,0,1,北海道,01100,札幌市,2,札幌市,city,4,1
2,0000020201,社会・人口統計体系,A1101,総人口,1990,1990年度,01100,1671742,人,0,1,北海道,01100,札幌市,2,札幌市,city,4,1
3,0000020201,社会・人口統計体系,A1101,総人口,1995,1995年度,01100,1757025,人,0,1,北海道,01100,札幌市,2,札幌市,city,4,1
4,0000020201,社会・人口統計体系,A1101,総人口,2000,2000年度,01100,1822368,人,0,1,北海道,01100,札幌市,2,札幌市,city,4,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30628,0000020201,社会・人口統計体系,A110101,総人口（男）,2015,2015年度,47382,1147,人,1921,47,沖縄県,47382,与那国町,0,与那国町,city,1638,31
30629,0000020201,社会・人口統計体系,A110102,総人口（女）,2000,2000年度,47382,895,人,1921,47,沖縄県,47382,与那国町,0,与那国町,city,1694,32
30630,0000020201,社会・人口統計体系,A110102,総人口（女）,2005,2005年度,47382,857,人,1921,47,沖縄県,47382,与那国町,0,与那国町,city,1690,31
30631,0000020201,社会・人口統計体系,A110102,総人口（女）,2010,2010年度,47382,816,人,1921,47,沖縄県,47382,与那国町,0,与那国町,city,1689,31


## 特定地域のランキングデータ作成

In [9]:
def get_rank_dataframe_card(areaCode,cardId):
  """Return the ranking rows of one area for one statistics card.

  The card's table is loaded with `get_gbq(cardId)`, filtered to the rows
  whose `areaCode` column equals `areaCode`, then narrowed to a single
  category and a single time point, and finally tagged with the card id.

  Args:
    areaCode: Area code compared with `==` against the `areaCode` column.
    cardId: Card id. Used as the table name passed to `get_gbq` and as the
      value of the added `cardId` column.

  Returns:
    A new DataFrame with an additional `cardId` column, or `None` when the
    table contains no row for `areaCode`.
  """
  # Fetch the card's table from BigQuery.
  df = get_gbq(cardId)

  # Keep only the requested area.
  df = df[df['areaCode'] == areaCode]

  if df.empty:
    return None

  # Category: take the first one. The smallest code is used so the choice is
  # deterministic; list(set(...))[0] returned an arbitrary element because
  # set iteration order is undefined.
  category = df['categoryCode'].min()
  df = df[df['categoryCode'] == category]

  # Time: take the last one, i.e. the largest time code, for the same reason.
  time = df['timeCode'].max()
  df = df[df['timeCode'] == time]

  # assign() returns a new frame, so the filtered slice is not mutated
  # (avoids pandas' SettingWithCopyWarning).
  return df.assign(cardId=cardId)

統計カード一覧を取得

In [10]:
import json

def get_statistics_cardlist(path='/content/drive/MyDrive/statistics-hyogo/contents/statistics-cards.json'):
  """Load the statistics card list from a JSON file.

  Args:
    path: Location of the JSON file. Defaults to the card list stored on the
      mounted Google Drive, so existing zero-argument calls are unchanged.

  Returns:
    The parsed JSON content.
  """
  # Read explicitly as UTF-8 so the result does not depend on the
  # platform's default text encoding.
  with open(path, encoding='utf-8') as j:
    cardlist = json.load(j)

  return cardlist

In [11]:
def save_rank_dataframe(governmentType,areaCode):
  """Collect the ranking rows of one area across all matching statistics cards.

  Despite its name, this function does not persist anything; it only builds
  and returns the combined DataFrame.

  Args:
    governmentType: A card is used when this value is one of the
      '-'-separated tokens of its `cardId`.
    areaCode: Area code forwarded to `get_rank_dataframe_card`.

  Returns:
    The per-card frames concatenated in card-list order, or an empty
    DataFrame when no card produced any rows.
  """
  frames = []

  # Walk the statistics card list.
  for card in get_statistics_cardlist():
    cardId = card['cardId']

    if governmentType not in cardId.split('-'):
      continue

    df = get_rank_dataframe_card(areaCode,cardId)
    # None means the card has no rows for this area.
    if df is not None:
      frames.append(df)

  if not frames:
    return pd.DataFrame(index=[], columns=[])

  # Concatenate once instead of growing a frame inside the loop.
  return pd.concat(frames)

In [None]:
# Single-area run: build the ranking frame for one area and store it in BigQuery.
# Area code and government type to process.
areaCode = '28100'
governmentType = 'city'

df = save_rank_dataframe(governmentType,areaCode)

from google.cloud import bigquery
from google.cloud.exceptions import NotFound

# BigQuery destination: the table is named after the area code.
# Note: `client` and `NotFound` are not used anywhere in this cell.
project_id='primal-buttress-342908'
dataset_id = 'ranking'
table_id= areaCode
client = bigquery.Client()

# Merge the card metadata (titles, menu and field ids) onto the ranking rows.
df_cardlist = pd.DataFrame(get_statistics_cardlist())
df_cardlist = df_cardlist[['cardId','cardTitle','menuId','menuTitle','fieldId','fieldTitle']]
df = pd.merge(df, df_cardlist, on='cardId', how='left')

# Save to BigQuery, replacing the table if it already exists.
df.to_gbq(f'{dataset_id}.{table_id}', project_id=project_id, if_exists="replace")
print(f'{table_id}を保存しました')

1it [00:06,  6.04s/it]

28100を保存しました





# 全地域で繰り返し処理

In [13]:
import json

def get_arealist(path='/content/drive/MyDrive/statistics-hyogo/resas/arealist.json'):
  """Load the area list from a JSON file.

  Args:
    path: Location of the JSON file. Defaults to the area list stored on the
      mounted Google Drive, so existing zero-argument calls are unchanged.

  Returns:
    The parsed JSON content.
  """
  # Read explicitly as UTF-8 so the result does not depend on the
  # platform's default text encoding.
  with open(path, encoding='utf-8') as j:
    arealist = json.load(j)

  return arealist

In [None]:
from google.cloud import bigquery
from google.cloud.exceptions import NotFound

# BigQuery destination: one table per area code inside the `ranking` dataset.
project_id='primal-buttress-342908'
dataset_id = 'ranking'

client = bigquery.Client()
arealist = get_arealist()

# The card metadata is identical for every area, so load it once up front
# instead of re-reading the JSON file on every iteration.
df_cardlist = pd.DataFrame(get_statistics_cardlist())
df_cardlist = df_cardlist[['cardId','cardTitle','menuId','menuTitle','fieldId','fieldTitle']]

# Process every area; areas whose table already exists are skipped.
for area in arealist:
  areaCode = area['areaCode']
  governmentType = area['governmentType']

  # Fully qualified id (project.dataset.table), used for the existence check
  # and for the log messages.
  table_id = f'{project_id}.{dataset_id}.{areaCode}'

  try:
    client.get_table(table_id)
    print("Table {} already exists.".format(table_id))
  except NotFound:

    # Build the ranking DataFrame for this area.
    df = save_rank_dataframe(governmentType,areaCode)

    # Without any rows there is no `cardId` column to merge on, so skip the
    # area instead of failing inside pd.merge.
    if df.empty:
      print(f'{table_id}: no ranking data, skipped')
      continue

    # Merge the card metadata onto the ranking rows.
    df = pd.merge(df, df_cardlist, on='cardId', how='left')

    # Save to BigQuery. The destination must be `dataset.table`; `table_id`
    # already carries project and dataset, so prefixing it with the dataset
    # again produced a malformed destination name.
    df.to_gbq(f'{dataset_id}.{areaCode}', project_id=project_id, if_exists="replace")
    print(f'{table_id}を保存しました')


Table primal-buttress-342908.ranking.01100 already exists.
Table primal-buttress-342908.ranking.01101 already exists.
Table primal-buttress-342908.ranking.01102 already exists.
Table primal-buttress-342908.ranking.01103 already exists.
Table primal-buttress-342908.ranking.01104 already exists.
Table primal-buttress-342908.ranking.01105 already exists.
Table primal-buttress-342908.ranking.01106 already exists.
Table primal-buttress-342908.ranking.01107 already exists.
Table primal-buttress-342908.ranking.01108 already exists.
Table primal-buttress-342908.ranking.01109 already exists.
Table primal-buttress-342908.ranking.01110 already exists.
Table primal-buttress-342908.ranking.01202 already exists.
Table primal-buttress-342908.ranking.01203 already exists.
Table primal-buttress-342908.ranking.01204 already exists.
Table primal-buttress-342908.ranking.01205 already exists.
Table primal-buttress-342908.ranking.01206 already exists.
Table primal-buttress-342908.ranking.01207 already exist