# 実行環境の作成

## Google Drive

Google Driveをマウント

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Google Sheets

ライブラリのインストール

In [2]:
!pip install --upgrade -q gspread

[?25l[K     |████████                        | 10 kB 23.5 MB/s eta 0:00:01[K     |████████████████▏               | 20 kB 12.2 MB/s eta 0:00:01[K     |████████████████████████▎       | 30 kB 8.7 MB/s eta 0:00:01[K     |████████████████████████████████| 40 kB 1.5 MB/s 
[?25h

Google Sheetsの認証

In [3]:
from google.colab import auth
from google.auth import default
import gspread

auth.authenticate_user()
creds, _ = default()
gc = gspread.authorize(creds)

## Google Cloud Storage

下記コードでGCPに接続

In [4]:
from google.colab import auth
auth.authenticate_user()

認証に成功したらgcsfuseをインストール

In [None]:
!echo "deb http://packages.cloud.google.com/apt gcsfuse-`lsb_release -c -s` main" | sudo tee /etc/apt/sources.list.d/gcsfuse.list
!curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -
!apt-get -y -q update
!apt-get -y -q install gcsfuse

バケット「statistics-hyogo」をディレクトリ「statistics-hyogo」にマウント

In [6]:
! mkdir -p statistics-hyogo
! gcsfuse --implicit-dirs --limit-bytes-per-sec -1 --limit-ops-per-sec -1 statistics-hyogo statistics-hyogo

2022/11/25 23:31:15.208807 Start gcsfuse/0.41.8 (Go version go1.18.4) for app "" using mount point: /content/statistics-hyogo
2022/11/25 23:31:15.226840 Opening GCS connection...
2022/11/25 23:31:16.630397 Mounting file system "statistics-hyogo"...
2022/11/25 23:31:16.631229 File system has been successfully mounted.


# 統計カード

## カード一覧

In [7]:
import json

def get_statistics_cardlist():
  with open('/content/drive/MyDrive/statistics-hyogo/contents/statistics-cards.json') as j:
    cardlist = json.load(j)

  return cardlist

In [8]:
cardlist = get_statistics_cardlist()
print(cardlist)

[{'index': 0, 'cardId': 'total-population-prefecture', 'cardTitle': '都道府県の総人口', 'cardIndex': 1, 'governmentType': 'prefecture', 'menuId': 'population', 'menuTitle': '人口', 'fieldId': 'population', 'fieldTitle': '人口・世帯', 'chartComponent': 'TimeChart', 'categories': [{'categoryCode': 'A1101', 'categoryName': '総人口', 'isSelect': 'TRUE', 'type': 'column', 'yAxis': 0}, {'categoryCode': 'A110101', 'categoryName': '総人口（男）', 'isSelect': '', 'type': 'column', 'yAxis': 0}, {'categoryCode': 'A110102', 'categoryName': '総人口（女）', 'isSelect': '', 'type': 'column', 'yAxis': 0}], 'estatParams': {'statsDataId': '0000010101', 'cdCat01': 'A1101,A110101,A110102'}}, {'index': 1, 'cardId': 'japanese-population-prefecture', 'cardTitle': '都道府県の日本人人口', 'cardIndex': '', 'governmentType': 'prefecture', 'menuId': 'population', 'menuTitle': '人口', 'fieldId': 'population', 'fieldTitle': '人口・世帯', 'chartComponent': 'TimeChart', 'categories': [{'categoryCode': 'A1102', 'categoryName': '日本人人口', 'isSelect': 'TRUE', 'type': 

## 個別カード

In [9]:
def get_statistics_card(cardId):
  with open('/content/drive/MyDrive/statistics-hyogo/contents/statistics-cards.json') as j:
    cardlist = json.load(j)

    card = list(filter(lambda item : item['cardId'] == cardId, cardlist))
    return card[0]

In [10]:
card = get_statistics_card('total-population-prefecture')
print(card)

{'index': 0, 'cardId': 'total-population-prefecture', 'cardTitle': '都道府県の総人口', 'cardIndex': 1, 'governmentType': 'prefecture', 'menuId': 'population', 'menuTitle': '人口', 'fieldId': 'population', 'fieldTitle': '人口・世帯', 'chartComponent': 'TimeChart', 'categories': [{'categoryCode': 'A1101', 'categoryName': '総人口', 'isSelect': 'TRUE', 'type': 'column', 'yAxis': 0}, {'categoryCode': 'A110101', 'categoryName': '総人口（男）', 'isSelect': '', 'type': 'column', 'yAxis': 0}, {'categoryCode': 'A110102', 'categoryName': '総人口（女）', 'isSelect': '', 'type': 'column', 'yAxis': 0}], 'estatParams': {'statsDataId': '0000010101', 'cdCat01': 'A1101,A110101,A110102'}}


# 地域

## 地域一覧

In [11]:
def get_arealist():
  with open('drive/MyDrive/statistics-hyogo/resas/arealist.json') as j:
    arealist = json.load(j)
    
  return arealist

In [12]:
arealist = get_arealist()
preflist = list(filter(lambda x: x['governmentType'] == 'prefecture', arealist))
print(preflist)

[{'index': 0, 'prefCode': 1, 'prefName': '北海道', 'cityCode': None, 'cityName': None, 'bigCityFlag': None, 'areaCode': '01000', 'areaName': '北海道', 'governmentType': 'prefecture'}, {'index': 1, 'prefCode': 2, 'prefName': '青森県', 'cityCode': None, 'cityName': None, 'bigCityFlag': None, 'areaCode': '02000', 'areaName': '青森県', 'governmentType': 'prefecture'}, {'index': 2, 'prefCode': 3, 'prefName': '岩手県', 'cityCode': None, 'cityName': None, 'bigCityFlag': None, 'areaCode': '03000', 'areaName': '岩手県', 'governmentType': 'prefecture'}, {'index': 3, 'prefCode': 4, 'prefName': '宮城県', 'cityCode': None, 'cityName': None, 'bigCityFlag': None, 'areaCode': '04000', 'areaName': '宮城県', 'governmentType': 'prefecture'}, {'index': 4, 'prefCode': 5, 'prefName': '秋田県', 'cityCode': None, 'cityName': None, 'bigCityFlag': None, 'areaCode': '05000', 'areaName': '秋田県', 'governmentType': 'prefecture'}, {'index': 5, 'prefCode': 6, 'prefName': '山形県', 'cityCode': None, 'cityName': None, 'bigCityFlag': None, 'areaCode'

## 個別地域

In [13]:
def get_area(areaCode):
  with open('drive/MyDrive/statistics-hyogo/resas/arealist.json') as j:
    arealist = json.load(j)
    
    area = list(filter(lambda x: x['areaCode'] == areaCode, arealist))
    return area[0]

In [14]:
area = get_area('28000')
print(area)

{'index': 27, 'prefCode': 28, 'prefName': '兵庫県', 'cityCode': None, 'cityName': None, 'bigCityFlag': None, 'areaCode': '28000', 'areaName': '兵庫県', 'governmentType': 'prefecture'}


# Rechartsデータ作成

## BigQueryからデータ取得

In [15]:
import pandas as pd

def get_gbq(table_id):
  project_id='primal-buttress-342908'
  dataset_id = 'contents'  
  query = f'SELECT * FROM `{project_id}.{dataset_id}.{table_id}`'
  
  return pd.read_gbq(query, project_id, dialect='standard')

In [16]:
df = get_gbq('total-population-city')
df = df[df['areaCode'] == '01695']
df

Unnamed: 0,statsDataId,statsDataName,categoryCode,categoryName,timeCode,timeName,areaCode,value,unit,index,prefCode,prefName,cityCode,cityName,bigCityFlag,areaName,governmentType,rankJapan,rankPref


## rechartsのtimeChartに整形する関数

In [17]:
"""
rechartsのTimeChartデータを返す関数
引数はDataFrame（地域コードで絞り込み済）
"""
def get_dict_timechart(df):

  # categories（辞書型）の作成
  dict_categories = df.copy()[['categoryName',]].drop_duplicates(subset=['categoryName']).to_dict(orient='records')

  # 格納用のDataFrame
  df_res = pd.DataFrame(index=[], columns=['time'])

  # DataFrameの生成
  for c in dict_categories:
    categoryName = c['categoryName']
    df_c = df[df.copy()['categoryName'] == categoryName][['timeCode','value']].sort_values('timeCode').rename(columns={'value': categoryName, 'timeCode': 'time'})

    # 整数値へ変換
    value = df_c.head(1).iat[0, 1]
    if type(value) is str:
      df_c = df_c.astype({categoryName: int})
    elif value.is_integer():
      df_c = df_c.astype({categoryName: int})

    df_res = pd.merge(df_res, df_c,on='time', how='outer')

  return df_res.to_dict(orient='records')

In [18]:
"""
rechartsのPyramidChartデータを返す関数
引数はDataFrame（地域コードで絞り込み済）
"""
def get_dict_pyramidchart(df):

  # categories（辞書型）の作成
  df_categories = df.copy()[['categoryCode','categoryName']].drop_duplicates(subset=['categoryName'])
  df_categories['categoryName'] = df_categories['categoryName'].str.replace('（女）', '').str.replace('（男）', '')
  df_categories = df_categories.sort_values('categoryCode').drop_duplicates(subset=['categoryName'])[['categoryName']]
  dict_categories = df_categories.to_dict(orient='records')

  # 格納用のDataFrame
  df_res = pd.DataFrame(index=[], columns=['time','categoryName','man','woman'])

  # DataFrameの生成
  for c in dict_categories:
    categoryName = c['categoryName']
    
    # 男性
    df_man = df[df.copy()['categoryName'] == categoryName+'（男）'][['timeCode','value']].sort_values('timeCode').rename(columns={'value': 'man', 'timeCode': 'time'})
    df_man['categoryName'] = categoryName
    value = df_man.head(1).iat[0, 1]
    if type(value) is str:
      df_man = df_man.astype({'man': int})
    elif value.is_integer():
      df_man = df_man.astype({'man': int})

    # 女性
    df_woman = df[df.copy()['categoryName'] == categoryName+'（女）'][['timeCode','value']].sort_values('timeCode').rename(columns={'value': 'woman', 'timeCode': 'time'})
    df_woman['categoryName'] = categoryName
    value = df_woman.head(1).iat[0, 1]
    if type(value) is str:
      df_woman = df_woman.astype({'woman': int})  
    elif value.is_integer():
      df_woman = df_woman.astype({'woman': int})
    
    df_woman['woman'] = df_woman['woman']*-1

    # 男性と女性をマージ
    df_c = pd.merge(df_man, df_woman,on=['time','categoryName'], how='outer')

    df_res = df_res.append(df_c)


  return df_res.to_dict(orient='records')

In [19]:
def recharts_timedata(cardId,areaCode):

  # カード情報の取得
  card = get_statistics_card(cardId)

  # 地域情報の取得
  area = get_area(areaCode)

  # GBQからデータ取得
  df = get_gbq(cardId)
  df = df[df['areaCode'] == areaCode]

  if df.empty:
    return {}

  # cardTitle
  cardTitle = area['areaName'] + card['cardTitle'].replace('都道府県', '').replace('市区町村', '')

  # categories
  categories = card['categories']
  for i, c in enumerate(categories):
    df_c = df[df.copy()['categoryName'] == c['categoryName']][['unit']].head(1).iat[0, 0]
    categories[i]['unit'] = df_c
    c['categoryName'] = c['categoryName'].replace('（男）', '').replace('（女）', '')
  

  # recharsデータの作成
  chartComponent = card['chartComponent']  

  if chartComponent == 'TimeChart':
    chartData = get_dict_timechart(df)
  else:
    chartData = get_dict_pyramidchart(df)


  # timesの作成
  dict_times = df.copy()[['timeCode','timeName']].drop_duplicates(subset=['timeCode']).sort_values('timeCode').to_dict(orient='records')

  # 最新データの取得
  c = card['categories'][0]
  latestData = df[df.copy()['categoryCode'] == c['categoryCode']].sort_values('timeCode', ascending=False).head(1).to_dict(orient='records')[0]

  return {
      'cardTitle':cardTitle,
      'categories':categories,
      'times':dict_times,
      'infoTitle':latestData['timeName']+'の'+latestData['categoryName'],
      'infoData':"{:,}".format(int(latestData['value']))+' '+latestData['unit'],
      'infoRankJapan':'国内 第'+latestData['rankJapan']+'位' if latestData['rankJapan'] != None else  '' ,
      'infoRankPref': '県内 第'+latestData['rankPref']+'位' if latestData['governmentType'] == 'city' else  '' ,
      'sourceTitle':'政府統計の総合窓口e-Stat「'+latestData['statsDataName']+'」',
      'sourceURL':'https://www.e-stat.go.jp/dbview?sid='+latestData['statsDataId'],
      'chartData':chartData
  }

In [34]:
res = recharts_timedata('japanese-population-city','47382')
res

IndexError: ignored

# jsonに保存

In [30]:
import os

def save_gcs(cardId,areaCode):
  gcs_path = "statistics-hyogo/recharts/{}_{}.json".format(cardId,areaCode)

  if os.path.isfile(gcs_path):
    print(f'{gcs_path}はすでに存在しています')
    return
  else:
    res = recharts_timedata(cardId,areaCode)
    with open(gcs_path, 'w') as f:
      json.dump(res, f, indent=4, ensure_ascii=False)
      print(f'{gcs_path}を保存しました')
    return

In [32]:
import os

for card in cardlist:
  cardId = card['cardId']
  governmentType = card['governmentType']
  categories = card['categories']

  if governmentType == 'prefecture':
    preflist = list(filter(lambda x: x['governmentType'] == 'prefecture', arealist))
    for pref in preflist:
      areaCode = pref['areaCode']
      
      save_gcs(cardId,areaCode)

  else:
    citylist = list(filter(lambda x: x['governmentType'] == 'city', arealist))
    citylist = list(filter(lambda x: x['bigCityFlag'] != '1', citylist))
    for city in citylist:
      areaCode = city['areaCode']

      save_gcs(cardId,areaCode)


[1;30;43mストリーミング出力は最後の 5000 行に切り捨てられました。[0m
statistics-hyogo/recharts/separated-prefecture_46000.jsonはすでに存在しています
statistics-hyogo/recharts/separated-prefecture_47000.jsonはすでに存在しています
statistics-hyogo/recharts/unmarried-rate-pretecture_01000.jsonはすでに存在しています
statistics-hyogo/recharts/unmarried-rate-pretecture_02000.jsonはすでに存在しています
statistics-hyogo/recharts/unmarried-rate-pretecture_03000.jsonはすでに存在しています
statistics-hyogo/recharts/unmarried-rate-pretecture_04000.jsonはすでに存在しています
statistics-hyogo/recharts/unmarried-rate-pretecture_05000.jsonはすでに存在しています
statistics-hyogo/recharts/unmarried-rate-pretecture_06000.jsonはすでに存在しています
statistics-hyogo/recharts/unmarried-rate-pretecture_07000.jsonはすでに存在しています
statistics-hyogo/recharts/unmarried-rate-pretecture_08000.jsonはすでに存在しています
statistics-hyogo/recharts/unmarried-rate-pretecture_09000.jsonはすでに存在しています
statistics-hyogo/recharts/unmarried-rate-pretecture_10000.jsonはすでに存在しています
statistics-hyogo/recharts/unmarried-rate-pretecture_11000.jsonはすでに存在しています
st

ValueError: ignored