# 事前準備

ライブラリのインストール

In [1]:
# Install numexpr into the runtime via a shell escape.
# NOTE(review): numexpr is not imported directly in the visible cells;
# presumably it is installed as an optional accelerator for pandas - confirm.
!pip install numexpr

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


Googleドライブのマウント

In [None]:
# Mount Google Drive at /content/drive; the CSV paths used by the cells
# below all live under /content/drive/MyDrive/.
from google.colab import drive
drive.mount('/content/drive')

# 港湾統計マスタの前処理

## 姫路港・東播磨港

**前準備**
*   調査港コード.csvの作成

In [46]:
def get_harima(path, columns,
               port_code_path='/content/drive/MyDrive/港湾統計（兵庫県）/調査港コード.csv'):
  """Load the port-statistics master CSV and attach the survey-port code table.

  Args:
    path: Path to the master CSV (Shift-JIS encoded, header in the first row).
    columns: List of master column names to keep. It must contain '調査港',
      because that column is the join key.
    port_code_path: Path to the survey-port code CSV (Shift-JIS). Defaults to
      the location on the mounted Google Drive.

  Returns:
    A DataFrame with the requested master columns plus the columns of the
    port-code table. Every value is a string (both files are read with
    dtype=str) or NaN.

  Notes:
    The merge is a RIGHT join on '調査港': only master rows whose port code
    appears in the port-code table are kept, and a port code that has no
    master rows still yields one row whose master columns are NaN.
  """
  # Imported locally so the function also works when this cell is executed
  # before the notebook cell that imports pandas.
  import pandas as pd

  master = pd.read_csv(path, header=0, encoding="shift-jis", dtype=str)

  # Rows without a survey year are treated as blank filler rows and dropped.
  master = master.dropna(subset=['調査年'])

  # Keep only the requested columns.
  master = master[columns]

  port_codes = pd.read_csv(port_code_path, header=0, encoding="shift-jis", dtype=str)

  return pd.merge(master, port_codes, on='調査港', how='right')

In [None]:
# 2021 port-statistics master file on the mounted Drive.
path = '/content/drive/MyDrive/港湾統計（兵庫県）/港湾統計マスタ_2021.csv'

# Master columns to keep: port / facility keys, vessel figures, then the
# outbound (出荷) and inbound (入荷) cargo fields.
columns = [
  '調査港', '施設', '公専', '用途',
  '船舶隻数', '船舶総ﾄﾝ数', '係留時間',
  '出荷内外', '出荷品名', '出荷ﾄﾝ数', '出荷仕向港',
  '入荷内外', '入荷品名', '入荷ﾄﾝ数', '入荷仕出港',
]

df_master = get_harima(path, columns)
df_master

## 播磨地域CNP

東播磨港のうち明石市・播磨町は対象外

**前準備**
*   公共岸壁コード.csvの作成（播磨CNP対象岸壁）
*   専用岸壁コード.csvの作成（播磨CNP対象岸壁）




In [53]:
def get_cnp(path, columns,
            port_code_path='/content/drive/MyDrive/港湾統計（兵庫県）/調査港コード.csv',
            public_quay_path='/content/drive/MyDrive/港湾統計（兵庫県）/公共岸壁コード.csv',
            private_quay_path='/content/drive/MyDrive/港湾統計（兵庫県）/専用岸壁コード.csv'):
  """Load the master CSV and attach port codes and the quay code tables.

  Args:
    path: Path to the master CSV (Shift-JIS encoded, header in the first row).
    columns: List of master column names to keep. It must contain '調査港'
      and '施設', because both are used as join keys.
    port_code_path: Path to the survey-port code CSV (Shift-JIS).
    public_quay_path: Path to the public-quay code CSV (Shift-JIS).
    private_quay_path: Path to the private-quay code CSV (Shift-JIS).

  Returns:
    A DataFrame with the requested master columns, the port-code columns and
    the quay-table columns. Every value is a string (all files are read with
    dtype=str) or NaN.

  Notes:
    * The port-code merge is a RIGHT join on '調査港': master rows whose port
      is not listed are dropped, and a listed port without master rows yields
      one row of NaN master columns.
    * The quay merge is a LEFT join on ['施設', '港名']: rows whose facility
      is not in either quay table are kept, with NaN in the quay columns.
    * '港名' must therefore be supplied by the port-code table and be present
      in both quay tables - presumably that is their schema; confirm against
      the CSV files.
  """
  # Imported locally so the function also works when this cell is executed
  # before the notebook cell that imports pandas.
  import pandas as pd

  master = pd.read_csv(path, header=0, encoding="shift-jis", dtype=str)

  # Rows without a survey year are treated as blank filler rows and dropped,
  # then only the requested columns are kept.
  master = master.dropna(subset=['調査年'])[columns]

  # Attach the survey-port codes (right join, see Notes).
  port_codes = pd.read_csv(port_code_path, header=0, encoding="shift-jis", dtype=str)
  merged = pd.merge(master, port_codes, on='調査港', how='right')

  # Stack the public and private quay tables and attach them (left join).
  public_quays = pd.read_csv(public_quay_path, header=0, encoding="shift-jis", dtype=str)
  private_quays = pd.read_csv(private_quay_path, header=0, encoding="shift-jis", dtype=str)
  quays = pd.concat([public_quays, private_quays])

  return pd.merge(merged, quays, on=['施設', '港名'], how='left')

In [None]:
# 2021 port-statistics master file on the mounted Drive.
path = '/content/drive/MyDrive/港湾統計（兵庫県）/港湾統計マスタ_2021.csv'

# Master columns to keep: port / facility keys, outbound (出荷) and inbound
# (入荷) cargo fields, then the vessel count / tonnage / mooring-time fields.
columns = [
  '調査港', '施設', '公専',
  '出荷内外', '出荷品名', '出荷ﾄﾝ数', '出荷仕向港',
  '入荷内外', '入荷品名', '入荷ﾄﾝ数', '入荷仕出港',
  '用途', '船舶内外',
  '船舶隻数', '隻数A', '隻数B',
  '船舶総ﾄﾝ数', '総ﾄﾝ数A', '総ﾄﾝ数B',
  '係留時間',
]

df_cnp = get_cnp(path, columns)
df_cnp


# 取扱貨物量

施設ごとに取扱貨物量（輸出入、移出入の内訳含む）を集計する

In [57]:
def get_cargo(df, item_code_path='/content/drive/MyDrive/港湾統計（兵庫県）/品名コード.csv'):
  """Aggregate cargo tonnage per facility and commodity group.

  Args:
    df: DataFrame holding at least the columns '施設', '施設名', '入荷ﾄﾝ数',
      '出荷ﾄﾝ数', '入荷内外', '出荷内外', '入荷品名' and '出荷品名'. The
      tonnage columns may be integer-like strings or NaN; the 内外 flags are
      compared against the strings '1' and '2'. The input is not modified.
    item_code_path: Path to the item-code CSV (Shift-JIS) that is joined on
      '品名'. Defaults to the location on the mounted Google Drive.

  Returns:
    One row per ('施設', '施設名', '中分類') with the summed columns
    '輸入量' (inbound, flag '2'), '移入量' (inbound, flag '1'),
    '輸出量' (outbound, flag '2'), '移出量' (outbound, flag '1') and their
    total '取扱貨物量'.

  Notes:
    * Missing tonnage is counted as 0 instead of raising on the int cast.
    * groupby drops rows whose '施設', '施設名' or '中分類' is NaN, so rows
      without a matching facility name or item code do not appear in the
      result.
    * '中分類' must be supplied by the item-code CSV - presumably that is
      its schema; confirm against the file.
  """
  # Imported locally so the function also works when this cell is executed
  # before the notebook cell that imports pandas.
  import pandas as pd

  # Cast tonnage to int on a new frame (assign returns a copy, so the
  # caller's DataFrame is left untouched). NaN tonnage is treated as 0.
  ton_cols = ['入荷ﾄﾝ数', '出荷ﾄﾝ数']
  df = df.assign(**{col: df[col].fillna('0').astype(int) for col in ton_cols})

  inbound = df['入荷ﾄﾝ数']
  outbound = df['出荷ﾄﾝ数']

  # Split tonnage by direction and by the 内外 flag; rows that do not match
  # the flag contribute 0. Vectorised, so an empty frame works as well.
  df['輸入量'] = inbound.where(df['入荷内外'] == '2', 0)
  df['移入量'] = inbound.where(df['入荷内外'] == '1', 0)
  df['輸出量'] = outbound.where(df['出荷内外'] == '2', 0)
  df['移出量'] = outbound.where(df['出荷内外'] == '1', 0)

  # Use the outbound item code when the row has outbound tonnage, otherwise
  # the inbound one.
  # NOTE(review): a row with both inbound and outbound tonnage is booked
  # entirely under the outbound item - confirm such rows cannot occur.
  df['品名'] = df['出荷品名'].where(outbound != 0, df['入荷品名'])

  # Attach the item-code table (left join keeps rows without a match).
  item_codes = pd.read_csv(item_code_path, header=0, encoding="shift-jis", dtype=str)
  df = pd.merge(df, item_codes, on='品名', how='left')

  # Sum the four flows per facility and commodity group.
  flow_cols = ['輸入量', '移入量', '輸出量', '移出量']
  df = (
    df.groupby(['施設', '施設名', '中分類'])
      .agg({col: 'sum' for col in flow_cols})
      .reset_index()
  )

  # Total handled cargo.
  df['取扱貨物量'] = df['輸入量'] + df['移入量'] + df['輸出量'] + df['移出量']

  return df

In [58]:
# Aggregate cargo tonnage from the frame built by get_cnp (df_cnp) and
# display the result.
df_cargo = get_cargo(df_cnp)
df_cargo

Unnamed: 0,施設,施設名,中分類,輸入量,移入量,輸出量,移出量,取扱貨物量
0,1153,別府公共物揚場（砂揚場）(-4.0m),石材,0,31300,0,0,31300
1,1153,別府公共物揚場（砂揚場）(-4.0m),砂利・砂,0,406713,0,0,406713
2,1156,高砂公共物揚場(-3.5m),水産品,0,19,0,0,19
3,1156,高砂公共物揚場(-3.5m),砂利・砂,0,375975,0,0,375975
4,1157,高砂公共岸壁(-5.5m),セメント,0,104777,0,0,104777
...,...,...,...,...,...,...,...,...
232,2341,シェルジャパン施設,石油製品,300,11990,1995,4370,18655
233,2350,姫路ＬＮＧ施設,ＬＮＧ（液化天然ガス）,13749118,0,0,0,13749118
234,2351,大阪ガス副桟橋,ＬＮＧ（液化天然ガス）,0,0,0,268520,268520
235,2351,大阪ガス副桟橋,ＬＰＧ（液化石油ガス）,0,84403,0,0,84403


# 係留船舶

In [None]:
import numpy as np
import pandas as pd

def get_ship(path):
  """Load vessel records from the master CSV and add per-row totals.

  Args:
    path: Path to the master CSV (Shift-JIS encoded, header in the first row).

  Returns:
    A DataFrame with the vessel-related master columns plus
      * '隻数'     - sum of '船舶隻数', '隻数A' and '隻数B'
      * '総トン数' - sum of '船舶総ﾄﾝ数', '総ﾄﾝ数A' and '総ﾄﾝ数B'
    Rows whose '隻数' is 0 are removed; the original row index is kept.
    The six count / tonnage columns and both totals are integers; the
    remaining columns stay strings, with missing values replaced by '0'.
  """
  columns = ['調査港','施設','用途','公専','船舶内外','船舶隻数','隻数A','隻数B','船舶総ﾄﾝ数','総ﾄﾝ数A','総ﾄﾝ数B','係留時間']
  count_cols = ['船舶隻数', '隻数A', '隻数B']
  tonnage_cols = ['船舶総ﾄﾝ数', '総ﾄﾝ数A', '総ﾄﾝ数B']

  # Read everything as strings, keep the vessel columns only, and replace
  # missing values by '0' so the integer casts below cannot fail on NaN.
  ships = pd.read_csv(path, header=0, encoding="shift-jis", dtype=str)
  ships = ships[columns].fillna('0')

  ships = ships.astype({col: 'int' for col in count_cols + tonnage_cols})

  # Per-row totals over the three count / tonnage variants.
  ships['隻数'] = ships[count_cols].sum(axis=1)
  ships['総トン数'] = ships[tonnage_cols].sum(axis=1)

  # Drop rows without any vessel. A boolean filter is used instead of
  # `df[col].replace(..., inplace=True)`, which acts on a temporary object
  # under pandas Copy-on-Write and would leave the frame unchanged.
  ships = ships[ships['隻数'] != 0]

  # TODO: the following steps were disabled in the original notebook and are
  # still not applied here:
  #   - normalise '用途' codes 't71' / 't85' to 'T71' / 'T85'
  #   - left-join 用途コード.csv on '用途' (ship type)
  #   - right-join 施設コード.csv on '施設' (facility name)
  #   - group by 港名 / 地区名 / 船舶内外 / 船種 and aggregate
  #     (船舶隻数: sum, 係留時間: mean, 総トン数: mean)

  return ships

In [None]:
# 2021 port-statistics master file on the mounted Drive.
path = '/content/drive/MyDrive/港湾統計（兵庫県）/港湾統計マスタ_2021.csv'

df = get_ship(path)

# Append one grand-total row holding the sum of every numeric column; the
# non-numeric columns are NaN in that row. pd.concat is used because
# DataFrame.append was removed in pandas 2.0.
df = pd.concat([df, df.sum(numeric_only=True).to_frame().T], ignore_index=True)
df

Unnamed: 0,調査港,施設,用途,公専,船舶内外,船舶隻数,隻数A,隻数B,船舶総ﾄﾝ数,総ﾄﾝ数A,総ﾄﾝ数B,係留時間,隻数,総トン数
0,201,2350,T74,2,2,1.0,0.0,0.0,113502.0,0.0,0.0,23,1.0,113502.0
1,202,1241,T84,1,1,0.0,0.0,1.0,0.0,0.0,480.0,3,1.0,480.0
2,202,2308,T97,2,1,0.0,1.0,0.0,0.0,19.0,0.0,13,1.0,19.0
3,202,2352,T71,2,1,0.0,0.0,1.0,0.0,0.0,376.0,28,1.0,376.0
4,202,1212,t85,1,1,1.0,0.0,0.0,749.0,0.0,0.0,4,1.0,749.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40674,14,2488,T86,2,2,1.0,0.0,0.0,9995.0,0.0,0.0,24,1.0,9995.0
40675,14,2488,3,2,1,1.0,0.0,0.0,8858.0,0.0,0.0,26,1.0,8858.0
40676,14,2488,T85,2,1,1.0,0.0,0.0,3118.0,0.0,0.0,5,1.0,3118.0
40677,14,2488,T85,2,2,1.0,0.0,0.0,3215.0,0.0,0.0,11,1.0,3215.0
