# 事前準備

ライブラリのインストール

In [1]:
!pip install numexpr

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# 港湾統計マスタの前処理

## 港湾統計マスタのインポート

CSVファイルを指定して、港湾統計マスタをPandas Dataframeに変換する

データ件数（rows）が港湾統計マスタと一致していることを確認

In [10]:
import pandas as pd

def get_df_master(path):
  # csvファイルをインポート
  df = pd.read_csv(path, header=0, encoding="shift-jis",dtype=str)

  # 空白行を削除
  df = df.dropna(subset=['調査年'])
  return df

In [11]:
path = '/content/drive/MyDrive/港湾統計（兵庫県）/港湾統計マスタ_2021.csv'

df_master = get_df_master(path)
df_master

Unnamed: 0,様式,調査年,調査月,調査港,申告者,調査票,整理番号,施設,公専,船舶内外,...,車両台数入,最初港,最終港,ｴﾗｰｺｰﾄﾞ,入港日,航路名,種類,種別,長さ,個数
0,A,2021,1,201,310,1,1,2350,2,2,...,0,,,0,3,31,,,,0
1,A,2021,1,202,320,1,1,1241,1,1,...,0,,,0,7,1,,,,0
2,A,2021,1,202,321,1,1,2308,2,1,...,0,,,0,7,2,,,,0
3,A,2021,1,202,322,1,1,2352,2,1,...,0,,,0,10,2,,,,0
4,A,2021,1,202,324,1,1,1212,1,1,...,0,,,0,8,1,,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44822,A,2021,12,14,497,2,35,2488,2,2,...,0,,,0,18,31,,,,0
44823,A,2021,12,14,497,3,35,2488,2,1,...,0,,,0,27,2,,,,0
44824,A,2021,12,14,497,4,35,2488,2,1,...,0,,,0,3,2,,,,0
44825,A,2021,12,14,497,5,35,2488,2,1,...,0,,,0,,,,,,0


## 必要な列だけ抽出

CSVをインポートした後に、必要な列だけ抽出する



In [17]:
def get_df_master(path,columns):

  # csvのインポート
  df = pd.read_csv(path, header=0, encoding="shift-jis",dtype=str)

  # 空白行を削除
  df = df.dropna(subset=['調査年'])

  # 抽出する列のリスト指定
  df = df[columns]

  return df

In [18]:
path = '/content/drive/MyDrive/港湾統計（兵庫県）/港湾統計マスタ_2021.csv'
columns = ['調査港','施設','公専','用途','船舶隻数','船舶総ﾄﾝ数','係留時間','出荷内外','出荷品名','出荷ﾄﾝ数','出荷仕向港','入荷内外','入荷品名','入荷ﾄﾝ数','入荷仕出港']

df_master =  get_df_master(path,columns)
df_master

Unnamed: 0,調査港,施設,公専,用途,船舶隻数,船舶総ﾄﾝ数,係留時間,出荷内外,出荷品名,出荷ﾄﾝ数,出荷仕向港,入荷内外,入荷品名,入荷ﾄﾝ数,入荷仕出港
0,201,2350,2,T74,1,113502,23,,,0,,2,322,187122,2101015
1,202,1241,1,T84,0,0,3,,,0,,1,161,1600,28023
2,202,2308,2,T97,0,0,13,,,0,,1,481,713,33003
3,202,2352,2,T71,0,0,28,1,222,469,40002,,,0,
4,202,1212,1,t85,1,749,4,,,0,,1,281,1502,28014
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44822,14,2488,2,T86,1,9995,24,,,0,,2,131,9940,5105998
44823,14,2488,2,3,1,8858,26,,,0,,1,191,12860,35032
44824,14,2488,2,T85,1,3118,5,1,281,3514,30998,,,0,
44825,14,2488,2,,0,0,0,1,281,2802,12998,,,0,


## 姫路港・東播磨港を抽出

調査港コード.csvをインポートして、合致するデータのみ抽出

In [45]:
def get_master(path,columns):

  # csvのインポート
  df = pd.read_csv(path, header=0, encoding="shift-jis",dtype=str)

  # 空白行を削除
  df = df.dropna(subset=['調査年'])

  # 抽出する列のリスト指定
  df = df[columns]
  
  # 調査港をマージ
  df2 = pd.read_csv('/content/drive/MyDrive/港湾統計（兵庫県）/調査港コード.csv', header=0, encoding="shift-jis",dtype=str)
  df = pd.merge(df, df2, on='調査港', how='right')

  return df

In [46]:
path = '/content/drive/MyDrive/港湾統計（兵庫県）/港湾統計マスタ_2021.csv'
columns = ['調査港','施設','公専','用途','船舶隻数','船舶総ﾄﾝ数','係留時間','出荷内外','出荷品名','出荷ﾄﾝ数','出荷仕向港','入荷内外','入荷品名','入荷ﾄﾝ数','入荷仕出港']

df_master =  get_master(path,columns)
df_master

Unnamed: 0,調査港,施設,公専,用途,船舶隻数,船舶総ﾄﾝ数,係留時間,出荷内外,出荷品名,出荷ﾄﾝ数,出荷仕向港,入荷内外,入荷品名,入荷ﾄﾝ数,入荷仕出港,港名,港コード,港区名
0,201,2350,2,T74,1,113502,23,,,0,,2,322,187122,2101015,姫路港,28002,白浜地区・東部工業港区
1,201,2350,2,T74,1,97897,21,,,0,,2,322,153997,1201998,姫路港,28002,白浜地区・東部工業港区
2,201,2350,2,T74,1,93410,24,,,0,,2,322,134949,8112998,姫路港,28002,白浜地区・東部工業港区
3,201,2350,2,T74,1,94446,20,,,0,,2,322,142597,1209002,姫路港,28002,白浜地区・東部工業港区
4,201,2350,2,T74,1,136739,21,,,0,,2,322,151294,2101022,姫路港,28002,白浜地区・東部工業港区
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33631,406,1182,1,T84,0,0,1,,,0,,1,161,1702,28023,東播磨港,28004,曽根港区
33632,406,1182,1,T84,0,0,2,,,0,,1,161,1203,28023,東播磨港,28004,曽根港区
33633,406,1182,1,T84,0,0,5,,,0,,1,161,1703,28023,東播磨港,28004,曽根港区
33634,406,1182,1,T84,0,0,5,,,0,,1,161,775,28023,東播磨港,28004,曽根港区


## 取扱貨物量のチェック

抽出したデータが間違っていないか、取扱貨物量でチェックしておく。

In [43]:
def get_cargo(df):
  
  df = df.astype({'出荷ﾄﾝ数': 'int','入荷ﾄﾝ数': 'int'})
  df = df.groupby(['港名']).agg({'出荷ﾄﾝ数': 'sum','入荷ﾄﾝ数': 'sum'}).reset_index()

  # 合計列の追加
  df['取扱貨物量'] = df['出荷ﾄﾝ数']+ df['入荷ﾄﾝ数']

  return df

In [47]:
path = '/content/drive/MyDrive/港湾統計（兵庫県）/港湾統計マスタ_2021.csv'
columns = ['調査港','施設','公専','出荷内外','出荷品名','出荷ﾄﾝ数','出荷仕向港','入荷内外','入荷品名','入荷ﾄﾝ数','入荷仕出港','用途','船舶内外','船舶隻数','隻数A','隻数B','船舶総ﾄﾝ数','総ﾄﾝ数A','総ﾄﾝ数B','係留時間']

df_master =  get_df_master(path,columns)
df_cargo = get_cargo(df_master)
df_cargo

Unnamed: 0,港名,出荷ﾄﾝ数,入荷ﾄﾝ数,取扱貨物量
0,姫路港,6080581,23258033,29338614
1,東播磨港,10381485,24689767,35071252


# 播磨地域CNP

**前準備**
*   公共岸壁コード.csvの作成（播磨CNP対象岸壁）
*   専用岸壁コード.csvの作成（播磨CNP対象岸壁）




In [109]:
def get_harima(path,columns):

  # csvのインポート
  df = pd.read_csv(path, header=0, encoding="shift-jis",dtype=str)

  # 空白行を削除
  df = df.dropna(subset=['調査年'])

  # 抽出する列のリスト指定
  df = df[columns]
  
  # 調査港をマージ
  df2 = pd.read_csv('/content/drive/MyDrive/港湾統計（兵庫県）/調査港コード.csv', header=0, encoding="shift-jis",dtype=str)
  df = pd.merge(df, df2, on='調査港', how='right')

  # 公共岸壁・専用岸壁をマージ
  df_public = pd.read_csv('/content/drive/MyDrive/港湾統計（兵庫県）/公共岸壁コード.csv', header=0, encoding="shift-jis",dtype=str)
  df_private = pd.read_csv('/content/drive/MyDrive/港湾統計（兵庫県）/専用岸壁コード.csv', header=0, encoding="shift-jis",dtype=str)  
  df2 = pd.concat([df_public, df_private])
  df = pd.merge(df, df2, on=['施設','港名'], how='left')
  
  # 品名をマージ
  df1 = pd.read_csv('/content/drive/MyDrive/港湾統計（兵庫県）/品名コード.csv', header=0, encoding="shift-jis",dtype=str)
  df = pd.merge(df, df1, left_on='出荷品名', right_on='品名', how='left')

  return df

In [110]:
path = '/content/drive/MyDrive/港湾統計（兵庫県）/港湾統計マスタ_2021.csv'
columns = ['調査港','施設','公専','出荷内外','出荷品名','出荷ﾄﾝ数','出荷仕向港','入荷内外','入荷品名','入荷ﾄﾝ数','入荷仕出港','用途','船舶内外','船舶隻数','隻数A','隻数B','船舶総ﾄﾝ数','総ﾄﾝ数A','総ﾄﾝ数B','係留時間']

df_harima =  get_harima(path,columns)
df_harima


Unnamed: 0,調査港,施設,公専,出荷内外,出荷品名,出荷ﾄﾝ数,出荷仕向港,入荷内外,入荷品名,入荷ﾄﾝ数,...,総ﾄﾝ数B,係留時間,港名,港コード,港区名,施設名,地区名,品名,中分類,大分類
0,201,2350,2,,,0,,2,322,187122,...,0,23,姫路港,28002,白浜地区・東部工業港区,姫路ＬＮＧ施設,,,,
1,201,2350,2,,,0,,2,322,153997,...,0,21,姫路港,28002,白浜地区・東部工業港区,姫路ＬＮＧ施設,,,,
2,201,2350,2,,,0,,2,322,134949,...,0,24,姫路港,28002,白浜地区・東部工業港区,姫路ＬＮＧ施設,,,,
3,201,2350,2,,,0,,2,322,142597,...,0,20,姫路港,28002,白浜地区・東部工業港区,姫路ＬＮＧ施設,,,,
4,201,2350,2,,,0,,2,322,151294,...,0,21,姫路港,28002,白浜地区・東部工業港区,姫路ＬＮＧ施設,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33699,406,1182,1,,,0,,1,161,1702,...,499,1,東播磨港,28004,曽根港区,曽根公共物揚場(-4.0m),伊保地区,,,
33700,406,1182,1,,,0,,1,161,1203,...,410,2,東播磨港,28004,曽根港区,曽根公共物揚場(-4.0m),伊保地区,,,
33701,406,1182,1,,,0,,1,161,1703,...,499,5,東播磨港,28004,曽根港区,曽根公共物揚場(-4.0m),伊保地区,,,
33702,406,1182,1,,,0,,1,161,775,...,499,5,東播磨港,28004,曽根港区,曽根公共物揚場(-4.0m),伊保地区,,,


対象施設コード

In [None]:
# 抽出する港区コードのリスト指定
shisetsu_id = ['1285','1270','1280','1275','1279','1203','1207','1222','1213','1214','1215','1219','1208','1231', \
               '1232','1241','1242','1223','1212','1220','1221','1281','1250','1289','1269','1264','1263', \
               '1262','1258','1252','1251','1254','1255','1256','1153','1152','1154','1197','1178','1179', \
               '1159','1160','1170','1171','1199','1181','1182','1195','1155','1156','1157','1158','1198']

df_matster = df_matster[df_matster['施設'].isin(shisetsu_id)]
df_matster

Unnamed: 0,調査港,施設,公専,用途,船舶隻数,船舶総ﾄﾝ数,係留時間,出荷品名,出荷ﾄﾝ数,入荷品名,入荷ﾄﾝ数
785,202,1219,1,T71,1,4000,9,,,221,1000
846,402,1153,1,T71,,,4,,,162,800
847,402,1153,1,T71,,,5,,,162,700
848,402,1153,1,T71,,,5,,,162,700
849,402,1153,1,T71,,,5,,,161,1252
...,...,...,...,...,...,...,...,...,...,...,...
51717,405,1159,1,T84,,,3,,,161,1520
51718,405,1159,1,T84,,,2,,,161,1180
51719,405,1159,1,T84,,,3,,,161,1750
51720,406,1181,1,T84,,,2,,,161,1700


# 取扱貨物量

In [None]:
df_cargo =  df_matster.copy()

df_cargo = df_cargo.fillna('0')

# 品名と取扱貨物量に変換
df_cargo['品名'] = df_cargo.apply(lambda x: x['出荷品名'] if x['出荷ﾄﾝ数'] != '0' else x['入荷品名'], 1)
df_cargo['取扱貨物量'] = df_cargo.apply(lambda x: x['出荷ﾄﾝ数'] if x['出荷ﾄﾝ数'] != '0' else x['入荷ﾄﾝ数'], 1)

# 必要な列だけ抽出
columns = ['調査港','施設','品名','取扱貨物量']
df_cargo = df_cargo[columns]

# 取扱貨物量の集計
df_cargo = df_cargo.astype({'取扱貨物量': 'int'})
df_cargo = df_cargo.groupby(['施設','品名']).agg({'取扱貨物量': 'sum'}).reset_index()


# 品名をマージ
df1 = pd.read_csv('/content/drive/MyDrive/港湾統計（兵庫県）/品名コード.csv', header=0, encoding="shift-jis",dtype=str)
df_res = pd.merge(df_cargo, df1, on='品名', how='outer')

# 施設名をマージ
df2 = pd.read_csv('/content/drive/MyDrive/港湾統計（兵庫県）/施設コード.csv', header=0, encoding="shift-jis",dtype=str)
df_res = pd.merge(df_res, df2, on='施設', how='right')

# 欠損値を削除
df_res.dropna(subset=['取扱貨物量'], inplace=True)

# 集計
df_res = df_res.groupby(['港名','地区名']).agg({'取扱貨物量': 'sum'}).reset_index()


# CSVに書き出し
# df_res.to_csv('取扱貨物量.csv',encoding='cp932')

df_res

Unnamed: 0,港名,地区名,取扱貨物量
0,姫路港,中島地区,1395493.0
1,姫路港,吉美地区,450163.0
2,姫路港,広畑地区,67398.0
3,姫路港,浜田地区,75392.0
4,姫路港,須賀地区,416403.0
5,姫路港,飾磨地区,1437371.0
6,東播磨港,伊保地区,182494.0
7,東播磨港,別府地区,438013.0
8,東播磨港,曽根地区,362208.0
9,東播磨港,高砂地区,591060.0


# 係留船舶

In [None]:
import numpy as np
import pandas as pd

def get_ship(path):

  # 港湾統計マスタをインポート
  df = pd.read_csv(path, header=0, encoding="shift-jis",dtype=str)
  
  # 必要な列だけ抽出
  columns = ['調査港','施設','用途','公専','船舶内外','船舶隻数','隻数A','隻数B','船舶総ﾄﾝ数','総ﾄﾝ数A','総ﾄﾝ数B','係留時間']
  df = df[columns]

  # 欠損値を0に置換
  df = df.fillna('0')
  
  # 隻数のセット
  df = df.astype({'船舶隻数': 'int','隻数A': 'int','隻数B': 'int'})
  df['隻数'] = df[['船舶隻数','隻数A','隻数B']].sum(axis=1)

  # 総トン数のセット
  df = df.astype({'船舶総ﾄﾝ数': 'int','総ﾄﾝ数A': 'int','総ﾄﾝ数B': 'int'})
  df['総トン数'] = df[['船舶総ﾄﾝ数','総ﾄﾝ数A','総ﾄﾝ数B']].sum(axis=1)

  # 隻数が0の行を削除
  df['隻数'].replace(0, np.nan, inplace=True)
  df.dropna(subset=['隻数'], inplace=True)

  # # 用途のtをTに変換
  # df = df.replace('t71', 'T71')
  # df = df.replace('t85', 'T85')
  
  # # 船種をマージ
  # df1 = pd.read_csv('/content/drive/MyDrive/港湾統計（兵庫県）/用途コード.csv', header=0, encoding="shift-jis",dtype=str)
  # df = pd.merge(df, df1, on='用途', how='left')
  
  # # 施設名をマージ
  # df2 = pd.read_csv('/content/drive/MyDrive/港湾統計（兵庫県）/施設コード.csv', header=0, encoding="shift-jis",dtype=str)
  # df = pd.merge(df, df2, on='施設', how='right')
  
  # # # 欠損値を除外
  # # df =  df.dropna(subset=['船舶隻数'])

  # # 集計
  # df = df.groupby(['港名','地区名','船舶内外','船種']).agg({'船舶隻数': 'sum', '係留時間': 'mean', '総トン数': 'mean'}).reset_index()


  return df

In [None]:
path = '/content/drive/MyDrive/港湾統計（兵庫県）/港湾統計マスタ_2021.csv'

df = get_ship(path)

df = df.append(df.sum(numeric_only=True), ignore_index=True)
df

Unnamed: 0,調査港,施設,用途,公専,船舶内外,船舶隻数,隻数A,隻数B,船舶総ﾄﾝ数,総ﾄﾝ数A,総ﾄﾝ数B,係留時間,隻数,総トン数
0,201,2350,T74,2,2,1.0,0.0,0.0,113502.0,0.0,0.0,23,1.0,113502.0
1,202,1241,T84,1,1,0.0,0.0,1.0,0.0,0.0,480.0,3,1.0,480.0
2,202,2308,T97,2,1,0.0,1.0,0.0,0.0,19.0,0.0,13,1.0,19.0
3,202,2352,T71,2,1,0.0,0.0,1.0,0.0,0.0,376.0,28,1.0,376.0
4,202,1212,t85,1,1,1.0,0.0,0.0,749.0,0.0,0.0,4,1.0,749.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40674,14,2488,T86,2,2,1.0,0.0,0.0,9995.0,0.0,0.0,24,1.0,9995.0
40675,14,2488,3,2,1,1.0,0.0,0.0,8858.0,0.0,0.0,26,1.0,8858.0
40676,14,2488,T85,2,1,1.0,0.0,0.0,3118.0,0.0,0.0,5,1.0,3118.0
40677,14,2488,T85,2,2,1.0,0.0,0.0,3215.0,0.0,0.0,11,1.0,3215.0
