In [1]:
import re
import json
import os
import sys

# Register the three nearest ancestors of the current working directory on
# sys.path so the `core.*` imports below can resolve. Entries that are already
# present are skipped, which keeps sys.path from growing every time this cell
# is re-run in the same kernel.
for relative_parent in ('..', '../../', '../../../'):
    parent_path = os.path.abspath(relative_parent)
    if parent_path not in sys.path:
        sys.path.append(parent_path)

from core.ShoreNet.conf import get_data_path

DATA_PATH = get_data_path()

# load .env file (expected at <two levels up>/secret/.env, resolved from the
# current working directory)
project_path = os.path.abspath('../../')
dotenv_path = os.path.join(project_path, 'secret', '.env')

from dotenv import load_dotenv
if not os.path.isfile(dotenv_path):
    # load_dotenv() does not raise on a missing file; without this notice the
    # only symptom would be `None` printed for SISI_DB_TYPE below.
    print(f"WARNING: .env file not found at {dotenv_path}", file=sys.stderr)
load_dotenv(dotenv_path)

# Sanity check that the environment was populated.
print(os.getenv("SISI_DB_TYPE"))

# load project variables
# NOTE(review): imported only after load_dotenv(); presumably VariablesManager
# reads the environment loaded above - confirm before reordering.
from core.ShoreNet.definitions.variables import VariablesManager

var = VariablesManager()
print(var.data_path)


mysql
dict_items([('ship_statics_path', 'D:\\data\\sisi\\statics'), ('output_path', 'output'), ('test_analyze_source_data_path', 'tests\\shared_data\\analyze\\source'), ('test_analyze_result_data_path', 'tests\\shared_data\\analyze\\result')])
D:\data\sisi\


In [2]:
# Fetch the dock polygons through the shared engine and report how many
# entries came back.
from core.ShoreNet.events.generic.tools import load_dock_polygon

dock_polygon_list = load_dock_polygon(var.engine)
dock_polygon_count = len(dock_polygon_list)
print(dock_polygon_count)

1518


In [3]:
import pandas as pd

from sqlalchemy import text

from core.ShoreNet.events.filter import clean_up_events
from core.ShoreNet.statics.filter import clean_up_statics
from core.ShoreNet.definitions.parameters import TableNames as tbn

# Stop events: per-event vessel id, time window (plus end - begin as
# `duration`), start position, point count, category and dock id, restricted
# to rows with avg_speed < 1.
stop_event_query = f"""
SELECT 
    mmsi, 
    begin_time, end_time, end_time - begin_time as duration, 
    begin_lng, begin_lat, point_num, event_categories, coal_dock_id
FROM 
    sisi.{tbn.all_stop_events_table_name} t
WHERE
    t.avg_speed < 1
"""

# Ship statics: identity and dimension columns for every ship in the
# statics table (no WHERE clause).
coal_static_query = f"""
SELECT
    mmsi, ship_name, ship_type, length, width, dwt
FROM
    sisi.{tbn.dim_ships_statics_table_name}
"""

# Pull both result sets through the shared engine.
events_df = pd.read_sql(sql=text(stop_event_query), con=var.engine)
statics_raw_df = pd.read_sql(sql=text(coal_static_query), con=var.engine)

# Clean the statics first: their mmsi values are handed to the event cleaner.
statics_df = clean_up_statics(statics_raw_df)
print(statics_df.shape)

known_mmsi_list = statics_df['mmsi'].tolist()
events_df = clean_up_events(df=events_df, var=var, mmsi_enum_list=known_mmsi_list)

(11935, 7)
original event data shape: (13642667, 9)
cleaned event data shape: (24909, 9)


In [4]:
from core.ShoreNet.events.dock import cluster_dock_polygon_dbscan

# Run the DBSCAN dock clustering over the cleaned stop events.
# NOTE(review): per the original note this is meant to find events without a
# polygon and cluster them - confirm against cluster_dock_polygon_dbscan.
# The variable name (sic, "evnet") is kept unchanged because other cells may
# reference it.
cleaned_evnet_df = cluster_dock_polygon_dbscan(
    events_df=events_df, var=var
)

# dbscan cluster result
print(f"events cluster count : {cleaned_evnet_df['cluster'].nunique()}")

# Split on whether the event carries a dock id; the mask is computed once and
# negated for the complementary subset.
has_dock_mask = cleaned_evnet_df['coal_dock_id'].notna()
with_polygon_dbscan_df = cleaned_evnet_df.loc[has_dock_mask]
without_polygon_dbscan_df = cleaned_evnet_df.loc[~has_dock_mask]
print(with_polygon_dbscan_df.shape, without_polygon_dbscan_df.shape, cleaned_evnet_df.shape)

# Guard the share computation: an empty clustering result would otherwise
# raise ZeroDivisionError here; report NaN instead.
total_event_count = cleaned_evnet_df.shape[0]
if total_event_count:
    with_polygon_pct = 100 * with_polygon_dbscan_df.shape[0] / total_event_count
else:
    with_polygon_pct = float('nan')
print(f"Event with polygon percentage is {with_polygon_pct} %")

events cluster count : 5
(3131, 10) (9, 10) (3140, 10)
Event with polygon percentage is 99.71337579617834 %


In [5]:
# Map the events that carry a dock id to departure/destination dock pairs,
# then show summary statistics of the result.
from core.ShoreNet.analyze.departure_destination_docks import map_dock_pairs

event_dd_df = map_dock_pairs(with_polygon_dbscan_df)
event_dd_summary = event_dd_df.describe()
event_dd_summary

Unnamed: 0,mmsi,departure_dock_id,departure_lng,departure_lat,destination_dock_id,destination_lng,destination_lat,sail_duration
count,205.0,205.0,205.0,205.0,205.0,205.0,205.0,205.0
mean,413841400.0,247.521951,120.764283,31.736671,235.346341,120.737195,31.760156,7964337.0
std,60706.55,62.140778,0.569488,0.20162,52.557178,0.543785,0.174437,6492552.0
min,413592800.0,194.0,120.135,31.3486,194.0,120.135,31.3485,45052.0
25%,413803000.0,194.0,120.141,31.6805,194.0,120.163,31.681,2555702.0
50%,413830300.0,246.0,121.139,31.697,246.0,121.139,31.6969,6326833.0
75%,413860300.0,246.0,121.171,31.9198,246.0,121.17,31.9201,11725640.0
max,413998800.0,376.0,121.525,31.9258,376.0,121.525,31.9258,26529050.0
