In [1]:
import os, sys, json
import pandas as pd
import geopandas as gpd
import numpy as np
import folium
from folium.plugins import FastMarkerCluster
from ipywidgets import interact
from tqdm import tqdm

sys.path.append(os.path.join(os.path.abspath(''), "../"))
from greenferries.db import get_connection

In [2]:
import warnings
warnings.filterwarnings('ignore')

# define dataframes

## df_ships_www

In [3]:
df_ships_www = pd.read_sql_query("""
    SELECT ships.imo, ships.name, companies.country AS company_country, companies.name AS company_name
    FROM ships
    INNER JOIN companies ON companies.slug = ships.company_slug
    ORDER BY companies.country, companies.name, ships.name
""", get_connection("www"), dtype={"imo": int})
df_ships_www

Unnamed: 0,imo,name,company_country,company_name
0,9223796,Gotland,AU,Destination Gotland
1,9328015,HSC Gotlandia II,AU,Destination Gotland
2,7802794,M/S Gute,AU,Destination Gotland
3,9223784,M/S Visby,AU,Destination Gotland
4,9763655,Visborg,AU,Destination Gotland
...,...,...,...,...
535,8420878,WIND STAR,US,Windstar Cruises
536,8700785,WIND SURF,US,Windstar Cruises
537,9050137,ORIANA,ZH,Astro Ocean
538,9482902,MV Finlaggan,gb,Caledonian MacBrayne


## df_ferry_terminals

In [4]:
df_ferry_terminals = gpd.read_file("ferry_terminals_seashore.with_geonames_ids.geojson")

def tag_to_name(tag):
    for key in ["name", "addr:city"]:
        if key in tag:
            return str(tag[key])
    return ""

df_ferry_terminals["name"] = df_ferry_terminals.apply(lambda row: tag_to_name(row["tags"]), axis=1).astype("string")

df_ferry_terminals = gpd.GeoDataFrame(
    df_ferry_terminals, 
    geometry=gpd.points_from_xy(df_ferry_terminals.lon, df_ferry_terminals.lat),
    crs="EPSG:4326"
).drop(columns=["lat", "lon", "type", "tags"])

df_ferry_terminals


Unnamed: 0,id,min_distance_to_coastline_km,country_code,city_name,city_geonames_id,geometry,name
0,295294,7,SE,Norrkrog,3327910,POINT (16.59929 58.44200),
1,2383584,0,GB,Yarmouth,2633414,POINT (-1.50064 50.70651),Yarmouth Quay
2,5530966,0,SE,Köpmanholm,2699778,POINT (18.93209 59.65987),Köpmanholm
3,5530979,0,SE,Furusund,2713372,POINT (18.92547 59.66308),Furusund
4,7142214,0,GB,Devonport,2651289,POINT (-4.18386 50.37577),
...,...,...,...,...,...,...,...
5584,11128509970,46,NL,Ballingbuur,2759461,POINT (5.75332 52.99318),Rufus
5585,11135534717,0,DE,Helgoland,3208547,POINT (7.89018 54.17825),
5586,11141512068,6,DK,Skælskør,2613694,POINT (11.29111 55.25372),Skjelskør V
5587,11148811652,39,DE,Wiesmoor,2809134,POINT (7.73426 53.41328),MS Wiesmoor


## df_trackpoints

In [5]:
df_trackpoints = pd.read_sql_query("""
    SELECT ships.imo, trackpoints.latitude, trackpoints.longitude, trackpoints.time
    FROM ships
    INNER JOIN trackpoints ON trackpoints.imo = ships.imo
""", get_connection("aishub"), dtype={"imo": int, "time": "datetime64[ns]"})

df_trackpoints = gpd.GeoDataFrame(
    df_trackpoints, 
    geometry=gpd.points_from_xy(df_trackpoints.longitude, df_trackpoints.latitude),
    crs="EPSG:4326"
).drop(columns=["latitude", "longitude"])

df_trackpoints = gpd.sjoin_nearest(
    df_trackpoints.to_crs(epsg=6933),
    df_ferry_terminals.add_prefix("terminal_").rename(columns={"terminal_geometry": "geometry"}).to_crs(epsg=6933),
    how="left",
    max_distance=(1 * 500),
    distance_col="distance_to_terminal"
).merge(
    df_ferry_terminals[["id", "geometry"]],
    how="left",
    suffixes=("", "_terminal"),
    left_on="terminal_id",
    right_on="id"
).drop(columns=["id"]) \
.to_crs(epsg=4326)

df_trackpoints

Unnamed: 0,imo,time,geometry,index_right,terminal_id,terminal_min_distance_to_coastline_km,terminal_country_code,terminal_city_name,terminal_city_geonames_id,terminal_name,distance_to_terminal,geometry_terminal
0,9150030,2023-08-18 10:22:57,POINT (12.69012 56.04302),24.0,1.540818e+07,1.0,SE,Helsingborg,2706767.0,Helsingborg,136.727281,POINT (12.69152 56.04331)
1,9233258,2023-08-18 10:21:31,POINT (19.08706 74.29201),,,,,,,,,
2,9586617,2023-08-18 10:22:51,POINT (7.98710 58.14299),3493.0,3.789991e+09,2.0,NO,Kristiansand,3149318.0,Kristiansand,145.355538,POINT (7.98590 58.14429)
3,9144421,2023-08-18 10:22:52,POINT (11.34047 54.64803),,,,,,,,,
4,9791028,2023-08-18 10:22:57,POINT (-1.10765 50.79291),1258.0,5.668430e+08,0.0,GB,Old Portsmouth,6458606.0,Portsmouth Car Ferry terminal,138.705514,POINT (-1.10621 50.79289)
...,...,...,...,...,...,...,...,...,...,...,...,...
64106,9217242,2023-09-02 13:32:55,POINT (12.65689 54.95234),,,,,,,,,
64107,9551363,2023-09-02 13:50:08,POINT (-1.98963 50.70734),2855.0,2.327213e+09,0.0,GB,Poole,2640101.0,Ro-Ro 2,84.158139,POINT (-1.99050 50.70745)
64108,7350090,2023-09-02 13:46:05,POINT (10.32260 42.81065),3610.0,4.205481e+09,0.0,IT,Portoferraio,3170094.0,FerryTerminal Mole 2,28.229840,POINT (10.32232 42.81072)
64109,9351505,2023-09-02 13:50:12,POINT (9.83909 41.96552),,,,,,,,,


## df_routes

In [6]:
MIN_STOPOVER_DURATION = pd.Timedelta(minutes=0)

class Ship(object):
    def __init__(self, imo, df_trackpoints):
        self.imo = imo
        self.df = df_trackpoints

    def get_next_stopover(self, from_index=0):
        df_from_index = self.df.iloc[from_index:]
        df_from_index_with_terminal = df_from_index[~df_from_index.terminal_id.isna()]
        if df_from_index_with_terminal.shape[0] == 0:
            return None
        start_index = df_from_index_with_terminal.index[0]
        terminal_id = self.df.iloc[start_index].terminal_id
        df_from_start_index = self.df.iloc[start_index:]
        df_from_start_index = df_from_start_index[df_from_start_index.terminal_id == terminal_id]
        end_index = start_index
        while end_index + 1 in df_from_start_index.index:
            end_index = end_index + 1
        duration = self.df.iloc[end_index].time - self.df.iloc[start_index].time
        if duration >= MIN_STOPOVER_DURATION:
            return {
                "start_index": start_index,
                "end_index": end_index,
                "terminal_id": terminal_id,
                "duration": duration
            }
        elif df_from_index.shape[0] > end_index + 1:
            return self.get_next_stopover(from_index=end_index + 1)
        else:
            return None

    def find_next_route(self, from_index=0):
        stopover_departure = self.get_next_stopover(from_index=from_index)
        if stopover_departure is None:
            return
        else:
            stopover_destination = self.get_next_stopover(from_index=stopover_departure["end_index"] + 1)
            if stopover_destination is None:
                return
            else:
                trip_duration = self.df.iloc[stopover_destination["start_index"]].time - self.df.iloc[stopover_departure["end_index"]].time
                return {
                    "imo": self.imo,
                    "departure_terminal_id": stopover_departure["terminal_id"],
                    "destination_terminal_id": stopover_destination["terminal_id"],
                    "trackpoints_start_index": stopover_departure["end_index"],
                    "trackpoints_end_index": stopover_destination["start_index"],
                    "departure_stopover_duration": stopover_departure["duration"],
                    "trip_duration": trip_duration
                }
                

    def route_is_valid(self, route):
        if route["departure_terminal_id"] == route["destination_terminal_id"]:
            return False
        if route["trip_duration"] < pd.Timedelta(minutes=10) or route["trip_duration"] > pd.Timedelta(hours=48):
            return False

        df_6933 = self.df.to_crs(epsg=6933)
        if df_6933.iloc[route["trackpoints_start_index"]].geometry.distance(df_6933.iloc[route["trackpoints_end_index"]].geometry) < 50 * 1000:
            return False
        return True
            
    
    def get_routes(self):
        self.raw_routes = []
        from_index = 0
        while from_index < self.df.shape[0]:
            route = self.find_next_route(from_index=from_index)
            if route is None:
                break
            else:
                if self.route_is_valid(route):
                    self.raw_routes.append(route)
                from_index = route["trackpoints_end_index"] + 1
        return self.raw_routes

def get_routes(imo):
    return Ship(imo, df_trackpoints[df_trackpoints.imo == imo].reset_index(drop=True)).get_routes()

# display(get_routes(8306486))
def flatten(l):
    return [item for sublist in l for item in sublist]

df_routes = pd.DataFrame(flatten([get_routes(imo) for imo in tqdm(df_trackpoints.imo.unique())]))
df_routes = df_routes.merge(df_ferry_terminals[["id", "country_code", "name", "city_name", "geometry"]].add_prefix("departure_terminal_"), how="left", left_on="departure_terminal_id", right_on="departure_terminal_id")
df_routes = df_routes.merge(df_ferry_terminals[["id", "country_code", "name", "city_name", "geometry"]].add_prefix("destination_terminal_"), how="left", left_on="destination_terminal_id", right_on="destination_terminal_id")
df_routes



100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 316/316 [00:25<00:00, 12.41it/s]


Unnamed: 0,imo,departure_terminal_id,destination_terminal_id,trackpoints_start_index,trackpoints_end_index,departure_stopover_duration,trip_duration,departure_terminal_country_code,departure_terminal_name,departure_terminal_city_name,departure_terminal_geometry,destination_terminal_country_code,destination_terminal_name,destination_terminal_city_name,destination_terminal_geometry
0,9233258,2.779650e+08,2.166949e+09,84,119,0 days 04:59:52,1 days 10:59:00,NO,Hurtigruten Finnsnes,Finnsnes,POINT (17.97333 69.22904),NO,Brønnøysund hurtigbåtkai,Brønnøysund,POINT (12.21002 65.47501)
1,9233258,2.166949e+09,2.822786e+08,123,144,0 days 02:59:05,0 days 20:59:39,NO,Brønnøysund hurtigbåtkai,Brønnøysund,POINT (12.21002 65.47501),NO,Ålesund hurtigbåtkai Langevåg,Ålesund,POINT (6.15454 62.47004)
2,9233258,2.822786e+08,3.314844e+09,147,165,0 days 02:00:01,0 days 18:01:45,NO,Ålesund hurtigbåtkai Langevåg,Ålesund,POINT (6.15454 62.47004),NO,Bergen,Laksevåg,POINT (5.30657 60.39166)
3,9233258,7.510283e+07,5.817832e+09,250,262,0 days 03:00:03,0 days 12:01:52,NO,Hurtigruten Stokmarknes,Stokmarknes,POINT (14.91173 68.57101),NO,Tromsø Hurtigruten,Tromsø,POINT (18.96029 69.64654)
4,9586617,3.789991e+09,1.117218e+09,0,1,0 days 00:00:00,0 days 07:54:12,NO,Kristiansand,Kristiansand,POINT (7.98590 58.14429),DK,Hirtshals,Hirtshals,POINT (9.97559 57.59589)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2214,7633143,8.107484e+08,4.921431e+08,1,6,0 days 00:00:00,0 days 04:50:43,DE,,Reetz,POINT (13.58849 54.48368),DK,Hurtigfærgerne,Rønne,POINT (14.69132 55.09977)
2215,7633143,8.107484e+08,4.921431e+08,8,11,0 days 00:00:00,0 days 10:46:38,DE,,Reetz,POINT (13.58849 54.48368),DK,Hurtigfærgerne,Rønne,POINT (14.69132 55.09977)
2216,9265419,4.551073e+09,2.298277e+09,17,33,0 days 04:42:01,1 days 18:17:48,FR,accès ferry maroc,Sète,POINT (3.70291 43.40024),ES,,Babel,POINT (-0.50122 38.32563)
2217,9265419,2.298277e+09,9.502703e+09,38,53,0 days 04:02:04,1 days 19:59:58,ES,,Babel,POINT (-0.50122 38.32563),FR,Porte 11,La Calade,POINT (5.34685 43.33484)


## df_ships (with trackpoints count and display_name)


In [7]:
df_ships = pd.merge(df_ships_www, df_trackpoints, on="imo", how="inner") \
    .groupby(["imo", "name", "company_country", "company_name"]).agg({"time": "count"}) \
    .rename(columns={"time": "trackpoints_count"}) \
    .reset_index() \
    .sort_values(["company_country", "company_name", "name"])

def display_name(ship):
    return f"{ship['company_country']} {ship['company_name']} - {ship['name']} ({ship['trackpoints_count']} trackpoints)"

df_ships["display_name"] = df_ships.apply(display_name, axis=1)
df_ships

Unnamed: 0,imo,name,company_country,company_name,trackpoints_count,display_name
206,9223796,Gotland,AU,Destination Gotland,21,AU Destination Gotland - Gotland (21 trackpoints)
21,7802794,M/S Gute,AU,Destination Gotland,94,AU Destination Gotland - M/S Gute (94 trackpoi...
205,9223784,M/S Visby,AU,Destination Gotland,106,AU Destination Gotland - M/S Visby (106 trackp...
305,9763655,Visborg,AU,Destination Gotland,154,AU Destination Gotland - Visborg (154 trackpoi...
27,7826788,MS Rigel II,AU,Ventouris Ferries,85,AU Ventouris Ferries - MS Rigel II (85 trackpo...
...,...,...,...,...,...,...
190,9214991,MS Ulysses,US,Irish Ferries,314,US Irish Ferries - MS Ulysses (314 trackpoints)
309,9809679,W. B. YEATS,US,Irish Ferries,210,US Irish Ferries - W. B. YEATS (210 trackpoints)
144,9141065,NORWEGIAN SPIRIT,US,Norwegian Cruise Line,56,US Norwegian Cruise Line - NORWEGIAN SPIRIT (5...
286,9482902,MV Finlaggan,gb,Caledonian MacBrayne,137,gb Caledonian MacBrayne - MV Finlaggan (137 tr...


# display maps

## display all ferry terminals

In [8]:
m = folium.Map(location=[51.0, -11.0], zoom_start=4)
FastMarkerCluster(
    df_ferry_terminals.geometry.apply(lambda p: [p.y, p.x]).values.tolist()
).add_to(m)
display(m)

## display all trackpoints of all ships

In [9]:
def fit_map_to_df_bounds(map, df):
    bounds = df.total_bounds
    map.fit_bounds([[bounds[1],bounds[0]], [bounds[3],bounds[2]]])

In [10]:
m = folium.Map(location=[0, 0], zoom_start=2)
for imo in df_trackpoints.imo.unique():
    trackpoints = df_trackpoints[df_trackpoints.imo == imo].geometry.apply(lambda p: [p.y, p.x]).values.tolist()
    ship = df_ships_www[df_ships_www.imo == imo]
    folium.PolyLine(trackpoints, tooltip=f"{ship.name} {ship.imo}").add_to(m)

fit_map_to_df_bounds(m, df_trackpoints)

m

## [interactive] display trackpoints of a single ship

In [11]:
def display_ship(display_name):
    m = folium.Map(location=[0, 0], zoom_start=2)
    ship = df_ships[df_ships.display_name == display_name].iloc[0]
    df_ship_trackpoints = df_trackpoints[df_trackpoints.imo == ship["imo"]]
    trackpoints = df_ship_trackpoints.geometry.apply(lambda p: [p.y, p.x]).values.tolist()
    folium.PolyLine(trackpoints).add_to(m)
    for ferry_row in df_ship_trackpoints.groupby("terminal_id").first().itertuples():
        folium.Marker(
            location=[ferry_row.geometry_terminal.y, ferry_row.geometry_terminal.x],
            tooltip=ferry_row.terminal_name
        ).add_to(m)
    for route in df_routes[df_routes.imo == ship["imo"]].itertuples():
        folium.PolyLine(
            [
                [route.departure_terminal_geometry.y, route.departure_terminal_geometry.x],
                [route.destination_terminal_geometry.y, route.destination_terminal_geometry.x]
            ],
            color="red"
        ).add_to(m)
    fit_map_to_df_bounds(m, df_ship_trackpoints)
    display(m)

interact(display_ship, display_name=list(df_ships.display_name.tolist()))

interactive(children=(Dropdown(description='display_name', options=('AU Destination Gotland - Gotland (21 trac…

<function __main__.display_ship(display_name)>

In [12]:
df = pd.DataFrame(
    [
        {"first_name": "John", "last_name": "Doe", "position": 1},
        {"first_name": "marc", "last_name": "Doe", "position": 2},
        {"first_name": "pat", "last_name": "Smith", "position": 3},
        {"first_name": "phil", "last_name": "Smith", "position": 4},
        {"first_name": "marcus", "last_name": "Doe", "position": 5},
    ]
)
df[df.last_name=="Smith"].iloc[0:].index[0]

2