In [1]:
pip install plotly_express

Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from math import cos, asin, sqrt
import geopandas as gpd
import plotly_express as px
from shapely.geometry import Polygon
pd.options.mode.chained_assignment = None

In [3]:
# Working directory of the notebook: relative file names used further down
# (e.g. the CSV inputs) are resolved against it.
# Set the THESIS_DATA_DIR environment variable to run on another machine;
# when it is not set, the original location is used.
path = os.environ.get("THESIS_DATA_DIR", r"C:\Users\firoj\OneDrive\Desktop\thesis_test\Thesis_test_01")
os.chdir(path)

In [4]:
def read_and_prepare_geofences(path_poi):
    """
    Load the points of interest and the geofence file referenced by each one.

    Parameters:
        path_poi (str): Location of the POI CSV. The columns 'name',
            'geofence_extended' and 'id' are read from it.

    Returns:
        tuple: three items:
            - geo_list (list): An empty pandas DataFrame at position 0,
              followed by one gpd.read_file() result per POI row.
            - geo_name (list): "Empty" at position 0, followed by the POI
              names in file order.
            - name_to_id (dict): POI name -> value of its 'id' column.
    """
    poi_table = pd.read_csv(path_poi)

    # Position 0 of both lists is a placeholder entry.
    geo_list = [pd.DataFrame()]
    geo_name = ["Empty"]
    name_to_id = {}

    for _, poi in poi_table.iterrows():
        poi_name = poi['name']
        # 'geofence_extended' is handed to gpd.read_file() as-is.
        fence = gpd.read_file(poi['geofence_extended'])

        geo_name.append(poi_name)
        geo_list.append(fence)
        name_to_id[poi_name] = poi['id']

    return geo_list, geo_name, name_to_id

In [5]:
def extract_geofence_data_for_vessel(df_weder, vessel_mmsi, geo_list):
    """
    Labels every AIS record of one vessel with the geofence it lies within.

    Parameters:
        df_weder (pd.DataFrame): AIS records. The columns 'vessel_mmsi',
            'timestamp', 'time_elapsed', 'lon' and 'lat' are read here.
        vessel_mmsi (int): MMSI of the vessel to extract data for.
        geo_list (list): Geofence frames. Entry 0 is skipped; for every other
            entry only the geometry stored in row 0 is tested.

    Returns:
        pd.DataFrame: The vessel's records, indexed and sorted by
        'ais_timestamp' ('timestamp' minus 'time_elapsed' minutes), with the
        additional columns 'geometry' and 'geofence'. 'geofence' holds the
        position of the matching entry in geo_list and is NaN for records
        outside every geofence. A record lying within several geofences
        appears once per matching geofence.
    """

    # Boolean filtering already yields a new frame; the explicit copy makes it
    # obvious that the column added below never reaches the caller's data.
    df_s = df_weder[df_weder["vessel_mmsi"] == vessel_mmsi].copy()
    df_s["ais_timestamp"] = pd.to_datetime(df_s["timestamp"]) - pd.to_timedelta(df_s['time_elapsed'], unit='m')
    df_s = df_s.set_index("ais_timestamp", drop=True)
    df_s = df_s.sort_index()

    # Create geodataframe from original one
    gdf = gpd.GeoDataFrame(df_s, geometry=gpd.points_from_xy(df_s.lon, df_s.lat))
    gdf = gdf.reset_index()

    gdf_ls = []

    # Collect, per geofence, the records lying within it
    for geofence_idx in range(1, len(geo_list)):  # Start from 1 to skip the 'Empty' geofence
        mask1 = geo_list[geofence_idx].loc[0, 'geometry']
        inside = gdf.within(mask1)
        # assign() builds the labelled extract without using gdf as scratch space
        gdf_ls.append(gdf[inside].assign(geofence=geofence_idx))

    if gdf_ls:
        result = pd.concat(gdf_ls)
    else:
        # No real geofence was supplied: pd.concat([]) would raise ValueError,
        # so fall back to an empty extract and let the merge mark every record
        # as outside (geofence = NaN).
        result = gdf.iloc[:0].assign(geofence=float("nan"))

    # Right-merge on every original column so records outside all geofences are kept
    df_s = df_s.reset_index()
    df_geo = pd.merge(result, df_s, on=list(df_s.columns), how='right')
    df_geo = df_geo.set_index("ais_timestamp", drop=True)
    df_geo = df_geo.sort_index()

    return df_geo

In [6]:
def split_records(df, diff_list):
    """
    Cuts a DataFrame into consecutive positional slices.

    Parameters:
        df (pd.DataFrame): DataFrame to be split.
        diff_list (list): Row positions at which a new slice begins.

    Returns:
        list: The slices df.iloc[0:c0], df.iloc[c0:c1], ... and finally
        everything from the last cut onwards, i.e. len(diff_list) + 1 frames.
    """

    cuts = list(diff_list)
    starts = [0, *cuts]
    # A stop of None makes the final slice run to the end of the frame.
    stops = [*cuts, None]

    return [df.iloc[begin:end] for begin, end in zip(starts, stops)]

In [7]:
def _geofence_id(value, geo_name, name_to_id):
    """Translates a geofence index (possibly NaN) into its POI id, falling back to 'Unknown'."""
    name = geo_name[int(value)] if pd.notna(value) else 'Unknown'
    return name_to_id.get(name, 'Unknown')


def split_and_save_data(df_geo, geo_name, vessel_mmsi, name_to_id,
                        output_dir=r"C:\Users\firoj\OneDrive\Desktop\thesis_test\Thesis_test_01\Trips"):
    """
    Splits the DataFrame based on geofence entries and exits, then saves the data to CSV files.

    Consecutive rows with the same 'geofence' value form one segment. A segment
    inside a geofence is saved as "<id>_<mmsi>_<n>.csv"; a segment outside all
    geofences is saved as "<start id>-<end id>_<mmsi>_<n>.csv", where the ids
    come from the rows just before and just after it. An outside segment whose
    first or last row has no geofence on either neighbouring row is skipped.

    Parameters:
        df_geo (pd.DataFrame): DataFrame containing geofence data. The columns
            'geofence', 'vessel_mmsi' and 'vessel_name' are read. The helper
            columns 'before', 'after' and 'diffs' are added to it in place.
        geo_name (list): List of geofence names, indexed by geofence value.
        vessel_mmsi (int): MMSI of the vessel (used in the file names).
        name_to_id (dict): Dictionary mapping geofence names to their corresponding IDs.
        output_dir (str): Directory the CSV files are written to; created on
            demand. Defaults to the location used originally.

    Returns:
        list: One entry per saved segment:
        [vessel_mmsi, vessel_name, geofence, first index, last index].
    """

    # Stand-in for NaN so that diff() registers entering/leaving a geofence;
    # it must not coincide with a real geofence index.
    outside_marker = 500

    df_geo["before"] = df_geo["geofence"].shift(1)
    df_geo["after"] = df_geo["geofence"].shift(-1)
    df_geo['diffs'] = df_geo['geofence'].fillna(outside_marker).diff()

    # The first diff is NaN (!= 0), so position 0 is always a cut and the
    # leading segment is empty; empty segments are skipped below.
    dif_lt = [index for index, value in enumerate(df_geo['diffs']) if value != 0]
    itg = split_records(df_geo, dif_lt)
    itg_list = []

    for itg_idx, segment in enumerate(itg):
        if segment.shape[0] == 0:
            continue

        if segment['geofence'].isnull().values.any():
            # Segment outside every geofence
            con_start = pd.isna(segment['before'].iloc[0]) and pd.isna(segment['after'].iloc[0])
            con_end = pd.isna(segment['before'].iloc[-1]) and pd.isna(segment['after'].iloc[-1])
            if con_start or con_end:
                continue

            # Handle transitions between geofences
            geofence_start_id = _geofence_id(segment['before'].iloc[0], geo_name, name_to_id)
            geofence_end_id = _geofence_id(segment['after'].iloc[-1], geo_name, name_to_id)
            file_name = f"{geofence_start_id}-{geofence_end_id}_{vessel_mmsi}_{itg_idx}.csv"
        else:
            # Handle presence within a single geofence (no NaN in this segment,
            # so the first value is always a valid index)
            geofence_id = _geofence_id(segment['geofence'].iloc[0], geo_name, name_to_id)
            file_name = f"{geofence_id}_{vessel_mmsi}_{itg_idx}.csv"

        # Construct file path and make sure its directory exists
        file_path = os.path.join(output_dir, file_name)
        directory = os.path.dirname(file_path)
        if directory:
            os.makedirs(directory, exist_ok=True)

        # Save the segment to CSV
        segment.to_csv(file_path, sep=',')
        # Collect information about the segment
        data_list = [segment['vessel_mmsi'].iloc[0], segment['vessel_name'].iloc[0], segment['geofence'].iloc[0], segment.index[0], segment.index[-1]]
        itg_list.append(data_list)

    return itg_list

In [8]:
def main():
    """
    Runs the whole pipeline: load the AIS records and the geofences, then, for
    every distinct 'vessel_mmsi', label the records, split them into segments,
    save the segments as CSV files and finally print the collected summaries.
    """
    # Input files (relative to the current working directory)
    ais_csv = r"df_weser.csv"
    poi_csv = r"point_of_interest_updated_short.csv"

    AIS_df = pd.read_csv(ais_csv, sep=",")
    geo_list, geo_name, name_to_id = read_and_prepare_geofences(poi_csv)

    all_itg_lists = []
    for vessel_mmsi in AIS_df['vessel_mmsi'].unique():
        # Records belonging to the current vessel only
        vessel_rows = AIS_df[AIS_df['vessel_mmsi'] == vessel_mmsi]

        df_geo = extract_geofence_data_for_vessel(vessel_rows, vessel_mmsi, geo_list)
        # Each call writes the vessel's segment files and returns their summaries
        all_itg_lists += split_and_save_data(df_geo, geo_name, vessel_mmsi, name_to_id)

    # Summary rows of every saved segment, across all vessels
    print(all_itg_lists)

# Run the pipeline when this cell/script is executed directly
if __name__ == "__main__":
    main()

[[211513440.0, 'NABUCCO', 12.0, Timestamp('2023-05-03 08:38:01'), Timestamp('2023-05-03 08:38:01')], [211513440.0, 'NABUCCO', nan, Timestamp('2023-05-03 08:41:01'), Timestamp('2023-05-03 09:02:01')], [211513440.0, 'NABUCCO', 11.0, Timestamp('2023-05-03 09:05:01'), Timestamp('2023-05-03 13:41:02')], [211513440.0, 'NABUCCO', nan, Timestamp('2023-05-03 13:44:01'), Timestamp('2023-05-03 14:27:01')], [211513440.0, 'NABUCCO', 12.0, Timestamp('2023-05-03 14:29:01'), Timestamp('2023-05-03 14:32:01')], [211513440.0, 'NABUCCO', nan, Timestamp('2023-05-03 14:35:01'), Timestamp('2023-05-25 11:38:01')], [211513440.0, 'NABUCCO', 11.0, Timestamp('2023-05-25 11:41:01'), Timestamp('2023-05-25 13:08:01')], [211513440.0, 'NABUCCO', nan, Timestamp('2023-05-25 13:11:01'), Timestamp('2023-05-25 13:53:01')], [211513440.0, 'NABUCCO', 12.0, Timestamp('2023-05-25 13:56:01'), Timestamp('2023-05-25 13:56:01')], [211513440.0, 'NABUCCO', nan, Timestamp('2023-05-25 13:59:01'), Timestamp('2023-05-31 11:14:01')], [211