This script is only for demo. You can modify the visualizing function to fit your own use.

In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [2]:
train = pd.read_csv("../Datasets/ais_train.csv", sep="|")
train["time"] = pd.to_datetime(train["time"])

In [3]:
test = pd.read_csv("../Datasets/ais_test.csv", sep=",")
test["time"] = pd.to_datetime(test["time"])

In [4]:
eval = pd.read_csv("eval_predictions.csv", sep=",")
eval["time"] = pd.to_datetime(eval["time"])
print(eval.head())

   Unnamed: 0  latitude_predicted  longitude_predicted  latitude  longitude  \
0           0            8.172972            76.867065   8.15948   76.75688   
1           1          -29.605520            31.380581 -29.73632   31.32603   
2           2          -29.806127            31.123894 -29.81090   31.12018   
3           3          -29.806127            31.137304 -29.83246   31.08940   
4           4          -34.532867            18.298450 -34.44986   18.18804   

                   vesselId                       time  
0  61e9f38eb937134a3c4bfd8b 1970-01-01 00:28:25.082256  
1  61e9f38eb937134a3c4bfd8b 1970-01-01 00:28:26.243934  
2  61e9f38eb937134a3c4bfd8b 1970-01-01 00:28:26.253403  
3  61e9f38eb937134a3c4bfd8b 1970-01-01 00:28:26.254530  
4  61e9f38eb937134a3c4bfd8b 1970-01-01 00:28:26.526189  


In [180]:
predictions = pd.read_csv("predictions_transformer.csv", sep=",")
predictions["time"] = test["time"]
predictions["vesselId"] = test["vesselId"]
predictions["latitude"] = predictions["latitude_predicted"]
predictions["longitude"] = predictions["longitude_predicted"]
predictions = predictions.drop(columns=["latitude_predicted", "longitude_predicted"])

In [156]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go


def visualize_vessel_movements(training_data, predictions, latitude, longitude):
    """
    Visualize vessel movements on a map with lines and markers for each data point.

    Parameters:
    - training_data (pandas.DataFrame): A DataFrame with columns ['time', 'latitude', 'longitude', 'vesselId'] for training data.
    - predictions (pandas.DataFrame): A DataFrame with columns ['time', 'latitude', 'longitude', 'vesselId'] for prediction data.
    - latitude (str): The column name for latitude.
    - longitude (str): The column name for longitude.

    Returns:
    - A Plotly interactive figure.
    """
    # Ensure 'time' is in datetime format for better tooltip handling
    training_data["time"] = pd.to_datetime(training_data["time"])
    predictions["time"] = pd.to_datetime(predictions["time"])

    # Sorting the DataFrames by time to ensure lines are drawn correctly
    training_data = training_data.sort_values(by=["vesselId", "time"])
    predictions = predictions.sort_values(by=["vesselId", "time"])

    # Define colors for training and prediction data
    colors = {"Training": "blue", "Prediction": "red"}

    # Create the base map with training data lines and markers in blue
    fig = px.line_geo(
        training_data,
        lat=latitude,
        lon=longitude,
        color_discrete_sequence=[colors["Training"]],
        hover_name="vesselId",
        hover_data={"time": True, latitude: ":.3f", longitude: ":.3f"},
        projection="natural earth",
        title="Vessel Movements Over Time",
    )

    # Add markers for training data points
    fig.add_trace(
        go.Scattergeo(
            lon=training_data[longitude],
            lat=training_data[latitude],
            mode="markers",
            marker=dict(size=6, color=colors["Training"], opacity=0.7),
            name="Training Data",
            hoverinfo="text",
            text=training_data.apply(
                lambda row: f'ID: {row["vesselId"]}<br>Time: {row["time"]}<br>Lat: {row[latitude]:.3f}<br>Lon: {row[longitude]:.3f}',
                axis=1,
            ),
        )
    )

    # Add lines and markers for prediction data in red
    fig.add_trace(
        go.Scattergeo(
            lon=predictions[longitude],
            lat=predictions[latitude],
            mode="lines+markers",
            line=dict(color=colors["Prediction"], width=2),
            marker=dict(size=6, color=colors["Prediction"], opacity=0.8),
            name="Predictions",
            hoverinfo="text",
            text=predictions.apply(
                lambda row: f'ID: {row["vesselId"]}<br>Time: {row["time"]}<br>Lat: {row[latitude]:.3f}<br>Lon: {row[longitude]:.3f}',
                axis=1,
            ),
        )
    )

    # Enhancing map and layout details
    fig.update_geos(
        fitbounds="locations", showcountries=True, countrycolor="RebeccaPurple"
    )
    fig.update_layout(
        margin={"r": 0, "t": 30, "l": 0, "b": 0},
        title_font_size=20,
        legend_title_text="Data Type",
    )

    return fig

In [181]:
vessel_id_counts = train["vesselId"].value_counts()

# Sort the vessel IDs by frequency
sorted_vessel_ids = vessel_id_counts.sort_values(ascending=False)
filtered_vessel_ids = sorted_vessel_ids[
    sorted_vessel_ids.index.isin(predictions["vesselId"])
]
print(filtered_vessel_ids.tail(20))

vesselId
61e9f452b937134a3c4c01e9    939
61e9f422b937134a3c4c00c3    936
61e9f3b8b937134a3c4bfe87    897
61e9f40bb937134a3c4c002f    871
61e9f3d7b937134a3c4bff2d    856
61e9f426b937134a3c4c00d7    853
61e9f43cb937134a3c4c0167    823
61e9f402b937134a3c4c0009    799
61e9f40eb937134a3c4c003f    785
61e9f3acb937134a3c4bfe2d    776
61e9f435b937134a3c4c012d    763
61e9f3c1b937134a3c4bfea9    758
61e9f424b937134a3c4c00cd    697
61e9f3f1b937134a3c4bff97    626
61e9f457b937134a3c4c0205    617
61e9f45ab937134a3c4c021b    605
61e9f45fb937134a3c4c0239    509
61e9f47ab937134a3c4c02f9    508
6326eed6c46d6a20d22ca319    451
61e9f3bcb937134a3c4bfe91    328
Name: count, dtype: int64


In [184]:
vesselId = "61e9f426b937134a3c4c00d7"
fig = visualize_vessel_movements(

    train[train["vesselId"] == vesselId],
    predictions[predictions["vesselId"] == vesselId],
    "latitude",
    "longitude",
)
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

