In [1]:
import html
import queue
import threading
import time
from datetime import timedelta

import folium
import ipywidgets as widgets
import pandas as pd
from folium.plugins import MarkerCluster
from IPython.display import display, clear_output

# Render matplotlib figures inline in the notebook
%matplotlib inline

In [2]:
# Source file holding the recorded AIS messages
parquet_file = "ais_data_20240911.parquet"

# Read the recording, coerce the "time" column to datetimes (unparseable
# values become NaT), and order the rows chronologically so they can be
# replayed as if they were arriving live.
df = (
    pd.read_parquet(parquet_file)
    .assign(time=lambda frame: pd.to_datetime(frame["time"], errors="coerce"))
    .sort_values("time")
)

# Hand-off buffer between the simulated producer and the map consumer
data_queue = queue.Queue()

# Simulate a streaming function
def simulate_stream(data, delay=0.1, out_queue=None):
    """Replay the rows of ``data`` onto a queue, one row at a time.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows to emit, in the frame's existing order. Each row is enqueued
        as the ``pandas.Series`` produced by ``DataFrame.iterrows``.
    delay : float, default 0.1
        Seconds to sleep after each row is enqueued (simulated real-time gap).
    out_queue : queue.Queue, optional
        Destination queue. When omitted, the module-level ``data_queue`` is
        used, which matches the previous behaviour.
    """
    # Resolve the destination once, up front. Looking the global up on every
    # put() would make a producer thread left over from an earlier run of the
    # cell start feeding a freshly created queue.
    target = data_queue if out_queue is None else out_queue
    for _, row in data.iterrows():
        target.put(row)
        time.sleep(delay)  # Simulate real-time delay (adjust as needed)

# Run the simulated stream on a background daemon thread so it does not
# block the notebook and does not keep the process alive on shutdown.
stream_thread = threading.Thread(
    target=simulate_stream,
    args=(df, 0.1),
    daemon=True,
)
stream_thread.start()

In [3]:
# Buffer of rows pulled off the stream queue so far
recent_data = list()

# Output widget that hosts the rendered map
output = widgets.Output()

# Width of the sliding window used when filtering rows by timestamp
# (5 minutes, measured back from the newest timestamp seen)
time_window = timedelta(seconds=5 * 60)

# Serializes map refreshes: update_map runs both on the periodic updater
# thread and on the main thread (button click / initial call), and every
# call mutates the shared recent_data list.
_update_lock = threading.Lock()


def _drain_queue():
    """Move every row currently waiting in data_queue into recent_data."""
    while True:
        try:
            # get_nowait() instead of empty()+get(): the check-then-get pair
            # can block forever if another consumer empties the queue between
            # the two calls.
            recent_data.append(data_queue.get_nowait())
        except queue.Empty:
            return


def _build_map(valid_positions):
    """Return a Folium map with one clustered marker per row of valid_positions."""
    # Center the map on the mean position of the plotted rows
    center = [valid_positions["lat"].mean(), valid_positions["lon"].mean()]
    ais_map = folium.Map(location=center, zoom_start=10)
    marker_cluster = MarkerCluster().add_to(ais_map)

    for _, row in valid_positions.iterrows():
        # Field values come straight from the data file and are rendered as
        # HTML by the popup/tooltip, so escape them before interpolation.
        vessel_name = html.escape(str(row["vessel_name"]))
        popup_text = (
            f"MMSI: {html.escape(str(row['mmsi']))}<br>"
            f"Vessel: {vessel_name}<br>"
            f"Speed: {html.escape(str(row['sog']))} knots<br>"
            f"Status: {html.escape(str(row['nas']))}"
        )
        folium.Marker(
            location=[row["lat"], row["lon"]],
            popup=popup_text,
            tooltip=vessel_name,
        ).add_to(marker_cluster)

    return ais_map


# Function to update the map
def update_map(change=None):
    """Redraw the map in ``output`` from the rows inside the sliding window.

    Drains ``data_queue`` into ``recent_data``, keeps only rows whose
    ``time`` is within ``time_window`` of the newest timestamp seen, and
    renders one marker per row that has a usable ``lat``/``lon``.

    Parameters
    ----------
    change : object, optional
        Argument supplied by ``Button.on_click`` when used as a click
        handler; it is not used.
    """
    # NOTE(review): this also runs on a background thread; confirm that the
    # Output context manager captures reliably there for the ipywidgets
    # version in use.
    with _update_lock, output:
        clear_output(wait=True)

        _drain_queue()
        if not recent_data:
            print("No data to plot yet...")
            return

        # Convert recent data to DataFrame; "now" is the newest timestamp seen
        recent_df = pd.DataFrame(recent_data)
        current_time = recent_df["time"].max()
        in_window = recent_df["time"] >= current_time - time_window

        # Drop expired rows from the shared buffer as well. The newest
        # timestamp never moves backwards, so a row that has left the window
        # can never re-enter it; keeping it would only grow memory and the
        # cost of every later refresh.
        recent_data[:] = [row for row, keep in zip(recent_data, in_window) if keep]
        recent_df = recent_df[in_window]

        # Filter out rows with missing lat/lon, then rows whose coordinates
        # fall outside the valid geographic range (these would otherwise
        # skew the map center).
        valid_positions = recent_df.dropna(subset=["lat", "lon"])
        valid_positions = valid_positions[
            valid_positions["lat"].between(-90, 90)
            & valid_positions["lon"].between(-180, 180)
        ].copy()

        if valid_positions.empty:
            print("No valid lat/lon data to plot in the current time window.")
            return

        display(_build_map(valid_positions))

# Manual refresh control: clicking the button invokes update_map
button = widgets.Button(description="Refresh Map")
button.on_click(callback=update_map)

# Set up a periodic update (every 5 seconds by default)
def periodic_update(interval=5, stop_event=None):
    """Call ``update_map`` repeatedly, pausing ``interval`` seconds between calls.

    Parameters
    ----------
    interval : float, default 5
        Seconds to wait after each refresh.
    stop_event : threading.Event, optional
        When provided, the loop exits once the event is set, and the wait
        between refreshes is cut short when that happens. When omitted the
        loop runs until the process exits, as before.
    """
    while stop_event is None or not stop_event.is_set():
        update_map()
        if stop_event is None:
            time.sleep(interval)
        else:
            # Event.wait returns as soon as the event is set, so a stop
            # request does not have to wait out the full interval.
            stop_event.wait(interval)

# Launch the periodic refresher on a background daemon thread
update_thread = threading.Thread(target=periodic_update, daemon=True)
update_thread.start()

# Show the refresh button followed by the map output area
display(button, output)

# Render once right away instead of waiting for the next scheduled refresh
update_map()