In [None]:
import pandas as pd
import numpy as np
from math import radians
from scipy.spatial import cKDTree

# Read the input CSV and parse its 'timestamp' column into pandas datetimes
# in one pipeline, so later code can subtract timestamps directly.
data = (
    pd.read_csv('/content/sample_data.csv')
    .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp']))
)

# Define the proximity threshold in kilometers and express it as the central
# angle (in radians) that this distance subtends on a spherical Earth.
EARTH_RADIUS_KM = 6371.0  # Earth's radius in km
proximity_threshold_km = 1.0  # 1 km threshold
proximity_threshold_rad = proximity_threshold_km / EARTH_RADIUS_KM

# Convert lat/lon (treated as degrees) to radians
data['lat_rad'] = np.radians(data['lat'])
data['lon_rad'] = np.radians(data['lon'])

# (lat, lon) in radians, one row per record. NOTE: this array is NOT what the
# tree is built on. Euclidean distance in raw lat/lon space does not scale
# longitude differences by cos(lat) and does not wrap at +/-180 degrees, so
# it disagrees with great-circle distance away from the equator.
coords = np.vstack((data['lat_rad'], data['lon_rad'])).T

# Project every position onto the unit sphere. Straight-line (chord) distance
# between two unit vectors grows monotonically with the great-circle angle
# between them, so a Euclidean KD-tree query on these points is exact.
cos_lat = np.cos(coords[:, 0])
coords_xyz = np.column_stack((
    cos_lat * np.cos(coords[:, 1]),
    cos_lat * np.sin(coords[:, 1]),
    np.sin(coords[:, 0]),
))

# Chord length on the unit sphere for a central angle theta is 2*sin(theta/2)
proximity_threshold_chord = 2.0 * np.sin(proximity_threshold_rad / 2.0)

# Build a spatial tree using cKDTree for efficient querying
tree = cKDTree(coords_xyz)

# Query the tree for all record pairs within the proximity threshold.
# The result is a set of (i, j) positional index tuples into `data` with i < j.
proximity_pairs = tree.query_pairs(proximity_threshold_chord)

# Build proximity events from the spatial index pairs: keep a pair only when
# the two records belong to different vessels (MMSI) and were observed close
# together in time.
max_time_gap_seconds = 3600  # 1 hour in seconds; adjust to change the time window

# Materialize the (i, j) positional index pairs as an (n_pairs, 2) array.
# The reshape keeps the two-column shape even when there are no pairs at all.
pair_index = np.array(list(proximity_pairs), dtype=np.intp).reshape(-1, 2)

# Look up both sides of every pair in one positional selection each, instead
# of two row-wise .iloc calls per pair inside a Python loop.
event_columns = ['mmsi', 'timestamp']
side_1 = data[event_columns].iloc[pair_index[:, 0]].reset_index(drop=True)
side_2 = data[event_columns].iloc[pair_index[:, 1]].reset_index(drop=True)

# Absolute time gap in seconds; missing timestamps yield NaN, which fails the
# comparison below and therefore drops the pair.
time_difference = (side_1['timestamp'] - side_2['timestamp']).abs().dt.total_seconds()

# Ensure that vessels have different MMSIs and are close in time
is_event = (side_1['mmsi'] != side_2['mmsi']) & (time_difference <= max_time_gap_seconds)

# Assemble the events. The columns exist even when no pair qualifies, so
# downstream column access does not fail on an empty result.
proximity_df_vectorized = pd.DataFrame({
    'mmsi': side_1.loc[is_event, 'mmsi'],
    'vessel_proximity': side_2.loc[is_event, 'mmsi'],
    'timestamp': side_1.loc[is_event, 'timestamp'],
}).reset_index(drop=True)

# Same events as a list of dicts, one per row
proximity_events_vectorized = proximity_df_vectorized.to_dict('records')

# Output the first few proximity events
print(proximity_df_vectorized.head())


        mmsi  vessel_proximity                 timestamp
0  564780000         563014650 2023-03-22 18:17:16+00:00
1  564780000         352002300 2023-03-13 03:49:09+00:00
2  218719092         232006548 2023-03-15 09:58:20+00:00
3  563078430         565761000 2023-03-14 16:20:40+00:00
4  563014650         565761000 2023-03-04 21:42:55+00:00
