# In [None]:
# Importing required libraries

import math

import numpy as np
import pandas as pd

# Geopandas for working with geospatial data.
import geopandas as gpd

# Provides the Point class to create point geometries.
from shapely.geometry import Point

def haversine(lat1, lon1, lat2, lon2, radius=6371):
    """Return the great-circle distance between two points (haversine formula).

    Args:
        lat1, lon1: Latitude and longitude of the first point, in decimal
            degrees.
        lat2, lon2: Latitude and longitude of the second point, in decimal
            degrees.
        radius: Radius of the sphere; the result is expressed in the same
            unit. Defaults to 6371, the Earth's radius in kilometres.

    Returns:
        The distance along the sphere's surface as a float (kilometres with
        the default radius). NaN inputs yield NaN.
    """
    phi1 = math.radians(lat1)
    phi2 = math.radians(lat2)
    half_dlat = math.radians(lat2 - lat1) / 2
    half_dlon = math.radians(lon2 - lon1) / 2

    a = (math.sin(half_dlat) ** 2
         + math.cos(phi1) * math.cos(phi2) * math.sin(half_dlon) ** 2)

    # For (near-)antipodal points floating-point rounding can leave `a` a hair
    # above 1, which would make asin() raise "math domain error". A plain
    # comparison is used (rather than min()) so that NaN still propagates.
    if a > 1.0:
        a = 1.0

    central_angle = 2 * math.asin(math.sqrt(a))
    return radius * central_angle



# Read the sample vessel records from disk.
data = pd.read_csv('sample_data.csv')

# Parse the timestamp column into timezone-aware (UTC) datetimes so it can be
# compared against the UTC window bounds below.
data['timestamp'] = pd.to_datetime(data['timestamp'], utc=True)

# Bounds (both inclusive) of the time window to search for proximity events.
t1 = pd.to_datetime('2023-03-05 00:00:00', utc=True)
t2 = pd.to_datetime('2023-03-06 23:59:59', utc=True)

# Keep only the records whose timestamp lies inside [t1, t2].
in_window = data['timestamp'].between(t1, t2)
filtered_data = data[in_window]

# Build one shapely Point per remaining record. Points are (x, y), so the
# longitude goes first and the latitude second.
geometry = [
    Point(lon, lat)
    for lon, lat in zip(filtered_data['lon'], filtered_data['lat'])
]

# Wrap the filtered records and their points in a GeoDataFrame, a DataFrame
# subclass that carries a geometry column.
geo_df = gpd.GeoDataFrame(filtered_data, geometry=geometry)



def _haversine_to_many(lat, lon, lats, lons, radius_km=6371.0):
    """Return haversine distances (km) from one point to arrays of points (degrees)."""
    phi0 = np.radians(lat)
    phis = np.radians(lats)
    half_dphi = np.radians(lats - lat) / 2
    half_dlam = np.radians(lons - lon) / 2
    a = (np.sin(half_dphi) ** 2
         + np.cos(phi0) * np.cos(phis) * np.sin(half_dlam) ** 2)
    # Clip guards arcsin against rounding just outside [0, 1]; NaN passes through.
    return radius_km * 2 * np.arcsin(np.sqrt(np.clip(a, 0.0, 1.0)))


def calculate_proximities(row, geo_df, threshold_km=100):
    """Find the vessels that lie within ``threshold_km`` of the vessel in ``row``.

    The great-circle (haversine) distance from ``row`` to every record of
    ``geo_df`` is computed in one vectorised pass. No degree-based buffer is
    used as a prefilter: a fixed "km / 111" radius in degrees is too narrow in
    the east-west direction away from the equator (a degree of longitude
    shrinks with cos(latitude)) and does not wrap at the +/-180 meridian, so
    it silently dropped vessels that were actually within the threshold.

    NOTE(review): records are compared regardless of their timestamp, exactly
    as before; confirm whether proximity should be restricted to records
    taken at (about) the same time.

    Args:
        row: A single record with 'mmsi', 'lat', 'lon' and 'timestamp'
            entries (latitude/longitude in decimal degrees).
        geo_df: Frame with 'mmsi', 'lat' and 'lon' columns holding every
            record to compare against.
        threshold_km: Distance threshold in kilometres; a vessel qualifies
            when its distance is strictly less than this value.

    Returns:
        pandas.Series with 'mmsi' (from ``row``), 'vessel_proximity' (list of
        unique MMSI values of the other vessels within range, in order of
        first appearance in ``geo_df``) and 'timestamp' (from ``row``).
    """
    lats = geo_df['lat'].to_numpy(dtype=float)
    lons = geo_df['lon'].to_numpy(dtype=float)
    distances = _haversine_to_many(float(row['lat']), float(row['lon']), lats, lons)

    # Records of the vessel itself must never count as a proximity hit.
    is_other_vessel = (geo_df['mmsi'] != row['mmsi']).to_numpy()
    is_close = is_other_vessel & (distances < threshold_km)

    close_mmsi = geo_df.loc[is_close, 'mmsi'].unique().tolist()

    return pd.Series({
        'mmsi': row['mmsi'],
        'vessel_proximity': close_mmsi,
        'timestamp': row['timestamp']
    })

# Evaluate every record of the GeoDataFrame against the full GeoDataFrame;
# the frame itself is forwarded as the second positional argument.
proximity_events = geo_df.apply(calculate_proximities, axis=1, args=(geo_df,))

# Renumber the rows of the result 0..n-1, discarding the original index.
proximity_events = proximity_events.reset_index(drop=True)

# Show the results
print(proximity_events)

# Persist the results to a CSV file (without the index column)
proximity_events.to_csv('vessel_proximity_events_vectorized.csv', index=False)


# In [None]:
import folium
import pandas as pd


# Create a map centred on the mean latitude/longitude of the filtered records.
center_lat = filtered_data['lat'].mean()
center_lon = filtered_data['lon'].mean()
map_center = [center_lat, center_lon]
mymap = folium.Map(location=map_center, zoom_start=2)

# Drop one marker per record; the popup shows the record's MMSI.
positions = zip(filtered_data['lat'], filtered_data['lon'], filtered_data['mmsi'])
for lat, lon, mmsi in positions:
    marker = folium.Marker(location=[lat, lon], popup=mmsi)
    marker.add_to(mymap)

# Leave the map as the last expression so the notebook renders it.
mymap