# Close Encounters Calculator
### Preamble: Session details
Start a Cloudera Machine Learning (CML) session with the following session settings:

![Cloudera Machine Learning Session Settings](close-encounters/media/CloseEncountersSessionCML.JPG "Cloudera Machine Learning Session Settings")

### 1. Install requirements 
You may need to install some additional Python packages the first time you run this code. If so, uncomment and run the cell below.

In [1]:
#!pip install close-encounters==0.1.0

### 2. Library imports

In [2]:
# Python
import pandas as pd
# Alias the legacy `DataFrame.iteritems` name to `DataFrame.items`.
# NOTE(review): presumably required because a dependency still calls
# `iteritems` — confirm which one, and drop the alias once it is no longer needed.
pd.DataFrame.iteritems = pd.DataFrame.items # Hotfix since iteritems is deprecated
import numpy as np
from time import time
from close_encounters import CloseEncounters
import os
from pyspark.sql import SparkSession
from IPython.display import display, HTML
# Display options: never truncate columns, show at most 100 rows per DataFrame.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

  from pkg_resources import resource_string


### 2. Close encounter algorithm settings

In [3]:
## Set Minimal Horizontal Separation in Nautical Miles (NM)
h_dist_NM = 5

## Set Minimal Vertical Separation in feet (ft)
v_dist_ft = 1000

# Set Minimal Flight Level (FL)
# Note: All flight sections below this altitude are pruned before close encounter algorithm is applied.
v_cutoff_FL = 245

# Set resampling frequency (presumably in seconds, per the `_s` suffix — TODO confirm)
freq_s = 10

# Set Maximal Interpolation Time in Minutes (min)
# Note: Whenever a trajectory is missing a portion of the flight which takes longer than this time, it will not be interpolated.
t_max = 10

### 3. Spark Session Initialization

In [4]:
# Initialize the Spark Session.
# The builder chain is wrapped in parentheses, so no line-continuation
# backslashes are needed (a stray trailing space after one breaks the cell).
spark = (
    SparkSession.builder
    .appName("CloseEncounters")
    .config("spark.executor.memory", "12g")
    .config("spark.driver.memory", "10g")
    .config("spark.executor.cores", "1")
    .config("spark.executor.instances", "5")
    .config("spark.sql.shuffle.partitions", "100")
    .config("spark.default.parallelism", "100")
    .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    .config("spark.rpc.message.maxSize", "512")
    .getOrCreate()
)

# Display the Spark URL to monitor the process.
# The link is assembled from two environment variables.
engine_id = os.getenv('CDSW_ENGINE_ID')
domain = os.getenv('CDSW_DOMAIN')

if engine_id and domain:
    # Format the URL
    url = f"https://spark-{engine_id}.{domain}"

    # Display the clickable URL
    display(HTML(f'<a href="{url}">{url}</a>'))
else:
    # os.getenv returns None for unset variables; formatting those would
    # render a dead "https://spark-None.None" link, so report it instead.
    url = None
    print("Spark UI link unavailable: CDSW_ENGINE_ID and/or CDSW_DOMAIN is not set.")

Setting spark.hadoop.yarn.resourcemanager.principal to quinten.goens


### 4. Run on sample data

In [5]:
%%time
# Initiate Close Encounters with Spark
ce = CloseEncounters(spark = spark)

# Load trajectories into close encounters
#ce = ce.load_parquet_trajectories(
#    parquet_path = 'data/flight_profiles_cpf_20240701_filtered.parquet',
#    flight_id_col = 'FLIGHT_ID', 
#    icao24_col = 'ICAO24',
#    longitude_col = 'LONGITUDE',
#    latitude_col = 'LATITUDE',
#    time_over_col = 'TIME_OVER',
#    flight_level_col = 'FLIGHT_LEVEL'
#)

ce = ce.load_sample_trajectories(nrows = 1000000)

[2025-07-15 14:31:49,154] INFO - Initialized CloseEncounters class.
[2025-07-15 14:31:49,223] INFO - Loaded bundled sample trajectories.
[2025-07-15 14:32:37,487] INFO - Loaded trajectory data from pandas DataFrame.
[2025-07-15 14:32:37,729] INFO - Loaded trajectory data from Spark DataFrame.


CPU times: user 42.1 s, sys: 3.1 s, total: 45.2 s
Wall time: 48.6 s


In [6]:
%%time
ce = ce.resample(freq_s = freq_s, t_max=t_max)

[2025-07-15 14:33:07,486] INFO - Resampling complete. Total segments: 2608395   


CPU times: user 224 ms, sys: 62.3 ms, total: 286 ms
Wall time: 29.7 s


In [7]:
%%time
# Find close encounters
ce_sdf = ce.find_close_encounters(
    h_dist_NM=h_dist_NM,
    v_dist_ft=v_dist_ft,
    v_cutoff_FL=v_cutoff_FL,
    freq_s=freq_s,
    t_max=t_max,
    method = 'half_disk'
)

# Convert from a Spark DataFrame (sdf) to Pandas Dataframe (pdf)
ce_pdf_half_disk = ce_sdf.toPandas()

print(ce_pdf_half_disk.shape)

[2025-07-15 14:33:07,496] INFO - Starting close encounter detection with method='half_disk'
[2025-07-15 14:33:07,498] INFO - Skipping resample: already done (freq_s=10, t_max=10)
[2025-07-15 14:35:18,929] INFO - Found 15438 candidate close encounters         
                                                                                

(15438, 50)
CPU times: user 4.92 s, sys: 237 ms, total: 5.15 s
Wall time: 2min 17s


In [11]:
# Write the half_disk result to 'test.csv' (path is relative to the working directory).
ce_pdf_half_disk.to_csv('test.csv')

In [10]:
# Show every row of one encounter (selected by its ce_id), in chronological order.
ce_pdf_half_disk.loc[
    ce_pdf_half_disk['ce_id'] == 'ID_273716177_ID_273716539'
].sort_values(by='time_over')

Unnamed: 0,ID2,ID1,time_over,h3_group,ID,lat1,lon1,time1,altitude_ft1,flight_id1,icao241,lat2,lon2,time2,altitude_ft2,flight_id2,icao242,time_diff_s,v_dist_ft,h_dist_NM,ce_id,3D_dist_NM,start_time,end_time,start_3D_dist_NM,end_3D_dist_NM,start_v_dist_ft,end_v_dist_ft,start_h_dist_NM,end_h_dist_NM,min_3D_dist_NM,max_3D_dist_NM,min_v_dist_ft,max_v_dist_ft,min_h_dist_NM,max_h_dist_NM,min_3D_dist_NM_time,max_3D_dist_NM_time,min_v_dist_ft_time,max_v_dist_ft_time,min_h_dist_NM_time,max_h_dist_NM_time,is_start_time,is_end_time,is_min_3D_dist_NM,is_max_3D_dist_NM,is_min_v_dist_ft,is_max_v_dist_ft,is_min_h_dist_NM,is_max_h_dist_NM
15424,412316910508,300647762925,2024-07-01 13:38:30,8439445ffffffff,300647762925_412316910508,41.014722,1.0775,2024-07-01 13:38:30,36000.0,ID_273716177,4CADF2,41.007778,1.183889,2024-07-01 13:38:30,35000.0,ID_273716539,4CAD3F,0,1000.0,4.843304,ID_273716177_ID_273716539,4.846099,2024-07-01 13:38:30,2024-07-01 13:40:40,4.846099,4.577322,1000.0,1000.0,4.843304,4.574363,0.906696,4.846099,1000.0,1000.0,0.891634,4.843304,2024-07-01 13:39:40,2024-07-01 13:38:30,2024-07-01 13:38:30,2024-07-01 13:38:30,2024-07-01 13:39:40,2024-07-01 13:38:30,True,False,False,True,True,True,False,True
15426,412316910509,300647762926,2024-07-01 13:38:40,8439469ffffffff,300647762926_412316910509,41.034352,1.083426,2024-07-01 13:38:40,36000.0,ID_273716177,4CADF2,41.026204,1.174722,2024-07-01 13:38:40,35000.0,ID_273716539,4CAD3F,0,1000.0,4.168432,ID_273716177_ID_273716539,4.17168,2024-07-01 13:38:30,2024-07-01 13:40:40,4.846099,4.577322,1000.0,1000.0,4.843304,4.574363,0.906696,4.846099,1000.0,1000.0,0.891634,4.843304,2024-07-01 13:39:40,2024-07-01 13:38:30,2024-07-01 13:38:30,2024-07-01 13:38:30,2024-07-01 13:39:40,2024-07-01 13:38:30,False,False,False,False,True,True,False,False
15428,412316910510,300647762927,2024-07-01 13:38:50,8439469ffffffff,300647762927_412316910510,41.053981,1.089352,2024-07-01 13:38:50,36000.0,ID_273716177,4CADF2,41.04463,1.165556,2024-07-01 13:38:50,35000.0,ID_273716539,4CAD3F,0,1000.0,3.499671,ID_273716177_ID_273716539,3.503538,2024-07-01 13:38:30,2024-07-01 13:40:40,4.846099,4.577322,1000.0,1000.0,4.843304,4.574363,0.906696,4.846099,1000.0,1000.0,0.891634,4.843304,2024-07-01 13:39:40,2024-07-01 13:38:30,2024-07-01 13:38:30,2024-07-01 13:38:30,2024-07-01 13:39:40,2024-07-01 13:38:30,False,False,False,False,True,True,False,False
15430,412316910511,300647762928,2024-07-01 13:39:00,8439717ffffffff,300647762928_412316910511,41.073611,1.095278,2024-07-01 13:39:00,36000.0,ID_273716177,4CADF2,41.063056,1.156389,2024-07-01 13:39:00,35000.0,ID_273716539,4CAD3F,0,1000.0,2.84105,ID_273716177_ID_273716539,2.845813,2024-07-01 13:38:30,2024-07-01 13:40:40,4.846099,4.577322,1000.0,1000.0,4.843304,4.574363,0.906696,4.846099,1000.0,1000.0,0.891634,4.843304,2024-07-01 13:39:40,2024-07-01 13:38:30,2024-07-01 13:38:30,2024-07-01 13:38:30,2024-07-01 13:39:40,2024-07-01 13:38:30,False,False,False,False,True,True,False,False
15432,412316910512,300647762929,2024-07-01 13:39:10,8439713ffffffff,300647762929_412316910512,41.093426,1.101204,2024-07-01 13:39:10,36000.0,ID_273716177,4CADF2,41.081481,1.146204,2024-07-01 13:39:10,35000.0,ID_273716539,4CAD3F,0,1000.0,2.161344,ID_273716177_ID_273716539,2.167601,2024-07-01 13:38:30,2024-07-01 13:40:40,4.846099,4.577322,1000.0,1000.0,4.843304,4.574363,0.906696,4.846099,1000.0,1000.0,0.891634,4.843304,2024-07-01 13:39:40,2024-07-01 13:38:30,2024-07-01 13:38:30,2024-07-01 13:38:30,2024-07-01 13:39:40,2024-07-01 13:38:30,False,False,False,False,True,True,False,False
15434,412316910513,300647762930,2024-07-01 13:39:20,8439713ffffffff,300647762930_412316910513,41.113241,1.10713,2024-07-01 13:39:20,36000.0,ID_273716177,4CADF2,41.099907,1.136019,2024-07-01 13:39:20,35000.0,ID_273716539,4CAD3F,0,1000.0,1.534303,ID_273716177_ID_273716539,1.543105,2024-07-01 13:38:30,2024-07-01 13:40:40,4.846099,4.577322,1000.0,1000.0,4.843304,4.574363,0.906696,4.846099,1000.0,1000.0,0.891634,4.843304,2024-07-01 13:39:40,2024-07-01 13:38:30,2024-07-01 13:38:30,2024-07-01 13:38:30,2024-07-01 13:39:40,2024-07-01 13:38:30,False,False,False,False,True,True,False,False
15436,412316910514,300647762931,2024-07-01 13:39:30,8439717ffffffff,300647762931_412316910514,41.133056,1.113056,2024-07-01 13:39:30,36000.0,ID_273716177,4CADF2,41.118333,1.125833,2024-07-01 13:39:30,35000.0,ID_273716539,4CAD3F,0,1000.0,1.057235,ID_273716177_ID_273716539,1.069968,2024-07-01 13:38:30,2024-07-01 13:40:40,4.846099,4.577322,1000.0,1000.0,4.843304,4.574363,0.906696,4.846099,1000.0,1000.0,0.891634,4.843304,2024-07-01 13:39:40,2024-07-01 13:38:30,2024-07-01 13:38:30,2024-07-01 13:38:30,2024-07-01 13:39:40,2024-07-01 13:38:30,False,False,False,False,True,True,False,False
15437,412316910515,300647762932,2024-07-01 13:39:40,8439717ffffffff,300647762932_412316910515,41.15287,1.118981,2024-07-01 13:39:40,36000.0,ID_273716177,4CADF2,41.138194,1.116111,2024-07-01 13:39:40,35000.0,ID_273716539,4CAD3F,0,1000.0,0.891634,ID_273716177_ID_273716539,0.906696,2024-07-01 13:38:30,2024-07-01 13:40:40,4.846099,4.577322,1000.0,1000.0,4.843304,4.574363,0.906696,4.846099,1000.0,1000.0,0.891634,4.843304,2024-07-01 13:39:40,2024-07-01 13:38:30,2024-07-01 13:38:30,2024-07-01 13:38:30,2024-07-01 13:39:40,2024-07-01 13:38:30,False,False,True,False,True,True,True,False
15435,412316910516,300647762933,2024-07-01 13:39:50,8439469ffffffff,300647762933_412316910516,41.172685,1.124907,2024-07-01 13:39:50,36000.0,ID_273716177,4CADF2,41.158056,1.106389,2024-07-01 13:39:50,35000.0,ID_273716539,4CAD3F,0,1000.0,1.214651,ID_273716177_ID_273716539,1.22575,2024-07-01 13:38:30,2024-07-01 13:40:40,4.846099,4.577322,1000.0,1000.0,4.843304,4.574363,0.906696,4.846099,1000.0,1000.0,0.891634,4.843304,2024-07-01 13:39:40,2024-07-01 13:38:30,2024-07-01 13:38:30,2024-07-01 13:38:30,2024-07-01 13:39:40,2024-07-01 13:38:30,False,False,False,False,True,True,False,False
15433,412316910517,300647762934,2024-07-01 13:40:00,8439717ffffffff,300647762934_412316910517,41.1925,1.130833,2024-07-01 13:40:00,36000.0,ID_273716177,4CADF2,41.173796,1.097593,2024-07-01 13:40:00,35000.0,ID_273716539,4CAD3F,0,1000.0,1.877489,ID_273716177_ID_273716539,1.884689,2024-07-01 13:38:30,2024-07-01 13:40:40,4.846099,4.577322,1000.0,1000.0,4.843304,4.574363,0.906696,4.846099,1000.0,1000.0,0.891634,4.843304,2024-07-01 13:39:40,2024-07-01 13:38:30,2024-07-01 13:38:30,2024-07-01 13:38:30,2024-07-01 13:39:40,2024-07-01 13:38:30,False,False,False,False,True,True,False,False


%%time
# Find close encounters
ce_sdf = ce.find_close_encounters(
    h_dist_NM=h_dist_NM,
    v_dist_ft=v_dist_ft,
    v_cutoff_FL=v_cutoff_FL,
    freq_s=freq_s,
    t_max=t_max,
    method = 'brute_force'
)

# Convert from a Spark DataFrame (sdf) to Pandas Dataframe (pdf)
ce_pdf_bf = ce_sdf

print(ce_pdf_bf.shape)

In [None]:
%%time
# Find close encounters
ce_duckdb = ce.find_close_encounters_duckdb(
    h_dist_NM=h_dist_NM,
    v_dist_ft=v_dist_ft,
    v_cutoff_FL=v_cutoff_FL,
    freq_s=freq_s,
    t_max=t_max
)

print(ce_duckdb.shape)


In [None]:
# Build an "<ID1>_<ID2>" key per row, the same layout as the `ID` column
# shown in the half_disk output above.
ce_duckdb["ID_combined"] = ce_duckdb["ID1"].astype(str) + "_" + ce_duckdb["ID2"].astype(str)

In [None]:
# Flag the DuckDB rows whose pair key also occurs in the half_disk result.
ce_duckdb['isin_spark'] = ce_duckdb['ID_combined'].isin(
    ce_pdf_half_disk['ID'].tolist()
)

In [None]:
# Rows found by DuckDB that are absent from the half_disk result.
ce_duckdb.loc[~ce_duckdb['isin_spark']]

In [None]:
# Peek at the first `altitude_ft1` value of the half_disk result.
ce_pdf_half_disk['altitude_ft1'].values[0]

In [None]:
%%time
# Find close encounters
ce_sdf = ce.find_close_encounters(
    h_dist_NM=h_dist_NM,
    v_dist_ft=v_dist_ft,
    v_cutoff_FL=v_cutoff_FL,
    freq_s=freq_s,
    t_max=t_max,
    method = 'half_disk'
)

# Convert from a Spark DataFrame (sdf) to Pandas Dataframe (pdf)
ce_pdf_half_disk = ce_sdf.toPandas()

print(ce_pdf_half_disk.shape)

In [None]:
%%time
# Find close encounters
ce_sdf = ce.find_close_encounters_duckdb(
    h_dist_NM=h_dist_NM,
    v_dist_ft=v_dist_ft,
    v_cutoff_FL=v_cutoff_FL,
    freq_s=freq_s,
    t_max=t_max
)

print(ce_sdf.shape)


In [None]:
import math
from pyspark.sql import SparkSession
from pyspark.sql.window import Window
from pyspark.sql.functions import col, lag, udf
from pyspark.sql.types import DoubleType

def calculate_bearing(lat1, lon1, lat2, lon2):
    """
    Initial great-circle bearing (forward azimuth) from point 1 to point 2.

    All coordinates are given in decimal degrees.

    Parameters:
        lat1 (float): Latitude of the starting point.
        lon1 (float): Longitude of the starting point.
        lat2 (float): Latitude of the destination point.
        lon2 (float): Longitude of the destination point.

    Returns:
        float or None: Bearing in degrees within [0, 360), or None when
        any of the four coordinates is None.
    """
    # A missing coordinate makes the bearing undefined.
    if any(coord is None for coord in (lat1, lon1, lat2, lon2)):
        return None

    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    d_lambda = math.radians(lon2 - lon1)

    # East and north components of the direction towards point 2.
    east = math.sin(d_lambda) * math.cos(phi2)
    north = (math.cos(phi1) * math.sin(phi2) -
             math.sin(phi1) * math.cos(phi2) * math.cos(d_lambda))

    # atan2 yields (-180, 180] degrees; shift and wrap into [0, 360).
    return (math.degrees(math.atan2(east, north)) + 360) % 360


# Wrap the Python bearing function as a Spark UDF that returns a double.
calculate_bearing_udf = udf(calculate_bearing, DoubleType())

# NOTE(review): `resampled_sdf` is not created anywhere in the visible
# notebook, so it must already exist in the kernel — confirm where it comes from.
# Window over each flight's points, ordered by timestamp.
window_spec = Window.partitionBy("flight_id").orderBy("time_over")

# Add the previous point's latitude/longitude, then compute the heading from
# the previous point to the current one with the UDF.
resampled_sdf = (
    resampled_sdf
    .withColumn("prev_latitude", lag("latitude").over(window_spec))
    .withColumn("prev_longitude", lag("longitude").over(window_spec))
    .withColumn(
        "heading",
        calculate_bearing_udf(
            col("prev_latitude"),
            col("prev_longitude"),
            col("latitude"),
            col("longitude"),
        ),
    )
)


In [None]:
# Bring at most 20,000 rows of the Spark DataFrame into pandas for inspection.
resampled_pdf = resampled_sdf.limit(20000).toPandas()

In [None]:
# Display the pandas sample.
resampled_pdf

In [None]:
# NOTE(review): `-f` does not appear to be a documented `pip uninstall` option
# (`-y/--yes` is the non-interactive flag) — confirm. The next cell imports
# plotly, so this uninstall looks like a debugging leftover.
!pip uninstall -f plotly

In [None]:
# Scatter plot of the sampled points.
# NOTE(review): this reads columns 'lat'/'lon', while the heading cell reads
# 'latitude'/'longitude' from the same data — confirm which names exist.
import plotly.express as px
px.scatter(resampled_pdf, x = 'lat', y = 'lon')

In [None]:
# This cell previously held only the incomplete expression `ce.` (attribute
# access with no attribute name), which is a SyntaxError and stops "Run All".
# To explore the object's API, run `dir(ce)` or `help(ce)` instead.

In [None]:
# NOTE(review): earlier cells call `.toPandas()` on the value returned by
# `find_close_encounters`, so this presumably rebinds `ce` from the
# CloseEncounters object to a Spark DataFrame. No settings are passed, so the
# method's defaults (not visible in this notebook) apply — confirm intent.
ce = ce.find_close_encounters()

In [None]:
# Show the result; presumably `ce` is a Spark DataFrame at this point — TODO confirm.
ce.show()

In [None]:
# NOTE(review): none of the names used in this cell — the bare
# `load_sample_trajectories`, `CloseEncountersH3HalfDisk`,
# `horizontal_separation_NM`, `vertical_separation_FL`, `minimal_FL`,
# `deltaT_min` — are imported or defined in the visible notebook.
# Presumably left over from an earlier API; confirm before running.
df = load_sample_trajectories()
encounters_df = CloseEncountersH3HalfDisk(
    df, 
    distance_nm = horizontal_separation_NM, 
    FL_diff = vertical_separation_FL, 
    FL_min = minimal_FL, 
    deltaT_min = deltaT_min, 
    pnumb = 100, 
    spark = spark)

In [None]:
# NOTE(review): `create_keplergl_html` is not imported or defined in the
# visible notebook — confirm where it should come from.
create_keplergl_html(encounters_df)