In [None]:
from helpers import *
import numpy as np
# Scratch calculation: 1.5 * sqrt(3) * 10.837435124897937.
# NOTE(review): the constant is not explained anywhere in this notebook —
# presumably a hexagon edge length for the selected H3 resolution; confirm its
# source and units before relying on the result.
10.837435124897937*3/2*np.sqrt(3)

np.float64(28.15648239008218)

In [1]:
# -----------------------------------------------------------------------------
# Imports
# -----------------------------------------------------------------------------

# PySpark libraries
from pyspark.sql import SparkSession
import pyspark.sql.functions as F
from pyspark.sql.functions import (
    udf, col, explode, radians,
    sin, cos, sqrt, atan2, lit, monotonically_increasing_id
)
from pyspark.sql.types import (
    StringType, ArrayType )
from pyspark.sql import Window
from tempo import *

# Data libraries
import h3
import pandas as pd
import numpy as np
from datetime import datetime

# Custom libraries
from helpers import select_resolution

# -----------------------------------------------------------------------------
# Spark Configuration
# -----------------------------------------------------------------------------
# Build (or reuse) the Spark session used by every cell below.
spark = (
    SparkSession.builder
    .appName("CloseEncountersH3")
    .config("spark.driver.memory", "12g")
    .config("spark.executor.memory", "10g")
    .config("spark.rpc.message.maxSize", 1028)
    .getOrCreate()
)

# -----------------------------------------------------------------------------
# Close encounter parameters
# -----------------------------------------------------------------------------
# Horizontal separation threshold in nautical miles: pairs whose computed
# distance is <= this value are kept by the distance filter.
distance_nm = 5

# Vertical separation threshold in flight levels (FL): pairs are kept only when
# the absolute flight-level difference is strictly below this value.
FL_diff = 9

# Lowest flight level analysed: only samples with FLIGHT_LEVEL strictly above
# this value are loaded (lower sections of a trajectory are not analysed).
FL_min = 250

# Longest run of interpolated samples to keep, in minutes; longer interpolated
# runs are removed after resampling (compared as deltaT_min * 60 seconds).
deltaT_min = 10

# -----------------------------------------------------------------------------
# Default / Automatic parameters
# -----------------------------------------------------------------------------
# H3 resolution derived from the horizontal threshold by helpers.select_resolution
# (selection logic lives in helpers and is not visible in this notebook).
resolution = select_resolution(distance_nm)
# Earth radius in km used by the haversine distance computation.
# NOTE(review): 6378 km corresponds to the equatorial radius; haversine
# implementations commonly use the mean radius (~6371 km) — confirm the choice.
earth_radius_km = 6378
print(f"The selected resolution for a distance of {distance_nm} NM is: {resolution}")


25/05/07 15:15:54 WARN Utils: Your hostname, Quintens-Laptop.local resolves to a loopback address: 127.0.0.1; using 10.130.77.145 instead (on interface en0)
25/05/07 15:15:54 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/05/07 15:15:54 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


The selected resolution for a distance of 5 NM is: 5


In [2]:
# -----------------------------------------------------------------------------
# Load and Filter Data
# -----------------------------------------------------------------------------
coords_df = pd.read_parquet('~/Repos/close-encounters/data/flight_profiles_cpf_20240701_filtered.parquet')

# Keep only samples inside the one-hour analysis window (both ends inclusive)
# that lie strictly above the minimum flight level.
in_scope = (
    (coords_df.TIME_OVER >= datetime(2024, 7, 1, 12, 0, 0))
    & (coords_df.TIME_OVER <= datetime(2024, 7, 1, 13, 0, 0))
    & (coords_df.FLIGHT_LEVEL > FL_min)
)

# Select the columns needed downstream and lower-case every column name.
coords_df = (
    coords_df
    .loc[in_scope, ['FLIGHT_ID', 'LONGITUDE', 'LATITUDE', 'TIME_OVER', 'FLIGHT_LEVEL']]
    .rename(columns=str.lower)
)

print(f"Number of rows as input: {coords_df.shape}")

# Hand the filtered pandas frame over to Spark
coords_df = spark.createDataFrame(coords_df)

# -----------------------------------------------------------------------------
# Resample and interpolate
# -----------------------------------------------------------------------------

# Per flight: resample onto a 5-second grid (mean of samples per bucket), fill
# the empty buckets by linear interpolation while flagging the filled rows, then
# go back to a plain Spark DataFrame partitioned by flight.
coords_df = (
    TSDF(coords_df, ts_col="time_over", partition_cols=["flight_id"])
    .resample(freq="5 sec", func="mean")
    .interpolate(method='linear', freq="5 sec", show_interpolated=True)
    .df
    .repartition(100, ["flight_id"])
)
print(f"Number of rows after resamplin and interpolating: {coords_df.count()}")

# -----------------------------------------------------------------------------
# Delete resampled periods which are longer than DeltaT = 10 min
# -----------------------------------------------------------------------------

# Samples of one flight in chronological order
w = Window.partitionBy("flight_id").orderBy("time_over")

# All samples of one flight that belong to the same run (same group id)
group_window = Window.partitionBy("flight_id", "interpolation_group_id")

# 1 where the interpolation flag differs from the previous sample of the same
# flight (a new run starts), 0 otherwise. The first sample of a flight has no
# predecessor, so the comparison is null there and is replaced by 1.
interpolation_run_start = F.coalesce(
    (F.col("is_ts_interpolated") != F.lag("is_ts_interpolated", 1).over(w)).cast("int"),
    F.lit(1),
)

# Seconds between the first and the last sample of a run
interpolation_run_duration = (
    F.max("time_over").over(group_window).cast("long")
    - F.min("time_over").over(group_window).cast("long")
)

is_interpolated = F.col("is_ts_interpolated")
run_is_short_enough = F.col("interpolation_group_duration_sec") <= deltaT_min*60

coords_df = (
    coords_df
    .withColumn("interpolation_group_change", interpolation_run_start)
    # Running total of run starts = run id within the flight
    .withColumn("interpolation_group_id", F.sum("interpolation_group_change").over(w))
    .withColumn("interpolation_group_duration_sec", interpolation_run_duration)
    # Observed samples are always kept; interpolated samples only when their
    # run lasts at most deltaT_min minutes.
    .filter(~is_interpolated | (is_interpolated & run_is_short_enough))
    # Remove the helper columns again
    .drop("interpolation_group_change", "interpolation_group_id",
          "group_start_time", "group_end_time", "interpolation_group_duration_sec")
)

# Give every remaining sample its own ID and spread the rows over 100 partitions
coords_df = (
    coords_df
    .withColumn("segment_id", monotonically_increasing_id())
    .repartition(100, ["flight_id", "segment_id"])
)

# Debug aid: restrict the analysis to a single timestamp
#coords_df = coords_df.filter(col('time_over')==datetime(2024,7,1,12,1,0))

# Mark the frame for caching; the count() action materialises it and is shown
# as the cell output.
coords_df.cache()
coords_df.count()

Number of rows as input: (296944, 5)


25/05/07 15:16:05 WARN TaskSetManager: Stage 0 contains a task of very large size (1401 KiB). The maximum recommended task size is 1000 KiB.
25/05/07 15:16:06 WARN TaskSetManager: Stage 6 contains a task of very large size (1401 KiB). The maximum recommended task size is 1000 KiB.
                Earliest Timestamp: 2024-07-01 12:00:00
                Latest Timestamp: 2024-07-01 13:00:00
                No. of Unique Partitions: 4243
                Resampled Min No. Values in Single a Partition: 1.0
                Resampled Max No. Values in Single a Partition: 721.0
                Resampled P25 No. Values in Single a Partition: 230.0
                Resampled P50 No. Values in Single a Partition: 496.0
                Resampled P75 No. Values in Single a Partition: 712.0
                Resampled Total No. Values Across All Partitions: 1931331.0
        
25/05/07 15:16:07 WARN TaskSetManager: Stage 12 contains a task of very large size (1401 KiB). The maximum recommended task size

Number of rows after resamplin and interpolating: 1931331


25/05/07 15:16:09 WARN TaskSetManager: Stage 27 contains a task of very large size (1401 KiB). The maximum recommended task size is 1000 KiB.
                                                                                

1913376

In [3]:
# -----------------------------------------------------------------------------
# Define UDFs for H3
# -----------------------------------------------------------------------------
def lat_lon_to_h3(lat, lon, resolution):
    """Return the H3 cell index containing a point, or None if it has no position.

    Args:
        lat: Latitude in degrees, or None/NaN when the position is missing.
        lon: Longitude in degrees, or None/NaN when the position is missing.
        resolution: H3 resolution passed through to ``h3.latlng_to_cell``.

    Returns:
        The value returned by ``h3.latlng_to_cell(lat, lon, resolution)``, or
        None when either coordinate is missing.
    """
    # Spark hands SQL NULLs to a UDF as None; `x != x` is True only for NaN.
    # Either would make h3 raise and fail the whole task, so emit a null instead.
    if lat is None or lon is None or lat != lat or lon != lon:
        return None
    return h3.latlng_to_cell(lat, lon, resolution)

def grid_disk_k1(cell):
    """Return the cells within grid distance 1 of ``cell``, or None for a null cell.

    Args:
        cell: H3 cell index, or None when the sample has no cell.

    Returns:
        The result of ``h3.grid_disk(cell, k=1)``, or None when ``cell`` is None.
    """
    # A null index (sample without a usable position) would make h3 raise and
    # fail the Spark task; propagate the null instead.
    if cell is None:
        return None
    return h3.grid_disk(cell, k=1)

# Expose the plain-Python H3 helpers to Spark with explicit return types
lat_lon_to_h3_udf = udf(lat_lon_to_h3, returnType=StringType())
grid_disk_k1_udf = udf(grid_disk_k1, returnType=ArrayType(StringType()))

# Tag every sample with its H3 cell and with the k=1 disk around that cell
coords_df = (
    coords_df
    .withColumn("h3_index", lat_lon_to_h3_udf(col("latitude"), col("longitude"), lit(resolution)))
    .withColumn("h3_neighbours", grid_disk_k1_udf(col("h3_index")))
)


In [4]:
# Preview a few rows to check the new h3_index / h3_neighbours columns
coords_df.show()

25/05/07 15:16:13 WARN GarbageCollectionMetrics: To enable non-built-in garbage collector(s) List(G1 Concurrent GC), users should configure it(them) to spark.eventLog.gcMetrics.youngGenerationGarbageCollectors or spark.eventLog.gcMetrics.oldGenerationGarbageCollectors
                                                                                

+-------------------+-----------------+------------------+-------------------+------------+------------------+----------------------------+------------------------+-------------------------+----------+---------------+--------------------+
|          time_over|     flight_level|          latitude|          longitude|   flight_id|is_ts_interpolated|is_interpolated_flight_level|is_interpolated_latitude|is_interpolated_longitude|segment_id|       h3_index|       h3_neighbours|
+-------------------+-----------------+------------------+-------------------+------------+------------------+----------------------------+------------------------+-------------------------+----------+---------------+--------------------+
|2024-07-01 12:21:50|            370.0| 39.75847222222221|  63.84916666666666|2.73696721E8|              true|                        true|                    true|                     true|       257|85219c6ffffffff|[85219c6ffffffff,...|
|2024-07-01 12:24:40|            370.0| 39.6

In [5]:
# -----------------------------------------------------------------------------
# Explode neighbors and group by h3_neighbour to collect IDs when there's multiple FLIGHT_ID in a cell
# -----------------------------------------------------------------------------
# One row per (sample, cell in its k=1 disk)
exploded_df = coords_df.withColumn("h3_neighbour", explode("h3_neighbours"))

# Per timestamp and cell: collect the sample IDs, keeping only cells that are
# shared by more than one distinct flight and hold more than one sample.
grouped_df = (
    exploded_df
    .groupBy("time_over", "h3_neighbour")
    .agg(
        F.countDistinct("flight_id").alias("flight_count"),
        F.collect_list("segment_id").alias("id_list"),
    )
    .where(F.col("flight_count") > 1)
    .drop("flight_count")
    .where(F.size("id_list") > 1)
)

In [6]:
# -----------------------------------------------------------------------------
# Create pairwise combinations using self-join on indexed exploded DataFrame
# -----------------------------------------------------------------------------
# Explode id_list to individual rows and add index within each h3 group
# One row per sample ID, numbered within its (timestamp, cell) group
df_exploded = grouped_df.select(
    "time_over",
    "h3_neighbour",
    explode("id_list").alias("segment_id"),
)
window_spec = Window.partitionBy("time_over", "h3_neighbour").orderBy("segment_id")
df_indexed = df_exploded.withColumn("idx", F.row_number().over(window_spec))

# Self-join inside each group; the strict idx ordering yields every unordered
# pair exactly once and never pairs a row with itself.
same_timestamp = F.col("df1.time_over") == F.col("df2.time_over")
same_cell = F.col("df1.h3_neighbour") == F.col("df2.h3_neighbour")
first_before_second = F.col("df1.idx") < F.col("df2.idx")

df_pairs = (
    df_indexed.alias("df1")
    .join(df_indexed.alias("df2"), same_timestamp & same_cell & first_before_second)
    .select(
        F.col("df1.time_over").alias("time_over"),
        F.col("df1.h3_neighbour").alias("h3_group"),
        F.col("df1.segment_id").alias("ID1"),
        F.col("df2.segment_id").alias("ID2"),
    )
)

In [7]:
# -----------------------------------------------------------------------------
# Clean Pairs, Create Unique Pair ID
# -----------------------------------------------------------------------------
# Order-independent pair key: both sample IDs, sorted, joined with "_"
sorted_pair_ids = F.array_sort(F.array(col("ID1"), col("ID2")))

df_pairs = (
    df_pairs
    # Defensive only: the strict "<" in the self-join already excludes self-pairs
    .filter(col("ID1") != col("ID2"))
    .withColumn("ID", F.concat_ws("_", sorted_pair_ids))
)

In [8]:
# The same pair shows up once per cell the two samples share; keep a single
# (arbitrary) row per pair ID.
window_spec = Window.partitionBy("ID").orderBy(F.monotonically_increasing_id())

df_pairs = (
    df_pairs
    .withColumn("row_num", F.row_number().over(window_spec))
    .filter(F.col("row_num") == 1)
    .drop("row_num")
)

# -----------------------------------------------------------------------------
# Join with Original Coordinates for Each ID
# -----------------------------------------------------------------------------
def _coords_for_side(side):
    """Return the sample attributes of coords_df with column names suffixed by `side`.

    The result has the columns ID<side>, lat<side>, lon<side>, time<side>,
    flight_lvl<side>, flight_id<side> and is repartitioned on ID<side>.
    """
    renamed = coords_df.select(
        col("segment_id").alias(f"ID{side}"),
        col("latitude").alias(f"lat{side}"),
        col("longitude").alias(f"lon{side}"),
        col("time_over").alias(f"time{side}"),
        col("flight_level").alias(f"flight_lvl{side}"),
        col("flight_id").alias(f"flight_id{side}"),
    )
    return renamed.repartition(100, f"ID{side}")


coords_sdf1 = _coords_for_side(1)
coords_sdf2 = _coords_for_side(2)

# Attach position, time, level and flight of both samples to every pair
df_pairs = (
    df_pairs
    .join(coords_sdf1, on="ID1", how="left")
    .join(coords_sdf2, on="ID2", how="left")
)
df_pairs.cache()
print(f"Number of pairs (raw): {df_pairs.count()}")
# -----------------------------------------------------------------------------
# Calculate and filter based on time difference (s)
# -----------------------------------------------------------------------------
# Keep only pairs whose two samples carry the same timestamp
df_pairs = (
    df_pairs
    .withColumn(
        'time_diff_s',
        F.unix_timestamp(F.col("time1")) - F.unix_timestamp(F.col("time2")),
    )
    .filter(F.abs(F.col('time_diff_s')) == 0)
)
df_pairs.cache()
print(f"Number of pairs after time filter {df_pairs.count()}")
# -----------------------------------------------------------------------------
# Calculate and filter based on height difference (flight levels)
# -----------------------------------------------------------------------------
# Keep only pairs whose vertical separation is strictly below the threshold
df_pairs = (
    df_pairs
    .withColumn('FL_diff', F.col("flight_lvl1") - F.col("flight_lvl2"))
    .filter(F.abs(F.col('FL_diff')) < lit(FL_diff))
)
df_pairs.cache()
print(f"Number of pairs after FL filter {df_pairs.count()}")

# -----------------------------------------------------------------------------
# Calculate and filter based on great-circle distance (NM)
# -----------------------------------------------------------------------------
# Haversine formula:
#   a = sin^2(dlat / 2) + cos(lat1) * cos(lat2) * sin^2(dlon / 2)
#   d = 2 * R * atan2(sqrt(a), sqrt(1 - a))
# The half-angle has to be taken inside the sine, i.e. sin(dlat / 2), not
# sin(dlat) / 2. The two only agree approximately for very small angles.
# df_pairs was already cached after the flight-level filter, so it is not
# cached again here.
km_to_nm = 0.539957  # nautical miles per kilometre

dlat_rad = radians(col("lat2")) - radians(col("lat1"))
dlon_rad = radians(col("lon2")) - radians(col("lon1"))

haversine_a = (
    sin(dlat_rad / 2) ** 2
    + cos(radians(col("lat1"))) * cos(radians(col("lat2"))) * sin(dlon_rad / 2) ** 2
)

df_pairs = df_pairs.withColumn(
    "distance_nm",
    km_to_nm * 2 * earth_radius_km * atan2(sqrt(haversine_a), sqrt(1 - haversine_a))
)

# Keep only pairs closer than (or exactly at) the horizontal threshold
df_pairs = df_pairs.filter(col('distance_nm') <= lit(distance_nm))

# -----------------------------------------------------------------------------
# Fetch the close-encounter pairs to the driver
# -----------------------------------------------------------------------------

df_pairs.cache()
df = df_pairs.toPandas()
# The collected frame already holds every row, so its length is the pair count;
# this avoids launching a second Spark action just to print the number.
print(f"Number of unique ID pairs: {len(df)}")


                                                                                

Number of pairs (raw): 1552574


                                                                                

Number of pairs after time filter 1552574
Number of pairs after FL filter 149922


25/05/07 15:16:56 WARN CacheManager: Asked to cache already cached data.


Number of unique ID pairs: 247


In [10]:
# Display the close-encounter pairs collected into pandas by toPandas() above
df

Unnamed: 0,ID2,ID1,time_over,h3_group,ID,lat1,lon1,time1,flight_lvl1,flight_id1,lat2,lon2,time2,flight_lvl2,flight_id2,time_diff_s,FL_diff,distance_nm
0,68719661628,42949827276,2024-07-01 12:41:10,851f8babfffffff,42949827276_68719661628,47.000516,12.227540,2024-07-01 12:41:10,310.000000,273713103.0,47.001319,12.225116,2024-07-01 12:41:10,310.000000,273714985.0,0,0.000000,0.110479
1,68719661730,42949827378,2024-07-01 12:49:40,851e1677fffffff,42949827378_68719661730,46.645556,13.746389,2024-07-01 12:49:40,330.000000,273713103.0,46.644537,13.745417,2024-07-01 12:49:40,330.000000,273714985.0,0,0.000000,0.073193
2,68719661688,42949827336,2024-07-01 12:46:10,851e1613fffffff,42949827336_68719661688,46.795556,13.118056,2024-07-01 12:46:10,322.000000,273713103.0,46.795741,13.119028,2024-07-01 12:46:10,321.833333,273714985.0,0,0.166667,0.041526
3,68719661563,42949827211,2024-07-01 12:35:45,851f880ffffffff,42949827211_68719661563,47.231852,11.286157,2024-07-01 12:35:45,289.166667,273713103.0,47.234815,11.276204,2024-07-01 12:35:45,288.333333,273714985.0,0,0.833333,0.443565
4,68719661656,42949827304,2024-07-01 12:43:30,851e16c3fffffff,42949827304_68719661656,46.908657,12.635509,2024-07-01 12:43:30,310.000000,273713103.0,46.907546,12.637083,2024-07-01 12:43:30,310.000000,273714985.0,0,0.000000,0.092941
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
242,68719661681,42949827329,2024-07-01 12:45:35,851e16d7fffffff,42949827329_68719661681,46.820833,13.015833,2024-07-01 12:45:35,316.000000,273713103.0,46.820000,13.015278,2024-07-01 12:45:35,316.000000,273714985.0,0,0.000000,0.055055
243,68719661735,42949827383,2024-07-01 12:50:05,851e1607fffffff,42949827383_68719661735,46.626111,13.825787,2024-07-01 12:50:05,330.000000,273713103.0,46.626944,13.821111,2024-07-01 12:50:05,330.000000,273714985.0,0,0.000000,0.199407
244,68719661755,42949827403,2024-07-01 12:51:45,851e162bfffffff,42949827403_68719661755,46.545648,14.135833,2024-07-01 12:51:45,330.000000,273713103.0,46.547685,14.123796,2024-07-01 12:51:45,330.000000,273714985.0,0,0.000000,0.512442
245,68719661643,42949827291,2024-07-01 12:42:25,851f8bb7fffffff,42949827291_68719661643,46.949286,12.452183,2024-07-01 12:42:25,310.000000,273713103.0,46.951481,12.445463,2024-07-01 12:42:25,310.000000,273714985.0,0,0.000000,0.305669


In [9]:
# Sanity check: list any pair whose computed distance exceeds 250 NM
df_f = df.loc[df["distance_nm"] > 250]
df_f

Unnamed: 0,ID2,ID1,h3_group,ID,lat1,lon1,time1,flight_lvl1,flight_id1,lat2,lon2,time2,flight_lvl2,flight_id2,time_diff_s,FL_diff,distance_nm


In [28]:
# NOTE(review): max_distance is only assigned in the histogram cell further down
# in this notebook, so on a fresh kernel this cell fails unless that cell has
# been run first.
max_distance

np.float64(37.54197652010957)

In [26]:
# Two test positions
lat1, lon1 = 41.029008, 23.695000
lat2, lon2 = 41.465093, 23.449722

# k=1 disk (resolution 5) around the cell of each position
s1, s2 = (
    set(h3.grid_disk(h3.latlng_to_cell(lat, lon, 5), 1))
    for lat, lon in ((lat1, lon1), (lat2, lon2))
)

# Cells present in both disks
print(s1 & s2)

print(s1)
print(s2)


{'851ec237fffffff'}
{'851ec237fffffff', '851ec227fffffff', '851ec35bfffffff', '851ec22ffffffff', '851ec3cbfffffff', '851ec353fffffff', '851ec223fffffff'}
{'851ec2a7fffffff', '851ec237fffffff', '851ec3dbfffffff', '851ec2abfffffff', '851ec2a3fffffff', '851ec233fffffff', '851ec2affffffff'}


In [12]:
# Display the pandas frame of close-encounter pairs again
df

Unnamed: 0,ID2,ID1,time_over,h3_group,ID,lat1,lon1,time1,flight_lvl1,flight_id1,lat2,lon2,time2,flight_lvl2,flight_id2,time_diff_s,FL_diff,distance_nm
0,68719661628,42949827276,2024-07-01 12:41:10,851f8babfffffff,42949827276_68719661628,47.000516,12.227540,2024-07-01 12:41:10,310.000000,273713103.0,47.001319,12.225116,2024-07-01 12:41:10,310.000000,273714985.0,0,0.000000,0.110479
1,68719661730,42949827378,2024-07-01 12:49:40,851e1677fffffff,42949827378_68719661730,46.645556,13.746389,2024-07-01 12:49:40,330.000000,273713103.0,46.644537,13.745417,2024-07-01 12:49:40,330.000000,273714985.0,0,0.000000,0.073193
2,68719661688,42949827336,2024-07-01 12:46:10,851e1613fffffff,42949827336_68719661688,46.795556,13.118056,2024-07-01 12:46:10,322.000000,273713103.0,46.795741,13.119028,2024-07-01 12:46:10,321.833333,273714985.0,0,0.166667,0.041526
3,68719661563,42949827211,2024-07-01 12:35:45,851f880ffffffff,42949827211_68719661563,47.231852,11.286157,2024-07-01 12:35:45,289.166667,273713103.0,47.234815,11.276204,2024-07-01 12:35:45,288.333333,273714985.0,0,0.833333,0.443565
4,68719661656,42949827304,2024-07-01 12:43:30,851e16c3fffffff,42949827304_68719661656,46.908657,12.635509,2024-07-01 12:43:30,310.000000,273713103.0,46.907546,12.637083,2024-07-01 12:43:30,310.000000,273714985.0,0,0.000000,0.092941
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
242,68719661681,42949827329,2024-07-01 12:45:35,851e16d7fffffff,42949827329_68719661681,46.820833,13.015833,2024-07-01 12:45:35,316.000000,273713103.0,46.820000,13.015278,2024-07-01 12:45:35,316.000000,273714985.0,0,0.000000,0.055055
243,68719661735,42949827383,2024-07-01 12:50:05,851e1607fffffff,42949827383_68719661735,46.626111,13.825787,2024-07-01 12:50:05,330.000000,273713103.0,46.626944,13.821111,2024-07-01 12:50:05,330.000000,273714985.0,0,0.000000,0.199407
244,68719661755,42949827403,2024-07-01 12:51:45,851e162bfffffff,42949827403_68719661755,46.545648,14.135833,2024-07-01 12:51:45,330.000000,273713103.0,46.547685,14.123796,2024-07-01 12:51:45,330.000000,273714985.0,0,0.000000,0.512442
245,68719661643,42949827291,2024-07-01 12:42:25,851f8bb7fffffff,42949827291_68719661643,46.949286,12.452183,2024-07-01 12:42:25,310.000000,273713103.0,46.951481,12.445463,2024-07-01 12:42:25,310.000000,273714985.0,0,0.000000,0.305669


In [11]:
import plotly.express as px
# Reference distance drawn on the histogram: 2 * sqrt(3) * 10.837435124897937.
# NOTE(review): the constant is unexplained in this notebook — presumably a
# hexagon edge length for the selected H3 resolution; confirm source and units.
max_distance = np.sqrt(3)/2*10.837435124897937*4
# Distribution of the computed pair distances
fig = px.histogram(df, x='distance_nm')

# Mark the reference distance with a dashed vertical line
fig.add_vline(x=max_distance, line_dash = 'dash', line_color = 'firebrick')

In [None]:
# Reload the raw profiles (this rebinds coords_df from the Spark DataFrame used
# above to a pandas DataFrame) and number the rows by their pandas index.
coords_df = pd.read_parquet('~/Repos/close-encounters/data/flight_profiles_cpf_20240701_filtered.parquet')
coords_df = (
    coords_df
    .assign(SEGMENT_ID=coords_df.index)
    .loc[
        lambda frame: frame.FLIGHT_LEVEL > 250,
        ['FLIGHT_ID', 'SEGMENT_ID', 'LONGITUDE', 'LATITUDE', 'TIME_OVER', 'FLIGHT_LEVEL', 'AIRCRAFT_TYPE'],
    ]
    .rename(columns={'LATITUDE': 'latitude', 'LONGITUDE': 'longitude'})
)


def _pair_side_lookup(side):
    """Return flight id, segment id and aircraft type, renamed for pair side `side`."""
    return coords_df[['FLIGHT_ID', 'SEGMENT_ID', 'AIRCRAFT_TYPE']].rename(
        columns={
            'FLIGHT_ID': f'FLIGHT_ID{side}',
            'SEGMENT_ID': f'ID{side}',
            'AIRCRAFT_TYPE': f'AC{side}',
        }
    )


c1 = _pair_side_lookup(1)
c2 = _pair_side_lookup(2)

# merge() without `on` joins on the columns the two frames share.
# NOTE(review): SEGMENT_ID here is the pandas index of the raw file, whereas the
# ID1/ID2 values produced by the Spark pipeline earlier in this notebook come
# from monotonically_increasing_id() on the resampled rows. The two numberings
# look unrelated, so this left merge probably matches nothing — confirm the
# intended join key.
df = df.merge(c1, how='left').merge(c2, how='left')

In [None]:
# Turn the flight identifiers into strings with an "_id" suffix, in both frames.
# Re-running this cell appends the suffix again.
for frame, column in ((coords_df, 'FLIGHT_ID'), (df, 'FLIGHT_ID1'), (df, 'FLIGHT_ID2')):
    frame[column] = frame[column].apply(str) + '_id'

# Keep only the samples of flights that appear in at least one pair
flights_in_pairs = df.FLIGHT_ID1.to_list() + df.FLIGHT_ID2.to_list()
coords_df = coords_df[coords_df.FLIGHT_ID.isin(flights_in_pairs)]

In [None]:
# Write the filtered flight samples to coords.parquet (path is relative to the
# current working directory)
coords_df.to_parquet('coords.parquet')

In [None]:
# Write the pair table to pairs.parquet (path is relative to the current working
# directory)
df.to_parquet('pairs.parquet')

In [None]:
# Spot check: lon2 of the first pair row whose ID2 equals 2557313.
# `.values[0]` raises IndexError when no row matches.
# NOTE(review): the hard-coded ID presumably comes from an earlier run — confirm
# it still exists in the current data.
print(df[df.ID2 == 2557313].lon2.values[0])

In [None]:
# Spot check: longitude of the sample whose SEGMENT_ID equals 2557313, for
# comparison with the value stored on the pair row.
# `.values[0]` raises IndexError when no row matches.
print(coords_df[coords_df.SEGMENT_ID == 2557313].longitude.values[0])

In [None]:
# Collect the Spark pair DataFrame to the driver as pandas and display it
df_pairs.toPandas()