In [None]:
import pandas as pd
import traffic
from traffic.data import opensky
import h3

pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# Trajectories
df = pd.read_parquet('../data/2023-08-02-11.parquet')
df['id'] = df['icao24'] + '-' + df['callsign'] + '-' + df['time'].dt.date.apply(str)
df = df[['id', 'time', 'icao24', 'callsign', 'lat', 'lon', 'baroaltitude']]

# Add hex_ids 

df['hex_id_5'] = df.apply(lambda l:h3.geo_to_h3(l['lat'],l['lon'], 5),axis=1)
df['hex_id_11'] = df.apply(lambda l:h3.geo_to_h3(l['lat'],l['lon'], 11),axis=1)

# Convert altitudes to ft and FL
baroaltitude'
df['baroaltitude_ft'] = df['baroaltitude']*3.28084
df['baroaltitude_fl'] = df['baroaltitude']*3.28084/100

# Filter out low altitude statevectors

df_low = df[df['baroaltitude_ft'] < 5000] 

# Read airport_hexagonifications

apt_hex = pd.read_parquet('../data/airport_hex/airport_hex_res_5.parquet')
apt_hex = apt_hex[apt_hex['type']=='large_airport']

# Create list of possible arrival / departure airports
arr_dep_apt = df_low.merge(apt_hex, left_on='hex_id_5', right_on='hex_id', how='inner')
arr_dep_apt = arr_dep_apt.groupby('id_x')['ident'].apply(set).reset_index()
arr_dep_apt['ident'] = arr_dep_apt['ident'].apply(list)
arr_dep_apt = arr_dep_apt.rename({'id_x':'id', 'ident':'potential_apt'},axis=1)

df_low = df_low.merge(arr_dep_apt, on = 'id', how = 'left')
id_example = '4009d8-BAW3ET-2023-08-02' 
apts_example = ['EGLL', 'EHAM']

def tracks_to_hex(df_low, id_example, apts_example):
    #print(df_low)
    #print(id_example)
    #print(apts_example)
    df_single = df_low[df_low['id'] == id_example]

    core_cols_single = ['id', 'time', 'lat', 'lon', 'hex_id_11', 'baroaltitude_fl']

    df_single = df_single[core_cols_single]
    df_single = df_single.reset_index()

    core_cols_rwy = ['id', 'airport_ref', 'airport_ident', 'gate_id', 'hex_id', 'gate_id_nr','le_ident','he_ident']

    df_rwys = []

    for apt in apts_example:
        df_rwy = pd.read_parquet(f'../data/runway_hex/{apt}.parquet')
        df_rwys.append(df_rwy)

    df_rwys = pd.concat(df_rwys)
    df_rwys = df_rwys[core_cols_rwy]

    df_hex_rwy = df_single.merge(df_rwys,left_on='hex_id_11', right_on='hex_id', how='left')

    result = df_hex_rwy.groupby(['id_x','airport_ident', 'gate_id','le_ident','he_ident'])['time'].agg([min,max]).reset_index().sort_values('min')
    return result

dfs = arr_dep_apt.apply(lambda l: tracks_to_hex(df_low, l['id'],l['potential_apt']),axis=1).to_list()

result = pd.concat(dfs)

def clean_gate(gate_id):
    if gate_id == 'runway_hexagons':
        return 'runway_hexagons',0
    else:
        return '_'.join(gate_id.split('_')[:4]), int(gate_id.split('_')[4])

result['gate_type'], result['gate_distance_from_rwy_nm'] = zip(*result.gate_id.apply(clean_gate))

## Determining arrival / departure... 

result = result.reset_index(drop=True)

result_min = result.loc[result.groupby(['id_x', 'airport_ident', 'le_ident', 'he_ident'])['gate_distance_from_rwy_nm'].idxmin()]
result_max = result.loc[result.groupby(['id_x', 'airport_ident', 'le_ident', 'he_ident'])['gate_distance_from_rwy_nm'].idxmax()] 

# Copy the DataFrame to avoid modifying the original unintentionally
result_copy = result.copy()

# Compute the minimum and maximum 'gate_distance_from_rwy_nm' for each group
min_values = result.groupby(['id_x', 'airport_ident', 'le_ident', 'he_ident'])['gate_distance_from_rwy_nm'].transform('min')
max_values = result.groupby(['id_x', 'airport_ident', 'le_ident', 'he_ident'])['gate_distance_from_rwy_nm'].transform('max')

# Add these as new columns to the DataFrame
result_copy['min_gate_distance'] = min_values
result_copy['max_gate_distance'] = max_values

# Now, you can filter rows where 'gate_distance_from_rwy_nm' matches the min or max values
# To specifically keep rows with the minimum value:
result_min = result_copy[result_copy['gate_distance_from_rwy_nm'] == result_copy['min_gate_distance']]

# To specifically keep rows with the maximum value:
result_max = result_copy[result_copy['gate_distance_from_rwy_nm'] == result_copy['max_gate_distance']]

cols_of_interest = ['id_x', 'airport_ident', 'le_ident', 'he_ident', 'min', 'gate_distance_from_rwy_nm']
result_min = result_min[cols_of_interest].rename({'min':'time_entry_min_distance', 'gate_distance_from_rwy_nm':'min_gate_distance_from_rwy_nm'},axis=1)
result_max = result_max[cols_of_interest].rename({'min':'time_entry_max_distance', 'gate_distance_from_rwy_nm':'max_gate_distance_from_rwy_nm'},axis=1)

det = result_min.merge(result_max, on=['id_x', 'airport_ident', 'le_ident', 'he_ident'], how='outer')

det['time_since_minimum_distance'] = det['time_entry_min_distance']-det['time_entry_max_distance']

det['time_since_minimum_distance_s'] = det['time_since_minimum_distance'].dt.total_seconds()

det['status'] = det['time_since_minimum_distance_s'].apply(lambda l: 'arrival' if l > 0 else 'departure')
det['status'] = det['status'].fillna('undetermined')

det = det[['id_x', 'airport_ident', 'le_ident', 'he_ident','status']]

gb_cols = ['id_x', 'airport_ident', 'le_ident', 'he_ident', 'gate_type']
result = result.groupby(gb_cols).agg(
    entry_time_approach_area=('min', 'min'),
    exit_time_approach_area=('max', 'max'),
    intersected_subsections=('gate_distance_from_rwy_nm', 'count'),
    minimal_distance_runway=('gate_distance_from_rwy_nm', 'min'),
    maximal_distance_runway=('gate_distance_from_rwy_nm', 'max')
)
result = result.reset_index()

rwy_result_cols = ['id_x', 'airport_ident', 'le_ident', 'he_ident']

rwy_result = result[rwy_result_cols + ['gate_type']]
rwy_result = rwy_result[rwy_result['gate_type']=='runway_hexagons']
rwy_result = rwy_result[rwy_result_cols]
rwy_result['runway_detected'] = True

result = result.merge(rwy_result, on=rwy_result_cols, how = 'left')

result['runway_detected'] = result['runway_detected'].fillna(False)

result = result[result['gate_type']!='runway_hexagons']

result['high_number_intersections'] = result['intersected_subsections']>5

result['low_minimal_distance'] = result['minimal_distance_runway']<5

result['touched_closest_segment_to_rw'] = result['minimal_distance_runway']==1

result['touched_second_closest_segment_to_rw'] = result['minimal_distance_runway']<=2 

approach_detected_weight = 0.3
rwy_detected_weight = 2
high_number_intersections_weight = 1 
low_minimal_distance_weight = 1
touched_closest_segment_to_rw_weight = 1.5
touched_second_closest_segment_to_rw_weight = 0.75

max_score = approach_detected_weight + rwy_detected_weight + high_number_intersections_weight + low_minimal_distance_weight + touched_closest_segment_to_rw_weight + touched_second_closest_segment_to_rw_weight

result['score'] = (
                   1*approach_detected_weight + # For all flights in this dataset an approach is detected (i.e., they entered the approach cone)
                   result['runway_detected'].apply(int)*rwy_detected_weight + 
                   result['high_number_intersections'].apply(int)*high_number_intersections_weight + 
                   result['low_minimal_distance'].apply(int)*touched_closest_segment_to_rw_weight + 
                   result['touched_closest_segment_to_rw'].apply(int)*touched_closest_segment_to_rw_weight + 
                   result['touched_second_closest_segment_to_rw'].apply(int)*touched_second_closest_segment_to_rw_weight
                  ) / max_score * 100

result = result.reset_index(drop=True)

result = result.merge(det,on=['id_x','airport_ident','le_ident','he_ident'], how ='left')

result['status'] = result['status'].fillna('undetermined')

result['rwy'] = result['le_ident'] + '/' + result['he_ident']

rwy_winner = result.loc[result.groupby(['id_x','airport_ident'])['score'].idxmax()]
rwy_winner['score'] = rwy_winner['score'].apply(str)
rwy_winner = rwy_winner.groupby(['id_x','airport_ident'])['le_ident', 'he_ident', 'rwy','score', 'status'].agg(', '.join).reset_index()
rwy_winner = rwy_winner.rename({
    'id_x':'id',
    'rwy' : 'likely_rwy',
    'score': 'likely_rwy_score',
    'status': 'likely_rwy_status'
    }, axis=1)

id_cols = ['id', 'airport_ident', 'le_ident', 'he_ident']
rwy_winner_flag = rwy_winner[id_cols]

rwy_winner_flag['winner'] = True

result = result.rename({'id_x':'id'}, axis=1)
result = result.merge(rwy_winner_flag, on = id_cols, how='left') 
result['winner'] = result['winner'].fillna(False)

rwy_losers = result[result['winner']==False]

rwy_losers['score'] = rwy_losers['score'].apply(str)
rwy_losers = rwy_losers.groupby(['id','airport_ident'])['le_ident', 'he_ident', 'rwy','score', 'status'].agg(', '.join).reset_index()

rwy_losers = rwy_losers.rename({
    'rwy' : 'potential_other_rwys',
    'score': 'potential_other_rwy_scores',
    'status': 'potential_other_rwy_status'
    }, axis=1)[['id', 'airport_ident', 'potential_other_rwys', 'potential_other_rwy_scores', 'potential_other_rwy_status']]

rwy_determined = rwy_winner.merge(rwy_losers, on=['id','airport_ident'], how='left')

In [None]:
rwy_determined[~rwy_determined.potential_other_rwys.isna()]

In [None]:
id_detect = '3c6dd6-EWG14NR-2023-08-02'
rwy_determined_f = rwy_determined[rwy_determined['id'] == id_detect]

In [None]:
df = df[df.id == id_detect]

In [None]:
result[result['id']==id_detect]

In [None]:
rwy_determined_f

In [None]:
import h3_viz
import folium

In [None]:
egll = pd.read_parquet(f'../data/runway_hex/EGLL.parquet')
map_viz = h3_viz.choropleth_map(
        egll,
        column_name='gate_id_nr',
        border_color='black',
        fill_opacity=0.7,
        color_map_name='Reds',
        initial_map=None,
        initial_location=[df.lat.values[0], df.lon.values[0]],
        initial_zoom = 13,
        tooltip_columns = ['id', 'airport_ref', 'airport_ident', 'le_ident', 'he_ident', 'length_ft', 'width_ft',
   'surface', 'lighted', 'closed', 'gate_id']
    )

# Function to add a trajectory to the map
def add_trajectory(map_object, dataframe):
    """
    Adds an aircraft trajectory to a Folium map based on coordinates in a Pandas DataFrame.

    Parameters:
    - map_object: Folium Map instance where the trajectory will be added.
    - dataframe: Pandas DataFrame containing the trajectory coordinates with columns 'lat' and 'lon'.
    """
    # Extracting coordinates from DataFrame
    coordinates = dataframe[['lat', 'lon']].values.tolist()
    # Adding a PolyLine to the map to represent the trajectory
    folium.PolyLine(coordinates, color="blue", weight=2.5, opacity=1).add_to(map_object)

# Add the aircraft trajectory to the map
add_trajectory(map_viz, df)

# Display the map
map_viz


In [None]:
df

In [None]:
rwy_determined['id'].value_counts()

In [66]:
import pandas as pd
import traffic
from traffic.data import opensky
import h3

# Limitation: The current id should represent a flight from ADEP to ADES. If the ID does not represent this, max score vote would mess up the result.
# Solution: Create a new ID which checks whether the flight is one flight, otherwise it would detect and split the id in multiple ids.  

# Limitation: For each track there is not necessarily and airport found
# Solution: Work with larger Radius or Height in airport detection?

pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# Add hex_ids 

def add_hex_ids(df, longitude_col='lon', latitude_col ='lat', resolutions=[5, 11]):
    """
    Adds hexagonal IDs to the DataFrame for specified resolutions.
    """
    
    for res in resolutions:
        df[f'hex_id_{res}'] = df.apply(lambda row: h3.geo_to_h3(row[latitude_col], row[longitude_col], res), axis=1)
    return df

# Convert altitudes to ft and FL

def convert_baroalt_in_m_to_ft_and_FL(df, baroaltitude_col = 'baroaltitude'):
    """
    Converts barometric altitudes (in meter) to feet (ft) and flight levels (FL).
    """
    df['baroaltitude_ft'] = df[baroaltitude_col] * 3.28084
    df['baroaltitude_fl'] = df['baroaltitude_ft'] / 100
    return df

# Filter out low altitude statevectors

def filter_low_altitude_statevectors(df, baroalt_ft_col = 'baroaltitude_ft', threshold=5000):
    """
    Filters out aircraft states below a specified altitude threshold.
    """
    return df[df[baroalt_ft_col] < threshold]

# Read airport_hexagonifications

def identify_potential_airports(df, track_id_col = 'id', hex_id_col='hex_id', apt_types = ['large_airport', 'medium_airport']):
    """
    Merges aircraft states with airport data based on hex ID (resolution 5).
    """
    airports_df = pd.read_parquet('../data/airport_hex/airport_hex_res_5.parquet')
    airports_df = airports_df[airports_df['type'].isin(apt_types)]

    # Create list of possible arrival / departure airports
    arr_dep_apt = df.merge(airports_df, left_on='hex_id_5', right_on=hex_id_col, how='left')
    arr_dep_apt = arr_dep_apt[~arr_dep_apt['ident'].isna()]
    arr_dep_apt = arr_dep_apt.groupby('id_x')['ident'].apply(set).reset_index()
    arr_dep_apt['ident'] = arr_dep_apt['ident'].apply(list)
    arr_dep_apt = arr_dep_apt.rename({'id_x':'id', 'ident':'potential_apt'},axis=1)

    return arr_dep_apt

def identify_runways(df, track_id_col = 'id', longitude_col = 'lon', latitude_col = 'lat', baroaltitude_col = 'baroaltitude'):
    
    df_w_hex = add_hex_ids(df, longitude_col=longitude_col, latitude_col=latitude_col,  resolutions=[5, 11])
    
    df_w_baroalt_ft_fl = convert_baroalt_in_m_to_ft_and_FL(df_w_hex, baroaltitude_col = baroaltitude_col)
    
    df_f_low_alt = filter_low_altitude_statevectors(df_w_baroalt_ft_fl, baroalt_ft_col = 'baroaltitude_ft', threshold=25000)
    
    #df = identify_potential_airports(df_f_low_alt, track_id_col = track_id_col, hex_id_col='hex_id', apt_types = ['large_airport'])

    return df_f_low_alt
    
# Trajectories
df = pd.read_parquet('../data/2023-08-02-11.parquet')
df['id'] = df['icao24'] + '-' + df['callsign'] + '-' + df['time'].dt.date.apply(str)
df = df[['id', 'time', 'icao24', 'callsign', 'lat', 'lon', 'baroaltitude']]

df = identify_runways(df)

airports_df = pd.read_parquet('../data/airport_hex/airport_hex_res_5.parquet')
airports_df = airports_df[airports_df['type'].isin(apt_types)]

# Create list of possible arrival / departure airports
arr_dep_apt = df.merge(airports_df, left_on='hex_id_5', right_on=hex_id_col, how='left')
arr_dep_apt = arr_dep_apt[~arr_dep_apt['ident'].isna()]
arr_dep_apt = arr_dep_apt.groupby('id_x')['ident'].apply(set).reset_index()
arr_dep_apt['ident'] = arr_dep_apt['ident'].apply(list)
arr_dep_apt = arr_dep_apt.rename({'id_x':'id', 'ident':'potential_apt'},axis=1)

In [253]:
import pandas as pd
import traffic
from traffic.data import opensky
import h3

# Limitation: The current id should represent a flight from ADEP to ADES. If the ID does not represent this, max score vote would mess up the result.
# Solution: Create a new ID which checks whether the flight is one flight, otherwise it would detect and split the id in multiple ids.  

# Limitation: For each track there is not necessarily and airport found
# Solution: Work with larger Radius or Height in airport detection?

pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# Add hex_ids 
def add_statevector_id(df):
    """
    Create a numeric ID for each statevector by using the row index + 1 (to start from 1 instead of 0)
    """
    df['statevector_id'] = range(1, len(df) + 1)
    return df


def add_hex_ids(df, longitude_col='lon', latitude_col ='lat', resolutions=[5, 11]):
    """
    Adds hexagonal IDs to the DataFrame for specified resolutions.
    """
    
    for res in resolutions:
        df[f'hex_id_{res}'] = df.apply(lambda row: h3.geo_to_h3(row[latitude_col], row[longitude_col], res), axis=1)
    return df

# Convert altitudes to ft and FL

def convert_baroalt_in_m_to_ft_and_FL(df, baroaltitude_col = 'baroaltitude'):
    """
    Converts barometric altitudes (in meter) to feet (ft) and flight levels (FL).
    """
    df['baroaltitude_ft'] = df[baroaltitude_col] * 3.28084
    df['baroaltitude_fl'] = df['baroaltitude_ft'] / 100
    return df

# Filter out low altitude statevectors

def filter_low_altitude_statevectors(df, baroalt_ft_col = 'baroaltitude_ft', threshold=5000):
    """
    Filters out aircraft states below a specified altitude threshold.
    """
    return df[df[baroalt_ft_col] < threshold]

# Read airport_hexagonifications

def identify_potential_airports(df, track_id_col = 'id', hex_id_col='hex_id', apt_types = ['large_airport', 'medium_airport']):
    """
    Merges aircraft states with airport data based on hex ID (resolution 5).
    """
    
    airports_df = pd.read_parquet('../data/airport_hex/airport_hex_res_5.parquet')
    airports_df = airports_df[airports_df['type'].isin(apt_types)]
    
    airports_df = airports_df.rename({'id':'apt_id'},axis=1)
    
    # Create list of possible arrival / departure airports
    arr_dep_apt = df.merge(airports_df, left_on='hex_id_5', right_on=hex_id_col, how='left')

    # Convert the 'time' column to datetime format if it's not already
    arr_dep_apt['time'] = pd.to_datetime(arr_dep_apt['time'])

    # Initialize the 'segment_status' column with an empty string
    arr_dep_apt['segment_status'] = ''

    # Group by 'id_x' and 'ident'
    grouped = arr_dep_apt.groupby([track_id_col, 'ident'])

    # For each group, find the index of the min and max time and assign 'start' and 'end' respectively
    for name, group in grouped:
        start_index = group['time'].idxmin()
        end_index = group['time'].idxmax()

        # Assign 'start' to the row with the minimum time
        arr_dep_apt.at[start_index, 'segment_status'] = 'start'

        # Assign 'end' to the row with the maximum time
        arr_dep_apt.at[end_index, 'segment_status'] = 'end'

    # Step 1: Filter to only include 'start' or 'end'
    filtered_df = arr_dep_apt[arr_dep_apt['segment_status'].isin(['start', 'end'])]

    # Merge the start and end DataFrames on 'id_x' and 'ident'
    apt_detections_df = pd.merge(start_df, end_df, on=[track_id_col, 'ident'], how='outer')

    core = [track_id_col, 'ident', 'start_time',  'start_statevector_id', 'end_time', 'end_statevector_id']
    apt_detections_df = apt_detections_df[core]

    return apt_detections_df


def identify_runways_from_low_trajectories(apt_detections_df, df_f_low_alt):

    # Step 0: Creation of an ID & renaming cols
    apt_detections_df = apt_detections_df.reset_index()
    apt_detections_df['apt_detection_id'] = apt_detections_df['id'] + '_' + apt_detections_df['index'].apply(str)
    apt_detections_df = apt_detections_df[['id', 'ident', 'start_time', 'end_time', 'apt_detection_id']]
    apt_detections_df.columns = ['id', 'apt_det_ident', 'apt_det_start_time', 'apt_det_end_time', 'apt_det_id']

    # Step 1: Convert datetime columns to datetime format if they are not already
    apt_detections_df['apt_det_start_time'] = pd.to_datetime(apt_detections_df['apt_det_start_time'])
    apt_detections_df['apt_det_end_time'] = pd.to_datetime(apt_detections_df['apt_det_end_time'])
    df_f_low_alt['time'] = pd.to_datetime(df_f_low_alt['time'])

    # Step 2: Merge the data frames on 'id'
    merged_df = pd.merge(df_f_low_alt, apt_detections_df, on='id', how='inner')

    # Step 3: Filter rows where 'time' is between 'apt_det_start_time' and 'apt_det_end_time'
    result_df = merged_df[(merged_df['time'] >= merged_df['apt_det_start_time']) & 
                          (merged_df['time'] <= merged_df['apt_det_end_time'])]
    
    # Step 5: Match with runways

    def match_runways_to_hex(df_low, apt_det_id, apt):

        df_single = df_low[df_low['apt_det_id'] == apt_det_id]

        core_cols_single = ['apt_det_id', 'id', 'time', 'lat', 'lon', 'hex_id_11', 'baroaltitude_fl']

        df_single = df_single[core_cols_single]
        df_single = df_single.reset_index()

        core_cols_rwy = ['id', 'airport_ref', 'airport_ident', 'gate_id', 'hex_id', 'gate_id_nr','le_ident','he_ident']

        df_rwys = pd.read_parquet(f'../data/runway_hex/{apt}.parquet')
        df_rwys = df_rwys[core_cols_rwy]

        df_hex_rwy = df_single.merge(df_rwys,left_on='hex_id_11', right_on='hex_id', how='left')

        result = df_hex_rwy.groupby(['apt_det_id', 'id_x','airport_ident', 'gate_id','le_ident','he_ident'])['time'].agg([min,max]).reset_index().sort_values('min')
        return result

    dfs = apt_detections_df.apply(lambda l: match_runways_to_hex(result_df, l['apt_det_id'],l['apt_det_ident']),axis=1).to_list()

    result = pd.concat(dfs)
    return result

def identify_runways(df, track_id_col = 'id', longitude_col = 'lon', latitude_col = 'lat', baroaltitude_col = 'baroaltitude'):
    
    df_w_id = add_statevector_id(df)
    
    df_w_hex = add_hex_ids(df_w_id, longitude_col=longitude_col, latitude_col=latitude_col,  resolutions=[5, 11])
    
    df_w_baroalt_ft_fl = convert_baroalt_in_m_to_ft_and_FL(df_w_hex, baroaltitude_col = baroaltitude_col)
    
    df_f_low_alt = filter_low_altitude_statevectors(df_w_baroalt_ft_fl, baroalt_ft_col = 'baroaltitude_ft', threshold=5000)
    
    apt_detections_df = identify_potential_airports(df_f_low_alt, track_id_col = track_id_col, hex_id_col='hex_id', apt_types = ['large_airport'])
    
    rwy_detections_df = identify_runways_from_low_trajectories(apt_detections_df,df_f_low_alt)
    
    return rwy_detections_df
    
# Trajectories
df = pd.read_parquet('../data/2023-08-02-11.parquet')
df['id'] = df['icao24'] + '-' + df['callsign'] + '-' + df['time'].dt.date.apply(str)
df = df[['id', 'time', 'icao24', 'callsign', 'lat', 'lon', 'baroaltitude']]

rwy_detections_df = identify_runways(df)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_f_low_alt['time'] = pd.to_datetime(df_f_low_alt['time'])


In [254]:
rwy_detections_df

Unnamed: 0,apt_det_id,id_x,airport_ident,gate_id,le_ident,he_ident,min,max
2,405a46-BAW904N-2023-08-02_0,405a46-BAW904N-2023-08-02,EGLL,runway_hexagons,09R,27L,2023-08-02 11:21:46+00:00,2023-08-02 11:22:01+00:00
0,405a46-BAW904N-2023-08-02_0,405a46-BAW904N-2023-08-02,EGLL,high_numbered_approach_hexagons_1_nm,09R,27L,2023-08-02 11:22:02+00:00,2023-08-02 11:22:24+00:00
1,405a46-BAW904N-2023-08-02_0,405a46-BAW904N-2023-08-02,EGLL,high_numbered_approach_hexagons_2_nm,09R,27L,2023-08-02 11:22:25+00:00,2023-08-02 11:22:29+00:00
0,407463-SHT7C-2023-08-02_1,407463-SHT7C-2023-08-02,EGLL,low_numbered_approach_hexagons_10_nm,09L,27R,2023-08-02 11:12:29+00:00,2023-08-02 11:12:53+00:00
9,407463-SHT7C-2023-08-02_1,407463-SHT7C-2023-08-02,EGLL,low_numbered_approach_hexagons_9_nm,09L,27R,2023-08-02 11:12:54+00:00,2023-08-02 11:13:17+00:00
...,...,...,...,...,...,...,...,...
1,aaba9a-UAL919-2023-08-02_82,aaba9a-UAL919-2023-08-02,EGLL,high_numbered_approach_hexagons_2_nm,09L,27R,2023-08-02 11:37:14+00:00,2023-08-02 11:37:14+00:00
2,aaba9a-UAL919-2023-08-02_82,aaba9a-UAL919-2023-08-02,EGLL,high_numbered_approach_hexagons_3_nm,09L,27R,2023-08-02 11:37:15+00:00,2023-08-02 11:37:25+00:00
2,40087c-BAW5HL-2023-08-02_83,40087c-BAW5HL-2023-08-02,EGLL,runway_hexagons,09R,27L,2023-08-02 11:35:17+00:00,2023-08-02 11:35:33+00:00
0,40087c-BAW5HL-2023-08-02_83,40087c-BAW5HL-2023-08-02,EGLL,high_numbered_approach_hexagons_1_nm,09R,27L,2023-08-02 11:35:34+00:00,2023-08-02 11:35:58+00:00


In [239]:
rwy_detections_df

Unnamed: 0,apt_det_id,id_x,airport_ident,gate_id,le_ident,he_ident,min,max
2,405a46-BAW904N-2023-08-02_0,405a46-BAW904N-2023-08-02,EGLL,runway_hexagons,09R,27L,2023-08-02 11:21:46+00:00,2023-08-02 11:22:01+00:00
0,405a46-BAW904N-2023-08-02_0,405a46-BAW904N-2023-08-02,EGLL,high_numbered_approach_hexagons_1_nm,09R,27L,2023-08-02 11:22:02+00:00,2023-08-02 11:22:24+00:00
1,405a46-BAW904N-2023-08-02_0,405a46-BAW904N-2023-08-02,EGLL,high_numbered_approach_hexagons_2_nm,09R,27L,2023-08-02 11:22:25+00:00,2023-08-02 11:22:29+00:00
0,407463-SHT7C-2023-08-02_1,407463-SHT7C-2023-08-02,EGLL,low_numbered_approach_hexagons_10_nm,09L,27R,2023-08-02 11:12:29+00:00,2023-08-02 11:12:53+00:00
9,407463-SHT7C-2023-08-02_1,407463-SHT7C-2023-08-02,EGLL,low_numbered_approach_hexagons_9_nm,09L,27R,2023-08-02 11:12:54+00:00,2023-08-02 11:13:17+00:00
...,...,...,...,...,...,...,...,...
1,aaba9a-UAL919-2023-08-02_82,aaba9a-UAL919-2023-08-02,EGLL,high_numbered_approach_hexagons_2_nm,09L,27R,2023-08-02 11:37:14+00:00,2023-08-02 11:37:14+00:00
2,aaba9a-UAL919-2023-08-02_82,aaba9a-UAL919-2023-08-02,EGLL,high_numbered_approach_hexagons_3_nm,09L,27R,2023-08-02 11:37:15+00:00,2023-08-02 11:37:25+00:00
2,40087c-BAW5HL-2023-08-02_83,40087c-BAW5HL-2023-08-02,EGLL,runway_hexagons,09R,27L,2023-08-02 11:35:17+00:00,2023-08-02 11:35:33+00:00
0,40087c-BAW5HL-2023-08-02_83,40087c-BAW5HL-2023-08-02,EGLL,high_numbered_approach_hexagons_1_nm,09R,27L,2023-08-02 11:35:34+00:00,2023-08-02 11:35:58+00:00


In [244]:
result = rwy_detections_df

In [245]:
def clean_gate(gate_id):
    if gate_id == 'runway_hexagons':
        return 'runway_hexagons',0
    else:
        return '_'.join(gate_id.split('_')[:4]), int(gate_id.split('_')[4])

result['gate_type'], result['gate_distance_from_rwy_nm'] = zip(*result.gate_id.apply(clean_gate))

## Determining arrival / departure... 

result = result.reset_index(drop=True)

result_min = result.loc[result.groupby(['id_x', 'apt_det_id', 'airport_ident', 'le_ident', 'he_ident'])['gate_distance_from_rwy_nm'].idxmin()]
result_max = result.loc[result.groupby(['id_x', 'apt_det_id', 'airport_ident', 'le_ident', 'he_ident'])['gate_distance_from_rwy_nm'].idxmax()] 

# Copy the DataFrame to avoid modifying the original unintentionally
result_copy = result.copy()

# Compute the minimum and maximum 'gate_distance_from_rwy_nm' for each group
min_values = result.groupby(['id_x', 'apt_det_id', 'airport_ident', 'le_ident', 'he_ident'])['gate_distance_from_rwy_nm'].transform('min')
max_values = result.groupby(['id_x', 'apt_det_id', 'airport_ident', 'le_ident', 'he_ident'])['gate_distance_from_rwy_nm'].transform('max')

# Add these as new columns to the DataFrame
result_copy['min_gate_distance'] = min_values
result_copy['max_gate_distance'] = max_values

# Now, you can filter rows where 'gate_distance_from_rwy_nm' matches the min or max values
# To specifically keep rows with the minimum value:
result_min = result_copy[result_copy['gate_distance_from_rwy_nm'] == result_copy['min_gate_distance']]

# To specifically keep rows with the maximum value:
result_max = result_copy[result_copy['gate_distance_from_rwy_nm'] == result_copy['max_gate_distance']]


cols_of_interest = ['id_x', 'apt_det_id', 'airport_ident', 'le_ident', 'he_ident', 'min', 'gate_distance_from_rwy_nm']
result_min = result_min[cols_of_interest].rename({'min':'time_entry_min_distance', 'gate_distance_from_rwy_nm':'min_gate_distance_from_rwy_nm'},axis=1)
result_max = result_max[cols_of_interest].rename({'min':'time_entry_max_distance', 'gate_distance_from_rwy_nm':'max_gate_distance_from_rwy_nm'},axis=1)

det = result_min.merge(result_max, on=['id_x', 'apt_det_id', 'airport_ident', 'le_ident', 'he_ident'], how='outer')

det['time_since_minimum_distance'] = det['time_entry_min_distance']-det['time_entry_max_distance']

det['time_since_minimum_distance_s'] = det['time_since_minimum_distance'].dt.total_seconds()

det['status'] = det['time_since_minimum_distance_s'].apply(lambda l: 'arrival' if l > 0 else 'departure')
det['status'] = det['status'].fillna('undetermined')

det = det[['id_x', 'apt_det_id', 'airport_ident', 'le_ident', 'he_ident','status']]

gb_cols = ['id_x', 'apt_det_id', 'airport_ident', 'le_ident', 'he_ident', 'gate_type']
result = result.groupby(gb_cols).agg(
    entry_time_approach_area=('min', 'min'),
    exit_time_approach_area=('max', 'max'),
    intersected_subsections=('gate_distance_from_rwy_nm', 'count'),
    minimal_distance_runway=('gate_distance_from_rwy_nm', 'min'),
    maximal_distance_runway=('gate_distance_from_rwy_nm', 'max')
)
result = result.reset_index()

rwy_result_cols = ['id_x', 'apt_det_id', 'airport_ident', 'le_ident', 'he_ident']

rwy_result = result[rwy_result_cols + ['gate_type']]
rwy_result = rwy_result[rwy_result['gate_type']=='runway_hexagons']
rwy_result = rwy_result[rwy_result_cols]
rwy_result['runway_detected'] = True

result = result.merge(rwy_result, on=rwy_result_cols, how = 'left')

result['runway_detected'] = result['runway_detected'].fillna(False)

result = result[result['gate_type']!='runway_hexagons']

result['high_number_intersections'] = result['intersected_subsections']>5

result['low_minimal_distance'] = result['minimal_distance_runway']<5

result['touched_closest_segment_to_rw'] = result['minimal_distance_runway']==1

result['touched_second_closest_segment_to_rw'] = result['minimal_distance_runway']<=2 

approach_detected_weight = 0.3
rwy_detected_weight = 2
high_number_intersections_weight = 1 
low_minimal_distance_weight = 1
touched_closest_segment_to_rw_weight = 1.5
touched_second_closest_segment_to_rw_weight = 0.75

max_score = approach_detected_weight + rwy_detected_weight + high_number_intersections_weight + low_minimal_distance_weight + touched_closest_segment_to_rw_weight + touched_second_closest_segment_to_rw_weight

result['score'] = (
                   1*approach_detected_weight + # For all flights in this dataset an approach is detected (i.e., they entered the approach cone)
                   result['runway_detected'].apply(int)*rwy_detected_weight + 
                   result['high_number_intersections'].apply(int)*high_number_intersections_weight + 
                   result['low_minimal_distance'].apply(int)*touched_closest_segment_to_rw_weight + 
                   result['touched_closest_segment_to_rw'].apply(int)*touched_closest_segment_to_rw_weight + 
                   result['touched_second_closest_segment_to_rw'].apply(int)*touched_second_closest_segment_to_rw_weight
                  ) / max_score * 100

result = result.reset_index(drop=True)

result = result.merge(det,on=['id_x', 'apt_det_id', 'airport_ident','le_ident','he_ident'], how ='left')

result['status'] = result['status'].fillna('undetermined')

result['rwy'] = result['le_ident'] + '/' + result['he_ident']

rwy_winner = result.loc[result.groupby(['id_x','apt_det_id','airport_ident'])['score'].idxmax()]
rwy_winner['score'] = rwy_winner['score'].apply(str)
rwy_winner = rwy_winner.groupby(['id_x', 'apt_det_id', 'airport_ident'])['le_ident', 'he_ident', 'rwy','score', 'status'].agg(', '.join).reset_index()
rwy_winner = rwy_winner.rename({
    'id_x':'id',
    'rwy' : 'likely_rwy',
    'score': 'likely_rwy_score',
    'status': 'likely_rwy_status'
    }, axis=1)

id_cols = ['id', 'apt_det_id', 'airport_ident', 'le_ident', 'he_ident']
rwy_winner_flag = rwy_winner[id_cols]

  rwy_winner = rwy_winner.groupby(['id_x', 'apt_det_id', 'airport_ident'])['le_ident', 'he_ident', 'rwy','score', 'status'].agg(', '.join).reset_index()


In [247]:
rwy_winner_flag['winner'] = True

result = result.rename({'id_x':'id'}, axis=1)
result = result.merge(rwy_winner_flag, on = id_cols, how='left') 
result['winner'] = result['winner'].fillna(False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rwy_winner_flag['winner'] = True


In [249]:
rwy_losers = result[result['winner']==False]

rwy_losers['score'] = rwy_losers['score'].apply(str)
rwy_losers = rwy_losers.groupby(['id', 'apt_det_id','airport_ident'])['le_ident', 'he_ident', 'rwy','score', 'status'].agg(', '.join).reset_index()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rwy_losers['score'] = rwy_losers['score'].apply(str)
  rwy_losers = rwy_losers.groupby(['id', 'apt_det_id','airport_ident'])['le_ident', 'he_ident', 'rwy','score', 'status'].agg(', '.join).reset_index()


In [251]:
rwy_losers = rwy_losers.rename({
    'rwy' : 'potential_other_rwys',
    'score': 'potential_other_rwy_scores',
    'status': 'potential_other_rwy_status'
    }, axis=1)[['id','apt_det_id', 'airport_ident', 'potential_other_rwys', 'potential_other_rwy_scores', 'potential_other_rwy_status']]

rwy_determined = rwy_winner.merge(rwy_losers, on=['id','apt_det_id','airport_ident'], how='left')

In [252]:
rwy_determined

Unnamed: 0,id,apt_det_id,airport_ident,le_ident,he_ident,likely_rwy,likely_rwy_score,likely_rwy_status,potential_other_rwys,potential_other_rwy_scores,potential_other_rwy_status
0,0a0048-DAH2054-2023-08-02,0a0048-DAH2054-2023-08-02_59,EGLL,09L,27R,09L/27R,77.09923664122137,arrival,,,
1,3472cc-IBE31ZZ-2023-08-02,3472cc-IBE31ZZ-2023-08-02_11,EGLL,09R,27L,09R/27L,92.36641221374046,departure,,,
2,39e684-AFR79ZE-2023-08-02,39e684-AFR79ZE-2023-08-02_79,LFPG,08L,26R,08L/26R,42.74809160305343,arrival,08R/26L,38.93129770992366,arrival
3,3c00af-KAY51-2023-08-02,3c00af-KAY51-2023-08-02_31,EGLL,09R,27L,09R/27L,92.36641221374046,departure,,,
4,3c65cb-DLH5U-2023-08-02,3c65cb-DLH5U-2023-08-02_38,EGLL,09R,27L,09R/27L,92.36641221374046,departure,,,
...,...,...,...,...,...,...,...,...,...,...,...
78,ab60e3-DAL31-2023-08-02,ab60e3-DAL31-2023-08-02_32,EGLL,09R,27L,09R/27L,61.832061068702295,departure,09L/27R,38.93129770992366,departure
79,ab79ca-AAL47-2023-08-02,ab79ca-AAL47-2023-08-02_33,EGLL,09R,27L,09R/27L,92.36641221374046,departure,09L/27R,38.93129770992366,departure
80,c0103a-ACA903-2023-08-02,c0103a-ACA903-2023-08-02_5,EGLL,09R,27L,09R/27L,61.832061068702295,departure,09L/27R,38.93129770992366,departure
81,c01723-ACA858-2023-08-02,c01723-ACA858-2023-08-02_36,EGLL,09L,27R,09L/27R,61.832061068702295,arrival,,,


In [None]:
rwy_winner

In [None]:
rwy_losers['score'] = rwy_losers['score'].apply(str)

rwy_losers.groupby(['id_x','airport_ident'])['rwy','score'].agg(', '.join)

In [None]:
df.id_x.value_counts()

In [None]:
result.groupby()[''].agg(max)

In [None]:
result['minimal_dist_rwy_<_5nm']

In [None]:
result[result['id_x']=='4009d8-BAW3ET-2023-08-02']

In [None]:
result

In [None]:
result['id_x'].value_counts()

In [None]:
gb_cols = ['id_x','airport_ident','le_ident','he_ident','gate_type']
result.groupby(gb_cols)['min','max','gate_distance_from_rwy_nm'].agg(['count', min, max])

In [None]:
result['type_gate'] = result.gate_id.apply(lambda l:)

In [None]:
pd.concat(,axis=1)

In [None]:
result

In [None]:
#res_gb = res_gb.explode('ident')

In [None]:
df_m = df_f.merge(res_gb, left_on='id', right_on='id_x', how='left')

In [None]:
df_m['ident']

In [None]:
import plotly.express as px

px.line(df[df['id'].isin(ids)], x='time', y='baroaltitude_fl', color='id')

In [None]:
import h3_viz

In [None]:
latitude = apt_hex['latitude_deg'].values[0]
longitude = apt_hex['longitude_deg'].values[0]

m = h3_viz.choropleth_map(
        apt_hex,
        column_name='elevation_ft',
        border_color='black',
        fill_opacity=0.7,
        color_map_name='Reds',
        initial_map=None,
        initial_location=[latitude, longitude],
        initial_zoom = 14,
        tooltip_columns = ['ident', 'latitude_deg', 'longitude_deg']
)

m#.save('airport_hex.html')

In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import udf,from_unixtime, min, max, to_date, pandas_udf, col, PandasUDFType, lit, round
from pyspark.sql.types import DoubleType, ArrayType, StructType, StructField, StringType
from pyspark.sql import functions as F
from pyspark.sql import Window

import os, time
import subprocess
import os,shutil
from datetime import datetime
import pandas as pd
import numpy as np
from IPython.display import display, HTML

import requests
from shapely.geometry import LineString, Polygon
from shapely.ops import transform
import pyproj
from functools import partial
from shapely.geometry import LineString
from shapely.ops import transform
from pyproj import Proj, Transformer
import pandas as pd
import folium
from shapely.geometry import Polygon
from shapely.ops import unary_union
import shapely.geometry
import h3
import h3_viz
from sklearn.preprocessing import LabelEncoder

# Settings
project = "project_aiu"


# Getting today's date
today = datetime.today().strftime('%d %B %Y')

# Spark Session Initialization
shutil.copy("/runtime-addons/cmladdon-2.0.40-b150/log4j.properties", "/etc/spark/conf/") # Setting logging properties

spark = SparkSession.builder \
    .appName("OSN ADEP ADES Identification") \
    .config("spark.log.level", "ERROR")\
    .config("spark.hadoop.fs.azure.ext.cab.required.group", "eur-app-aiu-dev") \
    .config("spark.kerberos.access.hadoopFileSystems", "abfs://storage-fs@cdpdldev0.dfs.core.windows.net/data/project/aiu.db/unmanaged") \
    .config("spark.driver.cores", "1") \
    .config("spark.driver.memory", "8G") \
    .config("spark.executor.memory", "5G") \
    .config("spark.executor.cores", "1") \
    .config("spark.executor.instances", "2") \
    .config("spark.dynamicAllocation.maxExecutors", "6") \
    .config("spark.network.timeout", "800s") \
    .config("spark.executor.heartbeatInterval", "400s") \
    .enableHiveSupport() \
    .getOrCreate()

# Get environment variables
engine_id = os.getenv('CDSW_ENGINE_ID')
domain = os.getenv('CDSW_DOMAIN')

# Format the URL
url = f"https://spark-{engine_id}.{domain}"

# Display the clickable URL
display(HTML(f'<a href="{url}">{url}</a>'))

airports_df = spark.sql(f"""
    SELECT id, ident, iso_country, continent, latitude_deg, longitude_deg, elevation_ft, type
    FROM {project}.oa_airports
    WHERE (ident LIKE 'E%' OR ident LIKE 'L%' OR ident LIKE 'U%')
    AND (type = 'large_airport' OR type = 'medium_airport');
""")

import math
import json

def generate_circle_polygon(lat, lon, radius_nautical_miles, num_points=360):
    """
    Generate a polygon in GeoJSON format around a given latitude and longitude
    with a specified radius in nautical miles.
    
    :param lat: Latitude of the center point
    :param lon: Longitude of the center point
    :param radius_nautical_miles: Radius in nautical miles
    :param num_points: Number of points to generate for the polygon
    :return: A dictionary representing the polygon in GeoJSON format
    """
    # Convert radius from nautical miles to kilometers
    radius_km = radius_nautical_miles * 1.852
    
    # Function to convert from degrees to radians
    def degrees_to_radians(degrees):
        return degrees * math.pi / 180
    
    # Function to calculate the next point given a distance and bearing
    def calculate_point(lat, lon, distance_km, bearing):
        R = 6371.01  # Earth's radius in kilometers
        lat_rad = degrees_to_radians(lat)
        lon_rad = degrees_to_radians(lon)
        distance_rad = distance_km / R
        bearing_rad = degrees_to_radians(bearing)
        
        lat_new_rad = math.asin(math.sin(lat_rad) * math.cos(distance_rad) +
                                math.cos(lat_rad) * math.sin(distance_rad) * math.cos(bearing_rad))
        lon_new_rad = lon_rad + math.atan2(math.sin(bearing_rad) * math.sin(distance_rad) * math.cos(lat_rad),
                                           math.cos(distance_rad) - math.sin(lat_rad) * math.sin(lat_new_rad))
                                           
        lat_new = math.degrees(lat_new_rad)
        lon_new = math.degrees(lon_new_rad)
        return [lon_new, lat_new]
    
    # Generate points
    points = []
    for i in range(num_points):
        bearing = 360 / num_points * i
        point = calculate_point(lat, lon, radius_km, bearing)
        points.append(point)
    #points.append(points[0])  # Close the polygon by repeating the first point
    
    # Create GeoJSON
    geojson = {
        "type": "Polygon",
        "coordinates": [points]
    }
    
    geojson_str = json.dumps(geojson)
    
    return geojson_str

def fill_circle_with_hexagons(polygon_json, resolution=8):
    polygon = json.loads(polygon_json)
    hexagons = h3.polyfill(polygon, resolution, geo_json_conformant=True)
    return list(hexagons)

import folium
from shapely.ops import unary_union

geojson = generate_circle_polygon(lat=27.994402, lon=-93, radius_nautical_miles=35, num_points=360)

In [None]:
h3.polyfill?

In [None]:
folium.GeoJson?

In [None]:
h3.h3_to_geo_boundary?

In [None]:
h3.h3_to_geo?

In [None]:
h3.polyfill?

In [None]:
hexagons = fill_circle_with_hexagons(geojson, resolution=8)

In [None]:
df = pd.DataFrame.from_dict({'hex_id':hexagons, 'color': 1})

In [None]:
import folium
folium.GeoJson?

In [None]:
import h3
h3.h3_to_geo?

In [None]:
m = h3_viz.choropleth_map(
        df,
        column_name='color',
        border_color='black',
        fill_opacity=0.7,
        color_map_name='Reds',
        initial_map=None,
        initial_location=[latitude, longitude],
        initial_zoom = 14,
        tooltip_columns = []
)

m

In [None]:
import folium
import json
import h3

def plot_hexagons(hexagons, map_object):
    """
    Plot hexagons on a Folium map.

    Parameters:
    - hexagons: A list of hexagon IDs.
    - map_object: A Folium Map object to which the hexagons will be added.
    """
    for hexagon in hexagons:
        hex_boundary = h3.h3_to_geo_boundary(hexagon, geo_json=False)
        #print(hex_boundary)
        #print(hex_boundary)
        folium.Polygon(locations=hex_boundary, color="blue", fill=True).add_to(map_object)

# Assuming `geojson` contains the circular area and `fill_circle_with_hexagons` has been called appropriately
hexagons = fill_circle_with_hexagons(geojson, resolution=8)

# Initialize a Folium map at a specific location
m = folium.Map(location=[27.994402, -88.760254], zoom_start=6)

# Plot the circular area
folium.GeoJson(geojson, style_function=lambda x: {'fillColor': 'red', 'color': 'red'}).add_to(m)

# Plot hexagons within the circle
plot_hexagons(hexagons, m)

# Display the map
m


In [None]:
len(eurocontrol_countries_iso2)

In [None]:
airports_df

In [None]:
airports_df.continent.value_counts()

In [None]:
airports_df.longitude_deg.value_counts()

In [None]:
import pandas as pd 

In [None]:
import h3_viz

In [None]:
airports_df = pd.read_parquet('../data/airport_hex/airport_hex_res_6.parquet')

In [None]:
import h3

In [None]:
h3.h3_to_geo?

In [None]:
a, b = 

In [None]:
airports_df['hex_id'].isna().value_counts()

In [None]:
airports_dff = airports_df[~airports_df['hex_id'].isna()]

In [None]:
airports_dff.latitude_deg.value_counts()

In [None]:
airports_dff.longitude_deg.value_counts()

In [None]:
map_viz = h3_viz.choropleth_map(
      airports_dff.explode('hex_id'),
      column_name='latitude_deg',
      border_color='black',
      fill_opacity=0.7,
      color_map_name='Reds',
      initial_map=None,
      initial_location=[airports_df.latitude_deg.values[0], airports_df.longitude_deg.values[0]],
      initial_zoom = 14,
      tooltip_columns = []
  )
map_viz

In [None]:
json

In [None]:
fill_polygon_with_hexagons()