# Formatting Data  
Use this notebook to format the cleaned operations and aircraft data into rolling initial-state windows that are exported to a Python file.

In [13]:
import pandas as pd

# Load data
# Cleaned operations log; the path is relative to this notebook's working directory.
operations = pd.read_csv('../../Data/AnalysisData/operations_cleaned_20250501.csv')

# Preview the first rows to confirm the file was read as expected.
operations.head()

Unnamed: 0,time,operation,flight,ac_type,runway
0,2025-05-01 00:00:08,takeoff,AAL1951,A320,27.0
1,2025-05-01 00:00:39,takeoff,N950DM,GA6C,27.0
2,2025-05-01 00:01:46,takeoff,UAL2486,B738,27.0
3,2025-05-01 00:03:05,takeoff,SWA1356,B38M,27.0
4,2025-05-01 00:06:20,landing,UPS2636,B763,27.0


In [14]:
# Parse the time column; values that do not match the expected layout become NaT
operations['time'] = pd.to_datetime(
    operations['time'],
    format='%Y-%m-%d %H:%M:%S',
    errors='coerce',
)

# Runway numbers as nullable integers, so a missing runway stays missing
operations['runway'] = pd.to_numeric(
    operations['runway'],
    errors='coerce',
).astype('Int64')

operations['flight'] = operations['flight'].astype(str)

# Make callsigns unique: the first occurrence keeps its name, the k-th repeat becomes "<name>_<k>"
occurrences = {}
unique_flights = []
for flight_name in operations['flight']:
    repeat_number = occurrences.get(flight_name, 0)
    if repeat_number == 0:
        unique_flights.append(flight_name)
    else:
        unique_flights.append(f"{flight_name}_{repeat_number}")
    occurrences[flight_name] = repeat_number + 1
operations['flight'] = unique_flights

operations.head()

Unnamed: 0,time,operation,flight,ac_type,runway
0,2025-05-01 00:00:08,takeoff,AAL1951,A320,27
1,2025-05-01 00:00:39,takeoff,N950DM,GA6C,27
2,2025-05-01 00:01:46,takeoff,UAL2486,B738,27
3,2025-05-01 00:03:05,takeoff,SWA1356,B38M,27
4,2025-05-01 00:06:20,landing,UPS2636,B763,27


In [15]:
# Gather rolling initial states for takeoff operations.
# Every takeoff row becomes one state dict with zeroed position/speed fields and
# `time_added` expressed as seconds since midnight of the operation time.
rolling_initial_state = []

# Iterate the needed columns in lockstep so every value is read positionally;
# `i` is the row position reported in the error messages below.
takeoff_columns = zip(
    operations['operation'],
    operations['runway'],
    operations['time'],
    operations['flight'],
    operations['ac_type'],
)
for i, (operation, runway_val, time_val, callsign, model) in enumerate(takeoff_columns):
    if operation != 'takeoff':
        continue

    if pd.isna(runway_val):
        # handle missing value
        raise ValueError(f"Missing runway value at index {i}")
    if runway_val not in (9, 27):
        # handle unexpected value
        raise ValueError(f"Unexpected runway value at index {i}: {runway_val}")
    if not pd.notnull(time_val):
        raise ValueError(f"Invalid time value at index {i}: {time_val}")

    seconds_of_day = time_val.hour * 3600 + time_val.minute * 60 + time_val.second

    # Runways 9 and 27 are both recorded; the states differ only in the stored runway number.
    rolling_initial_state.append({
        'callsign': callsign,
        'runway': int(runway_val),  # plain int, matching the literal 9 / 27 stored before
        'lat': 0.0,
        'lon': 0.0,
        'alt': 0.0,
        'v_z': 0.0,
        'gspd': 0.0,
        'hdg': 0.0,
        'model': model,
        'time_added': seconds_of_day,
        'state': 'takeoff'
    })

print(rolling_initial_state[:5])  # Display the first 5 entries for verification

[{'callsign': 'AAL1951', 'runway': 27, 'lat': 0.0, 'lon': 0.0, 'alt': 0.0, 'v_z': 0.0, 'gspd': 0.0, 'hdg': 0.0, 'model': 'A320', 'time_added': 8, 'state': 'takeoff'}, {'callsign': 'N950DM', 'runway': 27, 'lat': 0.0, 'lon': 0.0, 'alt': 0.0, 'v_z': 0.0, 'gspd': 0.0, 'hdg': 0.0, 'model': 'GA6C', 'time_added': 39, 'state': 'takeoff'}, {'callsign': 'UAL2486', 'runway': 27, 'lat': 0.0, 'lon': 0.0, 'alt': 0.0, 'v_z': 0.0, 'gspd': 0.0, 'hdg': 0.0, 'model': 'B738', 'time_added': 106, 'state': 'takeoff'}, {'callsign': 'SWA1356', 'runway': 27, 'lat': 0.0, 'lon': 0.0, 'alt': 0.0, 'v_z': 0.0, 'gspd': 0.0, 'hdg': 0.0, 'model': 'B38M', 'time_added': 185, 'state': 'takeoff'}, {'callsign': 'SWA2316', 'runway': 27, 'lat': 0.0, 'lon': 0.0, 'alt': 0.0, 'v_z': 0.0, 'gspd': 0.0, 'hdg': 0.0, 'model': 'B737', 'time_added': 450, 'state': 'takeoff'}]


In [16]:
# Sanity check: only takeoff states have been appended so far, so the last entry should be a takeoff.
print(rolling_initial_state[-1])  # Display last state; should be a takeoff

{'callsign': 'ASA1490', 'runway': 27, 'lat': 0.0, 'lon': 0.0, 'alt': 0.0, 'v_z': 0.0, 'gspd': 0.0, 'hdg': 0.0, 'model': 'B39M', 'time_added': 85965, 'state': 'takeoff'}


In [18]:
# Add landing planes to rolling initial state
# Step 1: load the cleaned aircraft reports; landing states are later built from these rows.

aircraft = pd.read_csv('../../Data/AnalysisData/aircraft_cleaned_20250501.csv')

# Preview the first rows to confirm the file was read as expected.
aircraft.head()

Unnamed: 0,timestamp,flight,t,gs,track,baro_rate,lat,lon,alt
0,2025-05-01 00:00:00,AAL2361,A321,9.8,,,32.735208,-117.202317,0
1,2025-05-01 00:00:00,N950DM,GA6C,7.8,,,32.730192,-117.175453,0
2,2025-05-01 00:00:00,UAL2486,B738,0.0,,,32.729565,-117.17677,0
3,2025-05-01 00:00:00,SWA1356,B38M,1.4,,,32.729347,-117.177525,0
4,2025-05-01 00:00:00,,B763,0.0,,,32.73821,-117.186314,0


In [None]:

# here, manually remove any entries that you don't want for the initial state
# The lines below are commented-out examples: as written this cell filters nothing.
# Uncomment and adapt one line per flight that should be excluded.

# aircraft = aircraft[aircraft['flight'] != 'SWA4157']
# aircraft = aircraft[aircraft['flight'] != 'SWA2605']

# Preview the (possibly filtered) frame.
aircraft.head()

Unnamed: 0,timestamp,flight,t,gs,track,baro_rate,lat,lon,alt
0,2025-03-01 00:00:00,ACA1013,A321,0.0,314.0,0.0,32.733467,-117.206653,0
2,2025-03-01 00:00:00,SKW3378,E75L,0.0,,,32.729517,-117.176939,0
3,2025-03-01 00:00:00,EJA838,C700,0.0,14.0,0.0,32.73345,-117.178396,0
4,2025-03-01 00:00:00,SWA3095,B737,0.0,,,32.733112,-117.195735,0
5,2025-03-01 00:00:00,SWA3523,B737,0.0,,,32.732706,-117.196978,0


In [19]:
# Convert the timestamp column to datetime; values that do not match the format become NaT
aircraft['timestamp'] = pd.to_datetime(aircraft['timestamp'], format='%Y-%m-%d %H:%M:%S', errors='coerce')

# Callsigns as text.
# NOTE(review): on pandas versions where astype(str) stringifies missing values, an empty
# callsign becomes the literal text 'nan' - confirm downstream matching accounts for that.
aircraft['flight'] = aircraft['flight'].astype(str)

# Ground speed, track, baro rate, latitude, longitude and altitude as floats.
# Unparseable entries become NaN; astype(float) converts the whole column at once
# instead of calling float() on every element.
for column in ['gs', 'track', 'baro_rate', 'lat', 'lon', 'alt']:
    aircraft[column] = pd.to_numeric(aircraft[column], errors='coerce').astype(float)

aircraft.head()

Unnamed: 0,timestamp,flight,t,gs,track,baro_rate,lat,lon,alt
0,2025-05-01,AAL2361,A321,9.8,,,32.735208,-117.202317,0.0
1,2025-05-01,N950DM,GA6C,7.8,,,32.730192,-117.175453,0.0
2,2025-05-01,UAL2486,B738,0.0,,,32.729565,-117.17677,0.0
3,2025-05-01,SWA1356,B38M,1.4,,,32.729347,-117.177525,0.0
4,2025-05-01,,B763,0.0,,,32.73821,-117.186314,0.0


In [20]:
# Add landing states to rolling initial state.
# For each landing in `operations`, use the first aircraft report of the same flight that lies
# within the time window around the landing and whose track is close to the approach heading.
APPROACH_HEADING_DEG = 286.0   # heading written into every landing state
HEADING_TOLERANCE_DEG = 20     # accepted |track - approach heading|
MATCH_WINDOW_SECONDS = 600     # accepted |report time - landing time| (10 minutes)
# Text forms a missing callsign can take once the flight column has been cast to str
MISSING_CALLSIGNS = {'', 'nan', 'None', '<NA>'}

# This part of the filter does not depend on the landing being processed, so apply it once.
approach_reports = aircraft[
    aircraft['timestamp'].notna()
    & aircraft['track'].notna()
    & ((aircraft['track'] - APPROACH_HEADING_DEG).abs() <= HEADING_TOLERANCE_DEG)
]

seen = set()
for idx, op in operations.iterrows():
    if op['operation'] != 'landing':
        continue

    callsign = str(op['flight'])
    if callsign in seen:
        continue

    # Extract base callsign (before any _N suffix)
    base_callsign = callsign.split('_')[0]
    if base_callsign in MISSING_CALLSIGNS:
        # str() hides missing values from pd.isna, so compare against their text forms;
        # otherwise a landing without a callsign could match unidentified aircraft rows.
        continue

    op_time = op['time']
    # Keep reports of this flight within the time window; row order of `aircraft` is preserved
    seconds_from_landing = (approach_reports['timestamp'] - op_time).dt.total_seconds()
    ac_rows = approach_reports[
        (approach_reports['flight'] == base_callsign)
        & (seconds_from_landing.abs() <= MATCH_WINDOW_SECONDS)
    ]
    if ac_rows.empty:
        print(f"No aircraft data found for {callsign} (base {base_callsign}) within 10 min of {op_time} and heading near 286")
        continue

    ac_row = ac_rows.iloc[0]
    report_time = ac_row['timestamp']
    runway = op['runway']
    state = {
        'callsign': callsign,
        'lat': float(ac_row['lat']) if not pd.isna(ac_row['lat']) else None,
        'lon': float(ac_row['lon']) if not pd.isna(ac_row['lon']) else None,
        'alt': (float(ac_row['alt']) * 0.3048) if not pd.isna(ac_row['alt']) else None, # FEET TO METERS
        'v_z': 0.0,
        'gspd': (float(ac_row['gs']) / 1.94384) if not pd.isna(ac_row['gs']) else None, # KNOTS to METERS PER SECOND
        'hdg': APPROACH_HEADING_DEG,  # fixed approach heading, not the reported track
        'model': str(ac_row['t']),
        'state': 'landing',
        # seconds since midnight of the matched report
        'time_added': (
            int(report_time.hour) * 3600 +
            int(report_time.minute) * 60 +
            int(report_time.second)
        ) if not pd.isna(report_time) else None,
        # plain int (or None) so the value prints as an ordinary Python literal
        'runway': int(runway) if not pd.isna(runway) else None
    }
    rolling_initial_state.append(state)
    seen.add(callsign)
    print(f"Added landing state for {callsign} (base {base_callsign}) at {ac_row['timestamp']} with runway {state['runway']}")

print(rolling_initial_state[-1])  # Display last state; should be a landing

Added landing state for UPS2636 (base UPS2636) at 2025-05-01 00:01:45 with runway 27
Added landing state for ACA1014 (base ACA1014) at 2025-05-01 00:05:55 with runway 27
Added landing state for SKW3607 (base SKW3607) at 2025-05-01 00:08:50 with runway 27
Added landing state for VJA536 (base VJA536) at 2025-05-01 00:11:50 with runway 27
Added landing state for SWA495 (base SWA495) at 2025-05-01 00:21:50 with runway 27
Added landing state for SWA1568 (base SWA1568) at 2025-05-01 00:25:15 with runway 27
Added landing state for SWA1397 (base SWA1397) at 2025-05-01 00:28:20 with runway 27
Added landing state for SKW3378 (base SKW3378) at 2025-05-01 00:31:20 with runway 27
Added landing state for SKW5415 (base SKW5415) at 2025-05-01 00:33:10 with runway 27
Added landing state for PCM7684 (base PCM7684) at 2025-05-01 00:34:55 with runway None
Added landing state for DAL2613 (base DAL2613) at 2025-05-01 00:39:00 with runway 27
Added landing state for BAW82P (base BAW82P) at 2025-05-01 00:42:15

In [21]:
# Order the states chronologically by their seconds-of-day value (in place).
# list.sort is stable, so entries with equal time_added keep their current relative order.
rolling_initial_state.sort(
    key=lambda entry: entry['time_added'],
)

In [22]:
# Clamp altitudes to a ceiling of 914.4 m (3000 feet); states without an altitude are left untouched
for entry in rolling_initial_state:
    altitude = entry['alt']
    if altitude is None:
        continue
    if altitude > 914.4:  # 3000 feet in meters
        entry['alt'] = 914.4

In [23]:
# Keep only the states assigned to runway 27; everything else (including a missing runway) is dropped
rolling_initial_state = list(
    filter(lambda entry: entry['runway'] == 27, rolling_initial_state)
)

In [24]:
import copy

def create_rolling_initial_states(original_states, window_size=10):
    """Build overlapping windows of consecutive states with rebased times.

    Window ``k`` holds deep copies of ``original_states[k:k + window_size]``; within
    each window every ``'time_added'`` is shifted so the first entry is at 0.
    The input list and its dicts are never modified.

    Args:
        original_states: Sequence of dicts, each with a numeric ``'time_added'`` key.
        window_size: Number of consecutive states per window (positive integer).

    Returns:
        A list of windows (lists of dicts), one per start position. Empty when
        ``original_states`` has fewer than ``window_size`` entries.

    Raises:
        ValueError: If ``window_size`` is smaller than 1. Without this check a size of 0
            fails with an unrelated IndexError and negative sizes can slice wrong windows.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be a positive integer, got {window_size}")

    rolling_states = []
    last_start = len(original_states) - window_size
    for start in range(last_start + 1):
        # Deep copy so rebasing the times below cannot leak into the source states
        window = copy.deepcopy(original_states[start:start + window_size])

        # Capture the reference time before the loop overwrites the first entry
        first_time = window[0]['time_added']
        for entry in window:
            entry['time_added'] = entry['time_added'] - first_time

        rolling_states.append(window)

    return rolling_states

# Use the new function to create rolling initial state chunks
# Each chunk is a window of 10 consecutive states whose time_added values are relative to the chunk's first state.
chunks = create_rolling_initial_states(rolling_initial_state, window_size=10)

In [25]:
# Discard chunks in which any state has a relative time_added above 1800 seconds
chunks = list(
    filter(
        lambda window: all(entry['time_added'] <= 1800 for entry in window),
        chunks,
    )
)

In [26]:
# export the rolling initial state dictionary to a python file
# Each chunk is written as its own assignment line: rolling_initial_state_00 = [...], rolling_initial_state_01 = [...], ...
with open("../rolling_initial_state_20250501.py", "w") as f:
    # Use a dedicated loop variable: rebinding `rolling_initial_state` here would replace the
    # full state list with the last chunk and break any earlier cell that is re-run afterwards.
    for i, chunk in enumerate(chunks):
        f.write(f"rolling_initial_state_{i:02d} = {chunk}\n")