# Formatting Data  
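Use this notebook to format the cleaned operations and aircraft data into a rolling initial state that can be used downstream. The states are sorted by time, split into chunks of 10 entries, and exported as a Python file.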

In [12]:
import pandas as pd

# Read the cleaned operations log (one row per takeoff/landing) for 2025-03-01
operations_csv = '../../Data/AnalysisData/operations_cleaned_20250301.csv'
operations = pd.read_csv(operations_csv)

# Preview the first rows to confirm the columns loaded as expected
operations.head()

Unnamed: 0,time,operation,flight,ac_type,runway
0,2025-03-01 00:00:38,landing,SWA4157,B38M,27.0
1,2025-03-01 00:02:05,takeoff,SKW3378,E75L,27.0
2,2025-03-01 00:04:27,landing,BAW82P,A35K,27.0
3,2025-03-01 00:08:37,landing,SWA3338,B737,27.0
4,2025-03-01 00:09:33,takeoff,EJA838,C700,27.0


In [13]:
# Parse the timestamp strings; values that do not match the format become NaT
operations['time'] = pd.to_datetime(operations['time'], format='%Y-%m-%d %H:%M:%S', errors='coerce')

# Convert runway to a nullable integer so that missing runways stay <NA>
operations['runway'] = pd.to_numeric(operations['runway'], errors='coerce').astype('Int64')

operations['flight'] = operations['flight'].astype(str)

# Make callsigns unique: the first occurrence keeps its name, the k-th repeat
# (k = 1, 2, ...) becomes "<callsign>_<k>". cumcount() numbers the rows of each
# callsign 0, 1, 2, ... in file order, which replaces the row-by-row loop.
occurrence = operations.groupby('flight').cumcount()
is_repeat = occurrence > 0
operations.loc[is_repeat, 'flight'] = (
    operations.loc[is_repeat, 'flight'] + '_' + occurrence[is_repeat].astype(str)
)

operations.head()

Unnamed: 0,time,operation,flight,ac_type,runway
0,2025-03-01 00:00:38,landing,SWA4157,B38M,27
1,2025-03-01 00:02:05,takeoff,SKW3378,E75L,27
2,2025-03-01 00:04:27,landing,BAW82P,A35K,27
3,2025-03-01 00:08:37,landing,SWA3338,B737,27
4,2025-03-01 00:09:33,takeoff,EJA838,C700,27


In [14]:
# gather rolling initial state for takeoff operations
#
# Every takeoff becomes one state dict. Position, altitude, speed and heading are
# all written as 0.0; only callsign, runway, model and time are taken from the data.
# Raises ValueError on a missing runway, a runway other than 9 or 27, or an
# unparseable time, so bad rows are never silently exported.
VALID_TAKEOFF_RUNWAYS = (9, 27)

rolling_initial_state = []
# Iterate the columns in lockstep so every field is read positionally
# (the error messages report the row position i, as before).
operation_rows = zip(
    operations['operation'],
    operations['runway'],
    operations['time'],
    operations['flight'],
    operations['ac_type'],
)
for i, (operation, runway_val, time_val, callsign, ac_type) in enumerate(operation_rows):
    if operation != 'takeoff':
        continue
    if pd.isna(runway_val):
        # handle missing value
        raise ValueError(f"Missing runway value at index {i}")
    if runway_val not in VALID_TAKEOFF_RUNWAYS:
        # handle unexpected value
        raise ValueError(f"Unexpected runway value at index {i}: {runway_val}")
    if pd.isnull(time_val):
        raise ValueError(f"Invalid time value at index {i}: {time_val}")

    # Seconds elapsed since midnight of the operation's day
    seconds_of_day = time_val.hour * 3600 + time_val.minute * 60 + time_val.second
    rolling_initial_state.append({
        'callsign': callsign,
        # plain int so the exported repr is a bare literal, not a numpy scalar
        'runway': int(runway_val),
        'lat': 0.0,
        'lon': 0.0,
        'alt': 0.0,
        'v_z': 0.0,
        'gspd': 0.0,
        'hdg': 0.0,
        'model': ac_type,
        'time_added': seconds_of_day,
        'state': 'takeoff'
    })

print(rolling_initial_state[:5])  # Display the first 5 entries for verification

[{'callsign': 'SKW3378', 'runway': 27, 'lat': 0.0, 'lon': 0.0, 'alt': 0.0, 'v_z': 0.0, 'gspd': 0.0, 'hdg': 0.0, 'model': 'E75L', 'time_added': 125, 'state': 'takeoff'}, {'callsign': 'EJA838', 'runway': 27, 'lat': 0.0, 'lon': 0.0, 'alt': 0.0, 'v_z': 0.0, 'gspd': 0.0, 'hdg': 0.0, 'model': 'C700', 'time_added': 573, 'state': 'takeoff'}, {'callsign': 'UAL2117', 'runway': 27, 'lat': 0.0, 'lon': 0.0, 'alt': 0.0, 'v_z': 0.0, 'gspd': 0.0, 'hdg': 0.0, 'model': 'B738', 'time_added': 768, 'state': 'takeoff'}, {'callsign': 'SWA4363', 'runway': 27, 'lat': 0.0, 'lon': 0.0, 'alt': 0.0, 'v_z': 0.0, 'gspd': 0.0, 'hdg': 0.0, 'model': 'B737', 'time_added': 817, 'state': 'takeoff'}, {'callsign': 'SKW3473', 'runway': 27, 'lat': 0.0, 'lon': 0.0, 'alt': 0.0, 'v_z': 0.0, 'gspd': 0.0, 'hdg': 0.0, 'model': 'E75L', 'time_added': 907, 'state': 'takeoff'}]


In [15]:
# Sanity check: at this point the most recently appended state should be a takeoff
last_state = rolling_initial_state[-1]
print(last_state)

{'callsign': 'SWA4850_1', 'runway': 27, 'lat': 0.0, 'lon': 0.0, 'alt': 0.0, 'v_z': 0.0, 'gspd': 0.0, 'hdg': 0.0, 'model': 'B737', 'time_added': 86102, 'state': 'takeoff'}


In [16]:
# Landing states are built from the cleaned aircraft samples, so load those next
aircraft_csv = '../../Data/AnalysisData/aircraft_cleaned_20250301.csv'
aircraft = pd.read_csv(aircraft_csv)

# Preview the first rows to confirm the columns loaded as expected
aircraft.head()

Unnamed: 0,timestamp,flight,t,gs,track,baro_rate,lat,lon,alt
0,2025-03-01 00:00:00,ACA1013,A321,0.0,314.0,0.0,32.733467,-117.206653,0
1,2025-03-01 00:00:00,SWA4157,B38M,144.0,286.55,-832.0,32.729324,-117.172101,158
2,2025-03-01 00:00:00,SKW3378,E75L,0.0,,,32.729517,-117.176939,0
3,2025-03-01 00:00:00,EJA838,C700,0.0,14.0,0.0,32.73345,-117.178396,0
4,2025-03-01 00:00:00,SWA3095,B737,0.0,,,32.733112,-117.195735,0


In [17]:

# Manual exclusions: list every callsign whose samples should be left out of
# the initial state, then drop all of them with a single mask.
excluded_flights = ['SWA4157', 'SWA2605']
keep_row = ~aircraft['flight'].isin(excluded_flights)
aircraft = aircraft[keep_row]

aircraft.head()

Unnamed: 0,timestamp,flight,t,gs,track,baro_rate,lat,lon,alt
0,2025-03-01 00:00:00,ACA1013,A321,0.0,314.0,0.0,32.733467,-117.206653,0
2,2025-03-01 00:00:00,SKW3378,E75L,0.0,,,32.729517,-117.176939,0
3,2025-03-01 00:00:00,EJA838,C700,0.0,14.0,0.0,32.73345,-117.178396,0
4,2025-03-01 00:00:00,SWA3095,B737,0.0,,,32.733112,-117.195735,0
5,2025-03-01 00:00:00,SWA3523,B737,0.0,,,32.732706,-117.196978,0


In [18]:
# Normalise aircraft dtypes: timestamp -> datetime (unparseable values become NaT),
# flight -> str, and ground speed / track / baro rate / lat / lon / alt -> float
# (unparseable values become NaN).
#
# assign() builds a new frame instead of writing columns into the filtered frame
# from the previous cell, which avoids pandas' SettingWithCopyWarning, and
# astype(float) replaces the per-element apply(float).
numeric_columns = ['gs', 'track', 'baro_rate', 'lat', 'lon', 'alt']
aircraft = aircraft.assign(
    timestamp=pd.to_datetime(aircraft['timestamp'], format='%Y-%m-%d %H:%M:%S', errors='coerce'),
    flight=aircraft['flight'].astype(str),
    **{col: pd.to_numeric(aircraft[col], errors='coerce').astype(float) for col in numeric_columns},
)

aircraft.head()

Unnamed: 0,timestamp,flight,t,gs,track,baro_rate,lat,lon,alt
0,2025-03-01,ACA1013,A321,0.0,314.0,0.0,32.733467,-117.206653,0.0
2,2025-03-01,SKW3378,E75L,0.0,,,32.729517,-117.176939,0.0
3,2025-03-01,EJA838,C700,0.0,14.0,0.0,32.73345,-117.178396,0.0
4,2025-03-01,SWA3095,B737,0.0,,,32.733112,-117.195735,0.0
5,2025-03-01,SWA3523,B737,0.0,,,32.732706,-117.196978,0.0


In [19]:
# Add landing states to rolling initial state
#
# For every landing in `operations`, take the earliest aircraft sample of the same
# (base) callsign that lies within the time window of the landing and whose track
# is close to the approach course of the landing runway, and turn that sample
# into a state dict appended to `rolling_initial_state`.

MATCH_WINDOW_S = 600        # sample must be within 10 minutes of the landing time
HEADING_TOLERANCE_DEG = 10  # sample track must be within this many degrees of the course
FT_TO_M = 0.3048            # altitude is converted from feet to meters
KNOTS_PER_MPS = 1.94384     # ground speed is converted from knots to meters per second
# Expected approach track per runway. 286 is the value this notebook has always
# used (all landings shown here are runway 27). 106 is its reciprocal.
# NOTE(review): assumes runways 9 and 27 are opposite ends of the same strip - confirm.
APPROACH_HEADING_DEG = {27: 286, 9: 106}
DEFAULT_APPROACH_HEADING_DEG = 286  # used when the runway is missing or not listed above


def _float_or_none(value):
    """Return `value` as a plain float, or None when it is missing."""
    return None if pd.isna(value) else float(value)


# Drop landings left over from an earlier run of this cell so that re-running it
# does not append every landing a second time.
rolling_initial_state = [s for s in rolling_initial_state if s['state'] != 'landing']

# Only samples with a timestamp and a track can match; group them once by callsign
# instead of rescanning the whole frame for every landing. Row order inside each
# group is preserved, so "first match" still means the earliest row in the file.
usable_samples = aircraft[aircraft['timestamp'].notna() & aircraft['track'].notna()]
samples_by_flight = {flight: rows for flight, rows in usable_samples.groupby('flight')}

seen = set()
for _, op in operations.iterrows():
    if op['operation'] != 'landing':
        continue
    callsign = str(op['flight'])
    # Extract base callsign (before any _N suffix)
    base_callsign = callsign.split('_')[0]
    # A missing callsign has already been stringified to 'nan'; skip it rather
    # than matching it against aircraft rows whose callsign is missing as well.
    if callsign in seen or base_callsign == 'nan':
        continue
    op_time = op['time']
    runway = None if pd.isna(op['runway']) else int(op['runway'])
    target_heading = APPROACH_HEADING_DEG.get(runway, DEFAULT_APPROACH_HEADING_DEG)

    ac_rows = samples_by_flight.get(base_callsign)
    if ac_rows is not None:
        time_offset_s = (ac_rows['timestamp'] - op_time).dt.total_seconds().abs()
        # Smallest angular difference, so a course near 0/360 would also work
        heading_error = ((ac_rows['track'] - target_heading + 180) % 360 - 180).abs()
        ac_rows = ac_rows[(time_offset_s <= MATCH_WINDOW_S) &
                          (heading_error <= HEADING_TOLERANCE_DEG)]
    if ac_rows is None or ac_rows.empty:
        print(f"No aircraft data found for {callsign} (base {base_callsign}) within {MATCH_WINDOW_S // 60} min of {op_time} and heading near {target_heading}")
        continue

    ac_row = ac_rows.iloc[0]
    sample_time = ac_row['timestamp']  # never NaT: such rows were filtered out above
    alt_ft = _float_or_none(ac_row['alt'])
    gs_knots = _float_or_none(ac_row['gs'])
    state = {
        'callsign': callsign,
        'lat': _float_or_none(ac_row['lat']),
        'lon': _float_or_none(ac_row['lon']),
        'alt': None if alt_ft is None else alt_ft * FT_TO_M,  # FEET TO METERS
        # NOTE(review): vertical speed is fixed at 0.0; the sample's baro_rate is not used - confirm intended.
        'v_z': 0.0,
        'gspd': None if gs_knots is None else gs_knots / KNOTS_PER_MPS,  # KNOTS to METERS PER SECOND
        'hdg': _float_or_none(ac_row['track']),
        'model': str(ac_row['t']),
        'state': 'landing',
        # Second of day of the matched aircraft sample (not of the landing itself)
        'time_added': sample_time.hour * 3600 + sample_time.minute * 60 + sample_time.second,
        # plain int (or None) so the exported repr is a bare literal
        'runway': runway
    }
    rolling_initial_state.append(state)
    seen.add(callsign)
    print(f"Added landing state for {callsign} (base {base_callsign}) at {ac_row['timestamp']} with runway {state['runway']}")

print(rolling_initial_state[-1])  # Display last state; should be a landing

No aircraft data found for SWA4157 (base SWA4157) within 10 min of 2025-03-01 00:00:38 and heading near 286
Added landing state for BAW82P (base BAW82P) at 2025-03-01 00:00:15 with runway 27
Added landing state for SWA3338 (base SWA3338) at 2025-03-01 00:04:40 with runway 27
Added landing state for UAL222 (base UAL222) at 2025-03-01 00:07:05 with runway 27
Added landing state for SKW3346 (base SKW3346) at 2025-03-01 00:14:35 with runway 27
Added landing state for SKW3496 (base SKW3496) at 2025-03-01 00:16:30 with runway 27
Added landing state for ASA1454 (base ASA1454) at 2025-03-01 00:18:35 with runway 27
Added landing state for TSU7122 (base TSU7122) at 2025-03-01 00:22:50 with runway 27
Added landing state for SKW3439 (base SKW3439) at 2025-03-01 00:29:15 with runway 27
Added landing state for SWA6169 (base SWA6169) at 2025-03-01 00:37:35 with runway 27
Added landing state for SKW4087 (base SKW4087) at 2025-03-01 00:43:35 with runway 27
Added landing state for SWA3564 (base SWA3564)

In [20]:
# Put takeoffs and landings into one chronological order (ascending second of day).
# The slice assignment keeps the same list object, like an in-place sort.
rolling_initial_state[:] = sorted(rolling_initial_state, key=lambda entry: entry['time_added'])

In [21]:
# Cut the ordered states into consecutive groups of `chunk_size`;
# the final group is shorter when the total is not a multiple of it.
chunk_size = 10
chunks = []
for start in range(0, len(rolling_initial_state), chunk_size):
    chunks.append(rolling_initial_state[start:start + chunk_size])

In [22]:
# In each chunk, start times at 0
for chunk in chunks:
    # Read the offset once, before the loop: the first entry is itself rewritten
    # to 0 on the first iteration, so re-reading chunk[0]['time_added'] inside the
    # loop would subtract 0 from every later entry and leave it unshifted.
    chunk_start = chunk[0]['time_added']
    for state in chunk:
        state['time_added'] -= chunk_start

In [23]:
# export the rolling initial state dictionary to a python file
# Each chunk is written as one assignment, rolling_initial_state_00, _01, ...
with open("../rolling_initial_state_20250301.py", "w") as f:
    # A separate loop variable keeps the full `rolling_initial_state` list intact;
    # rebinding it here would leave only the last chunk behind for earlier cells
    # that are re-run afterwards.
    for i, chunk in enumerate(chunks):
        f.write(f"rolling_initial_state_{i:02d} = {chunk}\n")