***
### Import of required libraries
***

In [None]:
import glob
from tqdm.auto import tqdm
import pandas as pd

***
### Import of data
***

##### Import of M1 data

In [None]:
# Column names of an m1 so6 file, in file order (the files carry no header row)
so6_columns = [
    "segment_id",
    "origin",
    "destination",
    "ac_type",
    "segment_start_t",
    "segment_stop_t",
    "segment_start_fl",
    "segment_stop_fl",
    "segment_status",
    "callsign",
    "segment_start_d",
    "segment_stop_d",
    "segment_start_lat",
    "segment_start_lon",
    "segment_stop_lat",
    "segment_stop_lon",
    "identifier",
    "sequence",
    "segment_length_nm",
    "segment_parity_color",
]

# Get a list of all the m1 so6 files. glob returns paths in arbitrary order,
# so sort them to make the row order of the result reproducible between runs.
filelist = sorted(
    glob.glob(
        "/mnt/beegfs/store/Projects_CRM/RAD_paper/flightplans/m1_daily/*.so6"
    )
)

# Create a list to store the daily dataframes
read_files = []

# Loop through the files and read each one into a dataframe which is then
# appended to the list
for filename in tqdm(filelist):
    day = pd.read_csv(
        filename,
        sep=" ",
        header=None,
        index_col=False,
        names=so6_columns,
    )
    read_files.append(day)

# Concatenate the list of dataframes into a single dataframe. ignore_index
# gives the result one unique 0..n-1 index instead of repeating every file's
# own 0-based index.
m1_all = pd.concat(read_files, ignore_index=True)

##### Import of flightlist data

In [None]:
# Location of the flightlist parquet file
flightlist_path = "/mnt/beegfs/store/Projects_CRM/RAD_paper/flightplans/flightlist/flights_jan_2023-05-01_2024-05-01.parquet"

# Read the flightlist into a dataframe
flightlist = pd.read_parquet(flightlist_path)

***
### Preprocessing of data
***

##### Preprocessing of M1 data

In [None]:
# Convert the lat and lon columns from decimal minutes to decimal degrees.
# NOTE: this divides in place, so re-running the cell divides by 60 again.
for coord_col in (
    "segment_start_lat",
    "segment_start_lon",
    "segment_stop_lat",
    "segment_stop_lon",
):
    m1_all[coord_col] = m1_all[coord_col] / 60

# Convert the time columns to datetime.time.
# pandas parses an all-digit column as integers by default, which drops
# leading zeros (e.g. 001530 becomes 1530). Without padding, "%H%M%S" would
# read "1530" as 15:03:00 instead of 00:15:30, so pad back to six digits.
for time_col in ("segment_start_t", "segment_stop_t"):
    m1_all[time_col] = pd.to_datetime(
        m1_all[time_col].astype(str).str.zfill(6), format="%H%M%S"
    ).dt.time

# Convert the date columns to datetime.date, padding to six digits for the
# same reason as the time columns.
for date_col in ("segment_start_d", "segment_stop_d"):
    m1_all[date_col] = pd.to_datetime(
        m1_all[date_col].astype(str).str.zfill(6), format="%y%m%d"
    ).dt.date

##### Preprocessing of flightlist data

In [None]:
# Mapping from the original flightlist column names to the names used in
# the rest of this notebook
flightlist_column_names = {
    "ID": "identifier",
    "WK_TBL_CAT": "wtc",
    "REGISTRATION": "registration",
    "AIRCRAFT_ADDRESS": "icao24",
    "AIRCRAFT_OPERATOR": "operator",
    "ICAO_FLT_TYPE": "icao_flight_type",
}

# Apply the mapping; columns not listed keep their original name
flightlist = flightlist.rename(columns=flightlist_column_names)

***
### Data merging
***

##### Merge

In [None]:
# Flightlist columns to bring over; "identifier" is also the join key
flightlist_columns = [
    "identifier",
    "wtc",
    "registration",
    "icao24",
    "operator",
    "icao_flight_type",
]

# Left join on the identifier column, so every m1 segment row is kept even
# when the flightlist has no matching entry
m1_all = m1_all.merge(
    flightlist[flightlist_columns],
    on="identifier",
    how="left",
)

# Final column selection and order: flight-level columns first, then the
# segment description, then the segment start and stop details
column_order = [
    "identifier",
    "callsign",
    "operator",
    "registration",
    "ac_type",
    "origin",
    "destination",
    "icao_flight_type",
    "wtc",
    "sequence",
    "segment_id",
    "segment_length_nm",
    "segment_status",
    "segment_parity_color",
    "segment_start_d",
    "segment_start_t",
    "segment_start_fl",
    "segment_start_lat",
    "segment_start_lon",
    "segment_stop_d",
    "segment_stop_t",
    "segment_stop_fl",
    "segment_stop_lat",
    "segment_stop_lon",
]
m1_all = m1_all[column_order]
m1_all

##### Save

In [None]:
# Write the merged flight plan dataframe to a parquet file
output_path = "/mnt/beegfs/store/Projects_CRM/RAD_paper/flightplans/flightplans_complete.parquet"
m1_all.to_parquet(output_path)