In [None]:
import pandas as pd
import numpy as np

# Read the cleaned 2018 flights table (pre-merge snapshot) into memory.
X = pd.read_parquet(
    "/content/2018_flights_cleaned_final_before_merging.parquet"
)

In [None]:
# Remove the FlightDate column before the frame is written back out.
# Non-inplace and errors="ignore" keep this cell re-runnable: a later cell
# writes a parquet of the same name, so on a second run the loaded frame may
# already lack FlightDate and a strict drop would raise KeyError.
X = X.drop(columns=["FlightDate"], errors="ignore")

In [None]:
# Write the trimmed frame back to the same absolute path it was loaded from.
# Every later reader in this notebook opens '/content/...'; a relative path
# only matched that location when the working directory was /content.
# The index is deliberately kept (no index=False) so the file layout stays as before.
X.to_parquet("/content/2018_flights_cleaned_final_before_merging.parquet")

In [None]:
# Drop the in-memory frame; from here on the data is read from the parquet file.
del X

In [None]:
!pip install duckdb




In [None]:
import duckdb
# Read-write connection to the on-disk database file used by the cells below.
con = duckdb.connect("flight_weather.duckdb")

In [None]:
# Materialise the cleaned flights parquet as the DuckDB table `flights`,
# replacing any table of that name left by an earlier run. Only the parquet
# file is read here; the weather CSVs are loaded in a separate cell.
con.execute("""
CREATE or replace TABLE flights AS
SELECT * FROM '/content/2018_flights_cleaned_final_before_merging.parquet';
""")


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

<duckdb.duckdb.DuckDBPyConnection at 0x7ddee47237f0>

In [None]:
# Build the `weather` table from every file matching '*.csv' (the pattern is
# relative, so it is resolved against the current working directory).
# `airport_code` is the part of each row's source file name that precedes
# ".csv", with any directory prefix ('/' or '\') stripped by the regex.
# NOTE(review): the query references a `filename` column without passing
# filename=true to read_csv_auto — confirm the DuckDB version in use exposes it.
# .fetchdf() returns the statement's result as a pandas DataFrame.
con.execute("""
CREATE OR REPLACE TABLE weather AS
SELECT *,
       regexp_extract(filename, '([^/\\\\]+)\\.csv$', 1) AS airport_code
FROM read_csv_auto('*.csv');
""").fetchdf()

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,Count
0,4697994


In [None]:
# Build `flight_weather`: every row of `flights` plus summarised weather at
# the origin (o_*) and at the destination (d_*).
#
# Each CTE left-joins `flights` to `weather` on airport code and on weather
# observations whose DATE falls within 6 hours either side of the departure
# (origin) or arrival (destination) timestamp, then aggregates per flight key:
# AVG for WND/TMP/DEW/SLP/CIG/VIS/AA1/AA2/OC1, MODE for AT*/AU*/AW*/GD*.
# The final SELECT joins both summaries back to `flights` on
# `__index_level_0__` and drops the duplicated key columns via EXCLUDE.
#
# NOTE(review): `__index_level_0__` is used as the per-flight identifier —
# presumably the pandas index persisted in the parquet; confirm it is unique,
# otherwise the final joins will multiply rows.
con.execute("""CREATE OR REPLACE TABLE flight_weather AS
WITH origin_weather AS (
    SELECT
        f.__index_level_0__,  -- or whatever unique identifier you have
        f.Origin,
        f.Dep_DateTime,
        AVG(o.WND)  AS o_WND,
        AVG(o.TMP)  AS o_TMP,
        AVG(o.DEW)  AS o_DEW,
        AVG(o.SLP)  AS o_SLP,
        AVG(o.CIG)  AS o_CIG,
        AVG(o.VIS)  AS o_VIS,
        AVG(o.AA1)  AS o_AA1,
        AVG(o.AA2)  AS o_AA2,
        MODE(o.AT1) AS o_AT1,
        MODE(o.AT2) AS o_AT2,
        MODE(o.AU1) AS o_AU1,
        MODE(o.AU2) AS o_AU2,
        MODE(o.AW1) AS o_AW1,
        MODE(o.AW2) AS o_AW2,
        MODE(o.GD1) AS o_GD1,
        MODE(o.GD2) AS o_GD2,
        AVG(o.OC1)  AS o_OC1
    FROM flights f
    LEFT JOIN weather o
      ON f.Origin = o.airport_code
     AND o.DATE BETWEEN f.Dep_DateTime - INTERVAL 6 HOUR AND f.Dep_DateTime + INTERVAL 6 HOUR
    GROUP BY f.__index_level_0__, f.Origin, f.Dep_DateTime
),
dest_weather AS (
    SELECT
        f.__index_level_0__,
        f.Dest,
        f.Arr_DateTime,
        AVG(d.WND)  AS d_WND,
        AVG(d.TMP)  AS d_TMP,
        AVG(d.DEW)  AS d_DEW,
        AVG(d.SLP)  AS d_SLP,
        AVG(d.CIG)  AS d_CIG,
        AVG(d.VIS)  AS d_VIS,
        AVG(d.AA1)  AS d_AA1,
        AVG(d.AA2)  AS d_AA2,
        MODE(d.AT1) AS d_AT1,
        MODE(d.AT2) AS d_AT2,
        MODE(d.AU1) AS d_AU1,
        MODE(d.AU2) AS d_AU2,
        MODE(d.AW1) AS d_AW1,
        MODE(d.AW2) AS d_AW2,
        MODE(d.GD1) AS d_GD1,
        MODE(d.GD2) AS d_GD2,
        AVG(d.OC1)  AS d_OC1
    FROM flights f
    LEFT JOIN weather d
      ON f.Dest = d.airport_code
     AND d.DATE BETWEEN f.Arr_DateTime - INTERVAL 6 HOUR AND f.Arr_DateTime + INTERVAL 6 HOUR
    GROUP BY f.__index_level_0__, f.Dest, f.Arr_DateTime
)

SELECT
    f.*,
    o.* EXCLUDE (__index_level_0__, Origin, Dep_DateTime),
    d.* EXCLUDE (__index_level_0__, Dest, Arr_DateTime)
FROM flights f
LEFT JOIN origin_weather o ON f.__index_level_0__ = o.__index_level_0__
LEFT JOIN dest_weather d   ON f.__index_level_0__ = d.__index_level_0__;
""")

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [None]:
import os
from glob import glob
import shutil

In [None]:
# Locations used by the incremental merge loop.
weather_folder = "/content"
base_flight_file = "/content/2018_flights_cleaned_final_before_merging.parquet"
working_flight_file = "flights_weather_current.parquet"

# Seed the working file from the base dataset, but only on the first run:
# an existing working file is left untouched.
working_file_missing = not os.path.exists(working_flight_file)
if working_file_missing:
    shutil.copy(base_flight_file, working_flight_file)

# One CSV per weather station, processed in alphabetical order.
weather_files = glob(f"{weather_folder}/*.csv")
weather_files.sort()

In [None]:
# Weather fields summarised per flight, in output-column order. "AVG" fields
# are averaged as DOUBLE; "MODE" fields keep their most frequent value.
WEATHER_AGGREGATES = [
    ("WND", "AVG"), ("CIG", "AVG"), ("VIS", "AVG"), ("TMP", "AVG"),
    ("DEW", "AVG"), ("SLP", "AVG"), ("AA1", "AVG"), ("AA2", "AVG"),
    ("AT1", "MODE"), ("AT2", "MODE"), ("AU1", "MODE"), ("AU2", "MODE"),
    ("AW1", "MODE"), ("AW2", "MODE"), ("GD1", "MODE"), ("GD2", "MODE"),
    ("OC1", "AVG"),
]


def _weather_select_items(prefix, weather_alias, existing_columns):
    """Build the SELECT items that summarise one side of a flight.

    Args:
        prefix: Output column prefix, "o" (origin) or "d" (destination).
        weather_alias: SQL alias of the joined weather table ("w" or "wd").
        existing_columns: Column names already present in the ``flights``
            table, i.e. values carried over from previously merged airports.

    Returns:
        A list of "<expression> AS <prefix>_<FIELD>" strings. When the target
        column already exists, the fresh aggregate is COALESCEd with the
        carried-over value so earlier airports' results are preserved.
    """
    items = []
    for field, aggregate in WEATHER_AGGREGATES:
        target = f"{prefix}_{field}"
        # Result types are pinned so COALESCE sees the same type for every
        # airport, whatever the CSV reader inferred for an individual file.
        if aggregate == "AVG":
            sql_type = "DOUBLE"
            fresh = f"AVG(CAST({weather_alias}.{field} AS DOUBLE))"
        else:
            sql_type = "VARCHAR"
            fresh = f"CAST(MODE({weather_alias}.{field}) AS VARCHAR)"
        if target in existing_columns:
            fresh = f"COALESCE({fresh}, CAST(ANY_VALUE(f.{target}) AS {sql_type}))"
        items.append(f"{fresh} AS {target}")
    return items


for i, wf in enumerate(weather_files, 1):
    airport_code = os.path.basename(wf).split('.')[0].upper()
    print(f"\nProcessing {airport_code} ({i}/{len(weather_files)})...")

    # Register the working flights parquet and current weather CSV.
    # The airport code comes from the file name itself rather than from a
    # fixed position in the path, so the weather folder may sit at any depth.
    con.execute(f"CREATE OR REPLACE TABLE flights AS SELECT * FROM '{working_flight_file}';")
    con.execute(f"CREATE OR REPLACE TABLE weather AS SELECT *, '{airport_code}' as airport_code FROM '{wf}';")

    print("read")

    # From the second airport onwards the working file already contains the
    # o_*/d_* columns. They are excluded from f.* and folded into the new
    # aggregates, instead of being selected a second time under the same name.
    existing_columns = {row[0] for row in con.execute("DESCRIBE flights").fetchall()}
    weather_targets = [
        f"{prefix}_{field}" for prefix in ("o", "d") for field, _ in WEATHER_AGGREGATES
    ]
    carried_over = [name for name in weather_targets if name in existing_columns]
    flight_columns = (
        f"f.* EXCLUDE ({', '.join(carried_over)})" if carried_over else "f.*"
    )
    select_items = (
        _weather_select_items("o", "w", existing_columns)
        + _weather_select_items("d", "wd", existing_columns)
    )
    select_list = ",\n        ".join(select_items)

    merged_df = con.execute(f"""
    SELECT
        {flight_columns},
        {select_list}
    FROM flights f

    -- ===== ORIGIN JOIN =====
    LEFT JOIN weather w
      ON f.Origin = w.airport_code
     AND w.DATE BETWEEN f.Dep_DateTime - INTERVAL 6 HOUR AND f.Dep_DateTime + INTERVAL 6 HOUR

    -- ===== DESTINATION JOIN =====
    LEFT JOIN weather wd
      ON f.Dest = wd.airport_code
     AND wd.DATE BETWEEN f.Arr_DateTime - INTERVAL 6 HOUR AND f.Arr_DateTime + INTERVAL 6 HOUR

    GROUP BY ALL;
    """).df()

    print("merged")

    # Save the result to a temporary parquet first ...
    temp_out = f"flights_weather_{airport_code}.parquet"
    merged_df.to_parquet(temp_out, index=False)

    # ... then swap it in as the working file in a single step, so an
    # interruption never leaves the notebook without a working file.
    os.replace(temp_out, working_flight_file)

    print(f"Updated working file with {airport_code} weather.")

print("\n🎉 All weather files integrated!")
print(f"Final merged dataset → {working_flight_file}")


Processing ABE (1/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

read


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

merged
Updated working file with ABE weather.

Processing ABI (2/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

read


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [None]:
import os
import duckdb
import pandas as pd
from glob import glob

# --- Connection and directory layout for the per-airport batch passes ---
con = duckdb.connect()
weather_folder = "/content"
base_flight_file = "/content/2018_flights_cleaned_final_before_merging.parquet"

origin_dir = "/content/merged_batches_origin"
dest_dir = "/content/merged_batches_dest"
# Make sure both output folders exist before any batch is written.
for batch_dir in (origin_dir, dest_dir):
    os.makedirs(batch_dir, exist_ok=True)

# One CSV per weather station, processed in alphabetical order.
weather_files = glob(f"{weather_folder}/*.csv")
weather_files.sort()

# =======================================================
# PASS 1: ORIGIN WEATHER (disabled — every line of this pass is commented out)
# =======================================================
# for i, wf in enumerate(weather_files, 1):
#     airport_code = os.path.splitext(os.path.basename(wf))[0].upper()
#     print(f"\n[ORIGIN] Processing {airport_code} ({i}/{len(weather_files)})...")

#     # Load only flights where this airport is the ORIGIN
#     flights_subset = con.execute(f"""
#         SELECT *
#         FROM '{base_flight_file}'
#         WHERE Origin = '{airport_code}'
#     """).df()

#     if flights_subset.empty:
#         print(f"No origin flights for {airport_code}, skipping.")
#         continue

#     # Register tables
#     con.register("flights", flights_subset)
#     con.execute(f"""
#         CREATE OR REPLACE TABLE weather AS
#         SELECT *, '{airport_code}' AS airport_code FROM '{wf}';
#     """)

#     # Join with ±6h window on departure time
#     merged_df = con.execute("""
#         SELECT
#             f.*,
# AVG(CAST(w.WND AS DOUBLE))  AS o_WND,
# AVG(CAST(w.CIG AS DOUBLE))  AS o_CIG,
# AVG(CAST(w.VIS AS DOUBLE))  AS o_VIS,
# AVG(CAST(w.TMP AS DOUBLE))  AS o_TMP,
# AVG(CAST(w.DEW AS DOUBLE))  AS o_DEW,
# AVG(CAST(w.SLP AS DOUBLE))  AS o_SLP,
# AVG(CAST(w.AA1 AS DOUBLE))  AS o_AA1,
# AVG(CAST(w.AA2 AS DOUBLE))  AS o_AA2,
# MODE(w.AT1) AS o_AT1,
# MODE(w.AT2) AS o_AT2,
# MODE(w.AU1) AS o_AU1,
# MODE(w.AU2) AS o_AU2,
# MODE(w.AW1) AS o_AW1,
# MODE(w.AW2) AS o_AW2,
# MODE(w.GD1) AS o_GD1,
# MODE(w.GD2) AS o_GD2,
# AVG(CAST(w.OC1 AS DOUBLE))  AS o_OC1
#         FROM flights f
#         LEFT JOIN weather w
#           ON w.airport_code = f.Origin
#          AND w.DATE BETWEEN f.Dep_DateTime - INTERVAL 6 HOUR AND f.Dep_DateTime + INTERVAL 6 HOUR
#         GROUP BY ALL;
#     """).df()

#     merged_df.to_parquet(f"{origin_dir}/{airport_code}_origin.parquet", index=False)
#     print(f"âœ… Saved origin batch: {airport_code}_origin.parquet")

# =======================================================
# PASS 2: DESTINATION WEATHER
# =======================================================
# For each weather CSV, summarise the observations around every flight that
# arrives at that airport and write one parquet batch per airport to dest_dir.
for i, wf in enumerate(weather_files, 1):
    airport_code = os.path.splitext(os.path.basename(wf))[0].upper()
    print(f"\n[DEST] Processing {airport_code} ({i}/{len(weather_files)})...")

    # Load only flights where this airport is the DESTINATION. The code is
    # bound as a query parameter rather than spliced into the SQL text.
    flights_subset = con.execute(
        f"""
        SELECT *
        FROM '{base_flight_file}'
        WHERE Dest = ?
        """,
        [airport_code],
    ).df()

    if flights_subset.empty:
        print(f"No destination flights for {airport_code}, skipping.")
        continue

    # Register tables: the pandas subset as `flights`, the CSV as `weather`
    # tagged with the airport code taken from its file name.
    con.register("flights", flights_subset)
    con.execute(f"""
        CREATE OR REPLACE TABLE weather AS
        SELECT *, '{airport_code}' AS airport_code FROM '{wf}';
    """)

    # Join with ±6h window on arrival time; numeric fields are averaged as
    # DOUBLE, categorical fields keep their most frequent value.
    merged_df = con.execute("""
        SELECT
            f.*,
            AVG(CAST(wd.WND AS DOUBLE))  AS d_WND,
            AVG(CAST(wd.CIG AS DOUBLE))  AS d_CIG,
            AVG(CAST(wd.VIS AS DOUBLE))  AS d_VIS,
            AVG(CAST(wd.TMP AS DOUBLE))  AS d_TMP,
            AVG(CAST(wd.DEW AS DOUBLE))  AS d_DEW,
            AVG(CAST(wd.SLP AS DOUBLE))  AS d_SLP,
            AVG(CAST(wd.AA1 AS DOUBLE))  AS d_AA1,
            AVG(CAST(wd.AA2 AS DOUBLE))  AS d_AA2,
            MODE(wd.AT1) AS d_AT1,
            MODE(wd.AT2) AS d_AT2,
            MODE(wd.AU1) AS d_AU1,
            MODE(wd.AU2) AS d_AU2,
            MODE(wd.AW1) AS d_AW1,
            MODE(wd.AW2) AS d_AW2,
            MODE(wd.GD1) AS d_GD1,
            MODE(wd.GD2) AS d_GD2,
            AVG(CAST(wd.OC1 AS DOUBLE))  AS d_OC1
        FROM flights f
        LEFT JOIN weather wd
          ON wd.airport_code = f.Dest
         AND wd.DATE BETWEEN f.Arr_DateTime - INTERVAL 6 HOUR AND f.Arr_DateTime + INTERVAL 6 HOUR
        GROUP BY ALL;
    """).df()

    merged_df.to_parquet(f"{dest_dir}/{airport_code}_dest.parquet", index=False)
    print(f"✅ Saved destination batch: {airport_code}_dest.parquet")

print("\n🎉 All origin and destination weather merges complete!")



[DEST] Processing ABE (1/362)...
âœ… Saved destination batch: ABE_dest.parquet

[DEST] Processing ABI (2/362)...
âœ… Saved destination batch: ABI_dest.parquet

[DEST] Processing ABQ (3/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: ABQ_dest.parquet

[DEST] Processing ABR (4/362)...
âœ… Saved destination batch: ABR_dest.parquet

[DEST] Processing ABY (5/362)...
âœ… Saved destination batch: ABY_dest.parquet

[DEST] Processing ACK (6/362)...
âœ… Saved destination batch: ACK_dest.parquet

[DEST] Processing ACT (7/362)...
âœ… Saved destination batch: ACT_dest.parquet

[DEST] Processing ACV (8/362)...
âœ… Saved destination batch: ACV_dest.parquet

[DEST] Processing ACY (9/362)...
âœ… Saved destination batch: ACY_dest.parquet

[DEST] Processing ADK (10/362)...
âœ… Saved destination batch: ADK_dest.parquet

[DEST] Processing ADQ (11/362)...
âœ… Saved destination batch: ADQ_dest.parquet

[DEST] Processing AEX (12/362)...
âœ… Saved destination batch: AEX_dest.parquet

[DEST] Processing AGS (13/362)...
âœ… Saved destination batch: AGS_dest.parquet

[DEST] Processing AKN (14/362)...
âœ… Saved destination batch: AKN_dest.parquet

[DEST] Processing ALB (15/362)...
âœ… Saved destination batch: ALB_d

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: ANC_dest.parquet

[DEST] Processing APN (20/362)...
âœ… Saved destination batch: APN_dest.parquet

[DEST] Processing ART (21/362)...
âœ… Saved destination batch: ART_dest.parquet

[DEST] Processing ASE (22/362)...
âœ… Saved destination batch: ASE_dest.parquet

[DEST] Processing ATL (23/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: ATL_dest.parquet

[DEST] Processing ATW (24/362)...
âœ… Saved destination batch: ATW_dest.parquet

[DEST] Processing AUS (25/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: AUS_dest.parquet

[DEST] Processing AVL (26/362)...
âœ… Saved destination batch: AVL_dest.parquet

[DEST] Processing AVP (27/362)...
âœ… Saved destination batch: AVP_dest.parquet

[DEST] Processing AZA (28/362)...
âœ… Saved destination batch: AZA_dest.parquet

[DEST] Processing AZO (29/362)...
âœ… Saved destination batch: AZO_dest.parquet

[DEST] Processing BDL (30/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: BDL_dest.parquet

[DEST] Processing BET (31/362)...
âœ… Saved destination batch: BET_dest.parquet

[DEST] Processing BFF (32/362)...
âœ… Saved destination batch: BFF_dest.parquet

[DEST] Processing BFL (33/362)...
âœ… Saved destination batch: BFL_dest.parquet

[DEST] Processing BGM (34/362)...
âœ… Saved destination batch: BGM_dest.parquet

[DEST] Processing BGR (35/362)...
âœ… Saved destination batch: BGR_dest.parquet

[DEST] Processing BHM (36/362)...
âœ… Saved destination batch: BHM_dest.parquet

[DEST] Processing BIL (37/362)...
âœ… Saved destination batch: BIL_dest.parquet

[DEST] Processing BIS (38/362)...
âœ… Saved destination batch: BIS_dest.parquet

[DEST] Processing BJI (39/362)...
âœ… Saved destination batch: BJI_dest.parquet

[DEST] Processing BKG (40/362)...
âœ… Saved destination batch: BKG_dest.parquet

[DEST] Processing BLI (41/362)...
âœ… Saved destination batch: BLI_dest.parquet

[DEST] Processing BLV (42/362)...
âœ… Saved destination batch:

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: BNA_dest.parquet

[DEST] Processing BOI (45/362)...
âœ… Saved destination batch: BOI_dest.parquet

[DEST] Processing BOS (46/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: BOS_dest.parquet

[DEST] Processing BPT (47/362)...
âœ… Saved destination batch: BPT_dest.parquet

[DEST] Processing BQK (48/362)...
âœ… Saved destination batch: BQK_dest.parquet

[DEST] Processing BQN (49/362)...
âœ… Saved destination batch: BQN_dest.parquet

[DEST] Processing BRD (50/362)...
âœ… Saved destination batch: BRD_dest.parquet

[DEST] Processing BRO (51/362)...
âœ… Saved destination batch: BRO_dest.parquet

[DEST] Processing BRW (52/362)...
âœ… Saved destination batch: BRW_dest.parquet

[DEST] Processing BTM (53/362)...
âœ… Saved destination batch: BTM_dest.parquet

[DEST] Processing BTR (54/362)...
âœ… Saved destination batch: BTR_dest.parquet

[DEST] Processing BTV (55/362)...
âœ… Saved destination batch: BTV_dest.parquet

[DEST] Processing BUF (56/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: BUF_dest.parquet

[DEST] Processing BUR (57/362)...
âœ… Saved destination batch: BUR_dest.parquet

[DEST] Processing BWI (58/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: BWI_dest.parquet

[DEST] Processing BZN (59/362)...
âœ… Saved destination batch: BZN_dest.parquet

[DEST] Processing CAE (60/362)...
âœ… Saved destination batch: CAE_dest.parquet

[DEST] Processing CAK (61/362)...
âœ… Saved destination batch: CAK_dest.parquet

[DEST] Processing CDC (62/362)...
âœ… Saved destination batch: CDC_dest.parquet

[DEST] Processing CDV (63/362)...
âœ… Saved destination batch: CDV_dest.parquet

[DEST] Processing CGI (64/362)...
âœ… Saved destination batch: CGI_dest.parquet

[DEST] Processing CHA (65/362)...
âœ… Saved destination batch: CHA_dest.parquet

[DEST] Processing CHO (66/362)...
âœ… Saved destination batch: CHO_dest.parquet

[DEST] Processing CHS (67/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: CHS_dest.parquet

[DEST] Processing CID (68/362)...
âœ… Saved destination batch: CID_dest.parquet

[DEST] Processing CIU (69/362)...
âœ… Saved destination batch: CIU_dest.parquet

[DEST] Processing CKB (70/362)...
âœ… Saved destination batch: CKB_dest.parquet

[DEST] Processing CLE (71/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: CLE_dest.parquet

[DEST] Processing CLL (72/362)...
âœ… Saved destination batch: CLL_dest.parquet

[DEST] Processing CLT (73/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: CLT_dest.parquet

[DEST] Processing CMH (74/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: CMH_dest.parquet

[DEST] Processing CMI (75/362)...
âœ… Saved destination batch: CMI_dest.parquet

[DEST] Processing CMX (76/362)...
âœ… Saved destination batch: CMX_dest.parquet

[DEST] Processing CNY (77/362)...
âœ… Saved destination batch: CNY_dest.parquet

[DEST] Processing COD (78/362)...
âœ… Saved destination batch: COD_dest.parquet

[DEST] Processing COS (79/362)...
âœ… Saved destination batch: COS_dest.parquet

[DEST] Processing COU (80/362)...
âœ… Saved destination batch: COU_dest.parquet

[DEST] Processing CPR (81/362)...
âœ… Saved destination batch: CPR_dest.parquet

[DEST] Processing CRP (82/362)...
âœ… Saved destination batch: CRP_dest.parquet

[DEST] Processing CRW (83/362)...
âœ… Saved destination batch: CRW_dest.parquet

[DEST] Processing CSG (84/362)...
âœ… Saved destination batch: CSG_dest.parquet

[DEST] Processing CVG (85/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: CVG_dest.parquet

[DEST] Processing CWA (86/362)...
âœ… Saved destination batch: CWA_dest.parquet

[DEST] Processing CYS (87/362)...
âœ… Saved destination batch: CYS_dest.parquet

[DEST] Processing DAB (88/362)...
âœ… Saved destination batch: DAB_dest.parquet

[DEST] Processing DAL (89/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: DAL_dest.parquet

[DEST] Processing DAY (90/362)...
âœ… Saved destination batch: DAY_dest.parquet

[DEST] Processing DBQ (91/362)...
âœ… Saved destination batch: DBQ_dest.parquet

[DEST] Processing DCA (92/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: DCA_dest.parquet

[DEST] Processing DEN (93/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: DEN_dest.parquet

[DEST] Processing DFW (94/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: DFW_dest.parquet

[DEST] Processing DHN (95/362)...
âœ… Saved destination batch: DHN_dest.parquet

[DEST] Processing DIK (96/362)...
âœ… Saved destination batch: DIK_dest.parquet

[DEST] Processing DLG (97/362)...
âœ… Saved destination batch: DLG_dest.parquet

[DEST] Processing DLH (98/362)...
âœ… Saved destination batch: DLH_dest.parquet

[DEST] Processing DRO (99/362)...
âœ… Saved destination batch: DRO_dest.parquet

[DEST] Processing DRT (100/362)...
âœ… Saved destination batch: DRT_dest.parquet

[DEST] Processing DSM (101/362)...
âœ… Saved destination batch: DSM_dest.parquet

[DEST] Processing DTW (102/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: DTW_dest.parquet

[DEST] Processing DUT (103/362)...
âœ… Saved destination batch: DUT_dest.parquet

[DEST] Processing DVL (104/362)...
âœ… Saved destination batch: DVL_dest.parquet

[DEST] Processing EAR (105/362)...
âœ… Saved destination batch: EAR_dest.parquet

[DEST] Processing EAT (106/362)...
âœ… Saved destination batch: EAT_dest.parquet

[DEST] Processing EAU (107/362)...
âœ… Saved destination batch: EAU_dest.parquet

[DEST] Processing ECP (108/362)...
âœ… Saved destination batch: ECP_dest.parquet

[DEST] Processing EGE (109/362)...
âœ… Saved destination batch: EGE_dest.parquet

[DEST] Processing EKO (110/362)...
âœ… Saved destination batch: EKO_dest.parquet

[DEST] Processing ELM (111/362)...
âœ… Saved destination batch: ELM_dest.parquet

[DEST] Processing ELP (112/362)...
âœ… Saved destination batch: ELP_dest.parquet

[DEST] Processing ERI (113/362)...
âœ… Saved destination batch: ERI_dest.parquet

[DEST] Processing ESC (114/362)...
âœ… Saved destin

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: EWR_dest.parquet

[DEST] Processing EYW (119/362)...
âœ… Saved destination batch: EYW_dest.parquet

[DEST] Processing FAI (120/362)...
âœ… Saved destination batch: FAI_dest.parquet

[DEST] Processing FAR (121/362)...
âœ… Saved destination batch: FAR_dest.parquet

[DEST] Processing FAT (122/362)...
âœ… Saved destination batch: FAT_dest.parquet

[DEST] Processing FAY (123/362)...
âœ… Saved destination batch: FAY_dest.parquet

[DEST] Processing FCA (124/362)...
âœ… Saved destination batch: FCA_dest.parquet

[DEST] Processing FLG (125/362)...
âœ… Saved destination batch: FLG_dest.parquet

[DEST] Processing FLL (126/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: FLL_dest.parquet

[DEST] Processing FLO (127/362)...
âœ… Saved destination batch: FLO_dest.parquet

[DEST] Processing FNT (128/362)...
âœ… Saved destination batch: FNT_dest.parquet

[DEST] Processing FSD (129/362)...
âœ… Saved destination batch: FSD_dest.parquet

[DEST] Processing FSM (130/362)...
âœ… Saved destination batch: FSM_dest.parquet

[DEST] Processing FWA (131/362)...
âœ… Saved destination batch: FWA_dest.parquet

[DEST] Processing GCC (132/362)...
âœ… Saved destination batch: GCC_dest.parquet

[DEST] Processing GCK (133/362)...
âœ… Saved destination batch: GCK_dest.parquet

[DEST] Processing GEG (134/362)...
âœ… Saved destination batch: GEG_dest.parquet

[DEST] Processing GFK (135/362)...
âœ… Saved destination batch: GFK_dest.parquet

[DEST] Processing GGG (136/362)...
âœ… Saved destination batch: GGG_dest.parquet

[DEST] Processing GJT (137/362)...
âœ… Saved destination batch: GJT_dest.parquet

[DEST] Processing GNV (138/362)...
âœ… Saved destin

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: HNL_dest.parquet

[DEST] Processing HOB (156/362)...
âœ… Saved destination batch: HOB_dest.parquet

[DEST] Processing HOU (157/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: HOU_dest.parquet

[DEST] Processing HPN (158/362)...
âœ… Saved destination batch: HPN_dest.parquet

[DEST] Processing HRL (159/362)...
âœ… Saved destination batch: HRL_dest.parquet

[DEST] Processing HSV (160/362)...
âœ… Saved destination batch: HSV_dest.parquet

[DEST] Processing HTS (161/362)...
âœ… Saved destination batch: HTS_dest.parquet

[DEST] Processing HVN (162/362)...
âœ… Saved destination batch: HVN_dest.parquet

[DEST] Processing HYA (163/362)...
âœ… Saved destination batch: HYA_dest.parquet

[DEST] Processing HYS (164/362)...
âœ… Saved destination batch: HYS_dest.parquet

[DEST] Processing IAD (165/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: IAD_dest.parquet

[DEST] Processing IAG (166/362)...
âœ… Saved destination batch: IAG_dest.parquet

[DEST] Processing IAH (167/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: IAH_dest.parquet

[DEST] Processing ICT (168/362)...
âœ… Saved destination batch: ICT_dest.parquet

[DEST] Processing IDA (169/362)...
âœ… Saved destination batch: IDA_dest.parquet

[DEST] Processing ILM (170/362)...
âœ… Saved destination batch: ILM_dest.parquet

[DEST] Processing IMT (171/362)...
âœ… Saved destination batch: IMT_dest.parquet

[DEST] Processing IND (172/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: IND_dest.parquet

[DEST] Processing INL (173/362)...
âœ… Saved destination batch: INL_dest.parquet

[DEST] Processing IPT (174/362)...
âœ… Saved destination batch: IPT_dest.parquet

[DEST] Processing ISN (175/362)...
âœ… Saved destination batch: ISN_dest.parquet

[DEST] Processing ISP (176/362)...
âœ… Saved destination batch: ISP_dest.parquet

[DEST] Processing ITH (177/362)...
âœ… Saved destination batch: ITH_dest.parquet

[DEST] Processing ITO (178/362)...
âœ… Saved destination batch: ITO_dest.parquet

[DEST] Processing JAC (179/362)...
âœ… Saved destination batch: JAC_dest.parquet

[DEST] Processing JAN (180/362)...
âœ… Saved destination batch: JAN_dest.parquet

[DEST] Processing JAX (181/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: JAX_dest.parquet

[DEST] Processing JFK (182/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: JFK_dest.parquet

[DEST] Processing JHM (183/362)...
âœ… Saved destination batch: JHM_dest.parquet

[DEST] Processing JLN (184/362)...
âœ… Saved destination batch: JLN_dest.parquet

[DEST] Processing JMS (185/362)...
âœ… Saved destination batch: JMS_dest.parquet

[DEST] Processing JNU (186/362)...
âœ… Saved destination batch: JNU_dest.parquet

[DEST] Processing KOA (187/362)...
âœ… Saved destination batch: KOA_dest.parquet

[DEST] Processing KTN (188/362)...
âœ… Saved destination batch: KTN_dest.parquet

[DEST] Processing LAN (189/362)...
âœ… Saved destination batch: LAN_dest.parquet

[DEST] Processing LAR (190/362)...
âœ… Saved destination batch: LAR_dest.parquet

[DEST] Processing LAS (191/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: LAS_dest.parquet

[DEST] Processing LAW (192/362)...
âœ… Saved destination batch: LAW_dest.parquet

[DEST] Processing LAX (193/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: LAX_dest.parquet

[DEST] Processing LBB (194/362)...
âœ… Saved destination batch: LBB_dest.parquet

[DEST] Processing LBE (195/362)...
âœ… Saved destination batch: LBE_dest.parquet

[DEST] Processing LBF (196/362)...
âœ… Saved destination batch: LBF_dest.parquet

[DEST] Processing LBL (197/362)...
âœ… Saved destination batch: LBL_dest.parquet

[DEST] Processing LCH (198/362)...
âœ… Saved destination batch: LCH_dest.parquet

[DEST] Processing LCK (199/362)...
âœ… Saved destination batch: LCK_dest.parquet

[DEST] Processing LEX (200/362)...
âœ… Saved destination batch: LEX_dest.parquet

[DEST] Processing LFT (201/362)...
âœ… Saved destination batch: LFT_dest.parquet

[DEST] Processing LGA (202/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: LGA_dest.parquet

[DEST] Processing LGB (203/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: LGB_dest.parquet

[DEST] Processing LIH (204/362)...
âœ… Saved destination batch: LIH_dest.parquet

[DEST] Processing LIT (205/362)...
âœ… Saved destination batch: LIT_dest.parquet

[DEST] Processing LNK (206/362)...
âœ… Saved destination batch: LNK_dest.parquet

[DEST] Processing LNY (207/362)...
âœ… Saved destination batch: LNY_dest.parquet

[DEST] Processing LRD (208/362)...
âœ… Saved destination batch: LRD_dest.parquet

[DEST] Processing LSE (209/362)...
âœ… Saved destination batch: LSE_dest.parquet

[DEST] Processing LWB (210/362)...
âœ… Saved destination batch: LWB_dest.parquet

[DEST] Processing LWS (211/362)...
âœ… Saved destination batch: LWS_dest.parquet

[DEST] Processing LYH (212/362)...
âœ… Saved destination batch: LYH_dest.parquet

[DEST] Processing MAF (213/362)...
âœ… Saved destination batch: MAF_dest.parquet

[DEST] Processing MBS (214/362)...
âœ… Saved destination batch: MBS_dest.parquet

[DEST] Processing MCI (215/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: MCI_dest.parquet

[DEST] Processing MCO (216/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: MCO_dest.parquet

[DEST] Processing MDT (217/362)...
âœ… Saved destination batch: MDT_dest.parquet

[DEST] Processing MDW (218/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: MDW_dest.parquet

[DEST] Processing MEI (219/362)...
âœ… Saved destination batch: MEI_dest.parquet

[DEST] Processing MEM (220/362)...
âœ… Saved destination batch: MEM_dest.parquet

[DEST] Processing MFE (221/362)...
âœ… Saved destination batch: MFE_dest.parquet

[DEST] Processing MFR (222/362)...
âœ… Saved destination batch: MFR_dest.parquet

[DEST] Processing MGM (223/362)...
âœ… Saved destination batch: MGM_dest.parquet

[DEST] Processing MHK (224/362)...
âœ… Saved destination batch: MHK_dest.parquet

[DEST] Processing MHT (225/362)...
âœ… Saved destination batch: MHT_dest.parquet

[DEST] Processing MIA (226/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: MIA_dest.parquet

[DEST] Processing MKE (227/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: MKE_dest.parquet

[DEST] Processing MKG (228/362)...
âœ… Saved destination batch: MKG_dest.parquet

[DEST] Processing MKK (229/362)...
âœ… Saved destination batch: MKK_dest.parquet

[DEST] Processing MLB (230/362)...
âœ… Saved destination batch: MLB_dest.parquet

[DEST] Processing MLI (231/362)...
âœ… Saved destination batch: MLI_dest.parquet

[DEST] Processing MLU (232/362)...
âœ… Saved destination batch: MLU_dest.parquet

[DEST] Processing MMH (233/362)...
âœ… Saved destination batch: MMH_dest.parquet

[DEST] Processing MOB (234/362)...
âœ… Saved destination batch: MOB_dest.parquet

[DEST] Processing MOT (235/362)...
âœ… Saved destination batch: MOT_dest.parquet

[DEST] Processing MQT (236/362)...
âœ… Saved destination batch: MQT_dest.parquet

[DEST] Processing MRY (237/362)...
âœ… Saved destination batch: MRY_dest.parquet

[DEST] Processing MSN (238/362)...
âœ… Saved destination batch: MSN_dest.parquet

[DEST] Processing MSO (239/362)...
âœ… Saved destin

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: MSP_dest.parquet

[DEST] Processing MSY (241/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: MSY_dest.parquet

[DEST] Processing MTJ (242/362)...
âœ… Saved destination batch: MTJ_dest.parquet

[DEST] Processing MVY (243/362)...
âœ… Saved destination batch: MVY_dest.parquet

[DEST] Processing MYR (244/362)...
âœ… Saved destination batch: MYR_dest.parquet

[DEST] Processing OAJ (245/362)...
âœ… Saved destination batch: OAJ_dest.parquet

[DEST] Processing OAK (246/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: OAK_dest.parquet

[DEST] Processing OGD (247/362)...
âœ… Saved destination batch: OGD_dest.parquet

[DEST] Processing OGG (248/362)...
âœ… Saved destination batch: OGG_dest.parquet

[DEST] Processing OGS (249/362)...
âœ… Saved destination batch: OGS_dest.parquet

[DEST] Processing OKC (250/362)...
âœ… Saved destination batch: OKC_dest.parquet

[DEST] Processing OMA (251/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: OMA_dest.parquet

[DEST] Processing OME (252/362)...
âœ… Saved destination batch: OME_dest.parquet

[DEST] Processing ONT (253/362)...
âœ… Saved destination batch: ONT_dest.parquet

[DEST] Processing ORD (254/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: ORD_dest.parquet

[DEST] Processing ORF (255/362)...
âœ… Saved destination batch: ORF_dest.parquet

[DEST] Processing ORH (256/362)...
âœ… Saved destination batch: ORH_dest.parquet

[DEST] Processing OTH (257/362)...
âœ… Saved destination batch: OTH_dest.parquet

[DEST] Processing OTZ (258/362)...
âœ… Saved destination batch: OTZ_dest.parquet

[DEST] Processing OWB (259/362)...
âœ… Saved destination batch: OWB_dest.parquet

[DEST] Processing PAH (260/362)...
âœ… Saved destination batch: PAH_dest.parquet

[DEST] Processing PBG (261/362)...
âœ… Saved destination batch: PBG_dest.parquet

[DEST] Processing PBI (262/362)...
âœ… Saved destination batch: PBI_dest.parquet

[DEST] Processing PDX (263/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: PDX_dest.parquet

[DEST] Processing PGD (264/362)...
âœ… Saved destination batch: PGD_dest.parquet

[DEST] Processing PGV (265/362)...
âœ… Saved destination batch: PGV_dest.parquet

[DEST] Processing PHF (266/362)...
âœ… Saved destination batch: PHF_dest.parquet

[DEST] Processing PHL (267/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: PHL_dest.parquet

[DEST] Processing PHX (268/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: PHX_dest.parquet

[DEST] Processing PIA (269/362)...
âœ… Saved destination batch: PIA_dest.parquet

[DEST] Processing PIB (270/362)...
âœ… Saved destination batch: PIB_dest.parquet

[DEST] Processing PIE (271/362)...
âœ… Saved destination batch: PIE_dest.parquet

[DEST] Processing PIH (272/362)...
âœ… Saved destination batch: PIH_dest.parquet

[DEST] Processing PIT (273/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: PIT_dest.parquet

[DEST] Processing PLN (274/362)...
âœ… Saved destination batch: PLN_dest.parquet

[DEST] Processing PNS (275/362)...
âœ… Saved destination batch: PNS_dest.parquet

[DEST] Processing PQI (276/362)...
âœ… Saved destination batch: PQI_dest.parquet

[DEST] Processing PRC (277/362)...
âœ… Saved destination batch: PRC_dest.parquet

[DEST] Processing PSC (278/362)...
âœ… Saved destination batch: PSC_dest.parquet

[DEST] Processing PSG (279/362)...
âœ… Saved destination batch: PSG_dest.parquet

[DEST] Processing PSM (280/362)...
âœ… Saved destination batch: PSM_dest.parquet

[DEST] Processing PSP (281/362)...
âœ… Saved destination batch: PSP_dest.parquet

[DEST] Processing PUB (282/362)...
âœ… Saved destination batch: PUB_dest.parquet

[DEST] Processing PUW (283/362)...
âœ… Saved destination batch: PUW_dest.parquet

[DEST] Processing PVD (284/362)...
âœ… Saved destination batch: PVD_dest.parquet

[DEST] Processing PVU (285/362)...
âœ… Saved destin

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: RDU_dest.parquet

[DEST] Processing RFD (291/362)...
âœ… Saved destination batch: RFD_dest.parquet

[DEST] Processing RHI (292/362)...
âœ… Saved destination batch: RHI_dest.parquet

[DEST] Processing RIC (293/362)...
âœ… Saved destination batch: RIC_dest.parquet

[DEST] Processing RKS (294/362)...
âœ… Saved destination batch: RKS_dest.parquet

[DEST] Processing RNO (295/362)...
âœ… Saved destination batch: RNO_dest.parquet

[DEST] Processing ROA (296/362)...
âœ… Saved destination batch: ROA_dest.parquet

[DEST] Processing ROC (297/362)...
âœ… Saved destination batch: ROC_dest.parquet

[DEST] Processing ROW (298/362)...
âœ… Saved destination batch: ROW_dest.parquet

[DEST] Processing RST (299/362)...
âœ… Saved destination batch: RST_dest.parquet

[DEST] Processing RSW (300/362)...
âœ… Saved destination batch: RSW_dest.parquet

[DEST] Processing SAF (301/362)...
âœ… Saved destination batch: SAF_dest.parquet

[DEST] Processing SAN (302/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: SAN_dest.parquet

[DEST] Processing SAT (303/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: SAT_dest.parquet

[DEST] Processing SAV (304/362)...
âœ… Saved destination batch: SAV_dest.parquet

[DEST] Processing SBA (305/362)...
âœ… Saved destination batch: SBA_dest.parquet

[DEST] Processing SBN (306/362)...
âœ… Saved destination batch: SBN_dest.parquet

[DEST] Processing SBP (307/362)...
âœ… Saved destination batch: SBP_dest.parquet

[DEST] Processing SBY (308/362)...
âœ… Saved destination batch: SBY_dest.parquet

[DEST] Processing SCC (309/362)...
âœ… Saved destination batch: SCC_dest.parquet

[DEST] Processing SCE (310/362)...
âœ… Saved destination batch: SCE_dest.parquet

[DEST] Processing SCK (311/362)...
âœ… Saved destination batch: SCK_dest.parquet

[DEST] Processing SDF (312/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: SDF_dest.parquet

[DEST] Processing SEA (313/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: SEA_dest.parquet

[DEST] Processing SFB (314/362)...
âœ… Saved destination batch: SFB_dest.parquet

[DEST] Processing SFO (315/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: SFO_dest.parquet

[DEST] Processing SGF (316/362)...
âœ… Saved destination batch: SGF_dest.parquet

[DEST] Processing SGU (317/362)...
âœ… Saved destination batch: SGU_dest.parquet

[DEST] Processing SHD (318/362)...
âœ… Saved destination batch: SHD_dest.parquet

[DEST] Processing SHV (319/362)...
âœ… Saved destination batch: SHV_dest.parquet

[DEST] Processing SIT (320/362)...
âœ… Saved destination batch: SIT_dest.parquet

[DEST] Processing SJC (321/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: SJC_dest.parquet

[DEST] Processing SJT (322/362)...
âœ… Saved destination batch: SJT_dest.parquet

[DEST] Processing SLC (323/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: SLC_dest.parquet

[DEST] Processing SLN (324/362)...
âœ… Saved destination batch: SLN_dest.parquet

[DEST] Processing SMF (325/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: SMF_dest.parquet

[DEST] Processing SMX (326/362)...
âœ… Saved destination batch: SMX_dest.parquet

[DEST] Processing SNA (327/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: SNA_dest.parquet

[DEST] Processing SPI (328/362)...
âœ… Saved destination batch: SPI_dest.parquet

[DEST] Processing SPS (329/362)...
âœ… Saved destination batch: SPS_dest.parquet

[DEST] Processing SRQ (330/362)...
âœ… Saved destination batch: SRQ_dest.parquet

[DEST] Processing STC (331/362)...
âœ… Saved destination batch: STC_dest.parquet

[DEST] Processing STL (332/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: STL_dest.parquet

[DEST] Processing STS (333/362)...
âœ… Saved destination batch: STS_dest.parquet

[DEST] Processing SUN (334/362)...
âœ… Saved destination batch: SUN_dest.parquet

[DEST] Processing SUX (335/362)...
âœ… Saved destination batch: SUX_dest.parquet

[DEST] Processing SWF (336/362)...
âœ… Saved destination batch: SWF_dest.parquet

[DEST] Processing SWO (337/362)...
âœ… Saved destination batch: SWO_dest.parquet

[DEST] Processing SYR (338/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: SYR_dest.parquet

[DEST] Processing TLH (339/362)...
âœ… Saved destination batch: TLH_dest.parquet

[DEST] Processing TOL (340/362)...
âœ… Saved destination batch: TOL_dest.parquet

[DEST] Processing TPA (341/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: TPA_dest.parquet

[DEST] Processing TRI (342/362)...
âœ… Saved destination batch: TRI_dest.parquet

[DEST] Processing TTN (343/362)...
âœ… Saved destination batch: TTN_dest.parquet

[DEST] Processing TUL (344/362)...


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Saved destination batch: TUL_dest.parquet

[DEST] Processing TUS (345/362)...
âœ… Saved destination batch: TUS_dest.parquet

[DEST] Processing TVC (346/362)...
âœ… Saved destination batch: TVC_dest.parquet

[DEST] Processing TWF (347/362)...
âœ… Saved destination batch: TWF_dest.parquet

[DEST] Processing TXK (348/362)...
âœ… Saved destination batch: TXK_dest.parquet

[DEST] Processing TYR (349/362)...
âœ… Saved destination batch: TYR_dest.parquet

[DEST] Processing TYS (350/362)...
âœ… Saved destination batch: TYS_dest.parquet

[DEST] Processing UIN (351/362)...
âœ… Saved destination batch: UIN_dest.parquet

[DEST] Processing USA (352/362)...
âœ… Saved destination batch: USA_dest.parquet

[DEST] Processing VEL (353/362)...
âœ… Saved destination batch: VEL_dest.parquet

[DEST] Processing VLD (354/362)...
âœ… Saved destination batch: VLD_dest.parquet

[DEST] Processing VPS (355/362)...
âœ… Saved destination batch: VPS_dest.parquet

[DEST] Processing WRG (356/362)...
âœ… Saved destin

In [None]:
import shutil
from google.colab import files

# Directory whose contents get bundled for download.
folder_path = "/content/merged_batches_dest"

# Pack the directory into merged_batches_dest.zip (written relative to the
# current working directory, since base_name is a relative path).
shutil.make_archive(
    base_name="merged_batches_dest",
    format="zip",
    root_dir=folder_path,
)

# Hand the archive to the Colab file-download helper.
files.download("merged_batches_dest.zip")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import duckdb

# Reuses the `con` DuckDB connection opened earlier in the notebook.
# To run this cell standalone, open a connection first, e.g.:
# con = duckdb.connect(database=':memory:')

# Stack all origin batch files into one table.
# OR REPLACE keeps the cell re-runnable: a plain CREATE TABLE raises
# "table already exists" the second time the cell is executed.
con.execute("""
    CREATE OR REPLACE TABLE origin AS
    SELECT * FROM read_parquet('/content/origin/*.parquet');
""")

# Same for the destination batch files.
con.execute("""
    CREATE OR REPLACE TABLE dest AS
    SELECT * FROM read_parquet('/content/destination/*.parquet');
""")
print("read")

# Attach the destination weather columns to every origin row by FlightID.
# LEFT JOIN keeps origin rows that have no destination match (their d_* columns are NULL).
# ORDER BY makes the row order deterministic: without it the join output order is
# unspecified, and the parquet written below would be saved in arbitrary order.
# NOTE(review): o.FlightID is also part of o.*, so the key appears twice in the
# result (pandas shows the second copy as FlightID_1). Left as-is so the saved
# schema does not change; drop one of them once downstream cells are checked.
merged_df = con.execute("""
    SELECT
        o.FlightID,
        o.*,
        d.d_WND, d.d_CIG, d.d_VIS, d.d_TMP, d.d_DEW, d.d_SLP,
        d.d_AA1, d.d_AA2, d.d_AT1, d.d_AT2, d.d_AU1, d.d_AU2,
        d.d_AW1, d.d_AW2, d.d_GD1, d.d_GD2, d.d_OC1
    FROM origin o
    LEFT JOIN dest d USING (FlightID)
    ORDER BY o.FlightID;
""").df()

# Save the fully merged dataset (index=False: the RangeIndex carries no information).
merged_df.to_parquet("/content/flights_2018_final_merged.parquet", index=False)

print("âœ… Final dataset saved â†’ flights_2018_final_merged.parquet")


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

read


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

âœ… Final dataset saved â†’ flights_2018_final_merged.parquet


In [None]:
# Number of rows in the merged frame.
merged_df.shape[0]

5531151

In [None]:
# Write the first 1,000 rows to CSV (presumably for quick manual inspection).
merged_df.iloc[:1000].to_csv("final_merged_1000_rows.csv")

In [None]:
# Order the rows by FlightID and renumber the index from 0, then display the frame.
df = (
    merged_df
    .sort_values(by="FlightID", ascending=True)
    .reset_index(drop=True)
)
df

Unnamed: 0,FlightID,Origin,Dest,CRSDepTime,CRSArrTime,Distance,Month,DayofMonth,DayOfWeek,IATA_Code_Marketing_Airline,...,d_AA2,d_AT1,d_AT2,d_AU1,d_AU2,d_AW1,d_AW2,d_GD1,d_GD2,d_OC1
0,0,ABY,ATL,1202,1304,145.0,1,23,2,DL,...,21.10,16.0,13.0,,,,,2.0,2.0,10.625
1,1,ABY,ATL,1202,1304,145.0,1,24,3,DL,...,,,,,,,,1.0,,9.800
2,2,ABY,ATL,1202,1304,145.0,1,25,4,DL,...,,,,,,,,1.0,1.0,
3,3,ABY,ATL,1202,1304,145.0,1,26,5,DL,...,,,,,,,,1.0,3.0,
4,4,ABY,ATL,1400,1500,145.0,1,27,6,DL,...,0.25,16.0,13.0,1.0,,61.0,,1.0,4.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5531146,5531146,SCE,IAD,1445,1546,133.0,9,11,2,UA,...,2.55,16.0,14.0,0.0,0.0,10.0,51.0,4.0,4.0,
5531147,5531147,IAD,GSO,1235,1355,239.0,9,11,2,UA,...,,16.0,14.0,,,,,3.0,4.0,
5531148,5531148,EVV,ORD,1030,1204,272.0,9,11,2,UA,...,,13.0,,0.0,,10.0,,0.0,,
5531149,5531149,ORD,HPN,1410,1726,738.0,9,11,2,UA,...,0.20,14.0,1.0,0.0,0.0,10.0,51.0,5.0,3.0,


In [None]:
# Show the column labels of the merged frame.
merged_df.columns

Index(['FlightID', 'Origin', 'Dest', 'CRSDepTime', 'CRSArrTime', 'Distance',
       'Month', 'DayofMonth', 'DayOfWeek', 'IATA_Code_Marketing_Airline',
       'Tail_Number', 'Dep_DateTime', 'Arr_DateTime', 'FlightID_1',
       '__index_level_0__', 'o_WND', 'o_CIG', 'o_VIS', 'o_TMP', 'o_DEW',
       'o_SLP', 'o_AA1', 'o_AA2', 'o_AT1', 'o_AT2', 'o_AU1', 'o_AU2', 'o_AW1',
       'o_AW2', 'o_GD1', 'o_GD2', 'o_OC1', 'd_WND', 'd_CIG', 'd_VIS', 'd_TMP',
       'd_DEW', 'd_SLP', 'd_AA1', 'd_AA2', 'd_AT1', 'd_AT2', 'd_AU1', 'd_AU2',
       'd_AW1', 'd_AW2', 'd_GD1', 'd_GD2', 'd_OC1'],
      dtype='object')

In [None]:
# Drop the reference to the unsorted frame; `df` holds the FlightID-sorted copy.
del merged_df

In [None]:
import pandas as pd
import numpy as np


In [None]:
# Load the target variables.
# NOTE(review): presumably one row per flight, in the same order as the flight table — confirm.
Y = pd.read_parquet("/content/target_variables.parquet")

In [None]:
# Number of rows in the target frame.
Y.shape[0]

5531151

In [None]:
# Number of rows in the sorted feature frame (compare with the target row count).
df.shape[0]

5531151

In [None]:
# Show which target columns are available.
Y.columns

Index(['Cancelled', 'DepDelay', 'ArrDelay'], dtype='object')

In [None]:
# Pull each target out as its own Series; .copy() detaches them from Y.
y_dep, y_arr, y_cancel = (
    Y[target].copy() for target in ("DepDelay", "ArrDelay", "Cancelled")
)

In [None]:
# The individual target Series are kept; the combined frame is no longer needed.
del Y

In [None]:
# Print the column dtypes and summary of the sorted frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5531151 entries, 0 to 5531150
Data columns (total 49 columns):
 #   Column                       Dtype         
---  ------                       -----         
 0   FlightID                     int64         
 1   Origin                       object        
 2   Dest                         object        
 3   CRSDepTime                   int64         
 4   CRSArrTime                   int64         
 5   Distance                     float64       
 6   Month                        int64         
 7   DayofMonth                   int64         
 8   DayOfWeek                    int64         
 9   IATA_Code_Marketing_Airline  object        
 10  Tail_Number                  object        
 11  Dep_DateTime                 datetime64[ns]
 12  Arr_DateTime                 datetime64[ns]
 13  FlightID_1                   int64         
 14  __index_level_0__            int64         
 15  o_WND                        float64       
 16  

In [None]:
# Reload the merged dataset from disk. The file was written from the join
# result before it was sorted, so re-apply the FlightID ordering and the
# 0..N-1 index here; otherwise this reload replaces the sorted `df` with
# rows in whatever order the join produced.
# NOTE(review): y_dep / y_arr / y_cancel appear to be matched to the
# FlightID-sorted frame by position — confirm against how
# target_variables.parquet was built.
df = (
    pd.read_parquet("/content/flights_2018_final_merged.parquet")
    .sort_values(by="FlightID", ascending=True)
    .reset_index(drop=True)
)

In [None]:
# Columns to treat as categorical: carrier, airports, tail number, and the
# o_* / d_* weather code columns.
cat_cols = [
    "IATA_Code_Marketing_Airline", "Origin", "Dest", "Tail_Number",
    "o_AT1", "o_AT2", "o_AU1", "o_AU2", "o_AW1", "o_AW2", "o_GD1", "o_GD2",
    "d_AT1", "d_AT2", "d_AU1", "d_AU2", "d_AW1", "d_AW2", "d_GD1", "d_GD2",
]

# Cast every listed column that is present in the frame to the pandas
# "string" dtype; names missing from df are skipped silently.
for col in (candidate for candidate in cat_cols if candidate in df.columns):
    df[col] = df[col].astype("string")

# Write the re-typed frame to a new parquet file, without the index.
df.to_parquet("/content/flights_2018_final_encoded.parquet", index=False)

In [None]:
# Print the dtypes again to check that the categorical columns are now `string`.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5531151 entries, 0 to 5531150
Data columns (total 49 columns):
 #   Column                       Dtype         
---  ------                       -----         
 0   FlightID                     int64         
 1   Origin                       string        
 2   Dest                         string        
 3   CRSDepTime                   int64         
 4   CRSArrTime                   int64         
 5   Distance                     float64       
 6   Month                        int64         
 7   DayofMonth                   int64         
 8   DayOfWeek                    int64         
 9   IATA_Code_Marketing_Airline  string        
 10  Tail_Number                  string        
 11  Dep_DateTime                 datetime64[ns]
 12  Arr_DateTime                 datetime64[ns]
 13  FlightID_1                   int64         
 14  __index_level_0__            int64         
 15  o_WND                        float64       
 16  