# Data Analysis

In [None]:
#creating the final dataset with all the months and assigning a flight index to each flight within the final dataset (landing index is assigned to each flight within each month)
import pandas as pd
import glob

# CSV files written by this notebook into the same directory. They must not be read back
# as monthly inputs: re-reading all_months.csv would duplicate every flight, and
# go_around_per_day.csv has no Landing_Index column.
generated_files = {"all_months.csv", "go_around_per_day.csv"}

file_paths = sorted(path for path in glob.glob("*.csv") if path not in generated_files)
if not file_paths:
    raise FileNotFoundError("No monthly CSV files found in the working directory")

all_dfs = []
next_index = 0  # number of flight indices handed out so far, across all files

for path in file_paths:
    df = pd.read_csv(path, dtype={"ICAO": str})
    # Landing_Index restarts in every file, so each file's sorted unique values are
    # renumbered onto the next free block of global flight indices (starting at 1).
    unique_landings = sorted(df["Landing_Index"].unique())
    mapping = {old: new for new, old in enumerate(unique_landings, start=next_index + 1)}
    df["Flight_Index"] = df["Landing_Index"].map(mapping)
    next_index += len(unique_landings)
    all_dfs.append(df)

full_df = pd.concat(all_dfs, ignore_index=True)
full_df.to_csv("all_months.csv", index=False)

In [None]:
#checking the number of normal landings and go-arounds in the dataset
# Only the two columns needed for the count are read from the merged file.
collumns = ["Flight_Index", "Go_Around_Label"]
df = pd.read_csv("all_months.csv", usecols=collumns)

# One label per flight: the first non-null Go_Around_Label among the flight's rows.
label_per_flight = df.groupby("Flight_Index")["Go_Around_Label"].first()
go_around_count = label_per_flight.value_counts()
total_flights = df["Flight_Index"].nunique()

# .get(..., 0) covers the case where one of the two classes does not occur at all.
n_normal = go_around_count.get(0, 0)
n_go_around = go_around_count.get(1, 0)
fraction = (n_go_around / total_flights) * 100

print("Number of unique landings:", total_flights)
print("Number of normal landings (0):", n_normal)
print("Number of go-arounds (1):", n_go_around)
print("Go-around %:", fraction)

In [None]:
#checking the max, min, mean and std of the dataset features
features = [
    "Latitude", "Longitude", "Speed", "Vertical Rate", "Heading", "Correct Altitude",
    "Glideslope", "Deviation", "Energy",
    "Wind_Speed_M", "Wind_Direction_M", "Visibility_M", "G_M", "TS_M", "FG_M", "RA_M", "WS_M",
    "Wind_Dir_Change", "Wind_Dir_Change_M", "Wind_Dir_Sector_M",
    "Wind_CompU_E", "Wind_CompV_E", "Wind_Gust_E", "Wind_Shear_E", "CBHA_E", "Visibility_E",
    "C_1000_E", "C_500_E",
    "LOS", "Alt_Diff_lt", "Speed_Diff_lt", "Alt_Diff_tl", "Speed_Diff_tl",
    "GA_before", "GA_lasttime", "GA_hourly",
]

n_features = len(features)
model_df = full_df  # alias only; no copy is made

for feature in features:
    # Report columns that are absent instead of failing on them.
    if feature not in model_df.columns:
        print(f"{feature}: not found in model_df")
        continue

    values = model_df[feature].dropna()  # statistics are taken over non-missing rows
    mean, std = values.mean(), values.std()
    min_val, max_val = values.min(), values.max()
    print(f"{feature}: mean={mean:.2f}, std={std:.2f}, min={min_val:.2f}, max={max_val:.2f}")

#### ----------------------------------------------------------

In [None]:
#go-arounds per day
import pandas as pd

# Unparseable dates become NaT instead of raising.
full_df["Date"] = pd.to_datetime(full_df["Date"], errors="coerce")

# One row per flight: its first non-null date and whether any row carries a go-around label.
flight_info = (
    full_df.groupby("Flight_Index")
    .agg(Date=("Date", "first"), Go_Around_Label=("Go_Around_Label", "max"))
    .reset_index()
)

# Sum the per-flight labels by calendar day.
calendar_day = flight_info["Date"].dt.date
ga_per_day = flight_info["Go_Around_Label"].groupby(calendar_day).sum().reset_index()
ga_per_day.columns = ["Date", "Go_Around_Count"]
ga_per_day.to_csv("go_around_per_day.csv", index=False)

In [None]:
#plot: go-arounds per day
import matplotlib.pyplot as plt

# How many days saw 0, 1, 2, ... go-arounds.
counts = ga_per_day["Go_Around_Count"].value_counts().sort_index()

fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(counts.index, counts.values, color="yellowgreen", edgecolor="black")

ax.set_xlabel("Go-arounds per day", fontsize=16)
ax.set_ylabel("Number of occurrences", fontsize=16)

# Write the number of days just above every bar.
for n_go_arounds, n_days in zip(counts.index, counts.values):
    ax.text(n_go_arounds, n_days + 0.2, str(n_days), ha="center", va="bottom", fontsize=10)

# One tick per integer between the smallest and largest daily count.
ax.set_xticks(range(counts.index.min(), counts.index.max()+1, 1))
plt.setp(ax.get_xticklabels(), fontsize=16)
plt.setp(ax.get_yticklabels(), fontsize=16)
fig.tight_layout()
plt.show()

# Day(s) with the highest number of go-arounds.
max_ga = ga_per_day["Go_Around_Count"].max()
max_day = ga_per_day.loc[ga_per_day["Go_Around_Count"] == max_ga]

print(f"Maximum go-arounds in a day: {max_ga}")
print("Days with that many go-arounds:")
print(max_day)

#### ----------------------------------------------------------

In [None]:
#mean points per flight and mean samples per minute
import pandas as pd
flight_df = pd.read_csv("all_months.csv", dtype={"ICAO": str})

flights_grouped = flight_df.groupby("Flight_Index")
points_per_flight = flights_grouped.size()  # number of rows recorded for each flight

min_points = points_per_flight.min()
max_points = points_per_flight.max()
mean_points = points_per_flight.mean()

print("Min points per flight:", min_points)
print("Max points per flight:", max_points)
print("Mean points per flight:", mean_points)

samples_per_minute = []

for _, flight in flights_grouped:
    duration_sec = flight["Time_Sec"].max() - flight["Time_Sec"].min()
    n_points = len(flight)
    # Flights with a zero or missing (NaN) time span have no defined rate and are skipped.
    if duration_sec > 0:
        rate = n_points / (duration_sec / 60)
        samples_per_minute.append(rate)

# Guard the average: if no flight has a positive duration there is nothing to divide by.
if samples_per_minute:
    mean_samples_per_min = sum(samples_per_minute) / len(samples_per_minute)
else:
    mean_samples_per_min = float("nan")
print("Mean samples per minute:", mean_samples_per_min)

In [None]:
#correlation matrix
import seaborn as sns

# Pairwise correlation between all numeric columns of the row-level data.
correlation_matrix = flight_df.corr(numeric_only=True)
print(correlation_matrix)

# Correlation of every numeric column with the go-around label, strongest positive first.
ga_column = correlation_matrix.loc[:, "Go_Around_Label"]
correlation_ga = ga_column.sort_values(ascending=False)
print(correlation_ga)

#### ----------------------------------------------------------

In [None]:
#plot: distance go-arounds start climbing 
from collections import Counter
import matplotlib.pyplot as plt
import pandas as pd

# Rows labelled as go-around, grouped per flight below.
ga_df = full_df[full_df["Go_Around_Label"] == 1].copy()
ga_start = []

for _, data in ga_df.groupby("Flight_Index"):
    data = data.sort_values("Time_Sec")
    # Distance at the flight's last (latest Time_Sec) go-around-labelled sample.
    distance = data["Distance_NM"].iloc[-1]
    # Missing values read from CSV are NaN, never None, so they must be tested with
    # pd.notna; a NaN reaching plt.hist would prevent the bin range from being computed.
    if pd.notna(distance):
        ga_start.append(distance)

plt.figure(figsize=(12,5))
plt.hist(ga_start, bins=40, color="orange", edgecolor="black") #orange, royalblue
plt.xlabel("Distance from runway (NM)", fontsize=16)
plt.ylabel("Number of go-arounds", fontsize=16)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.show()

#### ----------------------------------------------------------

In [None]:
#go-arounds per runway
import pandas as pd
import numpy as np

# Reference headings (degrees) of the two runway directions; constant, so defined once.
runway_02_heading = 22.72
runway_20_heading = 202.73


def _heading_difference(heading_a, heading_b):
    """Return the smallest absolute angle between two headings, in degrees.

    Headings wrap around at 360, so 359 and 1 are 2 degrees apart, not 358.
    A NaN input yields NaN.
    """
    diff = abs(heading_a - heading_b) % 360
    return min(diff, 360 - diff)


results = []

for landing_index, data in full_df.groupby("Flight_Index"):
    data = data.sort_values("Time_Sec").reset_index(drop=True)

    # Flights with fewer than 5 samples are ignored.
    if len(data) < 5:
        continue

    heading_start = data["Heading"].iloc[0]

    # Assign the flight to the runway whose heading is angularly closest to the first
    # recorded heading. A plain abs() difference would send headings just below 360
    # (e.g. 350) to runway 20 although they are only ~33 degrees away from runway 02.
    diff_02 = _heading_difference(heading_start, runway_02_heading)
    diff_20 = _heading_difference(heading_start, runway_20_heading)
    if diff_02 < diff_20:
        initial = "02"
    else:
        # Also reached when the heading is NaN (the comparison is False), as before.
        initial = "20"

    ga_label = data["Go_Around_Label"].iloc[0]

    results.append({
        "Flight_Index": landing_index,
        "Initial_Approach": initial,
        "Go_Arounds": ga_label
    })

summary_df = pd.DataFrame(results)

# Rows: runway; columns: label 0 / 1 counts, renamed to readable headers.
runway_df = summary_df.groupby(["Initial_Approach", "Go_Arounds"]).size().unstack(fill_value=0)
runway_df = runway_df.rename(columns={0: "Normal_Landings", 1: "Go-Arounds"})
runway_df["Total Flights"] = runway_df["Normal_Landings"] + runway_df["Go-Arounds"]
runway_df["Go-Arounds Rate (%)"] = (runway_df["Go-Arounds"] / runway_df["Total Flights"]) * 100
runway_df = runway_df.reset_index()

# Extra summary row over both runways.
total_row = {
    "Initial_Approach": "Total",
    "Normal_Landings": runway_df["Normal_Landings"].sum(),
    "Go-Arounds": runway_df["Go-Arounds"].sum(),
    "Total Flights": runway_df["Total Flights"].sum(),
    "Go-Arounds Rate (%)": (runway_df["Go-Arounds"].sum() / runway_df["Total Flights"].sum()) * 100
}

final_table = pd.concat([runway_df, pd.DataFrame([total_row])], ignore_index=True)
final_table["Go-Arounds Rate (%)"] = final_table["Go-Arounds Rate (%)"].map(lambda x: f"{x:.3f}%")
final_table = final_table.rename(columns={"Initial_Approach": "Runway"})

print(final_table[["Runway", "Total Flights", "Go-Arounds", "Go-Arounds Rate (%)"]].to_string(index=False))

#### ----------------------------------------------------------

In [None]:
#go-arounds per month
import pandas as pd

full_df["Date"] = pd.to_datetime(full_df["Date"])
full_df["Month"] = full_df["Date"].dt.strftime("%b %Y")

# One row per flight: the first non-null value of every column within the flight.
flights_df = full_df.groupby("Flight_Index").first().reset_index()


def _count_go_arounds(labels):
    """Number of entries in `labels` equal to 1."""
    return (labels == 1).sum()


# Named aggregation: the dict keys become the output column names.
month_aggregations = {"Go-Arounds": _count_go_arounds, "Total Flights": "count"}
months_df = (
    flights_df.groupby("Month")["Go_Around_Label"]
    .agg(**month_aggregations)
    .reset_index()
)

rate_pct = months_df["Go-Arounds"] / months_df["Total Flights"] * 100
months_df["Go-Arounds Rate (%)"] = rate_pct.map("{:.3f}%".format)

# "%b %Y" strings sort alphabetically, so parse them back to dates to get the
# chronological order and use that order as the categories of an ordered categorical.
months_order = pd.to_datetime(months_df["Month"], format="%b %Y").sort_values()
chronological_labels = [stamp.strftime("%b %Y") for stamp in months_order]
months_df["Month"] = pd.Categorical(
    months_df["Month"], categories=chronological_labels, ordered=True
)
months_df = months_df.sort_values("Month")

print(months_df[["Month", "Total Flights", "Go-Arounds", "Go-Arounds Rate (%)"]].to_string(index=False))

In [None]:
#plot: go-arounds per month
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(months_df["Month"], months_df["Go-Arounds"], color="royalblue", edgecolor="black")

# Dashed horizontal line at the mean monthly number of go-arounds.
mean_val = months_df["Go-Arounds"].mean()
ax.axhline(mean_val, color="red", linestyle="--", linewidth=1.5, label=f"Mean = {mean_val:.1f}")

ax.set_xlabel("Month", fontsize=16)
ax.set_ylabel("Number of go-arounds", fontsize=16)

plt.setp(ax.get_xticklabels(), rotation=25, ha="right")
# Write each month's count just above its bar (bars sit at positions 0, 1, 2, ...).
for position, n_go_arounds in enumerate(months_df["Go-Arounds"]):
    ax.text(position, n_go_arounds + 0.2, str(n_go_arounds), ha='center', va='bottom', fontsize=10)
plt.setp(ax.get_yticklabels(), fontsize=16)

fig.tight_layout()
plt.show()

#### ----------------------------------------------------------

In [None]:
#go-arounds per airline
import pandas as pd

# Airline code = the three capital letters at the start of the callsign;
# callsigns that do not start that way get NaN.
full_df["Airline"] = full_df["Callsign"].str.extract(r"^([A-Z]{3})")

# Distinct flights per airline, overall and among go-around-labelled rows.
flights_df = full_df.groupby("Airline")["Flight_Index"].nunique()
go_around_rows = full_df[full_df["Go_Around_Label"] == 1]
ga_df = go_around_rows.groupby("Airline")["Flight_Index"].nunique()

# Airlines without any go-around are missing from ga_df; fillna(0) turns that into a zero count.
airline_df = pd.DataFrame({"Total Flights": flights_df, "Go-Arounds": ga_df}).fillna(0)

rate_pct = airline_df["Go-Arounds"] / airline_df["Total Flights"] * 100
airline_df["Go-Arounds Rate (%)"] = rate_pct.round(3)

airline_df = airline_df.reset_index().sort_values("Go-Arounds", ascending=False)
print(airline_df.to_string(index=False))

#### ----------------------------------------------------------

In [None]:
#plots: glideslope, deviation and energy
import matplotlib.pyplot as plt
import seaborn as sns

# Snap every sample to the nearest 0.5 NM gate and keep the gates from 1 to 7 NM (inclusive).
full_df["Gate"] = (full_df["Distance_NM"] * 2).round() / 2
gates_df = full_df[full_df["Gate"].between(1, 7)]

# Fixed y-axis range per plotted variable.
ylim_dict = {
    "Glideslope": (2, 4),
    "Deviation": (-10, 75),
    "Energy": (250, 1500)
}

colors = {1: "orange", 0: "royalblue"}
labels = {1: "Go-around", 0: "Normal Landing"}

# Column to plot -> y-axis label.
axis_titles = {"Glideslope": "Glideslope", "Deviation": "Deviation (m)", "Energy": "Energy"}

for var, ylabel in axis_titles.items():
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.boxplot(data=gates_df, x="Gate", y=var, hue="Go_Around_Label", palette=colors,
                showfliers=False, dodge=True, ax=ax)
    ax.set_xlabel("Distance to the runway (NM)", fontsize=15)
    ax.set_ylabel(ylabel, fontsize=15)
    # Reverse the x axis so the largest gate is on the left.
    ax.invert_xaxis()
    if var in ylim_dict:
        ax.set_ylim(ylim_dict[var])
    ax.grid(True)
    handles, _ = ax.get_legend_handles_labels()
    plt.setp(ax.get_xticklabels(), fontsize=15, rotation=0)
    plt.setp(ax.get_yticklabels(), fontsize=15)
    # Legend texts are assigned by handle position.
    # NOTE(review): assumes seaborn orders the hue levels 0 then 1 — confirm.
    ax.legend(handles, ["Normal", "Go-around"], title="Landing Type", title_fontsize=14, fontsize=14)
    fig.tight_layout()
    plt.show()

#### ----------------------------------------------------------

In [None]:
#plots: in trail relationship features
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# One row per flight: the first non-null value of every column within the flight.
flights_df = full_df.groupby("Flight_Index").first().reset_index()

# Left-closed unit-wide bins [0, 1) and [1, 2), each labelled by its lower edge.
bin_limits = np.arange(0, 3, 1)
bin_labels = [str(round(edge, 2)) for edge in bin_limits[:-1]]
flights_df = flights_df.assign(
    Bin=pd.cut(flights_df["Lead_Aircraft"], bins=bin_limits, labels=bin_labels, right=False)
)

# Share of go-arounds per bin in percent; bins without any flight (0/0) are shown as 0.
is_go_around = flights_df["Go_Around_Label"].eq(1)
bin_totals = flights_df["Bin"].value_counts().sort_index()
bin_gas = flights_df.loc[is_go_around, "Bin"].value_counts().sort_index()
ga_freq = bin_gas.div(bin_totals).fillna(0).mul(100)

fig, ax = plt.subplots(figsize=(8, 5))
ga_freq.plot(kind="bar", ax=ax, color="tomato", edgecolor="black", zorder=2)
ax.set_xlabel("Lead Aircraft", fontsize=20)
ax.set_ylabel("Frequency of go-arounds (%)", fontsize=15)
plt.setp(ax.get_xticklabels(), fontsize=16, rotation=0)
plt.setp(ax.get_yticklabels(), fontsize=16)
ax.set_ylim(0, ga_freq.max() + 1)
ax.grid(True, zorder=1)
fig.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# One row per flight: the first non-null value of every column within the flight.
flights_df = full_df.groupby("Flight_Index").first().reset_index()

# Left-closed unit-wide bins from 0 up to (not including) 4, labelled by their lower edge.
bin_limits = np.arange(0, 5, 1)
bin_labels = [str(round(edge, 2)) for edge in bin_limits[:-1]]
flights_df = flights_df.assign(
    Bin=pd.cut(flights_df["LOS"], bins=bin_limits, labels=bin_labels, right=False)
)

# Share of go-arounds per bin in percent; bins without any flight (0/0) are shown as 0.
is_go_around = flights_df["Go_Around_Label"].eq(1)
bin_totals = flights_df["Bin"].value_counts().sort_index()
bin_gas = flights_df.loc[is_go_around, "Bin"].value_counts().sort_index()
ga_freq = bin_gas.div(bin_totals).fillna(0).mul(100)

fig, ax = plt.subplots(figsize=(8, 5))
ga_freq.plot(kind="bar", ax=ax, color="tomato", edgecolor="black", zorder=2)
ax.set_xlabel("Loss of Separation", fontsize=20)
ax.set_ylabel("Frequency of go-arounds (%)", fontsize=15)
plt.setp(ax.get_xticklabels(), fontsize=16, rotation=0)
plt.setp(ax.get_yticklabels(), fontsize=16)
ax.set_ylim(0, ga_freq.max() + 1)
ax.grid(True, zorder=1)
fig.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Row-level LOS / altitude-difference pairs with infinite and missing values removed.
clean_df = flight_df[["LOS", "Alt_Diff_lt"]].replace([np.inf, -np.inf], np.nan).dropna()

# 0.5-wide, left-closed LOS bins starting at 0. The last edge must lie strictly above
# the maximum: with right=False a value equal to the last edge falls outside every bin,
# which previously dropped the largest LOS rows whenever the maximum was a multiple of 0.5.
los_bin_width = 0.5
n_los_bins = int(np.floor(clean_df["LOS"].max() / los_bin_width)) + 1
bin_los = np.arange(n_los_bins + 1) * los_bin_width
clean_df["LOS_Bin"] = pd.cut(clean_df["LOS"], bins=bin_los, right=False)

plt.figure(figsize=(8, 5))
ax = sns.violinplot(
    x="LOS_Bin", 
    y="Alt_Diff_lt", 
    data=clean_df,
    inner="box",
    color="royalblue",
    density_norm="width" 
)
plt.xlabel("Loss of Separation (NM)", fontsize=15)
plt.ylabel("Altitude Difference (m)", fontsize=15)
plt.xticks(fontsize=12, rotation=0)
plt.yticks(fontsize=12)
plt.tight_layout()
# Draw the grid behind the violins.
ax.set_axisbelow(True)
ax.grid(True, zorder=0)
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Row-level LOS / speed-difference pairs with infinite and missing values removed.
clean_df = flight_df[["LOS", "Speed_Diff_lt"]].replace([np.inf, -np.inf], np.nan).dropna()

# 0.5-wide, left-closed LOS bins starting at 0. The last edge must lie strictly above
# the maximum: with right=False a value equal to the last edge falls outside every bin,
# which previously dropped the largest LOS rows whenever the maximum was a multiple of 0.5.
los_bin_width = 0.5
n_los_bins = int(np.floor(clean_df["LOS"].max() / los_bin_width)) + 1
bin_los = np.arange(n_los_bins + 1) * los_bin_width
clean_df["LOS_Bin"] = pd.cut(clean_df["LOS"], bins=bin_los, right=False)

plt.figure(figsize=(8, 5))
ax = sns.violinplot(
    x="LOS_Bin", 
    y="Speed_Diff_lt", 
    data=clean_df,
    inner="box",
    color="slateblue",
    density_norm="width" 
)
plt.xlabel("Loss of Separation (NM)", fontsize=15)
plt.ylabel("Speed Difference (m/s)", fontsize=15)
plt.xticks(fontsize=12, rotation=0)
plt.yticks(fontsize=12)
plt.tight_layout()
# Draw the grid behind the violins.
ax.set_axisbelow(True)
ax.grid(True, zorder=0)
plt.show()

#### ----------------------------------------------------------

In [None]:
#plots: go around clustering effects features
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# One row per flight: the first non-null value of every column within the flight.
flights_df = full_df.groupby("Flight_Index").first().reset_index()

# Left-closed unit-wide bins [0, 1) and [1, 2), each labelled by its lower edge.
bin_limits = np.arange(0, 3, 1)
bin_labels = [str(round(edge, 2)) for edge in bin_limits[:-1]]
flights_df = flights_df.assign(
    Bin=pd.cut(flights_df["GA_before"], bins=bin_limits, labels=bin_labels, right=False)
)

# Share of go-arounds per bin in percent; bins without any flight (0/0) are shown as 0.
is_go_around = flights_df["Go_Around_Label"].eq(1)
bin_totals = flights_df["Bin"].value_counts().sort_index()
bin_gas = flights_df.loc[is_go_around, "Bin"].value_counts().sort_index()
ga_freq = bin_gas.div(bin_totals).fillna(0).mul(100)

fig, ax = plt.subplots(figsize=(8, 5))
ga_freq.plot(kind="bar", ax=ax, color="orange", edgecolor="black", zorder=2)
ax.set_xlabel("Go-around before", fontsize=20)
ax.set_ylabel("Frequency of go-arounds (%)", fontsize=15)
plt.setp(ax.get_xticklabels(), fontsize=16, rotation=0)
plt.setp(ax.get_yticklabels(), fontsize=16)
ax.set_ylim(0, ga_freq.max() + 1)
ax.grid(True, zorder=1)
fig.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# One row per flight: the first non-null value of every column within the flight.
flights_df = full_df.groupby("Flight_Index").first().reset_index()

# Left-closed unit-wide bins from 0 up to (not including) 4, labelled by their lower edge.
bin_limits = np.arange(0, 5, 1)
bin_labels = [str(round(edge, 2)) for edge in bin_limits[:-1]]
flights_df = flights_df.assign(
    Bin=pd.cut(flights_df["GA_hourly"], bins=bin_limits, labels=bin_labels, right=False)
)

# Share of go-arounds per bin in percent; bins without any flight (0/0) are shown as 0.
is_go_around = flights_df["Go_Around_Label"].eq(1)
bin_totals = flights_df["Bin"].value_counts().sort_index()
bin_gas = flights_df.loc[is_go_around, "Bin"].value_counts().sort_index()
ga_freq = bin_gas.div(bin_totals).fillna(0).mul(100)

fig, ax = plt.subplots(figsize=(8, 5))
ga_freq.plot(kind="bar", ax=ax, color="orange", edgecolor="black", zorder=2)
ax.set_xlabel("Go-around hourly", fontsize=20)
ax.set_ylabel("Frequency of go-arounds (%)", fontsize=15)
plt.setp(ax.get_xticklabels(), fontsize=16, rotation=0)
plt.setp(ax.get_yticklabels(), fontsize=16)
ax.set_ylim(0, ga_freq.max() + 1)
ax.grid(True, zorder=1)
fig.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# One row per flight: the first non-null value of every column within the flight.
flights_df = full_df.groupby("Flight_Index").first().reset_index()

# Left-closed bins of width 200 with edges 0, 200, ..., 1400; values outside get no bin.
bin_limits = np.arange(0, 1500, 200)
bin_labels = [f"[{bin_limits[i]}, {bin_limits[i + 1]})" for i in range(len(bin_limits) - 1)]
flights_df = flights_df.assign(
    Bin=pd.cut(flights_df["GA_lasttime"], bins=bin_limits, labels=bin_labels, right=False)
)

# Share of go-arounds per bin in percent; bins without any flight (0/0) are shown as 0.
is_go_around = flights_df["Go_Around_Label"].eq(1)
bin_totals = flights_df["Bin"].value_counts().sort_index()
bin_gas = flights_df.loc[is_go_around, "Bin"].value_counts().sort_index()
ga_freq = bin_gas.div(bin_totals).fillna(0).mul(100)

fig, ax = plt.subplots(figsize=(8, 5))
ga_freq.plot(kind="bar", ax=ax, color="orange", edgecolor="black", zorder=2)
ax.set_xlabel("Closest go-around (min)", fontsize=20)
ax.set_ylabel("Frequency of go-arounds (%)", fontsize=15)
plt.setp(ax.get_xticklabels(), rotation=0)
plt.setp(ax.get_yticklabels(), fontsize=16)
ax.set_ylim(0, ga_freq.max() + 1)
ax.grid(True, zorder=1)
fig.tight_layout()
plt.show()

#### ----------------------------------------------------------

In [None]:
#plots: metar weather features 
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# One row per flight: the first non-null value of every column within the flight.
flights_df = full_df.groupby("Flight_Index").first().reset_index()

# Left-closed bins of width 2 with edges 0, 2, ..., 14; values outside get no bin.
bin_limits = np.arange(0, 16, 2)
bin_labels = [f"[{bin_limits[i]}, {bin_limits[i + 1]})" for i in range(len(bin_limits) - 1)]
flights_df = flights_df.assign(
    Bin=pd.cut(flights_df["Wind_Speed_M"], bins=bin_limits, labels=bin_labels, right=False)
)

# Share of go-arounds per bin in percent; bins without any flight (0/0) are shown as 0.
is_go_around = flights_df["Go_Around_Label"].eq(1)
bin_totals = flights_df["Bin"].value_counts().sort_index()
bin_gas = flights_df.loc[is_go_around, "Bin"].value_counts().sort_index()
ga_freq = bin_gas.div(bin_totals).fillna(0).mul(100)

fig, ax = plt.subplots(figsize=(8, 5))
ga_freq.plot(kind="bar", ax=ax, color="royalblue", edgecolor="black", zorder=2)
ax.set_xlabel("Wind Speed (m/s)", fontsize=15)
ax.set_ylabel("Frequency of go-arounds (%)", fontsize=15)
plt.setp(ax.get_xticklabels(), rotation=0)
ax.set_ylim(0, ga_freq.max() + 1)
ax.grid(True, zorder=1)
fig.tight_layout()
plt.show()

In [None]:
# Box plot of METAR wind speed for normal landings vs go-arounds (outliers hidden).
fig, ax = plt.subplots(figsize=(4, 5))
sns.boxplot(
    data=flights_df,
    x="Go_Around_Label",
    y="Wind_Speed_M",
    hue="Go_Around_Label",
    palette=["royalblue", "orange"],
    legend=False,
    showfliers=False,
    ax=ax,
)

ylim_dict = {"Wind_Speed_M": (-2, 14)}
ax.set_ylim(ylim_dict["Wind_Speed_M"])
ax.grid(True)
# Replace the numeric 0/1 category ticks with readable names.
ax.set_xticks([0, 1])
ax.set_xticklabels(["Normal landing", "Go-around"], fontsize=15)
ax.set_xlabel("")
ax.set_ylabel("Wind Speed (m/s)", fontsize=15)
ax.grid(True, axis="y", linestyle="--", alpha=0.6)
fig.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# One row per flight: the first non-null value of every column within the flight.
flights_df = full_df.groupby("Flight_Index").first().reset_index()

# Left-closed bins of width 2000 with edges 0, 2000, ..., 10000; values outside get no bin.
bin_limits = np.arange(0, 11000, 2000)
bin_labels = [f"[{bin_limits[i]}, {bin_limits[i + 1]})" for i in range(len(bin_limits) - 1)]
flights_df = flights_df.assign(
    Bin=pd.cut(flights_df["Visibility_M"], bins=bin_limits, labels=bin_labels, right=False)
)

# Share of go-arounds per bin in percent; bins without any flight (0/0) are shown as 0.
is_go_around = flights_df["Go_Around_Label"].eq(1)
bin_totals = flights_df["Bin"].value_counts().sort_index()
bin_gas = flights_df.loc[is_go_around, "Bin"].value_counts().sort_index()
ga_freq = bin_gas.div(bin_totals).fillna(0).mul(100)

fig, ax = plt.subplots(figsize=(8, 5))
ga_freq.plot(kind="bar", ax=ax, color="royalblue", edgecolor="black", zorder=2)
ax.set_xlabel("Visibility (m)", fontsize=15)
ax.set_ylabel("Frequency of go-arounds (%)", fontsize=15)
plt.setp(ax.get_xticklabels(), fontsize=12, rotation=0)
plt.setp(ax.get_yticklabels(), fontsize=12)
ax.set_ylim(0, ga_freq.max() + 1)
ax.grid(True, zorder=1)
fig.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# One row per flight: the first non-null value of every column within the flight.
flights_df = full_df.groupby("Flight_Index").first().reset_index()

# Left-closed unit-wide bins [0, 1) and [1, 2), each labelled by its lower edge.
bin_limits = np.arange(0, 3, 1)
bin_labels = [str(round(edge, 2)) for edge in bin_limits[:-1]]
#change to WS_M, RA_M, TS_m, FG_M, Wind_Dir_Sector_M, Wind_Dir_Change_M
flights_df = flights_df.assign(
    Bin=pd.cut(flights_df["G_M"], bins=bin_limits, labels=bin_labels, right=False)
)

# Share of go-arounds per bin in percent; bins without any flight (0/0) are shown as 0.
is_go_around = flights_df["Go_Around_Label"].eq(1)
bin_totals = flights_df["Bin"].value_counts().sort_index()
bin_gas = flights_df.loc[is_go_around, "Bin"].value_counts().sort_index()
ga_freq = bin_gas.div(bin_totals).fillna(0).mul(100)

fig, ax = plt.subplots(figsize=(8, 5))
ga_freq.plot(kind="bar", ax=ax, color="royalblue", edgecolor="black", zorder=2)
#change to Wind Shear, Rain, Thunderstorm, Fog, Wind Direction Sector, Wind Direction Change
ax.set_xlabel("Gust", fontsize=20)
ax.set_ylabel("Frequency of go-arounds (%)", fontsize=20)
plt.setp(ax.get_xticklabels(), fontsize=20, rotation=0)
plt.setp(ax.get_yticklabels(), fontsize=20)
ax.set_ylim(0, ga_freq.max() + 1)
ax.grid(True, zorder=1)
fig.tight_layout()
plt.show()

#### ----------------------------------------------------------

In [None]:
#plots: ecmwf weather features
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# One row per flight: the first non-null value of every column within the flight.
flights_df = full_df.groupby("Flight_Index").first().reset_index()

# Left-closed bins of width 0.05 from 0 to 0.35; the +0.001 keeps the 0.35 edge in arange.
bin_limits = np.arange(0, 0.35 + 0.001, 0.05)
bin_labels = [f"[{low:.2f}, {high:.2f})" for low, high in zip(bin_limits[:-1], bin_limits[1:])]
flights_df = flights_df.assign(
    Bin=pd.cut(flights_df["C_500_E"], bins=bin_limits, labels=bin_labels, right=False)
)

# Share of go-arounds per bin in percent; bins without any flight (0/0) are shown as 0.
is_go_around = flights_df["Go_Around_Label"].eq(1)
bin_totals = flights_df["Bin"].value_counts().sort_index()
bin_gas = flights_df.loc[is_go_around, "Bin"].value_counts().sort_index()
ga_freq = bin_gas.div(bin_totals).fillna(0).mul(100)

fig, ax = plt.subplots(figsize=(8, 5))
ga_freq.plot(kind="bar", ax=ax, color="yellowgreen", edgecolor="black", zorder=2)
ax.set_xlabel("Turbulence at 500 m", fontsize=16)
ax.set_ylabel("Frequency of go-arounds (%)", fontsize=16)
plt.setp(ax.get_xticklabels(), rotation=0)
plt.setp(ax.get_yticklabels(), fontsize=16)
ax.set_ylim(0, ga_freq.max() + 1)
ax.grid(True, zorder=1)
fig.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# One row per flight: the first non-null value of every column within the flight.
flights_df = full_df.groupby("Flight_Index").first().reset_index()

# Left-closed bins of width 0.05 from 0 to 0.30; the +0.001 keeps the 0.30 edge in arange.
bin_limits = np.arange(0, 0.3 + 0.001, 0.05)
bin_labels = [f"[{low:.2f}, {high:.2f})" for low, high in zip(bin_limits[:-1], bin_limits[1:])]
flights_df = flights_df.assign(
    Bin=pd.cut(flights_df["C_1000_E"], bins=bin_limits, labels=bin_labels, right=False)
)

# Share of go-arounds per bin in percent; bins without any flight (0/0) are shown as 0.
is_go_around = flights_df["Go_Around_Label"].eq(1)
bin_totals = flights_df["Bin"].value_counts().sort_index()
bin_gas = flights_df.loc[is_go_around, "Bin"].value_counts().sort_index()
ga_freq = bin_gas.div(bin_totals).fillna(0).mul(100)

fig, ax = plt.subplots(figsize=(8, 5))
ga_freq.plot(kind="bar", ax=ax, color="yellowgreen", edgecolor="black", zorder=2)
ax.set_xlabel("Turbulence at 1000 m", fontsize=16)
ax.set_ylabel("Frequency of go-arounds (%)", fontsize=16)
plt.setp(ax.get_xticklabels(), rotation=0)
plt.setp(ax.get_yticklabels(), fontsize=16)
ax.set_ylim(0, ga_freq.max() + 1)
ax.grid(True, zorder=1)
fig.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# One row per flight: the first non-null value of every column within the flight.
flights_df = full_df.groupby("Flight_Index").first().reset_index()

# Left-closed bins of width 10 with edges 0, 10, ..., 70; values outside get no bin.
# NOTE(review): the axis label says metres but the bins only reach 70 — confirm the unit.
bin_limits = np.arange(0, 80, 10)
bin_labels = [f"[{bin_limits[i]}, {bin_limits[i + 1]})" for i in range(len(bin_limits) - 1)]
flights_df = flights_df.assign(
    Bin=pd.cut(flights_df["Visibility_E"], bins=bin_limits, labels=bin_labels, right=False)
)

# Share of go-arounds per bin in percent; bins without any flight (0/0) are shown as 0.
is_go_around = flights_df["Go_Around_Label"].eq(1)
bin_totals = flights_df["Bin"].value_counts().sort_index()
bin_gas = flights_df.loc[is_go_around, "Bin"].value_counts().sort_index()
ga_freq = bin_gas.div(bin_totals).fillna(0).mul(100)

fig, ax = plt.subplots(figsize=(8, 5))
ga_freq.plot(kind="bar", ax=ax, color="yellowgreen", edgecolor="black", zorder=2)
ax.set_xlabel("Visibility (m)", fontsize=15)
ax.set_ylabel("Frequency of go-arounds (%)", fontsize=15)
plt.setp(ax.get_xticklabels(), fontsize=12, rotation=0)
plt.setp(ax.get_yticklabels(), fontsize=12)
ax.set_ylim(0, ga_freq.max() + 1)
ax.grid(True, zorder=1)
fig.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# One row per flight: the first non-null value of every column within the flight.
flights_df = full_df.groupby("Flight_Index").first().reset_index()

# Left-closed unit-wide bins with edges 0, 1, ..., 12; values outside get no bin.
# NOTE(review): the axis label says metres but the bins only reach 12 — confirm the unit.
bin_limits = np.arange(0, 13, 1)
bin_labels = [f"[{bin_limits[i]}, {bin_limits[i + 1]})" for i in range(len(bin_limits) - 1)]
flights_df = flights_df.assign(
    Bin=pd.cut(flights_df["CBHA_E"], bins=bin_limits, labels=bin_labels, right=False)
)

# Share of go-arounds per bin in percent; bins without any flight (0/0) are shown as 0.
is_go_around = flights_df["Go_Around_Label"].eq(1)
bin_totals = flights_df["Bin"].value_counts().sort_index()
bin_gas = flights_df.loc[is_go_around, "Bin"].value_counts().sort_index()
ga_freq = bin_gas.div(bin_totals).fillna(0).mul(100)

fig, ax = plt.subplots(figsize=(8, 5))
ga_freq.plot(kind="bar", ax=ax, color="yellowgreen", edgecolor="black", zorder=2)
ax.set_xlabel("Cloud base height (m)", fontsize=15)
ax.set_ylabel("Frequency of go-arounds (%)", fontsize=15)
plt.setp(ax.get_xticklabels(), fontsize=10, rotation=0)
plt.setp(ax.get_yticklabels(), fontsize=12)
ax.set_ylim(0, ga_freq.max() + 1)
ax.grid(True, zorder=1)
fig.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# One row per flight: the first non-null value of every column within the flight.
flights_df = full_df.groupby("Flight_Index").first().reset_index()

# Left-closed bins of width 2 with edges -4, -2, ..., 18; values outside get no bin.
bin_limits = np.arange(-4, 19, 2)
bin_labels = [f"[{bin_limits[i]}, {bin_limits[i + 1]})" for i in range(len(bin_limits) - 1)]
flights_df = flights_df.assign(
    Bin=pd.cut(flights_df["Wind_Shear_E"], bins=bin_limits, labels=bin_labels, right=False)
)

# Share of go-arounds per bin in percent; bins without any flight (0/0) are shown as 0.
is_go_around = flights_df["Go_Around_Label"].eq(1)
bin_totals = flights_df["Bin"].value_counts().sort_index()
bin_gas = flights_df.loc[is_go_around, "Bin"].value_counts().sort_index()
ga_freq = bin_gas.div(bin_totals).fillna(0).mul(100)

fig, ax = plt.subplots(figsize=(8, 5))
ga_freq.plot(kind="bar", ax=ax, color="yellowgreen", edgecolor="black", zorder=2)
ax.set_xlabel("Wind Shear (m/s)", fontsize=16)
ax.set_ylabel("Frequency of go-arounds (%)", fontsize=16)
plt.setp(ax.get_xticklabels(), rotation=0)
plt.setp(ax.get_yticklabels(), fontsize=16)
ax.set_ylim(0, ga_freq.max() + 1)
ax.grid(True, zorder=1)
fig.tight_layout()
plt.show()

In [None]:
# Box plot of ECMWF wind shear for normal landings vs go-arounds (outliers hidden).
fig, ax = plt.subplots(figsize=(4, 5))
sns.boxplot(
    data=flights_df,
    x="Go_Around_Label",
    y="Wind_Shear_E",
    hue="Go_Around_Label",
    palette=["royalblue", "orange"],
    legend=False,
    showfliers=False,
    ax=ax,
)

ylim_dict = {"Wind_Shear_E": (-10, 20)}
ax.set_ylim(ylim_dict["Wind_Shear_E"])
ax.grid(True)
# Replace the numeric 0/1 category ticks with readable names.
ax.set_xticks([0, 1])
ax.set_xticklabels(["Normal landing", "Go-around"], fontsize=15)
ax.set_xlabel("")
ax.set_ylabel("Wind Shear (m/s)", fontsize=15)
ax.grid(True, axis="y", linestyle="--", alpha=0.6)
fig.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# One row per flight: the first non-null value of every column within the flight.
flights_df = full_df.groupby("Flight_Index").first().reset_index()

# Left-closed bins of width 5 with edges 0, 5, ..., 25; values outside get no bin.
bin_limits = np.arange(0, 30, 5)
bin_labels = [f"[{bin_limits[i]}, {bin_limits[i + 1]})" for i in range(len(bin_limits) - 1)]
flights_df = flights_df.assign(
    Bin=pd.cut(flights_df["Wind_Gust_E"], bins=bin_limits, labels=bin_labels, right=False)
)

# Share of go-arounds per bin in percent; bins without any flight (0/0) are shown as 0.
is_go_around = flights_df["Go_Around_Label"].eq(1)
bin_totals = flights_df["Bin"].value_counts().sort_index()
bin_gas = flights_df.loc[is_go_around, "Bin"].value_counts().sort_index()
ga_freq = bin_gas.div(bin_totals).fillna(0).mul(100)

fig, ax = plt.subplots(figsize=(8, 5))
ga_freq.plot(kind="bar", ax=ax, color="yellowgreen", edgecolor="black", zorder=2)
ax.set_xlabel("Wind Gust (m/s)", fontsize=16)
ax.set_ylabel("Frequency of go-arounds (%)", fontsize=16)
plt.setp(ax.get_xticklabels(), rotation=0)
plt.setp(ax.get_yticklabels(), fontsize=16)
ax.set_ylim(0, ga_freq.max() + 1)
ax.grid(True, zorder=1)
fig.tight_layout()
plt.show()

In [None]:
# Box plot of ECMWF wind gust for normal landings vs go-arounds (outliers hidden).
plt.figure(figsize=(4, 5))
sns.boxplot(
    data=flights_df,
    x="Go_Around_Label",
    y="Wind_Gust_E",
    hue="Go_Around_Label",   
    palette=["royalblue", "orange"],
    legend=False,
    showfliers=False
)

ylim_dict = {"Wind_Gust_E": (-5, 25)}
plt.ylim(ylim_dict["Wind_Gust_E"])
plt.grid(True)
# Replace the numeric 0/1 category ticks with readable names. The keyword must be
# `fontsize`; the former `fontsizer` is not a Text property and made plt.xticks raise.
plt.xticks([0, 1], ["Normal landing", "Go-around"], fontsize=15)
plt.xlabel("")
plt.ylabel("Wind Gust (m/s)", fontsize=15)
plt.grid(True, axis="y", linestyle="--", alpha=0.6)
plt.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# One row per flight: the first non-null value of every column within the flight.
flights_df = full_df.groupby("Flight_Index").first().reset_index()

# Left-closed bins of width 2 with edges -10, -8, ..., 10; values outside get no bin.
bin_limits = np.arange(-10, 12, 2)
bin_labels = [f"[{bin_limits[i]}, {bin_limits[i + 1]})" for i in range(len(bin_limits) - 1)]
flights_df = flights_df.assign(
    Bin=pd.cut(flights_df["Wind_CompV_E"], bins=bin_limits, labels=bin_labels, right=False)
)

# Share of go-arounds per bin in percent; bins without any flight (0/0) are shown as 0.
is_go_around = flights_df["Go_Around_Label"].eq(1)
bin_totals = flights_df["Bin"].value_counts().sort_index()
bin_gas = flights_df.loc[is_go_around, "Bin"].value_counts().sort_index()
ga_freq = bin_gas.div(bin_totals).fillna(0).mul(100)

fig, ax = plt.subplots(figsize=(8, 5))
ga_freq.plot(kind="bar", ax=ax, color="yellowgreen", edgecolor="black", zorder=2)
ax.set_xlabel("Wind Component V (m/s)", fontsize=16)
ax.set_ylabel("Frequency of go-arounds (%)", fontsize=16)
plt.setp(ax.get_xticklabels(), rotation=0)
plt.setp(ax.get_yticklabels(), fontsize=16)
ax.set_ylim(0, ga_freq.max() + 1)
ax.grid(True, zorder=1)
fig.tight_layout()
plt.show()

In [None]:
# Box plot of the ECMWF V wind component for normal landings vs go-arounds (outliers hidden).
fig, ax = plt.subplots(figsize=(4, 5))
sns.boxplot(
    data=flights_df,
    x="Go_Around_Label",
    y="Wind_CompV_E",
    hue="Go_Around_Label",
    palette=["royalblue", "orange"],
    legend=False,
    showfliers=False,
    ax=ax,
)

ylim_dict = {"Wind_CompV_E": (-10, 10)}
ax.set_ylim(ylim_dict["Wind_CompV_E"])
ax.grid(True)
# Replace the numeric 0/1 category ticks with readable names.
ax.set_xticks([0, 1])
ax.set_xticklabels(["Normal landing", "Go-around"], fontsize=15)
ax.set_xlabel("")
ax.set_ylabel("Wind Component V (m/s)", fontsize=15)
ax.grid(True, axis="y", linestyle="--", alpha=0.6)
fig.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# One row per flight: the first non-null value of every column within the flight.
flights_df = full_df.groupby("Flight_Index").first().reset_index()

# Left-closed bins of width 2 with edges -8, -6, ..., 12; values outside get no bin.
bin_limits = np.arange(-8, 14, 2)
bin_labels = [f"[{bin_limits[i]}, {bin_limits[i + 1]})" for i in range(len(bin_limits) - 1)]
flights_df = flights_df.assign(
    Bin=pd.cut(flights_df["Wind_CompU_E"], bins=bin_limits, labels=bin_labels, right=False)
)

# Share of go-arounds per bin in percent; bins without any flight (0/0) are shown as 0.
is_go_around = flights_df["Go_Around_Label"].eq(1)
bin_totals = flights_df["Bin"].value_counts().sort_index()
bin_gas = flights_df.loc[is_go_around, "Bin"].value_counts().sort_index()
ga_freq = bin_gas.div(bin_totals).fillna(0).mul(100)

fig, ax = plt.subplots(figsize=(8, 5))
ga_freq.plot(kind="bar", ax=ax, color="yellowgreen", edgecolor="black", zorder=2)
ax.set_xlabel("Wind Component U (m/s)", fontsize=16)
ax.set_ylabel("Frequency of go-arounds (%)", fontsize=16)
plt.setp(ax.get_xticklabels(), rotation=0)
plt.setp(ax.get_yticklabels(), fontsize=16)
ax.set_ylim(0, ga_freq.max() + 1)
ax.grid(True, zorder=1)
fig.tight_layout()
plt.show()

In [None]:
# Box plot of the ECMWF U wind component for normal landings vs go-arounds.
# NOTE(review): unlike the sibling box plots, outliers are drawn here because
# showfliers is left at its default — confirm this is intended.
fig, ax = plt.subplots(figsize=(4, 5))
sns.boxplot(
    data=flights_df,
    x="Go_Around_Label",
    y="Wind_CompU_E",
    hue="Go_Around_Label",
    palette=["royalblue", "orange"],
    legend=False,
    #showfliers=False
    ax=ax,
)

ylim_dict = {"Wind_CompU_E": (-6, 10)}
ax.set_ylim(ylim_dict["Wind_CompU_E"])
ax.grid(True)
# Replace the numeric 0/1 category ticks with readable names.
ax.set_xticks([0, 1])
ax.set_xticklabels(["Normal landing", "Go-around"], fontsize=15)
ax.set_xlabel("")
ax.set_ylabel("Wind Component U (m/s)", fontsize=15)
ax.grid(True, axis="y", linestyle="--", alpha=0.6)
fig.tight_layout()
plt.show()

#### ----------------------------------------------------------