# Import of required Libraries

In [34]:
import pandas as pd
import time
import numpy as np
import matplotlib.pyplot as plt
import shap
import plotly.graph_objs as go
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, make_scorer
from sklearn.inspection import permutation_importance
from sklearn.model_selection import KFold
from scipy.stats import norm
from math import sqrt
from xgboost import XGBRegressor
from xgboost import plot_importance

## Import of Table

In [35]:
# Load the dataset; the first CSV column is used as the row index.
table = pd.read_csv('test.csv', index_col=0)

### Creating a Copy of table

In [36]:
# Work on a copy so that the feature preparation below does not modify `table`.
data = table.copy()

### Dropping unnecessary columns

In [37]:
# Columns that are neither model features nor the target.
# They are removed in a single call instead of one in-place drop per column.
columns_to_drop = [
    'Flight (Callsign)',
    'ICAO Aircraft Type',
    'Meteo idx',
    'Flight ID',
    'Time at TH',
    'Not Vacated',
    'Short ROT',
    'Previous Flight',
    'Emergency',
    'ROT Previous Flight [s]',
    'Month cos',
    'Month sin',
    'Arrival Traffic Intensity',
    'Precipitation idx',
    'ICAO 24',
    'Energy at MALW [MJ]',
    'Time of Go-around',
    'Minute sin',
    'Minute cos',
    'Average ROT Previous 5 Flight [s]',
    'Average ROT Previous 5 Flights same A/C Type [s]',
]
data = data.drop(columns=columns_to_drop)

### Transform Categorical Data to Type category

In [38]:
# Convert each categorical column to pandas' 'category' dtype, one column at a time.
for categorical_column in (
    'A/C Type',
    'ICAO Code',
    'ICAO Weight Turbulence Category',
    'Propulsion Type',
    'Day/Night',
    'No Wind',
    'Wind Variable',
):
    data[categorical_column] = data[categorical_column].astype('category')

### Define features (X)

In [39]:
# Names of the columns that enter the model, in the order used for training.
feature_columns = [
    'MALW [kg]',
    'Propulsion Type',
    'Number of Engines',
    'ICAO Code',
    'Speed at TH [kt]',
    'Geoaltitude at TH [ft]',
    'Specific Energy [J/kg]',
    'Hour sin',
    'Hour cos',
    'Day of week sin',
    'Day of week cos',
    'Arrival Traffic Intensity with go-arounds',
    'Time from Previous Landing [s]',
    'Temperature [°C]',
    'Headwind [kt]',
    'Crosswind [kt]',
    'No Wind',
    'Wind Variable',
    'Visibility Category',
    'Precipitation [mm]',
    'Average ROT Previous Flight (Intensity) [s]',
]

# Feature matrix: the selected columns of the prepared data.
X = data[feature_columns]


### Copy of original Column names of X for Plots

In [40]:
# Keep a reference to the current column labels of X (including units and brackets)
# so they can be used as readable labels in plots.
col = X.columns

### Clean column names of X from special characters

In [41]:
# Remove the characters '[', ']', '<' and '>' from the feature names.
# NOTE(review): presumably needed because XGBoost rejects these characters in feature names — confirm.
X.columns = X.columns.str.replace(r"[\[\]<>]", "", regex=True)

### Define Target (y)

In [42]:
# Target as a single-column DataFrame (the double brackets keep it two-dimensional).
y = data[['ROT [s]']]

### Clean column name of y from special characters

In [43]:
# Remove '[', ']', '<' and '>' from the target column name: 'ROT [s]' becomes 'ROT s'.
y.columns = y.columns.str.replace(r"[\[\]<>]", "", regex=True)

### Split data into Test Set and Folds for Train/Validation Sets

In [44]:
# Hold out 20 % of the rows as the final test set; the seed is fixed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Shuffled 4-fold splitter with a fixed seed, used for cross-validation.
kf = KFold(
    n_splits=4,
    shuffle=True,
    random_state=42,
)

### Define XGBoost Model

In [45]:
# Quantile levels: 5 %, 50 % (median) and 95 %.
alpha = [0.05, 0.5, 0.95]

In [46]:
# XGBoost regressor trained with the quantile (pinball) loss.
# It predicts one output column per quantile level, in the order given by `alpha`.
model = XGBRegressor(
    objective='reg:quantileerror',
    quantile_alpha=alpha,  # reuse the quantile levels defined above instead of repeating the literal
    n_estimators=1418,
    learning_rate=0.04545957595996846,
    max_depth=3,
    reg_alpha=3,  # L1 regularisation (spelled out to avoid confusion with the `alpha` quantile list)
    reg_lambda=16,  # L2 regularisation
    gamma=1,
    subsample=0.7841837601370512,
    colsample_bytree=0.846166923806416,
    min_child_weight=4,
    max_delta_step=7,
    enable_categorical=True  # accept the pandas 'category' columns directly
)


### Train the model

In [47]:
# One entry per fold for each metric
mae_scores = []
rmse_scores = []
r2_scores = []
coverage_scores = []

# Cross-validation on the training portion only; the test set stays untouched
for fold, (train_index, val_index) in enumerate(kf.split(X_train)):
    X_fold_train, X_val = X_train.iloc[train_index], X_train.iloc[val_index]
    y_fold_train, y_val = y_train.iloc[train_index], y_train.iloc[val_index]

    # Refit the model on this fold's training rows
    model.fit(X_fold_train, y_fold_train, verbose=0)

    # One prediction column per quantile level
    y_pred = model.predict(X_val)
    y_pred_lower = y_pred[:, 0]   # 0.05 quantile
    y_pred_median = y_pred[:, 1]  # 0.5 quantile
    y_pred_upper = y_pred[:, 2]   # 0.95 quantile

    # Point-forecast metrics use the median prediction
    mae = mean_absolute_error(y_val['ROT s'], y_pred_median)
    # RMSE is computed as sqrt(MSE): the `squared=False` argument of
    # mean_squared_error is deprecated since scikit-learn 1.4.
    rmse = sqrt(mean_squared_error(y_val['ROT s'], y_pred_median))
    r2 = r2_score(y_val['ROT s'], y_pred_median)

    # Coverage: share of actual values inside the [0.05, 0.95] quantile interval
    coverage = np.mean((y_val['ROT s'] >= y_pred_lower) & (y_val['ROT s'] <= y_pred_upper))

    mae_scores.append(mae)
    rmse_scores.append(rmse)
    r2_scores.append(r2)
    coverage_scores.append(coverage)

    print(f'Fold {fold + 1} -> MAE: {mae:.4f}, RMSE: {rmse:.4f}, R2 Score: {r2:.4f}, Coverage (90%): {coverage:.4f}')

# Average each metric over the folds
mean_mae = np.mean(mae_scores)
mean_rmse = np.mean(rmse_scores)
mean_r2 = np.mean(r2_scores)
mean_coverage = np.mean(coverage_scores)

print("\nAverage metrics over all folds:")
print(f'Mean MAE: {mean_mae:.4f}')
print(f'Mean RMSE: {mean_rmse:.4f}')
print(f'Mean R2 Score: {mean_r2:.4f}')
print(f'Mean Coverage (90%): {mean_coverage:.4f}')



'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.



Fold 1 -> MAE: 4.1630, RMSE: 5.8246, R2 Score: 0.3635, Coverage (90%): 0.8807



'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.



Fold 2 -> MAE: 4.1662, RMSE: 5.9706, R2 Score: 0.3219, Coverage (90%): 0.8800



'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.



Fold 3 -> MAE: 4.1621, RMSE: 5.9867, R2 Score: 0.3477, Coverage (90%): 0.8878
Fold 4 -> MAE: 4.1613, RMSE: 5.9830, R2 Score: 0.2997, Coverage (90%): 0.8865

Average metrics over all folds:
Mean MAE: 4.1632
Mean RMSE: 5.9412
Mean R2 Score: 0.3332
Mean Coverage (90%): 0.8837



'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.



In [48]:
# Refit on the complete training set (all cross-validation folds together).
model.fit(X_train, y_train, verbose=0)

# Predict on the held-out test set; the result has one column per quantile level.
y_pred = model.predict(X_test)

### Calculate metrics

In [49]:
# Point-forecast metrics on the test set, based on the median column (index 1).
mae = mean_absolute_error(y_test, y_pred[:, 1])
mse = mean_squared_error(y_test, y_pred[:, 1])
rmse = sqrt(mse)  # root mean squared error
r_squared = r2_score(y_test, y_pred[:, 1])

# Interval bounds: first and last prediction column.
y_pred_lower, y_pred_upper = y_pred[:, 0], y_pred[:, 2]

# Share of test observations lying inside [lower, upper] (both ends inclusive).
coverage_test = np.mean(y_test['ROT s'].between(y_pred_lower, y_pred_upper))

# Report the results
print(f"Test MAE (s): {mae}")
print(f"Test RMSE (s): {rmse}")
print(f"R-squared: {r_squared}")
print(f"Coverage Score: {coverage_test}")

Test MAE (s): 4.375986000436251
Test RMSE (s): 6.211409792888561
R-squared: 0.31622040271759033
Coverage Score: 0.8918032786885246


In [50]:
# Store the results in a DataFrame
results = {
    'Metric': ['MAE (s)', 'RMSE (s)', 'R²', 'Coverage Score'],
    'Value': [mae, rmse, r_squared, coverage_test]
}

# Create the DataFrame
results_df = pd.DataFrame(results)

# Display the table
print(results_df)

           Metric     Value
0         MAE (s)  4.375986
1        RMSE (s)  6.211410
2              R²  0.316220
3  Coverage Score  0.891803


In [51]:
# Create the output folder if it is missing.
# exist_ok=True makes this a no-op when the folder already exists.
output_dir = 'Plots'
os.makedirs(output_dir, exist_ok=True)

# Save the metrics table as a LaTeX file in the 'Plots' folder
results_df.to_latex(os.path.join(output_dir, 'metrics_final_model_test_set.tex'))

### Compare Prediction to actual ROT

In [52]:
# Start from the observed values and the three predicted quantiles, then derive
# the interval width, the residuals and the in-interval flag from those columns.
comparison_df = pd.DataFrame({
    'Actual ROT': y_test['ROT s'],
    'Predicted ROT': y_pred[:, 1],   # median (central) prediction
    'Lower Boundary': y_pred[:, 0],  # lower end of the interval
    'Upper Boundary': y_pred[:, 2],  # upper end of the interval
}).assign(**{
    'Interval Size': lambda frame: frame['Upper Boundary'] - frame['Lower Boundary'],
    'Residual': lambda frame: (frame['Actual ROT'] - frame['Predicted ROT']).abs(),
    'Residual (signed)': lambda frame: frame['Actual ROT'] - frame['Predicted ROT'],
    'Within Predicted Interval': lambda frame: (
        (frame['Lower Boundary'] <= frame['Actual ROT'])
        & (frame['Actual ROT'] <= frame['Upper Boundary'])
    ),
})

comparison_df


Unnamed: 0,Actual ROT,Predicted ROT,Lower Boundary,Upper Boundary,Interval Size,Residual,Residual (signed),Within Predicted Interval
7452,53.0,50.347965,43.993759,58.101124,14.107365,2.652035,2.652035,True
6239,57.0,56.238972,49.637177,63.005928,13.368752,0.761028,0.761028,True
5849,60.0,51.953205,46.751522,60.823364,14.071842,8.046795,8.046795,True
5946,51.0,53.923050,47.432465,63.159252,15.726788,2.923050,-2.923050,True
1084,63.0,52.119125,46.788181,60.940685,14.152504,10.880875,10.880875,False
...,...,...,...,...,...,...,...,...
1513,49.0,49.892628,44.238556,59.243061,15.004505,0.892628,-0.892628,True
216,55.0,59.472855,51.874912,78.650375,26.775463,4.472855,-4.472855,True
4375,58.0,58.635189,49.886772,66.234779,16.348007,0.635189,-0.635189,True
5154,65.0,54.019894,47.252632,60.496231,13.243599,10.980106,10.980106,False


### Plots

#### Residual Scatterplot

In [53]:
# Create the 'Plots' folder if it is missing (no error when it already exists)
output_dir = 'Plots'
os.makedirs(output_dir, exist_ok=True)

# Signed residuals plotted against the actual ROT
scatter = go.Scatter(
    x=comparison_df['Actual ROT'],
    y=comparison_df['Residual (signed)'],
    mode='markers',
    marker=dict(
        size=10,
        color='blue',
        line=dict(width=1, color='black'),  # marker outline
        opacity=0.6
    ),
    name='Residuals'
)

# Horizontal reference line at residual = 0, spanning the observed ROT range
zero_line = go.Scatter(
    x=[comparison_df['Actual ROT'].min(), comparison_df['Actual ROT'].max()],
    y=[0, 0],
    mode='lines',
    line=dict(color='red', dash='dash'),
    name='Zero Residual Line'
)

# Figure layout; the margins are set once, with a reduced top margin
layout = go.Layout(
    width=800,
    height=500,
    xaxis=dict(
        title='Actual Runway Occupancy Time [s]',
        title_font=dict(size=18),
        tickfont=dict(size=14)
    ),
    yaxis=dict(
        title='Residuals [s]',
        title_font=dict(size=18),
        tickfont=dict(size=14)
    ),
    showlegend=True,
    legend=dict(x=0.01, y=0.99),  # legend in the top-left corner
    margin=dict(l=40, r=40, t=20, b=40),
)

# Combine the scatter trace and the zero line into a single figure
fig = go.Figure(data=[scatter, zero_line], layout=layout)

# Save the plot as a PDF in the 'Plots' folder
fig.write_image(os.path.join(output_dir, 'residual_plot.pdf'), scale=5)

# Display the plot (interactive HTML view)
fig.show()


#### Residual CDF

In [54]:
# Create the 'Plots' folder if it is missing (no error when it already exists)
output_dir = 'Plots'
os.makedirs(output_dir, exist_ok=True)

# Absolute errors of the median prediction
absolute_error = comparison_df['Residual']

# Empirical CDF: sorted errors against their cumulative share (1/n, 2/n, ..., 1)
sorted_errors = np.sort(absolute_error)
cdf = np.arange(1, len(sorted_errors) + 1) / len(sorted_errors)

# Median and 90th percentile of the absolute error
median = np.median(absolute_error)
quantile_90 = np.quantile(absolute_error, 0.9)

# CDF curve
cdf_plot = go.Scatter(
    x=sorted_errors,
    y=cdf,
    mode='lines',
    line=dict(color='blue', width=2),
    name='CDF',
    showlegend=False
)

# Vertical guide lines from the x-axis up to the CDF at the median and the 90th percentile
median_line = go.Scatter(
    x=[median, median],
    y=[0, 0.5],
    mode='lines',
    line=dict(color='green', dash='dash', width=2),
    name=f'Median ({median:.2f}s)',
    showlegend=False
)
quantile_90_line = go.Scatter(
    x=[quantile_90, quantile_90],
    y=[0, 0.9],
    mode='lines',
    line=dict(color='red', dash='dash', width=2),
    name=f'90th Quantile ({quantile_90:.2f}s)',
    showlegend=False
)

# Horizontal guide lines from the y-axis to the CDF at probabilities 0.5 and 0.9
median_horizontal = go.Scatter(
    x=[0, median],
    y=[0.5, 0.5],
    mode='lines',
    line=dict(color='green', dash='dot', width=2),
    showlegend=False
)
quantile_90_horizontal = go.Scatter(
    x=[0, quantile_90],
    y=[0.9, 0.9],
    mode='lines',
    line=dict(color='red', dash='dot', width=2),
    showlegend=False
)

# Text annotations for the median and the 90th percentile.
# The value labels sit 1.5 s to the right of their vertical line; the
# 'Median' / '90th Quantile' labels use fixed, hand-picked x positions.
median_text_x = go.Scatter(
    x=[median + 1.5],
    y=[0.2],
    mode='text',
    text=[f'{median:.2f}s'],
    textfont=dict(size=17),
    textposition='bottom center',
    showlegend=False
)
median_text_y = go.Scatter(
    x=[3.2],
    y=[0.5],
    mode='text',
    text=['Median'],
    textfont=dict(size=17),
    textposition='top left',
    showlegend=False
)
quantile_90_text_x = go.Scatter(
    x=[quantile_90 + 1.5],
    y=[0.2],
    mode='text',
    text=[f'{quantile_90:.2f}s'],
    textfont=dict(size=17),
    textposition='bottom center',
    showlegend=False
)
quantile_90_text_y = go.Scatter(
    x=[6],
    y=[0.9],
    mode='text',
    text=['90th Quantile'],
    textfont=dict(size=17),
    textposition='top left',
    showlegend=False
)

# Figure layout
layout = go.Layout(
    xaxis=dict(
        title='Absolute Error [s]',
        title_font=dict(size=20),
        tickfont=dict(size=16),
        dtick=2  # one tick every 2 s
    ),
    yaxis=dict(
        title='Cumulative Probability',
        title_font=dict(size=20),
        tickfont=dict(size=16)
    ),
    showlegend=True,  # note: every trace above sets showlegend=False, so none contributes a legend entry
    margin=dict(l=10, r=10, t=10, b=10),  # tight 10 px margins on all sides
    width=1000,
    height=500
)

# Combine all traces into one figure
fig = go.Figure(data=[
    cdf_plot,
    median_line,
    quantile_90_line,
    median_horizontal,
    quantile_90_horizontal,
    median_text_x,
    median_text_y,
    quantile_90_text_x,
    quantile_90_text_y
], layout=layout)

# Save the plot as a PDF
fig.write_image(os.path.join(output_dir, 'absolute_error_cdf.pdf'), scale=5)

# Display the plot (interactive HTML view)
fig.show()


#### Interval Size Histogram

In [55]:
# Create the 'Plots' folder if it is missing (no error when it already exists)
output_dir = 'Plots'
os.makedirs(output_dir, exist_ok=True)

# Width of the predicted interval (upper minus lower boundary) per test observation
interval_size = comparison_df['Interval Size']

# Histogram of the interval widths with 0.5-second bins starting at 0
histogram = go.Histogram(
    x=interval_size,
    xbins=dict(
        start=0,
        end=interval_size.max(),
        size=0.5
    ),
    marker=dict(
        color='blue',
        line=dict(width=1, color='black'),  # bar outlines to separate the bins
        opacity=0.6
    ),
    name='Interval Sizes'
)

# Figure layout; the margins are set once, with a reduced top margin
layout = go.Layout(
    xaxis=dict(title='Interval Size [s]', dtick=1),  # one tick per second
    yaxis=dict(title='Count'),
    showlegend=False,
    margin=dict(l=40, r=40, t=20, b=40),
    bargap=0.1  # slight gap between bars for clarity
)

# Combine the histogram trace and the layout into a figure
fig = go.Figure(data=[histogram], layout=layout)

# Save the histogram as a PDF in the 'Plots' folder
fig.write_image(os.path.join(output_dir, 'intervall_size_hist.pdf'), scale=5)

# Display the histogram (interactive HTML view)
fig.show()

### Evaluating Importance of Features for the Prediction Model

##### Permutation based Feature Importance

In [56]:
# Original feature names (with units/brackets) for the plot labels
original_feature_names = col.copy()


def median_mae(y_true, y_pred):
    """Return the mean absolute error of the median-quantile prediction.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : 2-D array
        Model output with one column per quantile; column 1 is used as the
        median (0.5 quantile).

    Returns
    -------
    float
        MAE between ``y_true`` and ``y_pred[:, 1]``.
    """
    y_pred_median = y_pred[:, 1]
    return mean_absolute_error(y_true, y_pred_median)


# Backward-compatible alias: the function used to be called `median_mse`
# although it has always computed the MAE.
median_mse = median_mae

# greater_is_better=False makes the scorer return the negated MAE.
# Permutation importance (baseline score minus permuted score) is then the
# increase in MAE caused by shuffling a feature.
median_mae_scorer = make_scorer(median_mae, greater_is_better=False)
median_mse_scorer = median_mae_scorer  # backward-compatible alias

# Permutation importance on the test set, scored on the median prediction only
result = permutation_importance(
    model, X_test, y_test,
    n_repeats=30,  # number of shuffles per feature
    random_state=42,
    scoring=median_mae_scorer
)

# Order features by mean importance (ascending, so the largest bar ends up on top)
sorted_idx = result.importances_mean.argsort()

# Collect the results in a DataFrame
importance_df = pd.DataFrame({
    'Feature': [original_feature_names[idx] for idx in sorted_idx],
    'Mean Importance': result.importances_mean[sorted_idx],
    'Std Importance': result.importances_std[sorted_idx]
})

# Feature categories and their bar colours
categories = {
    'Aircraft-specific features': 'steelblue',
    'Dynamic flight features': 'orange',
    'Time-related features': 'green',
    'Environmental conditions': 'red',
    'Traffic intensity': 'purple',
    'Sequential features': 'brown'
}

# Category of each feature
feature_category_mapping = {
    'MALW [kg]': 'Aircraft-specific features',
    'ICAO Code': 'Aircraft-specific features',
    'Number of Engines': 'Aircraft-specific features',
    'Propulsion Type': 'Aircraft-specific features',
    'Speed at TH [kt]': 'Dynamic flight features',
    'Specific Energy [J/kg]': 'Dynamic flight features',
    'Geoaltitude at TH [ft]': 'Dynamic flight features',
    'Hour sin': 'Time-related features',
    'Hour cos': 'Time-related features',
    'Day of week sin': 'Time-related features',
    'Day of week cos': 'Time-related features',
    'Temperature [°C]': 'Environmental conditions',
    'Headwind [kt]': 'Environmental conditions',
    'Crosswind [kt]': 'Environmental conditions',
    # NOTE(review): the two wind flags were missing from the mapping and were
    # drawn in grey without a legend entry; grouped with the other wind
    # features here — confirm the intended category.
    'No Wind': 'Environmental conditions',
    'Wind Variable': 'Environmental conditions',
    'Visibility Category': 'Environmental conditions',
    'Precipitation [mm]': 'Environmental conditions',
    'Arrival Traffic Intensity with go-arounds': 'Traffic intensity',
    'Time from Previous Landing [s]': 'Sequential features',
    'Average ROT Previous Flight (Intensity) [s]': 'Sequential features'
}

# Bar colour per feature; features without a category fall back to grey
colors = [
    categories[feature_category_mapping[feature]] if feature in feature_category_mapping else 'gray'
    for feature in importance_df['Feature']
]

# Horizontal bar chart with one bar per feature and the standard deviation as error bar
fig = go.Figure()

fig.add_trace(go.Bar(
    x=importance_df['Mean Importance'],
    y=importance_df['Feature'],
    orientation='h',
    error_x=dict(type='data', array=importance_df['Std Importance']),
    marker=dict(color=colors),
    name='',
    showlegend=False  # the bars are explained by the category legend entries below
))

# Legend entries for the colour coding (empty traces that only carry a marker colour)
for category, color in categories.items():
    fig.add_trace(go.Scatter(
        x=[None], y=[None],
        mode='markers',
        marker=dict(size=10, color=color),
        legendgroup=category,
        showlegend=True,
        name=category
    ))

# Figure size: the height grows with the number of features
height = 800 + len(importance_df) * 20
width = 1800

fig.update_layout(
    xaxis=dict(
        title='Increase in MAE [s]',  # permuting a feature raises the MAE by this amount
        title_font=dict(size=20, color='black'),
        tickfont=dict(size=16, color='black')
    ),
    yaxis=dict(
        title='Feature',
        title_font=dict(size=20, color='black'),
        tickfont=dict(size=16, color='black')
    ),
    margin=dict(l=100, r=50, t=50, b=50),
    showlegend=True,
    legend=dict(
        font=dict(size=14),
        x=1,  # legend anchored to the bottom-right corner of the plot area
        y=0,
        xanchor="right",
        yanchor="bottom"
    ),
    height=height,
    width=width,
    template='plotly_white'
)

# Create the 'Plots' folder if it is missing (no error when it already exists)
output_dir = 'Plots'
os.makedirs(output_dir, exist_ok=True)

# Save the plot as a PDF
fig.write_image(os.path.join(output_dir, 'Feature_importance_plot.pdf'), scale=5)

# Display the plot (interactive HTML view)
fig.show()

### Analysis of Time Buffer between Landings

#### Histogram of Time between vacated RWY and next arrival

In [57]:
# Make sure the 'Time at TH' column is in datetime format
table['Time at TH'] = pd.to_datetime(table['Time at TH'])

# Seconds between each row's 'Time at TH' and that of the following row.
# NOTE(review): assumes the rows are in chronological order — confirm.
table['Time Difference (s)'] = (
    table['Time at TH'].shift(-1) - table['Time at TH']
).dt.total_seconds()

# Time difference to the next row minus this row's ROT
table['Diff'] = table['Time Difference (s)'] - table['ROT [s]']

# Keep only the rows where this difference is below 120 seconds
filtered_diff = table.loc[table['Diff'] < 120, 'Diff']

# Histogram of the filtered differences with 1-second bins
histogram = go.Figure()
histogram.add_trace(
    go.Histogram(
        x=filtered_diff.dropna(),
        xbins=dict(end=120, size=1),
        marker=dict(color='blue', line=dict(width=1, color='black')),
        opacity=0.7
    )
)

# Axis titles, font sizes and margins
histogram.update_layout(
    xaxis=dict(
        title='Time Difference [s]',
        title_font=dict(size=16),
        tickfont=dict(size=14)
    ),
    yaxis=dict(
        title='Count',
        title_font=dict(size=16),
        tickfont=dict(size=14)
    ),
    margin=dict(l=40, r=40, t=20, b=40)
)

# Create the 'Plots' folder if it is missing (no error when it already exists)
output_dir = 'Plots'
os.makedirs(output_dir, exist_ok=True)

# Save the histogram as a PDF in the 'Plots' folder
histogram.write_image(os.path.join(output_dir, 'Time_buffer_hist_v1.pdf'), scale=5)

# Display the plot
histogram.show()


### Analysis of ROT distribution

In [58]:
# Histogram of the ROT values with 1-second bins
fig = go.Figure()
fig.add_trace(go.Histogram(
    x=table['ROT [s]'],
    xbins=dict(
        size=1
    ),
    marker=dict(
        line=dict(
            width=1,
            color='black'  # bar outlines to separate the bins
        )
    )
))

# Axis titles, font sizes, x-range and margins
fig.update_layout(
    xaxis=dict(
        title="ROT [s]",
        title_font=dict(size=18),
        tickfont=dict(size=14),
        range=[0, 121]  # show ROT values from 0 to 121 s
    ),
    yaxis=dict(
        title="Count",
        title_font=dict(size=18),
        tickfont=dict(size=14)
    ),
    showlegend=False,
    margin=dict(l=40, r=40, t=20, b=40)  # reduced top margin
)

# Create the 'Plots' folder if it is missing (no error when it already exists)
output_dir = 'Plots'
os.makedirs(output_dir, exist_ok=True)

# Save the plot as a PDF
fig.write_image(os.path.join(output_dir, 'ROT_distribution_hist_v2.pdf'), scale=5)

# Display the plot
fig.show()

In [59]:
# Mean of the 'ROT [s]' column over the whole table
table['ROT [s]'].mean()

54.92037255673619

In [60]:
# Standard deviation of the 'ROT [s]' column over the whole table
table['ROT [s]'].std()

7.32970457525948

In [61]:
# Lowest 'Speed at TH [kt]' among the rows whose 'A/C Type' is 'A388'
table.loc[table['A/C Type'] == 'A388', 'Speed at TH [kt]'].min()

136.0

In [62]:
# Display the table, which at this point also contains the
# 'Time Difference (s)' and 'Diff' columns.
table

Unnamed: 0,Flight (Callsign),Flight ID,ICAO Code,A/C Type,ICAO 24,ICAO Aircraft Type,Propulsion Type,Number of Engines,MALW [kg],ICAO Weight Turbulence Category,...,Average ROT Previous 5 Flight [s],Average ROT Previous Flight (Intensity) [s],ROT Previous Flight same A/C Type [s],Average ROT Previous 5 Flights same A/C Type [s],Average ROT Previous Flight same A/C Type Total [s],Not Vacated,Short ROT,ROT [s],Time Difference (s),Diff
0,SWR74A,SWR74A_10187,SWR,E290,4b0291,L2J,Jet,2.0,49050.0,Medium,...,,,,,,False,False,47.0,107.0,60.0
1,EJU96HY,EJU96HY_6701,EJU,A320,440ca9,L2J,Jet,2.0,66000.0,Medium,...,47.0,47.00,,,,False,False,52.0,185.0,133.0
2,N121SA,N121SA_31081,Unknown,GLEX,a058fa,L2J,Jet,2.0,35652.0,Medium,...,49.5,49.50,,,,False,False,60.0,373.0,313.0
3,DLH3AR,DLH3AR_2002,DLH,CRJ9,3c4dd0,L2J,Jet,2.0,33339.0,Medium,...,53.0,53.00,,,,False,False,53.0,98.0,45.0
4,UAL3,UAL3_31463,UAL,B763,a8ec57,L2J,Jet,2.0,145149.0,Heavy,...,53.0,53.00,,,,False,False,56.0,351.0,295.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7618,SWR6GM,SWR6GM_18688,SWR,A20N,4b1812,L2J,Jet,2.0,67400.0,Medium,...,57.4,58.25,62.0,61.2,55.358289,False,False,57.0,149.0,92.0
7619,SWR9XT,SWR9XT_19077,SWR,A20N,4b1818,L2J,Jet,2.0,67400.0,Medium,...,58.0,57.00,57.0,60.8,55.362667,False,False,59.0,97.0,38.0
7620,SWR5GZ,SWR5GZ_10490,SWR,E295,4b0293,L2J,Jet,2.0,54000.0,Medium,...,57.4,59.50,57.0,58.2,51.796407,False,False,53.0,228.0,175.0
7621,CFG5XM,CFG5XM_2780,CFG,A320,3c6461,L2J,Jet,2.0,66000.0,Medium,...,58.2,58.25,54.0,51.8,53.441091,False,False,62.0,96.0,34.0
