# Ceiling Height Prediction Model
*C964 Capstone by Cynthia Black 001364386*

## How to Run the Model
1. Run ```pip install -r requirements.txt``` or manually ensure that the following libraries are installed: ipywidgets, IPython (provides IPython.display), matplotlib, numpy, pandas, scikit-learn
2. Ensure the data files (PANC_METAR_cleaned.csv and PAFA_METAR_cleaned.csv) are in the same directory as this notebook.
3. Click the Run button ▶️ on the toolbar or choose Run -> Run All Cells from the menu
4. Scroll to the bottom of the notebook, then select an airport from the dropdown.
5. Click the *Generate* button to generate predictions and visualizations and calculate RMSE

## About this Model
The model uses Bayesian Ridge Regression to predict cloud ceiling heights at Anchorage International and Fairbanks International Airports in Alaska. It was developed in response to the Federal Aviation Administration's NextGen Weather Program's call for applied research to minimize the impact of weather on the National Airspace System (FAA, 2025). Before the regression model runs, a rolling three-hour average (via pandas .rolling() method) of key variables (ambient temperature, dew point, relative humidity, and wind speed) is taken to reduce variance in the model.

In [3]:
# IMPORTS
import ipywidgets as widgets
from IPython.display import display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import BayesianRidge, Ridge
from sklearn.metrics import mean_squared_error, PredictionErrorDisplay

# MACHINE LEARNING MODEL (NONDESCRIPTIVE METHOD)
def run_model(airport):
    """Fit a Bayesian Ridge ceiling model for one airport, then report and plot it.

    Reads the cleaned METAR CSV for *airport* from the current directory,
    builds 3-hour rolling averages of temperature, relative humidity, dew
    point and wind speed as predictor variables, fits ``BayesianRidge``
    against the ``ceiling`` column, prints accuracy measures and shows three
    matplotlib figures.

    The model is evaluated on the same rows it was fitted to, so the printed
    measures are in-sample figures.

    Args:
        airport: ICAO identifier, ``'PANC'`` (Anchorage) or ``'PAFA'``
            (Fairbanks).

    Raises:
        ValueError: if *airport* is not one of the supported identifiers.
    """
    # Supported airports: ICAO code -> (CSV file, city name used in plot titles).
    airports = {
        'PANC': ('PANC_METAR_cleaned.csv', 'Anchorage'),
        'PAFA': ('PAFA_METAR_cleaned.csv', 'Fairbanks'),
    }
    if airport not in airports:
        # Fail early with a clear message instead of an unbound-variable error.
        raise ValueError(
            f"Unsupported airport {airport!r}; expected one of {sorted(airports)}"
        )
    csv_file, city = airports[airport]

    # import METAR values with date parsing
    metar = pd.read_csv(csv_file, parse_dates=['valid'])

    # sort dates - a time-based rolling window requires a monotonic datetime index
    metar = metar.set_index("valid").sort_index()

    # calculate 3-hour rolling averages (prediction variables)
    predictions = pd.DataFrame({
        'temperature_3hour_average': metar['tmpf'].rolling("3h").mean(),
        'humidity_3hour_average': metar['relh'].rolling("3h").mean(),
        'dewpoint_3hour_average': metar['dwpf'].rolling("3h").mean(),
        'windspeed_3hour_average': metar['sknt'].rolling("3h").mean(),
    })

    # Keep only rows where every predictor is present. A positional boolean
    # mask (rather than label lookup on the index) keeps predictors and target
    # row-for-row aligned even if two observations share a timestamp.
    complete_rows = predictions.notna().all(axis=1).to_numpy()
    predictions = predictions[complete_rows]

    # define target and align it with the predictor rows
    observed_ceiling = metar.loc[complete_rows, "ceiling"]  # target variable

    # Bayesian Ridge linear regression model
    model = BayesianRidge()
    model.fit(predictions, observed_ceiling)
    predicted_ceiling = model.predict(predictions)

    # ACCURACY MEASURES
    # calculate RMSE to evaluate model prediction accuracy;
    # mean_squared_error returns MSE, so take the square root to get RMSE
    rmse = np.sqrt(mean_squared_error(observed_ceiling, predicted_ceiling))
    print("RMSE: ")
    print(rmse)

    # compare actual vs predicted ceiling at or below 1,000 feet.
    # This compares how MANY low-ceiling observations each side has (relative
    # difference in counts, in percent), not whether they are the same rows.
    observed_low_count = (observed_ceiling.values <= 1000).sum()
    predicted_low_count = (predicted_ceiling <= 1000).sum()
    if observed_low_count:
        error_rate = (
            abs(observed_low_count - predicted_low_count) / observed_low_count * 100
        )
    else:
        # No observed low ceilings: the relative error is undefined.
        error_rate = float('nan')

    print("Error Rate for Observed vs Predicted Low Ceiling")
    print(error_rate)

    # VISUALIZATIONS (DESCRIPTIVE METHOD)
    # Show Bayesian Ridge Linear Regression Model
    plt.plot(predictions.index, predicted_ceiling, label='Bayesian Prediction', color='#5DE2E7')
    plt.title(f"Ceiling Height Predictions for {city} with Bayesian Ridge Regression")
    plt.xlabel("Datetime")
    plt.ylabel("Predicted Ceiling Height")
    plt.xticks(rotation=90)
    plt.show()

    # Show Observed vs Predicted Ceiling Heights
    plt.scatter(predictions.index, observed_ceiling, label='Observed', color='#5DE2E7')
    plt.scatter(predictions.index, predicted_ceiling, label='Predicted', color='#060270')
    plt.title(f"Observed vs Predicted Ceiling Heights - {city}")
    plt.xlabel("Datetime")
    plt.ylabel("Ceiling Height")
    plt.xticks(rotation=90)
    plt.legend()
    plt.show()

    # Show Prediction Error for the Bayesian Ridge model fitted above, so the
    # plot describes the same predictions that the printed measures score.
    # Named error_display so it does not shadow IPython's display().
    error_display = PredictionErrorDisplay(y_true=observed_ceiling, y_pred=predicted_ceiling)
    error_display.plot()
    plt.title(f"Prediction Errors - {city}")
    plt.show()

In [4]:
# INTERACTIVITY
# instructions for interactives
html = widgets.HTML(
    value="<b>Instructions:</b> To see predicted ceiling height for an airport, select the airport from the dropdown, then click the Generate button.",
)

# dropdown to select airport; each option is (label shown to the user, ICAO code
# passed to the model)
airport_selector = widgets.Dropdown(
    options=[('Anchorage', 'PANC'), ('Fairbanks', 'PAFA')],
    value='PANC',
    description='Airport: '
)

# button that triggers a model run.
# The trait is spelled `tooltip` (lowercase); a capitalized `Tooltip` keyword is
# not a Button trait and the hover text would never be applied. Button also has
# no `value` trait, so that keyword is omitted.
button = widgets.Button(
    description='Generate',
    disabled=False,
    button_style='',
    tooltip='Generate predictions for selected airport'
)

# output area that captures the printed measures and figures of each run
output = widgets.Output()
display(html, airport_selector, button, output)
# run model for user-selected airport
def run_on_input(b):
    """Handle a click on the Generate button.

    Clears whatever the previous run left in the output area, then runs the
    model for the airport currently selected in the dropdown. Everything the
    model prints or plots is captured by the output widget.

    Args:
        b: the widget instance passed in by ``on_click``; not used.
    """
    with output:
        # wipe results of the previous run before rendering new ones
        output.clear_output()
        run_model(airport_selector.value)

# register run_on_input as the button's click handler
button.on_click(run_on_input)

HTML(value='<b>Instructions:</b> To see predicted ceiling height for an airport, select the airport from the d…

Dropdown(description='Airport: ', options=(('Anchorage', 'PANC'), ('Fairbanks', 'PAFA')), value='PANC')

Button(description='Generate', style=ButtonStyle())

Output()