## Quantile Model Prediction
Use the trained quantile model to predict the quantiles for the competition submission.

In [None]:
import pandas as pd
from sklearn.linear_model import QuantileRegressor
import datetime
from typing import Literal

#### Load SNODAS data, site metadata, and the submission CSV

In [None]:
# Read one SNODAS SWE file per year (2005 through 2023). The first CSV column
# becomes the index and is parsed as dates.
# The frames are collected first and concatenated once: growing a DataFrame
# with concat inside the loop re-copies all earlier rows on every iteration,
# and seeding the result with an empty DataFrame relies on concat behaviour
# that recent pandas versions deprecate.
yearly_snodas = [
    pd.read_csv(f'./data/snodas/snodas_swe_{year}.csv', index_col=0, parse_dates=True)
    for year in range(2005, 2024)
]
snodas_data = pd.concat(yearly_snodas)

In [None]:
# Load the submission template. Later cells read its `site_id` and `issue_date`
# columns row by row and fill in `volume_10`, `volume_50` and `volume_90`.
model_submission = pd.read_csv('./data/competition/submission_format.csv')

In [None]:
# Load the site metadata with its first column as the index. The index labels
# are used as site ids below, and `season_start_month` is read per site when
# predicting.
site_metadata = pd.read_csv('./data/competition/metadata.csv',index_col=0)

In [None]:
# Change the column names of the SNODAS data to match the other datasets
# NOTE(review): this relabels columns purely by position, so it assumes the
# SNODAS columns are in the same order (and number) as the rows of
# site_metadata -- confirm against the files that produced the SNODAS CSVs.
snodas_data.columns = site_metadata.index.to_numpy()

#### Load pre-trained quantile model

In [None]:
# Unpickle the model
# As used by predict_quantile, `quantile_models` is indexed by site id, then by
# a (month, day) tuple, then by position 0/1/2 for the 0.1/0.5/0.9 quantiles.
# NOTE: pickle.load can execute arbitrary code while loading; only open a
# model file that comes from a trusted source.
import pickle
with open('./model/quantile_models.pkl', 'rb') as f:
    quantile_models = pickle.load(f)

### Create a function to perform the prediction

In [None]:
def predict_quantile(site:str, prediction_date_str: str, quantile: Literal['0.1','0.5','0.9']):
    """
    Return one quantile model's prediction for a site on a given issue date.

    The site's SNODAS SWE value for the (possibly adjusted) date is the single
    input feature handed to the pre-trained model. Callers in this notebook
    store the result in the ``volume_*`` submission columns.

    Date handling:
      * If the month of the issue date is greater than or equal to the site's
        ``season_start_month``, the date is replaced by the first day of that
        start month in the same year, for both the model lookup and the
        SNODAS lookup.
      * The model registered for April 1st is still used for 2017-04-01, but
        the SNODAS value is read from 2017-04-08 instead.

    Args:
        site: Site id; looked up in ``site_metadata``, ``quantile_models`` and
            the columns of ``snodas_data``.
        prediction_date_str: Issue date formatted as ``YYYY-MM-DD``.
        quantile: Which model to use: ``'0.1'``, ``'0.5'`` or ``'0.9'``.

    Returns:
        The first element of ``model.predict`` for the single-sample input.

    Raises:
        ValueError: If ``prediction_date_str`` does not match ``%Y-%m-%d``.
        KeyError: If ``quantile`` is not one of the three supported keys.
            Lookups for an unknown site or date are expected to fail as well.
    """
    issue_date = datetime.datetime.strptime(prediction_date_str, '%Y-%m-%d').date()

    # Clamp dates at or past the season start month to the 1st of that month.
    season_start_month = site_metadata.loc[site,'season_start_month']
    if issue_date.month >= season_start_month:
        issue_date = datetime.date(issue_date.year, season_start_month, 1)

    # Pick the models for this site and calendar day, then the one that
    # corresponds to the requested quantile.
    models_for_day = quantile_models[site][(issue_date.month, issue_date.day)]
    quantile_position = {'0.1': 0, '0.5': 1, '0.9': 2}
    model: QuantileRegressor = models_for_day[quantile_position[quantile]]

    # The SNODAS data has no entry for 2017-04-01, so read the following
    # week's value for that one date; the model chosen above is unaffected.
    if issue_date == datetime.date(2017,4,1):
        swe_date = datetime.date(2017,4,8)
    else:
        swe_date = issue_date
    swe_date_key = swe_date.strftime('%Y-%m-%d')

    # The notebook describes this division as a conversion to KAF.
    # NOTE(review): 1233.48 is the number of cubic metres in one acre-foot;
    # confirm the units of the SNODAS files.
    swe_feature = snodas_data.loc[swe_date_key, site] / 1233.48

    # The model expects a 2-D input: one sample with one feature.
    predictions = model.predict([[swe_feature]])
    return predictions[0]

    


In [None]:
# Test the model
# Smoke test: request the 0.5-quantile prediction for a single site and date
# so the notebook displays the value before the full submission is generated.
predict_quantile('pecos_r_nr_pecos', '2021-04-01', '0.5')

#### Predict quantiles for each site
Use the rows of the submission format file (one per site and issue date) to generate a prediction for each quantile.

In [None]:
# 0.1-quantile prediction for every (site_id, issue_date) row of the submission.
volume_10 = model_submission.apply(
    lambda submission_row: predict_quantile(
        submission_row['site_id'], submission_row['issue_date'], '0.1'
    ),
    axis=1,
)
model_submission['volume_10'] = volume_10

In [None]:
# 0.5-quantile prediction for every (site_id, issue_date) row of the submission.
volume_50 = model_submission.apply(
    lambda submission_row: predict_quantile(
        submission_row['site_id'], submission_row['issue_date'], '0.5'
    ),
    axis=1,
)
model_submission['volume_50'] = volume_50

In [None]:
# 0.9-quantile prediction for every (site_id, issue_date) row of the submission.
volume_90 = model_submission.apply(
    lambda submission_row: predict_quantile(
        submission_row['site_id'], submission_row['issue_date'], '0.9'
    ),
    axis=1,
)
model_submission['volume_90'] = volume_90

In [None]:
# Save the submission to csv
# index=False keeps the DataFrame's row index out of the file, so only the
# submission columns are written.
# NOTE(review): presumably the `submissions` directory must already exist --
# confirm before running.
model_submission.to_csv('./data/competition/submissions/quantile_submission_120623.csv', index=False)