 # Inference

In [1]:
# %%
from pathlib import Path
import os
os.environ["WANDB_NOTEBOOK_NAME"] = "xgboost_inference.ipynb"  # Manually set the notebook name

import pandas as pd
import polars as pl
import xgboost as xgb
import wandb
from tqdm.notebook import tqdm
import pickle
import numpy as np

import utils
import yaml


In [2]:
# %%
# Flag that gates the debug-only cell further down in the notebook.
DEBUG = False

# Parse the YAML configuration file. Note that the result is stored under the
# name `train_config` even though the file is an inference config.
with open('configs/direct_inference_config_11_10_24.yaml') as config_file:
    train_config = yaml.safe_load(config_file)

class dotdict(dict):
    """Dictionary whose items can also be reached with attribute syntax.

    * Reading ``d.key`` delegates to ``dict.get``, so a key that is absent
      yields ``None`` instead of raising ``AttributeError``.
    * Assigning ``d.key = value`` stores the item ``d['key']``.
    * ``del d.key`` removes the item and raises ``KeyError`` if it is absent.
    """

    def __getattr__(self, name):
        # Only reached when regular attribute lookup has already failed.
        return dict.get(self, name)

    def __setattr__(self, name, value):
        dict.__setitem__(self, name, value)

    def __delattr__(self, name):
        dict.__delitem__(self, name)

# Wrap the parsed YAML so its keys can also be read as attributes.
train_config = dotdict(train_config)

# Settings that apply to this inference run only.
inference_config = dict(
    prediction_length=168,
    create_submission_csv=True,
)

# The pickled models are read from the 'checkpoints_final' directory.
checkpoints_dir = 'checkpoints_final'
xgboost_models_dir = Path(checkpoints_dir)


In [3]:

# Load all models: one pickled model per forecast horizon, keyed by its shift.
# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file,
# so `xgboost_models_dir` must only contain checkpoints from a trusted run.
models = {}
for file_name in tqdm(os.listdir(xgboost_models_dir)):
    if file_name.startswith('forward_shift_'):
        # The integer after the last underscore is used as the dictionary key.
        shift = int(file_name.split('_')[-1])
        model_path = xgboost_models_dir / file_name
        with open(model_path, 'rb') as f:
            models[shift] = pickle.load(f)

# Fail fast with a readable message instead of a bare KeyError later on,
# when the prediction loop indexes `models` by every shift.
missing_shifts = sorted(set(range(inference_config['prediction_length'])) - set(models))
if missing_shifts:
    raise FileNotFoundError(
        f"No 'forward_shift_<n>' checkpoint found in {xgboost_models_dir} for shifts: {missing_shifts}"
    )

  0%|          | 0/168 [00:00<?, ?it/s]

 ## Load and Prepare Data

In [4]:
# %%
# Read the four traffic CSV files into polars frames, keyed by a short metric name.
data_dir = Path('input-data')
target_dataframes = {
    'thp_vol': pl.read_csv(data_dir / 'traffic_DLThpVol.csv'),  # This is the target variable
    'prb': pl.read_csv(data_dir / 'traffic_DLPRB.csv'),
    'thp_time': pl.read_csv(data_dir / 'traffic_DLThpTime.csv'),
    'mr_number': pl.read_csv(data_dir / 'traffic_MR_number.csv')
}

# Keep the column whose header is the empty string aside as the hour index
# (presumably the CSV's unnamed index column — TODO confirm).
idx_hour_series = target_dataframes['thp_vol']['']

# Remove that empty-named column from every frame; only existing keys are
# reassigned, so mutating the dict while iterating over it is safe here.
for k, v in target_dataframes.items():
    target_dataframes[k] = v.drop('')

# Captured before the null row below is appended, so it holds only the rows
# read from the CSV. Its column names are used as the beam ids.
template_df = target_dataframes['thp_vol']

# Hour index of the first forecast step (presumably the hour right after the
# last observed row — TODO confirm against the input data).
predict_hour = 840

# One all-null row with the same columns as template_df.
null_row = pl.DataFrame({beam_id: [None] for beam_id in template_df.columns})

# Append the null row to every frame.
# NOTE(review): presumably a placeholder for the hour being predicted so that
# the feature builder emits a row for it — confirm in utils.create_all_feature_dfs.
# idx_hour_series is NOT extended here, so it is one element shorter than the frames.
target_dataframes = {k: pl.concat([v, null_row], how='vertical_relaxed') for k, v in target_dataframes.items()}

# NOTE(review): target_names is not referenced again in the cells visible here.
target_names = list(target_dataframes.keys())
feature_dfs = utils.create_all_feature_dfs(target_dataframes, idx_hour_series, train_config)
# Keep only the last row of each feature frame.
feature_dfs = {k: v.tail(1) for k, v in feature_dfs.items()}  # TODO: maybe turn into a LazyFrame for efficiency?
X_predict = utils.convert_to_long_format(feature_dfs)

# Convert to pandas and cast whichever id columns are present to the dtypes
# returned by utils.make_id_cat_type (presumably pandas categoricals matching
# the ones used at training time — TODO confirm).
cat_types = utils.make_id_cat_type(template_df.columns)
X_predict = X_predict.to_pandas()
for col in ['beam_id', 'cell_id', 'station_id']:
    if col in X_predict.columns:
        X_predict[col] = X_predict[col].astype(cat_types[col])

In [5]:
# %%

# One wide frame (a single row of per-beam predictions) is collected per horizon.
ys_predicted_wide = []

# Take the horizon count and the first forecast hour from the settings defined
# earlier instead of repeating the literals 168 and 840 here.
prediction_length = inference_config['prediction_length']

for shift in tqdm(range(prediction_length)):
    # models[shift] was loaded from the 'forward_shift_<shift>' checkpoint.
    y_predicted = models[shift].predict(X_predict)

    # Same hour index for every beam. A plain list is used so the frame
    # constructor does not have to unwrap a nested one-column DataFrame.
    idx_hour = [predict_hour + shift] * len(template_df.columns)

    y_predicted_long_df = pl.DataFrame({'idx_hour': idx_hour, 'beam_id': X_predict['beam_id'], 'thp_vol': y_predicted})

    # convert_to_wide_format returns a mapping keyed by target name; only the
    # 'thp_vol' entry is needed.
    y_predicted_wide = utils.convert_to_wide_format(y_predicted_long_df, ['thp_vol'])['thp_vol']

    ys_predicted_wide.append(y_predicted_wide)


  0%|          | 0/168 [00:00<?, ?it/s]

In [6]:
# %%
# Stack the per-horizon frames in order, then set idx_hour to 840..1007
# (one value per stacked row).
predictions_wide = pl.concat(ys_predicted_wide, how='vertical').with_columns(
    idx_hour=pl.Series(range(840, 1008))
)


In [7]:
# Put idx_hour first, followed by every other column in its existing order.
predictions_wide = predictions_wide.select(
    [pl.col('idx_hour'), pl.all().exclude('idx_hour')]
)
predictions_wide

idx_hour,0_0_0,0_0_1,0_0_2,0_0_3,0_0_4,0_0_5,0_0_6,0_0_7,0_0_8,0_0_9,0_0_10,0_0_11,0_0_12,0_0_13,0_0_14,0_0_15,0_0_16,0_0_17,0_0_18,0_0_19,0_0_20,0_0_21,0_0_22,0_0_23,0_0_24,0_0_25,0_0_26,0_0_27,0_0_28,0_0_29,0_0_30,0_0_31,0_1_0,0_1_1,0_1_2,0_1_3,…,29_1_27,29_1_28,29_1_29,29_1_30,29_1_31,29_2_0,29_2_1,29_2_2,29_2_3,29_2_4,29_2_5,29_2_6,29_2_7,29_2_8,29_2_9,29_2_10,29_2_11,29_2_12,29_2_13,29_2_14,29_2_15,29_2_16,29_2_17,29_2_18,29_2_19,29_2_20,29_2_21,29_2_22,29_2_23,29_2_24,29_2_25,29_2_26,29_2_27,29_2_28,29_2_29,29_2_30,29_2_31
i64,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,…,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32
840,0.461486,0.103442,0.14711,0.068884,1.105122,0.475052,0.09189,0.068884,0.588862,0.610352,0.117408,0.137233,0.644805,0.139171,0.134169,0.105918,0.097251,0.275353,1.339129,0.705991,0.123535,0.107277,0.127729,0.339758,0.085048,0.098066,1.37333,0.082317,0.095719,0.158356,0.09264,0.07508,0.072502,0.171782,0.140857,0.502418,…,0.378553,0.570243,0.877174,0.252508,0.069086,0.091609,0.102281,0.097378,0.0768,0.070058,0.096816,0.161302,0.098579,0.080093,0.212345,0.096378,0.095538,0.096449,0.292107,0.102383,0.099029,0.069389,0.104655,0.096977,0.278365,0.173926,0.082786,0.124096,0.068713,0.093333,0.081932,0.080994,0.080556,0.068901,0.088005,0.099411,0.074781
841,0.346771,0.104956,0.3642,0.070777,0.725037,0.47322,0.086983,0.070777,0.525958,0.395227,0.122652,0.127459,0.393492,0.152312,0.123007,0.122495,0.095271,0.176803,1.163849,0.480911,0.137013,0.138097,0.13346,0.324713,0.085406,0.093995,0.154344,0.084067,0.09403,0.149887,0.086003,0.074696,0.072087,0.180052,0.129335,0.467746,…,0.27385,0.66014,0.548925,0.193116,0.069083,0.091681,0.119051,0.099459,0.078562,0.072588,0.096002,0.183678,0.097856,0.076908,0.190425,0.114711,0.099412,0.105025,0.202014,0.111289,0.095364,0.070277,0.092499,0.096794,0.199281,0.100288,0.078952,0.121183,0.071347,0.086277,0.078001,0.0795,0.079036,0.069812,0.088021,0.095875,0.074376
842,0.312044,0.137974,0.956416,0.070082,0.533667,0.491734,0.083906,0.070082,0.367781,0.0518,0.128116,0.131564,0.28491,0.214477,0.14256,0.162862,0.105578,0.219768,1.123015,0.393527,0.192512,0.199407,0.182047,0.277616,0.079466,0.097078,0.141454,0.077482,0.095992,0.162511,0.076884,0.070996,0.071204,0.180175,0.123239,0.270621,…,0.264366,0.47229,0.395678,0.154001,0.068517,0.091223,0.134313,0.103212,0.074296,0.07309,0.096937,0.271062,0.105211,0.077193,0.229091,0.178292,0.1085,0.124598,0.183993,0.125484,0.099165,0.070735,0.091383,0.101167,0.165055,0.08274,0.078235,0.09871,0.071433,0.08509,0.079603,0.077342,0.077517,0.068456,0.085651,0.096756,0.085613
843,0.256408,0.146319,0.144487,0.071674,0.641792,0.366624,0.084175,0.071674,0.281305,0.178115,0.141256,0.119906,0.260864,0.187928,0.134269,0.139499,0.103823,0.214634,0.932818,0.226384,0.168473,0.177292,0.172015,0.25701,0.081144,0.096496,0.100195,0.088367,0.092697,0.14328,0.079478,0.072043,0.071444,0.167093,0.108961,0.208146,…,0.24916,0.440034,0.30887,0.134441,0.070195,0.089118,0.129243,0.10086,0.075407,0.07452,0.101216,0.228939,0.105606,0.078254,0.204459,0.1571,0.111729,0.121196,0.158519,0.121469,0.100946,0.071303,0.089933,0.103331,0.156645,0.077789,0.07881,0.089243,0.072275,0.086618,0.08156,0.080355,0.077835,0.06953,0.085817,0.098264,0.07589
844,0.201087,0.143179,0.16632,0.07232,0.517022,0.354426,0.084704,0.072635,0.195417,0.151439,0.13693,0.122025,0.180761,0.186344,0.151403,0.158687,0.113019,0.181516,0.702928,0.206651,0.149926,0.175239,0.162114,0.222103,0.082036,0.093864,0.087678,0.081165,0.090841,0.125476,0.081493,0.072635,0.072332,0.136798,0.100723,0.228552,…,0.209984,0.405753,0.247368,0.122692,0.071858,0.086731,0.13388,0.098436,0.075976,0.075591,0.096837,0.215622,0.106575,0.080185,0.18451,0.140212,0.108815,0.120908,0.123515,0.118473,0.100682,0.071501,0.093696,0.106902,0.145347,0.079269,0.080939,0.086445,0.072798,0.088355,0.082125,0.082773,0.079596,0.070511,0.085437,0.09678,0.078004
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
1003,0.76124,0.186156,0.868758,0.077338,3.338638,2.455235,0.095133,0.077338,0.739765,0.924496,0.234689,0.315722,0.961915,0.436202,0.316979,0.276733,0.155126,0.967403,1.288057,0.638234,0.516168,0.512105,0.798533,0.434205,0.100423,0.134692,0.155371,0.084654,0.129919,0.240834,0.094344,0.077338,0.079789,0.444324,0.174854,0.216443,…,0.575744,0.993154,0.668,0.212426,0.080438,0.111839,0.33327,0.249925,0.078342,0.077338,0.140178,0.592295,0.181254,0.08195,1.018844,0.552964,0.168125,0.257193,0.284096,0.272058,0.160295,0.077338,0.097255,0.167579,0.437013,0.104835,0.088466,0.121484,0.077338,0.097871,0.107458,0.103457,0.080433,0.076884,0.101394,0.141376,0.08195
1004,0.857234,0.184682,0.27787,0.07595,4.142357,3.002193,0.097703,0.07595,0.565626,0.885516,0.256127,0.326732,1.119742,0.458974,0.249441,0.23578,0.15534,0.846855,1.435119,0.710054,0.431074,0.447414,0.503308,0.405256,0.101315,0.138648,0.250338,0.084282,0.129307,0.23338,0.093366,0.07595,0.078154,0.620921,0.159009,0.228297,…,0.533916,0.980322,0.673203,0.213137,0.0796,0.112935,0.285759,0.24829,0.079411,0.076208,0.144215,0.466695,0.175344,0.079996,0.652474,0.377499,0.172627,0.210092,0.329396,0.253477,0.153918,0.076399,0.098592,0.156716,0.370271,0.120622,0.085288,0.126401,0.07595,0.098599,0.105133,0.101386,0.079043,0.07595,0.103158,0.159747,0.082998
1005,0.782135,0.164116,0.351139,0.077257,3.645724,2.321768,0.102878,0.077257,0.415691,0.460047,0.335542,0.297672,1.164669,0.321998,0.189505,0.183533,0.146818,0.639726,1.890159,0.747457,0.293731,0.326523,0.378659,0.496702,0.095171,0.131344,0.237333,0.086816,0.129963,0.212502,0.08694,0.077257,0.080505,0.57791,0.150834,0.242493,…,0.449237,0.955954,0.661402,0.245135,0.083624,0.115748,0.192033,0.175267,0.083035,0.077257,0.140233,0.257763,0.153205,0.081804,0.412722,0.205211,0.140483,0.170843,0.405438,0.212397,0.133734,0.07771,0.102768,0.137082,0.320988,0.142064,0.089403,0.144772,0.077257,0.102138,0.100164,0.092733,0.082816,0.077257,0.107566,0.14758,0.085454
1006,0.649139,0.155732,0.353077,0.077999,2.857275,1.250584,0.103841,0.077999,0.437379,0.459047,0.27269,0.273132,0.796274,0.280279,0.174086,0.227004,0.131093,0.493074,1.579648,0.708604,0.226275,0.220429,0.242419,0.393824,0.093722,0.124845,0.189097,0.092527,0.129286,0.178563,0.089495,0.077999,0.081121,0.476042,0.165003,0.268883,…,0.41227,0.870932,0.757358,0.262681,0.082125,0.108814,0.172276,0.149076,0.085403,0.081242,0.137327,0.244356,0.139213,0.082191,0.345578,0.167103,0.13662,0.156557,0.309279,0.190712,0.131305,0.077999,0.097296,0.135758,0.267968,0.159091,0.085848,0.151604,0.077999,0.108381,0.095178,0.093831,0.084707,0.077999,0.111247,0.168184,0.085715


In [14]:
def create_half_submission_df(input_df: pl.DataFrame, weeks: str) -> pl.DataFrame:
    """
    Build the long-format submission rows for one forecast window.

    Args:
        input_df: Wide frame with an 'idx_hour' column plus one column per beam.
        weeks: Window to extract: '5w-6w' (idx_hour 840-1007) or
            '10w-11w' (idx_hour 1680-1847).

    Returns:
        A frame with the columns 'ID' and 'Target'. Each ID has the form
        'traffic_DLThpVol_test_{weeks}_{hour}_{beam_id}', where hour is
        idx_hour minus the first hour of the window (0-167).

    Raises:
        ValueError: If `weeks` is not one of the supported windows.
        AssertionError: If the filtered frame does not have shape (168, 2881),
            contains nulls, or does not span the whole window.
    """
    # Inclusive (first, last) idx_hour of each supported window.
    hour_windows = {
        '5w-6w': (840, 1007),
        '10w-11w': (1680, 1847),
    }
    if weeks not in hour_windows:
        raise ValueError(f"Unsupported weeks value {weeks!r}; expected one of {sorted(hour_windows)}")
    first_hour, last_hour = hour_windows[weeks]

    # Keep only the rows whose idx_hour lies inside the requested window.
    input_df = input_df.filter(pl.col('idx_hour') >= first_hour, pl.col('idx_hour') <= last_hour)

    # Some checks on the input_df
    assert input_df.shape == (168, 2881), f"Expected shape (168, 2881), got {input_df.shape}"
    assert not input_df.select(pl.any_horizontal(pl.all().is_null().any())).item(), "Submission dataframe contains null values"
    assert input_df['idx_hour'].head(1)[0] <= first_hour and input_df['idx_hour'].tail(1)[0] >= last_hour, "Submission dataframe does seemingly not contain the correct idx_hour values"

    # Unpivot to one row per (idx_hour, beam_id): the output runs through all
    # hours of the first beam column, then all hours of the next, and so on.
    # The ID embeds the hour relative to the start of the window.
    return input_df.unpivot(index='idx_hour', variable_name='beam_id').with_columns(
        pl.concat_str([pl.lit('traffic_DLThpVol_test'), pl.lit(weeks), pl.col('idx_hour') - first_hour, pl.col('beam_id')], separator='_').alias('ID')
    ).select(['ID', 'value']).rename({'value': 'Target'})


def create_submission_csv(input_df: pl.DataFrame, output_filename='traffic_forecast.csv', archiving_dir='submission-csvs-archive') -> pl.DataFrame:
    """
    Create a submission CSV file from data in input format that's been extended to cover weeks 5-6 and 10-11.

    Args:
        input_df: Wide frame containing the idx_hour rows of both windows
            (840-1007 and 1680-1847).
        output_filename: Path of the CSV written for submission.
        archiving_dir: Directory that receives a second copy named
            'FINAL_<file name>'. Pass a falsy value to skip archiving.

    Returns:
        The submission frame (weeks 5-6 rows followed by weeks 10-11 rows).
    """

    # Create half submission dataframes
    half_submission_5w_6w = create_half_submission_df(input_df, '5w-6w')
    half_submission_10w_11w = create_half_submission_df(input_df, '10w-11w')

    # Concatenate the two half submission dataframes
    submission_df = pl.concat([half_submission_5w_6w, half_submission_10w_11w], how='vertical')

    # Save the submission dataframe to a CSV file for submission.
    # (Uploading the file to wandb is currently disabled.)
    submission_df.write_csv(output_filename)

    # Save the submission dataframe to a CSV file for archiving
    if archiving_dir:
        archiving_dir = Path(archiving_dir)
        archiving_dir.mkdir(parents=True, exist_ok=True)
        # Build the name without reusing the enclosing quote inside the
        # f-string (a SyntaxError before Python 3.12), and from the basename
        # only so an output_filename with directories still lands directly
        # inside archiving_dir.
        archive_name = f'FINAL_{Path(output_filename).name}'
        submission_df.write_csv(archiving_dir / archive_name)

    return submission_df

In [9]:
# Display the current DEBUG flag; it gates the debug-only cell below.
DEBUG


False

In [10]:
# Display whether the final cell will write the submission CSV.
inference_config['create_submission_csv']

True

In [11]:
if DEBUG:
    # Debug-only path: re-stamp idx_hour 840..1007 on the predictions and
    # append an all-zero stand-in frame covering idx_hour 1680..1847.
    # NOTE(review): ys_final is not read by any later cell visible here.
    predictions_wide = predictions_wide.with_columns(idx_hour=pl.Series(range(840, 1008)))
    zero_beam_columns = {beam_id: [0] * 168 for beam_id in template_df.columns}
    dummy_w11 = pl.DataFrame({'idx_hour': list(range(1680, 1848)), **zero_beam_columns})
    ys_final = pl.concat(
        [predictions_wide, dummy_w11],
        how='vertical_relaxed',
    )

In [15]:
if inference_config['create_submission_csv']:
    # Presumably holds the idx_hour 1680-1847 rows that create_submission_csv
    # needs for the '10w-11w' window — verify against the file contents.
    w10_11_df = pl.read_csv('seasonality_final.csv')

    # Stack the model predictions on top of the rows read from the file;
    # 'vertical_relaxed' casts mismatched column dtypes to a common supertype.
    combined_df = pl.concat(
        [predictions_wide, w10_11_df],
        how='vertical_relaxed',
    )

    submission_df = create_submission_csv(combined_df)