1. Load the pre-trained model from the pickle (.pkl) file
2. Load the inputs
3. Transform the inputs
4. Predict June 2022 - May 2023
5. Print the results

In [1]:
import pandas as pd
import pickle as pkl
import numpy as np
from xgboost import XGBRegressor
import warnings
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

In [2]:
# Relative path of the input CSV; it is read into a DataFrame with pd.read_csv below.
file = './Data/dataset.csv'

In [3]:
# Load the input table; missing values are treated as 0.
df = pd.read_csv(file).fillna(0)

# Restore the pre-trained model from its pickle file.
# The context manager closes the file handle as soon as the model is loaded.
# NOTE: unpickling can execute arbitrary code - only load model files from a trusted source.
with open('./Model/model.pkl', 'rb') as model_file:
    pkl_model = pkl.load(model_file)

def count_y(df):
    """Return the number of columns in `df` from position 3 onward.

    The caller uses this count as the number of target columns.
    """
    target_columns = df.columns[3:]
    return len(target_columns)

def prepare_dataset(df, num_y, train_samples, train_interval, fh):
    """Extend `df` with forecast rows and add lagged copies of the target columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Input table. The column at position 2 is treated as the timestep
        column and the `num_y` columns starting at position 3 as the targets.
        The frame is not modified; a new frame is returned.
    num_y : int
        Number of target columns (starting at column position 3).
    train_samples : int
        Number of historical rows; the frame is re-indexed to row labels
        0 .. train_samples + fh - 1.
    train_interval : int
        Lag, in rows, applied to every target column.
    fh : int
        Forecast horizon: number of rows appended after the history.

    Returns
    -------
    pandas.DataFrame
        The extended frame without its first row, with one extra
        "<target> lag+<train_interval>" column per target column.
    """
    ts, n = train_samples, train_interval

    # Re-index to exactly ts + fh rows; rows that did not exist are all-NaN.
    df = df.reindex(list(range(0, ts + fh)))
    # Continue the timestep counter (ts+1 .. ts+fh) in the appended rows.
    df.iloc[ts:(ts + fh), 2] = list(range(ts + 1, ts + 1 + fh))
    # Everything still missing (including targets of the appended rows) becomes 0.
    df = df.fillna(0)

    # Build all lag columns in one shift + one concat instead of inserting
    # them one at a time, which fragments the frame and triggers pandas'
    # PerformanceWarning on wide inputs.
    targets = df.iloc[:, 3:(3 + num_y)]
    lags = targets.shift(n)
    lags.columns = [label + f" lag+{n}" for label in targets.columns]
    df = pd.concat([df, lags], axis=1)

    # Only the first row is dropped. The first n rows of every lag column are
    # NaN, so rows 1 .. n-1 keep NaN lags; this layout is preserved on purpose
    # (presumably it matches how the pre-trained model was fitted - confirm).
    return df.drop(index=df.index[:1])

def X_y_split(df, num_y):
    """Split a prepared frame into model inputs and targets.

    X is the 'Timestep' column followed by every column after the target
    block (the lag columns); y is the `num_y` target columns that start at
    column position 3.
    """
    first_target = 3
    first_feature = first_target + num_y

    timestep = df['Timestep']
    lag_features = df.iloc[:, first_feature:]
    X = pd.concat([timestep, lag_features], axis=1)

    y = df.iloc[:, first_target:first_feature]
    return X, y

def get_results_pretrained(model, X, y, train_samples, fh):
    """Predict the forecast rows of `X` and return them next to their timestep.

    The rows at positions train_samples-1 up to (but excluding)
    train_samples+fh of `X` are passed to `model.predict`. The result has a
    fresh 0-based index, a 'Timestep' column, and one prediction column per
    column of `y`.
    """
    first_row = train_samples - 1
    last_row = train_samples + fh
    future_inputs = X.iloc[first_row:last_row, :]

    forecast = model.predict(future_inputs)

    # Keep only the timestep of each forecast row and renumber from 0 so the
    # two frames line up when concatenated side by side.
    timesteps = pd.DataFrame(future_inputs, columns=['Timestep']).reset_index(drop=True)
    forecast_frame = pd.DataFrame(forecast, columns=y.columns).reset_index(drop=True)

    return pd.concat([timesteps, forecast_frame], axis=1)
    
# --- user-tunable settings ---------------------------------------------------
train_samples = 77   # rows of history in the data that precede the rows to forecast
fh = 12              # forecast horizon: how many future months to predict blind
train_interval = 12  # lag in months; each month's values inform the prediction 12 months later

# The model predicts one target per column found from position 3 onward.
num_y = count_y(df)

# Extend the table with the forecast rows and lag features, split it into
# inputs/targets, then let the pre-trained model predict the forecast rows.
df = prepare_dataset(df, num_y, train_samples, train_interval, fh)
X, y = X_y_split(df, num_y)
df_fut = get_results_pretrained(pkl_model, X, y, train_samples, fh)

print('June 2022 - May 2023:')
display(df_fut)

June 2022 - May 2023:


Unnamed: 0,Timestep,A1,B1,C1,D1,E1,F1,G1,H1,I1,...,A2,B2,C2,D2,E2,F2,G2,H2,I2,J2
0,78.0,1704.77417,804.57251,2130.046631,390.480164,392.226868,100.267166,21770.644531,12341.163086,6251.287598,...,1711.361938,207.697327,2850.224365,139.724777,700.687378,17.893074,10088.442383,2405.543213,791.522156,352.084808
1,79.0,1808.200684,738.215332,2130.046631,365.784088,340.679199,109.836723,25146.480469,12036.032227,6071.300293,...,1554.525513,238.618225,2761.366943,135.115173,605.188232,30.01058,7115.916992,2238.628418,612.454407,345.645111
2,80.0,1436.302246,725.18866,2117.963867,387.438293,386.191864,100.936295,18697.460938,11697.129883,4935.188965,...,1314.789307,167.058701,2535.916016,143.572571,754.067444,17.291162,9683.092773,2389.021484,494.133972,251.128845
3,81.0,1530.706299,716.571045,2130.046631,390.130981,431.680939,95.107376,17967.669922,8631.333008,4780.489746,...,1321.965576,188.630051,2821.939209,145.365372,859.664246,17.291162,7489.082031,2473.566406,312.242523,352.319916
4,82.0,1588.990479,737.624268,2130.046631,381.330994,658.211792,87.053566,27199.398438,8389.782227,5621.973633,...,1211.456909,163.557022,2831.188721,139.737091,801.965515,15.463931,7772.947754,2146.727539,599.530151,347.73877
5,83.0,1558.702026,589.368713,1905.091675,401.7948,674.509277,70.417732,13940.449219,8001.390137,5517.349609,...,1262.356445,163.557022,2587.381592,151.942093,961.730164,15.463931,6880.969238,2353.563232,540.694885,319.467163
6,84.0,1509.10498,645.208252,2117.963867,390.518433,632.550903,83.129272,26018.429688,7695.896484,4170.397949,...,1170.041992,143.214417,2608.039062,139.806839,965.093445,16.964367,9932.104492,2222.694336,492.492493,318.372009
7,85.0,1425.406128,640.209167,1549.150635,417.806763,623.141663,96.440613,12067.542969,8001.390137,3625.818604,...,1073.961182,158.555481,2205.697998,125.231735,1058.685059,15.463931,5854.824707,2247.74292,225.735275,248.701309
8,86.0,1511.558472,712.569092,1442.189819,416.994171,669.065308,125.822647,16359.170898,9064.136719,3881.883301,...,1041.027466,163.557022,1955.408447,132.951828,965.093445,16.964367,6923.568359,2253.239502,336.923004,236.778488
9,87.0,1556.413086,733.840576,1905.091675,388.701721,523.200745,84.544266,19116.666016,9121.53125,5400.936523,...,1262.356445,163.557022,2697.340332,137.092621,943.403442,18.328045,9433.152344,2182.547119,970.684448,319.467163


In [4]:
# Save the forecast table (df_fut) to an Excel workbook under ./Outputs.
# NOTE(review): the file name says "junoct22" while the printed heading says
# "June 2022 - May 2023" - confirm which period this export is meant to cover.
df_fut.to_excel('./Outputs/e_em_xgb_lambda4_junoct22_blind_export.xlsx')