# Regression models: lags, dummies and autoregression

In [None]:
import time
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

from sklearn.linear_model import Ridge, ElasticNet
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from xgboost import XGBRegressor

from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler

from hyperopt import fmin, tpe, hp, STATUS_OK, Trials, rand

import warnings
warnings.filterwarnings("ignore")



## Load data

Two datasets will be loaded:

1. The dataset used to create the train, validation and test splits (the latter two with NaNs) using an expanding window.
2. Baseline results.

In [None]:
# Render every column when a DataFrame is displayed (no column truncation).
pd.set_option('display.max_columns', None)

# Lagged-feature table from which the train / validation / test folds are cut.
trainvalidtest = pd.read_csv('trainvalidtest_ar_v2.csv')

# Baseline results table.
baselines = pd.read_csv('baseline_results_v2.csv')

In [None]:
# Display the loaded frame as the cell output.
trainvalidtest

Unnamed: 0,Year,Quarter,NCN_sales_est,Order_book_lag_1,Order_book_lag_2,Order_book_lag_3,Order_book_lag_4,Order_book_lag_5,Order_book_lag_6,Order_book_lag_7,Order_book_lag_8,GDP_households_lag_1,GDP_households_lag_2,GDP_households_lag_3,GDP_households_lag_4,GDP_households_lag_5,GDP_households_lag_6,GDP_households_lag_7,GDP_households_lag_8,GDP_government_lag_1,GDP_government_lag_2,GDP_government_lag_3,GDP_government_lag_4,GDP_government_lag_5,GDP_government_lag_6,GDP_government_lag_7,GDP_government_lag_8,GDP_fixed_capital_lag_1,GDP_fixed_capital_lag_2,GDP_fixed_capital_lag_3,GDP_fixed_capital_lag_4,GDP_fixed_capital_lag_5,GDP_fixed_capital_lag_6,GDP_fixed_capital_lag_7,GDP_fixed_capital_lag_8,GDP_export_lag_1,GDP_export_lag_2,GDP_export_lag_3,GDP_export_lag_4,GDP_export_lag_5,GDP_export_lag_6,GDP_export_lag_7,GDP_export_lag_8,GDP_import_lag_1,GDP_import_lag_2,GDP_import_lag_3,GDP_import_lag_4,GDP_import_lag_5,GDP_import_lag_6,GDP_import_lag_7,GDP_import_lag_8,Unemployment_rate_lag_1,Unemployment_rate_lag_2,Unemployment_rate_lag_3,Unemployment_rate_lag_4,Unemployment_rate_lag_5,Unemployment_rate_lag_6,Unemployment_rate_lag_7,Unemployment_rate_lag_8,Employment_rate_lag_1,Employment_rate_lag_2,Employment_rate_lag_3,Employment_rate_lag_4,Employment_rate_lag_5,Employment_rate_lag_6,Employment_rate_lag_7,Employment_rate_lag_8,Gross_monthly_wage_lag_1,Gross_monthly_wage_lag_2,Gross_monthly_wage_lag_3,Gross_monthly_wage_lag_4,Gross_monthly_wage_lag_5,Gross_monthly_wage_lag_6,Gross_monthly_wage_lag_7,Gross_monthly_wage_lag_8,CPI_lag_1,CPI_lag_2,CPI_lag_3,CPI_lag_4,CPI_lag_5,CPI_lag_6,CPI_lag_7,CPI_lag_8,PPI_lag_1,PPI_lag_2,PPI_lag_3,PPI_lag_4,PPI_lag_5,PPI_lag_6,PPI_lag_7,PPI_lag_8,Current_account_balance_lag_1,Current_account_balance_lag_2,Current_account_balance_lag_3,Current_account_balance_lag_4,Current_account_balance_lag_5,Current_account_balance_lag_6,Current_account_balance_lag_7,Current_account_balance_lag_8,Current_account_balance_GDP_lag_1,Current_account_balance_
GDP_lag_2,Current_account_balance_GDP_lag_3,Current_account_balance_GDP_lag_4,Current_account_balance_GDP_lag_5,Current_account_balance_GDP_lag_6,Current_account_balance_GDP_lag_7,Current_account_balance_GDP_lag_8,FDI_balance_lag_1,FDI_balance_lag_2,FDI_balance_lag_3,FDI_balance_lag_4,FDI_balance_lag_5,FDI_balance_lag_6,FDI_balance_lag_7,FDI_balance_lag_8,General_gov_net_lag_1,General_gov_net_lag_2,General_gov_net_lag_3,General_gov_net_lag_4,General_gov_net_lag_5,General_gov_net_lag_6,General_gov_net_lag_7,General_gov_net_lag_8,General_gov_budget_balance_lag_1,General_gov_budget_balance_lag_2,General_gov_budget_balance_lag_3,General_gov_budget_balance_lag_4,General_gov_budget_balance_lag_5,General_gov_budget_balance_lag_6,General_gov_budget_balance_lag_7,General_gov_budget_balance_lag_8,General_gov_invest_lag_1,General_gov_invest_lag_2,General_gov_invest_lag_3,General_gov_invest_lag_4,General_gov_invest_lag_5,General_gov_invest_lag_6,General_gov_invest_lag_7,General_gov_invest_lag_8,Conf_industrial_lag_1,Conf_industrial_lag_2,Conf_industrial_lag_3,Conf_industrial_lag_4,Conf_industrial_lag_5,Conf_industrial_lag_6,Conf_industrial_lag_7,Conf_industrial_lag_8,Conf_retail_lag_1,Conf_retail_lag_2,Conf_retail_lag_3,Conf_retail_lag_4,Conf_retail_lag_5,Conf_retail_lag_6,Conf_retail_lag_7,Conf_retail_lag_8,Conf_service_lag_1,Conf_service_lag_2,Conf_service_lag_3,Conf_service_lag_4,Conf_service_lag_5,Conf_service_lag_6,Conf_service_lag_7,Conf_service_lag_8,Conf_consumer_lag_1,Conf_consumer_lag_2,Conf_consumer_lag_3,Conf_consumer_lag_4,Conf_consumer_lag_5,Conf_consumer_lag_6,Conf_consumer_lag_7,Conf_consumer_lag_8,Econ_sent_ind_lag_1,Econ_sent_ind_lag_2,Econ_sent_ind_lag_3,Econ_sent_ind_lag_4,Econ_sent_ind_lag_5,Econ_sent_ind_lag_6,Econ_sent_ind_lag_7,Econ_sent_ind_lag_8,Ind_BSI_lag_1,Ind_BSI_lag_2,Ind_BSI_lag_3,Ind_BSI_lag_4,Ind_BSI_lag_5,Ind_BSI_lag_6,Ind_BSI_lag_7,Ind_BSI_lag_8,Serv_BSI_lag_1,Serv_BSI_lag_2,Serv_BSI_lag_3,Serv_BSI_lag_4,Serv_BSI_lag_5,Serv_BSI_lag_6,Serv_BSI
_lag_7,Serv_BSI_lag_8,Retail_volume_idx_lag_1,Retail_volume_idx_lag_2,Retail_volume_idx_lag_3,Retail_volume_idx_lag_4,Retail_volume_idx_lag_5,Retail_volume_idx_lag_6,Retail_volume_idx_lag_7,Retail_volume_idx_lag_8,Industrial_volume_idx_lag_1,Industrial_volume_idx_lag_2,Industrial_volume_idx_lag_3,Industrial_volume_idx_lag_4,Industrial_volume_idx_lag_5,Industrial_volume_idx_lag_6,Industrial_volume_idx_lag_7,Industrial_volume_idx_lag_8,New_car_reg_lag_1,New_car_reg_lag_2,New_car_reg_lag_3,New_car_reg_lag_4,New_car_reg_lag_5,New_car_reg_lag_6,New_car_reg_lag_7,New_car_reg_lag_8,Apt_sales_lag_1,Apt_sales_lag_2,Apt_sales_lag_3,Apt_sales_lag_4,Apt_sales_lag_5,Apt_sales_lag_6,Apt_sales_lag_7,Apt_sales_lag_8,Building_permit_dwellings_lag_1,Building_permit_dwellings_lag_2,Building_permit_dwellings_lag_3,Building_permit_dwellings_lag_4,Building_permit_dwellings_lag_5,Building_permit_dwellings_lag_6,Building_permit_dwellings_lag_7,Building_permit_dwellings_lag_8,Building_permit_nonres_lag_1,Building_permit_nonres_lag_2,Building_permit_nonres_lag_3,Building_permit_nonres_lag_4,Building_permit_nonres_lag_5,Building_permit_nonres_lag_6,Building_permit_nonres_lag_7,Building_permit_nonres_lag_8,Productivity_per_employee_lag_1,Productivity_per_employee_lag_2,Productivity_per_employee_lag_3,Productivity_per_employee_lag_4,Productivity_per_employee_lag_5,Productivity_per_employee_lag_6,Productivity_per_employee_lag_7,Productivity_per_employee_lag_8,Productivity_per_hour_worked_lag_1,Productivity_per_hour_worked_lag_2,Productivity_per_hour_worked_lag_3,Productivity_per_hour_worked_lag_4,Productivity_per_hour_worked_lag_5,Productivity_per_hour_worked_lag_6,Productivity_per_hour_worked_lag_7,Productivity_per_hour_worked_lag_8,Enterprises_total_profit_lag_1,Enterprises_total_profit_lag_2,Enterprises_total_profit_lag_3,Enterprises_total_profit_lag_4,Enterprises_total_profit_lag_5,Enterprises_total_profit_lag_6,Enterprises_total_profit_lag_7,Enterprises_total_profit_lag_8,Enterprises_invest
_build_lag_1,Enterprises_invest_build_lag_2,Enterprises_invest_build_lag_3,Enterprises_invest_build_lag_4,Enterprises_invest_build_lag_5,Enterprises_invest_build_lag_6,Enterprises_invest_build_lag_7,Enterprises_invest_build_lag_8,Loans_granted_to_nonfin_co_lag_1,Loans_granted_to_nonfin_co_lag_2,Loans_granted_to_nonfin_co_lag_3,Loans_granted_to_nonfin_co_lag_4,Loans_granted_to_nonfin_co_lag_5,Loans_granted_to_nonfin_co_lag_6,Loans_granted_to_nonfin_co_lag_7,Loans_granted_to_nonfin_co_lag_8,M1_lag_1,M1_lag_2,M1_lag_3,M1_lag_4,M1_lag_5,M1_lag_6,M1_lag_7,M1_lag_8,NCN_sales_est_lagged_4,Quarter_1,Quarter_2,Quarter_3,Quarter_4
0,2008,1,40446.0,161482.0,202086.0,174531.0,204326.0,156802.0,140648.0,127027.0,113503.0,2781.0,2593.0,2647.0,2542.0,2639.0,2459.0,2411.0,2234.0,1064.0,791.0,895.0,790.0,987.0,742.0,856.0,752.0,1740.0,1664.0,1655.0,1467.0,1712.0,1636.0,1351.0,1216.0,3052.0,2986.0,3136.0,2651.0,2741.0,2739.0,2723.0,2296.0,3331.0,3311.0,3524.0,3124.0,3238.0,2994.0,2977.0,2555.0,4.1,4.1,4.9,5.3,5.5,5.4,6.2,6.6,62.8,63.5,63.1,62.0,62.4,62.3,62.1,60.6,784.0,697.0,738.0,660.0,653.0,580.0,609.0,549.0,2.98,2.08,1.94,1.75,0.51,1.36,1.46,1.08,1.91,1.51,2.25,2.87,1.82,1.29,0.86,1.63,-501.1,-577.0,-607.7,-751.7,-665.1,-432.1,-446.2,-482.3,-11.4,-13.8,-14.8,-20.3,-18.1,-12.3,-13.3,-16.0,-304.6,-167.0,189.0,-175.4,-121.6,-27.0,77.8,-182.0,-1.0,258.9,145.9,45.6,-65.0,240.4,188.6,28.1,1.4,6.2,3.5,1.2,-1.8,6.9,5.6,0.9,317.5,289.0,253.1,117.0,317.4,221.1,104.8,84.9,5.7,11.4,18.6,21.1,24.2,23.3,18.8,14.6,19.9,23.5,31.8,32.3,32.9,33.0,31.4,27.5,-3.9,2.4,10.7,16.7,18.8,27.9,17.6,28.0,-4.9,1.3,8.0,11.8,10.9,11.1,11.3,9.6,100.0,105.3,112.1,116.1,117.8,119.4,115.0,114.5,34.8,31.6,19.9,23.7,18.0,26.4,36.6,34.6,46.0,38.4,27.2,23.7,21.7,27.4,27.8,23.2,106.1,105.3,103.8,93.5,103.5,96.3,91.6,78.1,84.2,81.4,87.0,78.8,82.2,78.8,81.3,70.4,6697.0,7428.0,9142.0,7649.0,6253.0,6516.0,7373.0,5230.0,4981.0,5370.0,6819.0,7205.0,8432.0,7153.0,6622.0,6970.0,146.3,210.4,371.9,275.8,311.2,364.9,417.3,217.7,287.7,304.5,485.3,241.2,285.2,382.4,315.8,201.8,5.5,1.3,8.9,2.5,5.2,4.2,8.8,-2.3,0.9,6.1,10.9,1.2,0.9,8.9,8.2,-1.4,773602.0,916048.0,920889.0,717347.0,837004.0,858965.0,714053.0,568766.0,386888.0,324305.0,302023.0,253943.0,370855.0,302423.0,310605.0,229217.0,7045.4,6620.3,6163.9,5800.1,5379.8,4827.0,4306.0,3720.8,4873.7,4970.9,5071.7,4851.0,4627.1,4400.8,4217.9,3821.2,29801.0,1,0,0,0
1,2008,2,54576.0,215298.0,161482.0,202086.0,174531.0,204326.0,156802.0,140648.0,127027.0,2535.0,2781.0,2593.0,2647.0,2542.0,2639.0,2459.0,2411.0,818.0,1064.0,791.0,895.0,790.0,987.0,742.0,856.0,1368.0,1740.0,1664.0,1655.0,1467.0,1712.0,1636.0,1351.0,2781.0,3052.0,2986.0,3136.0,2651.0,2741.0,2739.0,2723.0,3074.0,3331.0,3311.0,3524.0,3124.0,3238.0,2994.0,2977.0,4.1,4.1,4.1,4.9,5.3,5.5,5.4,6.2,63.2,62.8,63.5,63.1,62.0,62.4,62.3,62.1,788.0,784.0,697.0,738.0,660.0,653.0,580.0,609.0,3.64,2.98,2.08,1.94,1.75,0.51,1.36,1.46,2.37,1.91,1.51,2.25,2.87,1.82,1.29,0.86,-597.8,-501.1,-577.0,-607.7,-751.7,-665.1,-432.1,-446.2,-15.3,-11.4,-13.8,-14.8,-20.3,-18.1,-12.3,-13.3,-305.3,-304.6,-167.0,189.0,-175.4,-121.6,-27.0,77.8,-137.5,-1.0,258.9,145.9,45.6,-65.0,240.4,188.6,-3.5,1.4,6.2,3.5,1.2,-1.8,6.9,5.6,160.2,317.5,289.0,253.1,117.0,317.4,221.1,104.8,0.9,5.7,11.4,18.6,21.1,24.2,23.3,18.8,10.0,19.9,23.5,31.8,32.3,32.9,33.0,31.4,0.8,-3.9,2.4,10.7,16.7,18.8,27.9,17.6,-8.0,-4.9,1.3,8.0,11.8,10.9,11.1,11.3,97.8,100.0,105.3,112.1,116.1,117.8,119.4,115.0,46.5,34.8,31.6,19.9,23.7,18.0,26.4,36.6,52.8,46.0,38.4,27.2,23.7,21.7,27.4,27.8,92.1,106.1,105.3,103.8,93.5,103.5,96.3,91.6,79.9,84.2,81.4,87.0,78.8,82.2,78.8,81.3,7437.0,6697.0,7428.0,9142.0,7649.0,6253.0,6516.0,7373.0,4498.0,4981.0,5370.0,6819.0,7205.0,8432.0,7153.0,6622.0,125.1,146.3,210.4,371.9,275.8,311.2,364.9,417.3,284.6,287.7,304.5,485.3,241.2,285.2,382.4,315.8,-11.2,5.5,1.3,8.9,2.5,5.2,4.2,8.8,-10.8,0.9,6.1,10.9,1.2,0.9,8.9,8.2,637429.0,773602.0,916048.0,920889.0,717347.0,837004.0,858965.0,714053.0,314285.0,386888.0,324305.0,302023.0,253943.0,370855.0,302423.0,310605.0,7305.4,7045.4,6620.3,6163.9,5800.1,5379.8,4827.0,4306.0,4770.7,4873.7,4970.9,5071.7,4851.0,4627.1,4400.8,4217.9,57462.0,0,1,0,0
2,2008,3,54635.0,204322.0,215298.0,161482.0,202086.0,174531.0,204326.0,156802.0,140648.0,2613.0,2535.0,2781.0,2593.0,2647.0,2542.0,2639.0,2459.0,944.0,818.0,1064.0,791.0,895.0,790.0,987.0,742.0,1449.0,1368.0,1740.0,1664.0,1655.0,1467.0,1712.0,1636.0,3056.0,2781.0,3052.0,2986.0,3136.0,2651.0,2741.0,2739.0,3176.0,3074.0,3331.0,3311.0,3524.0,3124.0,3238.0,2994.0,4.0,4.1,4.1,4.1,4.9,5.3,5.5,5.4,63.0,63.2,62.8,63.5,63.1,62.0,62.4,62.3,850.0,788.0,784.0,697.0,738.0,660.0,653.0,580.0,2.23,3.64,2.98,2.08,1.94,1.75,0.51,1.36,1.35,2.37,1.91,1.51,2.25,2.87,1.82,1.29,-405.1,-597.8,-501.1,-577.0,-607.7,-751.7,-665.1,-432.1,-9.2,-15.3,-11.4,-13.8,-14.8,-20.3,-18.1,-12.3,-3.2,-305.3,-304.6,-167.0,189.0,-175.4,-121.6,-27.0,10.5,-137.5,-1.0,258.9,145.9,45.6,-65.0,240.4,0.2,-3.5,1.4,6.2,3.5,1.2,-1.8,6.9,232.3,160.2,317.5,289.0,253.1,117.0,317.4,221.1,-7.4,0.9,5.7,11.4,18.6,21.1,24.2,23.3,-2.3,10.0,19.9,23.5,31.8,32.3,32.9,33.0,-0.8,0.8,-3.9,2.4,10.7,16.7,18.8,27.9,-10.5,-8.0,-4.9,1.3,8.0,11.8,10.9,11.1,92.6,97.8,100.0,105.3,112.1,116.1,117.8,119.4,53.8,46.5,34.8,31.6,19.9,23.7,18.0,26.4,59.0,52.8,46.0,38.4,27.2,23.7,21.7,27.4,100.5,92.1,106.1,105.3,103.8,93.5,103.5,96.3,85.6,79.9,84.2,81.4,87.0,78.8,82.2,78.8,7766.0,7437.0,6697.0,7428.0,9142.0,7649.0,6253.0,6516.0,4465.0,4498.0,4981.0,5370.0,6819.0,7205.0,8432.0,7153.0,220.8,125.1,146.3,210.4,371.9,275.8,311.2,364.9,338.5,284.6,287.7,304.5,485.3,241.2,285.2,382.4,14.0,-11.2,5.5,1.3,8.9,2.5,5.2,4.2,13.9,-10.8,0.9,6.1,10.9,1.2,0.9,8.9,732708.0,637429.0,773602.0,916048.0,920889.0,717347.0,837004.0,858965.0,286624.0,314285.0,386888.0,324305.0,302023.0,253943.0,370855.0,302423.0,7499.8,7305.4,7045.4,6620.3,6163.9,5800.1,5379.8,4827.0,4913.3,4770.7,4873.7,4970.9,5071.7,4851.0,4627.1,4400.8,63409.0,0,0,1,0
3,2008,4,48859.0,194461.0,204322.0,215298.0,161482.0,202086.0,174531.0,204326.0,156802.0,2448.0,2613.0,2535.0,2781.0,2593.0,2647.0,2542.0,2639.0,836.0,944.0,818.0,1064.0,791.0,895.0,790.0,987.0,1436.0,1449.0,1368.0,1740.0,1664.0,1655.0,1467.0,1712.0,3173.0,3056.0,2781.0,3052.0,2986.0,3136.0,2651.0,2741.0,3208.0,3176.0,3074.0,3331.0,3311.0,3524.0,3124.0,3238.0,6.2,4.0,4.1,4.1,4.1,4.9,5.3,5.5,63.4,63.0,63.2,62.8,63.5,63.1,62.0,62.4,800.0,850.0,788.0,784.0,697.0,738.0,660.0,653.0,1.60,2.23,3.64,2.98,2.08,1.94,1.75,0.51,1.37,1.35,2.37,1.91,1.51,2.25,2.87,1.82,-291.0,-405.1,-597.8,-501.1,-577.0,-607.7,-751.7,-665.1,-6.9,-9.2,-15.3,-11.4,-13.8,-14.8,-20.3,-18.1,-71.7,-3.2,-305.3,-304.6,-167.0,189.0,-175.4,-121.6,0.1,10.5,-137.5,-1.0,258.9,145.9,45.6,-65.0,-3.8,0.2,-3.5,1.4,6.2,3.5,1.2,-1.8,295.7,232.3,160.2,317.5,289.0,253.1,117.0,317.4,-13.9,-7.4,0.9,5.7,11.4,18.6,21.1,24.2,-12.8,-2.3,10.0,19.9,23.5,31.8,32.3,32.9,-9.1,-0.8,0.8,-3.9,2.4,10.7,16.7,18.8,-15.4,-10.5,-8.0,-4.9,1.3,8.0,11.8,10.9,86.7,92.6,97.8,100.0,105.3,112.1,116.1,117.8,60.9,53.8,46.5,34.8,31.6,19.9,23.7,18.0,63.9,59.0,52.8,46.0,38.4,27.2,23.7,21.7,101.8,100.5,92.1,106.1,105.3,103.8,93.5,103.5,80.7,85.6,79.9,84.2,81.4,87.0,78.8,82.2,5615.0,7766.0,7437.0,6697.0,7428.0,9142.0,7649.0,6253.0,4005.0,4465.0,4498.0,4981.0,5370.0,6819.0,7205.0,8432.0,133.7,220.8,125.1,146.3,210.4,371.9,275.8,311.2,381.9,338.5,284.6,287.7,304.5,485.3,241.2,285.2,-4.7,14.0,-11.2,5.5,1.3,8.9,2.5,5.2,-1.1,13.9,-10.8,0.9,6.1,10.9,1.2,0.9,756130.0,732708.0,637429.0,773602.0,916048.0,920889.0,717347.0,837004.0,274107.0,286624.0,314285.0,386888.0,324305.0,302023.0,253943.0,370855.0,7617.8,7499.8,7305.4,7045.4,6620.3,6163.9,5800.1,5379.8,4722.7,4913.3,4770.7,4873.7,4970.9,5071.7,4851.0,4627.1,60470.0,0,0,0,1
4,2009,1,31236.0,141932.0,194461.0,204322.0,215298.0,161482.0,202086.0,174531.0,204326.0,2471.0,2448.0,2613.0,2535.0,2781.0,2593.0,2647.0,2542.0,1104.0,836.0,944.0,818.0,1064.0,791.0,895.0,790.0,1417.0,1436.0,1449.0,1368.0,1740.0,1664.0,1655.0,1467.0,2921.0,3173.0,3056.0,2781.0,3052.0,2986.0,3136.0,2651.0,3014.0,3208.0,3176.0,3074.0,3331.0,3311.0,3524.0,3124.0,7.4,6.2,4.0,4.1,4.1,4.1,4.9,5.3,62.7,63.4,63.0,63.2,62.8,63.5,63.1,62.0,838.0,800.0,850.0,788.0,784.0,697.0,738.0,660.0,0.57,1.60,2.23,3.64,2.98,2.08,1.94,1.75,0.73,1.37,1.35,2.37,1.91,1.51,2.25,2.87,-144.4,-291.0,-405.1,-597.8,-501.1,-577.0,-607.7,-751.7,-3.6,-6.9,-9.2,-15.3,-11.4,-13.8,-14.8,-20.3,-89.0,-71.7,-3.2,-305.3,-304.6,-167.0,189.0,-175.4,-313.0,0.1,10.5,-137.5,-1.0,258.9,145.9,45.6,-7.7,-3.8,0.2,-3.5,1.4,6.2,3.5,1.2,335.1,295.7,232.3,160.2,317.5,289.0,253.1,117.0,-24.6,-13.9,-7.4,0.9,5.7,11.4,18.6,21.1,-27.8,-12.8,-2.3,10.0,19.9,23.5,31.8,32.3,-33.5,-9.1,-0.8,0.8,-3.9,2.4,10.7,16.7,-21.4,-15.4,-10.5,-8.0,-4.9,1.3,8.0,11.8,74.2,86.7,92.6,97.8,100.0,105.3,112.1,116.1,72.5,60.9,53.8,46.5,34.8,31.6,19.9,23.7,67.4,63.9,59.0,52.8,46.0,38.4,27.2,23.7,95.8,101.8,100.5,92.1,106.1,105.3,103.8,93.5,70.4,80.7,85.6,79.9,84.2,81.4,87.0,78.8,3772.0,5615.0,7766.0,7437.0,6697.0,7428.0,9142.0,7649.0,3047.0,4005.0,4465.0,4498.0,4981.0,5370.0,6819.0,7205.0,128.5,133.7,220.8,125.1,146.3,210.4,371.9,275.8,298.7,381.9,338.5,284.6,287.7,304.5,485.3,241.2,-3.2,-4.7,14.0,-11.2,5.5,1.3,8.9,2.5,-3.5,-1.1,13.9,-10.8,0.9,6.1,10.9,1.2,417088.0,756130.0,732708.0,637429.0,773602.0,916048.0,920889.0,717347.0,314748.0,274107.0,286624.0,314285.0,386888.0,324305.0,302023.0,253943.0,7463.8,7617.8,7499.8,7305.4,7045.4,6620.3,6163.9,5800.1,4341.5,4722.7,4913.3,4770.7,4873.7,4970.9,5071.7,4851.0,40446.0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,2022,4,79376.0,188847.0,220687.0,251781.0,266856.0,293141.0,269448.0,281431.0,215796.0,3357.0,3349.0,3143.0,3388.0,3402.0,3222.0,2912.0,3135.0,1120.0,1127.0,1091.0,1373.0,1114.0,1122.0,1100.0,1338.0,1953.0,2098.0,1717.0,2107.0,1906.0,2286.0,2140.0,3271.0,5629.0,5755.0,5517.0,6287.0,5445.0,5373.0,4803.0,4964.0,5823.0,6213.0,5731.0,5977.0,5641.0,6127.0,5293.0,6012.0,5.6,5.8,5.5,5.2,5.7,6.9,7.1,7.4,69.5,68.8,69.0,67.6,67.4,65.8,65.9,67.0,1641.0,1667.0,1536.0,1548.0,1463.0,1476.0,1406.0,1515.0,6.13,8.22,4.45,3.17,3.10,1.58,1.19,-0.41,3.41,8.13,8.05,4.89,8.51,3.88,2.31,1.05,-139.7,-487.0,-237.9,498.4,-89.6,-793.6,-423.0,-859.3,-1.5,-5.4,-2.9,5.7,-1.1,-10.4,-6.2,-11.5,-272.5,2452.8,-1633.0,754.4,66.7,-263.6,-1318.3,-960.7,-3.8,203.7,-256.4,-48.3,-56.4,-232.5,-429.5,-431.4,-0.4,2.3,-3.2,-0.6,-0.7,-3.1,-6.2,-5.8,531.9,417.8,292.3,582.4,509.9,403.4,263.4,517.6,-4.3,6.9,12.8,19.2,19.4,14.5,6.1,-1.0,4.2,13.6,15.2,21.2,16.0,5.8,8.3,7.6,-4.4,-4.7,4.8,7.6,9.7,3.2,0.9,-2.9,-37.3,-26.3,-15.7,-10.8,-5.3,-14.7,-15.7,-16.7,87.6,95.6,102.5,107.5,109.6,103.6,99.5,95.4,38.5,22.2,23.9,20.7,30.2,37.0,44.7,60.4,26.0,21.6,23.6,24.6,30.9,37.1,39.9,52.8,133.4,135.1,123.7,140.5,134.4,129.9,112.0,124.9,124.5,138.9,130.8,140.2,131.3,137.4,123.6,125.3,5128.0,5917.0,5240.0,3825.0,5616.0,7281.0,5904.0,4588.0,6629.0,7330.0,6068.0,7978.0,6671.0,6891.0,6185.0,6629.0,149.5,138.3,166.0,189.9,216.1,202.8,183.6,148.5,192.1,256.0,214.5,267.7,305.8,335.4,322.6,362.8,2.8,10.3,-8.4,7.9,2.1,11.0,-6.7,5.1,-0.2,11.8,-8.5,9.2,1.1,9.9,-10.1,1.2,1359730.0,1243513.0,880170.0,1078321.0,947936.0,861687.0,508514.0,563392.0,368238.0,345111.0,238390.0,360505.0,266582.0,261004.0,225775.0,359488.0,9160.7,8764.2,8431.6,8313.7,7906.4,7684.2,7720.4,7604.7,22230.0,22235.0,22103.0,22684.0,21071.0,19847.0,19542.0,18378.0,74396.0,0,0,0,1
60,2023,1,46742.0,149799.0,188847.0,220687.0,251781.0,266856.0,293141.0,269448.0,281431.0,3373.0,3357.0,3349.0,3143.0,3388.0,3402.0,3222.0,2912.0,1373.0,1120.0,1127.0,1091.0,1373.0,1114.0,1122.0,1100.0,2362.0,1953.0,2098.0,1717.0,2107.0,1906.0,2286.0,2140.0,5666.0,5629.0,5755.0,5517.0,6287.0,5445.0,5373.0,4803.0,6008.0,5823.0,6213.0,5731.0,5977.0,5641.0,6127.0,5293.0,5.4,5.6,5.8,5.5,5.2,5.7,6.9,7.1,69.6,69.5,68.8,69.0,67.6,67.4,65.8,65.9,1735.0,1641.0,1667.0,1536.0,1548.0,1463.0,1476.0,1406.0,0.39,6.13,8.22,4.45,3.17,3.10,1.58,1.19,-0.06,3.41,8.13,8.05,4.89,8.51,3.88,2.31,-298.8,-139.7,-487.0,-237.9,498.4,-89.6,-793.6,-423.0,-3.1,-1.5,-5.4,-2.9,5.7,-1.1,-10.4,-6.2,-615.1,-272.5,2452.8,-1633.0,754.4,66.7,-263.6,-1318.3,-292.1,-3.8,203.7,-256.4,-48.3,-56.4,-232.5,-429.5,-3.0,-0.4,2.3,-3.2,-0.6,-0.7,-3.1,-6.2,608.8,531.9,417.8,292.3,582.4,509.9,403.4,263.4,-11.3,-4.3,6.9,12.8,19.2,19.4,14.5,6.1,-3.9,4.2,13.6,15.2,21.2,16.0,5.8,8.3,-11.4,-4.4,-4.7,4.8,7.6,9.7,3.2,0.9,-35.4,-37.3,-26.3,-15.7,-10.8,-5.3,-14.7,-15.7,83.5,87.6,95.6,102.5,107.5,109.6,103.6,99.5,57.6,38.5,22.2,23.9,20.7,30.2,37.0,44.7,40.7,26.0,21.6,23.6,24.6,30.9,37.1,39.9,134.4,133.4,135.1,123.7,140.5,134.4,129.9,112.0,123.8,124.5,138.9,130.8,140.2,131.3,137.4,123.6,4276.0,5128.0,5917.0,5240.0,3825.0,5616.0,7281.0,5904.0,5443.0,6629.0,7330.0,6068.0,7978.0,6671.0,6891.0,6185.0,168.6,149.5,138.3,166.0,189.9,216.1,202.8,183.6,293.0,192.1,256.0,214.5,267.7,305.8,335.4,322.6,3.0,2.8,10.3,-8.4,7.9,2.1,11.0,-6.7,4.0,-0.2,11.8,-8.5,9.2,1.1,9.9,-10.1,1107262.0,1359730.0,1243513.0,880170.0,1078321.0,947936.0,861687.0,508514.0,463394.0,368238.0,345111.0,238390.0,360505.0,266582.0,261004.0,225775.0,9373.4,9160.7,8764.2,8431.6,8313.7,7906.4,7684.2,7720.4,21520.0,22230.0,22235.0,22103.0,22684.0,21071.0,19847.0,19542.0,64830.0,1,0,0,0
61,2023,2,76021.0,199947.0,149799.0,188847.0,220687.0,251781.0,266856.0,293141.0,269448.0,3110.0,3373.0,3357.0,3349.0,3143.0,3388.0,3402.0,3222.0,1094.0,1373.0,1120.0,1127.0,1091.0,1373.0,1114.0,1122.0,1594.0,2362.0,1953.0,2098.0,1717.0,2107.0,1906.0,2286.0,5185.0,5666.0,5629.0,5755.0,5517.0,6287.0,5445.0,5373.0,5448.0,6008.0,5823.0,6213.0,5731.0,5977.0,5641.0,6127.0,5.3,5.4,5.6,5.8,5.5,5.2,5.7,6.9,69.4,69.6,69.5,68.8,69.0,67.6,67.4,65.8,1741.0,1735.0,1641.0,1667.0,1536.0,1548.0,1463.0,1476.0,1.57,0.39,6.13,8.22,4.45,3.17,3.10,1.58,-0.94,-0.06,3.41,8.13,8.05,4.89,8.51,3.88,-76.9,-298.8,-139.7,-487.0,-237.9,498.4,-89.6,-793.6,-0.9,-3.1,-1.5,-5.4,-2.9,5.7,-1.1,-10.4,-615.1,-615.1,-272.5,2452.8,-1633.0,754.4,66.7,-263.6,-382.7,-292.1,-3.8,203.7,-256.4,-48.3,-56.4,-232.5,-4.3,-3.0,-0.4,2.3,-3.2,-0.6,-0.7,-3.1,361.0,608.8,531.9,417.8,292.3,582.4,509.9,403.4,-13.6,-11.3,-4.3,6.9,12.8,19.2,19.4,14.5,-1.9,-3.9,4.2,13.6,15.2,21.2,16.0,5.8,-11.8,-11.4,-4.4,-4.7,4.8,7.6,9.7,3.2,-28.7,-35.4,-37.3,-26.3,-15.7,-10.8,-5.3,-14.7,83.7,83.5,87.6,95.6,102.5,107.5,109.6,103.6,59.6,57.6,38.5,22.2,23.9,20.7,30.2,37.0,48.1,40.7,26.0,21.6,23.6,24.6,30.9,37.1,112.6,134.4,133.4,135.1,123.7,140.5,134.4,129.9,119.7,123.8,124.5,138.9,130.8,140.2,131.3,137.4,5726.0,4276.0,5128.0,5917.0,5240.0,3825.0,5616.0,7281.0,4717.0,5443.0,6629.0,7330.0,6068.0,7978.0,6671.0,6891.0,127.4,168.6,149.5,138.3,166.0,189.9,216.1,202.8,250.1,293.0,192.1,256.0,214.5,267.7,305.8,335.4,-11.3,3.0,2.8,10.3,-8.4,7.9,2.1,11.0,-9.4,4.0,-0.2,11.8,-8.5,9.2,1.1,9.9,977586.0,1107262.0,1359730.0,1243513.0,880170.0,1078321.0,947936.0,861687.0,415832.0,463394.0,368238.0,345111.0,238390.0,360505.0,266582.0,261004.0,9303.6,9373.4,9160.7,8764.2,8431.6,8313.7,7906.4,7684.2,20532.0,21520.0,22230.0,22235.0,22103.0,22684.0,21071.0,19847.0,79817.0,0,1,0,0
62,2023,3,75548.0,256328.0,199947.0,149799.0,188847.0,220687.0,251781.0,266856.0,293141.0,3248.0,3110.0,3373.0,3357.0,3349.0,3143.0,3388.0,3402.0,1147.0,1094.0,1373.0,1120.0,1127.0,1091.0,1373.0,1114.0,1707.0,1594.0,2362.0,1953.0,2098.0,1717.0,2107.0,1906.0,5445.0,5185.0,5666.0,5629.0,5755.0,5517.0,6287.0,5445.0,5690.0,5448.0,6008.0,5823.0,6213.0,5731.0,5977.0,5641.0,6.7,5.3,5.4,5.6,5.8,5.5,5.2,5.7,69.2,69.4,69.6,69.5,68.8,69.0,67.6,67.4,1872.0,1741.0,1735.0,1641.0,1667.0,1536.0,1548.0,1463.0,2.84,1.57,0.39,6.13,8.22,4.45,3.17,3.10,-1.36,-0.94,-0.06,3.41,8.13,8.05,4.89,8.51,-283.9,-76.9,-298.8,-139.7,-487.0,-237.9,498.4,-89.6,-3.0,-0.9,-3.1,-1.5,-5.4,-2.9,5.7,-1.1,-219.4,-615.1,-615.1,-272.5,2452.8,-1633.0,754.4,66.7,35.9,-382.7,-292.1,-3.8,203.7,-256.4,-48.3,-56.4,0.4,-4.3,-3.0,-0.4,2.3,-3.2,-0.6,-0.7,487.8,361.0,608.8,531.9,417.8,292.3,582.4,509.9,-18.1,-13.6,-11.3,-4.3,6.9,12.8,19.2,19.4,-2.4,-1.9,-3.9,4.2,13.6,15.2,21.2,16.0,-12.5,-11.8,-11.4,-4.4,-4.7,4.8,7.6,9.7,-28.8,-28.7,-35.4,-37.3,-26.3,-15.7,-10.8,-5.3,81.9,83.7,83.5,87.6,95.6,102.5,107.5,109.6,68.2,59.6,57.6,38.5,22.2,23.9,20.7,30.2,49.0,48.1,40.7,26.0,21.6,23.6,24.6,30.9,122.4,112.6,134.4,133.4,135.1,123.7,140.5,134.4,119.2,119.7,123.8,124.5,138.9,130.8,140.2,131.3,6511.0,5726.0,4276.0,5128.0,5917.0,5240.0,3825.0,5616.0,5894.0,4717.0,5443.0,6629.0,7330.0,6068.0,7978.0,6671.0,109.4,127.4,168.6,149.5,138.3,166.0,189.9,216.1,245.6,250.1,293.0,192.1,256.0,214.5,267.7,305.8,8.1,-11.3,3.0,2.8,10.3,-8.4,7.9,2.1,8.9,-9.4,4.0,-0.2,11.8,-8.5,9.2,1.1,1280858.0,977586.0,1107262.0,1359730.0,1243513.0,880170.0,1078321.0,947936.0,379544.0,415832.0,463394.0,368238.0,345111.0,238390.0,360505.0,266582.0,9445.5,9303.6,9373.4,9160.7,8764.2,8431.6,8313.7,7906.4,19337.0,20532.0,21520.0,22230.0,22235.0,22103.0,22684.0,21071.0,85235.0,0,0,1,0


## Create dictionaries with train and validation splits, and train and test splits respectively

### Train splits

The train split lengths for validation and for test differ: for test, the train split also covers the validation split from the same fold.

In [None]:
# Expanding-window folds for validation. Each fold trains on every row up to
# and including the origin `idx` and validates on the following four rows.
# Nothing is masked with NaN in this cell.
splits_train_val = {'train': [], 'val': []}

# Eight origins: len - 16 through len - 9 inclusive.
for idx in range(len(trainvalidtest) - 16, len(trainvalidtest) - 8):
    # .loc slices by label and includes both end points.
    train_data = trainvalidtest.loc[:idx]
    valid_data = trainvalidtest.loc[idx + 1:idx + 4]

    splits_train_val['train'].append(train_data)
    splits_train_val['val'].append(valid_data)

    # (Year, Quarter) pairs, printed to eyeball the fold boundaries.
    train_quarters = list(zip(train_data['Year'], train_data['Quarter']))
    valid_quarters = list(zip(valid_data['Year'], valid_data['Quarter']))
    print(f'TRAIN: {train_quarters}, VALIDATION: {valid_quarters}')

TRAIN: [(2008, 1), (2008, 2), (2008, 3), (2008, 4), (2009, 1), (2009, 2), (2009, 3), (2009, 4), (2010, 1), (2010, 2), (2010, 3), (2010, 4), (2011, 1), (2011, 2), (2011, 3), (2011, 4), (2012, 1), (2012, 2), (2012, 3), (2012, 4), (2013, 1), (2013, 2), (2013, 3), (2013, 4), (2014, 1), (2014, 2), (2014, 3), (2014, 4), (2015, 1), (2015, 2), (2015, 3), (2015, 4), (2016, 1), (2016, 2), (2016, 3), (2016, 4), (2017, 1), (2017, 2), (2017, 3), (2017, 4), (2018, 1), (2018, 2), (2018, 3), (2018, 4), (2019, 1), (2019, 2), (2019, 3), (2019, 4), (2020, 1)], VALIDATION: [(2020, 2), (2020, 3), (2020, 4), (2021, 1)]
TRAIN: [(2008, 1), (2008, 2), (2008, 3), (2008, 4), (2009, 1), (2009, 2), (2009, 3), (2009, 4), (2010, 1), (2010, 2), (2010, 3), (2010, 4), (2011, 1), (2011, 2), (2011, 3), (2011, 4), (2012, 1), (2012, 2), (2012, 3), (2012, 4), (2013, 1), (2013, 2), (2013, 3), (2013, 4), (2014, 1), (2014, 2), (2014, 3), (2014, 4), (2015, 1), (2015, 2), (2015, 3), (2015, 4), (2016, 1), (2016, 2), (2016, 3), (2

In [None]:
# Expanding-window folds for testing. Each fold trains on every row up to and
# including the origin `idx` and tests on the following four rows.
# Nothing is masked with NaN in this cell.
splits_train_test = {'train': [], 'test': []}

# Eight origins: len - 12 through len - 5 inclusive, so the last test window
# ends on the final row of the frame.
for idx in range(len(trainvalidtest) - 12, len(trainvalidtest) - 4):
    # .loc slices by label and includes both end points.
    train_data = trainvalidtest.loc[:idx]
    test_data = trainvalidtest.loc[idx + 1:idx + 4]

    splits_train_test['train'].append(train_data)
    splits_train_test['test'].append(test_data)

    # (Year, Quarter) pairs, printed to eyeball the fold boundaries.
    train_quarters = list(zip(train_data['Year'], train_data['Quarter']))
    test_quarters = list(zip(test_data['Year'], test_data['Quarter']))
    print(f'TRAIN: {train_quarters}, TEST: {test_quarters}')

TRAIN: [(2008, 1), (2008, 2), (2008, 3), (2008, 4), (2009, 1), (2009, 2), (2009, 3), (2009, 4), (2010, 1), (2010, 2), (2010, 3), (2010, 4), (2011, 1), (2011, 2), (2011, 3), (2011, 4), (2012, 1), (2012, 2), (2012, 3), (2012, 4), (2013, 1), (2013, 2), (2013, 3), (2013, 4), (2014, 1), (2014, 2), (2014, 3), (2014, 4), (2015, 1), (2015, 2), (2015, 3), (2015, 4), (2016, 1), (2016, 2), (2016, 3), (2016, 4), (2017, 1), (2017, 2), (2017, 3), (2017, 4), (2018, 1), (2018, 2), (2018, 3), (2018, 4), (2019, 1), (2019, 2), (2019, 3), (2019, 4), (2020, 1), (2020, 2), (2020, 3), (2020, 4), (2021, 1)], TEST: [(2021, 2), (2021, 3), (2021, 4), (2022, 1)]
TRAIN: [(2008, 1), (2008, 2), (2008, 3), (2008, 4), (2009, 1), (2009, 2), (2009, 3), (2009, 4), (2010, 1), (2010, 2), (2010, 3), (2010, 4), (2011, 1), (2011, 2), (2011, 3), (2011, 4), (2012, 1), (2012, 2), (2012, 3), (2012, 4), (2013, 1), (2013, 2), (2013, 3), (2013, 4), (2014, 1), (2014, 2), (2014, 3), (2014, 4), (2015, 1), (2015, 2), (2015, 3), (2015, 4

### Validation and test splits with NaNs

In [None]:
# Validation windows in which some lag values are replaced by NaN.
#
# For a column with lag order k, the last (window length - k) rows of the
# window are set to NaN; the first k rows keep their values. Columns whose
# lag order is at least the window length are left unchanged.
val_nan = {'val_nan': []}

# Columns named '<feature>_lag_<k>'. 'NCN_sales_est_lagged_4' is not matched
# because 'lagged_' does not contain the substring 'lag_'.
lag_columns = [col for col in trainvalidtest.columns if 'lag_' in col]

# Eight origins: len - 16 through len - 9 inclusive (same folds as
# splits_train_val).
for idx in range(len(trainvalidtest) - 16, len(trainvalidtest) - 8):
    # Copy so that masking never writes through to trainvalidtest.
    valid_data_nan = trainvalidtest.loc[idx + 1: idx + 4, :].copy()
    valid_quarters_nan = list(zip(valid_data_nan['Year'], valid_data_nan['Quarter']))

    print(f'VALIDATION: {valid_quarters_nan}')

    # Blank the trailing rows of each lag column (label-based .loc assignment).
    for col in lag_columns:
        lag = int(col.split('_')[-1])  # Lag order is the trailing integer
        nan_rows = len(valid_data_nan) - lag
        # Guard on nan_rows rather than a fixed lag threshold: index[-0:]
        # would select the whole window and blank the entire column.
        if nan_rows > 0:
            valid_data_nan.loc[valid_data_nan.index[-nan_rows:], col] = np.nan

    # Store the masked window for this fold
    val_nan['val_nan'].append(valid_data_nan)

VALIDATION: [(2020, 2), (2020, 3), (2020, 4), (2021, 1)]
VALIDATION: [(2020, 3), (2020, 4), (2021, 1), (2021, 2)]
VALIDATION: [(2020, 4), (2021, 1), (2021, 2), (2021, 3)]
VALIDATION: [(2021, 1), (2021, 2), (2021, 3), (2021, 4)]
VALIDATION: [(2021, 2), (2021, 3), (2021, 4), (2022, 1)]
VALIDATION: [(2021, 3), (2021, 4), (2022, 1), (2022, 2)]
VALIDATION: [(2021, 4), (2022, 1), (2022, 2), (2022, 3)]
VALIDATION: [(2022, 1), (2022, 2), (2022, 3), (2022, 4)]


In [None]:
# Test windows in which some lag values are replaced by NaN.
#
# For a column with lag order k, the last (window length - k) rows of the
# window are set to NaN; the first k rows keep their values. Columns whose
# lag order is at least the window length are left unchanged.
test_nan = {'test_nan': []}

# Columns named '<feature>_lag_<k>'. 'NCN_sales_est_lagged_4' is not matched
# because 'lagged_' does not contain the substring 'lag_'.
lag_columns = [col for col in trainvalidtest.columns if 'lag_' in col]

# Same eight origins as the validation folds; each test window is the four
# rows that follow the corresponding validation window (idx + 5 .. idx + 8).
for idx in range(len(trainvalidtest) - 16, len(trainvalidtest) - 8):
    # Copy so that masking never writes through to trainvalidtest.
    test_data_nan = trainvalidtest.loc[idx + 5: idx + 8, :].copy()
    test_quarters_nan = list(zip(test_data_nan['Year'], test_data_nan['Quarter']))

    print(f'TEST: {test_quarters_nan}')

    # Blank the trailing rows of each lag column (label-based .loc assignment).
    for col in lag_columns:
        lag = int(col.split('_')[-1])  # Lag order is the trailing integer
        nan_rows = len(test_data_nan) - lag
        # Guard on nan_rows rather than a fixed lag threshold: index[-0:]
        # would select the whole window and blank the entire column.
        if nan_rows > 0:
            test_data_nan.loc[test_data_nan.index[-nan_rows:], col] = np.nan

    # Store the masked window for this fold
    test_nan['test_nan'].append(test_data_nan)

TEST: [(2021, 2), (2021, 3), (2021, 4), (2022, 1)]
TEST: [(2021, 3), (2021, 4), (2022, 1), (2022, 2)]
TEST: [(2021, 4), (2022, 1), (2022, 2), (2022, 3)]
TEST: [(2022, 1), (2022, 2), (2022, 3), (2022, 4)]
TEST: [(2022, 2), (2022, 3), (2022, 4), (2023, 1)]
TEST: [(2022, 3), (2022, 4), (2023, 1), (2023, 2)]
TEST: [(2022, 4), (2023, 1), (2023, 2), (2023, 3)]
TEST: [(2023, 1), (2023, 2), (2023, 3), (2023, 4)]


In [None]:
# Pair each fold's training split with its NaN-masked validation window.
# The lists are shared with splits_train_val / val_nan, not copied.
splits_val = dict(train=splits_train_val['train'], val=val_nan['val_nan'])

In [None]:
# Show the last rows of the first fold's training split.
splits_val['train'][0].tail()

Unnamed: 0,Year,Quarter,NCN_sales_est,Order_book_lag_1,Order_book_lag_2,Order_book_lag_3,Order_book_lag_4,Order_book_lag_5,Order_book_lag_6,Order_book_lag_7,Order_book_lag_8,GDP_households_lag_1,GDP_households_lag_2,GDP_households_lag_3,GDP_households_lag_4,GDP_households_lag_5,GDP_households_lag_6,GDP_households_lag_7,GDP_households_lag_8,GDP_government_lag_1,GDP_government_lag_2,GDP_government_lag_3,GDP_government_lag_4,GDP_government_lag_5,GDP_government_lag_6,GDP_government_lag_7,GDP_government_lag_8,GDP_fixed_capital_lag_1,GDP_fixed_capital_lag_2,GDP_fixed_capital_lag_3,GDP_fixed_capital_lag_4,GDP_fixed_capital_lag_5,GDP_fixed_capital_lag_6,GDP_fixed_capital_lag_7,GDP_fixed_capital_lag_8,GDP_export_lag_1,GDP_export_lag_2,GDP_export_lag_3,GDP_export_lag_4,GDP_export_lag_5,GDP_export_lag_6,GDP_export_lag_7,GDP_export_lag_8,GDP_import_lag_1,GDP_import_lag_2,GDP_import_lag_3,GDP_import_lag_4,GDP_import_lag_5,GDP_import_lag_6,GDP_import_lag_7,GDP_import_lag_8,Unemployment_rate_lag_1,Unemployment_rate_lag_2,Unemployment_rate_lag_3,Unemployment_rate_lag_4,Unemployment_rate_lag_5,Unemployment_rate_lag_6,Unemployment_rate_lag_7,Unemployment_rate_lag_8,Employment_rate_lag_1,Employment_rate_lag_2,Employment_rate_lag_3,Employment_rate_lag_4,Employment_rate_lag_5,Employment_rate_lag_6,Employment_rate_lag_7,Employment_rate_lag_8,Gross_monthly_wage_lag_1,Gross_monthly_wage_lag_2,Gross_monthly_wage_lag_3,Gross_monthly_wage_lag_4,Gross_monthly_wage_lag_5,Gross_monthly_wage_lag_6,Gross_monthly_wage_lag_7,Gross_monthly_wage_lag_8,CPI_lag_1,CPI_lag_2,CPI_lag_3,CPI_lag_4,CPI_lag_5,CPI_lag_6,CPI_lag_7,CPI_lag_8,PPI_lag_1,PPI_lag_2,PPI_lag_3,PPI_lag_4,PPI_lag_5,PPI_lag_6,PPI_lag_7,PPI_lag_8,Current_account_balance_lag_1,Current_account_balance_lag_2,Current_account_balance_lag_3,Current_account_balance_lag_4,Current_account_balance_lag_5,Current_account_balance_lag_6,Current_account_balance_lag_7,Current_account_balance_lag_8,Current_account_balance_GDP_lag_1,Current_account_balance_
GDP_lag_2,Current_account_balance_GDP_lag_3,Current_account_balance_GDP_lag_4,Current_account_balance_GDP_lag_5,Current_account_balance_GDP_lag_6,Current_account_balance_GDP_lag_7,Current_account_balance_GDP_lag_8,FDI_balance_lag_1,FDI_balance_lag_2,FDI_balance_lag_3,FDI_balance_lag_4,FDI_balance_lag_5,FDI_balance_lag_6,FDI_balance_lag_7,FDI_balance_lag_8,General_gov_net_lag_1,General_gov_net_lag_2,General_gov_net_lag_3,General_gov_net_lag_4,General_gov_net_lag_5,General_gov_net_lag_6,General_gov_net_lag_7,General_gov_net_lag_8,General_gov_budget_balance_lag_1,General_gov_budget_balance_lag_2,General_gov_budget_balance_lag_3,General_gov_budget_balance_lag_4,General_gov_budget_balance_lag_5,General_gov_budget_balance_lag_6,General_gov_budget_balance_lag_7,General_gov_budget_balance_lag_8,General_gov_invest_lag_1,General_gov_invest_lag_2,General_gov_invest_lag_3,General_gov_invest_lag_4,General_gov_invest_lag_5,General_gov_invest_lag_6,General_gov_invest_lag_7,General_gov_invest_lag_8,Conf_industrial_lag_1,Conf_industrial_lag_2,Conf_industrial_lag_3,Conf_industrial_lag_4,Conf_industrial_lag_5,Conf_industrial_lag_6,Conf_industrial_lag_7,Conf_industrial_lag_8,Conf_retail_lag_1,Conf_retail_lag_2,Conf_retail_lag_3,Conf_retail_lag_4,Conf_retail_lag_5,Conf_retail_lag_6,Conf_retail_lag_7,Conf_retail_lag_8,Conf_service_lag_1,Conf_service_lag_2,Conf_service_lag_3,Conf_service_lag_4,Conf_service_lag_5,Conf_service_lag_6,Conf_service_lag_7,Conf_service_lag_8,Conf_consumer_lag_1,Conf_consumer_lag_2,Conf_consumer_lag_3,Conf_consumer_lag_4,Conf_consumer_lag_5,Conf_consumer_lag_6,Conf_consumer_lag_7,Conf_consumer_lag_8,Econ_sent_ind_lag_1,Econ_sent_ind_lag_2,Econ_sent_ind_lag_3,Econ_sent_ind_lag_4,Econ_sent_ind_lag_5,Econ_sent_ind_lag_6,Econ_sent_ind_lag_7,Econ_sent_ind_lag_8,Ind_BSI_lag_1,Ind_BSI_lag_2,Ind_BSI_lag_3,Ind_BSI_lag_4,Ind_BSI_lag_5,Ind_BSI_lag_6,Ind_BSI_lag_7,Ind_BSI_lag_8,Serv_BSI_lag_1,Serv_BSI_lag_2,Serv_BSI_lag_3,Serv_BSI_lag_4,Serv_BSI_lag_5,Serv_BSI_lag_6,Serv_BSI
_lag_7,Serv_BSI_lag_8,Retail_volume_idx_lag_1,Retail_volume_idx_lag_2,Retail_volume_idx_lag_3,Retail_volume_idx_lag_4,Retail_volume_idx_lag_5,Retail_volume_idx_lag_6,Retail_volume_idx_lag_7,Retail_volume_idx_lag_8,Industrial_volume_idx_lag_1,Industrial_volume_idx_lag_2,Industrial_volume_idx_lag_3,Industrial_volume_idx_lag_4,Industrial_volume_idx_lag_5,Industrial_volume_idx_lag_6,Industrial_volume_idx_lag_7,Industrial_volume_idx_lag_8,New_car_reg_lag_1,New_car_reg_lag_2,New_car_reg_lag_3,New_car_reg_lag_4,New_car_reg_lag_5,New_car_reg_lag_6,New_car_reg_lag_7,New_car_reg_lag_8,Apt_sales_lag_1,Apt_sales_lag_2,Apt_sales_lag_3,Apt_sales_lag_4,Apt_sales_lag_5,Apt_sales_lag_6,Apt_sales_lag_7,Apt_sales_lag_8,Building_permit_dwellings_lag_1,Building_permit_dwellings_lag_2,Building_permit_dwellings_lag_3,Building_permit_dwellings_lag_4,Building_permit_dwellings_lag_5,Building_permit_dwellings_lag_6,Building_permit_dwellings_lag_7,Building_permit_dwellings_lag_8,Building_permit_nonres_lag_1,Building_permit_nonres_lag_2,Building_permit_nonres_lag_3,Building_permit_nonres_lag_4,Building_permit_nonres_lag_5,Building_permit_nonres_lag_6,Building_permit_nonres_lag_7,Building_permit_nonres_lag_8,Productivity_per_employee_lag_1,Productivity_per_employee_lag_2,Productivity_per_employee_lag_3,Productivity_per_employee_lag_4,Productivity_per_employee_lag_5,Productivity_per_employee_lag_6,Productivity_per_employee_lag_7,Productivity_per_employee_lag_8,Productivity_per_hour_worked_lag_1,Productivity_per_hour_worked_lag_2,Productivity_per_hour_worked_lag_3,Productivity_per_hour_worked_lag_4,Productivity_per_hour_worked_lag_5,Productivity_per_hour_worked_lag_6,Productivity_per_hour_worked_lag_7,Productivity_per_hour_worked_lag_8,Enterprises_total_profit_lag_1,Enterprises_total_profit_lag_2,Enterprises_total_profit_lag_3,Enterprises_total_profit_lag_4,Enterprises_total_profit_lag_5,Enterprises_total_profit_lag_6,Enterprises_total_profit_lag_7,Enterprises_total_profit_lag_8,Enterprises_invest
_build_lag_1,Enterprises_invest_build_lag_2,Enterprises_invest_build_lag_3,Enterprises_invest_build_lag_4,Enterprises_invest_build_lag_5,Enterprises_invest_build_lag_6,Enterprises_invest_build_lag_7,Enterprises_invest_build_lag_8,Loans_granted_to_nonfin_co_lag_1,Loans_granted_to_nonfin_co_lag_2,Loans_granted_to_nonfin_co_lag_3,Loans_granted_to_nonfin_co_lag_4,Loans_granted_to_nonfin_co_lag_5,Loans_granted_to_nonfin_co_lag_6,Loans_granted_to_nonfin_co_lag_7,Loans_granted_to_nonfin_co_lag_8,M1_lag_1,M1_lag_2,M1_lag_3,M1_lag_4,M1_lag_5,M1_lag_6,M1_lag_7,M1_lag_8,NCN_sales_est_lagged_4,Quarter_1,Quarter_2,Quarter_3,Quarter_4
44,2019,1,31421.0,100352.0,131953.0,131552.0,143589.0,144122.0,142553.0,130601.0,130109.0,3009.0,2897.0,2894.0,2689.0,2870.0,2732.0,2708.0,2657.0,1275.0,974.0,1042.0,976.0,1221.0,978.0,1046.0,976.0,1984.0,1659.0,1613.0,1432.0,1592.0,1475.0,1599.0,1387.0,4656.0,4487.0,4705.0,4217.0,4516.0,4448.0,4466.0,4126.0,4726.0,4429.0,4551.0,4085.0,4348.0,4191.0,4257.0,3998.0,4.4,5.2,5.1,6.8,5.3,5.2,7.0,5.6,69.1,68.2,68.2,66.6,68.4,68.3,66.9,66.3,1384.0,1291.0,1321.0,1242.0,1271.0,1201.0,1242.0,1153.0,0.26,1.18,1.22,1.02,0.11,0.93,1.02,1.71,-0.49,1.74,-0.31,1.03,0.7,0.87,0.41,1.24,-1.2,41.7,137.9,48.9,124.1,195.9,104.2,115.3,1.4,0.6,2.1,0.8,1.9,3.3,1.7,2.1,-381.2,-183.6,-741.1,58.9,-228.1,-479.9,-179.3,-51.9,-240.5,58.8,86.9,-48.7,-41.8,40.1,-21.0,-90.5,-3.4,0.9,1.3,-0.8,-0.7,0.7,-0.3,-1.7,416.4,417.3,340.2,198.3,410.6,388.5,363.4,192.9,4.7,6.5,7.3,7.3,8.6,9.0,8.6,5.7,9.7,10.2,9.1,11.5,10.4,11.0,13.7,9.1,9.9,8.1,6.9,17.9,16.6,13.4,12.2,9.7,-1.5,0.5,-1.5,-2.5,-3.2,-2.4,-2.1,-1.4,102.9,103.6,103.0,105.4,105.6,105.5,105.6,103.3,51.6,43.7,44.9,44.4,42.9,46.5,42.9,56.6,30.1,29.2,35.5,29.0,35.1,39.0,45.9,44.9,112.8,110.4,108.1,96.7,110.4,108.7,108.4,96.2,117.2,107.6,115.5,106.5,109.9,103.8,110.1,102.3,5172.0,6555.0,7753.0,6830.0,5791.0,6123.0,7665.0,6046.0,5888.0,5551.0,5937.0,5278.0,5995.0,5823.0,5508.0,5513.0,179.7,149.3,158.3,132.9,194.5,163.0,190.3,149.0,252.9,330.0,252.6,224.5,375.1,215.2,342.6,438.7,5.7,1.5,7.4,-5.2,6.3,-3.2,10.4,-7.6,-0.5,11.1,6.3,-3.5,1.4,1.3,13.4,-9.3,780549.0,808620.0,716592.0,598407.0,704739.0,726242.0,827002.0,612323.0,346471.0,249839.0,193602.0,217113.0,278915.0,158499.0,202304.0,170916.0,7330.1,7220.2,7207.1,7226.8,7078.1,6933.4,7582.4,7595.2,13513.0,12792.0,13112.0,12344.0,12321.0,11547.0,11539.0,11239.0,40199.0,1,0,0,0
45,2019,2,61601.0,170509.0,100352.0,131953.0,131552.0,143589.0,144122.0,142553.0,130601.0,2871.0,3009.0,2897.0,2894.0,2689.0,2870.0,2732.0,2708.0,1034.0,1275.0,974.0,1042.0,976.0,1221.0,978.0,1046.0,1488.0,1984.0,1659.0,1613.0,1432.0,1592.0,1475.0,1599.0,4487.0,4656.0,4487.0,4705.0,4217.0,4516.0,4448.0,4466.0,4304.0,4726.0,4429.0,4551.0,4085.0,4348.0,4191.0,4257.0,4.7,4.4,5.2,5.1,6.8,5.3,5.2,7.0,67.5,69.1,68.2,68.2,66.6,68.4,68.3,66.9,1341.0,1384.0,1291.0,1321.0,1242.0,1271.0,1201.0,1242.0,-0.36,0.26,1.18,1.22,1.02,0.11,0.93,1.02,-0.49,-0.49,1.74,-0.31,1.03,0.7,0.87,0.41,134.0,-1.2,41.7,137.9,48.9,124.1,195.9,104.2,2.1,1.4,0.6,2.1,0.8,1.9,3.3,1.7,-151.4,-381.2,-183.6,-741.1,58.9,-228.1,-479.9,-179.3,-125.0,-240.5,58.8,86.9,-48.7,-41.8,40.1,-21.0,-1.9,-3.4,0.9,1.3,-0.8,-0.7,0.7,-0.3,187.1,416.4,417.3,340.2,198.3,410.6,388.5,363.4,-0.3,4.7,6.5,7.3,7.3,8.6,9.0,8.6,8.2,9.7,10.2,9.1,11.5,10.4,11.0,13.7,7.7,9.9,8.1,6.9,17.9,16.6,13.4,12.2,-0.1,-1.5,0.5,-1.5,-2.5,-3.2,-2.4,-2.1,100.3,102.9,103.6,103.0,105.4,105.6,105.5,105.6,50.7,51.6,43.7,44.9,44.4,42.9,46.5,42.9,33.7,30.1,29.2,35.5,29.0,35.1,39.0,45.9,102.0,112.8,110.4,108.1,96.7,110.4,108.7,108.4,121.0,117.2,107.6,115.5,106.5,109.9,103.8,110.1,6506.0,5172.0,6555.0,7753.0,6830.0,5791.0,6123.0,7665.0,5319.0,5888.0,5551.0,5937.0,5278.0,5995.0,5823.0,5508.0,131.3,179.7,149.3,158.3,132.9,194.5,163.0,190.3,250.4,252.9,330.0,252.6,224.5,375.1,215.2,342.6,-6.7,5.7,1.5,7.4,-5.2,6.3,-3.2,10.4,-8.4,-0.5,11.1,6.3,-3.5,1.4,1.3,13.4,535522.0,780549.0,808620.0,716592.0,598407.0,704739.0,726242.0,827002.0,259694.0,346471.0,249839.0,193602.0,217113.0,278915.0,158499.0,202304.0,7440.7,7330.1,7220.2,7207.1,7226.8,7078.1,6933.4,7582.4,13819.0,13513.0,12792.0,13112.0,12344.0,12321.0,11547.0,11539.0,57832.0,0,1,0,0
46,2019,3,66551.0,179691.0,170509.0,100352.0,131953.0,131552.0,143589.0,144122.0,142553.0,3016.0,2871.0,3009.0,2897.0,2894.0,2689.0,2870.0,2732.0,1058.0,1034.0,1275.0,974.0,1042.0,976.0,1221.0,978.0,1855.0,1488.0,1984.0,1659.0,1613.0,1432.0,1592.0,1475.0,4868.0,4487.0,4656.0,4487.0,4705.0,4217.0,4516.0,4448.0,4713.0,4304.0,4726.0,4429.0,4551.0,4085.0,4348.0,4191.0,5.1,4.7,4.4,5.2,5.1,6.8,5.3,5.2,68.1,67.5,69.1,68.2,68.2,66.6,68.4,68.3,1419.0,1341.0,1384.0,1291.0,1321.0,1242.0,1271.0,1201.0,1.81,-0.36,0.26,1.18,1.22,1.02,0.11,0.93,0.29,-0.49,-0.49,1.74,-0.31,1.03,0.7,0.87,186.3,134.0,-1.2,41.7,137.9,48.9,124.1,195.9,2.7,2.1,1.4,0.6,2.1,0.8,1.9,3.3,-44.4,-151.4,-381.2,-183.6,-741.1,58.9,-228.1,-479.9,70.5,-125.0,-240.5,58.8,86.9,-48.7,-41.8,40.1,1.0,-1.9,-3.4,0.9,1.3,-0.8,-0.7,0.7,334.8,187.1,416.4,417.3,340.2,198.3,410.6,388.5,-4.0,-0.3,4.7,6.5,7.3,7.3,8.6,9.0,16.7,8.2,9.7,10.2,9.1,11.5,10.4,11.0,11.5,7.7,9.9,8.1,6.9,17.9,16.6,13.4,-0.8,-0.1,-1.5,0.5,-1.5,-2.5,-3.2,-2.4,99.7,100.3,102.9,103.6,103.0,105.4,105.6,105.5,57.4,50.7,51.6,43.7,44.9,44.4,42.9,46.5,31.6,33.7,30.1,29.2,35.5,29.0,35.1,39.0,112.4,102.0,112.8,110.4,108.1,96.7,110.4,108.7,127.4,121.0,117.2,107.6,115.5,106.5,109.9,103.8,8071.0,6506.0,5172.0,6555.0,7753.0,6830.0,5791.0,6123.0,6004.0,5319.0,5888.0,5551.0,5937.0,5278.0,5995.0,5823.0,175.2,131.3,179.7,149.3,158.3,132.9,194.5,163.0,225.3,250.4,252.9,330.0,252.6,224.5,375.1,215.2,8.6,-6.7,5.7,1.5,7.4,-5.2,6.3,-3.2,8.2,-8.4,-0.5,11.1,6.3,-3.5,1.4,1.3,816565.0,535522.0,780549.0,808620.0,716592.0,598407.0,704739.0,726242.0,298081.0,259694.0,346471.0,249839.0,193602.0,217113.0,278915.0,158499.0,7545.3,7440.7,7330.1,7220.2,7207.1,7226.8,7078.1,6933.4,13730.0,13819.0,13513.0,12792.0,13112.0,12344.0,12321.0,11547.0,60553.0,0,0,1,0
47,2019,4,53727.0,196493.0,179691.0,170509.0,100352.0,131953.0,131552.0,143589.0,144122.0,2997.0,3016.0,2871.0,3009.0,2897.0,2894.0,2689.0,2870.0,1017.0,1058.0,1034.0,1275.0,974.0,1042.0,976.0,1221.0,1822.0,1855.0,1488.0,1984.0,1659.0,1613.0,1432.0,1592.0,4856.0,4868.0,4487.0,4656.0,4487.0,4705.0,4217.0,4516.0,4733.0,4713.0,4304.0,4726.0,4429.0,4551.0,4085.0,4348.0,3.9,5.1,4.7,4.4,5.2,5.1,6.8,5.3,69.0,68.1,67.5,69.1,68.2,68.2,66.6,68.4,1397.0,1419.0,1341.0,1384.0,1291.0,1321.0,1242.0,1271.0,0.49,1.81,-0.36,0.26,1.18,1.22,1.02,0.11,-0.15,0.29,-0.49,-0.49,1.74,-0.31,1.03,0.7,219.6,186.3,134.0,-1.2,41.7,137.9,48.9,124.1,3.1,2.7,2.1,1.4,0.6,2.1,0.8,1.9,-717.4,-44.4,-151.4,-381.2,-183.6,-741.1,58.9,-228.1,92.4,70.5,-125.0,-240.5,58.8,86.9,-48.7,-41.8,1.3,1.0,-1.9,-3.4,0.9,1.3,-0.8,-0.7,401.5,334.8,187.1,416.4,417.3,340.2,198.3,410.6,-8.0,-4.0,-0.3,4.7,6.5,7.3,7.3,8.6,10.7,16.7,8.2,9.7,10.2,9.1,11.5,10.4,5.7,11.5,7.7,9.9,8.1,6.9,17.9,16.6,0.8,-0.8,-0.1,-1.5,0.5,-1.5,-2.5,-3.2,96.7,99.7,100.3,102.9,103.6,103.0,105.4,105.6,55.3,57.4,50.7,51.6,43.7,44.9,44.4,42.9,31.4,31.6,33.7,30.1,29.2,35.5,29.0,35.1,115.1,112.4,102.0,112.8,110.4,108.1,96.7,110.4,118.3,127.4,121.0,117.2,107.6,115.5,106.5,109.9,6947.0,8071.0,6506.0,5172.0,6555.0,7753.0,6830.0,5791.0,6190.0,6004.0,5319.0,5888.0,5551.0,5937.0,5278.0,5995.0,207.4,175.2,131.3,179.7,149.3,158.3,132.9,194.5,242.8,225.3,250.4,252.9,330.0,252.6,224.5,375.1,-1.0,8.6,-6.7,5.7,1.5,7.4,-5.2,6.3,8.6,8.2,-8.4,-0.5,11.1,6.3,-3.5,1.4,756223.0,816565.0,535522.0,780549.0,808620.0,716592.0,598407.0,704739.0,254985.0,298081.0,259694.0,346471.0,249839.0,193602.0,217113.0,278915.0,7528.2,7545.3,7440.7,7330.1,7220.2,7207.1,7226.8,7078.1,13980.0,13730.0,13819.0,13513.0,12792.0,13112.0,12344.0,12321.0,52000.0,0,0,0,1
48,2020,1,47534.0,227545.0,196493.0,179691.0,170509.0,100352.0,131953.0,131552.0,143589.0,3130.0,2997.0,3016.0,2871.0,3009.0,2897.0,2894.0,2689.0,1296.0,1017.0,1058.0,1034.0,1275.0,974.0,1042.0,976.0,1939.0,1822.0,1855.0,1488.0,1984.0,1659.0,1613.0,1432.0,4763.0,4856.0,4868.0,4487.0,4656.0,4487.0,4705.0,4217.0,4705.0,4733.0,4713.0,4304.0,4726.0,4429.0,4551.0,4085.0,4.1,3.9,5.1,4.7,4.4,5.2,5.1,6.8,69.2,69.0,68.1,67.5,69.1,68.2,68.2,66.6,1472.0,1397.0,1419.0,1341.0,1384.0,1291.0,1321.0,1242.0,-0.22,0.49,1.81,-0.36,0.26,1.18,1.22,1.02,-0.91,-0.15,0.29,-0.49,-0.49,1.74,-0.31,1.03,146.0,219.6,186.3,134.0,-1.2,41.7,137.9,48.9,2.0,3.1,2.7,2.1,1.4,0.6,2.1,0.8,-154.1,-717.4,-44.4,-151.4,-381.2,-183.6,-741.1,58.9,-4.4,92.4,70.5,-125.0,-240.5,58.8,86.9,-48.7,-0.1,1.3,1.0,-1.9,-3.4,0.9,1.3,-0.8,455.8,401.5,334.8,187.1,416.4,417.3,340.2,198.3,-9.9,-8.0,-4.0,-0.3,4.7,6.5,7.3,7.3,7.9,10.7,16.7,8.2,9.7,10.2,9.1,11.5,3.8,5.7,11.5,7.7,9.9,8.1,6.9,17.9,-0.6,0.8,-0.8,-0.1,-1.5,0.5,-1.5,-2.5,95.2,96.7,99.7,100.3,102.9,103.6,103.0,105.4,65.6,55.3,57.4,50.7,51.6,43.7,44.9,44.4,35.6,31.4,31.6,33.7,30.1,29.2,35.5,29.0,117.3,115.1,112.4,102.0,112.8,110.4,108.1,96.7,121.7,118.3,127.4,121.0,117.2,107.6,115.5,106.5,6060.0,6947.0,8071.0,6506.0,5172.0,6555.0,7753.0,6830.0,6050.0,6190.0,6004.0,5319.0,5888.0,5551.0,5937.0,5278.0,180.9,207.4,175.2,131.3,179.7,149.3,158.3,132.9,229.3,242.8,225.3,250.4,252.9,330.0,252.6,224.5,3.7,-1.0,8.6,-6.7,5.7,1.5,7.4,-5.2,-2.7,8.6,8.2,-8.4,-0.5,11.1,6.3,-3.5,865767.0,756223.0,816565.0,535522.0,780549.0,808620.0,716592.0,598407.0,336164.0,254985.0,298081.0,259694.0,346471.0,249839.0,193602.0,217113.0,7286.1,7528.2,7545.3,7440.7,7330.1,7220.2,7207.1,7226.8,14546.0,13980.0,13730.0,13819.0,13513.0,12792.0,13112.0,12344.0,31421.0,1,0,0,0


In [None]:
# Display the first entry stored under the 'val' key of splits_val.
# NOTE(review): the rendered output suggests the most recent lag columns are
# NaN for rows after the first one; confirm against the cell that builds splits_val.
splits_val['val'][0]

Unnamed: 0,Year,Quarter,NCN_sales_est,Order_book_lag_1,Order_book_lag_2,Order_book_lag_3,Order_book_lag_4,Order_book_lag_5,Order_book_lag_6,Order_book_lag_7,Order_book_lag_8,GDP_households_lag_1,GDP_households_lag_2,GDP_households_lag_3,GDP_households_lag_4,GDP_households_lag_5,GDP_households_lag_6,GDP_households_lag_7,GDP_households_lag_8,GDP_government_lag_1,GDP_government_lag_2,GDP_government_lag_3,GDP_government_lag_4,GDP_government_lag_5,GDP_government_lag_6,GDP_government_lag_7,GDP_government_lag_8,GDP_fixed_capital_lag_1,GDP_fixed_capital_lag_2,GDP_fixed_capital_lag_3,GDP_fixed_capital_lag_4,GDP_fixed_capital_lag_5,GDP_fixed_capital_lag_6,GDP_fixed_capital_lag_7,GDP_fixed_capital_lag_8,GDP_export_lag_1,GDP_export_lag_2,GDP_export_lag_3,GDP_export_lag_4,GDP_export_lag_5,GDP_export_lag_6,GDP_export_lag_7,GDP_export_lag_8,GDP_import_lag_1,GDP_import_lag_2,GDP_import_lag_3,GDP_import_lag_4,GDP_import_lag_5,GDP_import_lag_6,GDP_import_lag_7,GDP_import_lag_8,Unemployment_rate_lag_1,Unemployment_rate_lag_2,Unemployment_rate_lag_3,Unemployment_rate_lag_4,Unemployment_rate_lag_5,Unemployment_rate_lag_6,Unemployment_rate_lag_7,Unemployment_rate_lag_8,Employment_rate_lag_1,Employment_rate_lag_2,Employment_rate_lag_3,Employment_rate_lag_4,Employment_rate_lag_5,Employment_rate_lag_6,Employment_rate_lag_7,Employment_rate_lag_8,Gross_monthly_wage_lag_1,Gross_monthly_wage_lag_2,Gross_monthly_wage_lag_3,Gross_monthly_wage_lag_4,Gross_monthly_wage_lag_5,Gross_monthly_wage_lag_6,Gross_monthly_wage_lag_7,Gross_monthly_wage_lag_8,CPI_lag_1,CPI_lag_2,CPI_lag_3,CPI_lag_4,CPI_lag_5,CPI_lag_6,CPI_lag_7,CPI_lag_8,PPI_lag_1,PPI_lag_2,PPI_lag_3,PPI_lag_4,PPI_lag_5,PPI_lag_6,PPI_lag_7,PPI_lag_8,Current_account_balance_lag_1,Current_account_balance_lag_2,Current_account_balance_lag_3,Current_account_balance_lag_4,Current_account_balance_lag_5,Current_account_balance_lag_6,Current_account_balance_lag_7,Current_account_balance_lag_8,Current_account_balance_GDP_lag_1,Current_account_balance_
GDP_lag_2,Current_account_balance_GDP_lag_3,Current_account_balance_GDP_lag_4,Current_account_balance_GDP_lag_5,Current_account_balance_GDP_lag_6,Current_account_balance_GDP_lag_7,Current_account_balance_GDP_lag_8,FDI_balance_lag_1,FDI_balance_lag_2,FDI_balance_lag_3,FDI_balance_lag_4,FDI_balance_lag_5,FDI_balance_lag_6,FDI_balance_lag_7,FDI_balance_lag_8,General_gov_net_lag_1,General_gov_net_lag_2,General_gov_net_lag_3,General_gov_net_lag_4,General_gov_net_lag_5,General_gov_net_lag_6,General_gov_net_lag_7,General_gov_net_lag_8,General_gov_budget_balance_lag_1,General_gov_budget_balance_lag_2,General_gov_budget_balance_lag_3,General_gov_budget_balance_lag_4,General_gov_budget_balance_lag_5,General_gov_budget_balance_lag_6,General_gov_budget_balance_lag_7,General_gov_budget_balance_lag_8,General_gov_invest_lag_1,General_gov_invest_lag_2,General_gov_invest_lag_3,General_gov_invest_lag_4,General_gov_invest_lag_5,General_gov_invest_lag_6,General_gov_invest_lag_7,General_gov_invest_lag_8,Conf_industrial_lag_1,Conf_industrial_lag_2,Conf_industrial_lag_3,Conf_industrial_lag_4,Conf_industrial_lag_5,Conf_industrial_lag_6,Conf_industrial_lag_7,Conf_industrial_lag_8,Conf_retail_lag_1,Conf_retail_lag_2,Conf_retail_lag_3,Conf_retail_lag_4,Conf_retail_lag_5,Conf_retail_lag_6,Conf_retail_lag_7,Conf_retail_lag_8,Conf_service_lag_1,Conf_service_lag_2,Conf_service_lag_3,Conf_service_lag_4,Conf_service_lag_5,Conf_service_lag_6,Conf_service_lag_7,Conf_service_lag_8,Conf_consumer_lag_1,Conf_consumer_lag_2,Conf_consumer_lag_3,Conf_consumer_lag_4,Conf_consumer_lag_5,Conf_consumer_lag_6,Conf_consumer_lag_7,Conf_consumer_lag_8,Econ_sent_ind_lag_1,Econ_sent_ind_lag_2,Econ_sent_ind_lag_3,Econ_sent_ind_lag_4,Econ_sent_ind_lag_5,Econ_sent_ind_lag_6,Econ_sent_ind_lag_7,Econ_sent_ind_lag_8,Ind_BSI_lag_1,Ind_BSI_lag_2,Ind_BSI_lag_3,Ind_BSI_lag_4,Ind_BSI_lag_5,Ind_BSI_lag_6,Ind_BSI_lag_7,Ind_BSI_lag_8,Serv_BSI_lag_1,Serv_BSI_lag_2,Serv_BSI_lag_3,Serv_BSI_lag_4,Serv_BSI_lag_5,Serv_BSI_lag_6,Serv_BSI
_lag_7,Serv_BSI_lag_8,Retail_volume_idx_lag_1,Retail_volume_idx_lag_2,Retail_volume_idx_lag_3,Retail_volume_idx_lag_4,Retail_volume_idx_lag_5,Retail_volume_idx_lag_6,Retail_volume_idx_lag_7,Retail_volume_idx_lag_8,Industrial_volume_idx_lag_1,Industrial_volume_idx_lag_2,Industrial_volume_idx_lag_3,Industrial_volume_idx_lag_4,Industrial_volume_idx_lag_5,Industrial_volume_idx_lag_6,Industrial_volume_idx_lag_7,Industrial_volume_idx_lag_8,New_car_reg_lag_1,New_car_reg_lag_2,New_car_reg_lag_3,New_car_reg_lag_4,New_car_reg_lag_5,New_car_reg_lag_6,New_car_reg_lag_7,New_car_reg_lag_8,Apt_sales_lag_1,Apt_sales_lag_2,Apt_sales_lag_3,Apt_sales_lag_4,Apt_sales_lag_5,Apt_sales_lag_6,Apt_sales_lag_7,Apt_sales_lag_8,Building_permit_dwellings_lag_1,Building_permit_dwellings_lag_2,Building_permit_dwellings_lag_3,Building_permit_dwellings_lag_4,Building_permit_dwellings_lag_5,Building_permit_dwellings_lag_6,Building_permit_dwellings_lag_7,Building_permit_dwellings_lag_8,Building_permit_nonres_lag_1,Building_permit_nonres_lag_2,Building_permit_nonres_lag_3,Building_permit_nonres_lag_4,Building_permit_nonres_lag_5,Building_permit_nonres_lag_6,Building_permit_nonres_lag_7,Building_permit_nonres_lag_8,Productivity_per_employee_lag_1,Productivity_per_employee_lag_2,Productivity_per_employee_lag_3,Productivity_per_employee_lag_4,Productivity_per_employee_lag_5,Productivity_per_employee_lag_6,Productivity_per_employee_lag_7,Productivity_per_employee_lag_8,Productivity_per_hour_worked_lag_1,Productivity_per_hour_worked_lag_2,Productivity_per_hour_worked_lag_3,Productivity_per_hour_worked_lag_4,Productivity_per_hour_worked_lag_5,Productivity_per_hour_worked_lag_6,Productivity_per_hour_worked_lag_7,Productivity_per_hour_worked_lag_8,Enterprises_total_profit_lag_1,Enterprises_total_profit_lag_2,Enterprises_total_profit_lag_3,Enterprises_total_profit_lag_4,Enterprises_total_profit_lag_5,Enterprises_total_profit_lag_6,Enterprises_total_profit_lag_7,Enterprises_total_profit_lag_8,Enterprises_invest
_build_lag_1,Enterprises_invest_build_lag_2,Enterprises_invest_build_lag_3,Enterprises_invest_build_lag_4,Enterprises_invest_build_lag_5,Enterprises_invest_build_lag_6,Enterprises_invest_build_lag_7,Enterprises_invest_build_lag_8,Loans_granted_to_nonfin_co_lag_1,Loans_granted_to_nonfin_co_lag_2,Loans_granted_to_nonfin_co_lag_3,Loans_granted_to_nonfin_co_lag_4,Loans_granted_to_nonfin_co_lag_5,Loans_granted_to_nonfin_co_lag_6,Loans_granted_to_nonfin_co_lag_7,Loans_granted_to_nonfin_co_lag_8,M1_lag_1,M1_lag_2,M1_lag_3,M1_lag_4,M1_lag_5,M1_lag_6,M1_lag_7,M1_lag_8,NCN_sales_est_lagged_4,Quarter_1,Quarter_2,Quarter_3,Quarter_4
49,2020,2,73062.0,229018.0,227545.0,196493.0,179691.0,170509.0,100352.0,131953.0,131552.0,2942.0,3130.0,2997.0,3016.0,2871.0,3009.0,2897.0,2894.0,1032.0,1296.0,1017.0,1058.0,1034.0,1275.0,974.0,1042.0,1486.0,1939.0,1822.0,1855.0,1488.0,1984.0,1659.0,1613.0,4477.0,4763.0,4856.0,4868.0,4487.0,4656.0,4487.0,4705.0,4152.0,4705.0,4733.0,4713.0,4304.0,4726.0,4429.0,4551.0,5.0,4.1,3.9,5.1,4.7,4.4,5.2,5.1,68.1,69.2,69.0,68.1,67.5,69.1,68.2,68.2,1404.0,1472.0,1397.0,1419.0,1341.0,1384.0,1291.0,1321.0,-0.58,-0.22,0.49,1.81,-0.36,0.26,1.18,1.22,-2.06,-0.91,-0.15,0.29,-0.49,-0.49,1.74,-0.31,203.8,146.0,219.6,186.3,134.0,-1.2,41.7,137.9,3.2,2.0,3.1,2.7,2.1,1.4,0.6,2.1,-477.4,-154.1,-717.4,-44.4,-151.4,-381.2,-183.6,-741.1,-394.1,-4.4,92.4,70.5,-125.0,-240.5,58.8,86.9,-6.1,-0.1,1.3,1.0,-1.9,-3.4,0.9,1.3,230.6,455.8,401.5,334.8,187.1,416.4,417.3,340.2,-6.6,-9.9,-8.0,-4.0,-0.3,4.7,6.5,7.3,9.3,7.9,10.7,16.7,8.2,9.7,10.2,9.1,5.4,3.8,5.7,11.5,7.7,9.9,8.1,6.9,-0.9,-0.6,0.8,-0.8,-0.1,-1.5,0.5,-1.5,97.1,95.2,96.7,99.7,100.3,102.9,103.6,103.0,60.9,65.6,55.3,57.4,50.7,51.6,43.7,44.9,41.5,35.6,31.4,31.6,33.7,30.1,29.2,35.5,106.7,117.3,115.1,112.4,102.0,112.8,110.4,108.1,119.7,121.7,118.3,127.4,121.0,117.2,107.6,115.5,5778.0,6060.0,6947.0,8071.0,6506.0,5172.0,6555.0,7753.0,5689.0,6050.0,6190.0,6004.0,5319.0,5888.0,5551.0,5937.0,173.4,180.9,207.4,175.2,131.3,179.7,149.3,158.3,429.4,229.3,242.8,225.3,250.4,252.9,330.0,252.6,-10.8,3.7,-1.0,8.6,-6.7,5.7,1.5,7.4,-8.2,-2.7,8.6,8.2,-8.4,-0.5,11.1,6.3,489524.0,865767.0,756223.0,816565.0,535522.0,780549.0,808620.0,716592.0,215888.0,336164.0,254985.0,298081.0,259694.0,346471.0,249839.0,193602.0,7489.6,7286.1,7528.2,7545.3,7440.7,7330.1,7220.2,7207.1,14936.0,14546.0,13980.0,13730.0,13819.0,13513.0,12792.0,13112.0,61601.0,0,1,0,0
50,2020,3,70453.0,,229018.0,227545.0,196493.0,179691.0,170509.0,100352.0,131953.0,,2942.0,3130.0,2997.0,3016.0,2871.0,3009.0,2897.0,,1032.0,1296.0,1017.0,1058.0,1034.0,1275.0,974.0,,1486.0,1939.0,1822.0,1855.0,1488.0,1984.0,1659.0,,4477.0,4763.0,4856.0,4868.0,4487.0,4656.0,4487.0,,4152.0,4705.0,4733.0,4713.0,4304.0,4726.0,4429.0,,5.0,4.1,3.9,5.1,4.7,4.4,5.2,,68.1,69.2,69.0,68.1,67.5,69.1,68.2,,1404.0,1472.0,1397.0,1419.0,1341.0,1384.0,1291.0,,-0.58,-0.22,0.49,1.81,-0.36,0.26,1.18,,-2.06,-0.91,-0.15,0.29,-0.49,-0.49,1.74,,203.8,146.0,219.6,186.3,134.0,-1.2,41.7,,3.2,2.0,3.1,2.7,2.1,1.4,0.6,,-477.4,-154.1,-717.4,-44.4,-151.4,-381.2,-183.6,,-394.1,-4.4,92.4,70.5,-125.0,-240.5,58.8,,-6.1,-0.1,1.3,1.0,-1.9,-3.4,0.9,,230.6,455.8,401.5,334.8,187.1,416.4,417.3,,-6.6,-9.9,-8.0,-4.0,-0.3,4.7,6.5,,9.3,7.9,10.7,16.7,8.2,9.7,10.2,,5.4,3.8,5.7,11.5,7.7,9.9,8.1,,-0.9,-0.6,0.8,-0.8,-0.1,-1.5,0.5,,97.1,95.2,96.7,99.7,100.3,102.9,103.6,,60.9,65.6,55.3,57.4,50.7,51.6,43.7,,41.5,35.6,31.4,31.6,33.7,30.1,29.2,,106.7,117.3,115.1,112.4,102.0,112.8,110.4,,119.7,121.7,118.3,127.4,121.0,117.2,107.6,,5778.0,6060.0,6947.0,8071.0,6506.0,5172.0,6555.0,,5689.0,6050.0,6190.0,6004.0,5319.0,5888.0,5551.0,,173.4,180.9,207.4,175.2,131.3,179.7,149.3,,429.4,229.3,242.8,225.3,250.4,252.9,330.0,,-10.8,3.7,-1.0,8.6,-6.7,5.7,1.5,,-8.2,-2.7,8.6,8.2,-8.4,-0.5,11.1,,489524.0,865767.0,756223.0,816565.0,535522.0,780549.0,808620.0,,215888.0,336164.0,254985.0,298081.0,259694.0,346471.0,249839.0,,7489.6,7286.1,7528.2,7545.3,7440.7,7330.1,7220.2,,14936.0,14546.0,13980.0,13730.0,13819.0,13513.0,12792.0,66551.0,0,0,1,0
51,2020,4,50625.0,,,229018.0,227545.0,196493.0,179691.0,170509.0,100352.0,,,2942.0,3130.0,2997.0,3016.0,2871.0,3009.0,,,1032.0,1296.0,1017.0,1058.0,1034.0,1275.0,,,1486.0,1939.0,1822.0,1855.0,1488.0,1984.0,,,4477.0,4763.0,4856.0,4868.0,4487.0,4656.0,,,4152.0,4705.0,4733.0,4713.0,4304.0,4726.0,,,5.0,4.1,3.9,5.1,4.7,4.4,,,68.1,69.2,69.0,68.1,67.5,69.1,,,1404.0,1472.0,1397.0,1419.0,1341.0,1384.0,,,-0.58,-0.22,0.49,1.81,-0.36,0.26,,,-2.06,-0.91,-0.15,0.29,-0.49,-0.49,,,203.8,146.0,219.6,186.3,134.0,-1.2,,,3.2,2.0,3.1,2.7,2.1,1.4,,,-477.4,-154.1,-717.4,-44.4,-151.4,-381.2,,,-394.1,-4.4,92.4,70.5,-125.0,-240.5,,,-6.1,-0.1,1.3,1.0,-1.9,-3.4,,,230.6,455.8,401.5,334.8,187.1,416.4,,,-6.6,-9.9,-8.0,-4.0,-0.3,4.7,,,9.3,7.9,10.7,16.7,8.2,9.7,,,5.4,3.8,5.7,11.5,7.7,9.9,,,-0.9,-0.6,0.8,-0.8,-0.1,-1.5,,,97.1,95.2,96.7,99.7,100.3,102.9,,,60.9,65.6,55.3,57.4,50.7,51.6,,,41.5,35.6,31.4,31.6,33.7,30.1,,,106.7,117.3,115.1,112.4,102.0,112.8,,,119.7,121.7,118.3,127.4,121.0,117.2,,,5778.0,6060.0,6947.0,8071.0,6506.0,5172.0,,,5689.0,6050.0,6190.0,6004.0,5319.0,5888.0,,,173.4,180.9,207.4,175.2,131.3,179.7,,,429.4,229.3,242.8,225.3,250.4,252.9,,,-10.8,3.7,-1.0,8.6,-6.7,5.7,,,-8.2,-2.7,8.6,8.2,-8.4,-0.5,,,489524.0,865767.0,756223.0,816565.0,535522.0,780549.0,,,215888.0,336164.0,254985.0,298081.0,259694.0,346471.0,,,7489.6,7286.1,7528.2,7545.3,7440.7,7330.1,,,14936.0,14546.0,13980.0,13730.0,13819.0,13513.0,53727.0,0,0,0,1
52,2021,1,46978.0,,,,229018.0,227545.0,196493.0,179691.0,170509.0,,,,2942.0,3130.0,2997.0,3016.0,2871.0,,,,1032.0,1296.0,1017.0,1058.0,1034.0,,,,1486.0,1939.0,1822.0,1855.0,1488.0,,,,4477.0,4763.0,4856.0,4868.0,4487.0,,,,4152.0,4705.0,4733.0,4713.0,4304.0,,,,5.0,4.1,3.9,5.1,4.7,,,,68.1,69.2,69.0,68.1,67.5,,,,1404.0,1472.0,1397.0,1419.0,1341.0,,,,-0.58,-0.22,0.49,1.81,-0.36,,,,-2.06,-0.91,-0.15,0.29,-0.49,,,,203.8,146.0,219.6,186.3,134.0,,,,3.2,2.0,3.1,2.7,2.1,,,,-477.4,-154.1,-717.4,-44.4,-151.4,,,,-394.1,-4.4,92.4,70.5,-125.0,,,,-6.1,-0.1,1.3,1.0,-1.9,,,,230.6,455.8,401.5,334.8,187.1,,,,-6.6,-9.9,-8.0,-4.0,-0.3,,,,9.3,7.9,10.7,16.7,8.2,,,,5.4,3.8,5.7,11.5,7.7,,,,-0.9,-0.6,0.8,-0.8,-0.1,,,,97.1,95.2,96.7,99.7,100.3,,,,60.9,65.6,55.3,57.4,50.7,,,,41.5,35.6,31.4,31.6,33.7,,,,106.7,117.3,115.1,112.4,102.0,,,,119.7,121.7,118.3,127.4,121.0,,,,5778.0,6060.0,6947.0,8071.0,6506.0,,,,5689.0,6050.0,6190.0,6004.0,5319.0,,,,173.4,180.9,207.4,175.2,131.3,,,,429.4,229.3,242.8,225.3,250.4,,,,-10.8,3.7,-1.0,8.6,-6.7,,,,-8.2,-2.7,8.6,8.2,-8.4,,,,489524.0,865767.0,756223.0,816565.0,535522.0,,,,215888.0,336164.0,254985.0,298081.0,259694.0,,,,7489.6,7286.1,7528.2,7545.3,7440.7,,,,14936.0,14546.0,13980.0,13730.0,13819.0,47534.0,1,0,0,0


In [None]:
# Bundle the training folds and the test_nan folds into one dictionary
# so both can be looked up by the keys 'train' and 'test'.
splits_test = dict(
    train=splits_train_test['train'],
    test=test_nan['test_nan'],
)

In [None]:
# Inspect the end of the first entry under the 'train' key of splits_test.
# presumably a pandas DataFrame, in which case tail() shows its last 5 rows — verify
splits_test['train'][0].tail()

Unnamed: 0,Year,Quarter,NCN_sales_est,Order_book_lag_1,Order_book_lag_2,Order_book_lag_3,Order_book_lag_4,Order_book_lag_5,Order_book_lag_6,Order_book_lag_7,Order_book_lag_8,GDP_households_lag_1,GDP_households_lag_2,GDP_households_lag_3,GDP_households_lag_4,GDP_households_lag_5,GDP_households_lag_6,GDP_households_lag_7,GDP_households_lag_8,GDP_government_lag_1,GDP_government_lag_2,GDP_government_lag_3,GDP_government_lag_4,GDP_government_lag_5,GDP_government_lag_6,GDP_government_lag_7,GDP_government_lag_8,GDP_fixed_capital_lag_1,GDP_fixed_capital_lag_2,GDP_fixed_capital_lag_3,GDP_fixed_capital_lag_4,GDP_fixed_capital_lag_5,GDP_fixed_capital_lag_6,GDP_fixed_capital_lag_7,GDP_fixed_capital_lag_8,GDP_export_lag_1,GDP_export_lag_2,GDP_export_lag_3,GDP_export_lag_4,GDP_export_lag_5,GDP_export_lag_6,GDP_export_lag_7,GDP_export_lag_8,GDP_import_lag_1,GDP_import_lag_2,GDP_import_lag_3,GDP_import_lag_4,GDP_import_lag_5,GDP_import_lag_6,GDP_import_lag_7,GDP_import_lag_8,Unemployment_rate_lag_1,Unemployment_rate_lag_2,Unemployment_rate_lag_3,Unemployment_rate_lag_4,Unemployment_rate_lag_5,Unemployment_rate_lag_6,Unemployment_rate_lag_7,Unemployment_rate_lag_8,Employment_rate_lag_1,Employment_rate_lag_2,Employment_rate_lag_3,Employment_rate_lag_4,Employment_rate_lag_5,Employment_rate_lag_6,Employment_rate_lag_7,Employment_rate_lag_8,Gross_monthly_wage_lag_1,Gross_monthly_wage_lag_2,Gross_monthly_wage_lag_3,Gross_monthly_wage_lag_4,Gross_monthly_wage_lag_5,Gross_monthly_wage_lag_6,Gross_monthly_wage_lag_7,Gross_monthly_wage_lag_8,CPI_lag_1,CPI_lag_2,CPI_lag_3,CPI_lag_4,CPI_lag_5,CPI_lag_6,CPI_lag_7,CPI_lag_8,PPI_lag_1,PPI_lag_2,PPI_lag_3,PPI_lag_4,PPI_lag_5,PPI_lag_6,PPI_lag_7,PPI_lag_8,Current_account_balance_lag_1,Current_account_balance_lag_2,Current_account_balance_lag_3,Current_account_balance_lag_4,Current_account_balance_lag_5,Current_account_balance_lag_6,Current_account_balance_lag_7,Current_account_balance_lag_8,Current_account_balance_GDP_lag_1,Current_account_balance_
GDP_lag_2,Current_account_balance_GDP_lag_3,Current_account_balance_GDP_lag_4,Current_account_balance_GDP_lag_5,Current_account_balance_GDP_lag_6,Current_account_balance_GDP_lag_7,Current_account_balance_GDP_lag_8,FDI_balance_lag_1,FDI_balance_lag_2,FDI_balance_lag_3,FDI_balance_lag_4,FDI_balance_lag_5,FDI_balance_lag_6,FDI_balance_lag_7,FDI_balance_lag_8,General_gov_net_lag_1,General_gov_net_lag_2,General_gov_net_lag_3,General_gov_net_lag_4,General_gov_net_lag_5,General_gov_net_lag_6,General_gov_net_lag_7,General_gov_net_lag_8,General_gov_budget_balance_lag_1,General_gov_budget_balance_lag_2,General_gov_budget_balance_lag_3,General_gov_budget_balance_lag_4,General_gov_budget_balance_lag_5,General_gov_budget_balance_lag_6,General_gov_budget_balance_lag_7,General_gov_budget_balance_lag_8,General_gov_invest_lag_1,General_gov_invest_lag_2,General_gov_invest_lag_3,General_gov_invest_lag_4,General_gov_invest_lag_5,General_gov_invest_lag_6,General_gov_invest_lag_7,General_gov_invest_lag_8,Conf_industrial_lag_1,Conf_industrial_lag_2,Conf_industrial_lag_3,Conf_industrial_lag_4,Conf_industrial_lag_5,Conf_industrial_lag_6,Conf_industrial_lag_7,Conf_industrial_lag_8,Conf_retail_lag_1,Conf_retail_lag_2,Conf_retail_lag_3,Conf_retail_lag_4,Conf_retail_lag_5,Conf_retail_lag_6,Conf_retail_lag_7,Conf_retail_lag_8,Conf_service_lag_1,Conf_service_lag_2,Conf_service_lag_3,Conf_service_lag_4,Conf_service_lag_5,Conf_service_lag_6,Conf_service_lag_7,Conf_service_lag_8,Conf_consumer_lag_1,Conf_consumer_lag_2,Conf_consumer_lag_3,Conf_consumer_lag_4,Conf_consumer_lag_5,Conf_consumer_lag_6,Conf_consumer_lag_7,Conf_consumer_lag_8,Econ_sent_ind_lag_1,Econ_sent_ind_lag_2,Econ_sent_ind_lag_3,Econ_sent_ind_lag_4,Econ_sent_ind_lag_5,Econ_sent_ind_lag_6,Econ_sent_ind_lag_7,Econ_sent_ind_lag_8,Ind_BSI_lag_1,Ind_BSI_lag_2,Ind_BSI_lag_3,Ind_BSI_lag_4,Ind_BSI_lag_5,Ind_BSI_lag_6,Ind_BSI_lag_7,Ind_BSI_lag_8,Serv_BSI_lag_1,Serv_BSI_lag_2,Serv_BSI_lag_3,Serv_BSI_lag_4,Serv_BSI_lag_5,Serv_BSI_lag_6,Serv_BSI
_lag_7,Serv_BSI_lag_8,Retail_volume_idx_lag_1,Retail_volume_idx_lag_2,Retail_volume_idx_lag_3,Retail_volume_idx_lag_4,Retail_volume_idx_lag_5,Retail_volume_idx_lag_6,Retail_volume_idx_lag_7,Retail_volume_idx_lag_8,Industrial_volume_idx_lag_1,Industrial_volume_idx_lag_2,Industrial_volume_idx_lag_3,Industrial_volume_idx_lag_4,Industrial_volume_idx_lag_5,Industrial_volume_idx_lag_6,Industrial_volume_idx_lag_7,Industrial_volume_idx_lag_8,New_car_reg_lag_1,New_car_reg_lag_2,New_car_reg_lag_3,New_car_reg_lag_4,New_car_reg_lag_5,New_car_reg_lag_6,New_car_reg_lag_7,New_car_reg_lag_8,Apt_sales_lag_1,Apt_sales_lag_2,Apt_sales_lag_3,Apt_sales_lag_4,Apt_sales_lag_5,Apt_sales_lag_6,Apt_sales_lag_7,Apt_sales_lag_8,Building_permit_dwellings_lag_1,Building_permit_dwellings_lag_2,Building_permit_dwellings_lag_3,Building_permit_dwellings_lag_4,Building_permit_dwellings_lag_5,Building_permit_dwellings_lag_6,Building_permit_dwellings_lag_7,Building_permit_dwellings_lag_8,Building_permit_nonres_lag_1,Building_permit_nonres_lag_2,Building_permit_nonres_lag_3,Building_permit_nonres_lag_4,Building_permit_nonres_lag_5,Building_permit_nonres_lag_6,Building_permit_nonres_lag_7,Building_permit_nonres_lag_8,Productivity_per_employee_lag_1,Productivity_per_employee_lag_2,Productivity_per_employee_lag_3,Productivity_per_employee_lag_4,Productivity_per_employee_lag_5,Productivity_per_employee_lag_6,Productivity_per_employee_lag_7,Productivity_per_employee_lag_8,Productivity_per_hour_worked_lag_1,Productivity_per_hour_worked_lag_2,Productivity_per_hour_worked_lag_3,Productivity_per_hour_worked_lag_4,Productivity_per_hour_worked_lag_5,Productivity_per_hour_worked_lag_6,Productivity_per_hour_worked_lag_7,Productivity_per_hour_worked_lag_8,Enterprises_total_profit_lag_1,Enterprises_total_profit_lag_2,Enterprises_total_profit_lag_3,Enterprises_total_profit_lag_4,Enterprises_total_profit_lag_5,Enterprises_total_profit_lag_6,Enterprises_total_profit_lag_7,Enterprises_total_profit_lag_8,Enterprises_invest
_build_lag_1,Enterprises_invest_build_lag_2,Enterprises_invest_build_lag_3,Enterprises_invest_build_lag_4,Enterprises_invest_build_lag_5,Enterprises_invest_build_lag_6,Enterprises_invest_build_lag_7,Enterprises_invest_build_lag_8,Loans_granted_to_nonfin_co_lag_1,Loans_granted_to_nonfin_co_lag_2,Loans_granted_to_nonfin_co_lag_3,Loans_granted_to_nonfin_co_lag_4,Loans_granted_to_nonfin_co_lag_5,Loans_granted_to_nonfin_co_lag_6,Loans_granted_to_nonfin_co_lag_7,Loans_granted_to_nonfin_co_lag_8,M1_lag_1,M1_lag_2,M1_lag_3,M1_lag_4,M1_lag_5,M1_lag_6,M1_lag_7,M1_lag_8,NCN_sales_est_lagged_4,Quarter_1,Quarter_2,Quarter_3,Quarter_4
48,2020,1,47534.0,227545.0,196493.0,179691.0,170509.0,100352.0,131953.0,131552.0,143589.0,3130.0,2997.0,3016.0,2871.0,3009.0,2897.0,2894.0,2689.0,1296.0,1017.0,1058.0,1034.0,1275.0,974.0,1042.0,976.0,1939.0,1822.0,1855.0,1488.0,1984.0,1659.0,1613.0,1432.0,4763.0,4856.0,4868.0,4487.0,4656.0,4487.0,4705.0,4217.0,4705.0,4733.0,4713.0,4304.0,4726.0,4429.0,4551.0,4085.0,4.1,3.9,5.1,4.7,4.4,5.2,5.1,6.8,69.2,69.0,68.1,67.5,69.1,68.2,68.2,66.6,1472.0,1397.0,1419.0,1341.0,1384.0,1291.0,1321.0,1242.0,-0.22,0.49,1.81,-0.36,0.26,1.18,1.22,1.02,-0.91,-0.15,0.29,-0.49,-0.49,1.74,-0.31,1.03,146.0,219.6,186.3,134.0,-1.2,41.7,137.9,48.9,2.0,3.1,2.7,2.1,1.4,0.6,2.1,0.8,-154.1,-717.4,-44.4,-151.4,-381.2,-183.6,-741.1,58.9,-4.4,92.4,70.5,-125.0,-240.5,58.8,86.9,-48.7,-0.1,1.3,1.0,-1.9,-3.4,0.9,1.3,-0.8,455.8,401.5,334.8,187.1,416.4,417.3,340.2,198.3,-9.9,-8.0,-4.0,-0.3,4.7,6.5,7.3,7.3,7.9,10.7,16.7,8.2,9.7,10.2,9.1,11.5,3.8,5.7,11.5,7.7,9.9,8.1,6.9,17.9,-0.6,0.8,-0.8,-0.1,-1.5,0.5,-1.5,-2.5,95.2,96.7,99.7,100.3,102.9,103.6,103.0,105.4,65.6,55.3,57.4,50.7,51.6,43.7,44.9,44.4,35.6,31.4,31.6,33.7,30.1,29.2,35.5,29.0,117.3,115.1,112.4,102.0,112.8,110.4,108.1,96.7,121.7,118.3,127.4,121.0,117.2,107.6,115.5,106.5,6060.0,6947.0,8071.0,6506.0,5172.0,6555.0,7753.0,6830.0,6050.0,6190.0,6004.0,5319.0,5888.0,5551.0,5937.0,5278.0,180.9,207.4,175.2,131.3,179.7,149.3,158.3,132.9,229.3,242.8,225.3,250.4,252.9,330.0,252.6,224.5,3.7,-1.0,8.6,-6.7,5.7,1.5,7.4,-5.2,-2.7,8.6,8.2,-8.4,-0.5,11.1,6.3,-3.5,865767.0,756223.0,816565.0,535522.0,780549.0,808620.0,716592.0,598407.0,336164.0,254985.0,298081.0,259694.0,346471.0,249839.0,193602.0,217113.0,7286.1,7528.2,7545.3,7440.7,7330.1,7220.2,7207.1,7226.8,14546.0,13980.0,13730.0,13819.0,13513.0,12792.0,13112.0,12344.0,31421.0,1,0,0,0
49,2020,2,73062.0,229018.0,227545.0,196493.0,179691.0,170509.0,100352.0,131953.0,131552.0,2942.0,3130.0,2997.0,3016.0,2871.0,3009.0,2897.0,2894.0,1032.0,1296.0,1017.0,1058.0,1034.0,1275.0,974.0,1042.0,1486.0,1939.0,1822.0,1855.0,1488.0,1984.0,1659.0,1613.0,4477.0,4763.0,4856.0,4868.0,4487.0,4656.0,4487.0,4705.0,4152.0,4705.0,4733.0,4713.0,4304.0,4726.0,4429.0,4551.0,5.0,4.1,3.9,5.1,4.7,4.4,5.2,5.1,68.1,69.2,69.0,68.1,67.5,69.1,68.2,68.2,1404.0,1472.0,1397.0,1419.0,1341.0,1384.0,1291.0,1321.0,-0.58,-0.22,0.49,1.81,-0.36,0.26,1.18,1.22,-2.06,-0.91,-0.15,0.29,-0.49,-0.49,1.74,-0.31,203.8,146.0,219.6,186.3,134.0,-1.2,41.7,137.9,3.2,2.0,3.1,2.7,2.1,1.4,0.6,2.1,-477.4,-154.1,-717.4,-44.4,-151.4,-381.2,-183.6,-741.1,-394.1,-4.4,92.4,70.5,-125.0,-240.5,58.8,86.9,-6.1,-0.1,1.3,1.0,-1.9,-3.4,0.9,1.3,230.6,455.8,401.5,334.8,187.1,416.4,417.3,340.2,-6.6,-9.9,-8.0,-4.0,-0.3,4.7,6.5,7.3,9.3,7.9,10.7,16.7,8.2,9.7,10.2,9.1,5.4,3.8,5.7,11.5,7.7,9.9,8.1,6.9,-0.9,-0.6,0.8,-0.8,-0.1,-1.5,0.5,-1.5,97.1,95.2,96.7,99.7,100.3,102.9,103.6,103.0,60.9,65.6,55.3,57.4,50.7,51.6,43.7,44.9,41.5,35.6,31.4,31.6,33.7,30.1,29.2,35.5,106.7,117.3,115.1,112.4,102.0,112.8,110.4,108.1,119.7,121.7,118.3,127.4,121.0,117.2,107.6,115.5,5778.0,6060.0,6947.0,8071.0,6506.0,5172.0,6555.0,7753.0,5689.0,6050.0,6190.0,6004.0,5319.0,5888.0,5551.0,5937.0,173.4,180.9,207.4,175.2,131.3,179.7,149.3,158.3,429.4,229.3,242.8,225.3,250.4,252.9,330.0,252.6,-10.8,3.7,-1.0,8.6,-6.7,5.7,1.5,7.4,-8.2,-2.7,8.6,8.2,-8.4,-0.5,11.1,6.3,489524.0,865767.0,756223.0,816565.0,535522.0,780549.0,808620.0,716592.0,215888.0,336164.0,254985.0,298081.0,259694.0,346471.0,249839.0,193602.0,7489.6,7286.1,7528.2,7545.3,7440.7,7330.1,7220.2,7207.1,14936.0,14546.0,13980.0,13730.0,13819.0,13513.0,12792.0,13112.0,61601.0,0,1,0,0
50,2020,3,70453.0,187018.0,229018.0,227545.0,196493.0,179691.0,170509.0,100352.0,131953.0,2722.0,2942.0,3130.0,2997.0,3016.0,2871.0,3009.0,2897.0,1096.0,1032.0,1296.0,1017.0,1058.0,1034.0,1275.0,974.0,1481.0,1486.0,1939.0,1822.0,1855.0,1488.0,1984.0,1659.0,3957.0,4477.0,4763.0,4856.0,4868.0,4487.0,4656.0,4487.0,3841.0,4152.0,4705.0,4733.0,4713.0,4304.0,4726.0,4429.0,7.1,5.0,4.1,3.9,5.1,4.7,4.4,5.2,65.4,68.1,69.2,69.0,68.1,67.5,69.1,68.2,1433.0,1404.0,1472.0,1397.0,1419.0,1341.0,1384.0,1291.0,-0.85,-0.58,-0.22,0.49,1.81,-0.36,0.26,1.18,-0.46,-2.06,-0.91,-0.15,0.29,-0.49,-0.49,1.74,252.6,203.8,146.0,219.6,186.3,134.0,-1.2,41.7,3.9,3.2,2.0,3.1,2.7,2.1,1.4,0.6,-462.9,-477.4,-154.1,-717.4,-44.4,-151.4,-381.2,-183.6,-526.3,-394.1,-4.4,92.4,70.5,-125.0,-240.5,58.8,-8.1,-6.1,-0.1,1.3,1.0,-1.9,-3.4,0.9,355.4,230.6,455.8,401.5,334.8,187.1,416.4,417.3,-30.7,-6.6,-9.9,-8.0,-4.0,-0.3,4.7,6.5,-30.2,9.3,7.9,10.7,16.7,8.2,9.7,10.2,-58.2,5.4,3.8,5.7,11.5,7.7,9.9,8.1,-17.2,-0.9,-0.6,0.8,-0.8,-0.1,-1.5,0.5,68.4,97.1,95.2,96.7,99.7,100.3,102.9,103.6,70.3,60.9,65.6,55.3,57.4,50.7,51.6,43.7,51.1,41.5,35.6,31.4,31.6,33.7,30.1,29.2,111.0,106.7,117.3,115.1,112.4,102.0,112.8,110.4,111.9,119.7,121.7,118.3,127.4,121.0,117.2,107.6,3612.0,5778.0,6060.0,6947.0,8071.0,6506.0,5172.0,6555.0,4088.0,5689.0,6050.0,6190.0,6004.0,5319.0,5888.0,5551.0,163.1,173.4,180.9,207.4,175.2,131.3,179.7,149.3,282.5,429.4,229.3,242.8,225.3,250.4,252.9,330.0,3.8,-10.8,3.7,-1.0,8.6,-6.7,5.7,1.5,7.6,-8.2,-2.7,8.6,8.2,-8.4,-0.5,11.1,612132.0,489524.0,865767.0,756223.0,816565.0,535522.0,780549.0,808620.0,193893.0,215888.0,336164.0,254985.0,298081.0,259694.0,346471.0,249839.0,7396.8,7489.6,7286.1,7528.2,7545.3,7440.7,7330.1,7220.2,15904.0,14936.0,14546.0,13980.0,13730.0,13819.0,13513.0,12792.0,66551.0,0,0,1,0
51,2020,4,50625.0,215494.0,187018.0,229018.0,227545.0,196493.0,179691.0,170509.0,100352.0,3029.0,2722.0,2942.0,3130.0,2997.0,3016.0,2871.0,3009.0,1070.0,1096.0,1032.0,1296.0,1017.0,1058.0,1034.0,1275.0,1626.0,1481.0,1486.0,1939.0,1822.0,1855.0,1488.0,1984.0,4541.0,3957.0,4477.0,4763.0,4856.0,4868.0,4487.0,4656.0,4689.0,3841.0,4152.0,4705.0,4733.0,4713.0,4304.0,4726.0,7.7,7.1,5.0,4.1,3.9,5.1,4.7,4.4,66.3,65.4,68.1,69.2,69.0,68.1,67.5,69.1,1441.0,1433.0,1404.0,1472.0,1397.0,1419.0,1341.0,1384.0,0.71,-0.85,-0.58,-0.22,0.49,1.81,-0.36,0.26,0.79,-0.46,-2.06,-0.91,-0.15,0.29,-0.49,-0.49,-118.1,252.6,203.8,146.0,219.6,186.3,134.0,-1.2,-1.7,3.9,3.2,2.0,3.1,2.7,2.1,1.4,-1026.8,-462.9,-477.4,-154.1,-717.4,-44.4,-151.4,-381.2,-138.2,-526.3,-394.1,-4.4,92.4,70.5,-125.0,-240.5,-2.0,-8.1,-6.1,-0.1,1.3,1.0,-1.9,-3.4,459.0,355.4,230.6,455.8,401.5,334.8,187.1,416.4,-10.9,-30.7,-6.6,-9.9,-8.0,-4.0,-0.3,4.7,-1.4,-30.2,9.3,7.9,10.7,16.7,8.2,9.7,-27.7,-58.2,5.4,3.8,5.7,11.5,7.7,9.9,-13.7,-17.2,-0.9,-0.6,0.8,-0.8,-0.1,-1.5,87.0,68.4,97.1,95.2,96.7,99.7,100.3,102.9,76.6,70.3,60.9,65.6,55.3,57.4,50.7,51.6,64.4,51.1,41.5,35.6,31.4,31.6,33.7,30.1,119.9,111.0,106.7,117.3,115.1,112.4,102.0,112.8,117.2,111.9,119.7,121.7,118.3,127.4,121.0,117.2,5307.0,3612.0,5778.0,6060.0,6947.0,8071.0,6506.0,5172.0,5874.0,4088.0,5689.0,6050.0,6190.0,6004.0,5319.0,5888.0,262.9,163.1,173.4,180.9,207.4,175.2,131.3,179.7,237.3,282.5,429.4,229.3,242.8,225.3,250.4,252.9,8.1,3.8,-10.8,3.7,-1.0,8.6,-6.7,5.7,6.0,7.6,-8.2,-2.7,8.6,8.2,-8.4,-0.5,796894.0,612132.0,489524.0,865767.0,756223.0,816565.0,535522.0,780549.0,252326.0,193893.0,215888.0,336164.0,254985.0,298081.0,259694.0,346471.0,7467.4,7396.8,7489.6,7286.1,7528.2,7545.3,7440.7,7330.1,16508.0,15904.0,14936.0,14546.0,13980.0,13730.0,13819.0,13513.0,53727.0,0,0,0,1
52,2021,1,46978.0,215796.0,215494.0,187018.0,229018.0,227545.0,196493.0,179691.0,170509.0,3135.0,3029.0,2722.0,2942.0,3130.0,2997.0,3016.0,2871.0,1338.0,1070.0,1096.0,1032.0,1296.0,1017.0,1058.0,1034.0,3271.0,1626.0,1481.0,1486.0,1939.0,1822.0,1855.0,1488.0,4964.0,4541.0,3957.0,4477.0,4763.0,4856.0,4868.0,4487.0,6012.0,4689.0,3841.0,4152.0,4705.0,4733.0,4713.0,4304.0,7.4,7.7,7.1,5.0,4.1,3.9,5.1,4.7,67.0,66.3,65.4,68.1,69.2,69.0,68.1,67.5,1515.0,1441.0,1433.0,1404.0,1472.0,1397.0,1419.0,1341.0,-0.41,0.71,-0.85,-0.58,-0.22,0.49,1.81,-0.36,1.05,0.79,-0.46,-2.06,-0.91,-0.15,0.29,-0.49,-859.3,-118.1,252.6,203.8,146.0,219.6,186.3,134.0,-11.5,-1.7,3.9,3.2,2.0,3.1,2.7,2.1,-960.7,-1026.8,-462.9,-477.4,-154.1,-717.4,-44.4,-151.4,-431.4,-138.2,-526.3,-394.1,-4.4,92.4,70.5,-125.0,-5.8,-2.0,-8.1,-6.1,-0.1,1.3,1.0,-1.9,517.6,459.0,355.4,230.6,455.8,401.5,334.8,187.1,-1.0,-10.9,-30.7,-6.6,-9.9,-8.0,-4.0,-0.3,7.6,-1.4,-30.2,9.3,7.9,10.7,16.7,8.2,-2.9,-27.7,-58.2,5.4,3.8,5.7,11.5,7.7,-16.7,-13.7,-17.2,-0.9,-0.6,0.8,-0.8,-0.1,95.4,87.0,68.4,97.1,95.2,96.7,99.7,100.3,60.4,76.6,70.3,60.9,65.6,55.3,57.4,50.7,52.8,64.4,51.1,41.5,35.6,31.4,31.6,33.7,124.9,119.9,111.0,106.7,117.3,115.1,112.4,102.0,125.3,117.2,111.9,119.7,121.7,118.3,127.4,121.0,4588.0,5307.0,3612.0,5778.0,6060.0,6947.0,8071.0,6506.0,6629.0,5874.0,4088.0,5689.0,6050.0,6190.0,6004.0,5319.0,148.5,262.9,163.1,173.4,180.9,207.4,175.2,131.3,362.8,237.3,282.5,429.4,229.3,242.8,225.3,250.4,5.1,8.1,3.8,-10.8,3.7,-1.0,8.6,-6.7,1.2,6.0,7.6,-8.2,-2.7,8.6,8.2,-8.4,563392.0,796894.0,612132.0,489524.0,865767.0,756223.0,816565.0,535522.0,359488.0,252326.0,193893.0,215888.0,336164.0,254985.0,298081.0,259694.0,7604.7,7467.4,7396.8,7489.6,7286.1,7528.2,7545.3,7440.7,18378.0,16508.0,15904.0,14936.0,14546.0,13980.0,13730.0,13819.0,47534.0,1,0,0,0


In [None]:
# Display the first test split (element 0 of splits_test['test'])
splits_test['test'][0]

Unnamed: 0,Year,Quarter,NCN_sales_est,Order_book_lag_1,Order_book_lag_2,Order_book_lag_3,Order_book_lag_4,Order_book_lag_5,Order_book_lag_6,Order_book_lag_7,Order_book_lag_8,GDP_households_lag_1,GDP_households_lag_2,GDP_households_lag_3,GDP_households_lag_4,GDP_households_lag_5,GDP_households_lag_6,GDP_households_lag_7,GDP_households_lag_8,GDP_government_lag_1,GDP_government_lag_2,GDP_government_lag_3,GDP_government_lag_4,GDP_government_lag_5,GDP_government_lag_6,GDP_government_lag_7,GDP_government_lag_8,GDP_fixed_capital_lag_1,GDP_fixed_capital_lag_2,GDP_fixed_capital_lag_3,GDP_fixed_capital_lag_4,GDP_fixed_capital_lag_5,GDP_fixed_capital_lag_6,GDP_fixed_capital_lag_7,GDP_fixed_capital_lag_8,GDP_export_lag_1,GDP_export_lag_2,GDP_export_lag_3,GDP_export_lag_4,GDP_export_lag_5,GDP_export_lag_6,GDP_export_lag_7,GDP_export_lag_8,GDP_import_lag_1,GDP_import_lag_2,GDP_import_lag_3,GDP_import_lag_4,GDP_import_lag_5,GDP_import_lag_6,GDP_import_lag_7,GDP_import_lag_8,Unemployment_rate_lag_1,Unemployment_rate_lag_2,Unemployment_rate_lag_3,Unemployment_rate_lag_4,Unemployment_rate_lag_5,Unemployment_rate_lag_6,Unemployment_rate_lag_7,Unemployment_rate_lag_8,Employment_rate_lag_1,Employment_rate_lag_2,Employment_rate_lag_3,Employment_rate_lag_4,Employment_rate_lag_5,Employment_rate_lag_6,Employment_rate_lag_7,Employment_rate_lag_8,Gross_monthly_wage_lag_1,Gross_monthly_wage_lag_2,Gross_monthly_wage_lag_3,Gross_monthly_wage_lag_4,Gross_monthly_wage_lag_5,Gross_monthly_wage_lag_6,Gross_monthly_wage_lag_7,Gross_monthly_wage_lag_8,CPI_lag_1,CPI_lag_2,CPI_lag_3,CPI_lag_4,CPI_lag_5,CPI_lag_6,CPI_lag_7,CPI_lag_8,PPI_lag_1,PPI_lag_2,PPI_lag_3,PPI_lag_4,PPI_lag_5,PPI_lag_6,PPI_lag_7,PPI_lag_8,Current_account_balance_lag_1,Current_account_balance_lag_2,Current_account_balance_lag_3,Current_account_balance_lag_4,Current_account_balance_lag_5,Current_account_balance_lag_6,Current_account_balance_lag_7,Current_account_balance_lag_8,Current_account_balance_GDP_lag_1,Current_account_balance_
GDP_lag_2,Current_account_balance_GDP_lag_3,Current_account_balance_GDP_lag_4,Current_account_balance_GDP_lag_5,Current_account_balance_GDP_lag_6,Current_account_balance_GDP_lag_7,Current_account_balance_GDP_lag_8,FDI_balance_lag_1,FDI_balance_lag_2,FDI_balance_lag_3,FDI_balance_lag_4,FDI_balance_lag_5,FDI_balance_lag_6,FDI_balance_lag_7,FDI_balance_lag_8,General_gov_net_lag_1,General_gov_net_lag_2,General_gov_net_lag_3,General_gov_net_lag_4,General_gov_net_lag_5,General_gov_net_lag_6,General_gov_net_lag_7,General_gov_net_lag_8,General_gov_budget_balance_lag_1,General_gov_budget_balance_lag_2,General_gov_budget_balance_lag_3,General_gov_budget_balance_lag_4,General_gov_budget_balance_lag_5,General_gov_budget_balance_lag_6,General_gov_budget_balance_lag_7,General_gov_budget_balance_lag_8,General_gov_invest_lag_1,General_gov_invest_lag_2,General_gov_invest_lag_3,General_gov_invest_lag_4,General_gov_invest_lag_5,General_gov_invest_lag_6,General_gov_invest_lag_7,General_gov_invest_lag_8,Conf_industrial_lag_1,Conf_industrial_lag_2,Conf_industrial_lag_3,Conf_industrial_lag_4,Conf_industrial_lag_5,Conf_industrial_lag_6,Conf_industrial_lag_7,Conf_industrial_lag_8,Conf_retail_lag_1,Conf_retail_lag_2,Conf_retail_lag_3,Conf_retail_lag_4,Conf_retail_lag_5,Conf_retail_lag_6,Conf_retail_lag_7,Conf_retail_lag_8,Conf_service_lag_1,Conf_service_lag_2,Conf_service_lag_3,Conf_service_lag_4,Conf_service_lag_5,Conf_service_lag_6,Conf_service_lag_7,Conf_service_lag_8,Conf_consumer_lag_1,Conf_consumer_lag_2,Conf_consumer_lag_3,Conf_consumer_lag_4,Conf_consumer_lag_5,Conf_consumer_lag_6,Conf_consumer_lag_7,Conf_consumer_lag_8,Econ_sent_ind_lag_1,Econ_sent_ind_lag_2,Econ_sent_ind_lag_3,Econ_sent_ind_lag_4,Econ_sent_ind_lag_5,Econ_sent_ind_lag_6,Econ_sent_ind_lag_7,Econ_sent_ind_lag_8,Ind_BSI_lag_1,Ind_BSI_lag_2,Ind_BSI_lag_3,Ind_BSI_lag_4,Ind_BSI_lag_5,Ind_BSI_lag_6,Ind_BSI_lag_7,Ind_BSI_lag_8,Serv_BSI_lag_1,Serv_BSI_lag_2,Serv_BSI_lag_3,Serv_BSI_lag_4,Serv_BSI_lag_5,Serv_BSI_lag_6,Serv_BSI
_lag_7,Serv_BSI_lag_8,Retail_volume_idx_lag_1,Retail_volume_idx_lag_2,Retail_volume_idx_lag_3,Retail_volume_idx_lag_4,Retail_volume_idx_lag_5,Retail_volume_idx_lag_6,Retail_volume_idx_lag_7,Retail_volume_idx_lag_8,Industrial_volume_idx_lag_1,Industrial_volume_idx_lag_2,Industrial_volume_idx_lag_3,Industrial_volume_idx_lag_4,Industrial_volume_idx_lag_5,Industrial_volume_idx_lag_6,Industrial_volume_idx_lag_7,Industrial_volume_idx_lag_8,New_car_reg_lag_1,New_car_reg_lag_2,New_car_reg_lag_3,New_car_reg_lag_4,New_car_reg_lag_5,New_car_reg_lag_6,New_car_reg_lag_7,New_car_reg_lag_8,Apt_sales_lag_1,Apt_sales_lag_2,Apt_sales_lag_3,Apt_sales_lag_4,Apt_sales_lag_5,Apt_sales_lag_6,Apt_sales_lag_7,Apt_sales_lag_8,Building_permit_dwellings_lag_1,Building_permit_dwellings_lag_2,Building_permit_dwellings_lag_3,Building_permit_dwellings_lag_4,Building_permit_dwellings_lag_5,Building_permit_dwellings_lag_6,Building_permit_dwellings_lag_7,Building_permit_dwellings_lag_8,Building_permit_nonres_lag_1,Building_permit_nonres_lag_2,Building_permit_nonres_lag_3,Building_permit_nonres_lag_4,Building_permit_nonres_lag_5,Building_permit_nonres_lag_6,Building_permit_nonres_lag_7,Building_permit_nonres_lag_8,Productivity_per_employee_lag_1,Productivity_per_employee_lag_2,Productivity_per_employee_lag_3,Productivity_per_employee_lag_4,Productivity_per_employee_lag_5,Productivity_per_employee_lag_6,Productivity_per_employee_lag_7,Productivity_per_employee_lag_8,Productivity_per_hour_worked_lag_1,Productivity_per_hour_worked_lag_2,Productivity_per_hour_worked_lag_3,Productivity_per_hour_worked_lag_4,Productivity_per_hour_worked_lag_5,Productivity_per_hour_worked_lag_6,Productivity_per_hour_worked_lag_7,Productivity_per_hour_worked_lag_8,Enterprises_total_profit_lag_1,Enterprises_total_profit_lag_2,Enterprises_total_profit_lag_3,Enterprises_total_profit_lag_4,Enterprises_total_profit_lag_5,Enterprises_total_profit_lag_6,Enterprises_total_profit_lag_7,Enterprises_total_profit_lag_8,Enterprises_invest
_build_lag_1,Enterprises_invest_build_lag_2,Enterprises_invest_build_lag_3,Enterprises_invest_build_lag_4,Enterprises_invest_build_lag_5,Enterprises_invest_build_lag_6,Enterprises_invest_build_lag_7,Enterprises_invest_build_lag_8,Loans_granted_to_nonfin_co_lag_1,Loans_granted_to_nonfin_co_lag_2,Loans_granted_to_nonfin_co_lag_3,Loans_granted_to_nonfin_co_lag_4,Loans_granted_to_nonfin_co_lag_5,Loans_granted_to_nonfin_co_lag_6,Loans_granted_to_nonfin_co_lag_7,Loans_granted_to_nonfin_co_lag_8,M1_lag_1,M1_lag_2,M1_lag_3,M1_lag_4,M1_lag_5,M1_lag_6,M1_lag_7,M1_lag_8,NCN_sales_est_lagged_4,Quarter_1,Quarter_2,Quarter_3,Quarter_4
53,2021,2,65862.0,281431.0,215796.0,215494.0,187018.0,229018.0,227545.0,196493.0,179691.0,2912.0,3135.0,3029.0,2722.0,2942.0,3130.0,2997.0,3016.0,1100.0,1338.0,1070.0,1096.0,1032.0,1296.0,1017.0,1058.0,2140.0,3271.0,1626.0,1481.0,1486.0,1939.0,1822.0,1855.0,4803.0,4964.0,4541.0,3957.0,4477.0,4763.0,4856.0,4868.0,5293.0,6012.0,4689.0,3841.0,4152.0,4705.0,4733.0,4713.0,7.1,7.4,7.7,7.1,5.0,4.1,3.9,5.1,65.9,67.0,66.3,65.4,68.1,69.2,69.0,68.1,1406.0,1515.0,1441.0,1433.0,1404.0,1472.0,1397.0,1419.0,1.19,-0.41,0.71,-0.85,-0.58,-0.22,0.49,1.81,2.31,1.05,0.79,-0.46,-2.06,-0.91,-0.15,0.29,-423.0,-859.3,-118.1,252.6,203.8,146.0,219.6,186.3,-6.2,-11.5,-1.7,3.9,3.2,2.0,3.1,2.7,-1318.3,-960.7,-1026.8,-462.9,-477.4,-154.1,-717.4,-44.4,-429.5,-431.4,-138.2,-526.3,-394.1,-4.4,92.4,70.5,-6.2,-5.8,-2.0,-8.1,-6.1,-0.1,1.3,1.0,263.4,517.6,459.0,355.4,230.6,455.8,401.5,334.8,6.1,-1.0,-10.9,-30.7,-6.6,-9.9,-8.0,-4.0,8.3,7.6,-1.4,-30.2,9.3,7.9,10.7,16.7,0.9,-2.9,-27.7,-58.2,5.4,3.8,5.7,11.5,-15.7,-16.7,-13.7,-17.2,-0.9,-0.6,0.8,-0.8,99.5,95.4,87.0,68.4,97.1,95.2,96.7,99.7,44.7,60.4,76.6,70.3,60.9,65.6,55.3,57.4,39.9,52.8,64.4,51.1,41.5,35.6,31.4,31.6,112.0,124.9,119.9,111.0,106.7,117.3,115.1,112.4,123.6,125.3,117.2,111.9,119.7,121.7,118.3,127.4,5904.0,4588.0,5307.0,3612.0,5778.0,6060.0,6947.0,8071.0,6185.0,6629.0,5874.0,4088.0,5689.0,6050.0,6190.0,6004.0,183.6,148.5,262.9,163.1,173.4,180.9,207.4,175.2,322.6,362.8,237.3,282.5,429.4,229.3,242.8,225.3,-6.7,5.1,8.1,3.8,-10.8,3.7,-1.0,8.6,-10.1,1.2,6.0,7.6,-8.2,-2.7,8.6,8.2,508514.0,563392.0,796894.0,612132.0,489524.0,865767.0,756223.0,816565.0,225775.0,359488.0,252326.0,193893.0,215888.0,336164.0,254985.0,298081.0,7720.4,7604.7,7467.4,7396.8,7489.6,7286.1,7528.2,7545.3,19542.0,18378.0,16508.0,15904.0,14936.0,14546.0,13980.0,13730.0,73062.0,0,1,0,0
54,2021,3,84815.0,,281431.0,215796.0,215494.0,187018.0,229018.0,227545.0,196493.0,,2912.0,3135.0,3029.0,2722.0,2942.0,3130.0,2997.0,,1100.0,1338.0,1070.0,1096.0,1032.0,1296.0,1017.0,,2140.0,3271.0,1626.0,1481.0,1486.0,1939.0,1822.0,,4803.0,4964.0,4541.0,3957.0,4477.0,4763.0,4856.0,,5293.0,6012.0,4689.0,3841.0,4152.0,4705.0,4733.0,,7.1,7.4,7.7,7.1,5.0,4.1,3.9,,65.9,67.0,66.3,65.4,68.1,69.2,69.0,,1406.0,1515.0,1441.0,1433.0,1404.0,1472.0,1397.0,,1.19,-0.41,0.71,-0.85,-0.58,-0.22,0.49,,2.31,1.05,0.79,-0.46,-2.06,-0.91,-0.15,,-423.0,-859.3,-118.1,252.6,203.8,146.0,219.6,,-6.2,-11.5,-1.7,3.9,3.2,2.0,3.1,,-1318.3,-960.7,-1026.8,-462.9,-477.4,-154.1,-717.4,,-429.5,-431.4,-138.2,-526.3,-394.1,-4.4,92.4,,-6.2,-5.8,-2.0,-8.1,-6.1,-0.1,1.3,,263.4,517.6,459.0,355.4,230.6,455.8,401.5,,6.1,-1.0,-10.9,-30.7,-6.6,-9.9,-8.0,,8.3,7.6,-1.4,-30.2,9.3,7.9,10.7,,0.9,-2.9,-27.7,-58.2,5.4,3.8,5.7,,-15.7,-16.7,-13.7,-17.2,-0.9,-0.6,0.8,,99.5,95.4,87.0,68.4,97.1,95.2,96.7,,44.7,60.4,76.6,70.3,60.9,65.6,55.3,,39.9,52.8,64.4,51.1,41.5,35.6,31.4,,112.0,124.9,119.9,111.0,106.7,117.3,115.1,,123.6,125.3,117.2,111.9,119.7,121.7,118.3,,5904.0,4588.0,5307.0,3612.0,5778.0,6060.0,6947.0,,6185.0,6629.0,5874.0,4088.0,5689.0,6050.0,6190.0,,183.6,148.5,262.9,163.1,173.4,180.9,207.4,,322.6,362.8,237.3,282.5,429.4,229.3,242.8,,-6.7,5.1,8.1,3.8,-10.8,3.7,-1.0,,-10.1,1.2,6.0,7.6,-8.2,-2.7,8.6,,508514.0,563392.0,796894.0,612132.0,489524.0,865767.0,756223.0,,225775.0,359488.0,252326.0,193893.0,215888.0,336164.0,254985.0,,7720.4,7604.7,7467.4,7396.8,7489.6,7286.1,7528.2,,19542.0,18378.0,16508.0,15904.0,14936.0,14546.0,13980.0,70453.0,0,0,1,0
55,2021,4,74396.0,,,281431.0,215796.0,215494.0,187018.0,229018.0,227545.0,,,2912.0,3135.0,3029.0,2722.0,2942.0,3130.0,,,1100.0,1338.0,1070.0,1096.0,1032.0,1296.0,,,2140.0,3271.0,1626.0,1481.0,1486.0,1939.0,,,4803.0,4964.0,4541.0,3957.0,4477.0,4763.0,,,5293.0,6012.0,4689.0,3841.0,4152.0,4705.0,,,7.1,7.4,7.7,7.1,5.0,4.1,,,65.9,67.0,66.3,65.4,68.1,69.2,,,1406.0,1515.0,1441.0,1433.0,1404.0,1472.0,,,1.19,-0.41,0.71,-0.85,-0.58,-0.22,,,2.31,1.05,0.79,-0.46,-2.06,-0.91,,,-423.0,-859.3,-118.1,252.6,203.8,146.0,,,-6.2,-11.5,-1.7,3.9,3.2,2.0,,,-1318.3,-960.7,-1026.8,-462.9,-477.4,-154.1,,,-429.5,-431.4,-138.2,-526.3,-394.1,-4.4,,,-6.2,-5.8,-2.0,-8.1,-6.1,-0.1,,,263.4,517.6,459.0,355.4,230.6,455.8,,,6.1,-1.0,-10.9,-30.7,-6.6,-9.9,,,8.3,7.6,-1.4,-30.2,9.3,7.9,,,0.9,-2.9,-27.7,-58.2,5.4,3.8,,,-15.7,-16.7,-13.7,-17.2,-0.9,-0.6,,,99.5,95.4,87.0,68.4,97.1,95.2,,,44.7,60.4,76.6,70.3,60.9,65.6,,,39.9,52.8,64.4,51.1,41.5,35.6,,,112.0,124.9,119.9,111.0,106.7,117.3,,,123.6,125.3,117.2,111.9,119.7,121.7,,,5904.0,4588.0,5307.0,3612.0,5778.0,6060.0,,,6185.0,6629.0,5874.0,4088.0,5689.0,6050.0,,,183.6,148.5,262.9,163.1,173.4,180.9,,,322.6,362.8,237.3,282.5,429.4,229.3,,,-6.7,5.1,8.1,3.8,-10.8,3.7,,,-10.1,1.2,6.0,7.6,-8.2,-2.7,,,508514.0,563392.0,796894.0,612132.0,489524.0,865767.0,,,225775.0,359488.0,252326.0,193893.0,215888.0,336164.0,,,7720.4,7604.7,7467.4,7396.8,7489.6,7286.1,,,19542.0,18378.0,16508.0,15904.0,14936.0,14546.0,50625.0,0,0,0,1
56,2022,1,64830.0,,,,281431.0,215796.0,215494.0,187018.0,229018.0,,,,2912.0,3135.0,3029.0,2722.0,2942.0,,,,1100.0,1338.0,1070.0,1096.0,1032.0,,,,2140.0,3271.0,1626.0,1481.0,1486.0,,,,4803.0,4964.0,4541.0,3957.0,4477.0,,,,5293.0,6012.0,4689.0,3841.0,4152.0,,,,7.1,7.4,7.7,7.1,5.0,,,,65.9,67.0,66.3,65.4,68.1,,,,1406.0,1515.0,1441.0,1433.0,1404.0,,,,1.19,-0.41,0.71,-0.85,-0.58,,,,2.31,1.05,0.79,-0.46,-2.06,,,,-423.0,-859.3,-118.1,252.6,203.8,,,,-6.2,-11.5,-1.7,3.9,3.2,,,,-1318.3,-960.7,-1026.8,-462.9,-477.4,,,,-429.5,-431.4,-138.2,-526.3,-394.1,,,,-6.2,-5.8,-2.0,-8.1,-6.1,,,,263.4,517.6,459.0,355.4,230.6,,,,6.1,-1.0,-10.9,-30.7,-6.6,,,,8.3,7.6,-1.4,-30.2,9.3,,,,0.9,-2.9,-27.7,-58.2,5.4,,,,-15.7,-16.7,-13.7,-17.2,-0.9,,,,99.5,95.4,87.0,68.4,97.1,,,,44.7,60.4,76.6,70.3,60.9,,,,39.9,52.8,64.4,51.1,41.5,,,,112.0,124.9,119.9,111.0,106.7,,,,123.6,125.3,117.2,111.9,119.7,,,,5904.0,4588.0,5307.0,3612.0,5778.0,,,,6185.0,6629.0,5874.0,4088.0,5689.0,,,,183.6,148.5,262.9,163.1,173.4,,,,322.6,362.8,237.3,282.5,429.4,,,,-6.7,5.1,8.1,3.8,-10.8,,,,-10.1,1.2,6.0,7.6,-8.2,,,,508514.0,563392.0,796894.0,612132.0,489524.0,,,,225775.0,359488.0,252326.0,193893.0,215888.0,,,,7720.4,7604.7,7467.4,7396.8,7489.6,,,,19542.0,18378.0,16508.0,15904.0,14936.0,46978.0,1,0,0,0


In [None]:
# Calculate number of available attributes for each forecast horizon step.
# Each row of the first test split is one step; cells that are NaN in a row
# are attributes that are not available for that step.
first_test_fold = splits_test['test'][0]

# DataFrame.count(axis=1) returns the number of non-NaN cells per row,
# indexed like the DataFrame, without a Python-level loop over the rows
non_nan_counts_series = first_test_fold.count(axis=1)

# Plain-list copy of the per-row counts
non_nan_counts = non_nan_counts_series.tolist()

print(non_nan_counts_series - 3) #subtract 'Year', 'Quarter' and 'NCN_sales_est'

53    293
54    257
55    221
56    185
dtype: int64


## Regression test with NaN values in validation and test

In [None]:
# Candidate models, each constructed with its library-default arguments.
# The keys are the display names used in the result lists and printouts;
# "K-Nearest Neighbors" is matched by name in the evaluation loop to skip
# seeding, so renaming a key requires updating that check as well.
regressors = {
    "Ridge": Ridge(),
    "Elastic Net": ElasticNet(),
    "Random Forest": RandomForestRegressor(),
    "K-Nearest Neighbors": KNeighborsRegressor(),
    "XGBoost": XGBRegressor(),
}

### Validation

In [None]:
# ITERATIONS WITH RANDOM SEEDS
# For every seed and every validation fold, each regressor is evaluated one
# validation row at a time: the features that are NaN in that row are dropped
# from both the row and the training data, the model is refitted on the
# remaining columns and a single prediction is made for the row.
reg_names_nan = []
cv_MAPE_nan = []
cv_R2_nan = []
cv_RMSE_nan = []
cv_folds_nan = []
all_preds = {}  # To store predictions for each regressor
preds_and_actuals = []

random_seeds = [42, 24, 99, 2024, 69, 1989, 2, 1972, 3, 16]

for seed in random_seeds:
    print(f"Random Seed: {seed}")
    f = 1

    for train, val in zip(splits_val['train'], splits_val['val']):
        print('Fold: ', f)

        # Separate the target from the features; 'Year' and 'Quarter' are
        # excluded from the model inputs
        X_train = train.drop(columns=['NCN_sales_est', 'Year', 'Quarter'])
        y_train = train['NCN_sales_est']
        X_val = val.drop(columns=['NCN_sales_est', 'Year', 'Quarter'])
        y_val = val['NCN_sales_est']

        # The scaler is fitted on the training rows only and then applied
        # to the validation rows
        sc = MinMaxScaler()
        X_train = pd.DataFrame(sc.fit_transform(X_train), columns=X_train.columns)
        X_val = pd.DataFrame(sc.transform(X_val), columns=X_val.columns)

        for reg_name, reg in regressors.items():
            reg_names_nan.append(reg_name)

            # Skip setting random seed for this model (not a parameter).
            # The seed does not change between validation rows, so it is
            # set once here rather than before every refit.
            if reg_name != 'K-Nearest Neighbors':
                reg.set_params(random_state=seed)  # Set random seed

            y_val_pred = []

            for i in range(len(X_val)):
                # One-row frame for the i-th validation observation
                X_val_test = X_val.iloc[[i]]
                nan_columns = X_val_test.columns[X_val_test.isna().any()].tolist()

                # Use only the features that are present for this row
                X_val_clean = X_val_test.drop(columns=nan_columns)
                X_train_clean = X_train.drop(columns=nan_columns)

                reg.fit(X_train_clean, y_train)
                # predict() returns a length-1 array; keep the scalar
                y_val_pred.append(reg.predict(X_val_clean)[0])

            # RMSE as the square root of the MSE; this avoids the
            # `squared=False` argument, which newer scikit-learn releases
            # no longer accept
            rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))
            mape = mean_absolute_percentage_error(y_val, y_val_pred)
            r2 = r2_score(y_val, y_val_pred)
            cv_MAPE_nan.append(mape)
            cv_R2_nan.append(r2)
            cv_RMSE_nan.append(rmse)
            # Record the fold number (previously the last row index `i`
            # was stored here, which made the 'fold' column wrong)
            cv_folds_nan.append(f)

            if reg_name not in all_preds:
                all_preds[reg_name] = []
            all_preds[reg_name].append(np.array(y_val_pred))

            # NOTE: train_preds come from the model as fitted for the LAST
            # validation row of this fold (and that row's column subset)
            preds_and_actuals.append({
                'reg_name': reg_name,
                'train_preds': np.array(reg.predict(X_train_clean)),
                'val_preds': np.array(y_val_pred),
                'train_actuals': np.array(y_train),
                'val_actuals': np.array(y_val)
            })

            print(reg_name)
            print('RMSE:', rmse)
            print('MAPE:', mape)
            print('R2:', r2)

        f += 1
        print('###')
        print()

# Convert the lists of predictions to numpy arrays for each regressor
for reg_name, preds_list in all_preds.items():
    all_preds[reg_name] = np.array(preds_list)


# Convert the list of predictions to a numpy array
preds_and_actuals = np.array(preds_and_actuals)

Random Seed: 42
Fold:  1
Ridge
RMSE: 6511.022443025221
MAPE: 0.0787959907789991
R2: 0.684239014170353
Elastic Net
RMSE: 9448.636233446532
MAPE: 0.11350114356092367
R2: 0.33503567978888993
Random Forest
RMSE: 10775.565716134119
MAPE: 0.11474433091118126
R2: 0.13515108615929117
K-Nearest Neighbors
RMSE: 10305.755831087792
MAPE: 0.12889309805536187
R2: 0.20892113439393145
XGBoost
RMSE: 8527.701857104363
MAPE: 0.12171049263665037
R2: 0.4583433228501751
###

Fold:  2
Ridge
RMSE: 5173.236062068044
MAPE: 0.0784503050687834
R2: 0.7268056354964542
Elastic Net
RMSE: 8671.769327346743
MAPE: 0.11779400174598098
R2: 0.2323511960198048
Random Forest
RMSE: 15154.12907392817
MAPE: 0.16982411904631595
R2: -1.344277977413907
K-Nearest Neighbors
RMSE: 5861.618585339719
MAPE: 0.09342737283259579
R2: 0.649262457393054
XGBoost
RMSE: 16793.28337724029
MAPE: 0.21498348068720893
R2: -1.8788456240364382
###

Fold:  3
Ridge
RMSE: 6188.580709854844
MAPE: 0.0899838653173774
R2: 0.8279627713852392
Elastic Net
RMSE:

In [None]:
# Train and validation MAPE for every stored evaluation, i.e. one entry per
# element of preds_and_actuals, in the order the entries were appended
mape_per_fold = [
    {
        'reg_name': entry['reg_name'],
        'train_mape': mean_absolute_percentage_error(
            entry['train_actuals'], entry['train_preds']
        ),
        'val_mape': mean_absolute_percentage_error(
            entry['val_actuals'], entry['val_preds']
        ),
    }
    for entry in preds_and_actuals
]

# Print both errors for each entry
for fold_data in mape_per_fold:
    print(f"Regressor: {fold_data['reg_name']}")
    print(f"Train MAPE: {fold_data['train_mape']}")
    print(f"Validation MAPE: {fold_data['val_mape']}")
    print("###")

Regressor: Ridge
Train MAPE: 0.05574999292123823
Validation MAPE: 0.0787959907789991
###
Regressor: Elastic Net
Train MAPE: 0.15150235132121054
Validation MAPE: 0.11350114356092367
###
Regressor: Random Forest
Train MAPE: 0.07375346408692625
Validation MAPE: 0.11474433091118126
###
Regressor: K-Nearest Neighbors
Train MAPE: 0.16729427070100067
Validation MAPE: 0.12889309805536187
###
Regressor: XGBoost
Train MAPE: 7.198723520677984e-08
Validation MAPE: 0.12171049263665037
###
Regressor: Ridge
Train MAPE: 0.055624729191632874
Validation MAPE: 0.0784503050687834
###
Regressor: Elastic Net
Train MAPE: 0.15169193465179084
Validation MAPE: 0.11779400174598098
###
Regressor: Random Forest
Train MAPE: 0.0754630230771723
Validation MAPE: 0.16982411904631595
###
Regressor: K-Nearest Neighbors
Train MAPE: 0.16452948171462986
Validation MAPE: 0.09342737283259579
###
Regressor: XGBoost
Train MAPE: 7.424912864819501e-08
Validation MAPE: 0.21498348068720893
###
Regressor: Ridge
Train MAPE: 0.0546956

In [None]:
# Mean train / validation MAPE per regressor, averaged over all entries of
# mape_per_fold that belong to that regressor
mean_train_mape_per_reg = {}
mean_val_mape_per_reg = {}

for reg_name in regressors:
    # Entries recorded for this regressor
    own_entries = [entry for entry in mape_per_fold if entry['reg_name'] == reg_name]

    train_mapes = [entry['train_mape'] for entry in own_entries]
    val_mapes = [entry['val_mape'] for entry in own_entries]

    mean_train_mape_per_reg[reg_name] = np.mean(train_mapes)
    mean_val_mape_per_reg[reg_name] = np.mean(val_mapes)

# Report the averages in the order the regressors were declared
for reg_name in regressors:
    print(f"Regressor: {reg_name}")
    print(f"Mean Train MAPE: {mean_train_mape_per_reg[reg_name]}")
    print(f"Mean Validation MAPE: {mean_val_mape_per_reg[reg_name]}")
    print("###")


Regressor: Ridge
Mean Train MAPE: 0.05495962930487166
Mean Validation MAPE: 0.058393196156307316
###
Regressor: Elastic Net
Mean Train MAPE: 0.15276239342222678
Mean Validation MAPE: 0.14859821807385598
###
Regressor: Random Forest
Mean Train MAPE: 0.07194397587350429
Mean Validation MAPE: 0.1539173947900482
###
Regressor: K-Nearest Neighbors
Mean Train MAPE: 0.16098642006616684
Mean Validation MAPE: 0.1642259327390098
###
Regressor: XGBoost
Mean Train MAPE: 7.170221795029363e-08
Mean Validation MAPE: 0.17235031525747443
###


In [None]:
# Summary table: one row per regressor with its mean train and validation
# MAPE, expressed in percent and rounded to two decimals
reg_results_data = {'Regressor': [], 'Mean Train MAPE': [], 'Mean Validation MAPE': []}

for reg_name in regressors:
    # Entries of mape_per_fold recorded for this regressor
    own_entries = [entry for entry in mape_per_fold if entry['reg_name'] == reg_name]

    mean_train_mape = np.mean([entry['train_mape'] for entry in own_entries])
    mean_val_mape = np.mean([entry['val_mape'] for entry in own_entries])

    reg_results_data['Regressor'].append(reg_name)
    # MAPE is stored as a fraction; multiply by 100 to report percent
    reg_results_data['Mean Train MAPE'].append(round(mean_train_mape * 100, 2))
    reg_results_data['Mean Validation MAPE'].append(round(mean_val_mape * 100, 2))

reg_results = pd.DataFrame(reg_results_data)

# Show the table as the cell output
reg_results


Unnamed: 0,Regressor,Mean Train MAPE,Mean Validation MAPE
0,Ridge,5.5,5.84
1,Elastic Net,15.28,14.86
2,Random Forest,7.19,15.39
3,K-Nearest Neighbors,16.1,16.42
4,XGBoost,0.0,17.24


In [None]:
# Order the regressors from lowest to highest mean validation MAPE
# (sort_values sorts ascending by default)
reg_results_sort = reg_results.sort_values('Mean Validation MAPE')
reg_results_sort

Unnamed: 0,Regressor,Mean Train MAPE,Mean Validation MAPE
0,Ridge,5.5,5.84
1,Elastic Net,15.28,14.86
2,Random Forest,7.19,15.39
3,K-Nearest Neighbors,16.1,16.42
4,XGBoost,0.0,17.24


In [None]:
# Collect every cross-validation error metric in one table (one row per fold/regressor record)
df_results_nan = pd.DataFrame({
    'fold': cv_folds_nan,
    'regressor': reg_names_nan,
    'RMSE': cv_RMSE_nan,
    'MAPE': cv_MAPE_nan,
    'R2': cv_R2_nan,
})

In [None]:
# Mean of each error metric per regressor, ranked by mean MAPE (best first)
metric_aggregations = {'RMSE': ['mean'], 'MAPE': ['mean'], 'R2': ['mean']}
df_results_agg_nan = (
    df_results_nan
    .groupby('regressor')
    .agg(metric_aggregations)
    .sort_values(by=('MAPE', 'mean'), ascending=True)
)
df_results_agg_nan

Unnamed: 0_level_0,RMSE,MAPE,R2
Unnamed: 0_level_1,mean,mean,mean
regressor,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Ridge,4780.021936,0.058393,0.74411
Elastic Net,12109.37895,0.148598,-0.79401
Random Forest,13806.725537,0.153917,-1.492176
K-Nearest Neighbors,12807.132048,0.164226,-1.519523
XGBoost,13594.553963,0.17235,-1.576266


### Test

In [None]:
# ITERATIONS WITH RANDOM SEEDS
# For every seed and every test fold, each regressor is refitted once per test
# row, using only the feature columns that are non-NaN for that row. Metrics
# are then computed per (seed, fold, regressor) over the fold's predictions.
reg_names_nan = []
test_MAPE_nan = []
test_R2_nan = []
test_RMSE_nan = []
test_folds_nan = []
all_preds_test = {}  # regressor name -> list of per-fold prediction arrays
preds_and_actuals_test = []

random_seeds = [42, 24, 99, 2024, 69, 1989, 2, 1972, 3, 16]
non_feature_columns = ['NCN_sales_est', 'Year', 'Quarter']

for seed in random_seeds:
    print(f"Random Seed: {seed}")

    # Folds are numbered from 1 for reporting
    for fold_no, (train, test) in enumerate(zip(splits_test['train'], splits_test['test']), start=1):
        print('Fold: ', fold_no)

        X_train = train.drop(columns=non_feature_columns)
        y_train = train['NCN_sales_est']
        X_test = test.drop(columns=non_feature_columns)
        y_test = test['NCN_sales_est']

        # Scaler is fitted on the fold's training data only and reused for its test data
        sc = MinMaxScaler()
        X_train = pd.DataFrame(sc.fit_transform(X_train), columns=X_train.columns)
        X_test = pd.DataFrame(sc.transform(X_test), columns=X_test.columns)

        for reg_name, reg in regressors.items():
            reg_names_nan.append(reg_name)

            # Seed the estimator once per fold; estimators that expose no
            # random_state parameter (K-Nearest Neighbors) are left untouched.
            if 'random_state' in reg.get_params():
                reg.set_params(random_state=seed)

            y_test_pred = []
            for row_pos in range(len(X_test)):
                X_test_row = X_test.iloc[[row_pos]]
                # Drop the features that are missing for this test row from both sets
                nan_columns = X_test_row.columns[X_test_row.isna().any()].tolist()
                X_test_clean = X_test_row.drop(columns=nan_columns)
                X_train_clean = X_train.drop(columns=nan_columns)

                reg.fit(X_train_clean, y_train)
                # predict() returns a length-1 array for the single row
                y_test_pred.append(reg.predict(X_test_clean)[0])

            # Square root of the MSE instead of the `squared=False` flag,
            # which newer scikit-learn releases no longer accept.
            rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))
            mape = mean_absolute_percentage_error(y_test, y_test_pred)
            r2 = r2_score(y_test, y_test_pred)
            test_MAPE_nan.append(mape)
            test_R2_nan.append(r2)
            test_RMSE_nan.append(rmse)
            # Record the fold number (previously the index of the last test row was stored)
            test_folds_nan.append(fold_no)

            all_preds_test.setdefault(reg_name, []).append(np.array(y_test_pred))

            preds_and_actuals_test.append({
                'reg_name': reg_name,
                # Train predictions come from the model fitted for the LAST test
                # row of the fold, i.e. on that row's non-NaN feature set.
                'train_preds': np.array(reg.predict(X_train_clean)),
                'test_preds': np.array(y_test_pred),
                'train_actuals': np.array(y_train),
                'test_actuals': np.array(y_test)
            })

            print(reg_name)
            print('RMSE:', rmse)
            print('MAPE:', mape)
            print('R2:', r2)

        print('###')
        print()

# Convert the lists of predictions to numpy arrays for each regressor
for reg_name, preds_list in all_preds_test.items():
    all_preds_test[reg_name] = np.array(preds_list)


# Convert the list of per-record dicts to a numpy (object) array
preds_and_actuals_test = np.array(preds_and_actuals_test)

Random Seed: 42
Fold:  1
Ridge
RMSE: 5914.078764651401
MAPE: 0.06390820477782191
R2: 0.458033076208026
Elastic Net
RMSE: 15894.429142723633
MAPE: 0.20949905471259012
R2: -2.9146094766199475
Random Forest
RMSE: 16838.21507638354
MAPE: 0.20195888414051028
R2: -3.393298164098142
K-Nearest Neighbors
RMSE: 17075.45243792972
MAPE: 0.22742483968498794
R2: -3.5179665731654586
XGBoost
RMSE: 16267.737364471763
MAPE: 0.18212524633857824
R2: -3.10065165877202
###

Fold:  2
Ridge
RMSE: 2605.8798325634266
MAPE: 0.03365260507537943
R2: 0.8763153285656118
Elastic Net
RMSE: 14651.821460707943
MAPE: 0.18931934340039597
R2: -2.9101223415520985
Random Forest
RMSE: 16942.046859516417
MAPE: 0.20570374783273696
R2: -4.228039585869501
K-Nearest Neighbors
RMSE: 18375.346031843867
MAPE: 0.24379119722839765
R2: -5.150043143723954
XGBoost
RMSE: 15124.872242038477
MAPE: 0.1979515140945559
R2: -3.166683759901468
###

Fold:  3
Ridge
RMSE: 2487.5835739096437
MAPE: 0.02358680753399451
R2: 0.8910440921724191
Elastic Ne

In [None]:
# Train and test MAPE for every stored (seed, fold, regressor) record
mape_per_fold_test = [
    {
        'reg_name': record['reg_name'],
        'train_mape': mean_absolute_percentage_error(record['train_actuals'], record['train_preds']),
        'test_mape': mean_absolute_percentage_error(record['test_actuals'], record['test_preds'])
    }
    for record in preds_and_actuals_test
]

# Print the scores record by record
for fold_data in mape_per_fold_test:
    print(f"Regressor: {fold_data['reg_name']}")
    print(f"Train MAPE: {fold_data['train_mape']}")
    print(f"Test MAPE: {fold_data['test_mape']}")
    print("###")

Regressor: Ridge
Train MAPE: 0.055605802835355336
Test MAPE: 0.06390820477782191
###
Regressor: Elastic Net
Train MAPE: 0.15010548699806145
Test MAPE: 0.20949905471259012
###
Regressor: Random Forest
Train MAPE: 0.07391977381129979
Test MAPE: 0.20195888414051028
###
Regressor: K-Nearest Neighbors
Train MAPE: 0.1577666190015726
Test MAPE: 0.22742483968498794
###
Regressor: XGBoost
Train MAPE: 6.615298308638667e-08
Test MAPE: 0.18212524633857824
###
Regressor: Ridge
Train MAPE: 0.055496006202434674
Test MAPE: 0.03365260507537943
###
Regressor: Elastic Net
Train MAPE: 0.1541614504776757
Test MAPE: 0.18931934340039597
###
Regressor: Random Forest
Train MAPE: 0.07095576990834164
Test MAPE: 0.20570374783273696
###
Regressor: K-Nearest Neighbors
Train MAPE: 0.1581755626955348
Test MAPE: 0.24379119722839765
###
Regressor: XGBoost
Train MAPE: 6.50102008599842e-08
Test MAPE: 0.1979515140945559
###
Regressor: Ridge
Train MAPE: 0.05356129956567891
Test MAPE: 0.02358680753399451
###
Regressor: Elas

In [None]:
# Average train/test MAPE per regressor over all stored records
mean_train_mape_per_reg = {}
mean_test_mape_per_reg = {}

for reg_name in regressors:
    # Records that belong to the current regressor
    own_records = [rec for rec in mape_per_fold_test if rec['reg_name'] == reg_name]
    train_mapes = [rec['train_mape'] for rec in own_records]
    test_mapes = [rec['test_mape'] for rec in own_records]

    mean_train_mape = np.mean(train_mapes)
    mean_test_mape = np.mean(test_mapes)

    mean_train_mape_per_reg[reg_name] = mean_train_mape
    mean_test_mape_per_reg[reg_name] = mean_test_mape

# Report the averages, one regressor at a time
for reg_name in regressors:
    print(f"Regressor: {reg_name}")
    print(f"Mean Train MAPE: {mean_train_mape_per_reg[reg_name]}")
    print(f"Mean Test MAPE: {mean_test_mape_per_reg[reg_name]}")
    print("###")

Regressor: Ridge
Mean Train MAPE: 0.054096943345458726
Mean Test MAPE: 0.08958994275166202
###
Regressor: Elastic Net
Mean Train MAPE: 0.1589985931988386
Mean Test MAPE: 0.14377738319960487
###
Regressor: Random Forest
Mean Train MAPE: 0.07031893978048716
Mean Test MAPE: 0.19051333730269232
###
Regressor: K-Nearest Neighbors
Mean Train MAPE: 0.1579659305968789
Mean Test MAPE: 0.18382627637770946
###
Regressor: XGBoost
Mean Train MAPE: 7.44204480772955e-08
Mean Test MAPE: 0.18378702299780497
###


In [None]:
# Summarise the per-record train/test MAPEs as one row per regressor (values in percent)
reg_results_data_test = {'Regressor': [], 'Mean Train MAPE': [], 'Mean Test MAPE': []}

for reg_name in regressors:
    # Records that belong to the current regressor
    own_records = [rec for rec in mape_per_fold_test if rec['reg_name'] == reg_name]
    train_mapes = [rec['train_mape'] for rec in own_records]
    test_mapes = [rec['test_mape'] for rec in own_records]

    # Average over all records
    mean_train_mape = np.mean(train_mapes)
    mean_test_mape = np.mean(test_mapes)

    # Fractions -> percentages with two decimals
    reg_results_data_test['Regressor'].append(reg_name)
    reg_results_data_test['Mean Train MAPE'].append(round(mean_train_mape*100, 2))
    reg_results_data_test['Mean Test MAPE'].append(round(mean_test_mape*100, 2))

# Tabulate and display
reg_results_test = pd.DataFrame(reg_results_data_test)
reg_results_test

Unnamed: 0,Regressor,Mean Train MAPE,Mean Test MAPE
0,Ridge,5.41,8.96
1,Elastic Net,15.9,14.38
2,Random Forest,7.03,19.05
3,K-Nearest Neighbors,15.8,18.38
4,XGBoost,0.0,18.38


### Results

In [None]:
# Attach the mean test MAPE to the train/validation summary and rank by it (best first)
test_mape_by_regressor = reg_results_test[['Regressor', 'Mean Test MAPE']]
reg_results_besttest = (
    reg_results
    .merge(test_mape_by_regressor, on='Regressor', how='left')
    .sort_values(by='Mean Test MAPE', ascending=True)
    .reset_index(drop=True)
)
reg_results_besttest

Unnamed: 0,Regressor,Mean Train MAPE,Mean Validation MAPE,Mean Test MAPE
0,Ridge,5.5,5.84,8.96
1,Elastic Net,15.28,14.86,14.38
2,K-Nearest Neighbors,16.1,16.42,18.38
3,XGBoost,0.0,17.24,18.38
4,Random Forest,7.19,15.39,19.05


# HPO

In [None]:
# Fixed seed used as the constant 'random_state' entry of the HPO search spaces
random_state = 0

In [None]:
# Each regressor label paired with a default-parameter instance of its model class
_labelled_models = [
    ("Ridge", Ridge()),
    ("Elastic Net", ElasticNet()),
    ("Random Forest", RandomForestRegressor()),
    ("K-Nearest Neighbors", KNeighborsRegressor()),
    ("XGBoost", XGBRegressor()),
]

# Parallel lists: names[k] is the label of regression_models[k]
names = [label for label, _ in _labelled_models]
regression_models = [model for _, model in _labelled_models]

In [None]:
# Define individual search spaces manually.
# One dict per regressor, listed in the same order as `names` /
# `regression_models`; the HPO loop picks the dict via names.index(regressor_name).
# Labels such as 'alpha' or 'max_iter' are reused across dicts; each dict is
# handed to its own fmin() call.
regression_search_spaces = [
    # Ridge
    {
        'alpha': hp.uniform('alpha', 0.01, 200.0),
        'fit_intercept': hp.choice('fit_intercept', [True, False]),
        'solver': hp.choice('solver', ['auto']),  # single option, i.e. effectively fixed
        'max_iter': hp.choice('max_iter', range(100, 2000, 10)),
        'tol': hp.uniform('tol', 1e-6, 1e-3),
        'random_state': random_state  # plain constant, not a sampled dimension
    },

    # Elastic Net
    {
       'alpha': hp.uniform('alpha', 0.01, 200.0),
       'copy_X': hp.choice('copy_X', [True, False]),
       'l1_ratio': hp.uniform('l1_ratio', 0, 1),
       'fit_intercept': hp.choice('fit_intercept', [True, False]),
       'max_iter': hp.choice('max_iter', range(100, 2000, 10)),
       'tol': hp.uniform('tol', 1e-6, 1e-3),
       'random_state': random_state
   },

    # Random Forest
    {
        'max_depth': hp.choice('max_depth', range(1, 40)),
        # NOTE(review): lower bound is 0.0; scikit-learn documents a float
        # min_samples_split as a fraction in (0.0, 1.0] - confirm the bound.
        'min_samples_split': hp.uniform('min_samples_split', 0.0, 1.0),
        'max_leaf_nodes': hp.choice('max_leaf_nodes', range(2, 50)),
        'n_estimators': hp.choice('n_estimators', range(50, 250, 1)),
        'random_state': random_state
    },

    # K-Nearest Neighbors (no 'random_state' entry in this space)
    {
        'n_neighbors': hp.choice('n_neighbors', range(2, 20, 1)),
        # NOTE(review): samples only the two values 1 and 5; confirm that a
        # range such as 1..5 was not intended.
        'p': hp.choice('p', [1, 5]),
        'weights': hp.choice('weights', ['uniform', 'distance']),
        'algorithm': hp.choice('algorithm', ['auto', 'ball_tree', 'kd_tree', 'brute']),
        'leaf_size': hp.choice('leaf_size', range(10, 40, 1)),
        'metric': hp.choice('metric', ['minkowski', 'euclidean', 'manhattan'])
    },

    # XGBoost
    # The booster is fixed to 'gblinear'; 'updater' and 'feature_selector'
    # presumably target that linear booster - verify against the XGBoost docs.
    {
        'learning_rate': hp.uniform('learning_rate', 0.001, 0.1),
        'reg_lambda': hp.uniform('reg_lambda', 0.01, 1),
        'reg_alpha': hp.uniform('reg_alpha', 0.01, 1),
        'updater': hp.choice('updater', ['shotgun', 'coord_descent']),
        'feature_selector': hp.choice('feature_selector', ['cyclic', 'shuffle']),
        'booster': hp.choice('booster', ['gblinear']),
        'random_state': random_state
    }

]



### Loop

In [None]:
# Number of hyperopt evaluations per regressor (assigned to n_trials and passed to fmin as max_evals)
ITERATIONS = 300

In [None]:
# Hyperparameter optimisation (hyperopt, TPE) for every regressor, followed by
# an evaluation of the best configuration found on the test folds.

# Column layout of the results table: one row per regressor
hpo_columns = ['regressor_name', 'HPO train MAPE', 'Train std', 'HPO valid MAPE',
               'Valid std', 'HPO test MAPE', 'Test std', 'runtime_hpo', 'best_params',
               'FH_MAPEs', 'train_MAPE_per_split', 'cv_MAPE_per_split', 'test_MAPE_per_split']
# Row dicts are collected in a list and the frame is rebuilt from it, because
# DataFrame.append is not available in pandas >= 2.0.
hpo_rows = []
hpo_results = pd.DataFrame(columns=hpo_columns)

# Target and calendar columns that are not used as features
non_feature_columns = ['NCN_sales_est', 'Year', 'Quarter']

# Define the best baseline score (first row of the table sorted by test MAPE)
best_baseline_score = reg_results_besttest.loc[0, 'Mean Test MAPE']

# Define how many iterations should be done
n_trials = ITERATIONS
SEED = 1972
# All individual trials will be saved here
trials_dict = {}

# Start the timer to measure the runtime of the entire pipeline
hpo_time_start = time.time()

# Run the HPO and get the best params for each regressor
for i in range(len(reg_results_besttest)):

    # To improve the readability of code, creating the following objects:
    regressor_name = reg_results_besttest.loc[i, 'Regressor']
    regressor_class = regression_models[names.index(regressor_name)] # fetch from the list not df for baseline
    regressor_search_space = regression_search_spaces[names.index(regressor_name)]

    # To improve the readability of output:
    print()
    print('----------------------------------------------------------------------')
    print(f'Using *{regressor_name}* for estimation.')
    print(regressor_search_space)
    print('----------------------------------------------------------------------')

    def hyperopt_cv_score(params):
        """Score one hyperparameter combination on the validation splits.

        For each split the current regressor is refitted once per validation
        row, using only the features that are non-NaN for that row.

        Returns a tuple (mean validation MAPE, mean train MAPE,
        per-split validation MAPEs, per-split train MAPEs).
        """
        cv_MAPE_nan = []
        train_MAPE_nan = []

        if 'n_estimators' in params:
            # Convert 'n_estimators' to an integer
            params['n_estimators'] = int(params['n_estimators'])

        for train, val in zip(splits_val['train'], splits_val['val']):
            X_train = train.drop(columns=non_feature_columns)
            y_train = train['NCN_sales_est']
            X_val = val.drop(columns=non_feature_columns)
            y_val = val['NCN_sales_est']
            # Scaler is fitted on the split's training data only
            split_scaler = MinMaxScaler()
            X_train = pd.DataFrame(split_scaler.fit_transform(X_train), columns=X_train.columns)
            X_val = pd.DataFrame(split_scaler.transform(X_val), columns=X_val.columns)

            y_val_pred = []

            # set_params returns the (shared) estimator instance itself
            model = regressor_class.set_params(**params)  # use the regressor from the list

            for row_pos in range(len(X_val)):
                X_val_row = X_val.iloc[[row_pos]]
                # Drop the features that are missing for this validation row
                nan_columns = X_val_row.columns[X_val_row.isna().any()].tolist()
                X_val_clean = X_val_row.drop(columns=nan_columns)
                X_train_clean = X_train.drop(columns=nan_columns)
                model.fit(X_train_clean, y_train)
                # predict() returns a length-1 array for the single row
                y_val_pred.append(model.predict(X_val_clean)[0])

            cv_MAPE_nan.append(mean_absolute_percentage_error(y_val, y_val_pred))

            # Train MAPE uses the model fitted for the LAST validation row of
            # the split, i.e. on that row's non-NaN feature set.
            y_train_pred = model.predict(X_train_clean)
            train_MAPE_nan.append(mean_absolute_percentage_error(y_train, y_train_pred))

        mean_mape = np.mean(cv_MAPE_nan)
        mean_train_mape = np.mean(train_MAPE_nan)

        return mean_mape, mean_train_mape, cv_MAPE_nan, train_MAPE_nan

    def f(params):
        """Hyperopt objective: return the CV MAPE as loss and track the best trial in globals."""
        global best_cv_score
        global best_train_score
        global best_params
        global best_time
        global best_cv_MAPEs
        global best_train_MAPEs

        cv_score, train_score, cv_MAPE_nan, train_MAPE_nan = hyperopt_cv_score(params)

        if cv_score < best_cv_score:
            best_cv_score = cv_score
            best_train_score = train_score
            best_params = params
            best_time = round(time.time() - start_time, 4)  # track how much time it took to find the best params
            # Store the MAPEs for the best model
            best_cv_MAPEs = cv_MAPE_nan
            best_train_MAPEs = train_MAPE_nan

            print(f'Better CV score: {best_cv_score}')
            print(f'Better train score: {best_train_score}')
            print(f'Parameter combination: {best_params}')
            print(f'Time until beating the baseline: {best_time}s')

        return {'loss': cv_score,
                'status': STATUS_OK}

    # Defining global variables to be updated
    best_cv_score = float('inf')
    best_train_score = float('inf')
    best_params = None # best hyperparameter combination
    best_time = 0 # runtime until the best CV score is computed
    best_cv_MAPEs = []  # per-split validation MAPEs of the best trial
    best_train_MAPEs = []  # per-split train MAPEs of the best trial

    trials = Trials() # store info at each step

    # Start running the algorithm and track time
    start_time = time.time()
    ## Hyperopt function
    best = fmin(f,regressor_search_space, # use the search space associated with the regressor
                algo = tpe.suggest,
                max_evals = n_trials, # how many evaluations?
                trials = trials,
                rstate=np.random.default_rng(SEED))
    # Save all trials
    trials_dict[regressor_name] = trials # save into regressor-trials
    print()
    print('######################################################################')
    # Print the summary of the best results
    print(f'Best training score in {n_trials} iterations: {best_train_score}.')
    print(f'Best validation score in {n_trials} iterations: {best_cv_score} ({best_time}s until found).')


    # Test MAPE per fold, and absolute percentage errors per test row of each fold
    test_mape_list = []
    fh_mapes = []

    # Configure the estimator once with the best parameters; it is refitted per test row below
    regressor_instance = regressor_class.set_params(**best_params)

    # Iterate through each fold in splits_test
    for fold_train, fold_test in zip(splits_test['train'], splits_test['test']):
        X_train_fold = fold_train.drop(columns=non_feature_columns)
        y_train_fold = fold_train['NCN_sales_est']
        X_test_fold = fold_test.drop(columns=non_feature_columns)
        y_test_fold = fold_test['NCN_sales_est']

        # Scale to [0,1] with a scaler fitted on THIS fold's training data.
        # The scaler created inside hyperopt_cv_score is local to that function,
        # so the previous transform-only call picked up a module-level `sc`
        # fitted by an earlier cell on a different training window.
        fold_scaler = MinMaxScaler()
        X_train_fold_scaled = pd.DataFrame(fold_scaler.fit_transform(X_train_fold), columns=X_train_fold.columns)
        X_test_fold_scaled = pd.DataFrame(fold_scaler.transform(X_test_fold), columns=X_test_fold.columns)

        # Make predictions for the current fold, one test row at a time.
        # (The row index must not be named `i`: that is the outer loop variable.)
        y_test_pred_fold = []
        for row_pos in range(len(X_test_fold_scaled)):
            X_test_nan_fold = X_test_fold_scaled.iloc[[row_pos]]
            nan_columns_fold = X_test_nan_fold.columns[X_test_nan_fold.isna().any()].tolist()
            X_test_clean_fold = X_test_nan_fold.drop(columns=nan_columns_fold)
            X_train_clean_fold = X_train_fold_scaled.drop(columns=nan_columns_fold)
            regressor_instance.fit(X_train_clean_fold, y_train_fold)
            prediction_fold = regressor_instance.predict(X_test_clean_fold)
            y_test_pred_fold.append(prediction_fold[0])

        # Calculate MAPE for the current fold
        test_mape_fold = mean_absolute_percentage_error(y_test_fold, y_test_pred_fold)
        test_mape_list.append(test_mape_fold)

        # Absolute percentage error of every test row of this fold, in row order
        apes_split = [abs((pred - act) / act) for pred, act in zip(y_test_pred_fold, y_test_fold)]
        fh_mapes.append(apes_split)

    # Calculate the mean MAPE across all folds
    test_mape_mean = np.mean(test_mape_list)
    test_mape_std = np.std(test_mape_list)
    # Mean absolute percentage error per row position, averaged across folds
    mean_values = [np.mean(member) for member in zip(*fh_mapes)]

    print(f'{regressor_name} MAPE for test data across all folds: {test_mape_mean}.')

    # Add the best results for this regressor and refresh the results table
    hpo_rows.append({'regressor_name': regressor_name,
                     'HPO train MAPE': best_train_score,
                     'Train std': np.std(best_train_MAPEs),
                     'HPO valid MAPE': best_cv_score,
                     'Valid std': np.std(best_cv_MAPEs),
                     'HPO test MAPE': test_mape_mean,
                     'Test std': test_mape_std,
                     'runtime_hpo': best_time,
                     'best_params': best_params,
                     'FH_MAPEs': mean_values,
                     'train_MAPE_per_split': best_train_MAPEs,
                     'cv_MAPE_per_split': best_cv_MAPEs,
                     'test_MAPE_per_split': test_mape_list})
    hpo_results = pd.DataFrame(hpo_rows, columns=hpo_columns)
# Mark the end of the entire pipeline
hpo_time_end = time.time()
print()
print('######################################################################')
print(f'The duration of the entire HPO pipeline for {len(reg_results_besttest)} regressors across {n_trials} trials each: ')
print(f'{round(hpo_time_end - hpo_time_start, 5)} seconds')


----------------------------------------------------------------------
Using *Ridge* for estimation.
{'alpha': <hyperopt.pyll.base.Apply object at 0x7eabec3d86d0>, 'fit_intercept': <hyperopt.pyll.base.Apply object at 0x7eabec3db010>, 'solver': <hyperopt.pyll.base.Apply object at 0x7eabec3d8790>, 'max_iter': <hyperopt.pyll.base.Apply object at 0x7eabec3daef0>, 'tol': <hyperopt.pyll.base.Apply object at 0x7eabec3da080>, 'random_state': 0}
----------------------------------------------------------------------
Better CV score: 0.055474842830418786
Better train score: 0.042945627215358294
Parameter combination: {'alpha': 0.5457579345138338, 'fit_intercept': True, 'max_iter': 500, 'random_state': 0, 'solver': 'auto', 'tol': 0.0009659227193673929}
Time until beating the baseline: 0.6761s
Better CV score: 0.05547261000435553
Better train score: 0.042912587247484105
Parameter combination: {'alpha': 0.5448414388501107, 'fit_intercept': True, 'max_iter': 500, 'random_state': 0, 'solver': 'auto',

In [None]:
# Show the HPO summary: one row per regressor, in the order they were tuned
hpo_results

Unnamed: 0,regressor_name,HPO train MAPE,Train std,HPO valid MAPE,Valid std,HPO test MAPE,Test std,runtime_hpo,best_params,FH_MAPEs,train_MAPE_per_split,cv_MAPE_per_split,test_MAPE_per_split
0,Ridge,0.042876,0.000482,0.05547,0.024359,0.098656,0.056013,92.9519,"{'alpha': 0.5438252426248501, 'fit_intercept':...","[0.08261525279811104, 0.06980042020881116, 0.1...","[0.04317532704468448, 0.042872883135561674, 0....","[0.09653562498312972, 0.06985888342346794, 0.0...","[0.07654920307129107, 0.04588245032601229, 0.0..."
1,Elastic Net,0.033444,0.001317,0.055751,0.026746,0.105524,0.052839,254.9202,"{'alpha': 9.973108826045255, 'copy_X': True, '...","[0.08847214244434784, 0.07839945934019509, 0.1...","[0.03198652928251123, 0.032189130781534146, 0....","[0.11724506861407157, 0.07023870950289694, 0.0...","[0.09367995606956717, 0.07080361652676895, 0.0..."
2,K-Nearest Neighbors,0.0,0.0,0.148266,0.047779,0.186641,0.0431,1.0994,"{'algorithm': 'brute', 'leaf_size': 39, 'metri...","[0.18547693791215658, 0.22947100158692083, 0.2...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.12798394574174418, 0.05815775988032873, 0.1...","[0.20924416332038512, 0.2362024093676182, 0.15..."
3,XGBoost,0.067642,0.001235,0.057717,0.021651,0.086134,0.045209,717.1597,"{'booster': 'gblinear', 'feature_selector': 'c...","[0.07883665244247692, 0.06773080034489458, 0.0...","[0.06743986537476192, 0.06881291584713467, 0.0...","[0.0703990174229799, 0.06667889811138855, 0.09...","[0.06114039102739344, 0.01854096454474325, 0.0..."
4,Random Forest,0.07649,0.002545,0.146966,0.028906,0.183533,0.031418,556.6586,"{'max_depth': 38, 'max_leaf_nodes': 21, 'min_s...","[0.19007065800038656, 0.20236809026495145, 0.1...","[0.07942972944919592, 0.0805806648357526, 0.07...","[0.10649519204948915, 0.12835186644190952, 0.1...","[0.17870582493116696, 0.19241889064618928, 0.1..."


In [None]:
# Rank the tuned models from lowest to highest test-set MAPE and renumber the rows
hpo_results = (
    hpo_results
    .sort_values(by='HPO test MAPE', ascending=True)
    .reset_index(drop=True)
)
hpo_results

Unnamed: 0,regressor_name,HPO train MAPE,Train std,HPO valid MAPE,Valid std,HPO test MAPE,Test std,runtime_hpo,best_params,FH_MAPEs,train_MAPE_per_split,cv_MAPE_per_split,test_MAPE_per_split
0,XGBoost,0.067642,0.001235,0.057717,0.021651,0.086134,0.045209,717.1597,"{'booster': 'gblinear', 'feature_selector': 'c...","[0.07883665244247692, 0.06773080034489458, 0.0...","[0.06743986537476192, 0.06881291584713467, 0.0...","[0.0703990174229799, 0.06667889811138855, 0.09...","[0.06114039102739344, 0.01854096454474325, 0.0..."
1,Ridge,0.042876,0.000482,0.05547,0.024359,0.098656,0.056013,92.9519,"{'alpha': 0.5438252426248501, 'fit_intercept':...","[0.08261525279811104, 0.06980042020881116, 0.1...","[0.04317532704468448, 0.042872883135561674, 0....","[0.09653562498312972, 0.06985888342346794, 0.0...","[0.07654920307129107, 0.04588245032601229, 0.0..."
2,Elastic Net,0.033444,0.001317,0.055751,0.026746,0.105524,0.052839,254.9202,"{'alpha': 9.973108826045255, 'copy_X': True, '...","[0.08847214244434784, 0.07839945934019509, 0.1...","[0.03198652928251123, 0.032189130781534146, 0....","[0.11724506861407157, 0.07023870950289694, 0.0...","[0.09367995606956717, 0.07080361652676895, 0.0..."
3,Random Forest,0.07649,0.002545,0.146966,0.028906,0.183533,0.031418,556.6586,"{'max_depth': 38, 'max_leaf_nodes': 21, 'min_s...","[0.19007065800038656, 0.20236809026495145, 0.1...","[0.07942972944919592, 0.0805806648357526, 0.07...","[0.10649519204948915, 0.12835186644190952, 0.1...","[0.17870582493116696, 0.19241889064618928, 0.1..."
4,K-Nearest Neighbors,0.0,0.0,0.148266,0.047779,0.186641,0.0431,1.0994,"{'algorithm': 'brute', 'leaf_size': 39, 'metri...","[0.18547693791215658, 0.22947100158692083, 0.2...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.12798394574174418, 0.05815775988032873, 0.1...","[0.20924416332038512, 0.2362024093676182, 0.15..."


In [None]:
# Express the MAPE means and standard deviations in percent, rounded to two decimals
percent_columns = ['HPO train MAPE', 'Train std', 'HPO valid MAPE',
                   'Valid std', 'HPO test MAPE', 'Test std']
for column in percent_columns:
    hpo_results[column] = (hpo_results[column] * 100).round(2)

# Flag the regressors whose tuned test MAPE is below the best baseline score
is_below_baseline = hpo_results['HPO test MAPE'] < best_baseline_score
hpo_results['beats_bl'] = np.where(is_below_baseline, 'yes', 'no')

# See the HPO results
hpo_results

Unnamed: 0,regressor_name,HPO train MAPE,Train std,HPO valid MAPE,Valid std,HPO test MAPE,Test std,runtime_hpo,best_params,FH_MAPEs,train_MAPE_per_split,cv_MAPE_per_split,test_MAPE_per_split,beats_bl
0,XGBoost,6.76,0.12,5.77,2.17,8.61,4.52,717.1597,"{'booster': 'gblinear', 'feature_selector': 'c...","[0.07883665244247692, 0.06773080034489458, 0.0...","[0.06743986537476192, 0.06881291584713467, 0.0...","[0.0703990174229799, 0.06667889811138855, 0.09...","[0.06114039102739344, 0.01854096454474325, 0.0...",yes
1,Ridge,4.29,0.05,5.55,2.44,9.87,5.6,92.9519,"{'alpha': 0.5438252426248501, 'fit_intercept':...","[0.08261525279811104, 0.06980042020881116, 0.1...","[0.04317532704468448, 0.042872883135561674, 0....","[0.09653562498312972, 0.06985888342346794, 0.0...","[0.07654920307129107, 0.04588245032601229, 0.0...",no
2,Elastic Net,3.34,0.13,5.58,2.67,10.55,5.28,254.9202,"{'alpha': 9.973108826045255, 'copy_X': True, '...","[0.08847214244434784, 0.07839945934019509, 0.1...","[0.03198652928251123, 0.032189130781534146, 0....","[0.11724506861407157, 0.07023870950289694, 0.0...","[0.09367995606956717, 0.07080361652676895, 0.0...",no
3,Random Forest,7.65,0.25,14.7,2.89,18.35,3.14,556.6586,"{'max_depth': 38, 'max_leaf_nodes': 21, 'min_s...","[0.19007065800038656, 0.20236809026495145, 0.1...","[0.07942972944919592, 0.0805806648357526, 0.07...","[0.10649519204948915, 0.12835186644190952, 0.1...","[0.17870582493116696, 0.19241889064618928, 0.1...",no
4,K-Nearest Neighbors,0.0,0.0,14.83,4.78,18.66,4.31,1.0994,"{'algorithm': 'brute', 'leaf_size': 39, 'metri...","[0.18547693791215658, 0.22947100158692083, 0.2...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.12798394574174418, 0.05815775988032873, 0.1...","[0.20924416332038512, 0.2362024093676182, 0.15...",no


In [None]:
# Among the tuned models that beat the baseline, show the one(s) with the smallest runtime_hpo
bl_beaters = hpo_results[hpo_results['beats_bl'] == 'yes']
shortest_runtime = bl_beaters['runtime_hpo'].min()
bl_beaters[bl_beaters['runtime_hpo'] == shortest_runtime]

Unnamed: 0,regressor_name,HPO train MAPE,Train std,HPO valid MAPE,Valid std,HPO test MAPE,Test std,runtime_hpo,best_params,FH_MAPEs,train_MAPE_per_split,cv_MAPE_per_split,test_MAPE_per_split,beats_bl
0,XGBoost,6.76,0.12,5.77,2.17,8.61,4.52,717.1597,"{'booster': 'gblinear', 'feature_selector': 'c...","[0.07883665244247692, 0.06773080034489458, 0.0...","[0.06743986537476192, 0.06881291584713467, 0.0...","[0.0703990174229799, 0.06667889811138855, 0.09...","[0.06114039102739344, 0.01854096454474325, 0.0...",yes


### Final results

In [None]:
# Put the default-parameter MAPEs next to the tuned ones, matched on the regressor name
default_score_columns = ['Regressor', 'Mean Train MAPE', 'Mean Validation MAPE', 'Mean Test MAPE']
merged_df = hpo_results.merge(
    reg_results_besttest[default_score_columns],
    left_on='regressor_name',
    right_on='Regressor',
    how='left',
)

# Flag regressors whose tuned test MAPE is lower than their default-parameter test MAPE
tuned_is_better = merged_df['HPO test MAPE'] < merged_df['Mean Test MAPE']
merged_df['improved_after_hpo'] = np.where(tuned_is_better, 'yes', 'no')

# Remove the bookkeeping columns and the duplicated name column
columns_to_drop = ['runtime_hpo', 'best_params', 'Regressor', 'FH_MAPEs', 'train_MAPE_per_split', 'cv_MAPE_per_split', 'test_MAPE_per_split']
merged_df = merged_df.drop(columns=columns_to_drop)

# Display the final dataframe
merged_df

Unnamed: 0,regressor_name,HPO train MAPE,Train std,HPO valid MAPE,Valid std,HPO test MAPE,Test std,beats_bl,Mean Train MAPE,Mean Validation MAPE,Mean Test MAPE,improved_after_hpo
0,XGBoost,6.76,0.12,5.77,2.17,8.61,4.52,yes,0.0,17.24,18.38,yes
1,Ridge,4.29,0.05,5.55,2.44,9.87,5.6,no,5.5,5.84,8.96,no
2,Elastic Net,3.34,0.13,5.58,2.67,10.55,5.28,no,15.28,14.86,14.38,yes
3,Random Forest,7.65,0.25,14.7,2.89,18.35,3.14,no,7.19,15.39,19.05,yes
4,K-Nearest Neighbors,0.0,0.0,14.83,4.78,18.66,4.31,no,16.1,16.42,18.38,no


In [None]:
# Show the baseline results loaded from 'baseline_results_v2.csv'
baselines

Unnamed: 0.1,Unnamed: 0,sNaive valid,sNaive test,wNaive valid,wNaive test
0,Mean MAPE,0.1434,0.1721,0.148,0.1711
1,MAPE std,0.0633,0.0372,0.0653,0.0412
2,Mean RMSE,12002.2089,13204.6307,12630.405,12937.8088
3,Mean R2,-1.4037,-1.4762,-1.7852,-1.5385
4,FH MAPEs,***,"[0.1878773278197889, 0.18045408841835758, 0.17...",***,"[0.18873723128298953, 0.18269765563726237, 0.1..."


In [None]:
# Rows 0-3 hold scalar metrics (mean MAPE, MAPE std, mean RMSE, mean R2) and are cast
# to float; row 4 ('FH MAPEs') holds '***' placeholders / list strings and is left as is.
columns_to_convert = [
    'sNaive valid',
    'sNaive test',
    'wNaive valid',
    'wNaive test',
]
scalar_rows = slice(None, 3)  # label-based slice: .loc includes the end label 3
baselines.loc[scalar_rows, columns_to_convert] = (
    baselines.loc[scalar_rows, columns_to_convert].astype(float)
)

In [None]:
# Add sNaive and wNaive baseline rows on top of the regressor results.
# Baseline MAPEs are stored as fractions in `baselines`, so they are scaled to percent to
# match the regressor columns; metrics that do not exist for a naive model are '***'.
def _baseline_pct(metric_row, column):
    """Return one `baselines` metric as a percentage rounded to 2 decimals.

    metric_row is the positional row in `baselines` (0 = 'Mean MAPE', 1 = 'MAPE std');
    column is the column label, e.g. 'sNaive test'. The column is selected by name and
    the row with .iloc, which avoids the deprecated integer-key fallback of
    `baselines.iloc[row][col_position]` and does not depend on column order.
    """
    return round(float(baselines[column].iloc[metric_row]) * 100, 2)


def _naive_row(model):
    """Build the results-table row for a naive baseline ('sNaive' or 'wNaive')."""
    return {
        'regressor_name': model,
        'HPO train MAPE': '***',
        'Train std': '***',
        'HPO valid MAPE': _baseline_pct(0, f'{model} valid'),
        'Valid std': _baseline_pct(1, f'{model} valid'),
        'HPO test MAPE': _baseline_pct(0, f'{model} test'),
        'Test std': _baseline_pct(1, f'{model} test'),
        'Mean Train MAPE': '***',
        'Mean Validation MAPE': '***',
        'Mean Test MAPE': '***',
        'improved_after_hpo': '***',
    }


snaive_row = _naive_row('sNaive')
wnaive_row = _naive_row('wNaive')
merged_df = pd.concat([pd.DataFrame([snaive_row, wnaive_row]), merged_df], ignore_index=True)

# Fill 'beats_bl' for the two added baseline rows only; regressor rows keep their value.
# NOTE(review): best_baseline_score comes from an earlier cell - confirm it is on the same
# percent scale as 'HPO test MAPE'.
is_naive = merged_df['regressor_name'].isin(['sNaive', 'wNaive'])
merged_df.loc[is_naive, 'beats_bl'] = (
    merged_df.loc[is_naive, 'HPO test MAPE'] < best_baseline_score
).map({True: 'yes', False: 'no'})

# Move 'beats_bl' directly after 'Test std' (looked up by name instead of a fixed position)
beats_bl = merged_df.pop('beats_bl')
merged_df.insert(merged_df.columns.get_loc('Test std') + 1, 'beats_bl', beats_bl)

merged_df

Unnamed: 0,regressor_name,HPO train MAPE,Train std,HPO valid MAPE,Valid std,HPO test MAPE,Test std,beats_bl,Mean Train MAPE,Mean Validation MAPE,Mean Test MAPE,improved_after_hpo
0,sNaive,***,***,14.34,6.33,17.21,3.72,no,***,***,***,***
1,wNaive,***,***,14.8,6.53,17.11,4.12,no,***,***,***,***
2,XGBoost,6.76,0.12,5.77,2.17,8.61,4.52,yes,0.0,17.24,18.38,yes
3,Ridge,4.29,0.05,5.55,2.44,9.87,5.6,no,5.5,5.84,8.96,no
4,Elastic Net,3.34,0.13,5.58,2.67,10.55,5.28,no,15.28,14.86,14.38,yes
5,Random Forest,7.65,0.25,14.7,2.89,18.35,3.14,no,7.19,15.39,19.05,yes
6,K-Nearest Neighbors,0.0,0.0,14.83,4.78,18.66,4.31,no,16.1,16.42,18.38,no


In [None]:
# Persist both tables as CSV without the index column: the raw HPO results and the
# merged comparison table.
for csv_path, frame in (
    ('reg_hpo_results_ar_300_v4.csv', hpo_results),
    ('reg_results_merged_ar_300_v4.csv', merged_df),
):
    frame.to_csv(csv_path, index=False)