In [9]:
import os
import numpy as np
import pandas as pd

In [10]:
# Load the pickled test labels and forecasts of hierarchy levels 1..12 into three
# row-aligned arrays: test_ids (n,), test_labels (n, 28) and test_forecasts (n, 28).
#
# Per-file chunks are gathered in lists and concatenated once after the walk instead
# of re-allocating the growing arrays for every file. The label / forecast lists
# start with an empty (0, 28) float array so that the results are promoted to
# float64 and every chunk is still checked to have 28 columns.
# NOTE: unpickling can execute arbitrary code - only point this at trusted result files.
id_chunks = []
label_chunks = [np.empty((0, 28))]
forecast_chunks = [np.empty((0, 28))]

for level in range(1, 13):
    level_dir = os.path.join("../result", f"level {level}")
    for root, dirs, files in os.walk(level_dir):
        # os.walk lists entries in arbitrary, filesystem-dependent order. Sorting the
        # sub-directories (in place, which steers the walk) and the files makes the row
        # order reproducible and visits label and forecast files in the same relative order.
        dirs.sort()
        for file in sorted(files):
            if file.startswith('test_labels'):
                with open(os.path.join(root, file), 'rb') as pickle_file:
                    datas = pd.read_pickle(pickle_file)
                # Drop the trailing singleton axis, then keep the last 28 columns of each series.
                arrays = np.squeeze(np.array(datas), axis=-1)
                label_chunks.append(arrays[:, -28:])
            elif file.startswith('test_forecasts'):
                with open(os.path.join(root, file), 'rb') as pickle_file:
                    datas = pd.read_pickle(pickle_file)
                id_chunks.append(np.array([data.item_id for data in datas]))
                # The 0.5 quantile of each forecast object is used as the point forecast.
                forecast_chunks.append(np.array([data.quantile(0.5) for data in datas]))

test_ids = np.concatenate(id_chunks, axis=0) if id_chunks else np.empty((0, ))
test_labels = np.concatenate(label_chunks, axis=0)
test_forecasts = np.concatenate(forecast_chunks, axis=0)

assert len(test_ids) == len(test_labels) == len(test_forecasts), (
    "ids, labels and forecasts must have one row per series: "
    f"{len(test_ids)} ids, {len(test_labels)} labels, {len(test_forecasts)} forecasts"
)

In [None]:
def calculate_wrmsse(y_true, y_pred, sales=None, sell_prices=None, last_week=11613):
    """Revenue-weighted root mean squared scaled error over the rows of ``y_true``.

    Each row's mean squared forecast error is divided by the mean squared
    day-to-day change of the matching sales-history row. The resulting RMSSE
    values are combined with weights proportional to each row's revenue
    (units sold times price) over the last 28 history days.

    Parameters
    ----------
    y_true, y_pred : array-like of shape (n_series, horizon)
        Actuals and forecasts, one row per row of ``sales`` and in the same order.
    sales : pandas.DataFrame, optional
        Sales history: six leading identifier columns (``store_id`` and
        ``item_id`` among them) followed by one column per day. Read from
        '../data/original/sales_train_validation.csv' when omitted.
    sell_prices : pandas.DataFrame, optional
        Long price table with columns ``store_id``, ``item_id``, ``wm_yr_wk``
        and ``sell_price``. Read from '../data/original/sell_prices.csv' when
        omitted. The frame is not modified.
    last_week : int, default 11613
        Largest ``wm_yr_wk`` value that is kept from ``sell_prices``.

    Returns
    -------
    float
        The weighted RMSSE; NaN contributions are ignored.

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` differ in shape, or do not have exactly
        one row per sales row.
    """
    if sales is None:
        sales = pd.read_csv('../data/original/sales_train_validation.csv')
    if sell_prices is None:
        sell_prices = pd.read_csv('../data/original/sell_prices.csv')

    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, got {y_true.shape} and {y_pred.shape}"
        )

    history = sales.iloc[:, 6:].values
    if y_true.ndim != 2 or y_true.shape[0] != history.shape[0]:
        raise ValueError(
            f"expected one forecast row per sales row ({history.shape[0]}), got shape {y_true.shape}"
        )

    # Key of every sales row, spelled the same way as the price pivot index below.
    series_keys = (sales['store_id'] + '_' + sales['item_id']).values

    prices = sell_prices[sell_prices['wm_yr_wk'] <= last_week]
    prices = prices.assign(id=prices['store_id'] + '_' + prices['item_id'])
    # pivot() sorts its index, which is not the row order of the sales file; reindex so
    # that price row i belongs to the same series as sales row i.
    weekly_prices = (
        prices.pivot(index='id', columns='wm_yr_wk', values='sell_price')
        .reindex(series_keys)
        .values
    )

    # Expand weekly prices to days and keep as many trailing columns as there are sales days.
    # NOTE(review): this assumes the last retained week ends on the last sales day - confirm
    # against the calendar file.
    daily_prices = np.repeat(weekly_prices, repeats=7, axis=1)[:, -history.shape[1]:]

    squared_errors = np.mean((y_true - y_pred) ** 2, axis=1)
    scale = np.mean(np.diff(history, axis=1) ** 2, axis=1)
    # The small constant keeps series with a constant history from dividing by zero.
    rmsse = np.sqrt(squared_errors / (scale + 1e-10))

    # Missing prices are NaN and are skipped by nansum.
    total_revenue = np.nansum(history[:, -28:] * daily_prices[:, -28:], axis=1)
    weight = total_revenue / np.nansum(total_revenue)

    return np.nansum(weight * rmsse)

In [None]:
# Score the stacked forecasts against the stacked labels.
# calculate_wrmsse divides the per-row error of these arrays by a scale computed per row
# of the sales CSV, so both arrays need exactly one row per sales row.
# NOTE(review): the trailing "12" presumably refers to hierarchy level 12 - confirm.
calculate_wrmsse(test_labels, test_forecasts) # 12

In [None]:
from tqdm import tqdm
from scipy.linalg import pinv

def create_S(y_id, sales=None):
    """Build the summing matrix that maps bottom-level series onto the series in ``y_id``.

    Row ``i`` of the result holds a 1 in every column whose sales row belongs to
    the series named ``y_id[i]`` and 0 elsewhere. Recognised names, checked in
    this order, are: ``'total'``, a state, a store, a category, a department,
    ``state_cat``, ``state_dept``, ``store_cat``, ``store_dept``, an item,
    ``item_state`` and ``item_store`` (parts joined with ``'_'``).

    Parameters
    ----------
    y_id : sequence of str
        Series identifiers, one per output row.
    sales : pandas.DataFrame, optional
        Frame with columns ``state_id``, ``store_id``, ``cat_id``, ``dept_id``
        and ``item_id``, one row per bottom-level series. Read from
        '../data/original/sales_train_validation.csv' when omitted. The frame
        is not modified.

    Returns
    -------
    numpy.ndarray of shape (len(y_id), len(sales))
        The 0/1 summing matrix (float64). Identifiers that match nothing are
        reported with a printed message and leave an all-zero row.
    """
    if sales is None:
        sales = pd.read_csv('../data/original/sales_train_validation.csv')

    state = sales['state_id'].astype(str)
    store = sales['store_id'].astype(str)
    cat = sales['cat_id'].astype(str)
    dept = sales['dept_id'].astype(str)
    item = sales['item_id'].astype(str)

    # Group name of every bottom-level row at each aggregation level, in lookup priority order.
    level_keys = [
        pd.Series('total', index=sales.index),
        state,
        store,
        cat,
        dept,
        state + '_' + cat,
        state + '_' + dept,
        store + '_' + cat,
        store + '_' + dept,
        item,
        item + '_' + state,
        item + '_' + store,
    ]

    # Encode every level as integer codes and remember which (level, code) each name maps to.
    # setdefault keeps the first level that defines a name, matching the priority order above.
    level_codes = []
    name_to_group = {}
    for level, keys in enumerate(level_keys):
        codes, uniques = pd.factorize(keys)
        level_codes.append(codes)
        for code, name in enumerate(uniques):
            name_to_group.setdefault(name, (level, code))

    # Dense float64 matrix: sized from the inputs rather than hard-coded, but still
    # len(y_id) * len(sales) * 8 bytes, which is large for the full hierarchy.
    S = np.zeros((len(y_id), len(sales)))

    for i, series_id in enumerate(y_id):
        group = name_to_group.get(series_id)
        if group is None:
            print(f"Error: {series_id} not found")
            continue
        level, code = group
        S[i, :] = level_codes[level] == code

    return S

def compute_W(y_actual, y_pred):
    """Scaled cross-product matrix of the forecast residuals.

    With ``E = y_actual - y_pred`` (one row per series, one column per time
    step) this returns ``(E @ E.T) / (T - 1)``, where ``T`` is the number of
    columns. The residuals are used as they are, without mean-centring.
    """
    residuals = y_actual - y_pred
    n_periods = residuals.shape[1]
    normaliser = 1 / (n_periods - 1)
    cross_products = residuals @ residuals.T
    return normaliser * cross_products

# Build the summing matrix for the loaded series ids and the scaled residual
# cross-product matrix; both are passed to mint_reconciliation further down.
S = create_S(test_ids)
W = compute_W(test_labels, test_forecasts)

In [None]:
from scipy.linalg import inv
# W is a cross-product of residuals that have only 28 columns, so its rank is at most 28
# and it is singular whenever there are more than 28 series; a plain inverse is then
# undefined. Use the Moore-Penrose pseudo-inverse instead.
# Note that mint_reconciliation computes its own pseudo-inverse of W and does not read
# this variable.
W_inv = pinv(W)

In [None]:
def mint_reconciliation(S, W, y_pred):
    """Reconcile base forecasts with the MinT (minimum trace) projection.

    Computes ``S @ pinv(S.T @ W_inv @ S) @ S.T @ W_inv @ y_pred`` with
    ``W_inv = pinv(W)``, so the returned forecasts add up along the hierarchy
    described by ``S``.

    Parameters
    ----------
    S : ndarray of shape (n_series, n_bottom)
        Summing matrix, one row per series and one column per bottom-level series.
    W : ndarray of shape (n_series, n_series)
        Covariance estimate of the base forecast errors. It may be singular;
        pseudo-inverses are used throughout.
    y_pred : ndarray of shape (n_series, horizon)
        Base forecasts in the row order of ``S``.

    Returns
    -------
    ndarray of shape (n_series, horizon)
        The reconciled forecasts.
    """
    W_inv = pinv(W)
    St_W_inv = S.T @ W_inv                       # (n_bottom, n_series)
    # G maps the base forecasts of all series to reconciled bottom-level forecasts.
    G = pinv(St_W_inv @ S) @ St_W_inv
    # Apply G first so the (n_series, n_series) projection matrix is never materialised.
    return S @ (G @ y_pred)

# Reconcile the base forecasts, then print the first ten rows before and after
# reconciliation for a quick visual comparison.
test_forecasts_reconciled = mint_reconciliation(S, W, test_forecasts)

print("Before reconcilation:", test_forecasts[:10])
print("After reconcilation:", test_forecasts_reconciled[:10])