In [None]:
import pandas as pd

# Read only the first 100 rows so the experiments below run quickly,
# then display a single row to inspect the column layout.
df_val = pd.read_csv('raw/sales_train_validation.csv', nrows=100)
df_val.head(1)

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d_1,d_2,d_3,d_4,...,d_1904,d_1905,d_1906,d_1907,d_1908,d_1909,d_1910,d_1911,d_1912,d_1913
0,HOBBIES_1_001_CA_1_validation,HOBBIES_1_001,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,1,3,0,1,1,1,3,0,1,1


In [None]:
class AverageForecast:
    """Baseline forecaster that predicts each series' historical mean.

    The mean is taken across all columns whose name starts with ``'d_'``,
    separately for every row of the input frame, and stored keyed by the
    ``(item_id, store_id)`` pair of that row.

    Attributes:
        df_input: A copy of the frame passed to the constructor.
        averages: DataFrame with a single ``'mean'`` column, indexed by
            ``['item_id', 'store_id']``.
    """

    # Columns that identify a series in both the input and the target frame.
    _KEYS = ['item_id', 'store_id']

    def __init__(self, df_input):
        """Compute per-row means of the ``d_*`` columns of ``df_input``.

        Args:
            df_input: DataFrame with ``item_id`` and ``store_id`` columns and
                the numeric ``d_*`` columns to average.
        """
        self.df_input = df_input.copy()
        day_cols = [col for col in df_input.columns if col.startswith('d_')]
        self.averages = (
            df_input[day_cols]
            .mean(axis=1)
            .to_frame('mean')
            .assign(item_id=df_input['item_id'], store_id=df_input['store_id'])
            .set_index(self._KEYS)
        )

    def __call__(self, df_target):
        """Look up the stored mean for every row of ``df_target``.

        Args:
            df_target: DataFrame with ``item_id`` and ``store_id`` columns.
                Any other columns are ignored.

        Returns:
            A float Series named ``'mean'`` with one value per target row, in
            target row order (given unique key pairs in the input data; a
            duplicated pair would yield several rows per match). Key pairs
            that were not present in the input data get ``0.0``.
        """
        # Use this instance's table (not a module-level global), and merge only
        # the key columns: unrelated target columns cannot clash with 'mean'
        # and are not carried through the merge just to be discarded.
        merged = self.averages.merge(df_target[self._KEYS], on=self._KEYS, how='right')
        return merged['mean'].fillna(0.0)
        
# Build the forecaster from the frame loaded above and predict for the
# very same rows as a quick sanity check.
ave = AverageForecast(df_val)
ave(df_val)

0     0.313643
1     0.257710
2     0.150549
3     1.718766
4     0.966545
        ...   
95    0.965499
96    0.711971
97    0.115003
98    4.151072
99    0.104025
Name: mean, Length: 100, dtype: float64

In [None]:
# A key pair that is absent from the averages table falls back to 0.0.
ave(pd.DataFrame({'item_id': ['not_existing'], 'store_id': ['not_existing']}))

0    0.0
Name: mean, dtype: float64

In [None]:
# NOTE(review): this also yields 0.0, presumably because the forecaster was
# built from only the first 100 rows and this item is not among them — confirm.
ave(pd.DataFrame({'item_id': ['HOBBIES_1_105'], 'store_id': ['CA_1']}))

0    0.0
Name: mean, dtype: float64

In [None]:
# TODO: it might be easier to predict a batch of days first and then pack them into the requested 'double-layered' structure
def submission_df_series(df, pred_fn, n_days=28, prefix='_validation'):
    """Build one block of submission rows with a constant forecast per row.

    Args:
        df: DataFrame with ``item_id`` and ``store_id`` columns; one output
            row is produced per input row.
        pred_fn: Callable taking ``df`` and returning one prediction per row.
        n_days: Number of forecast columns (``F1`` .. ``F<n_days>``).
        prefix: Text appended to ``<item_id>_<store_id>`` to form the ``id``
            (despite the name, it is used as a suffix).

    Returns:
        DataFrame indexed like ``df`` with an ``id`` column followed by
        ``n_days`` forecast columns, each holding the same predictions.

    Raises:
        ValueError: If ``pred_fn`` returns a Series whose length differs from
            the number of rows in ``df``.
    """
    ids = df['item_id'] + '_' + df['store_id'] + prefix
    predictions = pred_fn(df)
    if isinstance(predictions, pd.Series):
        if len(predictions) != len(df):
            raise ValueError(
                f'pred_fn returned {len(predictions)} predictions for {len(df)} rows'
            )
        # Predictions are positional (row i of df -> value i). Strip the index
        # so pandas does not align on labels, which would misplace values or
        # add NaN rows whenever df does not have a default 0..n-1 index.
        predictions = predictions.to_numpy()
    columns = {'id': ids.to_numpy()}
    columns.update({f'F{day}': predictions for day in range(1, n_days + 1)})
    return pd.DataFrame(columns, index=df.index)

def submission_df(pred_fn, df_val, df_eval):
    """Stack the validation and evaluation submission blocks into one frame.

    Args:
        pred_fn: Callable passed through to ``submission_df_series``.
        df_val: Frame whose ids get the ``'_validation'`` suffix.
        df_eval: Frame whose ids get the ``'_evaluation'`` suffix; must be a
            different object than ``df_val``.

    Returns:
        The validation rows followed by the evaluation rows. Each part keeps
        its own index, so index labels may repeat in the result.
    """
    # Guard against accidentally passing the same frame for both parts.
    assert df_val is not df_eval
    parts = [
        submission_df_series(frame, pred_fn, prefix=suffix)
        for frame, suffix in ((df_val, '_validation'), (df_eval, '_evaluation'))
    ]
    return pd.concat(parts)

In [None]:
# Load both files in full (no row limit); this rebinds df_val, which
# previously held only the 100-row sample.
df_val = pd.read_csv('raw/sales_train_validation.csv')
df_eval = pd.read_csv('raw/sales_train_evaluation.csv')

In [None]:
# Recompute the per-series means from the full validation frame (rebinds `ave`).
ave = AverageForecast(df_val)

In [None]:
# Build the combined validation + evaluation submission frame.
sub_df = submission_df(ave, df_val, df_eval)

In [None]:
# Spot-check a single series: all F columns should hold the same value.
sub_df.query('id == "HOBBIES_1_105_CA_1_validation"')

Unnamed: 0,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,...,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
100,HOBBIES_1_105_CA_1_validation,0.838474,0.838474,0.838474,0.838474,0.838474,0.838474,0.838474,0.838474,0.838474,...,0.838474,0.838474,0.838474,0.838474,0.838474,0.838474,0.838474,0.838474,0.838474,0.838474


In [None]:
# Write the submission to disk; index=False keeps the frame's index out of the file.
fn = 'submissions/v0001_item_average.csv'
sub_df.to_csv(fn, index=False)

In [None]:
# Number of rows in the submission frame (excludes any CSV header line).
len(sub_df)

60980

In [None]:
# Line count of the written file; expected to be len(sub_df) plus one header line.
!wc -l {fn}

60981 submissions/v0001_item_average.csv


In [None]:
# Inspect the last lines of the written file.
!tail {fn}

FOODS_3_818_WI_3_evaluation,1.2310507056978568,1.2310507056978568,1.2310507056978568,1.2310507056978568,1.2310507056978568,1.2310507056978568,1.2310507056978568,1.2310507056978568,1.2310507056978568,1.2310507056978568,1.2310507056978568,1.2310507056978568,1.2310507056978568,1.2310507056978568,1.2310507056978568,1.2310507056978568,1.2310507056978568,1.2310507056978568,1.2310507056978568,1.2310507056978568,1.2310507056978568,1.2310507056978568,1.2310507056978568,1.2310507056978568,1.2310507056978568,1.2310507056978568,1.2310507056978568,1.2310507056978568
FOODS_3_819_WI_3_evaluation,2.7260846837428123,2.7260846837428123,2.7260846837428123,2.7260846837428123,2.7260846837428123,2.7260846837428123,2.7260846837428123,2.7260846837428123,2.7260846837428123,2.7260846837428123,2.7260846837428123,2.7260846837428123,2.7260846837428123,2.7260846837428123,2.7260846837428123,2.7260846837428123,2.7260846837428123,2.7260846837428123,2.7260846837428123,2.7260846837428123,2.7260846837428123,2.7260846837

In [None]:
# Upload the file with the Kaggle CLI. KAGGLE_CONFIG_DIR=.. makes the CLI
# look for its configuration in the parent directory.
kaggle_competition='m5-forecasting-accuracy'
!KAGGLE_CONFIG_DIR=.. kaggle competitions submit \
              -f {fn} \
              -m "Known mean" {kaggle_competition}

100%|███████████████████████████████████████| 33.0M/33.0M [00:44<00:00, 779kB/s]
Successfully submitted to M5 Forecasting - Accuracy

In [None]:
# List this account's submissions for the competition to check status and scores.
!KAGGLE_CONFIG_DIR=.. kaggle competitions submissions {kaggle_competition}

fileName                date                 description            status    publicScore  privateScore  
----------------------  -------------------  ---------------------  --------  -----------  ------------  
v0001_item_average.csv  2021-05-19 02:07:23  All zero cmd line sub  pending   None         None          
v0000_all_zero.csv      2021-05-18 02:54:53  All zero cmd line sub  complete  5.44561      5.39065       
all_zero_v0000.csv      2021-05-18 02:49:33  All zero cmd line sub  error     None         None          
all_zero_v0000.csv      2021-05-18 02:49:27  All zero cmd line sub  error     None         None          
all_zero_v0000.csv      2021-05-18 02:48:15  All zero cmd line sub  error     None         None          
0400-fastai-sample.csv  2020-04-05 00:29:47  None                   complete  5.44561      5.39065       
0200-naive.csv          2020-03-22 23:26:07                         complete  0.83770      5.39065       
0200-naive.csv          2020-03-22 23