In [None]:
import numpy as np
import pandas as pd
from pandas import DataFrame, Series

from IPython.core.interactiveshell import InteractiveShell

# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

# from IPython.core.display import display, HTML
# display(HTML("<style>.container { width:100% !important; }</style>"))

import functions as fcns # custom functions Artem/Chris wrote


In [None]:
# Raw sales table; the later cells read article, calendar, season, quantity
# and category-description columns from it.
run_data = pd.read_csv('data/run_data.csv')

# NOTE(review): allow_pickle=True unpickles arbitrary Python objects, which can
# execute code on load -- only point these at .npy files from a trusted source.
articles_of_interest = (
    np.load('data/articles_of_interest.npy', allow_pickle=True)
    .tolist()
)

# file_names is indexed positionally further down: [1] is passed to to_excel
# as the output target, [5] is compared against a single season value, and
# [6] is used as the collection of seasons to keep.
file_names = (
    np.load('data/file_names.npy', allow_pickle=True)
    .tolist()
)


In [None]:

# Build the weekly sales table used by every seasonality step below.
# dropna() removes a row when ANY of the selected columns is missing, so
# records lacking one of the category descriptions are excluded from here on.
seasonality_dat = (
    run_data[['article_number', 'brand', 'year', 'week', 'country', 'season', 'net_qty', 'sports_cat_desc',
              'rmh_cat_desc', 'gender_desc', 'age_group_desc', 'franchise', 'prod_grp_desc']]
    .dropna()
    .sort_values(['article_number', 'year', 'week'])
    .copy()
)

# Total net_qty per article / brand / season / year / week, broadcast back to
# every row of the group. The aggregation is given by name ('sum') instead of
# the Python builtin `sum`: pandas currently maps the builtin to its own
# groupby sum but warns that a future version will call the builtin directly.
seasonality_dat['net_qty2'] = (
    seasonality_dat
    .groupby(['article_number', 'brand', 'season', 'year', 'week'])['net_qty']
    .transform('sum')
)

# Drop the columns that were summed over (plus the per-row quantity) and
# de-duplicate so each remaining combination carries the group total once.
# NOTE(review): 'brand' is a grouping key but is dropped here; if one article
# can appear under several brands, it keeps one row per brand total -- confirm.
seasonality_dat = (
    seasonality_dat
    .drop(['brand', 'country', 'net_qty'], axis=1)
    .drop_duplicates()
    .rename(columns={'net_qty2': 'net_qty'})
)

# -- Reliable, mirror seasons --
# Keep only rows whose season appears in file_names[6].
seasonality_dat = seasonality_dat[seasonality_dat.season.isin(file_names[6])]


In [None]:
# ---- Category-level weekly means ----
# Computed on the full seasonality table (every season and article still in
# seasonality_dat at this point), i.e. BEFORE it is narrowed to the articles
# of interest at the bottom of this cell. Each call passes the table, one
# category column, and the name to give the resulting mean column.
seasonality_sport = fcns.calc_cat_level_means(
    df=seasonality_dat, cat='sports_cat_desc', new_col_name='sport_weekly_mean'
)
seasonality_rmh = fcns.calc_cat_level_means(
    seasonality_dat, 'rmh_cat_desc', 'rmh_weekly_mean'
)
seasonality_gndr = fcns.calc_cat_level_means(
    seasonality_dat, 'gender_desc', 'gender_weekly_mean'
)
seasonality_agegrp = fcns.calc_cat_level_means(
    seasonality_dat, 'age_group_desc', 'age_weekly_mean'
)
seasonality_frnchse = fcns.calc_cat_level_means(
    seasonality_dat, 'franchise', 'franchise_weekly_mean'
)
seasonality_prdgrp = fcns.calc_cat_level_means(
    seasonality_dat, 'prod_grp_desc', 'prd_grp_weekly_mean'
)

# Restrict the table to the articles that will be regressed.
# NOTE(review): this overwrites seasonality_dat, so re-running this cell on its
# own would compute the means above on the already-filtered table.
seasonality_dat = (
    seasonality_dat
    .loc[seasonality_dat.article_number.isin(articles_of_interest)]
    .sort_values(['article_number', 'year', 'week'])
)



In [None]:
# Run fcns.regress once per article, handing it the six category-level mean
# tables; reset_index() then turns the group key back into ordinary columns.
preds = (
    seasonality_dat
    .groupby(['article_number'])
    .apply(
        fcns.regress,
        cat1=seasonality_sport,
        cat2=seasonality_rmh,
        cat3=seasonality_gndr,
        cat4=seasonality_agegrp,
        cat5=seasonality_frnchse,
        cat6=seasonality_prdgrp,
    )
    .reset_index()
)

In [None]:
# Attach the observed net_qty for the season given by file_names[5] to the
# predictions. No `on=` is supplied, so pandas joins on every column name the
# two frames have in common; how='left' keeps every prediction row.
# NOTE(review): preds is overwritten with a five-column subset, so this cell
# is not safe to re-run without re-running the regression cell first.
preds = (
    preds
    .merge(seasonality_dat[seasonality_dat.season == file_names[5]], how='left')
    .sort_values(['article_number', 'year', 'week'])[['article_number', 'year', 'week', 'net_qty', 'seas_preds']]
)
         


In [None]:
# Non-negative forecast rule: keep strictly positive predictions, set
# everything else to 0. A missing prediction (NaN) fails the > 0 test and is
# therefore also written out as 0.
preds['seas_preds'] = np.where(
    preds['seas_preds'].gt(0),
    preds['seas_preds'],
    0,
)

In [None]:
# Write the forecasts to the Excel target stored in file_names[1], omitting
# the DataFrame index from the sheet.
preds.to_excel(excel_writer=file_names[1], index=False)

In [None]:
# Echo file_names[1], the target that preds.to_excel was given above.
file_names[1]