# Modules, functions

In [4]:
# Modules, functions -- 

import numpy as np
import pandas as pd
from pandas import DataFrame, Series

import multiprocessing

import matplotlib.pyplot as plt
import bokeh
import bokeh.io
from bokeh.plotting import figure
from bokeh.io import output_notebook, show

# init_notebook_mode()

import seaborn as sns

import re
import math
import copy

from collections import defaultdict
import csv
import itertools
import datetime 
from datetime import datetime
import time
import dateutil.parser
import pickle
import random

import gc
import zipfile
import sys, getopt
import os

from IPython.core.interactiveshell import InteractiveShell
from io import StringIO

# import dask.dataframe as dd
#from chest import Chest

InteractiveShell.ast_node_interactivity = "all"
# InteractiveShell.ast_node_interactivity = "last"

# Magic function to make matplotlib inline
%matplotlib inline
%config InlineBackend.figure_formats = {'png', 'retina'}

# Set up Bokeh for inline viewing
bokeh.io.output_notebook()

# import dask.dataframe as ddf
# import dask.array as da

# Fully qualified option names: the bare 'max_columns' / 'max_rows' patterns also
# match the 'styler.render.*' options on newer pandas, which makes set_option
# raise an OptionError for an ambiguous key.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 1700)

import scipy

import statsmodels.api as sm
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.tsatools import detrend

import datetime as dt

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [5]:
# CHRIS and ARTEM code 

def initialize_parameters(par = np.array([0.5, 0.9, 0, 1, 0])):
    """Turn a flat parameter vector into the named parameter dict used by the filters.

    Parameters
    ----------
    par : sequence of five numbers
        ``[alpha, beta, level, sigma, f0]``. The third entry is not stored as is:
        it is multiplied by ``(1 - beta)`` to give ``omega``, so that
        ``omega / (1 - beta)`` equals the supplied level.

    Returns
    -------
    dict
        Keys ``'alpha'``, ``'beta'``, ``'omega'``, ``'sigma'`` and ``'f0'``.
    """
    alpha, beta, level, sigma, f0 = par[0], par[1], par[2], par[3], par[4]

    return {
        'alpha': alpha,
        'beta': beta,
        'omega': level * (1 - beta),   # one way to choose that is omega/(1-beta) = unconditional mean
        'sigma': sigma,
        'f0': f0,                      # one way to choose is unconditional mean
    }

def loglik(y, f, x, sigma):
    """Gaussian log-density of ``y`` around the mean ``x*f``.

    ``sigma`` is used as the variance: it divides the squared residual directly
    and enters the normalising term as ``log(sigma)/2``. All arguments may be
    scalars or NumPy arrays; the computation is element-wise.
    """
    residual = y - x*f
    constant_term = -1/2*np.log(2*np.pi)
    scale_term = 1/2*np.log(sigma)
    fit_term = 1/(2*sigma)*residual**2
    return constant_term - scale_term - fit_term

def score_compute(y, f, x, parameters, epsilon = 1e-7 ):
    """Scaled prediction error that drives the GAS update: ``(y - x*f) / sigma``.

    Parameters
    ----------
    y, f, x : scalar or ndarray
        Observation, current filter value and explanatory variable.
    parameters : dict
        Only ``parameters["sigma"]`` is read.
    epsilon : float
        Accepted for call compatibility; not used in the computation.

    NOTE(review): differentiating ``loglik`` with respect to ``f`` would give an
    extra factor ``x``; confirm that leaving it out is intentional.
    """
    return (y - x*f)/parameters["sigma"]

def filterGAS(y, x, parameters):
    """Run the GAS recursion over the sample and return the filtered path.

    The recursion is ``f[t] = omega + alpha*score[t-1] + beta*f[t-1]`` with
    ``f[0] = f0``, where ``score[t-1]`` comes from ``score_compute`` evaluated on
    the previous observation and the previous filter value.

    Parameters
    ----------
    y, x : ndarray
        Two-dimensional arrays indexed as ``[t, :]``; the caller in this
        notebook passes ``(n, 1)`` columns.
    parameters : dict
        Needs ``'alpha'``, ``'beta'``, ``'omega'``, ``'f0'`` plus whatever
        ``score_compute`` reads.

    Returns
    -------
    ndarray of shape ``(len(y), 1)``
    """
    alpha = parameters["alpha"]
    beta = parameters["beta"]
    omega = parameters['omega']

    f = np.zeros((len(y),1))
    f[0,:] = parameters["f0"]

    for t in range(1,len(y)):
        # The update for period t only uses information available at t-1.
        scoret = score_compute(y[t-1,:], f[t-1,:], x[t-1,:], parameters, epsilon = 1e-7)
        f[t,:] = omega + alpha*scoret + beta*f[t-1,:]

    return f

def loglikest(par, y, x):
    """Objective for the optimiser: negative mean log-likelihood of the GAS model.

    Parameters
    ----------
    par : sequence
        Flat parameter vector, unpacked by ``initialize_parameters``.
    y, x : ndarray
        Response and explanatory series. Assumes ``(n, 1)`` columns as built in
        ``GAS_est`` -- TODO confirm if called from elsewhere.

    Returns
    -------
    float
        ``-(sum of per-observation log-likelihoods) / len(y)``.
    """
    parameters = initialize_parameters(par)
    f = filterGAS(y, x, parameters)

    # loglik is element-wise, so a single call on the column arrays yields the
    # same per-observation values as evaluating it row by row.
    ll = loglik(y, f, x, parameters["sigma"])

    return -(np.sum(ll))/len(y)

# ----------------------------------------------------------------

def score_compute_2(y, f, x, parameters, epsilon = 1e-7 ):
    """Unscaled prediction error ``y - x*f`` used by the second GAS specification.

    ``parameters`` and ``epsilon`` are accepted so the signature matches
    ``score_compute``; neither is used in the computation.
    """
    return y - x*f

def filterGAS_2(y, x, parameters):
    """Run the GAS recursion with the unscaled score from ``score_compute_2``.

    The recursion is ``f[t] = omega + alpha*score[t-1] + beta*f[t-1]`` with
    ``f[0] = f0``.

    Parameters
    ----------
    y, x : ndarray
        Two-dimensional arrays indexed as ``[t, :]``; the caller in this
        notebook passes ``(n, 1)`` columns.
    parameters : dict
        Needs ``'alpha'``, ``'beta'``, ``'omega'`` and ``'f0'``.

    Returns
    -------
    ndarray of shape ``(len(y), 1)``
    """
    alpha = parameters["alpha"]
    beta = parameters["beta"]
    omega = parameters['omega']

    f = np.zeros((len(y),1))
    f[0,:] = parameters["f0"]

    for t in range(1,len(y)):
        # The update for period t only uses information available at t-1.
        scoret = score_compute_2(y[t-1,:], f[t-1,:], x[t-1,:], parameters, epsilon = 1e-7)
        f[t,:] = omega + alpha*scoret + beta*f[t-1,:]

    return f

def loglikest_2(par, y, x):
    """Negative mean log-likelihood when the path is filtered with ``filterGAS_2``.

    Parameters
    ----------
    par : sequence
        Flat parameter vector, unpacked by ``initialize_parameters``.
    y, x : ndarray
        Response and explanatory series. Assumes ``(n, 1)`` columns as built in
        ``GAS_est`` -- TODO confirm if called from elsewhere.

    Returns
    -------
    float
        ``-(sum of per-observation log-likelihoods) / len(y)``.
    """
    parameters = initialize_parameters(par)
    f = filterGAS_2(y, x, parameters)

    # loglik is element-wise, so a single call on the column arrays yields the
    # same per-observation values as evaluating it row by row.
    ll = loglik(y, f, x, parameters["sigma"])

    return -(np.sum(ll))/len(y)

# ----------------------------------------------------------------

def GAS_est(df):
    """Fit the GAS filter to one demand series and return the filtered value per row.

    The first specification (``loglikest`` / ``filterGAS``) is tried first. If
    its optimiser run does not report success, the second specification
    (``loglikest_2`` / ``filterGAS_2``) is fitted and used whatever its outcome.

    Parameters
    ----------
    df : DataFrame
        Needs the columns ``net_qty`` (response), ``buy_availability``
        (explanatory), ``year`` and ``week``. Rows are used in the given order.

    Returns
    -------
    DataFrame
        Indexed like ``df`` with columns ``year``, ``week``, ``GAS_est``,
        ``Convergence`` (success flag of the fit that was used) and
        ``Convg type`` (``'One'`` or ``'Two'``).
    """
    # `import scipy` alone does not guarantee that the optimize submodule is loaded.
    import scipy.optimize

    y = df.net_qty.values          # observed demand (response)
    x = df.buy_availability.values # buy_availability (explanatory)

    y = y.reshape((len(y),1))
    x = x.reshape((len(y),1))

    n_obs = len(y)
    mean_y = np.mean(y)

    floor = 0.001
    # The upper bound is 2*mean(y). It is kept from dropping below the lower
    # bound, which would otherwise make the box constraints invalid for series
    # whose mean demand is (close to) zero or negative.
    ceiling = max(mean_y*2, floor)
    # Starting level is mean(y), moved into the feasible box when necessary.
    start_level = min(max(mean_y, floor), ceiling)

    start = np.array([0.8, 0.9, start_level, 1, start_level]) # initial parameter values (starting)
    bounds = ((0,  None),         # alpha
              (-1, 1),            # beta
              (floor, ceiling),   # omega (level; rescaled inside initialize_parameters)
              (floor, None),      # sigma
              (floor, ceiling))   # f
    options = {'eps':1e-09, 'maxiter': 600, 'ftol': 1e-12}

    # (label, objective to minimise, matching filter), in order of preference.
    attempts = (('One', loglikest, filterGAS),
                ('Two', loglikest_2, filterGAS_2))

    for convg_type, objective, gas_filter in attempts:
        abc = scipy.optimize.minimize(
            objective,
            start,
            args=(y, x),
            options=dict(options),
            method='L-BFGS-B',
            bounds=bounds,
        )
        if abc.success:
            break

    ret = pd.DataFrame()
    ret['year'] = df['year']
    ret['week'] = df['week']
    # The filter returns an (n, 1) array; store its single column.
    ret['GAS_est'] = gas_filter(y, x, initialize_parameters(abc.x))[:, 0]
    ret['Convergence'] = [abc.success] * n_obs
    ret['Convg type'] = [convg_type] * n_obs

    return ret

# Data

In [11]:
# Season zero articles
# SSYY is the season code used further down to filter buyer_table, dat_GAS and
# the rows merged back onto the seasonal predictions.

SSYY = 'FW19' # Season zero: SeasonSeasonYearYear

In [None]:
# Buyer table (26 Aug extract), ordered by article, brand and season,
# then restricted to season zero.
buyer_table = pd.read_csv(
    'data/EU_seasons.csv',
    sep = ",",
    low_memory = False,
    error_bad_lines = False,
)
buyer_table = buyer_table.sort_values(['article_number', 'brand', 'season'])
buyer_table = buyer_table[buyer_table.season == SSYY]
# S0 = buyer_table[buyer_table.season == SSYY].article_number # Season zero articles

In [None]:
# Forecasts -- 
# 'Range' sheet of the DTC workbook; the article column is renamed to match the
# key used by the other tables.
SS20 = pd.read_excel(
    'data/DTC_Range_SS20.xlsx',
    sheet_name='Range',
    skiprows=5,
    usecols=['Article Number (6 digits)', 'Carry Forward', 'eCom Market FC', 'Ecom Ranged RMA (PIM)'],
)
SS20 = SS20.rename(columns = {'Article Number (6 digits)': 'article_number'})


In [None]:
# adi + reebok + both -- 

# ----- adidas ------ 

# rma1_adi = pd.read_csv('data/article_range_rma1_adidas_fw20.csv', low_memory = False, error_bad_lines = False, sep = ",", usecols = ['Article Number', 'Market Retail Price', 'WE eCom', 'eCom Range']) # (13618, 4)

# adidas RMA2 -- 21 October: BSO sheet, three columns, rows with a non-zero eCom value
rma2_adi = pd.read_excel('data/RMA-02_ Market Range Plan_24102019.xlsx', sheet_name='BSO', skiprows=2)
rma2_adi = rma2_adi[['Article Number', 'WE eCom 06.11.2019', 'Market Retail Price']]
rma2_adi = rma2_adi[rma2_adi['WE eCom 06.11.2019'] != 0]


# ----- rbk ------ 

# rma1_rbk = pd.read_csv('data/article_range_rma1_reebok_fw20.csv', low_memory = False, error_bad_lines = False, sep = ",", usecols = ['Article_number', 'Total Marketing Forecast', '  Total Net Sales  ', '  Total Inline Forecast  ']) # rma1_rbk.columns[40:]

# Reebok RMA2 - 21 October
rma2_rbk = pd.read_excel('data/eCom Range Download_Reebok_FW20.xlsx')
rma2_rbk = rma2_rbk[['Article Number', 'RBK WE eCom', 'Hub Retail Price']]

# Reebok classics list
clssc_rbk = pd.read_csv('data/reebok_classics.csv')

# Classics articles that do not appear in the Reebok RMA2 range
addtl_classics = (
    set(clssc_rbk.article_number)
    .symmetric_difference(rma2_rbk['Article Number'])
    .intersection(clssc_rbk.article_number)
)

# both -- 

# Season-zero articles that are also ranged in either RMA2 file
carryovers = set(buyer_table.article_number) & (
    set(rma2_adi['Article Number']) | set(rma2_rbk['Article Number'])
)
print('Carryovers: ', len(carryovers))
print()

print('adi carryovers: ', len(set(buyer_table.article_number) & set(rma2_adi['Article Number'])))
print('rbk carryovers: ', len(set(buyer_table.article_number) & set(rma2_rbk['Article Number'])))


In [6]:
dat0 = pd.read_csv('data/ch4k.csv')
# Article reference attributes; each column is listed once
# ('rmh_cat_desc' used to appear twice in usecols).
ref_dat0 = pd.read_csv('data/Article reference data.csv', low_memory = False, error_bad_lines = False, 
                       usecols = ['article_no', 'model_no', 'art_desc', 'sports_cat_desc', 'rmh_cat_desc', 
                                  'franchise', 'gender_desc', 'age_group_desc', 'prod_grp_desc', 'prod_type_desc',
                                  'brand_desc', 'bus_unit_desc'])

# dat0 = dat0[dat0.season.isin(['SS17', 'SS18', 'SS19'])]
# Keep the fall/winter seasons only
dat0 = dat0[dat0.season.isin(['FW17', 'FW18', 'FW19', 'FW20'])]

# Remove clearance transactions
# dat0['clearance'] = dat0.clearance.fillna(0) 
# dat0['net_qty'] = (1 - dat0.clearance)*dat0.net_qty



In [None]:
# --- INTEGRATE THIS --- to replace duplicated shenanigans below

# stealths -- 

# stealth = pd.read_csv('data/stealth_carryovers_FW2020.csv', low_memory = False, error_bad_lines = False, sep = ";")

# # stealth2 = pd.read_csv('data/stealth_carryovers_eu_RMA2.csv').rename(columns = {'article1': 'article', 'article2': 'stealth_article'})
# # stealth3 = pd.concat([stealth, stealth2])

# FW19_range = buyer_table.article_number.unique()
# FW20_range = set(rma2_adi['Article Number']).union(set(rma2_rbk['Article Number']))

# # New forecasts: IN FW20 --- NOT IN FW19
# additions = stealth[(stealth.article.isin(FW20_range)) & (~(stealth.article.isin(carryovers)))] 

# find_me = additions.stealth_article

In [8]:
rbk = pd.read_excel('data/rbk_1920.xlsx')

In [9]:
articles_of_interest = rbk.article_number_FW19.unique()

# GAS

In [12]:
# Columns needed by GAS_est plus the grouping keys
dat_GAS = dat0[['article_number', 'year', 'week', 'country', 'season', 'net_qty', 'buy_availability', 'brand']].copy()

# dat_GAS = dat_GAS[(dat_GAS.season == SSYY) & (dat_GAS.article_number.isin(carryovers))].sort_values(['article_number', 'country', 'year', 'week'])

# Only season-zero (SSYY) rows of the articles of interest, ordered so that
# each (article, brand, country) series is chronological
dat_GAS = dat_GAS[dat_GAS.article_number.isin(articles_of_interest) & (dat_GAS.season == SSYY)]
dat_GAS = dat_GAS.sort_values(['article_number', 'brand', 'country', 'year', 'week'])


In [13]:
# Fit GAS_est separately to every (article_number, brand, country) series.
# NOTE(review): this rebinds dat_GAS to the fitted output, which no longer carries
# net_qty / buy_availability, so the cell cannot be re-run without first
# re-creating dat_GAS in the cell above.
dat_GAS = dat_GAS.groupby(['article_number', 'brand', 'country']).apply(GAS_est).reset_index() 

  app.launch_new_instance()
  grad[k] = (f(*((xk + d,) + args)) - f0) / d[k]


In [14]:

# Total filtered demand per article and week (summed over brand and country)
dat_GAS = dat_GAS.groupby(['article_number', 'year', 'week'], as_index = False)['GAS_est'].sum()


In [None]:
# With/out clearance -- 

# With clearance --
# dat_GAS0 = pd.DataFrame(dat_GAS.groupby('article_number')['GAS_est'].sum().round())
# dat_GAS0.head()

# Without clearance --
# dat_GAS1 = pd.DataFrame(dat_GAS.groupby('article_number')['GAS_est'].sum().round())
# dat_GAS1.head()

# dat_GAS_both = pd.merge(
#     dat_GAS0,
#     dat_GAS1,
#     left_index = True, right_index = True
# )

# dat_GAS_both['diff'] = dat_GAS0['GAS_est'] - dat_GAS1['GAS_est']

# dat_GAS_both['diff'].hist()
# dat_GAS_both['diff'].describe().round()


# dat_GAS_both['lt0'] = (dat_GAS_both['diff'] <= 0)*1

# dat_GAS_both['lt0'].mean().round(2)

# Seasonality

In [15]:
def calc_cat_level_means(df, cat, new_col_name):
    """Mean ``net_qty`` per (category level, week).

    Parameters
    ----------
    df : DataFrame
        Needs the columns ``cat``, ``'week'`` and ``'net_qty'``.
    cat : str
        Name of the category column to group by together with ``'week'``.
    new_col_name : str
        Name given to the column of means in the result.

    Returns
    -------
    DataFrame
        Columns ``cat``, ``'week'`` and ``new_col_name``, one row per group.
    """
    weekly_means = df.groupby([cat, 'week'])['net_qty'].mean()
    return weekly_means.reset_index().rename(columns = {'net_qty': new_col_name})
    

In [16]:
# Create df, wrangle, calculate category means -- 

seasonality_dat = (
    dat0[['article_number', 'brand', 'year', 'week', 'country', 
          'season', 'net_qty', 'sports_cat_desc', 'rmh_cat_desc', 
          'gender_desc', 'age_group_desc', 'franchise', 'prod_grp_desc']].
    dropna().
    sort_values(['article_number', 'year', 'week']).
    copy())

# Weekly demand per article/brand/season summed over countries. The aggregation
# is passed by name ('sum') rather than as the Python builtin, which pandas only
# supports through a deprecated alias.
seasonality_dat['net_qty2'] = seasonality_dat.groupby(['article_number', 'brand', 'season', 'year', 'week'])['net_qty'].transform('sum')
# After dropping the per-country columns the rows of one week are identical, so
# drop_duplicates leaves one row per article/season/year/week.
seasonality_dat = seasonality_dat.drop(['brand', 'country', 'net_qty'], axis = 1).drop_duplicates().rename(columns = {'net_qty2': 'net_qty'})

# -- Reliable, mirror seasons --
seasonality_dat = seasonality_dat[~seasonality_dat.season.isin(['FW14', 'FW15', 'FW16', 'SS14','SS15', 'SS16'])] # Exclude these seasons

In [19]:
a = calc_cat_level_means(df = seasonality_dat, cat = 'sports_cat_desc', new_col_name = 'sport_weekly_mean')
b = pd.DataFrame(seasonality_dat.groupby(['sports_cat_desc', 'week'])['net_qty'].mean()).reset_index().rename(columns = {'net_qty': 'sport_weekly_mean'})

a.equals(b)


# HUZZAH!! IMPLEMENT!!

True

In [None]:
# ---- Calculate cat-level weekly means across *ALL SEASONS* ---- 
# One table per category dimension, built with the calc_cat_level_means helper
# instead of six copies of the same groupby expression.
seasonality_sport   = calc_cat_level_means(df = seasonality_dat, cat = 'sports_cat_desc', new_col_name = 'sport_weekly_mean')
seasonality_rmh     = calc_cat_level_means(df = seasonality_dat, cat = 'rmh_cat_desc', new_col_name = 'rmh_weekly_mean')
seasonality_gndr    = calc_cat_level_means(df = seasonality_dat, cat = 'gender_desc', new_col_name = 'gender_weekly_mean')
seasonality_agegrp  = calc_cat_level_means(df = seasonality_dat, cat = 'age_group_desc', new_col_name = 'age_weekly_mean')
seasonality_frnchse = calc_cat_level_means(df = seasonality_dat, cat = 'franchise', new_col_name = 'franchise_weekly_mean')
seasonality_prdgrp  = calc_cat_level_means(df = seasonality_dat, cat = 'prod_grp_desc', new_col_name = 'prd_grp_weekly_mean')

seasonality_dfs = [seasonality_sport, seasonality_rmh, seasonality_gndr, seasonality_agegrp, seasonality_frnchse, seasonality_prdgrp]

# The category means above use every article; from here on only the articles of
# interest are kept.
# seasonality_dat = seasonality_dat[(seasonality_dat.article_number.isin(carryovers))].sort_values(['article_number', 'year', 'week'])
seasonality_dat = seasonality_dat[(seasonality_dat.article_number.isin(articles_of_interest))].sort_values(['article_number', 'year', 'week'])

In [None]:
# Function: regress one against many -- 

def regress(df):
    """Regress one article's weekly net_qty on its category-level weekly means.

    For each of the six category dimensions, the article's level is taken from
    the first value found in ``df`` and the matching rows of the module-level
    ``seasonality_*`` tables are joined to the article's demand on ``week``
    (outer join). An OLS without intercept is fitted on the rows that have no
    missing values, and predictions are produced for every joined row.

    Parameters
    ----------
    df : DataFrame
        Rows of a single article with ``article_number``, ``year``, ``week``,
        ``net_qty`` and the six category columns.

    Returns
    -------
    DataFrame
        Indexed by ``week`` with one column ``seas_preds``: the rounded
        predictions, averaged per week.
    """
    df = df.sort_values(['article_number', 'year', 'week'])

    # (table of weekly means, category column that selects the article's level)
    category_tables = [
        (seasonality_sport,   'sports_cat_desc'),
        (seasonality_rmh,     'rmh_cat_desc'),
        (seasonality_gndr,    'gender_desc'),
        (seasonality_agegrp,  'age_group_desc'),
        (seasonality_frnchse, 'franchise'),
        (seasonality_prdgrp,  'prod_grp_desc'),
    ]

    # design matrix (ensure 'week' alignment): start from the article's demand
    # and join each category's weekly means on the week index
    yX = df[['net_qty', 'week']].set_index('week')
    for table, cat_col in category_tables:
        article_level = df[cat_col].unique()[0]
        level_means = table[table[cat_col] == article_level].set_index('week')
        yX = pd.merge(yX, level_means, left_index=True, right_index=True, how = 'outer')
    yX = yX.drop([cat_col for _, cat_col in category_tables], axis = 1)

    # predict article net_qty with its six category-level seasonalities
    y = yX.net_qty
    X = yX.drop('net_qty', axis = 1)
    # X = sm.add_constant(X) # ****** ******* *******

    mod = sm.OLS(y, X, missing='drop').fit()
    # print(df.article_number.unique(), round(mod.rsquared, 2))

    ret = pd.DataFrame(index = X.index)
    ret['seas_preds'] = mod.predict(X).round()

    # a week can occur several times in the joined frame; keep one mean value per week
    ret = ret.reset_index()
    ret = pd.DataFrame(ret.groupby('week')['seas_preds'].mean())

    return ret

In [None]:
# regress articles of interest on seasonality
preds = seasonality_dat.groupby(['article_number']).apply(regress).reset_index()

In [None]:
# Merge back with net_qty
# Left join keeps every predicted week; weeks without a season-zero row get
# missing year / net_qty.
preds = pd.merge(preds, seasonality_dat[seasonality_dat.season == SSYY], how = 'left')
preds = preds.sort_values(['article_number', 'year', 'week'])
preds = preds[['article_number', 'year', 'week', 'net_qty', 'seas_preds']]
         


In [None]:
# Zero out negative preds
# (anything that is not strictly positive, including missing values, becomes 0)
preds['seas_preds'] = preds.seas_preds.where(preds.seas_preds > 0, 0)

# Combine: preds = All weeks --AND-- dat_GAS = observed weeks
preds = preds.merge(dat_GAS.round(), how = 'left', on =['article_number', 'year', 'week'])

# preds = preds.astype({'year': 'int', 'net_qty': 'int', 'seas_preds': 'int', 'GAS_est': 'int'})

# Aggregate to Season

In [None]:
# eCom forecasts -- 

# rma2_rbk = rma2_rbk.rename(columns = {'RBK WE eCom': 'WE eCom'})
# rma2_adi = rma2_adi.rename(columns = {'WE eCom 06.11.2019': 'WE eCom'})
# rma2 = pd.concat([rma2_adi[['Article Number', 'WE eCom']], rma2_rbk[['Article Number', 'WE eCom']]])


In [None]:
# weekly assignment of y_hat = f(observed, GAS, seasonality)

# Seasonal prediction alone where there is no GAS estimate, otherwise the
# average of the two.
preds['y_hat'] = np.where(np.isnan(preds.GAS_est), preds.seas_preds, (preds.GAS_est + preds.seas_preds)/2) # Evgeniy step

# Risk management step (judgment call): never forecast below observed demand.
# np.fmax ignores missing values, so weeks without an observed net_qty keep
# their forecast. The previous comparison-based version turned y_hat into NaN
# for exactly those weeks, dropping them from the season total.
preds['y_hat'] = np.fmax(preds.y_hat, preds.net_qty)



In [None]:
rma2_rbk = rma2_rbk.rename(columns = {'Article Number': 'article_number', 'RBK WE eCom': 'eCom Market FC'})


In [None]:
# Sum over season
# Columns are selected with a list: the tuple form ['y_hat', 'net_qty'] is
# deprecated and rejected by newer pandas. .sum() replaces apply(sum).
preds_season = preds.groupby('article_number')[['y_hat', 'net_qty']].sum().round()

# Growth
preds_season['y_hat'] = preds_season.y_hat * 1.1 # default growth rate

# Combine DAA + eCom
# preds_season = pd.merge(preds_season, SS20[['article_number', 'eCom Market FC']], how = 'left', on = 'article_number').round()
preds_season = pd.merge(preds_season, rma2_rbk[['article_number', 'eCom Market FC']], how = 'left', on = 'article_number').round()


# preds_season = pd.merge(preds_season.drop('WE eCom', axis = 1), rma2, how = 'left', left_on='article_number', right_on='Article Number').drop('Article Number', axis = 1).round()



In [None]:
# Add column with # of observed weeks
# preds_season = (
#     pd.merge(
#         preds_season, 
#         pd.DataFrame(preds[(~preds.net_qty.isna()) & (preds.net_qty > 0)].article_number.value_counts()).rename(columns = {'article_number': 'week_count'}), 
#         how = 'left', left_on='article_number', right_index = True
#     )

# # Retain articles with at least 4 observed weeks
# preds_season = preds_season[(preds_season.week_count > 3) & (preds_season.net_qty > 40)]

In [None]:
# Magnifying glass -- 
aoi = 'G27706'
a = aoi

#dat0[dat0.article_number == aoi].sort_values(['country', 'year', 'week'])#.iloc[1,]


preds_season[preds_season.article_number == a]

dat_a = preds[preds.article_number == a]
dat_a.sort_values(['year', 'week']).set_index(['year', 'week']).drop('article_number', axis = 1).apply(np.sum)

dat_a[~dat_a.net_qty.isna()]

plt.rcParams["figure.figsize"] = [10,7]
dat_a.sort_values(['year', 'week']).set_index(['year', 'week']).plot(linewidth = 3)

dat_a.sort_values(['year', 'week']).set_index(['year', 'week'])

# dat0[dat0.article_number == aoi].sort_values(['country', 'year', 'week'])#.iloc[1,]

# plot -- 
dat_aoi = dat0[dat0.article_number == aoi].copy()
dat_aoi = pd.merge(pd.DataFrame(dat_aoi.groupby(['year', 'week'])['net_qty'].sum()).reset_index(), dat_aoi[['year', 'week']].drop_duplicates())
dat_aoi.year = [str(x) for x in dat_aoi.year]
dat_aoi.week = [str(x) for x in dat_aoi.week]
dat_aoi['date'] = [dt.datetime.strptime(x[0] + '-' + x[1] + '-1', "%Y-%W-%w") for x in zip(dat_aoi.year, dat_aoi.week)]
plt.rcParams["figure.figsize"] = [10,7]
dat_aoi[['date', 'net_qty']].set_index('date').plot(linewidth = 4)

# Overbuy

In [None]:
# functions -- 

from functools import partial
from scipy import optimize
from scipy import integrate
import scipy.stats as stats

# Loss --- demand, buy, margin, cost
def L(d, b, margin, cost):
    """Loss of buying ``b`` units when demand turns out to be ``d``.

    Under-buying costs ``margin`` per missing unit, over-buying costs ``cost``
    per surplus unit, and an exact match costs nothing. If ``d`` and ``b``
    cannot be ordered (for example NaN), ``'Error'`` is printed and ``None``
    is returned.
    """
    if d == b:
        return 0
    if d > b:
        return (d - b)*margin
    if d < b:
        return (b - d)*cost
    print('Error')
    return None

# E[L | buy, article_mean, article_sd, article_margin, article_cost]
def EL(mu, sigma, margin, cost, b):
    """Expected loss of buying ``b`` units under normally distributed demand.

    The loss ``L(x, b, margin, cost)`` is weighted by the Normal(mu, sigma)
    density, integrated over ``[0, mu + 3*sigma]`` and divided by the
    probability mass above zero.

    NOTE(review): the integral stops at ``mu + 3*sigma`` while the normaliser is
    the full mass above zero -- confirm that this truncation is intended.

    Returns
    -------
    float
        The expected loss rounded to two decimals.
    """
    def integrand(x):
        return L(x, b, margin, cost) * stats.norm.pdf(x, mu, sigma)

    # quad returns (value, abs_error). Only the value is divided; the previous
    # code divided the whole tuple and worked only through NumPy's reflected
    # division on the scalar denominator.
    loss_integral, _abs_error = integrate.quad(integrand, 0, mu + 3*sigma)
    mass_above_zero = 1 - stats.norm.cdf(0, loc = mu, scale = sigma)

    return round(loss_integral/mass_above_zero, 2)

def minimize_EL(mu, sigma, margin, cost):
    """Buy quantity that minimises the expected loss ``EL`` for one article.

    Parameters
    ----------
    mu, sigma : float
        Mean and standard deviation of the demand forecast.
    margin, cost : float
        Per-unit margin and cost passed through to ``EL``.

    Returns
    -------
    int, float or str
        * ``'Error'`` when ``margin`` or ``cost`` is missing (the inputs are printed).
        * ``1.2*mu`` when ``mu < 1100``, where the optimisation is not used.
        * otherwise the optimal buy, searched within ``[mu, mu + 2*sigma]``,
          truncated to an int.
    """
    if pd.isna([margin, cost]).sum() > 0:
        print(mu, sigma, margin, cost)
        return 'Error'
    if(mu < 1100): # Judgment call; does not work as designed for low demand forecasts
        return 1.2*mu
    expected_loss_of_buy = partial(EL, mu, sigma, margin, cost) # Make EL function of only one var: b_0
    # method='bounded' is stated explicitly: on SciPy versions whose default
    # method is Brent, `bounds` is otherwise ignored and the search is unconstrained.
    buy_opt = optimize.minimize_scalar(expected_loss_of_buy, bounds = (mu, mu + 2*sigma), method = 'bounded')
    return int(buy_opt['x']) # optimal buy quantity

In [None]:
# Load cost/margin data
# cost_margin = cost_margin[cost_margin.season.isin(['SS18', 'FW18', 'SS19', 'FW19'])]
cost_margin = dat0[dat0.article_number.isin(articles_of_interest)]

# Mean price / cost / margin per article. Columns are selected with a list:
# the tuple form ['price', 'cost', 'margin'] is deprecated and rejected by
# newer pandas.
cost_margin = cost_margin.groupby('article_number')[['price', 'cost', 'margin']].mean().round(2).dropna() # All but one NA is season_net_qty < 10


In [None]:
preds_season[:2]

In [None]:
# Add cost and margin for optimal overbuy estimation                                                                           
preds_season = pd.merge(preds_season, cost_margin, how = 'left', left_on = 'article_number', right_index=True).round()

In [None]:
preds_season.shape
preds_season.article_number.unique().size

In [None]:
Opt_Ovb = preds_season.apply(lambda row: minimize_EL(row['y_hat'], 550 + 0.2*row['y_hat'], row['margin'], row['cost']), axis=1)


In [None]:
preds_season['Opt_Ovb'] = Opt_Ovb


In [None]:
# NOTE(review): `rbk_stealth` is never assigned in the visible notebook, so this
# cell raises NameError on a fresh kernel. The cells below read the columns
# 'article_number_FW20' and 'article_number_FW19' from it -- presumably it is
# the `rbk` frame loaded from rbk_1920.xlsx; confirm and define it explicitly.
rbk_stealth[:2]
rbk_stealth.columns

In [None]:
# Map each forecast from its FW19 article number to the FW20 article number:
# drop the eCom forecast joined earlier and inner-join the FW19 -> FW20 pairs.
preds_season = pd.merge(preds_season.drop('eCom Market FC', axis = 1), rbk_stealth[['article_number_FW20', 'article_number_FW19']], 
                        left_on = 'article_number', right_on = 'article_number_FW19')

# Attach the Reebok RMA2 columns by FW20 article number. Both frames carry an
# 'article_number' column, so the merge suffixes them _x / _y; the right-hand
# copy (_y) is dropped.
preds_season = (pd.merge(preds_season.drop('article_number_FW19', axis = 1), rma2_rbk, 
                         left_on='article_number_FW20', right_on='article_number').
                drop('article_number_y', axis = 1)
               )

# minimize_EL returns the string 'Error' for articles without cost/margin
preds_season = preds_season.replace('Error', 'NA')

# Keep one row per FW20 article (the first one encountered)
preds_season = preds_season.drop_duplicates(subset = 'article_number_FW20', keep = 'first')

In [None]:
preds_season = pd.merge(
    pd.DataFrame(preds_season.groupby('article_number_FW20')['y_hat'].mean().round(0)),
    preds_season.drop('y_hat', axis = 1), 
    left_index = True, right_on = 'article_number_FW20'
)

In [None]:
preds_season = preds_season.drop('article_number_x', axis = 1)


In [None]:
preds_season = preds_season.rename(columns = {'article_number_FW20': 'article_number'})

In [None]:
preds_season[:2]

In [None]:
(preds_season['eCom Market FC']*preds_season['Hub Retail Price']).sum()

# Stealth carryovers

In [None]:
carryovers.intersection(['EE6147', 'B22716', 'EE6145', 'EE6146'])
carryovers.intersection(['FW5947', 'FV5946', 'FV5943', 'FV5943'])



In [None]:
# all -- 

# Stealth carryover pairs (columns used below: 'article', 'stealth_article')
stealth = pd.read_csv('data/stealth_carryovers_FW2020.csv', low_memory = False, error_bad_lines = False, sep = ";")

# stealth2 = pd.read_csv('data/stealth_carryovers_eu_RMA2.csv').rename(columns = {'article1': 'article', 'article2': 'stealth_article'})
# stealth3 = pd.concat([stealth, stealth2])

FW19_range = buyer_table.article_number.unique()
# NOTE(review): an earlier cell renames rma2_rbk's 'Article Number' column to
# 'article_number'; once that cell has run, the lookup below raises KeyError.
FW20_range = set(rma2_adi['Article Number']).union(set(rma2_rbk['Article Number']))

# New forecasts: IN FW20 --- NOT IN FW19
additions = stealth[(stealth.article.isin(FW20_range)) & (~(stealth.article.isin(carryovers)))] 

# Predecessor articles whose history has to be looked up
find_me = additions.stealth_article

In [None]:
dat_stealth = dat0[(dat0.article_number.isin(['BB9103', 'BB9104'])) & (dat0.season.isin(['FW18', 'FW17', 'FW16', 'FW15']))].copy()

dat_stealth = dat_stealth[['article_number', 'year', 'week', 'country', 'season', 'net_qty', 'buy_availability']].sort_values(['article_number', 'country', 'year', 'week'])

# GAS step

dat_GAS_stealth = dat_stealth.groupby(['article_number', 'country']).apply(GAS_est).reset_index()
dat_GAS_stealth = pd.DataFrame(dat_GAS_stealth.groupby(['article_number', 'year', 'week'])['GAS_est'].sum()).reset_index()

# SEASONALITY step
seasonality_dat_stealth = (dat0[dat0.article_number.isin(['BB9103', 'BB9104'])].
    copy()[['article_number', 'year', 'week', 'country', 'season', 'net_qty', 
            'sports_cat_desc', 'rmh_cat_desc', 'gender_desc', 
            'age_group_desc', 'franchise', 'prod_grp_desc']].
    dropna().sort_values(['article_number', 'year', 'week'])
                  )

# -- Sum over UK/EU, ADD article reference data --
seasonality_dat_stealth = pd.merge(
    pd.DataFrame(seasonality_dat_stealth.groupby(['article_number', 'season', 'year', 'week'])['net_qty'].sum()).reset_index(), # sum over UK & EU
    seasonality_dat_stealth[['article_number', 'sports_cat_desc', 'rmh_cat_desc', 'gender_desc', 'age_group_desc', 'franchise', 'prod_grp_desc']].drop_duplicates() # add reference information
    ).dropna().sort_values(['article_number', 'year', 'week'])

seasonality_dat_stealth = seasonality_dat_stealth[seasonality_dat_stealth.season == 'FW18']

preds_stealth = seasonality_dat_stealth.groupby(['article_number']).apply(regress).reset_index()

# Merge back with reference data
preds_stealth = pd.merge(
    preds_stealth,
    seasonality_dat_stealth[seasonality_dat_stealth.season == 'FW18'],
    how = 'left').sort_values(['article_number', 'year', 'week'])[['article_number', 'year', 'week', 'net_qty', 'seas_preds']] # .fillna(method='ffill')


# Zero out negative preds
preds_stealth['seas_preds'] = np.where(preds_stealth.seas_preds > 0, preds_stealth.seas_preds, 0) 



# Combined observed weeks (partial season)  --- AND --- regression predicted (all) weeks
preds_stealth = pd.merge(
    preds_stealth,   # all weeks
    dat_GAS_stealth, # observed weeks
    how = 'left')



# weekly assignment of GAS, seasonality, or combination
preds_stealth['y_hat'] = np.where(np.isnan(preds_stealth.GAS_est), preds_stealth.seas_preds, (preds_stealth.GAS_est + preds_stealth.seas_preds)/2)



# Sum over season
preds_season_stealth = pd.DataFrame(preds_stealth.groupby('article_number')['y_hat'].apply(sum).round())



# Growth
preds_season_stealth['y_hat'] = preds_season_stealth.y_hat # * 1.1 # default growth rate

# Match stealth to its carryover
preds_season_stealth = pd.merge(
    preds_season_stealth,
    additions,
    how = 'left', left_index = True, right_on = 'stealth_article'
).drop('stealth_article', axis = 1).rename(columns = {'article': 'article_number'}).set_index('article_number')

# Combine DAA + eCom
# NOTE(review): `rma2` is only assigned in a commented-out line earlier in the
# notebook (the concat of rma2_adi and rma2_rbk under a common 'WE eCom'
# column), so this statement raises NameError on a fresh kernel.
preds_season_stealth = pd.merge(
    preds_season_stealth.reset_index(),  # DAA forecasts
    rma2,                                # eCom RMA2 forecast
    how = 'left', left_on='article_number', right_on='Article Number'
).drop('Article Number', axis = 1).round()

# add price/cost for optimal overbuy
preds_season_stealth = pd.merge(preds_season_stealth, cost_margin, how = 'left', left_on = 'article_number', right_index=True).round()



# see evaluation.ipynb for sd estimation 

opt_ovb_stealth = pd.DataFrame(preds_season_stealth.
                           apply(lambda row: minimize_EL(row['y_hat'], 550 + 0.2*row['y_hat'], row['margin'], row['cost']), axis=1)
                          )

opt_ovb_stealth = opt_ovb_stealth.rename(columns = {opt_ovb_stealth.columns[0]: 'Opt_Ovb'})

# Combine with data
preds_season_stealth = pd.merge(
    preds_season_stealth, # everything
    opt_ovb_stealth,      # optimal overbuy
    right_index= True, left_index= True
)

# IMPACT
# preds_season_stealth['impact'] = preds_season_stealth.apply(lambda row: np.abs(row['y_hat'] - row['WE eCom'])*(row['cost'] + row['margin']), axis = 1).round()                                                
# preds_season_stealth = preds_season_stealth.sort_values('impact', ascending = False).round()
             
# Reorder for concatenating
preds_season_stealth = preds_season_stealth[['article_number', 'y_hat', 'WE eCom', 'price', 'cost', 'margin', 'Opt_Ovb']]
                                            
                                            
                                            

In [None]:
# Combine stealth with the rest

preds_season = pd.concat([preds_season, preds_season_stealth], sort=True)[['article_number', 'y_hat', 'WE eCom', 'price', 'cost', 'margin', 'Opt_Ovb']]


# Saving Checkpoint

In [None]:
# preds_season.to_excel('data/SS20_forecasts_inclusive.xlsx')

In [None]:
# Keep the articles whose net forecast exceeds the eCom forecast and export
# them keyed by article number.
dat = (
    pd.read_excel('data/SS20_forecasts_all.xlsx')
    .loc[lambda frame: frame.net_forecast > frame.eCom_FC]
    .set_index('article_number')
)

dat.to_excel('data/SS20_forecasts_6Dec2019.xlsx')

In [None]:
# CLEARANCE y/n -- 

# INCLUDED --  

# preds.to_excel('data/preds_all.xlsx')
# preds_season.to_excel('data/preds_season_all.xlsx')


# NOT included --

# preds.to_excel('data/preds.xlsx')
preds.head()

# preds_season.to_excel('data/preds_season.xlsx')
preds_season.head()


In [None]:
# --- READ IN non/all DATA --- 

# preds_all = pd.read_excel('data/preds_all.xlsx').drop('Unnamed: 0', axis = 1)
# preds_season_all = pd.read_excel('data/preds_season_all.xlsx').drop('Unnamed: 0', axis = 1)

# preds = pd.read_excel('data/preds.xlsx').drop('Unnamed: 0', axis = 1)
# preds_season = pd.read_excel('data/preds_season.xlsx').drop('Unnamed: 0', axis = 1)

# Columns (add, rename, AA), Rounding

In [None]:
preds_season.head()

In [None]:
preds_season['pct_difference'] = ((preds_season.y_hat - preds_season['eCom Market FC'])/preds_season['eCom Market FC'] * 100).round(2)


In [None]:
# April Allen requested columns 

# aa_cols = pd.read_csv('data/columns_22october.csv', ...)
# aa_cols = pd.read_csv('data/columns_11November.csv', ...)

aa_cols = pd.read_csv('data/columns_21November.csv', low_memory = False, error_bad_lines = False, sep = ";")
# Manual override of the first row's market forecast
aa_cols.at[0, 'FW19_total_market_FC'] = 400

# Add leading zero to short article numbers
# Vectorised: only values of exactly five characters are padded; missing values
# no longer break the length check.
is_short = aa_cols['article_number'].str.len() == 5
aa_cols.loc[is_short, 'article_number'] = '0' + aa_cols.loc[is_short, 'article_number']

aa_cols = aa_cols.drop_duplicates(subset = 'article_number') # One duplicate

preds_season = pd.merge(preds_season, aa_cols, how = 'left', left_on = 'article_number', right_on = 'article_number')

# Strip '.' (used as a thousands separator in the source file) and convert to
# float. The builtin float replaces np.float, which newer NumPy no longer provides.
# NOTE(review): a value already parsed as a float loses trailing zeros before
# this step (1.230 -> '1.23' -> 123); reading the column as text would avoid that.
preds_season['FW19_total_ecom_SO'] = [float(str(x).replace('.', '')) for x in preds_season.FW19_total_ecom_SO] 

In [None]:
# List the reference-data columns available for the merge in the next cell.
ref_dat0.columns

In [None]:
# Attach the article reference attributes (left join on the article id) and
# drop the duplicate key column coming from the reference table.
preds_season = (
    preds_season
    .merge(
        ref_dat0[[
            'article_no', 'model_no', 'art_desc', 'brand_desc',
            'bus_unit_desc', 'rmh_cat_desc', 'prod_grp_desc', 'prod_type_desc',
            'gender_desc', 'age_group_desc',
        ]],
        left_on='article_number',
        right_on='article_no',
        how='left',
    )
    .drop('article_no', axis=1)
)


In [None]:
# Rename model / reference columns to the names used in the delivered workbook.
preds_season = preds_season.rename(columns={
    'y_hat': 'net_forecast',
    'Opt_Ovb': 'buy_recommendation',
    'art_desc': 'description',
    'eCom Market FC': 'eCom_FC',
    'prod_type_desc': 'type',
    'brand_desc': 'brand',
    'bus_unit_desc': 'BU',
    'rmh_cat_desc': 'RMH',
})

In [None]:
# Keep the deliverable columns, in presentation order.
# Left out for now: 'eCom_ILS1', 'FW19_total_market_FC', 'FW19_total_ecom_SO',
# 'FW19_total_ecom_RDP' (space after RDP?)
preds_season = preds_season.loc[:, [
    'article_number',
    'brand', 'model_no', 'description', 'type', 'BU', 'RMH',
    'price', 'cost', 'margin',
    'net_forecast', 'buy_recommendation', 'eCom_FC',
]]

In [None]:
# Index the forecasts by article_number. Guarded so that re-running this cell
# (when article_number is already the index) does not raise a KeyError.
if preds_season.index.name != 'article_number':
    preds_season = preds_season.set_index('article_number')


In [None]:
# Convert float columns to integers to remove '.0' endings.
# - The column dtype is checked directly instead of probing the second row by
#   position, which breaks on a label index and on frames with fewer than two rows.
# - NaN and both +inf / -inf become 0 first, since none of them can be stored
#   in an integer column.
# - The column is re-assigned with preds_season[c] = ... because
#   `.loc[:, c] = ...` writes into the existing float column on current pandas
#   and would keep the '.0' endings.
for c in preds_season.columns:
    if pd.api.types.is_float_dtype(preds_season[c]):
        preds_season[c] = (
            preds_season[c]
            .replace([np.inf, -np.inf], np.nan)
            .fillna(0)
            .round()
            .astype(int)
        )

In [None]:
# Replace every cell holding the literal string 'Error' with 0.
preds_season = preds_season.replace('Error', 0)

In [None]:
# Final column set for the workbook.
# NOTE(review): this drops 'type', 'BU' and 'RMH', which later cells still
# select from preds_season -- confirm the intended cell order.
preds_season = preds_season.loc[:, [
    'brand', 'model_no', 'description',
    'price', 'cost', 'margin',
    'net_forecast', 'buy_recommendation', 'eCom_FC',
]]

In [None]:
# Display the full frame (row count is capped by the pandas 'max_rows' option set at the top of the notebook).
preds_season

In [None]:
# Managing 'inclusive' irregularities -- 

# preds_season.loc[:, 'impact'] = preds_season.apply(lambda row: np.abs(row['net_forecast'] - row['eCom_ILS1'])*(row['cost'] + row['margin']), axis = 1).round()                                                
             
# preds_season_all = pd.read_excel('data/preds_season_all.xlsx').drop('Unnamed: 0', axis = 1).rename(columns = {'y_hat': 'net_forecast_incl_clearance', 'Opt_Ovb': 'buy_rec_incl_clearance'})


# preds_season.shape

# preds_season_both = pd.merge(preds_season, preds_season_all[['article_number', 'net_forecast_incl_clearance', 'buy_rec_incl_clearance']], how = 'left')

# preds_season_both = preds_season_both[['article_number', 'brand', 'model_no', 'description', 'type', 'BU', 'RMH', 'price', 'cost', 'margin', 
#                                        'net_forecast', 'buy_recommendation', 'net_forecast_incl_clearance', 'buy_rec_incl_clearance', 'eCom_ILS1', # 'pct_difference',
#                                        'FW19_total_market_FC', 'FW19_total_ecom_SO', 'FW19_total_ecom_RDP']].set_index('article_number')

# preds_season_both['buy_rec_incl_clearance'] = np.where(preds_season_both['buy_rec_incl_clearance'] > preds_season_both['buy_recommendation'], preds_season_both['buy_rec_incl_clearance'], preds_season_both['buy_recommendation'])
# preds_season_both['net_forecast_incl_clearance'] = np.where(preds_season_both['net_forecast_incl_clearance'] > preds_season_both['net_forecast'], preds_season_both['net_forecast_incl_clearance'], preds_season_both['net_forecast'])

# # Convert to integers to remove '.0' endings
# for c in preds_season_both.columns:
#     if type(preds_season_both[c][1]) == np.float64:
#         preds_season_both.loc[:, c] = preds_season_both[c].fillna(0).replace(np.inf, 0).astype(int)


# preds_season_both = preds_season_both[~preds_season_both.index.isin(unreliable)]



# Load including-clearance forecasts

In [None]:
# preds_season.to_excel('FW20_rbk_model_stealths_incl_cl.xlsx')

In [None]:
# preds_all = pd.read_excel('data/preds_all.xlsx').drop('Unnamed: 0', axis = 1)
# preds_season_all = pd.read_excel('data/preds_season_all.xlsx').drop('Unnamed: 0', axis = 1)

# preds = pd.read_excel('data/preds.xlsx').drop('Unnamed: 0', axis = 1)
# preds_season = pd.read_excel('data/preds_season.xlsx').drop('Unnamed: 0', axis = 1)

# Load the previously saved workbook that also contains the including-clearance forecasts.
preds_season_both = pd.read_excel('FW20_forecasts_incl_cl.xlsx')#.drop('Unnamed: 0', axis = 1)

In [None]:
# Relabel the eCom column with its snapshot date, then index by article.
preds_season_both = preds_season_both.rename(columns={'eCom_ILS1': 'eCom_21Nov'})
preds_season_both = preds_season_both.set_index('article_number')

In [None]:
# preds_season_both['net_forecast_diff'] = preds_season_both['net_forecast_incl_clearance'] - preds_season_both['net_forecast']
# preds_season_both.sort_values('net_forecast_diff', ascending = False)


# adidas stealth eda 

In [None]:
# Re-read the workbook from disk. This replaces whatever preds_season_both
# held before, including any rename / set_index applied to it earlier.
preds_season_both = pd.read_excel('FW20_forecasts_incl_cl.xlsx')


In [None]:
# Load article reference data restricted to the identifier, description, brand
# and colour columns. This rebinds ref_dat0.
# NOTE(review): `error_bad_lines` is deprecated in newer pandas (replaced by `on_bad_lines`).
ref_dat0 = pd.read_csv('data/article_reference_data.csv', low_memory = False, error_bad_lines = False,
                       usecols = ['article_no', 'model_no', 'art_desc', 'brand_desc', 'colorway_long_descr',
                                  'primary_color', 'secondary_color', 'tertiary_color', 'quarternary_color'])

In [None]:
# Preview the reference data just loaded.
ref_dat0.head()


# Magnifying Glass

In [None]:
# Articles whose forecasts are considered unreliable.
# The filters below are kept for reference; pick the one matching whether
# article_number is currently a column or the index.
unreliable = [
    'G26523', 'EJ9682', 'EE9391', 'EE8947',
    'EE8943', 'BS0980', 'EE4727',
]

# preds_season = preds_season[~preds_season.article_number.isin(unreliable)]
# preds_season = preds_season[~preds_season.index.isin(unreliable)]


In [None]:
# Compare DAA & eCom: difference between our net forecast and the eCom
# forecast, shown largest first.
# NOTE(review): the display selects 'type', 'BU' and 'RMH' -- confirm they are
# still present on preds_season when this cell runs.
preds_season.loc[:, 'diff'] = preds_season['net_forecast'].sub(preds_season['eCom_FC'])

preds_season.sort_values(by='diff', ascending=False).loc[:, [
    'net_forecast', 'buy_recommendation', 'eCom_FC', 'diff',
    'brand', 'model_no', 'description', 'type', 'BU', 'RMH',
]]



In [None]:
# Magnifying glass: article of interest (both names are used by later cells)
a = aoi = 'S21489'

In [None]:
# Magnifying glass -- season-level row, weekly model output and full sales
# history for the article selected in `a` / `aoi`.

# preds_season is indexed by article_number once the set_index cell has run,
# in which case `preds_season.article_number` no longer exists. Work on a view
# that always exposes article_number as a regular column.
season_view = preds_season.reset_index() if preds_season.index.name == 'article_number' else preds_season
season_cols = ['article_number', 'net_forecast', 'buy_recommendation',
               'eCom_FC', 'diff', 'brand', 'model_no', 'description', 'type', 'BU', 'RMH']
# Only show the columns that are still on the frame (some are dropped by the
# final column selection) instead of failing with a KeyError.
season_view[season_view.article_number == a][[col for col in season_cols if col in season_view.columns]]

# Weekly observed vs. modelled quantities for the article
dat_a = preds[preds.article_number == a][['week', 'net_qty', 'corrected', 'y_hat']].round()


plt.rcParams["figure.figsize"] = [10,7]
dat_a.sort_values('week').set_index('week').plot(linewidth = 3)

# Column totals, then the weekly table itself
dat_a.set_index('week').apply(np.sum).round()
dat_a.sort_values('week').set_index('week').round()

# dat0[dat0.article_number == aoi].sort_values(['country', 'year', 'week'])#.iloc[1,]

# plot -- net_qty summed per (year, week) over the raw transactions
dat_aoi = dat0[dat0.article_number == aoi].copy()
dat_aoi = pd.merge(pd.DataFrame(dat_aoi.groupby(['year', 'week'])['net_qty'].sum()).reset_index(), dat_aoi[['year', 'week']].drop_duplicates())
dat_aoi.year = [str(x) for x in dat_aoi.year]
dat_aoi.week = [str(x) for x in dat_aoi.week]
# Build a date from "<year>-<week>-1" (the trailing 1 selects Monday under %w)
dat_aoi['date'] = [dt.datetime.strptime(x[0] + '-' + x[1] + '-1', "%Y-%W-%w") for x in zip(dat_aoi.year, dat_aoi.week)]
plt.rcParams["figure.figsize"] = [10,7]
dat_aoi[['date', 'net_qty']].set_index('date').plot(linewidth = 4)

In [None]:
# ---- Comments ----
# Free-text analyst notes per article; every other article gets '-'.

if preds_season.index.name != 'article_number':
    preds_season = preds_season.set_index('article_number')

preds_season['notes'] = '-'

_season_notes = {
    'EF0371': 'Short FW19; our forecast assumes full FW20, with performance on par with observed FW19',
    'EE7570': 'Short FW19, so not a lot of data to work with. Our forecast assumes full FW20, with performance on par with observed FW19',
    'F36641': 'Forecast assumes FULL availability throughout FULL FW20',
    'EE6999': 'late drop, our forecast assumes a full FW20',
    'CG6193': 'Seasonality component of model is predicting strong end to FW19',
    'EE6464': 'Late drop in FW19, but our forecast assumes full FW20.',
    'EE7773': 'Shortened FW19, but forecast assumes full FW20.',
    'EE7775': 'Short FW19, but our forecast assumes full FW20',
    # 'DV0152': 'All FW19 transactions are clearance.',
    # 'DV0169': 'All FW19 transactions are clearance.',
    # 'DV2848': 'Lots of clearance transactions (out of scope) pushed our numbers down',
}
for _article, _note in _season_notes.items():
    preds_season.loc[_article, 'notes'] = _note

In [None]:
# ---- Comments ----
# Same analyst notes, applied to the frame that also carries the
# including-clearance forecasts.

if preds_season_both.index.name != 'article_number':
    preds_season_both = preds_season_both.set_index('article_number')

preds_season_both['notes'] = '-'

_both_notes = {
    'EF0371': 'Short FW19; our forecast assumes full FW20, with performance on par with observed FW19',
    'EE7570': 'Short FW19, so not a lot of data to work with. Our forecast assumes full FW20, with performance on par with observed FW19',
    'F36641': 'Forecast assumes FULL availability throughout FULL FW20',
    'EE6999': 'late drop, our forecast assumes a full FW20',
    'CG6193': 'Seasonality component of model is predicting strong end to FW19',
    'EE6464': 'Late drop in FW19, but our forecast assumes full FW20.',
    'EE7773': 'Shortened FW19, but forecast assumes full FW20.',
    'EE7775': 'Short FW19, but our forecast assumes full FW20',
}
for _article, _note in _both_notes.items():
    preds_season_both.loc[_article, 'notes'] = _note

In [None]:
# Save -- 

# preds_season.to_excel("FW20_forecasts.xlsx")
# preds_season_both.to_excel('FW20_forecasts_incl_cl.xlsx')


# Non-clearance vs. All

## Investigate w/ one article focus

In [None]:
# Raw transactions and article reference attributes.
dat0 = pd.read_csv('data/ch4k.csv')
ref_dat0 = pd.read_csv(
    'data/Article reference data.csv',
    low_memory=False,
    error_bad_lines=False,
    usecols=[
        'article_no', 'model_no', 'art_desc', 'sports_cat_desc', 'rmh_cat_desc',
        'franchise', 'gender_desc', 'age_group_desc', 'prod_grp_desc', 'prod_type_desc',
        'brand_desc', 'bus_unit_desc', 'rmh_cat_desc',   # 'rmh_cat_desc' is listed twice in the original
    ],
)

# All transactions (clearance included)
dat_all = dat0.copy()

# Non-clearance transactions (alternative, disabled):

# dat = dat0.copy()
# dat['clearance'] = dat.clearance.fillna(0) 
# dat['net_qty'] = (1 - dat.clearance)*dat.net_qty

In [None]:
aoi = 'EE7570'

# Transactions of the article of interest in the FW seasons, all transactions
# included (swap dat_all for dat to use the non-clearance frame).
dat_eda = (
    dat_all
    .loc[lambda d: (d.article_number.isin([aoi])) & (d.season.isin(['FW19', 'FW18', 'FW17', 'FW16', 'FW15']))]
    .copy()
    .loc[:, ['article_number', 'year', 'week', 'country', 'season', 'net_qty', 'buy_availability']]
    .sort_values(['article_number', 'country', 'year', 'week'])
)

# GAS step: fit per (article, country), then add the estimates up per week.
dat_GAS_eda = (
    dat_eda
    .groupby(['article_number', 'country'])
    .apply(GAS_est)
    .reset_index()
    .groupby(['article_number', 'year', 'week'])['GAS_est']
    .sum()
    .reset_index()
)

# both = pd.read_excel('data/both.xlsx')
# both['diff'] = both.apply(lambda row: row['net_forecast_y'] - row['net_forecast_x'], axis = 1)


In [None]:
# SEASONALITY step


def _weekly_category_mean(frame, category_col, mean_name):
    """Mean net_qty per (category value, week), as a flat frame with the mean column renamed."""
    return (
        frame.groupby([category_col, 'week'])['net_qty']
        .mean()
        .reset_index()
        .rename(columns={'net_qty': mean_name})
    )


# Rows with complete reference attributes only.
seasonality_dat_eda = (
    dat_all[['article_number', 'year', 'week', 'country', 'season', 'net_qty',
             'sports_cat_desc', 'rmh_cat_desc', 'gender_desc',
             'age_group_desc', 'franchise', 'prod_grp_desc']]
    .dropna()
    .sort_values(['article_number', 'year', 'week'])
    .copy()
)

# -- Sum over UK/EU, ADD article reference data, keep reliable mirror seasons --
seasonality_dat_eda = (
    seasonality_dat_eda
    .groupby(['article_number', 'season', 'year', 'week'])['net_qty']
    .sum()                                           # sum over UK & EU
    .reset_index()
    .merge(                                          # add reference information
        seasonality_dat_eda[['article_number', 'sports_cat_desc', 'rmh_cat_desc', 'gender_desc',
                             'age_group_desc', 'franchise', 'prod_grp_desc']].drop_duplicates()
    )
    .dropna()
    .sort_values(['article_number', 'year', 'week'])
    .loc[lambda d: d.season.isin(['FW15', 'FW16', 'FW17', 'FW18', 'FW19'])]
)

# ---- Calculate cat-level weekly means across *ALL SEASONS* ----
seasonality_sport   = _weekly_category_mean(seasonality_dat_eda, 'sports_cat_desc', 'sport_weekly_mean')
seasonality_rmh     = _weekly_category_mean(seasonality_dat_eda, 'rmh_cat_desc', 'rmh_weekly_mean')
seasonality_gndr    = _weekly_category_mean(seasonality_dat_eda, 'gender_desc', 'gender_weekly_mean')
seasonality_agegrp  = _weekly_category_mean(seasonality_dat_eda, 'age_group_desc', 'age_weekly_mean')
seasonality_frnchse = _weekly_category_mean(seasonality_dat_eda, 'franchise', 'franchise_weekly_mean')
seasonality_prdgrp  = _weekly_category_mean(seasonality_dat_eda, 'prod_grp_desc', 'prd_grp_weekly_mean')

seasonality_dfs = [seasonality_sport, seasonality_rmh, seasonality_gndr, seasonality_agegrp, seasonality_frnchse, seasonality_prdgrp]


In [None]:
# Restrict the seasonality data to FW19 rows of the article of interest.
# This overwrites seasonality_dat_eda, so the category-level frames must be
# computed before this cell runs.
seasonality_dat_eda = seasonality_dat_eda[(seasonality_dat_eda.season == 'FW19') & (seasonality_dat_eda.article_number == aoi)]



# Per-article regression; `regress` is defined elsewhere in the notebook.
# NOTE(review): presumably it returns one 'seas_preds' row per week -- confirm.
preds_eda = seasonality_dat_eda.groupby(['article_number']).apply(regress).reset_index()

# Merge back with reference data
# (no `on=` given, so pandas joins on every column the two frames share)
preds_eda = pd.merge(
    preds_eda,
    seasonality_dat_eda,
    how = 'left').sort_values(['article_number', 'week'])[['article_number', 'year', 'week', 'net_qty', 'seas_preds']] # .fillna(method='ffill')

# Zero out negative preds (NaN predictions also become 0, since NaN > 0 is False)
preds_eda['seas_preds'] = np.where(preds_eda.seas_preds > 0, preds_eda.seas_preds, 0) 

# Combined observed weeks (partial season)  --- AND --- regression predicted (all) weeks
preds_eda = pd.merge(
    preds_eda,   # all weeks
    dat_GAS_eda, # observed weeks
    how = 'left')

# weekly assignment of GAS, seasonality, or combination:
# weeks without a GAS estimate use the seasonality prediction alone, the
# others use the average of the two
preds_eda['y_hat'] = np.where(np.isnan(preds_eda.GAS_est), preds_eda.seas_preds, (preds_eda.GAS_est + preds_eda.seas_preds)/2).round()

# Sum over season
# preds_season_eda = pd.DataFrame(preds_eda.groupby('article_number')['y_hat'].apply(sum).round())

# Growth
# preds_season_eda['y_hat'] = preds_season_eda.y_hat # * 1.1 # default growth rate
                                           
                                            
                                            

In [None]:
# Side-by-side look at two runs of the EDA pipeline for the same article.
# The snapshots are taken by hand: un-comment the matching `.copy()` line
# after each run of the cells above.
# NOTE(review): EE7570_0 / EE7570_1 are only assigned by the commented-out
# lines, and `both` is read from disk in a later cell -- on a fresh
# top-to-bottom run this cell raises NameError. Confirm the intended order.
# EE7570_0 = preds_eda.copy()
EE7570_0.head()

# EE7570_1 = preds_eda.copy()

EE7570_0.head()
EE7570_1.head()

both[both.article_number == aoi].head()

both.shape
# both[['net_forecast_x', 'net_forecast_y', 'diff']]

# Distribution of the forecast difference between the two variants
both['diff'].describe().round()
both['diff'].hist(bins = [-2500, 0, 1000, 12000])
# without clearance ---- with clearance

In [None]:
# both.to_excel('data/both.xlsx')

## Compare, Combine

In [None]:
# Load the previously saved combined (non-clearance + with-clearance) forecasts.
both = pd.read_excel('data/SS20_forecasts_all.xlsx')

In [None]:
# preds_season = pd.read_excel('data/SS20_forecasts_5Dec2019.xlsx')#.drop('Unnamed: 0', axis = 1)
# preds_season_all = pd.read_excel('data/preds_season_all.xlsx').drop('Unnamed: 0', axis = 1)

In [None]:
# preds_season_all0 = preds_season.copy()
# preds_season_all = preds_season.copy()

In [None]:
# Put the with-clearance forecast next to the non-clearance one.
preds_season_all = preds_season_all.rename(columns={
    'net_forecast': 'net_forecast_w_clearance',
    'buy_recommendation': 'buy_recommendation_w_clearance',
})

both = preds_season.merge(
    preds_season_all[['article_number', 'net_forecast_w_clearance', 'buy_recommendation_w_clearance']],
    left_on='article_number',
    right_on='article_number',
)

# The with-clearance figure is never allowed to fall below the non-clearance one.
for _plain in ('buy_recommendation', 'net_forecast'):
    _with_cl = _plain + '_w_clearance'
    both[_with_cl] = np.where(both[_with_cl] > both[_plain], both[_with_cl], both[_plain])



In [None]:
# How much the buy recommendation grows once clearance sales are included;
# largest increase first.
# both['net_forecast_diff'] = both['net_forecast_w_clearance'] - both['net_forecast']
both['buy_rec_diff'] = both['buy_recommendation_w_clearance'].sub(both['buy_recommendation'])

both = both.sort_values(by='buy_rec_diff', ascending=False)


In [None]:
# Remove clearance transactions: net_qty0 is net_qty with the clearance share
# taken out (rows with a missing clearance value count as non-clearance).
dat3 = dat0.copy()
dat3['clearance'] = dat3.clearance.fillna(0) 
dat3['net_qty0'] = (1 - dat3.clearance)*dat3.net_qty

# SS19 totals per article, with and without clearance.
# The columns are selected with a list: groupby(...)['a', 'b'] (tuple form)
# is no longer accepted by current pandas.
ss19_totals = (
    dat3[dat3.article_number.isin(both.article_number) & (dat3.season == 'SS19')]
    .groupby('article_number')[['net_qty0', 'net_qty']]
    .sum()
    .astype('int')
)

both = pd.merge(both, ss19_totals,
         left_on = 'article_number', right_on = 'article_number').rename(columns = {'net_qty0': 'SS19_net_qty0', 'net_qty': 'SS19_net_qty'})

In [None]:
# List the columns now available on the combined frame.
both.columns

In [None]:
# Review table: SS19 actuals next to both forecast variants.
both.loc[:, [
    'article_number', 'description', 'RMH',
    'SS19_net_qty0', 'SS19_net_qty',
    'net_forecast', 'buy_recommendation',
    'net_forecast_w_clearance', 'buy_recommendation_w_clearance',
    'eCom_FC', 'buy_rec_diff',
]]#.to_excel('data/forecast_eda.xlsx')

In [None]:
# Deliverable layout: descriptive columns, pricing, then both forecast variants.
both.loc[:, [
    'article_number', 'brand', 'model_no', 'description', 'type', 'BU', 'RMH',
    'price', 'cost', 'margin',
    'net_forecast', 'buy_recommendation',
    'net_forecast_w_clearance', 'buy_recommendation_w_clearance',
    'eCom_FC',
]]

In [None]:
# Currently a no-op: the column selection / set_index is commented out, so
# `both` is assigned to itself. Un-comment the selection and the to_excel
# line below to write the deliverable.
both = both#[['article_number',  'brand', 'model_no', 'description', 'type', 'BU', 'RMH', 
        #'price', 'cost', 'margin', 'net_forecast', 'buy_recommendation', 'net_forecast_w_clearance', 'buy_recommendation_w_clearance', 'eCom_FC']].set_index('article_number')

# both.to_excel('data/SS20_forecasts_all.xlsx')

# Appendix

In [None]:
# # adidas v rbk
# from pandas import DataFrame, Series
# fw20['brand'].value_counts()

# # Carryover coverage
# fw20['rev'] = fw20.price * fw20.eCom_RMA1
# fw20.groupby('brand')['rev'].sum()

In [None]:
# # DAVID code

# # Function Definitions

# class parameters():
#     def __init__(self, param_array):
#         self.alpha = param_array[0]
#         self.beta = param_array[1]
#         self.omega = param_array[2] / (1-self.beta) # one way to choose that is omega/(1-beta) = unconditional mean
#         self.sigma = param_array[3]
#         self.f0 = param_array[4] # one way to choose is unconditional mean

# def loglik(x, y, f, sigma):
#     ll = -1/2*np.log(2*np.pi ) - 1/2*np.log(sigma) - 1/(2*sigma)*(y - x*f)**2
#     return ll

# def score_compute(x, y, f, sigma):
#     return (y - x*f)/sigma

# def score_compute_2(x, y, f, sigma=None):
#     return(y - x*f) # ** The 'type = 2' modification **

# def filterGAS(p, x, y, score_fun):
#     score0 = score_fun(x[0,:], y[0,:],p.f0, p.sigma)
#     f = np.zeros((len(y),1))
#     f[0,:] = p.f0
#     for t in range(1,len(y)):
#         scoret = score_fun(x[t-1,:], y[t-1,:], f[t-1,:], p.sigma)
#         f[t,:] = p.omega + p.alpha*scoret + p.beta*f[t-1,:]
#     return f


# def loglikest(params, x, y, score_fun):
#     p = parameters(params)
#     f = filterGAS(p, x, y, score_fun)
#     ll = np.zeros((len(y), 1))
#     m = len(y)
#     for t in range(0, len(y)):
#         ll[t,:] = loglik(x[t,:], y[t,:], f[t,:], p.sigma)
#     loglik_res = -(np.sum(ll))/m
#     return loglik_res


# def GAS_optimize(x, y, score_fun, marker_str):
#     return scipy.optimize.minimize(
#        loglikest,                              # function to minimize (log likelihood y|x,theta)
#        np.array([0.8, 0.9, np.mean(y), 1, np.mean(y)]), # initial parameter values (starting)
#        args=(x, y, score_fun),
#        options ={'eps':1e-09, 'maxiter': 600, 'ftol': 1e-12}, # TODO pass as parameter or create config file
#        method='L-BFGS-B',
#        bounds=((0,  None),             # alpha
#                (-1, 1),                # beta
#                (0.001, np.mean(y)*2),  # omega
#                (0.001, None),          # sigma
#                (0.001, np.mean(y)*2)   # f
#               )
#        )



# def GAS_est(df):
#     """ <High level description of function>
#     Parameters
#     ----------
#     df : pandas DataFrame
#        <Description>
#     Returns
#     -------
#     ret: pandas DataFrame
#        <Description>
#     Raises
#     ------
#     (List and description of specific errors generated and thrown based on intenal function requirements)
#     OtherError when an other error
#     """
#     y = df.net_qty.values.reshape(-1,1)          # observed demand (response)
#     x = df.buy_availability.values.reshape(-1,1)   # buy_availability (explanatory)
    
#     ret = pd.DataFrame()
#     ret[['year','week']] = df[['year','week']]
    
#     score_fun = score_compute
#     marker_str = 'One'
    
#     opt_result = GAS_optimize(x, y, score_fun, marker_str)
    
#     if opt_result.success == False:
#         score_fun= score_compute_2
#         marker_str = 'Two'
#         opt_result = GAS_optimize(x, y, score_fun, marker_str)
        
#     x1par = parameters(opt_result.x)
#     GAS = filterGAS(x1par, x, y, score_fun)
    
#     ret['GAS_est'] = GAS
#     ret['Convergence'] = [opt_result.success] * len(y)
#     ret['Convg type'] = [marker_str] * len(y)
    
#     return ret

In [None]:
# April articles: weekly and season-level review of a fixed list of articles,
# one subplot per article, plus the combined sales history.

aas = ['DV1549', 'EE1152', 'DV1508', 'ED6024', 'CY4574', 'ED9384', 'BK7345', 'DV2400', 'DH5798']

# aoi = 'DV1549'
# a = aoi

# Weekly rows of the requested articles, indexed by week, rounded
preds_aa = (preds[preds.article_number.isin(aas)][['article_number', 'week', 'net_qty', 'GAS_est', 'seas_preds', 'y_hat']].
            sort_values(['article_number', 'week']).
            set_index('week')).round()
        

preds_aa[preds_aa.article_number == 'DV1549']

# Season totals per article. `.sum()` replaces `.apply(sum)`, which passed the
# Python builtin to pandas and relied on pandas substituting its own sum.
pred_aggs_aa = preds_aa.groupby('article_number')[['net_qty', 'GAS_est', 'seas_preds', 'y_hat']].sum().round()



# Convert float totals to integers to remove '.0' endings. The dtype is
# checked directly; probing `[1]` is a positional lookup on a frame indexed
# by article number.
for c in pred_aggs_aa.columns:
    if pd.api.types.is_float_dtype(pred_aggs_aa[c]):
        pred_aggs_aa[c] = pred_aggs_aa[c].fillna(0).astype(int)

# Season y_hat per article. The original expression indexed with `aas[i-1]`
# before `i` was defined, which fails on a fresh kernel.
pred_aggs_aa.loc[~np.isnan(pred_aggs_aa.GAS_est), 'y_hat']





# One subplot per article, two per row; the grid follows the length of `aas`.
n_cols = 2
n_rows = math.ceil(len(aas) / n_cols)

fig = plt.figure(figsize=(25,32)); # width, height
fig.subplots_adjust(hspace=0.5, wspace=0.2); # vertical spacing, horizontal spacing
for i, article in enumerate(aas, start = 1):
    ax = fig.add_subplot(n_rows, n_cols, i)
    preds_ax = preds_aa[preds_aa.article_number == article]
    ax.plot(preds_ax.index, preds_ax['net_qty'], linewidth=4.5, label = 'Observed net_qty')
    ax.plot(preds_ax.index, preds_ax['y_hat'], linewidth=4.5, label = 'Model net_qty estimate')
    # y_hat summed over the weeks that have a GAS estimate
    observed_weeks = preds_aa[(~np.isnan(preds_aa.GAS_est)) & (preds_aa.article_number == article)]
    ax.set_title('Article: ' + article + 
                 ' \n net_qty STD: ' + str(pred_aggs_aa.loc[article, 'net_qty']) + 
                 ' \n Full availability estimate STD: ' + str(observed_weeks.y_hat.sum().astype(int)) +
                 ' \n FW19 full season estimate: ' + str(pred_aggs_aa.loc[article, 'y_hat']), 
                 fontsize=16)
    ax.legend()

fig.savefig('aa_fig.png')    

pass;



preds_aa[['GAS_est', 'net_qty', 'seas_preds']].apply(np.sum).round()

# Season-level rows of the same articles (preds_season is expected to be indexed by article_number here)
preds_season.reset_index()[preds_season.index.isin(aas)]

# Combined weekly sales history of all listed articles
dat_aa = dat0[dat0.article_number.isin(aas)].copy()

dat_aa = pd.merge(
    pd.DataFrame(dat_aa.groupby(['year', 'week'])['net_qty'].sum()).reset_index(),
    dat_aa[['year', 'week']].drop_duplicates()
)

dat_aa.year = [str(x) for x in dat_aa.year]
dat_aa.week = [str(x) for x in dat_aa.week]
# Build a date from "<year>-<week>-1" (the trailing 1 selects Monday under %w)
dat_aa['date'] = [dt.datetime.strptime(x[0] + '-' + x[1] + '-1', "%Y-%W-%w") for x in zip(dat_aa.year, dat_aa.week)]

plt.rcParams["figure.figsize"] = [10,7]
dat_aa[['date', 'net_qty']].set_index('date').plot(linewidth = 4)

In [None]:
# ----- For demo ------
# Monetary impact of following our buy recommendation instead of the eCom
# figure: (difference * margin) when we recommend more than eCom, otherwise
# (difference * cost).

fcsts = preds_season.copy()

# fcsts = pd.read_excel('data/FW20_forecasts.xlsx') # adidas RMA2 -- 21 October

# Vectorised over whole columns. The previous row-wise `apply` called
# np.where once per row, which is slow and returns array objects per row.
# NOTE(review): needs 'eCom_ILS1' (and below 'FW19_total_market_FC', 'notes')
# on preds_season -- confirm they are present when this cell runs.
fcsts['impact'] = np.where(fcsts['buy_recommendation'] > fcsts['eCom_ILS1'],
                           (fcsts['buy_recommendation'] - fcsts['eCom_ILS1']) * fcsts['margin'],
                           (fcsts['eCom_ILS1'] - fcsts['buy_recommendation']) * fcsts['cost'])

fcsts = fcsts.sort_values('impact', ascending = False).round()

fcsts = fcsts[['net_forecast', 'buy_recommendation', 'eCom_ILS1', 'impact', 'brand', 'description', 'type', 'BU', 'RMH', 'price', 'cost', 'margin',  'FW19_total_market_FC', 'notes']]


fcsts.head()

# Spot check of a single article in the weekly predictions and the raw data
preds[preds.article_number == 'CG6708'].head()
dat0[dat0.article_number == 'CG6708'].head()

# EDA Plots
# Full sales history of one article with this season's model output overlaid.

# ---- Plot -----
aoi = 'F34314'
a = aoi

# ------- this season: weekly model output for the article

# `.copy()` so the column assignments below act on an independent frame
# rather than on a filtered slice of `preds` (same pattern as dat_aoi below).
dat_a = preds[preds.article_number == a][['week', 'net_qty', 'GAS_est', 'y_hat']].copy()
dat_a['year'] = '2019'
dat_a.week = [str(x) for x in dat_a.week]
# Build a date from "<year>-<week>-1" (the trailing 1 selects Monday under %w)
dat_a['date'] = [dt.datetime.strptime(x[0] + '-' + x[1] + '-1', "%Y-%W-%w") for x in zip(dat_a.year, dat_a.week)]

# -------

print('This season:')
print(dat_a[['net_qty', 'GAS_est', 'y_hat']].apply(np.sum).round())

# ------- history: net_qty summed per (year, week) over the raw transactions

dat_aoi = dat0[dat0.article_number == aoi].copy()

dat_aoi = pd.merge(pd.DataFrame(dat_aoi.groupby(['year', 'week'])['net_qty'].sum()).reset_index(),dat_aoi[['year', 'week']].drop_duplicates())
dat_aoi.year = [str(x) for x in dat_aoi.year]
dat_aoi.week = [str(x) for x in dat_aoi.week]
dat_aoi['date'] = [dt.datetime.strptime(x[0] + '-' + x[1] + '-1', "%Y-%W-%w") for x in zip(dat_aoi.year, dat_aoi.week)]

# --------

full = pd.merge(dat_a, dat_aoi, how = 'outer').sort_values('date').set_index('date')

# Drop the rows dated after 27 May 2019 that carry no model value
full = full[~((full.index > dt.datetime(2019, 5, 27))  & full.y_hat.isna())]
# full = full.drop_duplicates(subset = ['week'], keep = 'last')

full = full.rename(columns = {'y_hat': 'model net_qty'})

# --------

full.loc[dt.datetime(2019, 11, 4), 'net_qty'] = np.nan # manually change entry

full # ************

# ----
plt.rcParams["figure.figsize"] = [20,8]
full.drop('GAS_est', axis = 1).plot(linewidth = 3)
plt.ylabel('net_qty')
plt.title('Article net_qty: ' + aoi)

# -------

fcsts[fcsts.index == aoi]



In [None]:
# dat_GAS_a = np.random.choice(dat_GAS.article_number.unique(), size = 100, replace = False)
# dat_GAS = dat_GAS[dat_GAS.article_number.isin(dat_GAS_a)]

In [None]:
# impact evaluations -- 
# Revenue per article and its split between carryover and non-carryover
# articles, for the two range tables.
# NOTE(review): rma2_adi, rma2_rbk, carryovers and addtl_classics are not
# defined in this part of the notebook -- confirm they are loaded earlier.
# `carryovers.union(...)` presumably means carryovers is a set; verify.

# Revenue = forecast units * retail price
rma2_adi['rev'] = rma2_adi['Sum of WE eCom'] * rma2_adi['Market Retail Price']

# Flag the articles that are part of the carryover list
rma2_adi['carryover'] = rma2_adi['Article Number'].isin(carryovers)

print('Total range: ', len(rma2_adi['Article Number'].unique()))
rma2_adi.head()
rma2_adi.carryover.value_counts() # non/carryover

# Revenue total
rma2_adi.rev.sum()
rma2_adi.groupby('carryover')['rev'].sum().round()

# For the second table, the carryover list is extended with addtl_classics
rbk_carryovers = carryovers.union(addtl_classics)

rma2_rbk['rev'] = rma2_rbk['RBK WE eCom'] * rma2_rbk['Hub Retail Price']

rma2_rbk['carryover'] = rma2_rbk['Article Number'].isin(rbk_carryovers)

In [None]:

# --- Overbuy statistics ---
# preds_season['pct_overbuy'] = (preds_season.Opt_Ovb - preds_season.y_hat)/preds_season.y_hat*100
# b = np.array([0, 1000, 2000, 5000, 10000, 50000])
# preds_season['bins'] = pd.cut(preds_season.y_hat, bins = b)
# preds_season.groupby('bins')['pct_overbuy'].describe().round()
