In [4]:
import numpy as np
import pandas as pd
import multiprocessing

import matplotlib.pyplot as plt
import bokeh
import bokeh.io
from bokeh.plotting import figure
from bokeh.io import output_notebook, show

# init_notebook_mode()

import seaborn as sns

import re
import math
import copy

from collections import defaultdict
import csv
import itertools
import datetime 
from datetime import datetime
import time
import dateutil.parser
import pickle
import random

import gc
import zipfile
import sys, getopt
import os

from IPython.core.interactiveshell import InteractiveShell
from io import StringIO

import dask.dataframe as dd
#from chest import Chest

# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"
#InteractiveShell.ast_node_interactivity = "last"

# Magic function to make matplotlib inline
%matplotlib inline

# Request both PNG and high-resolution ("retina") output from the inline backend.
%config InlineBackend.figure_formats = {'png', 'retina'}

# Set up Bokeh for inline viewing
bokeh.io.output_notebook()

import dask.dataframe as ddf
import dask.array as da

# Raise pandas' display limits so wide / long frames are not truncated in cell output.
# The fully-qualified "display." keys are used on purpose: option names are matched as
# patterns, and the bare 'max_columns' / 'max_rows' also match the 'styler.render.*'
# options in newer pandas releases, which makes set_option raise OptionError.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 800)

## Overstock-SupplyChain-Understock (OSU) Correction

In [None]:
# # Wall time: 3min 4s
# %%time

# dat = pd.read_csv('Transaction SS19.csv', low_memory=False) # *** DATA ***
# dat['consumer_order_date'] = pd.to_datetime(dat['consumer_order_date'])

# # Transaction subset: SS19
# dat = dat[['consumer_order_date', 'article_number', 'gross_demand_quantity', 'Sale', 'season']]
# dat = dat[dat['season'] == 'SS19']

# # Subset to non-clearance transactions only --- change here for David/Mike's request
# dat = dat[dat['Sale'] == 0]

# %%time
# # Wall time: 4min 35s

# dat_SS20_range = pd.read_csv('dat_SS20_range.csv') # *** DATA ***
# SS20_range = dat_SS20_range['Article Number'].unique()
# dat = dat[[(a in SS20_range) for a in dat['article_number']]]

# # 'aggregate' to weekly sums by article for buy_availability merge and adjustment
# dat.set_index('consumer_order_date', inplace = True)

# dat = dat[['article_number', 'gross_demand_quantity']].groupby(['article_number']).resample('W').sum()
# dat.reset_index(inplace=True)

# # Add 'week' and 'year' for merging with stock (buy_availability) data (b/c min_date_of_week)
# dat['week'] = [t.week for t in dat['consumer_order_date']]
# dat['year'] = [t.year for t in dat['consumer_order_date']]

In [3]:
# Weekly demand per article, cached on disk by `dat.to_csv('dat_SS20range.csv')`
# (see the commented-out preparation cell above). The first CSV column is the saved
# index; the order date is parsed back into a datetime column.
dat = (
    pd.read_csv('dat_SS20range.csv', index_col=0, low_memory=False)  # *** DATA ***
    .assign(consumer_order_date=lambda weekly: pd.to_datetime(weekly['consumer_order_date']))
)

In [20]:
dat.head()

Unnamed: 0,article_number,consumer_order_date,gross_demand_quantity,week,year
0,11040,2018-12-02,7.0,48,2018
1,11040,2018-12-09,15.0,49,2018
2,11040,2018-12-16,12.0,50,2018
3,11040,2018-12-23,6.0,51,2018
4,11040,2018-12-30,4.0,52,2018


In [4]:
# Stock / availability extract; the first CSV column is used as the index.
stock = pd.read_csv(
    'Stock.csv',
    index_col=0,
    low_memory=False,
)  # *** DATA ***

In [5]:
# Split `week_id` into year / week: the first four characters of its string form are
# taken as the year and the next two as the week number.
week_id_text = stock['week_id'].astype(str)

# Tidy: keep only buy availability, move the index into a regular column
# (presumably `article_number`, which the later merge keys on -- confirm against
# Stock.csv) and give the availability column a plain name.
stock = (
    stock
    .assign(year=week_id_text.str[0:4], week=week_id_text.str[4:6])
    .drop(columns=['week_id', 'avg(ecom_available_stock)', 'avg(size_availability)'])
    .reset_index()
    .rename(columns={'avg(buy_availability)': 'buy_availability'})
)

# Discard 2016 and 2017. The explicit .copy() makes the result an independent frame,
# so the dtype assignment below does not write into a filtered slice
# (which is what triggers SettingWithCopyWarning).
stock = stock[~stock['year'].isin(['2016', '2017'])].copy()

# Integer keys so they line up with the integer `year` / `week` columns of `dat`.
stock[['year', 'week']] = stock[['year', 'week']].astype('int64')

In [6]:
# Merge weekly demand df and buy_availability df -- for understock correction.
# Left join: every demand week is kept, whether or not a stock record exists for it.
# NOTE(review): if `stock` holds more than one row per (article_number, year, week),
# this join duplicates demand rows -- confirm the key is unique in Stock.csv.
dat = dat.merge(stock, on=['article_number', 'year', 'week'], how='left')

# Assume buy_availability = 1 where no stock record matched.
# Only this column is filled: a frame-wide fillna(1) would also turn any other
# missing value (e.g. a missing demand quantity) into 1.
dat['buy_availability'] = dat['buy_availability'].fillna(1)

In [7]:
# dat[[x in ['DQ2136', 'DQ3089', 'DU0367', 'DU0369', 'DU0382', 'EE9806', 'EE9809', 'G28417'] for x in dat['article_number']]] 
# ------- Joerian: buy_availability DNE for these articles, for some reason -------

In [8]:
# ----- Weekly average demand per article, restricted to weeks with buy_availability > 0.35 -----
dat_stocked = (
    dat.loc[dat['buy_availability'] > 0.35]
    .groupby(['article_number'])['gross_demand_quantity']
    .mean()
    .to_frame('corrected_weekly_avg_gross_demand_quantity')
)

# {'604433', '620635', 'BQ1935', 'BQ2001', 'BS0980', 'CV4000', 'CY8772', 'G27026'}
# These articles have ZERO weeks with: (1) buy_availability > 0.35   ***AND***   (2) gross_demand_quantity > 0

# Scale the weekly average up to a full season (26 weeks) to estimate full-season demand.
dat_stocked['corrected_gross_demand_quantity'] = (
    26 * dat_stocked['corrected_weekly_avg_gross_demand_quantity']
)

# ----- *Observed* full-season demand per article, joined with the corrected estimate -----
# Outer join on article_number; the intermediate weekly average is not carried forward.
dat_season = (
    dat.groupby(['article_number'])['gross_demand_quantity']
    .sum()
    .to_frame('season_gross_demand_quantity')
    .merge(dat_stocked, left_index=True, right_index=True, how='outer')
    .drop(columns='corrected_weekly_avg_gross_demand_quantity')
)

# Articles without a corrected estimate fall back to their observed season demand.
dat_season['corrected_gross_demand_quantity'] = (
    dat_season['corrected_gross_demand_quantity']
    .fillna(dat_season['season_gross_demand_quantity'])
)

# ----- Buyers' eCom forecasts: rows with missing values dropped, SS19 season only -----
preds = (
    pd.read_csv('Buyers predictions.csv', low_memory=False, index_col=0)
    [['season', 'ecom_marketing_forecast']]
    .reset_index()
    .dropna()
    .loc[lambda buyers: buyers['season'] == 'SS19']
)

In [9]:
# Articles at or below this observed SS19 demand are excluded from the comparison.
# NOTE(review): the origin of the value 83 is not documented in this notebook.
MIN_SEASON_DEMAND = 83

# The analytics SS20 forecast is the corrected SS19 demand scaled by this factor.
SS20_GROWTH_FACTOR = 1.1

# Attach the buyers' SS19 forecast to each article (left join: articles without a
# forecast are kept), round the numeric columns, keep the columns of interest, drop
# low-demand articles and add the SS20 prediction.
# .assign() builds a new frame, so the new column is never written into a filtered
# slice (the original pattern raises SettingWithCopyWarning).
dat_season = (
    dat_season
    .merge(preds, left_index=True, right_on='article', how='left')
    .round()
    [['article', 'season_gross_demand_quantity',
      'corrected_gross_demand_quantity', 'ecom_marketing_forecast']]
    .loc[lambda season: season['season_gross_demand_quantity'] > MIN_SEASON_DEMAND]
    .assign(
        DAA_SS20_prediction=lambda season: (
            season['corrected_gross_demand_quantity'] * SS20_GROWTH_FACTOR
        )
    )
)

### Make pretty: rename and reorder columns for presentation

In [10]:
# Give the columns presentation names, then put them in report order.
dat_season = (
    dat_season
    .rename(columns={
        'article': 'Article Number',
        'season_gross_demand_quantity': 'SS19 Demand',
        'corrected_gross_demand_quantity': 'SS19 Corrected Demand',
        'ecom_marketing_forecast': 'SS19 eCom Forecast',
        'DAA_SS20_prediction': 'Analytics SS20 Forecast',
    })
    [[
        'Article Number',
        'SS19 eCom Forecast',
        'SS19 Demand',
        'SS19 Corrected Demand',
        'Analytics SS20 Forecast',
    ]]
)

In [5]:
# Reload the season-level table from its CSV snapshot (written with
# `dat_season.to_csv('dat_season.csv')`); the first CSV column is restored as the index.
# NOTE(review): this replaces any `dat_season` already in memory with the file contents.
dat_season = pd.read_csv(
    'dat_season.csv',
    index_col=0,
    low_memory=False,
)

In [80]:
# --- David code ---

# buyers = pd.read_csv('article_managers.csv', low_memory=False, index_col = 0)

# raw_ILS = pd.read_excel('SS20_RMA2_13May.xlsx',
#                         sheet_name='RMA-02 Market Range Plan', header=4)

# cols_ILS = {
#     'Article Number': 'article_no', 
#     'CM': 'article_manager',
#     'Product Division': 'product_div',
#     'Article Business Segment': 'bus_seg',
#     'GTM Target Retail Price EUR':'retail_price',
#     'Product Group':'product_group',
#     'Product Type':'product_type',
#     'Sports Category':'category',
#     'WE eCom ILS 1 BUY SIGN OFF final': 'quantity'
# }

# # reduce to fields of interest and add a max_revenue = price*quantity field

# ils1 = raw_ILS[list(cols_ILS.keys())].rename(columns = cols_ILS).fillna(0).set_index('article_no')

# ils1['max_revenue'] = ils1['quantity'] * ils1.retail_price

# ils1.to_csv('ils1.csv')

In [6]:
# Range-plan extract cached by the commented-out export above (`ils1.to_csv('ils1.csv')`).
# No index column is requested, so `article_no` comes back as an ordinary column.
ils1 = pd.read_csv(filepath_or_buffer='ils1.csv')

In [7]:
# Add manager, eCom SS20 quantity and retail price to each article's season figures.
# Left join: articles missing from the range plan are kept.
forecasts = (
    dat_season
    .merge(
        ils1[['article_no', 'article_manager', 'quantity', 'retail_price']],
        left_on='Article Number',
        right_on='article_no',
        how='left',
    )
    .drop(columns='article_no')
    .rename(columns={'quantity': 'eCom SS20 Forecast'})
    .set_index('Article Number')
    [[
        'SS19 eCom Forecast',
        'SS19 Demand',
        'SS19 Corrected Demand',
        'eCom SS20 Forecast',
        'Analytics SS20 Forecast',
        'article_manager',
        'retail_price',
    ]]
)

In [None]:
# forecasts[forecasts.article_manager == '0'] # missing managers

In [8]:
# ---- Add cost to DF ----
# One rounded mean cost per index value of the cost file.
cost_price = (
    pd.read_csv('Cost Price.csv', low_memory=False, index_col=0)
    .rename(columns={'avg(cost_of_sales)': 'cost'})
)
cost_price = cost_price['cost'].groupby(level=0).mean().round().to_frame()

forecasts = forecasts.merge(cost_price, left_index=True, right_index=True, how='left')

# Price the gap between the two SS20 forecasts:
#   eCom above analytics  -> gap * cost
#   otherwise             -> -gap * (retail_price - cost)
forecast_gap = forecasts['eCom SS20 Forecast'] - forecasts['Analytics SS20 Forecast']
unit_margin = forecasts['retail_price'] - forecasts['cost']
forecasts['Difference-Cost'] = np.where(
    forecast_gap > 0,
    forecast_gap * forecasts['cost'],
    forecast_gap * (-1) * unit_margin,
)
del forecast_gap, unit_margin

# Largest discrepancy first.
forecasts = forecasts.sort_values('Difference-Cost', ascending=False)

In [None]:
# Articles whose manager field holds the placeholder string '0'.
# Previously observed: ['218977', 'CF6925', 'CF6926', 'X35859', 'X53042']
# (overview of all managers: forecasts['article_manager'].value_counts())
forecasts.loc[forecasts['article_manager'].eq('0')].index

In [16]:
# Free-text reviewer comments per article, collected in two passes and assembled into
# a single frame in one step (instead of enlarging the frame one `.loc` row at a time).

# ----- post-apocalypse -----
# 'B42200': 'This is strange article history data. In SS19 the sales were paltry, and the buy availability was below 0.35 most weeks. In the few weeks with sufficient buy availability, demand was 0. Is there reason to believe demand will be 3000?'

# Based on gross_demand_quantity
gross_demand_comments = {
    'DB3258': 'This article had a very short SS19, but sold quite well during that time. Extrapolated out to a full season at that weekly rate the sales would have been 18000. Growth on this numbers yields our SS20 forecast.',
    'G27706': 'The estimated full season demand in SS19 is about 50000. Do you have reason to expect a decline?',
    'B28128': 'The eCom prediction this season was spot on; is there reason to believe demand will drop by 50%?',
    'S75104': 'Demand in SS19 was about twice what was predicted; if demand for this article grows---or stays the same---the current eCom forecast would again be half of demand. Is this intentional? ',
    'EE8836': 'This article missed the first six weeks of the season, but correcting for that gives it an estimated SS19 demand of about 7500. Growth on top of that yields our forecast of around 8300.',
    'G27637': 'In a familiar pattern, this article was not introduced until well into the season (March), and so SS20 full season sales should be much higher than the partial SS19, hence our prediction.',
    'B37616': 'This article sold well, then abruptly stopped selling in the beginning of February; I do not have information on why this is. Assuming sufficient stock throughout SS20, full season sales should be higher than its abbreviated 10 week SS19.',
    'G27639': 'This article debuted in March, but in a full season sales should be much higher; unless for some reason it will not be available for the full SS20 season.',
    'CG5675': 'Not sure why, but we are showing an eCom forecast of 0, which is obviously quite different than our forecast. :)',
    'DP2398': 'Demand in SS19 was about twice what was predicted; if demand for this article grows---or stays the same---the current eCom forecast would again be half of demand. Perhaps there is information we do not have access to that justifies this.',
    'B28129': 'Demand in SS19 should be about 7800, with growth SS20 would see demand of around 8600. Is there reason to belive demand will decrease?',
    'F36215': 'There were some supply chain fluctuations in SS19 where low buy availability affected demand. Correcting for this we estimate SS19 demand would have been about 5700. Growth on this account for our SS20 forecast of about 6300.',
    'EE8925': 'This article missed all of December and half of January in SS19. Correcting for this we estimate an SS19 full season demand of about 4000; adding growth gives our SS20 prediction.',
    'M20325': 'This is also a unique article. Sales for all of SS19 should be around 29500, so typical growth on that puts our estimate a bit above yours. The percentage difference between our prediction and yours is small, but underprediction is expensive, and therefore this discrepancy climed quite high on the list.',
    'BB5478': 'SS19 demand should be around 8800; do you have reason to expect a decline?',
    'G26880': 'Similar story; SS19 demand will end up at about 3700; our prediction is therefore about 4000 for SS20.',
    'BD7633': 'I see SS19 demand at about 2700, and thus SS20 at about 3000; is reason to expect a decline?',
    'B22705': 'By season\'s end demand should be about 5500; is there reason to expect a drop in SS20',
    'AQ1134': 'Demand for SS19 should be about 2100, but eCom forecast is 565. Reason to expect decline?',
    'M20605': 'Demand should be between 6500 and 7000 by end of SS19; is there reason to expect a decline in SS20?',
    'B96578': 'There were supply chain issues in the December which limited availability, and again in late January/early February. Without these problems we think demand would have been about 7000, and growth on that gives our SS20 prediction.',
    'F36485': 'Why a drop from ~2000 in SS19 to ~500 in SS20?',
    'G28109': 'This article did not start selling until February, but selling over a full season at the same rate would have yielded an estimated demand of ~2300. Growth on this yields our SS20 forecast.',
    'B75806': 'eCom\'s number for SS19 was spot on: by season\'s end demand should be right around 5000. Should we expect a drop in SS20?',
    'F34314': 'There were two weeks where buy availability affected demand. SS19 demand should be between 3500 and 4000, plus growth gives our SS20.',
}

# ------- big net-diffs ---------
net_diff_comments = {
    '280647': 'SS19 fell far short of expectations; and eCom SS20 forecast is almost 3X SS19 net. Is there sufficient justification for this big predicted increase?',
    '280648': 'Fell far short of expectations in SS19; is there reason to believe net demand will quadruple from SS19 to SS20?',
    '288022': 'Fell far short of expectations in SS19; is there reason to believe net demand will almost triple from SS19 to SS20?',
    'CW1275': 'The net demand in SS19 will be about 7000; is there reason to believe this will jump to 18000 in SS20?',
    'D95958': 'The net demand in SS19 will be around 4000; is there reason to believe this will jump to ~14000 in SS20?',
    'DT7964': 'The net demand in SS19 will be around 4000; is there reason to believe this will jump to ~13000 in SS20?',
    'F99787': 'Is there reason to expect demand to double from SS19 to SS20?',
    'G28109': 'Very short SS19, but with a full season net quantity would have been an estimated ~1300; this puts our forecast at 3X yours.',
    'M20324': 'This is a unique article. Sales for all of SS19 should be around 12500, so typical growth on that puts our estimate quite a bit below yours; do you have reason to believe demand will increase so much?',
    'S82137': 'SS19 net should be about 10400, but eCom SS20 forecast is twice that. Why such a large increase?',
}

# NOTE(review): 'G28109' appears in both passes. As with the original sequential
# assignments, the net-diff text replaces the gross-demand text while the article keeps
# its original row position. Confirm which of the two comments should be sent.
article_comments = {**gross_demand_comments, **net_diff_comments}

# Row 0 is a placeholder ('test') that the following cell slices off again.
comments = pd.DataFrame(
    {'comments': ['test', *article_comments.values()]},
    index=[0, *article_comments.keys()],
)



In [29]:
# Move the article labels out of the index into an `article` column and drop the
# placeholder first row. The guard makes the cell safe to re-run: without it a second
# execution would slice off a real comment and add a second `article` column.
if 'article' not in comments.columns:
    comments = (
        comments
        .reset_index()
        .rename(columns={'index': 'article'})
        .iloc[1:]
    )

In [30]:
comments

Unnamed: 0,article,comments
1,DB3258,"This article had a very short SS19, but sold q..."
2,G27706,The estimated full season demand in SS19 is ab...
3,B28128,The eCom prediction this season was spot on; i...
4,S75104,Demand in SS19 was about twice what was predic...
5,EE8836,This article missed the first six weeks of the...
6,G27637,"In a familiar pattern, this article was not in..."
7,B37616,"This article sold well, then abruptly stopped ..."
8,G27639,"This article debuted in March, but in a full s..."
9,CG5675,"Not sure why, but we are showing an eCom forec..."
10,DP2398,Demand in SS19 was about twice what was predic...


In [34]:
# Keep only the forecast rows that have a reviewer comment (inner join on the
# article number), indexed by article.
forecasts17th = (
    forecasts
    .merge(comments, left_index=True, right_on='article')
    .set_index('article')
)





In [37]:
# for_mike.to_excel('mike.xlsx')

## Plots

In [None]:
# Inspect a single article: its forecast row, its weekly records, and two time-series
# plots (buy availability and weekly gross demand).
article = 'DB3258'
d = dat[dat['article_number'] == article]

# One column per article (here a single one), indexed by week date.
pivoted = d.pivot(index = 'consumer_order_date', columns = 'article_number', values = 'buy_availability')
pivoted2 = d.pivot(index = 'consumer_order_date', columns = 'article_number', values = 'gross_demand_quantity')    

# Bare expressions: each one is rendered as cell output because
# ast_node_interactivity is set to "all" at the top of the notebook.
forecasts[forecasts.index == article]

d
d.gross_demand_quantity.sum()

# Wide, short figures. This changes the global matplotlib default, so it also
# applies to any figure created after this cell.
plt.rcParams["figure.figsize"] = [8,2.5]

pivoted.plot(linewidth = 4)
plt.title('Buy Availability Over Time')
plt.ylabel('Buy Availability')

pivoted2.plot(linewidth = 4)
plt.title('Weekly Gross Demand Quantity')
plt.ylabel('Gross Demand Quantity')

In [38]:
# Persist the forecast comparison table (index included) next to the notebook.
forecasts.to_csv(path_or_buf='forecasts.csv')