In [2]:
import numpy as np
import pandas as pd
import multiprocessing

import matplotlib.pyplot as plt
import bokeh
import bokeh.io
from bokeh.plotting import figure
from bokeh.io import output_notebook, show

# init_notebook_mode()

import seaborn as sns

import re
import math
import copy

from collections import defaultdict
import csv
import itertools
import datetime 
from datetime import datetime
import time
import dateutil.parser
import pickle
import random

import gc
import zipfile
import sys, getopt
import os

from IPython.core.interactiveshell import InteractiveShell
from io import StringIO

import dask.dataframe as dd
#from chest import Chest

# Echo the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"
#InteractiveShell.ast_node_interactivity = "last"

# Magic function to make matplotlib inline
%matplotlib inline

# Request both PNG and 'retina' figure formats from the inline backend.
%config InlineBackend.figure_formats = {'png', 'retina'}

# Set up Bokeh for inline viewing
bokeh.io.output_notebook()

import dask.dataframe as ddf
import dask.array as da

# Widen the pandas display limits so wide / long frames are shown in full.
# Fully qualified option names are used on purpose: the bare patterns
# 'max_columns' / 'max_rows' are resolved by regex and become ambiguous (and
# raise OptionError) on pandas versions that also define
# 'styler.render.max_columns' / 'styler.render.max_rows'.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 800)

## Overstock-SupplyChain-Understock (OSU) Correction

In [6]:
# %%time

# # Wall time: 3min 4s

# dat = pd.read_csv('Transaction SS19.csv', low_memory=False) # *** DATA ***
# dat['consumer_order_date'] = pd.to_datetime(dat['consumer_order_date'])

CPU times: user 2min 56s, sys: 16.2 s, total: 3min 12s
Wall time: 3min 8s


In [10]:
# # Transaction subset: SS19
# dat = dat[['consumer_order_date', 'article_number', 'net_qty', 'Sale', 'season']]
# dat = dat[dat['season'] == 'SS19']

# # Subset to non-clearance transactions only --- change here for David/Mike's request
# dat = dat[dat['Sale'] == 0]

# # Wall time: 4min 35s

# dat_SS20_range = pd.read_csv('dat_SS20_range.csv') # *** DATA ***
# SS20_range = dat_SS20_range['Article Number'].unique()
# dat = dat[[(a in SS20_range) for a in dat['article_number']]]

# # 'aggregate' to weekly sums by article for buy_availability merge and adjustment
# dat.set_index('consumer_order_date', inplace = True)

# dat = dat[['article_number', 'net_qty']].groupby(['article_number']).resample('W').sum()
# dat.reset_index(inplace=True)

# # Add 'week' and 'year' for merging with stock (buy_availability) data (b/c min_date_of_week)
# dat['week'] = [t.week for t in dat['consumer_order_date']]
# dat['year'] = [t.year for t in dat['consumer_order_date']]

In [11]:
# NOTE(review): the only live assignment of `dat` is the read_csv of
# 'dat_SS20range_net_qty.csv' in a cell further down (the cells above are
# commented out), so on a fresh kernel this preview raises NameError unless
# that loading cell has been run first.
dat.head()

Unnamed: 0,article_number,consumer_order_date,net_qty,week,year
0,11040,2018-12-02,6.0,48,2018
1,11040,2018-12-09,14.0,49,2018
2,11040,2018-12-16,8.0,50,2018
3,11040,2018-12-23,3.0,51,2018
4,11040,2018-12-30,3.0,52,2018


In [3]:
# Weekly net_qty per article, read back from the cached CSV.
# (Presumably produced once by the commented-out export below.)
# dat.to_csv('dat_SS20range_net_qty.csv')

dat = pd.read_csv(
    'dat_SS20range_net_qty.csv',
    index_col=0,
    low_memory=False,
)  # *** DATA ***

# The CSV stores the week-ending date as text; turn it back into datetimes.
dat = dat.assign(consumer_order_date=pd.to_datetime(dat['consumer_order_date']))

In [4]:
# Stock data -- the first CSV column becomes the index.  *** DATA ***
stock = pd.read_csv(
    'Stock.csv',
    index_col=0,
    low_memory=False,
)

In [5]:
# ---- Tunable assumptions of the understock correction ----
BUY_AVAILABILITY_THRESHOLD = 0.35  # only weeks with buy_availability above this feed the corrected average
SEASON_WEEKS = 26                  # weeks used to scale a weekly average up to a full season
MIN_SEASON_NET_QTY = 50            # articles with observed season net_qty at or below this are dropped
SS20_UPLIFT = 1.1                  # multiplier applied to corrected SS19 net_qty for the SS20 figure

# NOTE: this cell reassigns `stock` and `dat` from themselves (week_id is
# dropped, `dat` gains the stock columns), so it cannot be re-run on its own;
# re-run the two loading cells first.

# ---- Tidy the stock table ----
# week_id is split positionally: characters 0-3 -> year, characters 4-5 -> week.
week_id_text = stock['week_id'].astype(str)
stock = (
    stock
    .assign(year=week_id_text.str[0:4], week=week_id_text.str[4:6])
    .drop(columns='week_id')
    .reset_index()
    .drop(columns=['avg(ecom_available_stock)', 'avg(size_availability)'])
    .rename(columns={'avg(buy_availability)': 'buy_availability'})
)

# Discard the 2016 / 2017 rows, then make year/week integers so they match the
# integer week/year columns of `dat`. astype() on the filtered frame returns a
# new frame, which avoids writing into a slice (SettingWithCopyWarning).
stock = stock[~stock['year'].isin(['2016', '2017'])]
stock = stock.astype({'year': 'int64', 'week': 'int64'})

# ---- Merge weekly demand with buy_availability -- for understock correction ----
dat = pd.merge(dat, stock, on=['article_number', 'year', 'week'], how='left')

# Assume buy_availability = 1 for article-weeks with no stock record.
# Only this column is filled: a frame-wide fillna(1) would also overwrite
# missing values in every other column with 1.
dat['buy_availability'] = dat['buy_availability'].fillna(1)

# ----- Calculate *observed* full season net_qty per article -----
dat_season = (
    dat.groupby('article_number')['net_qty']
    .sum()
    .to_frame('season_net_qty')
)

# WEEKLY averages per article over the weeks with buy_availability above the threshold
dat_stocked = (
    dat.loc[dat['buy_availability'] > BUY_AVAILABILITY_THRESHOLD]
    .groupby('article_number')['net_qty']
    .mean()
    .to_frame('corrected_weekly_avg_net_qty')
)

# Author's note (not re-verified here):
# {'604433', '620635', 'BQ1935', 'BQ2001', 'BS0980', 'CV4000', 'CY8772', 'G27026'}
# These articles have ZERO weeks with: (1) buy_availability > 0.35   ***AND***   (2) demand quantity > 0

# Extend to full season (SEASON_WEEKS weeks) to estimate full season demand
dat_stocked['corrected_net_qty'] = SEASON_WEEKS * dat_stocked['corrected_weekly_avg_net_qty']

dat_season = pd.merge(
    dat_season,
    dat_stocked[['corrected_net_qty']],
    left_index=True,
    right_index=True,
    how='outer',
)

# Articles without any qualifying week have no corrected figure; fall back to
# the observed season total for them.
dat_season['corrected_net_qty'] = (
    dat_season['corrected_net_qty'].fillna(dat_season['season_net_qty'])
)

# ---- Buyers' SS19 forecasts ----
# reset_index() turns the CSV's first column into a regular column; the merge
# below expects that column to be called 'article'.
preds = (
    pd.read_csv('Buyers predictions.csv', low_memory=False, index_col=0)
    [['season', 'ecom_marketing_forecast']]
    .reset_index()
    .dropna()
)
preds = preds[preds['season'] == 'SS19']

# Left merge keeps every article of dat_season; numeric columns are rounded to
# whole units afterwards.
dat_season = pd.merge(
    dat_season,
    preds,
    left_index=True,
    right_on='article',
    how='left',
).round()

dat_season = dat_season[['article', 'season_net_qty',
                         'corrected_net_qty', 'ecom_marketing_forecast']]

# .copy() so the column added next is written to an independent frame rather
# than to a filtered view of the previous one.
dat_season = dat_season[dat_season['season_net_qty'] > MIN_SEASON_NET_QTY].copy()
dat_season['DAA_SS20_prediction'] = dat_season['corrected_net_qty'] * SS20_UPLIFT

In [7]:
# Write `dat` in its current state (after the merge with the stock table in the
# cell above) to disk; the frame's index is written as the first CSV column.
dat.to_csv('dat_netqty_buyavail.csv')

### Make pretty

In [80]:
# Swap the internal column names for presentation labels ...
presentation_labels = {
    'article': 'Article Number',
    'season_net_qty': 'SS19 Net Qty',
    'corrected_net_qty': 'SS19 Corrected Net Qty',
    'ecom_marketing_forecast': 'SS19 eCom Forecast',
    'DAA_SS20_prediction': 'Analytics SS20 Net Qty Forecast',
}
dat_season = dat_season.rename(columns=presentation_labels)

# ... and put the columns into report order (eCom forecast ahead of the
# observed / corrected quantities).
report_order = [
    'Article Number',
    'SS19 eCom Forecast',
    'SS19 Net Qty',
    'SS19 Corrected Net Qty',
    'Analytics SS20 Net Qty Forecast',
]
dat_season = dat_season[report_order]

In [81]:
# dat_season.to_csv('dat_season_net_qty.csv')
# dat_season = pd.read_csv('dat_season_net_qty.csv', low_memory=False, index_col = 0) 

In [82]:
# Load 'ils1.csv'; a later cell reads its article_no, article_manager,
# quantity and retail_price columns.
ils1 = pd.read_csv('ils1.csv')

In [83]:
# Attach manager, eCom SS20 quantity and retail price from ils1 to every
# article in dat_season (left merge: articles missing from ils1 are kept).
ils1_fields = ils1[['article_no', 'article_manager', 'quantity', 'retail_price']]

forecasts = dat_season.merge(
    ils1_fields,
    how='left',
    left_on='Article Number',
    right_on='article_no',
)
forecasts = forecasts.set_index('Article Number')
forecasts = forecasts.drop('article_no', axis=1)
forecasts = forecasts.rename(columns={'quantity': 'eCom SS20 Forecast'})

# Final column order for the forecast comparison table.
forecast_columns = [
    'SS19 eCom Forecast',
    'SS19 Net Qty',
    'SS19 Corrected Net Qty',
    'eCom SS20 Forecast',
    'Analytics SS20 Net Qty Forecast',
    'article_manager',
    'retail_price',
]
forecasts = forecasts[forecast_columns]

In [84]:
# ---- Add cost to DF ----
# Read the cost table (first CSV column is the index) and give the cost column
# a short name.
cost_price = (
    pd.read_csv('Cost Price.csv', low_memory=False, index_col=0)
    .rename(columns={'avg(cost_of_sales)': 'cost'})
)

# Collapse to one row per index value: mean cost, rounded to a whole number.
cost_price = (
    cost_price
    .groupby(level=0)['cost']
    .mean()
    .round()
    .to_frame()
)

In [85]:
# Bring the per-article cost in by index; articles without a cost row keep NaN.
forecasts = forecasts.merge(
    cost_price,
    how='left',
    left_index=True,
    right_index=True,
)

In [86]:
# Per-article gap between the two SS20 forecasts: positive when the eCom
# forecast is above the analytics forecast, negative when it is below.
# Consumed (and deleted) by the next cell.
diff = forecasts['eCom SS20 Forecast'] - forecasts['Analytics SS20 Net Qty Forecast']

In [87]:
# Put a money value on the forecast gap:
#   eCom above analytics -> surplus units valued at unit cost
#   otherwise            -> shortfall units valued at (retail_price - cost)
ecom_is_higher = diff > 0
surplus_value = diff * forecasts['cost']
unit_margin = forecasts['retail_price'] - forecasts['cost']
shortfall_value = diff * (-1) * unit_margin

forecasts['Difference-Cost'] = np.where(ecom_is_higher, surplus_value, shortfall_value)
del diff

# Largest monetary gap first.
forecasts = forecasts.sort_values('Difference-Cost', ascending=False)

In [88]:
len(forecasts.index.unique()) # distinct article numbers in forecasts (last recorded output: 642; the earlier note of 638 was stale)

642

In [None]:
# Problem articles
#   B42200 -- Analytics corrected demand is 0, forecast is 0
#             (zero demand in buy_availability > 0.35 weeks)
#   CG5675 -- eCom forecast of 0
#   F97634 -- eCom forecast of 0
problem_articles = ['B42200', 'CG5675', 'F97634']

# Select by membership instead of .loc with a tuple: on a flat index a tuple is
# taken as one key, not as several labels, and isin() also tolerates a label
# that is no longer present in `forecasts`.
forecasts[forecasts.index.isin(problem_articles)]

# Weekly rows behind the B42200 figures. The original compared a column named
# 'B42200' with an undefined `article`; the article id is a value of the
# 'article_number' column.
dat[dat['article_number'] == 'B42200']

In [89]:
# Display the comparison table, sorted by 'Difference-Cost' descending in the
# cell above. NOTE(review): this renders the whole frame; consider .head(n).
forecasts

Unnamed: 0_level_0,SS19 eCom Forecast,SS19 Net Qty,SS19 Corrected Net Qty,eCom SS20 Forecast,Analytics SS20 Net Qty Forecast,article_manager,retail_price,cost,Difference-Cost
Article Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
DB3258,2200.0,3629.0,8578.0,1863.0,9435.8,Thibault,90.0,13.0,583105.6
B28128,15000.0,9678.0,10484.0,6863.0,11532.4,Thibault,100.0,17.0,387560.2
S75104,5500.0,7494.0,8118.0,5000.0,8929.8,Thibault,90.0,14.0,298664.8
EE8836,3300.0,3310.0,5062.0,3063.0,5568.2,Thibault,100.0,16.0,210436.8
B37616,,1556.0,3701.0,2000.0,4071.1,Thibault,120.0,23.0,200896.7
CG5675,2000.0,2009.0,2176.0,0.0,2393.6,Thibault,80.0,14.0,157977.6
M20324,25000.0,11539.0,12501.0,25000.0,13751.1,Thibault,90.0,13.0,146235.7
F36215,2100.0,1915.0,3588.0,1364.0,3946.8,Ksenia Kotlyarova,55.0,10.0,116226.0
280647,40000.0,10823.0,11725.0,31000.0,12897.5,Thibault,35.0,6.0,108615.0
G27639,2233.0,1325.0,3132.0,2000.0,3445.2,Thibault,90.0,15.0,108390.0


In [91]:
# First 50 article numbers in the current sort order of `forecasts`
# (sorted by 'Difference-Cost', largest first, in an earlier cell).
forecasts.index[0:50]

# [
#  'BD7633', 'B22705', 'AQ1134', 'M20605', 'B96578',
#  'F36485', 'G28109', 'B75806', 'F34314']

Index(['DB3258', 'B28128', 'S75104', 'EE8836', 'B37616', 'CG5675', 'M20324',
       'F36215', '280647', 'G27639', 'BD7633', 'G27637', 'AQ1134', 'DT7964',
       'CW1275', 'F99787', 'S82137', 'M20325', '288022', 'D95958', 'G27706',
       'G26880', 'F36485', 'B42200', '280648', 'CW1202', 'AP9971', 'EE8925',
       'G28109', 'G27026', 'G28114', 'F34314', 'DV2447', 'CM7492', 'ED6897',
       'G28108', 'CW1235', 'CW1256', 'CW1203', 'DV0337', 'DP2398', 'F36424',
       'G28107', 'BA7928', 'F35543', 'M20605', 'G16220', 'BD7479', 'AC7664',
       'DV1574'],
      dtype='object', name='Article Number')

In [92]:
# forecasts.to_csv('forecasts_net_qty.csv')