# Imports and technical setup

In [1]:
# --- Standard library ---
import sys
from collections import namedtuple
from functools import partial
from importlib import reload
from pathlib import Path

# --- Third-party ---
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from bokeh.colors import RGB
from bokeh.io import output_notebook, show
from bokeh.models import ColumnDataSource
from IPython.display import display, HTML
from pandas import IndexSlice as idx

# Directory holding the pickled inputs, relative to the current working directory.
persist_path = Path('..') / 'persist'

# Inject CSS that sets the notebook's `.container` element to full width.
display(HTML("<style>.container { width:100%; }</style>"))

# Append the parent of sys.path[0] to the import path so the `scripts` package resolves.
# NOTE(review): assumes sys.path[0] is the notebook's own directory — confirm for your kernel.
project_root = str(Path(sys.path[0]).parents[0].absolute())
if project_root not in sys.path:
    sys.path.append(project_root)

# --- Project-local (must come after the sys.path adjustment above) ---
import scripts.utils as utils

# Send bokeh output to the notebook.
output_notebook()

# Chargement des données

In [2]:
# Read the pickled inputs from `persist_path`.
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source.

# Orders frame, re-indexed and sorted on (orgacom, client, date).
orders = (
    pd.read_pickle(persist_path / 'orders.pkl')
    .reset_index()
    .set_index(['orgacom', 'client', 'date'])
    .sort_index()
)

# The remaining frames are used exactly as stored.
day_orders, df_clt, lib_seg = (
    pd.read_pickle(persist_path / pickle_name)
    for pickle_name in ('day_order.pkl', 'clt.pkl', 'lib_seg.pkl')
)

In [3]:
# Attach human-readable label columns to the client table `df_clt`, in two passes:
#   1. each segmentation column  -> '<seg>_lib', looked up in `lib_seg`;
#   2. each hierarchy column     -> '<hier>_lib', looked up among the client names.
#
# This cell reassigns `df_clt` from itself. Before each merge, any label column left by
# a previous run of the cell is dropped; otherwise re-running the cell would make pandas
# silently emit suffixed duplicates ('seg1_lib_x' / 'seg1_lib_y'). On a first run the
# drop is a no-op. On a re-run the label values are rebuilt; only column order may differ.

# --- Pass 1: segmentation labels -------------------------------------------------------
segs = ['seg1', 'seg2', 'seg3', 'seg4', 'cat', 'sscat']
for i, seg in enumerate(segs):
    df_clt = (
        df_clt
        .drop(columns=[seg + '_lib'], errors='ignore')
        .merge(
            # Designations stored under row key i + 1 (1-based position of `seg` in `segs`).
            # NOTE(review): presumably lib_seg is indexed by (level number, code) — confirm.
            lib_seg.loc[i + 1, 'designation'].rename(seg + '_lib'),
            left_on=seg,
            right_index=True,
            validate='m:1',  # each code must map to at most one designation
            how='left',      # keep every client, labelled or not
        )
    )

# --- Lookup table of client names ------------------------------------------------------
names = df_clt.reset_index().loc[:, ['orgacom', 'client', 'nom']]
# Keep only purely numeric client ids ...
names = names.loc[names.client.str.isnumeric()]
# ... and round-trip them through int, which removes any zero padding.
# NOTE(review): presumably the hier* columns hold unpadded ids — confirm.
names = names.astype({'client': 'int'}).astype({'client': 'str'})

# --- Pass 2: hierarchy labels ----------------------------------------------------------
hiers = ['hier4', 'hier3', 'hier2', 'hier1']
df_clt = df_clt.reset_index()
for hier in hiers:
    df_clt = (
        df_clt
        .drop(columns=[hier + '_lib'], errors='ignore')
        .merge(
            names.rename({'nom': hier + '_lib', 'client': 'client_' + hier}, axis=1),
            left_on=['orgacom', hier],
            right_on=['orgacom', 'client_' + hier],
            validate='m:1',
            how='left',
        )
        # The renamed join key is only needed for the merge itself.
        .drop('client_' + hier, axis=1)
    )
df_clt = df_clt.set_index(['orgacom', 'client'])

# Construction d'un indicateur "taux de web"

In [4]:
%%time
# total time around 2:30 mins
reload(utils)
test = utils.day_orders_pipe(
    data=day_orders,
    inactive_duration=20.,
    indicator_status='brutrevenue',
    origin='WEB',
    indicator_perf='margin',
    inactive_roll_mode='ignore',
    roll_parms=dict(
        window=75,
        center=True,
        win_type='triang',
        min_periods=1,
    )
)
test

2020-10-28 09:01:15.134073: Computing totals
2020-10-28 09:01:35.244729: Done! Elapsed: 0:00:20.110687
2020-10-28 09:01:35.245015: Computing inactive periods
2020-10-28 09:01:41.507069: Done! Elapsed: 0:00:06.262074
2020-10-28 09:01:41.507190: Computing rolling indicators
2020-10-28 09:03:29.477482: Done! Elapsed: 0:01:47.970348
2020-10-28 09:03:29.477668: Computing percentage and statuses
2020-10-28 09:03:38.408179: Done! Elapsed: 0:00:08.930539
CPU times: user 2min 2s, sys: 22.2 s, total: 2min 24s
Wall time: 2min 24s


Unnamed: 0_level_0,Unnamed: 1_level_0,indicators,brutrevenue,brutrevenue,brutrevenue,brutrevenue,brutrevenue,margin,margin,margin,margin,margin,inactive,brutrevenue_rolled,brutrevenue_rolled,margin_rolled,WEB_percentage,status
Unnamed: 0_level_1,Unnamed: 1_level_1,origin2,EDI,TV,VR,WEB,total,EDI,TV,VR,WEB,total,Unnamed: 13_level_1,WEB,total,total,Unnamed: 17_level_1,Unnamed: 18_level_1
orgacom,client,date,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2
1ALO,0000015304,2017-07-03,0.0,1340.95,0.0,0.0,1340.95,0.0,202.80,0.0,0.0,202.80,False,0.0,635.937935,83.924089,0.0,no_web
1ALO,0000015304,2017-07-04,0.0,196.54,0.0,0.0,196.54,0.0,33.70,0.0,0.0,33.70,False,0.0,632.730964,83.342352,0.0,no_web
1ALO,0000015304,2017-07-05,0.0,916.78,0.0,0.0,916.78,0.0,86.73,0.0,0.0,86.73,False,0.0,631.131462,82.996683,0.0,no_web
1ALO,0000015304,2017-07-06,0.0,73.36,0.0,0.0,73.36,0.0,17.55,0.0,0.0,17.55,False,0.0,628.344134,82.596796,0.0,no_web
1ALO,0000015304,2017-07-07,0.0,73.17,0.0,0.0,73.17,0.0,13.77,0.0,0.0,13.77,False,0.0,626.945096,82.326161,0.0,no_web
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2BRE,SL009N,2020-08-26,0.0,0.00,0.0,0.0,0.00,0.0,0.00,0.0,0.0,0.00,True,0.0,241.407360,24.530400,0.0,inactive
2BRE,SL009N,2020-08-27,0.0,629.91,0.0,0.0,629.91,0.0,61.10,0.0,0.0,61.10,False,0.0,241.191024,24.505197,0.0,no_web
2BRE,SL009N,2020-08-28,0.0,0.00,0.0,0.0,0.00,0.0,0.00,0.0,0.0,0.00,False,0.0,234.856535,23.904094,0.0,no_web
2BRE,SL009N,2020-08-31,0.0,75.03,0.0,0.0,75.03,0.0,10.51,0.0,0.0,10.51,False,0.0,232.178400,23.675840,0.0,no_web


In [5]:
# Small record type pairing status labels with the numeric limits of their bins.
labeled_bins = namedtuple('labeled_bins', 'labels bin_limits')

# Three labels and four limits.
# NOTE(review): presumably consecutive limits delimit one labelled interval each — confirm in utils.
bins = labeled_bins(
    ['no_web', 'web', 'full'],
    [0.0, 0.5, 0.75, 1.001],
)
# Colour per status label: pure green (0, 255, 0) with a growing alpha for the three
# web statuses, then a translucent grey for 'inactive'.
bin_colors = {
    status: RGB(0, 255, 0, alpha)
    for status, alpha in (('no_web', .0), ('web', .1), ('full', .2))
}
bin_colors['inactive'] = RGB(100, 100, 100, .2)

In [6]:
# (orgacom, client) pair to inspect.
oc, clt = '1ALO', '0000020575'

# Other pairs tried previously — substitute one above to inspect it:
#   ('1ALO', '0000170669')   ('1ALO', '0000020697')   ('1ALO', '0000020531')
#   ('1ALO', '0000024795')   ('1ALO', '0000162867')   ('1ALO', '0000026355')
#   ('2BRE', '0000162960')   ('1LRO', '0000092812')   ('2BRE', '0000135319')
#   ('1SOU', '0000109546')   ('1LRO', '0000211945')
#
# Snippet to draw one at random among clients with non-zero WEB brutrevenue whose id
# does not start with 'P':
#   list(day_orders.loc[day_orders[('brutrevenue', 'WEB')] != 0]
#                  .index.to_frame()
#                  .loc[lambda x: x.client.str[0] != 'P', ['orgacom', 'client']]
#                  .drop_duplicates().sample(1).iloc[0])
oc, clt

('1ALO', '0000020575')

In [8]:
reload(utils)  # re-import scripts.utils so recent edits to it are picked up

# Upper bound of the date slice applied to both order frames.
end_date = pd.Timestamp('2024-02-29')

# Pre-bind every argument of utils.bk_detail for the selected (oc, clt) pair, so that
# the resulting callable can be handed to bokeh's show().
detail_app = partial(
    utils.bk_detail,
    in_data=day_orders.loc[oc, clt, :end_date],
    order_data=orders.loc[oc, clt, :end_date],
    clt_data=df_clt.loc[oc, clt],
    client=clt,
    oc=oc,
    groupers=['orgacom', 'client'],
    bins=bins,
    bin_colors=bin_colors,
    indicator_status='brutrevenue',
    indicator_perf='margin',
    origin='WEB',
    inactive_duration=20,
    inactive_roll_mode='stitch',
    # roll_parms is deliberately not passed here; a previously tried value was:
    #   dict(window=150, center=True, win_type='triang', min_periods=1)
)

# NOTE(review): notebook_url is hard-coded to one host — adjust when running elsewhere.
show(detail_app, notebook_url="http://devdm:8888")

2020-10-28 09:23:18.085433: Computing totals
2020-10-28 09:23:18.092044: Done! Elapsed: 0:00:00.006620
2020-10-28 09:23:18.092138: Computing inactive periods
2020-10-28 09:23:18.097990: Done! Elapsed: 0:00:00.005858
2020-10-28 09:23:18.098041: Computing rolling indicators
2020-10-28 09:23:18.120928: Done! Elapsed: 0:00:00.022897
2020-10-28 09:23:18.121003: Computing percentage and statuses
2020-10-28 09:23:18.126857: Done! Elapsed: 0:00:00.005861
2020-10-28 10:41:13.244205: Computing totals
2020-10-28 10:41:13.251283: Done! Elapsed: 0:00:00.007089
2020-10-28 10:41:13.251409: Computing inactive periods
2020-10-28 10:41:13.257111: Done! Elapsed: 0:00:00.005709
2020-10-28 10:41:13.257324: Computing rolling indicators
2020-10-28 10:41:13.282074: Done! Elapsed: 0:00:00.024763
2020-10-28 10:41:13.282167: Computing percentage and statuses
2020-10-28 10:41:13.288167: Done! Elapsed: 0:00:00.006009
2020-10-28 10:42:50.375596: Computing totals
2020-10-28 10:42:50.381940: Done! Elapsed: 0:00:00.00