# Imports and technical setup

In [1]:
from pathlib import Path
# Directory holding the pickled datasets read by the loading cell,
# expressed relative to the current working directory.
persist_path = Path('..') / 'persist'
import numpy as np
import pandas as pd
from pandas import IndexSlice as idx
from IPython.display import display, HTML
# Stretch the notebook container to the full browser width.
display(HTML("<style>.container { width:100%; }</style>"))
import matplotlib.pyplot as plt
import sys
# Put the parent of the first sys.path entry on sys.path so that the
# `scripts` package imported below can be resolved.
# NOTE(review): assumes sys.path[0] is the notebook's own directory — confirm.
project_root = str(Path(sys.path[0]).parents[0].absolute())
if project_root not in sys.path:
    sys.path.append(project_root)
from importlib import reload
import scripts.utils as utils
from bokeh.io import output_notebook, show
from bokeh.models import ColumnDataSource
from bokeh.colors import RGB
# Configure Bokeh to render its output inside the notebook.
output_notebook()
from functools import partial
from collections import namedtuple

# Chargement des données

In [2]:
# NOTE: pickle files can execute arbitrary code when loaded; only read
# files from a trusted location.

# Order data, re-indexed and sorted on (orgacom, client, date).
orders = (
    pd.read_pickle(persist_path / 'small_orders.pkl')
    .reset_index()
    .set_index(['orgacom', 'client', 'date'])
    .sort_index()
)

# The remaining datasets are used exactly as stored.
day_orders, df_clt, lib_seg = (
    pd.read_pickle(persist_path / filename)
    for filename in ('small_day_order.pkl', 'small_clt.pkl', 'small_lib_seg.pkl')
)

In [3]:
# Segmentation columns of df_clt. The column at position i (0-based) gets its
# label from lib_seg.loc[i + 1, 'designation'].
segs = ['seg1', 'seg2', 'seg3', 'seg4', 'cat', 'sscat']
# Hierarchy columns of df_clt. Each one is matched against a client code and
# labelled with that client's name ('nom').
hiers = ['hier4', 'hier3', 'hier2', 'hier1']

# Re-run safety: this cell rebinds df_clt, so running it a second time would
# merge onto the '*_lib' columns added by the first run and produce '_x'/'_y'
# suffixed duplicates. Dropping any label column that already exists makes the
# cell idempotent (this is a no-op on a fresh kernel).
df_clt = df_clt.drop(
    columns=[col + '_lib' for col in segs + hiers if col + '_lib' in df_clt.columns]
)

# Add one '<seg>_lib' label column per segmentation column.
for i, seg in enumerate(segs):
    df_clt = df_clt.merge(
        lib_seg.loc[i + 1, 'designation'].rename(seg + '_lib'),
        left_on=seg,
        right_index=True,
        validate='m:1',  # fail loudly if a code maps to several labels
        how='left',
    )

# Lookup table (orgacom, client, nom) restricted to purely numeric client codes.
names = df_clt.reset_index().loc[:, ['orgacom', 'client', 'nom']]
names = names.loc[names.client.str.isnumeric()]
# The int -> str round trip strips leading zeros from the client code.
# NOTE(review): presumably this matches the format of the hier* columns — confirm.
names = names.astype({'client': 'int'}).astype({'client': 'str'})

# Add one '<hier>_lib' column per hierarchy level, holding the name of the
# client referenced by that level within the same orgacom.
df_clt = df_clt.reset_index()
for hier in hiers:
    df_clt = df_clt.merge(
        names.rename({'nom': hier + '_lib', 'client': 'client_' + hier}, axis=1),
        left_on=['orgacom', hier],
        right_on=['orgacom', 'client_' + hier],
        validate='m:1',
        how='left',
    )
    # The join key copied from `names` is no longer needed once merged.
    df_clt = df_clt.drop('client_' + hier, axis=1)
df_clt = df_clt.set_index(['orgacom', 'client'])
df_clt

Unnamed: 0_level_0,Unnamed: 1_level_0,V,groupecompte,nom,postalcode,seg1,seg2,seg3,seg4,cat,sscat,...,seg1_lib,seg2_lib,seg3_lib,seg4_lib,cat_lib,sscat_lib,hier4_lib,hier3_lib,hier2_lib,hier1_lib
orgacom,client,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1ALO,0000022504,A,ZCLT,AV ENT CC LES BRIQUETTES,51190,Z3,Z4,ZJ,ZE,ZP,ZC,...,RHD,Social,Concédé,C.C SRC,Travail,C.C Travail,ELIOR,ELIOR EDI,ELIOR ENTREPRISE,ELIOR ENTREPRISE EST
1ALO,0000026598,A,ZCLT,HOTEL PYRAMID,6182,Z3,Z5,ZK,ZG,ZS,ZK,...,RHD,Commercial,Indépendant,Cial. indépendant,Rest. traditionnelle,Moyenne gamme,,,,
1ALO,0000134589,A,ZCLT,REST LE BOURBON,4760,Z3,Z5,ZK,ZG,ZS,ZJ,...,RHD,Commercial,Indépendant,Cial. indépendant,Rest. traditionnelle,Gastro -semi gastro,,,,
1ALO,0000136324,A,ZCLT,REST DELICIOUS,5240,Z3,Z5,ZK,ZG,ZS,ZJ,...,RHD,Commercial,Indépendant,Cial. indépendant,Rest. traditionnelle,Gastro -semi gastro,,,,
1ALO,0000150039,A,ZFAC,HOTEL RESTAURANT CONFORT HOTEL,57140,Z3,Z5,ZK,ZG,,,...,RHD,Commercial,Indépendant,Cial. indépendant,,,GOURMET CONSULTANTS,GOURMET CONSULTANTS,NPU GOURMET CONSULTANTS ASS TECH,NPU CHOICE HOTEL ASSIT TECH
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2NOR,0000285560,A,ZCLT,REST LES CORNICHONS,51100,Z3,Z5,ZK,ZG,ZS,ZK,...,RHD,Commercial,Indépendant,Cial. indépendant,Rest. traditionnelle,Moyenne gamme,,,,
3CLO,0000272709,A,ZCLT,REST ALEX'CELLENT,54710,Z3,Z5,ZK,ZG,ZS,ZK,...,RHD,Commercial,Indépendant,Cial. indépendant,Rest. traditionnelle,Moyenne gamme,,,,
1ALO,0000284904,A,ZCLT,TANKSTELL,67400,Z3,Z5,ZK,ZG,ZS,ZK,...,RHD,Commercial,Indépendant,Cial. indépendant,Rest. traditionnelle,Moyenne gamme,AVANTAGES,AVANTAGES,AVANTAGES,AVANTAGES
2EST,0000284904,A,ZCLT,TANKSTELL,67400,Z3,Z5,ZK,ZG,ZS,ZK,...,RHD,Commercial,Indépendant,Cial. indépendant,Rest. traditionnelle,Moyenne gamme,,,,


# Construction d'un indicateur "taux de web"

In [4]:
# Small record type pairing status labels with the numeric limits of their bins.
labeled_bins = namedtuple('labeled_bins', 'labels bin_limits')

# Two statuses delimited by three limits; passed to utils.day_orders_pipe below.
bins = labeled_bins(['no_web', 'web'], [0., .5, 1.001])

In [5]:
%%time
# total time around 3:30 mins
reload(utils)
test = utils.day_orders_pipe(
    data=day_orders,
    inactive_duration=20.,
    indicator_status='brutrevenue',
    origin='WEB',
    indicator_perf='margin',
    inactive_roll_mode='stitch',
    roll_parms=dict(
        window=75,
        center=True,
        win_type='triang',
        min_periods=10,
    ),
    bins=bins,
)
test

2021-05-26 16:03:05.711104: Computing totals
2021-05-26 16:03:27.027057: Done! Elapsed: 0:00:21.315997
2021-05-26 16:03:27.027338: Computing inactive periods
2021-05-26 16:03:33.753708: Done! Elapsed: 0:00:06.726382
2021-05-26 16:03:33.753811: Computing rolling indicators
2021-05-26 16:06:19.856504: Done! Elapsed: 0:02:46.102727
2021-05-26 16:06:19.856656: Computing percentage and statuses
2021-05-26 16:06:27.262643: Done! Elapsed: 0:00:07.406017
CPU times: user 2min 59s, sys: 24.1 s, total: 3min 23s
Wall time: 3min 22s


Unnamed: 0_level_0,Unnamed: 1_level_0,indicators,brutrevenue,brutrevenue,brutrevenue,brutrevenue,brutrevenue,margin,margin,margin,margin,margin,inactive,brutrevenue_rolled,brutrevenue_rolled,margin_rolled,WEB_percentage,status
Unnamed: 0_level_1,Unnamed: 1_level_1,origin2,EDI,TV,VR,WEB,total,EDI,TV,VR,WEB,total,Unnamed: 13_level_1,WEB,total,total,Unnamed: 17_level_1,Unnamed: 18_level_1
orgacom,client,date,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2
1ALO,0000015304,2017-07-03,0.0,1340.95,0.0,0.0,1340.95,0.0,202.80,0.0,0.0,202.80,False,0.0,635.937935,83.924089,0.0,no_web
1ALO,0000015304,2017-07-04,0.0,196.54,0.0,0.0,196.54,0.0,33.70,0.0,0.0,33.70,False,0.0,632.730964,83.342352,0.0,no_web
1ALO,0000015304,2017-07-05,0.0,916.78,0.0,0.0,916.78,0.0,86.73,0.0,0.0,86.73,False,0.0,631.131462,82.996683,0.0,no_web
1ALO,0000015304,2017-07-06,0.0,73.36,0.0,0.0,73.36,0.0,17.55,0.0,0.0,17.55,False,0.0,628.344134,82.596796,0.0,no_web
1ALO,0000015304,2017-07-07,0.0,73.17,0.0,0.0,73.17,0.0,13.77,0.0,0.0,13.77,False,0.0,626.945096,82.326161,0.0,no_web
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2BRE,SL009N,2020-08-26,0.0,0.00,0.0,0.0,0.00,0.0,0.00,0.0,0.0,0.00,True,0.0,148.904853,28.465760,0.0,inactive
2BRE,SL009N,2020-08-27,0.0,629.91,0.0,0.0,629.91,0.0,61.10,0.0,0.0,61.10,False,0.0,151.605602,28.640258,0.0,no_web
2BRE,SL009N,2020-08-28,0.0,0.00,0.0,0.0,0.00,0.0,0.00,0.0,0.0,0.00,False,0.0,153.128380,28.710424,0.0,no_web
2BRE,SL009N,2020-08-31,0.0,75.03,0.0,0.0,75.03,0.0,10.51,0.0,0.0,10.51,False,0.0,155.007827,28.826248,0.0,no_web


In [6]:
# Same record type as before: status labels plus the numeric limits of their bins.
labeled_bins = namedtuple('labeled_bins', 'labels bin_limits')

# Three statuses this time; this rebinds the `bins` name used by earlier cells.
bins = labeled_bins(['no_web', 'web', 'full'], [0., .5, .75, 1.001])

# One colour per status label, plus one for the 'inactive' status.
# The fourth RGB argument is the alpha (opacity) channel.
bin_colors = dict(
    no_web=RGB(0, 255, 0, .0),
    web=RGB(0, 255, 0, .1),
    full=RGB(0, 255, 0, .2),
    inactive=RGB(100, 100, 100, .2),
)

In [7]:
# Distinct sales organisations (orgacom) present in the daily order data.
day_orders.reset_index()['orgacom'].unique()

array(['1ALO', '1LRO', '1SOU', '2BRE'], dtype=object)

In [8]:
# Attach the client attributes to every daily row. df_clt first gets an empty
# extra column level (moved to second position by swaplevel) so that its
# columns have two levels, like the (indicator, origin) columns of day_orders.
temp = day_orders.join(
    pd.concat([df_clt], keys=[''], axis=1).swaplevel(axis=1)
)

In [9]:
# Text searched in the level-4 hierarchy label (hier4_lib).
my_lib = 'CORA'

# Perimeter: distinct (orgacom, client) pairs that have at least one day with
# non-zero WEB gross revenue, whose hier4 label contains `my_lib`, and whose
# client code does not start with 'P'.
perim = (
    temp.loc[
        (temp[('brutrevenue', 'WEB')] != 0)
        # na=False: hier4_lib is NaN for clients without a matched hierarchy;
        # treat those as "no match" so the mask stays strictly boolean.
        & (temp.hier4_lib.str.contains(my_lib, na=False))
    ]
    .index.to_frame()
    .loc[lambda x: x.client.str[0] != 'P', ['orgacom', 'client']]
    .drop_duplicates()
)
print(f"Dans la hiérarchie {my_lib} il y a {len(perim)} clients qui ont fait du Web.")

# Client shown in the detail view. Exactly one line must be left uncommented;
# the others are kept as a log of the clients already reviewed and what was seen.
oc, clt = (
    # perim.sample(1).iloc[0]  # alternative: pick a random client of the perimeter
    # ('1ALO', '0000170669')
    # ('1ALO', '0000020697')
    # ('1ALO', '0000020575')
    # ('1ALO', '0000020531')
    # ('1ALO', '0000024795')
    # ('1ALO', '0000162867')
    # ('1ALO', '0000026355')
    # ('2BRE', '0000162960')
    # ('2BRE', '0000135319')
    # ('1SOU', '0000109546')
    # ('1LRO', '0000211945')
    # ('1SOU', '0000129993')  # TUTTI PIZZA JEAN RIEU
    # ('1SOU', '0000200101')  # TUTTI PIZZA ST ORENS ==> WEB only, shows the decrease in order frequency
    # ('1ALO', '0000196808')
    # ('1SOU', '0000253271')  # TUTTI PIZZA MONTREJEAU ==> somewhat like ST ORENS
    # ('1SOU', '0000069161')  # TUTTI PIZZA MONTAUBAN ==> a genuine switch
    # ('1ALO', '0000026272')  # CACIC : MDR SAINT CHARLES ==> switch
    # ('1ALO', '0000026286')  # CACIC : EHPAD MDR  SAINTE FAMILLE ==> switch after a period of inactivity
    # ('2BRE', '0000247694')  # CACIC : EHPAD RESID LES BLES D'ORS ==> full web with one route sale
    # ('1LRO', '0000255792')  # CACIC : SOLANID MONTPELLIER ST ROCH ==> started using it, then gave up
    # ('1SOU', '0000083353')  # CACIC : LES FLORALIES ==> almost no web
    # ('2BRE', '0000153386')  # CACIC : ADIMC 35 FOYER DES GLYCINES ==> switched in 2018
    ('1ALO', '0000026443')  # AESTERA : MDR LES JARDINS DE CUVIERES ==> switched in 2019, WORTH A LOOK!
    # ('1ALO', '0000157265')  # AESTERA : IME DE DANNEMARIE ==> 100% WEB
    # ('1ALO', '0000206981')  # AESTERA : REST LEGUMEZ MOI ==> this one shows nothing was lost by switching to WEB
    # ('1ALO', '0000025685')  # AESTERA : LE HOME FLEURI ==> experiments, then switch
    # ('1SOU', '0000243538')  # the town hall canteen => WEB only
    # ('1ALO', '0000027991')  # CORA : CAFET CORA COUDEKERQUE - DUM ==> nice transition, stopped in 2019
    # ('1ALO', '0000027977')  # CORA : CAFET CORA VERDUN - VDM ==> same as Coudekerque
    # ('1ALO', '0000027973')  # CORA : CAFET CORA CORMONTREUIL - RCM ==> margin growth is visible too. Same, stopped in 2019
    # ('1ALO', '0000027971')  # CORA : CAFET CORA VESOUL - VEM ==> switched as well, end of 2019.
)
# Helper to generate a new entry for the list above:
# print(f"('{oc}', '{clt}')  # {my_lib} :", df_clt.loc[(oc, clt), 'nom'])

Dans la hiérarchie CORA il y a 57 clients qui ont fait du Web.


In [13]:
# Re-import scripts.utils so that edits to the module are picked up.
reload(utils)
# Upper bound of the date slices below (label slicing with .loc includes it).
end_date = pd.to_datetime('2021-02-28')
# utils.bk_detail is pre-bound with the selected client's data through
# functools.partial, and the resulting callable is handed to Bokeh's show().
# NOTE(review): inactive_duration, indicator_status, origin, indicator_perf and
# inactive_roll_mode repeat the values passed to utils.day_orders_pipe in the
# earlier cell — keep both in sync.
# NOTE(review): notebook_url is hard-coded to one server; it has to match the
# address the notebook is served from, so adjust it when running elsewhere.
show(
    partial(
        utils.bk_detail,
        in_data=day_orders.loc[oc, clt, :end_date],
        order_data=orders.loc[oc, clt, :end_date],
        client=clt,
        oc=oc,
        bins=bins,
        bin_colors=bin_colors,
        groupers=['orgacom', 'client'],
        inactive_duration=20,
        indicator_status='brutrevenue',
        origin='WEB',
        indicator_perf='margin',
        inactive_roll_mode='stitch',
        clt_data=df_clt.loc[oc, clt],
    ),
    notebook_url="http://devdm:8888")

2021-05-26 16:07:52.769612: Computing totals
2021-05-26 16:07:52.776413: Done! Elapsed: 0:00:00.006813
2021-05-26 16:07:52.776539: Computing inactive periods
2021-05-26 16:07:52.782646: Done! Elapsed: 0:00:00.006118
2021-05-26 16:07:52.782722: Computing rolling indicators
2021-05-26 16:07:52.802655: Done! Elapsed: 0:00:00.019945
2021-05-26 16:07:52.802750: Computing percentage and statuses
2021-05-26 16:07:52.809037: Done! Elapsed: 0:00:00.006297
2021-05-26 16:11:29.075682: Computing totals
2021-05-26 16:11:29.082798: Done! Elapsed: 0:00:00.007130
2021-05-26 16:11:29.082921: Computing inactive periods
2021-05-26 16:11:29.089078: Done! Elapsed: 0:00:00.006181
2021-05-26 16:11:29.089164: Computing rolling indicators
2021-05-26 16:11:29.109541: Done! Elapsed: 0:00:00.020389
2021-05-26 16:11:29.109647: Computing percentage and statuses
2021-05-26 16:11:29.116526: Done! Elapsed: 0:00:00.006889
2021-05-26 16:11:35.687077: Computing totals
2021-05-26 16:11:35.693893: Done! Elapsed: 0:00:00.00