# Net Zahlen Reservationen/Offerten (Vertrag = Ja) basierend auf erfassdatum

In [101]:

import numpy as np
import pandas as pd

#######################
## Datenaufbereitung ##
#######################


# make imports from pa_lib possible (parent directory of file's directory)
import sys
from pathlib import Path

file_dir = Path.cwd()
parent_dir = file_dir.parent
sys.path.append(str(parent_dir))


from IPython.display import display
pd.options.display.max_columns = None
pd.options.display.max_rows  = None

## Libraries & Settings ##
from pa_lib.file import load_bin
from pa_lib.util import cap_words
from pa_lib.log import time_log, info

import datetime as dt
from dateutil.relativedelta import relativedelta

from pa_lib.data import (
    clean_up_categoricals,
    unfactorize,
)

In [2]:
def load_booking_data():
    bd_raw = load_bin("vkprog\\bd_data.feather").rename(
        mapper=lambda name: cap_words(name, sep="_"), axis="columns"
    )
    bd = bd_raw.loc[(bd_raw.Netto > 0)].pipe(clean_up_categoricals)
    return bd


In [3]:
raw_data_bookings = load_booking_data()

2019-10-24 09:05:53 [INFO] Started loading binary file
2019-10-24 09:05:53 [INFO] Reading from file C:\Users\stc\data\vkprog\bd_data.feather
2019-10-24 09:05:53 [INFO] Finished loading binary file in 0.18s (0.89s CPU)


In [4]:
raw_data_bookings.loc[:,["KV_NR","Agps_NR","Endkunde_NR"]].groupby(["KV_NR","Agps_NR"]).count().sort_values("Endkunde_NR", ascending= False).head(1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Endkunde_NR
KV_NR,Agps_NR,Unnamed: 2_level_1
484261,857753,1


In [5]:
raw_data_bookings.query("Endkunde_NR == 473515").sample(5)

Unnamed: 0,Endkunde_NR,Endkunde,EK_Abc,EK_Boni,EK_Plz,EK_Ort,EK_Land,EK_HB_Apg_Kurzz,EK_Kam_Betreut,EK_Aktiv,Agentur,AG_Hauptbetreuer,Verkaufsberater,Endkunde_Branchengruppe_ID,Endkunde_Branchengruppe,Endkunde_Nbranchengruppe_ID,Endkunde_Nbranchengruppe,Endkunde_Branchenkat_ID,Endkunde_Branchenkat,Endkunde_Nbranchenkat_ID,Endkunde_Nbranchenkat,Auftrag_Branchengruppe_ID,Auftrag_Branchengruppe,Auftrag_Nbranchengruppe_ID,Auftrag_Nbranchengruppe,Auftrag_Branchenkat_ID,Auftrag_Branchenkat,Auftrag_Nbranchenkat_ID,Auftrag_Nbranchenkat,Agps_NR,Segment,KV_NR,KV_Typ,Kampagnen_Status,Kampagne_Erfassungsdatum,Kampagne_Beginn,Auftragsart,Res_Dat,Annullation_Datum,Aush_Von,Dauer,Vertrag,Brutto,Netto,Agglo,PF,Kamp_Beginn_Jahr,Kamp_Beginn_KW,Kamp_Beginn_KW_2,Kamp_Beginn_KW_4,Kamp_Erfass_Jahr,Kamp_Erfass_KW,Kamp_Erfass_KW_2,Kamp_Erfass_KW_4
1384164,473515,Publifutura Affichage Italia s.r.l.,B,gut,22038,Tavernerio (CO),ITALIA,TRA,0,1,Publifutura s.r.l.,TRA,TRA,720,WG - Dienstleistung,,,15,WB - Dienstleistung,,,705,WG - Lose / Lotterien,,,12,WB - Finanzwirtschaft / Versicherung,,,2514180,APG|SGA,912799,KPG,4,2017-06-16,2017-07-03,Promotion,2017-06-16,NaT,2017-07-03,14,Nein,454,227,"91362,93787,93851,A0261,A0351,A1061,A2701,A500...","City ePanel,F12,F200,F200 Traffic,F200L,F24,F4...",2017,27,27,25,2017,24,23,21
852485,473515,Publifutura Affichage Italia s.r.l.,B,gut,22038,Tavernerio (CO),ITALIA,TRA,0,1,Publifutura Srl,ROS,ROS,720,WG - Dienstleistung,,,15,WB - Dienstleistung,,,405,WG - Reisen / Hotels / Sanatorien,,,10,WB - Freizeit / Touristik,,,1054646,APG|SGA,542436,KPG,4,2008-10-13,2009-02-02,Promotion,2008-11-07,NaT,2009-02-02,14,Nein,1107,554,"91362,93787,93851,A0261,A0351,A1061,A2701,A500...","City ePanel,F12,F200,F200 Traffic,F200L,F24,F4...",2009,6,5,5,2008,42,41,41
595716,473515,Publifutura Affichage Italia s.r.l.,B,gut,22038,Tavernerio (CO),ITALIA,TRA,0,1,,,TRA,720,WG - Dienstleistung,,,15,WB - Dienstleistung,,,720,WG - Dienstleistung,,,15,WB - Dienstleistung,,,2496276,APG|SGA,908625,KPG,4,2017-05-08,2017-05-22,Kommerziell,2017-05-11,NaT,2017-05-29,7,Nein,279,223,"91362,93787,93851,A0261,A0351,A1061,A2701,A500...","City ePanel,F12,F200,F200 Traffic,F200L,F24,F4...",2017,21,21,21,2017,19,19,17
1023353,473515,Publifutura Affichage Italia s.r.l.,B,gut,22038,Tavernerio (CO),ITALIA,TRA,0,1,Publifutura s.r.l.,TRA,TRA,720,WG - Dienstleistung,,,15,WB - Dienstleistung,,,400,WG - Automarkt,,,13,WB - Verkehr,,,2744203,APG|SGA,958346,KPG,4,2018-07-04,2018-07-23,Promotion,2018-07-04,NaT,2018-07-23,14,Nein,1965,1179,"91362,93787,93851,A0261,A0351,A1061,A2701,A500...","City ePanel,F12,F200,F200 Traffic,F200L,F24,F4...",2018,30,29,29,2018,27,27,25
1140895,473515,Publifutura Affichage Italia s.r.l.,B,gut,22038,Tavernerio (CO),ITALIA,TRA,0,1,Publifutura Affichage Italia s.r.l.,TRA,TRA,720,WG - Dienstleistung,,,15,WB - Dienstleistung,,,901,WG - Handel / Grossverteiler,901.0,WG - Handel / Grossverteiler,14,WB - Handel,14.0,WB - Handel,2960486,APG|SGA,997939,KPG,4,2019-05-13,2019-06-03,Kommerziell,2019-05-13,NaT,2019-06-03,14,Nein,1578,1136,"91362,93787,93851,A0261,A0351,A1061,A2701,A500...","City ePanel,F12,F200,F200 Traffic,F200L,F24,F4...",2019,23,23,21,2019,20,19,17


# PLZ STUFF

In [6]:
from pa_lib.data import desc_col

In [14]:
ek_info = load_bin("vkprog\\ek_info.feather")

2019-10-24 09:07:48 [INFO] Started loading binary file
2019-10-24 09:07:48 [INFO] Reading from file C:\Users\stc\data\vkprog\ek_info.feather
2019-10-24 09:07:48 [INFO] Finished loading binary file in 0.03s (0.05s CPU)


In [26]:
plz_data = load_bin("vkprog\\plz_data.feather")


2019-10-24 09:14:13 [INFO] Started loading binary file
2019-10-24 09:14:13 [INFO] Reading from file C:\Users\stc\data\vkprog\plz_data.feather
2019-10-24 09:14:13 [INFO] Finished loading binary file in 0.0s (0.0s CPU)


In [29]:
plz_data.sample(5,random_state=42)

Unnamed: 0,PLZ,FRAKTION,ORT,VERKAUFS_GEBIETS_CODE,VB_VKGEB
2580,6595,Locarno-Gerre di Sotto,Locarno,V-S01,LPA
3661,8810,Horgen-Stadt,Horgen,V-Z02,
897,2127,Val-de-Travers - Les Bayards,Val-de-Travers,V-W04,VIT
2091,5225,Bözberg-Unterbözberg,Bözberg,V-M06,OSS
1044,2824,Val Terbi-Vicques,Val Terbi,V-W04,VIT


In [32]:
desc_col(plz_data)
#desc_col(ek_info)

Unnamed: 0,DTYPE,NULLS,UNIQUE
PLZ,uint16,0/4010,3299
FRAKTION,object,113/3897,3288
ORT,object,113/3897,2202
VERKAUFS_GEBIETS_CODE,category,0/4010,25
VB_VKGEB,category,761/3249,19


In [20]:
col_list = """Endkunde_NR
                PLZ
                GEMEINDE
                KANTON
                EK_Land
""".split()
cust_current = ek_info.loc[:,col_list]

In [33]:
# BASIS TABELLE
cust_current.sample(5, random_state=42)

Unnamed: 0,Endkunde_NR,PLZ,GEMEINDE,KANTON,EK_Land
20042,577920,1185,Budapest,,HUNGARY
10311,503679,3011,Bern,BE,SCHWEIZ
13356,520701,4543,Deitingen,SO,SCHWEIZ
17149,555869,4654,Lostorf,SO,SCHWEIZ
10714,506552,6253,Uffikon,LU,SCHWEIZ


In [24]:
desc_col(cust_current)

Unnamed: 0,DTYPE,NULLS,UNIQUE
Endkunde_NR,int64,0/32237,32237
PLZ,int64,0/32237,2875
GEMEINDE,object,49/32188,3006
KANTON,object,1178/31059,26
EK_Land,object,49/32188,45


- There exists exactly one line per customer!!
- International customers get mapped to MAT

In [150]:
row_select = cust_current.loc[:,"EK_Land"] != "SCHWEIZ"
cust_matched_international = cust_current.loc[row_select,:]
cust_matched_international.loc[:,"VERKAUFS_GEBIETS_CODE"] = "INTERNATIONAL"
cust_matched_international.loc[:,"VB_VKGEB"] = "MAT"
col_list = """Endkunde_NR VERKAUFS_GEBIETS_CODE VB_VKGEB""".split()
cust_matched_international = cust_matched_international.loc[:,col_list].copy()

In [151]:
cust_matched_international.sample(5,random_state=42)

Unnamed: 0,Endkunde_NR,VERKAUFS_GEBIETS_CODE,VB_VKGEB
7344,472303,INTERNATIONAL,MAT
10386,504199,INTERNATIONAL,MAT
6020,327269,INTERNATIONAL,MAT
12795,518227,INTERNATIONAL,MAT
21924,591280,INTERNATIONAL,MAT


In [152]:
row_select = (pd.merge(cust_current,
                      cust_matched_international,
                      on="Endkunde_NR",
                      how="left")
              .loc[:,"VB_VKGEB"]
              .isna()
             )

cust_unmatched = cust_current.loc[row_select,:]
#cust_unmatched.sample(5,random_state=42)

In [153]:
cust_unmatched.shape

(31059, 5)

In [154]:
cust_current.shape

(32237, 5)

- Only swiss customers are left now to match
- Next step: Figure out unique ``FRAKTION`` in ``plz_data``, but first check if it is worthwhile

In [155]:
plz_data.sample(5,random_state=42)

Unnamed: 0,PLZ,FRAKTION,ORT,VERKAUFS_GEBIETS_CODE,VB_VKGEB
2580,6595,Locarno-Gerre di Sotto,Locarno,V-S01,LPA
3661,8810,Horgen-Stadt,Horgen,V-Z02,
897,2127,Val-de-Travers - Les Bayards,Val-de-Travers,V-W04,VIT
2091,5225,Bözberg-Unterbözberg,Bözberg,V-M06,OSS
1044,2824,Val Terbi-Vicques,Val Terbi,V-W04,VIT


In [156]:
#row_select = pd.merge(cust_unmatched,plz_data, right_on="FRAKTION", left_on="GEMEINDE", how="left").loc[:,"VB_VKGEB"].isna()


In [157]:
#boxplot_histogram(plz_data.groupby("FRAKTION").agg({"VERKAUFS_GEBIETS_CODE": "count"}).sort_values("VERKAUFS_GEBIETS_CODE", ascending=False))

In [163]:
fraktion_cnt = (plz_data.groupby("FRAKTION")
                        .agg({"VERKAUFS_GEBIETS_CODE": "count"}) # no missing values!
                        .reset_index()
                        .rename(columns={"VERKAUFS_GEBIETS_CODE": "CNT"}))

plz_unique_fraktion = (pd.merge(plz_data,
                                fraktion_cnt,
                                left_on="FRAKTION",
                                right_on="FRAKTION",
                                how="left"
                               )
                         .query("CNT == 1") # only unique ones
                      )

In [164]:
desc_col(plz_unique_fraktion)

Unnamed: 0,DTYPE,NULLS,UNIQUE
PLZ,uint16,0/2850,2310
FRAKTION,object,0/2850,2850
ORT,object,0/2850,1925
VERKAUFS_GEBIETS_CODE,category,0/2850,25
VB_VKGEB,category,477/2373,19
CNT,float64,0/2850,1


In [172]:
cust_container_fraktion = (pd.merge(cust_unmatched,
                                  plz_unique_fraktion,
                                  left_on="GEMEINDE",
                                  right_on="FRAKTION",
                                  how="left"
                                 )
                         .loc[:,"""Endkunde_NR VERKAUFS_GEBIETS_CODE VB_VKGEB""".split()]
                        )

In [184]:
~cust_container_fraktion.loc[:,"VB_VKGEB"].isna() 

0         True
1         True
2         True
3         True
4         True
5         True
6         True
7         True
8         True
9         True
10       False
11        True
12       False
13        True
14        True
15       False
16        True
17        True
18        True
19        True
20        True
21        True
22        True
23        True
24        True
25        True
26        True
27        True
28        True
29        True
30       False
31        True
32        True
33        True
34        True
35       False
36        True
37        True
38        True
39        True
40        True
41        True
42        True
43        True
44        True
45        True
46        True
47        True
48        True
49        True
50        True
51        True
52        True
53        True
54        True
55        True
56        True
57        True
58        True
59        True
60        True
61       False
62        True
63        True
64        True
65       False
66       F

In [165]:
row_select = (pd.merge(cust_unmatched,
                      cust_matched_fraktion,
                      on="Endkunde_NR",
                      how="left")
              .loc[:,"VERKAUFS_GEBIETS_CODE"]
              .isna()
             )

In [167]:
len(row_select)

31059

In [168]:
cust_unmatched.shape

(31059, 5)

In [169]:
cust_unmatched.loc[row_select,:]

IndexingError: Unalignable boolean Series provided as indexer (index of the boolean Series and of the indexed object do not match