# Net Zahlen Reservationen/Offerten (Vertrag = Ja) basierend auf erfassdatum

In [3]:

import numpy as np
import pandas as pd

#######################
## Datenaufbereitung ##
#######################


# make imports from pa_lib possible (parent directory of file's directory)
import sys
from pathlib import Path

file_dir = Path.cwd()
parent_dir = file_dir.parent
sys.path.append(str(parent_dir))


from IPython.display import display
pd.options.display.max_columns = None

## Libraries & Settings ##
from pa_lib.file import load_bin
from pa_lib.util import cap_words
from pa_lib.log import time_log, info

import datetime as dt
from dateutil.relativedelta import relativedelta

from pa_lib.data import (
    clean_up_categoricals,
    unfactorize,
)

In [4]:
def load_booking_data():
    bd_raw = load_bin("vkprog\\bd_data.feather").rename(
        mapper=lambda name: cap_words(name, sep="_"), axis="columns"
    )
    bd = bd_raw.loc[(bd_raw.Netto > 0)].pipe(clean_up_categoricals)
    return bd


In [5]:
raw_data_bookings = load_booking_data()

2019-10-22 17:19:58 [INFO] Started loading binary file
2019-10-22 17:19:58 [INFO] Reading from file C:\Users\stc\data\vkprog\bd_data.feather
2019-10-22 17:19:58 [INFO] Finished loading binary file in 0.18s (0.83s CPU)


In [6]:
raw_data_bookings.loc[:,["KV_NR","Agps_NR","Endkunde_NR"]].groupby(["KV_NR","Agps_NR"]).count().sort_values("Endkunde_NR", ascending= False).head(1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Endkunde_NR
KV_NR,Agps_NR,Unnamed: 2_level_1
484261,857753,1


In [10]:
raw_data_bookings.query("Endkunde_NR == 473515").sample(5)

Unnamed: 0,Endkunde_NR,Endkunde,EK_Abc,EK_Boni,EK_Plz,EK_Ort,EK_Land,EK_HB_Apg_Kurzz,EK_Kam_Betreut,EK_Aktiv,Agentur,AG_Hauptbetreuer,Verkaufsberater,Endkunde_Branchengruppe_ID,Endkunde_Branchengruppe,Endkunde_Nbranchengruppe_ID,Endkunde_Nbranchengruppe,Endkunde_Branchenkat_ID,Endkunde_Branchenkat,Endkunde_Nbranchenkat_ID,Endkunde_Nbranchenkat,Auftrag_Branchengruppe_ID,Auftrag_Branchengruppe,Auftrag_Nbranchengruppe_ID,Auftrag_Nbranchengruppe,Auftrag_Branchenkat_ID,Auftrag_Branchenkat,Auftrag_Nbranchenkat_ID,Auftrag_Nbranchenkat,Agps_NR,Segment,KV_NR,KV_Typ,Kampagnen_Status,Kampagne_Erfassungsdatum,Kampagne_Beginn,Auftragsart,Res_Dat,Annullation_Datum,Aush_Von,Dauer,Vertrag,Brutto,Netto,Agglo,PF,Kamp_Beginn_Jahr,Kamp_Beginn_KW,Kamp_Beginn_KW_2,Kamp_Beginn_KW_4,Kamp_Erfass_Jahr,Kamp_Erfass_KW,Kamp_Erfass_KW_2,Kamp_Erfass_KW_4
1311141,473515,Publifutura Affichage Italia s.r.l.,B,gut,22038,Tavernerio (CO),ITALIA,TRA,0,1,Publifutura Srl,ROS,ROS,720,WG - Dienstleistung,,,15,WB - Dienstleistung,,,990,WG - Veranstaltungen,,,22,WB - Veranstaltungen,,,1445085,APG|SGA,657402,KPG,4,2011-07-26,2011-10-10,Kommerziell,2011-07-26,NaT,2011-10-10,14,Nein,5959,5959,"91362,93787,93851,A0261,A0351,A1061,A2701,A500...","City ePanel,F12,F200,F200 Traffic,F200L,F24,F4...",2011,41,41,41,2011,30,29,29
1100650,473515,Publifutura Affichage Italia s.r.l.,B,gut,22038,Tavernerio (CO),ITALIA,TRA,0,1,Publifutura Srl,TRA,TRA,720,WG - Dienstleistung,,,15,WB - Dienstleistung,,,901,WG - Handel / Grossverteiler,,,14,WB - Handel,,,2223251,APG|SGA,853090,KPG,4,2016-02-23,2016-04-04,Promotion,2016-02-23,NaT,2016-04-04,14,Nein,784,392,"91362,93787,93851,A0261,A0351,A1061,A2701,A500...","City ePanel,F12,F200,F200 Traffic,F200L,F24,F4...",2016,14,13,13,2016,8,7,5
737633,473515,Publifutura Affichage Italia s.r.l.,B,gut,22038,Tavernerio (CO),ITALIA,TRA,0,1,Publifutura Srl,ROS,ROS,720,WG - Dienstleistung,,,15,WB - Dienstleistung,,,460,WG - Freizeit / Sport,,,10,WB - Freizeit / Touristik,,,1946919,APG|SGA,790096,KPG,4,2014-10-01,2014-11-24,Promotion,2014-10-01,NaT,2014-11-24,14,Nein,307,154,"91362,93787,93851,A0261,A0351,A1061,A2701,A500...","City ePanel,F12,F200,F200 Traffic,F200L,F24,F4...",2014,48,47,45,2014,40,39,37
993490,473515,Publifutura Affichage Italia s.r.l.,B,gut,22038,Tavernerio (CO),ITALIA,TRA,0,1,,,TRA,720,WG - Dienstleistung,,,15,WB - Dienstleistung,,,901,WG - Handel / Grossverteiler,901.0,WG - Handel / Grossverteiler,14,WB - Handel,14.0,WB - Handel,2417406,APG|SGA,893441,KPG,4,2017-01-12,2017-04-17,Kommerziell,2017-01-12,NaT,2017-04-17,14,Nein,2200,1650,"91362,93787,93851,A0261,A0351,A1061,A2701,A500...","City ePanel,F12,F200,F200 Traffic,F200L,F24,F4...",2017,16,15,13,2017,2,1,1
450309,473515,Publifutura Affichage Italia s.r.l.,B,gut,22038,Tavernerio (CO),ITALIA,TRA,0,1,Publifutura Srl,ROS,ROS,720,WG - Dienstleistung,,,15,WB - Dienstleistung,,,990,WG - Veranstaltungen,,,22,WB - Veranstaltungen,,,1211165,APG|SGA,594267,KPG,4,2010-02-03,2010-03-15,Kommerziell,2010-02-03,NaT,2010-03-15,14,Nein,19731,9866,"91362,93787,93851,A0261,A0351,A1061,A2701,A500...","City ePanel,F12,F200,F200 Traffic,F200L,F24,F4...",2010,11,11,9,2010,5,5,5


# PLZ STUFF

In [30]:
from pa_lib.data import desc_col

In [11]:
plz = load_bin("vkprog\\plz_data.feather")


2019-10-23 13:43:18 [INFO] Started loading binary file
2019-10-23 13:43:18 [INFO] Reading from file C:\Users\stc\data\vkprog\plz_data.feather
2019-10-23 13:43:18 [INFO] Finished loading binary file in 0.0s (0.0s CPU)


In [13]:
plz.sample(5,random_state=42)

Unnamed: 0,PLZ,FRAKTION,ORT,VERKAUFS_GEBIETS_CODE,VB_VKGEB
2580,6595,Locarno-Gerre di Sotto,Locarno,V-S01,LPA
3661,8810,Horgen-Stadt,Horgen,V-Z02,
897,2127,Val-de-Travers - Les Bayards,Val-de-Travers,V-W04,VIT
2091,5225,Bözberg-Unterbözberg,Bözberg,V-M06,OSS
1044,2824,Val Terbi-Vicques,Val Terbi,V-W04,VIT


In [31]:
desc_col(plz)

Unnamed: 0,DTYPE,NULLS,UNIQUE
PLZ,uint16,0/4010,3299
FRAKTION,object,113/3897,3288
ORT,object,113/3897,2202
VERKAUFS_GEBIETS_CODE,category,0/4010,25
VB_VKGEB,category,761/3249,19


In [24]:
(plz.groupby("PLZ")
    .count()
    .sort_values("ORT",ascending=False)
    .head(10)
)

Unnamed: 0_level_0,FRAKTION,ORT,VERKAUFS_GEBIETS_CODE,VB_VKGEB
PLZ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1724,9,9,9,9
6300,9,9,9,0
6823,7,7,7,7
1148,7,7,7,7
1470,7,7,7,7
8586,7,7,7,7
6500,7,7,7,7
6314,6,6,6,0
1041,6,6,6,6
8580,6,6,6,6


In [25]:
plz.query("PLZ == 1724")

Unnamed: 0,PLZ,FRAKTION,ORT,VERKAUFS_GEBIETS_CODE,VB_VKGEB
617,1724,Ferpicloz,Ferpicloz,V-W04,VIT
618,1724,Le Mouret-Bonnefontaine,Le Mouret,V-W04,VIT
619,1724,Le Mouret-Essert,Le Mouret,V-W04,VIT
620,1724,Le Mouret-Montévraz,Le Mouret,V-W04,VIT
621,1724,Le Mouret-Oberried,Le Mouret,V-W04,VIT
622,1724,Le Mouret-Praroman,Le Mouret,V-W04,VIT
623,1724,Le Mouret-Village,Le Mouret,V-W04,VIT
624,1724,Le Mouret-Zénauva,Le Mouret,V-W04,VIT
625,1724,Senèdes,Senèdes,V-W04,VIT


In [35]:
col_list = """PLZ
            VERKAUFS_GEBIETS_CODE
            VB_VKGEB
            """.split()
test_df = plz.loc[:,col_list].drop_duplicates(col_list)

AttributeError: 'Series' object has no attribute 'count_values'

In [40]:
(test_df.groupby("PLZ")
    .count()
    .sort_values("VERKAUFS_GEBIETS_CODE",ascending=False)
    .head(10)
)

Unnamed: 0_level_0,VERKAUFS_GEBIETS_CODE,VB_VKGEB
PLZ,Unnamed: 1_level_1,Unnamed: 2_level_1
1000,2,2
6003,2,1
2747,2,2
3280,2,2
3421,2,2
5012,2,2
4001,2,2
8165,2,0
2827,2,2
6340,2,0


22.5

In [41]:
ek_info = load_bin("vkprog\\ek_info.feather")

2019-10-23 17:59:48 [INFO] Started loading binary file
2019-10-23 17:59:48 [INFO] Reading from file C:\Users\stc\data\vkprog\ek_info.feather
2019-10-23 17:59:48 [INFO] Finished loading binary file in 0.02s (0.03s CPU)


In [42]:
desc_col(ek_info)

Unnamed: 0,DTYPE,NULLS,UNIQUE
index,int64,0/32237,32237
Endkunde_NR,int64,0/32237,32237
Endkunde,object,0/32237,30291
EK_Aktiv,int64,0/32237,2
EK_Kam_Betreut,int64,0/32237,2
EK_Land,object,49/32188,45
PLZ,int64,0/32237,2875
GEMEINDE,object,49/32188,3006
Agentur,object,20999/11238,3247
EK_BG,object,1829/30408,83


In [66]:
col_list = """Endkunde_NR
                PLZ
                GEMEINDE
                KANTON
""".split()
ek_info.loc[:,col_list]

Unnamed: 0,Endkunde_NR,PLZ,GEMEINDE,KANTON
0,100034,8408,Winterthur,ZH
1,100039,8610,Uster,ZH
2,100061,8400,Winterthur,ZH
3,100064,8610,Uster,ZH
4,100066,8610,Uster,ZH
5,100083,8604,Volketswil,ZH
6,100092,8001,Zürich,ZH
7,100095,8620,Wetzikon ZH,ZH
8,100097,8600,Dübendorf,ZH
9,100098,8304,Wallisellen,ZH


In [58]:
test_series = pd.Series(list(set(ek_info.KANTON))).sort_values(inplace=False)

In [59]:
test_series

13      AG
20      AI
1       AR
0       BE
10      BL
14      BS
11      FR
18      GE
26      GL
19      GR
15      JU
23      LU
2       NE
8       NW
3       OW
12      SG
4       SH
7       SO
24      SZ
17      TG
6       TI
22      UR
9       VD
25      VS
21      ZG
5       ZH
16    None
dtype: object