# Net Zahlen Reservationen/Offerten (Vertrag = Ja) basierend auf erfassdatum

In [2]:

import numpy as np
import pandas as pd

#######################
## Datenaufbereitung ##
#######################


# make imports from pa_lib possible (parent directory of file's directory)
import sys
from pathlib import Path

file_dir = Path.cwd()
parent_dir = file_dir.parent
sys.path.append(str(parent_dir))


from IPython.display import display
pd.options.display.max_columns = None
pd.options.display.max_rows  = None

## Libraries & Settings ##
from pa_lib.file import load_bin
from pa_lib.util import cap_words
from pa_lib.log import time_log, info

import datetime as dt
from dateutil.relativedelta import relativedelta

from pa_lib.data import (
    clean_up_categoricals,
    unfactorize,
)

In [2]:
def load_booking_data():
    bd_raw = load_bin("vkprog\\bd_data.feather").rename(
        mapper=lambda name: cap_words(name, sep="_"), axis="columns"
    )
    bd = bd_raw.loc[(bd_raw.Netto > 0)].pipe(clean_up_categoricals)
    return bd


In [3]:
raw_data_bookings = load_booking_data()

2019-10-24 09:05:53 [INFO] Started loading binary file
2019-10-24 09:05:53 [INFO] Reading from file C:\Users\stc\data\vkprog\bd_data.feather
2019-10-24 09:05:53 [INFO] Finished loading binary file in 0.18s (0.89s CPU)


In [4]:
raw_data_bookings.loc[:,["KV_NR","Agps_NR","Endkunde_NR"]].groupby(["KV_NR","Agps_NR"]).count().sort_values("Endkunde_NR", ascending= False).head(1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Endkunde_NR
KV_NR,Agps_NR,Unnamed: 2_level_1
484261,857753,1


In [5]:
raw_data_bookings.query("Endkunde_NR == 473515").sample(5)

Unnamed: 0,Endkunde_NR,Endkunde,EK_Abc,EK_Boni,EK_Plz,EK_Ort,EK_Land,EK_HB_Apg_Kurzz,EK_Kam_Betreut,EK_Aktiv,Agentur,AG_Hauptbetreuer,Verkaufsberater,Endkunde_Branchengruppe_ID,Endkunde_Branchengruppe,Endkunde_Nbranchengruppe_ID,Endkunde_Nbranchengruppe,Endkunde_Branchenkat_ID,Endkunde_Branchenkat,Endkunde_Nbranchenkat_ID,Endkunde_Nbranchenkat,Auftrag_Branchengruppe_ID,Auftrag_Branchengruppe,Auftrag_Nbranchengruppe_ID,Auftrag_Nbranchengruppe,Auftrag_Branchenkat_ID,Auftrag_Branchenkat,Auftrag_Nbranchenkat_ID,Auftrag_Nbranchenkat,Agps_NR,Segment,KV_NR,KV_Typ,Kampagnen_Status,Kampagne_Erfassungsdatum,Kampagne_Beginn,Auftragsart,Res_Dat,Annullation_Datum,Aush_Von,Dauer,Vertrag,Brutto,Netto,Agglo,PF,Kamp_Beginn_Jahr,Kamp_Beginn_KW,Kamp_Beginn_KW_2,Kamp_Beginn_KW_4,Kamp_Erfass_Jahr,Kamp_Erfass_KW,Kamp_Erfass_KW_2,Kamp_Erfass_KW_4
1384164,473515,Publifutura Affichage Italia s.r.l.,B,gut,22038,Tavernerio (CO),ITALIA,TRA,0,1,Publifutura s.r.l.,TRA,TRA,720,WG - Dienstleistung,,,15,WB - Dienstleistung,,,705,WG - Lose / Lotterien,,,12,WB - Finanzwirtschaft / Versicherung,,,2514180,APG|SGA,912799,KPG,4,2017-06-16,2017-07-03,Promotion,2017-06-16,NaT,2017-07-03,14,Nein,454,227,"91362,93787,93851,A0261,A0351,A1061,A2701,A500...","City ePanel,F12,F200,F200 Traffic,F200L,F24,F4...",2017,27,27,25,2017,24,23,21
852485,473515,Publifutura Affichage Italia s.r.l.,B,gut,22038,Tavernerio (CO),ITALIA,TRA,0,1,Publifutura Srl,ROS,ROS,720,WG - Dienstleistung,,,15,WB - Dienstleistung,,,405,WG - Reisen / Hotels / Sanatorien,,,10,WB - Freizeit / Touristik,,,1054646,APG|SGA,542436,KPG,4,2008-10-13,2009-02-02,Promotion,2008-11-07,NaT,2009-02-02,14,Nein,1107,554,"91362,93787,93851,A0261,A0351,A1061,A2701,A500...","City ePanel,F12,F200,F200 Traffic,F200L,F24,F4...",2009,6,5,5,2008,42,41,41
595716,473515,Publifutura Affichage Italia s.r.l.,B,gut,22038,Tavernerio (CO),ITALIA,TRA,0,1,,,TRA,720,WG - Dienstleistung,,,15,WB - Dienstleistung,,,720,WG - Dienstleistung,,,15,WB - Dienstleistung,,,2496276,APG|SGA,908625,KPG,4,2017-05-08,2017-05-22,Kommerziell,2017-05-11,NaT,2017-05-29,7,Nein,279,223,"91362,93787,93851,A0261,A0351,A1061,A2701,A500...","City ePanel,F12,F200,F200 Traffic,F200L,F24,F4...",2017,21,21,21,2017,19,19,17
1023353,473515,Publifutura Affichage Italia s.r.l.,B,gut,22038,Tavernerio (CO),ITALIA,TRA,0,1,Publifutura s.r.l.,TRA,TRA,720,WG - Dienstleistung,,,15,WB - Dienstleistung,,,400,WG - Automarkt,,,13,WB - Verkehr,,,2744203,APG|SGA,958346,KPG,4,2018-07-04,2018-07-23,Promotion,2018-07-04,NaT,2018-07-23,14,Nein,1965,1179,"91362,93787,93851,A0261,A0351,A1061,A2701,A500...","City ePanel,F12,F200,F200 Traffic,F200L,F24,F4...",2018,30,29,29,2018,27,27,25
1140895,473515,Publifutura Affichage Italia s.r.l.,B,gut,22038,Tavernerio (CO),ITALIA,TRA,0,1,Publifutura Affichage Italia s.r.l.,TRA,TRA,720,WG - Dienstleistung,,,15,WB - Dienstleistung,,,901,WG - Handel / Grossverteiler,901.0,WG - Handel / Grossverteiler,14,WB - Handel,14.0,WB - Handel,2960486,APG|SGA,997939,KPG,4,2019-05-13,2019-06-03,Kommerziell,2019-05-13,NaT,2019-06-03,14,Nein,1578,1136,"91362,93787,93851,A0261,A0351,A1061,A2701,A500...","City ePanel,F12,F200,F200 Traffic,F200L,F24,F4...",2019,23,23,21,2019,20,19,17


# PLZ STUFF

In [3]:
from pa_lib.data import desc_col

In [47]:
ek_info = load_bin("vkprog\\ek_info.feather")

2019-10-25 15:47:45 [INFO] Started loading binary file
2019-10-25 15:47:45 [INFO] Reading from file C:\Users\stc\data\vkprog\ek_info.feather
2019-10-25 15:47:45 [INFO] Finished loading binary file in 0.03s (0.03s CPU)


In [48]:
ek_info.sample(4)

Unnamed: 0,index,Endkunde_NR,Endkunde,EK_Aktiv,EK_Kam_Betreut,EK_Land,PLZ,GEMEINDE,Agentur,EK_BG,EK_BG_ID,Auftrag_BG_ID,Auftrag_BG_Anz,Last_Res_Date,First_Res_Year,Last_Res_Year,Last_Aus_Date,EK_HB_Apg_Kurzz,AG_Hauptbetreuer,KANTON,BEZIRK,REGION,GROSSREGION,VERKAUFS_GEBIETS_CODE,VB_VKGEB
16029,16029,548705,Wohntex Affoltern AG,1,0,SCHWEIZ,8910,Affoltern am Albis,,WG - Hausbau / Ausstattung,556,556,1,2011-11-18,2011,2011,2011-12-12,MOE,,ZH,Affoltern,Dietikon–Schlieren,Region Zürich,V-Z02,
26153,26153,618971,mein-brennholz.ch,1,0,SCHWEIZ,4334,Sisseln AG,,WG - Energiewirtschaft,230,230,1,2016-06-08,2016,2016,2016-08-01,CKT,,AG,Laufenburg,Rheinfelden,Region Basel,V-M02,CKT
11250,11250,509644,Wymann Elektro AG,1,0,SCHWEIZ,3612,Steffisburg,Schweiz. Elektro-Einkaufs-Vereinigung,WG - Hausbau / Ausstattung,556,240,1,2009-03-31,2009,2009,2009-05-18,CBA,BES,BE,Thun,Thun,Berner Oberland,V-M05,JAN
8922,8922,490228,Garage de l'Etoile SA,1,0,SCHWEIZ,1020,Renens VD,Nisada Communication drd Sàrl,WG - Automarkt,400,400,1,2019-01-01,2010,2019,2019-04-01,SAO,PIE,VD,Ouest lausannois,Renens–Ecublens,Region Lausanne,V-W02,SAO


In [21]:
## Contains Verkaufsgebiete
def load_plz():
    #get the raw data
    with project_dir("vkprog"):
        plz_data = load_bin("plz_data.feather")
        plz_data.loc[:,"PLZ"] = plz_data.loc[:,"PLZ"].astype("int64")
    return plz_data

In [22]:
plz_data = load_plz()

NameError: name 'project_dir' is not defined

In [35]:
def plz_data_olap_counter(column):
    agg_counter = (plz_data.groupby(column)
                        .agg({"VERKAUFS_GEBIETS_CODE": "count"})
                        .reset_index()
                        .rename(columns={"VERKAUFS_GEBIETS_CODE": "CNT"})) # COUNT
    return agg_counter

def report_success():
    print("cust_matched:  ", cust_matched.shape)
    print("cust_unmatched:", cust_unmatched.shape)
    print("cust_current:  ", cust_current.shape)
    print("Matched-Perc:  ",f"{cust_matched.shape[0]/cust_current.shape[0]}"[0:4])

def collect(s, sep=","):
        return sep.join(map(str, s[s.notna()].unique()))

- Only swiss customers are left now to match
- Next step: Figure out unique ``FRAKTION`` in ``plz_data``, but first check if it is worthwhile

In [39]:
def endkunde2vkgeb():
    ## Our basis table with our customers
    col_list = """Endkunde_NR
                  PLZ
                  GEMEINDE
                  KANTON
                  EK_Land""".split()

    cust_current = ek_info.loc[:,col_list]

    ## International customers get mapped to MAT, INTERNATIONAL

    # find the internationl customers
    row_select = (cust_current.loc[:,"EK_Land"] != "SCHWEIZ")
    cust_matched_international = cust_current.loc[row_select,:]

    # map international customers to (MAT, INTERNATIONAL)
    cust_matched_international.loc[:,"VERKAUFS_GEBIETS_CODE"] = "INTERNATIONAL"
    cust_matched_international.loc[:,"VB_VKGEB"] = "MAT"

    # matched, cleanup
    col_list     = """Endkunde_NR VERKAUFS_GEBIETS_CODE VB_VKGEB""".split()
    cust_matched = cust_matched_international.loc[:,col_list].copy()

    # unmatched, cleanup
    row_select = (pd.merge(cust_current,
                          cust_matched_international,
                          on="Endkunde_NR",
                          how="left")
                  .loc[:,"VERKAUFS_GEBIETS_CODE"]
                  .isna()
                 )
    cust_unmatched = cust_current.loc[row_select,:]

    #print("INTERNATIONAL")
    #report_success()

    ## Taking care of all the Swiss Customers:

    fraktion_cnt = plz_data_olap_counter("FRAKTION")
    plz_cnt      = plz_data_olap_counter("PLZ")
    ort_cnt      = plz_data_olap_counter("ORT")

    max_iters = max(fraktion_cnt.loc[:,"CNT"]
                    .append(plz_cnt.loc[:,"CNT"])
                    .append(ort_cnt.loc[:,"CNT"])
                   )+1

    for unique_by in range(1,max_iters):

        ## Match by FRAKTION
        plz_unique_fraktion = (pd.merge(plz_data,
                                        fraktion_cnt,
                                        left_on="FRAKTION",
                                        right_on="FRAKTION",
                                        how="left"
                                       )
                                 .query(f"CNT =={unique_by}") # only unique ones
                                 .groupby("FRAKTION")
                                 .agg(
                                     {"VERKAUFS_GEBIETS_CODE": collect,
                                      "VB_VKGEB": collect,
                                     }
                                 )
                                 .reset_index()
                              )

        cust_container_fraktion = (pd.merge(cust_unmatched,
                                          plz_unique_fraktion.loc[:,"""FRAKTION VERKAUFS_GEBIETS_CODE  VB_VKGEB""".split()],
                                          left_on="GEMEINDE",
                                          right_on="FRAKTION",
                                          how="left"
                                         )
                                 )

        row_select = cust_container_fraktion.loc[:,"VERKAUFS_GEBIETS_CODE"].isna() # unmatched rows!
        cust_matched_fraktion = cust_container_fraktion.loc[~row_select,"""Endkunde_NR VERKAUFS_GEBIETS_CODE VB_VKGEB""".split()]

        cust_matched          = cust_matched.append(cust_matched_fraktion).drop_duplicates()
        cust_unmatched        = cust_container_fraktion.loc[ row_select,"""Endkunde_NR PLZ GEMEINDE KANTON EK_Land""".split()]

        #print(f"FRAKTION, unique by {unique_by}")
        #report_success()

        ## Match by PLZ
        plz_unique_plz = (pd.merge(plz_data,
                                        plz_cnt,
                                        left_on="PLZ",
                                        right_on="PLZ",
                                        how="left"
                                       )
                                 .query(f"CNT == {unique_by}") # only unique ones
                                 .groupby("PLZ")
                                 .agg(
                                     {"VERKAUFS_GEBIETS_CODE": collect,
                                      "VB_VKGEB": collect,
                                     }
                                 )
                                 .reset_index()
                              )

        cust_container_plz = (pd.merge(cust_unmatched,
                                       plz_unique_plz.loc[:,"""PLZ VERKAUFS_GEBIETS_CODE  VB_VKGEB""".split()],
                                       left_on="PLZ",
                                       right_on="PLZ",
                                       how="left"
                                      )
                             )

        row_select = cust_container_plz.loc[:,"VERKAUFS_GEBIETS_CODE"].isna() # unmatched rows!
        cust_matched_plz = cust_container_plz.loc[~row_select,"""Endkunde_NR VERKAUFS_GEBIETS_CODE VB_VKGEB""".split()]

        cust_matched     = cust_matched.append(cust_matched_plz).drop_duplicates()
        cust_unmatched   = cust_container_plz.loc[ row_select,"""Endkunde_NR PLZ GEMEINDE KANTON EK_Land""".split()]

        #print(f"PLZ, unique by {unique_by}")
        #report_success()

        ## Match by ORT
        plz_unique_ort = (pd.merge(plz_data,
                                        ort_cnt,
                                        left_on="ORT",
                                        right_on="ORT",
                                        how="left"
                                       )
                                 .query(f"CNT == {unique_by}") # only unique ones
                                 .groupby("ORT")
                                 .agg(
                                     {"VERKAUFS_GEBIETS_CODE": collect,
                                      "VB_VKGEB": collect,
                                     }
                                 )
                                 .reset_index()
                              )

        cust_container_ort = (pd.merge(cust_unmatched,
                                       plz_unique_ort.loc[:,"""ORT VERKAUFS_GEBIETS_CODE  VB_VKGEB""".split()],
                                       left_on="GEMEINDE",
                                       right_on="ORT",
                                       how="left"
                                      )
                             )

        row_select = cust_container_ort.loc[:,"VERKAUFS_GEBIETS_CODE"].isna()
        cust_matched_ort = cust_container_ort.loc[~row_select,"""Endkunde_NR VERKAUFS_GEBIETS_CODE VB_VKGEB""".split()]

        cust_matched     = cust_matched.append(cust_matched_plz).drop_duplicates()
        cust_unmatched   = cust_container_ort.loc[ row_select,"""Endkunde_NR PLZ GEMEINDE KANTON EK_Land""".split()]

        #print(f"ORT, unique by {unique_by}")
        #report_success()
    report_success()
    return cust_matched


In [40]:
test_df = endkunde2vkgeb()

cust_matched:   (31760, 3)
cust_unmatched: (43, 5)
cust_current:   (32237, 5)
Matched-Perc:   0.98


In [15]:
cust_matched.shape

(31760, 3)

In [41]:
desc_col(cust_matched)

Unnamed: 0,DTYPE,NULLS,UNIQUE
Endkunde_NR,int64,0/31760,31760
VERKAUFS_GEBIETS_CODE,object,0/31760,35
VB_VKGEB,object,0/31760,27


In [16]:
cust_unmatched.shape

(43, 5)

In [42]:
test_df = pd.merge(ek_info, cust_matched, on="Endkunde_NR",how="left")

In [44]:
test_df.shape

(32237, 25)

In [45]:
ek_info.shape

(32237, 23)