# Data-Prep: CRM Data

# Load libs

In [40]:

# Make imports from pa_lib possible by appending the parent of the current
# working directory to sys.path.
# NOTE(review): this uses Path.cwd(), not the notebook file's own location, so
# it presumably expects the kernel to be started in the notebook's directory.
import sys
from pathlib import Path

file_dir = Path.cwd()
parent_dir = file_dir.parent
sys.path.append(str(parent_dir))

import pandas as pd
pd.options.display.max_columns = None


## Libraries & Settings ##
from pa_lib.file import load_bin
from pa_lib.util import cap_words
from pa_lib.log import time_log, info

import datetime as dt
from dateutil.relativedelta import relativedelta

from pa_lib.data import (
    clean_up_categoricals,
    unfactorize,
)

from pa_lib.data import desc_col

# Load CRM data

In [2]:
def load_crm_data():
    """Load the CRM feather file and normalise its column names.

    Every column name is passed through ``cap_words(name, sep="_")``.

    Returns:
        The object returned by ``load_bin`` after renaming its columns.
    """
    crm_table = load_bin("vkprog\\crm_data_vkprog.feather")
    return crm_table.rename(columns=lambda col_name: cap_words(col_name, sep="_"))


In [3]:
# Load the CRM table; the following cells rebind and extend `raw_crm_data`.
raw_crm_data = load_crm_data()

2019-10-03 13:58:36 [INFO] Started loading binary file
2019-10-03 13:58:36 [INFO] Reading from file C:\Users\stc\data\vkprog\crm_data_vkprog.feather
2019-10-03 13:58:36 [INFO] Finished loading binary file in 0.23s (0.39s CPU)


In [20]:
# Cast Year and KW_2 to int64 (the next cell combines them arithmetically).
raw_crm_data = raw_crm_data.astype({"Year": "int64", "KW_2": "int64"})

In [22]:
# Add the integer key YYYYKW_2 = Year * 100 + KW_2 (e.g. 2014 and 41 -> 201441).
# The result of eval() is rebound instead of using inplace=True, so the frame
# object is not mutated behind the back of anything that still references it.
raw_crm_data = raw_crm_data.eval("YYYYKW_2 = Year * 100 + KW_2")

# View data

In [27]:
# First rows, numeric summary and per-column description, shown in that order.
display(
    raw_crm_data.head(),
    raw_crm_data.describe(),
    desc_col(raw_crm_data),
)

Unnamed: 0,Betreff,Kanal,Datum,Quelle,Verantwortlich,Kuerzel,Endkunde_NR,VB_Filter_Von,VB_Filter_Bis,VB_Filter_Grund,Year,KW,KW_2,KW_4,YYYYKW_2
0,Ost - Aktionen - 2014 - ACO F200 Leuchtplakate...,E-Mail,2014-10-08,Marketing,Liliane Schüpbach (REGION OST ZÜRICH),LSC,100034,NaT,NaT,,2014,41,41,41,201441
1,City ePanel Winterthur,Besuch,2014-11-03,Verkauf,Alma Coralic (REGION OST ZÜRICH),ACO,100034,NaT,NaT,,2014,45,45,45,201445
2,KMU Forum,Besuch,2014-11-27,Verkauf,Heinz Kläui (APG WINTERTHUR),HKL,100034,NaT,NaT,,2014,48,47,45,201447
3,Ost - Aktionen - 2015 - ACO_KMU Aktion_KW5-7_2015,E-Mail,2014-12-11,Marketing,Larissa Zingre (KAM ZÜRICH),LKU,100034,NaT,NaT,,2014,50,49,49,201449
4,Cafe,Besuch,2014-12-23,Verkauf,Heinz Kläui (APG WINTERTHUR),HKL,100034,NaT,NaT,,2014,52,51,49,201451


Unnamed: 0,Year,KW_2,YYYYKW_2
count,904403.0,904403.0,904403.0
mean,2016.555787,25.974495,201681.553147
std,1.636505,14.913925,160.71222
min,2013.0,1.0,201339.0
25%,2015.0,13.0,201543.0
50%,2017.0,25.0,201707.0
75%,2018.0,39.0,201819.0
max,2024.0,51.0,202441.0


Unnamed: 0,DTYPE,NULLS,UNIQUE
Betreff,category,22/904381,259652
Kanal,category,0/904403,12
Datum,datetime64[ns],0/904403,2249
Quelle,category,0/904403,3
Verantwortlich,category,0/904403,270
Kuerzel,category,1/904402,272
Endkunde_NR,category,0/904403,48517
VB_Filter_Von,datetime64[ns],903007/1396,13
VB_Filter_Bis,datetime64[ns],903369/1034,37
VB_Filter_Grund,category,903021/1382,60


In [32]:
# Row counts per (Year, Kanal); only the Endkunde_NR count column is displayed.
# NOTE(review): the recorded output shows NaN for some combinations
# (e.g. 2013 / Dankeskarte) rather than 0.
display(raw_crm_data.groupby(["Year","Kanal"]).count().loc[:,"Endkunde_NR"])

Year  Kanal        
2013  Besprechung         91.0
      Besuch            2546.0
      Brief             2924.0
      Dankeskarte          NaN
      E-Mail           17177.0
      Event               54.0
      Fax                  NaN
      Internet            23.0
      SMS                  1.0
      Telefon            970.0
      Twitter              NaN
      Veranstaltung        NaN
2014  Besprechung        171.0
      Besuch           10941.0
      Brief             7512.0
      Dankeskarte         68.0
      E-Mail           73949.0
      Event              223.0
      Fax                  1.0
      Internet            85.0
      SMS                  8.0
      Telefon           4942.0
      Twitter              2.0
      Veranstaltung      238.0
2015  Besprechung        179.0
      Besuch           11944.0
      Brief             3664.0
      Dankeskarte         76.0
      E-Mail           97372.0
      Event               99.0
                        ...   
2022  Fax          

In [37]:
# Show the rows whose Year equals 2019.
# NOTE(review): this displays the full query result with no .head() limit.
raw_crm_data.query("Year == 2019")

Unnamed: 0,Betreff,Kanal,Datum,Quelle,Verantwortlich,Kuerzel,Endkunde_NR,VB_Filter_Von,VB_Filter_Bis,VB_Filter_Grund,Year,KW,KW_2,KW_4,YYYYKW_2
47,MegaPoster Newsletter - Newsletter Gerbergasse...,E-Mail,2019-01-15,Marketing,Lara Hösli (KAM ZÜRICH),LAH,100034,NaT,NaT,,2019,3,3,1,201903
48,Ost - Aktionen 2019 - ACO_Dispo_Eröffnung 2. H...,E-Mail,2019-02-18,Marketing,Ivonne Wipfli (REGION OST ZÜRICH),IWI,100034,NaT,NaT,,2019,8,7,5,201907
49,MegaPoster Newsletter - Last-minute-Angebot AP...,E-Mail,2019-03-06,Marketing,Lara Hösli (KAM ZÜRICH),LAH,100034,NaT,NaT,,2019,10,9,9,201909
50,MegaPoster Newsletter - neues MegaPoster Basel...,E-Mail,2019-04-03,Marketing,Lara Hösli (KAM ZÜRICH),LAH,100034,NaT,NaT,,2019,14,13,13,201913
51,MegaPoster Newsletter - neues MegaPoster Lande...,E-Mail,2019-04-10,Marketing,Lara Hösli (KAM ZÜRICH),LAH,100034,NaT,NaT,,2019,15,15,13,201915
52,MegaPoster Newsletter - Sommer-Special zu gesc...,E-Mail,2019-04-25,Marketing,Lara Hösli (KAM ZÜRICH),LAH,100034,NaT,NaT,,2019,17,17,17,201917
53,Ost - Aktionen 2019 - ACO_Sommeraktion_KW28-32...,E-Mail,2019-05-24,Marketing,Ivonne Wipfli (REGION OST ZÜRICH),IWI,100034,NaT,NaT,,2019,21,21,21,201921
54,Ost - Aktionen 2019 - REMINDER ACO_Sommeraktio...,E-Mail,2019-06-12,Marketing,Ivonne Wipfli (REGION OST ZÜRICH),IWI,100034,NaT,NaT,,2019,24,23,21,201923
55,MegaPoster Newsletter - Newsletter Börsenstras...,E-Mail,2019-06-20,Marketing,Lara Hösli (KAM ZÜRICH),LAH,100034,NaT,NaT,,2019,25,25,25,201925
56,MegaPoster Newsletter - Newsletter Hardstrasse...,E-Mail,2019-07-23,Marketing,Lara Hösli (KAM ZÜRICH),LAH,100034,NaT,NaT,,2019,30,29,29,201929


In [42]:
# Contact counts with channels as rows and years as columns; gaps shown as 0.
display(
    raw_crm_data.pivot_table(
        values=["Endkunde_NR"],
        index=["Kanal"],
        columns=["Year"],
        aggfunc="count",
        fill_value=0,
    )
)

Unnamed: 0_level_0,Endkunde_NR,Endkunde_NR,Endkunde_NR,Endkunde_NR,Endkunde_NR,Endkunde_NR,Endkunde_NR,Endkunde_NR,Endkunde_NR,Endkunde_NR,Endkunde_NR,Endkunde_NR
Year,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024
Kanal,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
Besprechung,91,171,179,749,547,213,116,0,0,0,0,0
Besuch,2546,10941,11944,12302,11974,10922,7830,51,0,0,1,0
Brief,2924,7512,3664,6305,3371,3873,2383,0,0,0,0,0
Dankeskarte,0,68,76,82,77,72,44,0,0,0,0,0
E-Mail,17177,73949,97372,158107,147074,158915,103440,1,1,1,1,1
Event,54,223,99,674,848,601,0,0,0,0,0,0
Fax,0,1,1,4,5,2,0,0,0,0,0,0
Internet,23,85,99,112,127,88,73,0,0,0,0,0
SMS,1,8,6,17,10,8,5,0,0,0,0,0
Telefon,970,4942,9424,7388,7644,8117,4577,17,0,0,0,0


# Define Groups

In [93]:
from functools import reduce

# Explicit channel groups: group name -> set of raw "Kanal" values.
kanal_grps = {
    "Besprechung": {"Besprechung"},
    "Besuch": {"Besuch"},
    "Brief_Dankeskarte": {"Brief", "Dankeskarte"},
    "E-Mail": {"E-Mail"},
    "Event_Veranstaltung": {"Event", "Veranstaltung"},
    "Telefon": {"Telefon"},
}

# Every channel value present in the data that no explicit group claims
# ends up in the catch-all group "Anderes".
all_kanal = set(raw_crm_data.loc[:, "Kanal"])
assigned_kanal = reduce(set.union, kanal_grps.values())
kanal_grps["Anderes"] = all_kanal - assigned_kanal

In [94]:
# Inspect the resulting channel-group mapping.
kanal_grps

{'Besprechung': {'Besprechung'},
 'Besuch': {'Besuch'},
 'Brief_Dankeskarte': {'Brief', 'Dankeskarte'},
 'E-Mail': {'E-Mail'},
 'Event_Veranstaltung': {'Event', 'Veranstaltung'},
 'Telefon': {'Telefon'},
 'Anderes': {'Fax', 'Internet', 'SMS', 'Twitter'}}

# Global variables

In [64]:
def global_variables(day, month, year_score, year_train, year_span):
    """Publish the notebook-wide reference dates and look-back span.

    Sets three module-level names:

    * ``date_now``      -- ``datetime(year_score, month, day)``
    * ``date_training`` -- ``datetime(year_train, month, day)``
    * ``year_span``     -- the value passed in as ``year_span``

    Args:
        day: Day of month shared by both reference dates.
        month: Month shared by both reference dates.
        year_score: Year of the scoring reference date.
        year_train: Year of the training reference date.
        year_span: Stored unchanged as the global ``year_span``.

    Returns:
        A one-line summary string of the values that were set.

    Raises:
        ValueError: If day/month/year do not form a valid calendar date
            (raised by ``datetime.datetime``).
    """
    global date_now, date_training

    # NOTE (original author): only works for odd calendar weeks!
    # Presumably because the data is keyed on two-week periods (KW_2) -- TODO confirm.
    date_now = dt.datetime(year_score, month, day)
    date_training = dt.datetime(year_train, month, day)

    # `year_span` used to be accepted but silently dropped. A parameter name
    # cannot be listed in a `global` statement, so publish it via globals().
    globals()["year_span"] = year_span

    return (
        f"date_now: {date_now} -- date_training: {date_training}"
        f" -- year_span: {year_span}"
    )
    

In [58]:
# Score as of 23 Sep 2019, train as of 23 Sep 2018, with a span of 4 years.
global_variables(
    day=23,
    month=9,
    year_score=2019,
    year_train=2018,
    year_span=4,
)

'date_now: 2019-09-23 00:00:00 -- date_training: 2018-09-23 00:00:00)'

In [63]:
# Set the look-back span explicitly, then echo the three global settings,
# one value per line.
year_span = 4
print(date_now, date_training, year_span, sep="\n")

2019-09-23 00:00:00
2018-09-23 00:00:00
4


# Yearly aggregation

In [100]:
# List the available channel-group names.
kanal_grps.keys()

dict_keys(['Besprechung', 'Besuch', 'Brief_Dankeskarte', 'E-Mail', 'Event_Veranstaltung', 'Telefon', 'Anderes'])

In [108]:
# Sanity check: date_now shifted back by exactly one year.
date_now - relativedelta(years=1)

datetime.datetime(2018, 9, 23, 0, 0)

In [128]:
# NOTE(review): this repeats an earlier cell, and the recorded output
# (TypeError: 'dict_keys' object is not subscriptable) does not match this code;
# the cell was presumably edited after it last ran. Candidate for removal.
kanal_grps.keys()

TypeError: 'dict_keys' object is not subscriptable

In [135]:
def yrl_kanal_contacts(group_name, rel_year):
    """Count CRM contacts per customer for one channel group in one look-back year.

    Rows are selected from the global ``raw_crm_data`` when their ``Kanal``
    belongs to ``kanal_grps[group_name]`` and their ``Datum`` lies in the
    half-open window::

        [date_now - rel_year years, date_now - (rel_year - 1) years)

    so ``rel_year=1`` is the year ending just before ``date_now`` and
    ``rel_year=2`` the year before that.

    Reads the globals ``raw_crm_data``, ``kanal_grps`` and ``date_now``.

    Args:
        group_name: Key of ``kanal_grps`` selecting the channel group.
        rel_year: Number of years to go back for the start of the window.

    Returns:
        DataFrame with the columns ``Endkunde_NR`` and ``Anzahl`` (row count
        per customer). If ``Endkunde_NR`` is categorical, customers with no
        matching rows are kept with a count of 0.

    Raises:
        KeyError: If ``group_name`` is not a key of ``kanal_grps``.
    """
    window_start = date_now - relativedelta(years=rel_year)
    window_end = date_now - relativedelta(years=rel_year - 1)

    in_group = raw_crm_data["Kanal"].isin(kanal_grps[group_name])
    in_window = (raw_crm_data["Datum"] >= window_start) & (
        raw_crm_data["Datum"] < window_end
    )

    return (
        # Keep only the grouping key and the counted column, so count() does
        # not aggregate every column just to throw the rest away.
        raw_crm_data.loc[in_group & in_window, ["Endkunde_NR", "Kanal"]]
        # observed=False pins the zero-count rows for a categorical
        # Endkunde_NR instead of relying on the pandas default.
        .groupby("Endkunde_NR", observed=False)
        .count()
        .reset_index()
        .rename(columns={"Kanal": "Anzahl"})
    )

In [137]:
# Example: per-customer counts for the "Besuch" group with rel_year=2.
yrl_kanal_contacts("Besuch",2)

Unnamed: 0,Endkunde_NR,Anzahl
0,100034,0
1,100039,1
2,100043,0
3,100045,0
4,100048,0
5,100056,0
6,100059,0
7,100061,0
8,100064,2
9,100066,1


In [65]:
# Peek at the first rows of the CRM table (same view as in the "View data" section).
raw_crm_data.head()

Unnamed: 0,Betreff,Kanal,Datum,Quelle,Verantwortlich,Kuerzel,Endkunde_NR,VB_Filter_Von,VB_Filter_Bis,VB_Filter_Grund,Year,KW,KW_2,KW_4,YYYYKW_2
0,Ost - Aktionen - 2014 - ACO F200 Leuchtplakate...,E-Mail,2014-10-08,Marketing,Liliane Schüpbach (REGION OST ZÜRICH),LSC,100034,NaT,NaT,,2014,41,41,41,201441
1,City ePanel Winterthur,Besuch,2014-11-03,Verkauf,Alma Coralic (REGION OST ZÜRICH),ACO,100034,NaT,NaT,,2014,45,45,45,201445
2,KMU Forum,Besuch,2014-11-27,Verkauf,Heinz Kläui (APG WINTERTHUR),HKL,100034,NaT,NaT,,2014,48,47,45,201447
3,Ost - Aktionen - 2015 - ACO_KMU Aktion_KW5-7_2015,E-Mail,2014-12-11,Marketing,Larissa Zingre (KAM ZÜRICH),LKU,100034,NaT,NaT,,2014,50,49,49,201449
4,Cafe,Besuch,2014-12-23,Verkauf,Heinz Kläui (APG WINTERTHUR),HKL,100034,NaT,NaT,,2014,52,51,49,201451
