# Benodigde libraries importeren

In [2]:
import pandas as pd
from datetime import datetime
import numpy as np
import pandas.api.types as ptypes


# Bestand inladen en data filteren

In [3]:
# Load the Maxilia export (a CSV file) and keep only the columns used in the analysis.
# sep=None together with the python engine lets pandas detect the delimiter itself.
columns_to_load = [
    'email', 'Orderdate', 'OrderID', 'TurnoverLead',
    'BuyinLead', 'LeadPhase', 'LeadStatus',
]
df = pd.read_csv(
    'expdata 20190912 1002.csv',
    sep=None,
    usecols=columns_to_load,
    engine='python',
)
df.head()

Unnamed: 0,OrderID,Orderdate,email,TurnoverLead,BuyinLead,LeadPhase,LeadStatus
0,27654,22-4-2014,janneke.van.wingerden@hotmail.com,840.0,0.0,Order,Gefactureerd
1,30777,9-4-2015,marloessanders@izit.nl,300.0,0.0,Order,Gefactureerd
2,32291,30-7-2014,info@ferox-design.com,235.0,0.0,Order,Gefactureerd
3,32291,30-7-2014,info@ferox-design.com,235.0,0.0,Order,Gefactureerd
4,33824,27-5-2016,johan@thermservice.be,0.0,0.0,Order,Gefactureerd


In [4]:
# Show the dtypes and non-null counts of the freshly loaded columns.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 273322 entries, 0 to 273321
Data columns (total 7 columns):
OrderID         273322 non-null int64
Orderdate       131183 non-null object
email           272921 non-null object
TurnoverLead    273322 non-null float64
BuyinLead       273322 non-null float64
LeadPhase       273322 non-null object
LeadStatus      271268 non-null object
dtypes: float64(2), int64(1), object(4)
memory usage: 14.6+ MB


In [5]:
# Clean the loaded data in three steps:
#   1. drop every row that has a missing value in any column,
#   2. keep only rows whose email contains an '@',
#   3. keep one row (the first) per OrderID.
df = (
    df.dropna()
      .loc[lambda frame: frame['email'].str.contains('@')]
      .drop_duplicates(subset='OrderID', keep='first')
)
df.head()

Unnamed: 0,OrderID,Orderdate,email,TurnoverLead,BuyinLead,LeadPhase,LeadStatus
0,27654,22-4-2014,janneke.van.wingerden@hotmail.com,840.0,0.0,Order,Gefactureerd
1,30777,9-4-2015,marloessanders@izit.nl,300.0,0.0,Order,Gefactureerd
2,32291,30-7-2014,info@ferox-design.com,235.0,0.0,Order,Gefactureerd
4,33824,27-5-2016,johan@thermservice.be,0.0,0.0,Order,Gefactureerd
5,34406,4-2-2015,miranda@dynadental.com,1185.0,0.0,Order,Gefactureerd


In [6]:
# Parse the day-month-year strings (e.g. '22-4-2014') into datetimes.
df = df.assign(Orderdate=pd.to_datetime(df['Orderdate'], format='%d-%m-%Y'))


In [7]:
# Re-check dtypes and row counts after cleaning and date parsing.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 106583 entries, 0 to 273321
Data columns (total 7 columns):
OrderID         106583 non-null int64
Orderdate       106583 non-null datetime64[ns]
email           106583 non-null object
TurnoverLead    106583 non-null float64
BuyinLead       106583 non-null float64
LeadPhase       106583 non-null object
LeadStatus      106583 non-null object
dtypes: datetime64[ns](1), float64(2), int64(1), object(3)
memory usage: 6.5+ MB


In [8]:
# Summary statistics of the numeric columns of the cleaned data.
df.describe()

Unnamed: 0,OrderID,TurnoverLead,BuyinLead
count,106583.0,106583.0,106583.0
mean,147886.233912,611.979496,360.30912
std,61013.462495,1391.539161,1031.11294
min,27654.0,-392.0,-1001.51
25%,94114.0,167.0,34.5
50%,148056.0,300.4,151.5
75%,201786.5,610.0,358.47
max,252808.0,174519.5,141338.84


In [9]:
# Keep only invoiced orders: LeadPhase must be exactly 'Order' and LeadStatus
# exactly 'Gefactureerd'. An exact comparison is used rather than a substring
# match, because the sanity-check assertions on this data require equality.
is_invoiced_order = df['LeadPhase'].eq('Order') & df['LeadStatus'].eq('Gefactureerd')
df = df[is_invoiced_order]
df.head()

Unnamed: 0,OrderID,Orderdate,email,TurnoverLead,BuyinLead,LeadPhase,LeadStatus
0,27654,2014-04-22,janneke.van.wingerden@hotmail.com,840.0,0.0,Order,Gefactureerd
1,30777,2015-04-09,marloessanders@izit.nl,300.0,0.0,Order,Gefactureerd
2,32291,2014-07-30,info@ferox-design.com,235.0,0.0,Order,Gefactureerd
4,33824,2016-05-27,johan@thermservice.be,0.0,0.0,Order,Gefactureerd
5,34406,2015-02-04,miranda@dynadental.com,1185.0,0.0,Order,Gefactureerd


In [10]:
# Row counts and dtypes after restricting the data to invoiced orders.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 97825 entries, 0 to 272752
Data columns (total 7 columns):
OrderID         97825 non-null int64
Orderdate       97825 non-null datetime64[ns]
email           97825 non-null object
TurnoverLead    97825 non-null float64
BuyinLead       97825 non-null float64
LeadPhase       97825 non-null object
LeadStatus      97825 non-null object
dtypes: datetime64[ns](1), float64(2), int64(1), object(3)
memory usage: 6.0+ MB


In [11]:
# Sanity checks: the filtered data must satisfy the conditions the analysis relies on.
assert ptypes.is_datetime64_any_dtype(df['Orderdate'])
assert df['email'].str.contains('@').all()
for column, required_value in (('LeadPhase', 'Order'), ('LeadStatus', 'Gefactureerd')):
    assert (df[column] == required_value).all()


# Data grouperen per klant

In [12]:
# Aggregate the orders per customer (a customer is identified by email):
#   Orderdate    -> number of days between the first and the last order
#   OrderID      -> number of orders
#   TurnoverLead -> total turnover


def _days_between_first_and_last(dates):
    """Return the number of whole days between the earliest and the latest date."""
    return (dates.max() - dates.min()).days


def _total(values):
    """Return the sum of the values in one group."""
    return values.sum()


df_clv = df.groupby('email').agg({
    'Orderdate': _days_between_first_and_last,
    'OrderID': len,
    'TurnoverLead': _total,
})
df_clv.head()

Unnamed: 0_level_0,Orderdate,OrderID,TurnoverLead
email,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
03vichy@gmail.com,0,1,259.0
06alexisbrunet@gmail.com,0,1,180.0
070paulus070@gmail.com,0,1,33.5
1.NLD.MINUSMA.GAO.AHDET.COGP.S1@B-SC1.TTN.MINDEF.NL,0,1,1230.5
1.vorsitzende.gv.muenster@gmail.com,0,1,344.5


In [13]:
# Give the aggregated columns descriptive names.

df_clv = df_clv.rename(columns={
    'Orderdate': 'days_customer',
    'OrderID': 'total_transactions',
    'TurnoverLead': 'total_spent',
})
df_clv.head()

Unnamed: 0_level_0,days_customer,total_transactions,total_spent
email,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
03vichy@gmail.com,0,1,259.0
06alexisbrunet@gmail.com,0,1,180.0
070paulus070@gmail.com,0,1,33.5
1.NLD.MINUSMA.GAO.AHDET.COGP.S1@B-SC1.TTN.MINDEF.NL,0,1,1230.5
1.vorsitzende.gv.muenster@gmail.com,0,1,344.5


In [14]:
# Summary statistics of the per-customer aggregates.
df_clv.describe()

Unnamed: 0,days_customer,total_transactions,total_spent
count,67777.0,67777.0,67777.0
mean,76.580684,1.443336,886.926759
std,225.263499,1.724549,2695.22434
min,0.0,1.0,-392.0
25%,0.0,1.0,173.0
50%,0.0,1.0,332.0
75%,0.0,1.0,757.6
max,1686.0,112.0,167285.4


# Berekenen van de totale CLV

In [15]:
# Average order value: total turnover divided by the total number of orders.
# Series.sum() is used instead of the builtin sum(), which would iterate over
# the Series element by element in Python.
avg_order_value = df_clv['total_spent'].sum() / df_clv['total_transactions'].sum()
print(avg_order_value)

614.4976738052648


In [16]:
# Purchase frequency: the average number of orders per customer.

purchase_frequency = df_clv['total_transactions'].sum() / len(df_clv)
print(purchase_frequency)

1.4433362350059755


In [17]:
# Repeat rate: the share of customers with more than one order.

repeat_rate = len(df_clv.loc[df_clv['total_transactions'] > 1]) / len(df_clv)
print(repeat_rate)

0.20118919397435708


In [18]:
# Churn rate: the share of customers who placed no further order after their
# first purchase, i.e. the complement of the repeat rate.

churn_rate = 1 - repeat_rate
print(churn_rate)

0.7988108060256429


In [19]:
# Average margin: (turnover - buy-in cost) / turnover over all invoiced orders.
# Series.sum() replaces the builtin sum(), and the turnover total is computed
# only once.

total_turnover = df['TurnoverLead'].sum()
profit = total_turnover - df['BuyinLead'].sum()
margin = profit / total_turnover

print(margin)

0.4098393753820391


In [20]:
# Overall CLV: (average order value * purchase frequency) / churn rate.
# The margin-adjusted CLV multiplies that by `margin`, which was computed
# earlier in the notebook; a fixed number can be used in its place.

revenue_per_customer = avg_order_value * purchase_frequency
total_clv = revenue_per_customer / churn_rate
total_clv_margin = margin * total_clv

print(total_clv)  # first number: the overall CLV
print(total_clv_margin)  # second number: the CLV after applying the margin

1110.3089145010256
455.04831200021016


In [21]:
# Put the figures above into a one-row table so they can be used in Data Studio.
# df_total_clv has one row per metric; df_clv_values is its transpose (a single
# 'Waarde' row) with the metric columns in alphabetical order.
set_total_clv = avg_order_value, purchase_frequency, churn_rate, total_clv, total_clv_margin
df_total_clv = pd.DataFrame(
    list(set_total_clv),
    columns=['Waarde'],
    index=['Gemiddelde orderwaarde', 'Frequentie aankoop', 'Churn rate', 'CLV', 'CLV marge'],
)
# A plain transpose plus column sort gives the same table as the former
# groupby/agg/stack/unstack chain, without depending on how pandas expands
# list-valued cells.
df_clv_values = df_total_clv.T.sort_index(axis=1)
df_clv_values.head()

Unnamed: 0,CLV,CLV marge,Churn rate,Frequentie aankoop,Gemiddelde orderwaarde
Waarde,1110.308915,455.048312,0.798811,1.443336,614.497674


In [22]:
# Write the one-row CLV summary table to an Excel file.
df_clv_values.to_excel('CLV_waardes.xlsx')

# Transacties en churn rate per maand uiteenzetten

In [23]:
# Count the orders per customer per calendar month
# (rows: customers, columns: months; combinations without an order are filled with 0).
df_month = df.set_index('Orderdate')
df_monthly = pd.pivot_table(
    df_month,
    values='OrderID',
    index=['email'],
    columns=pd.Grouper(freq='M'),
    aggfunc='count',
    fill_value=0,
)

df_monthly.head()

Orderdate,2014-04-30 00:00:00,2014-07-31 00:00:00,2014-09-30 00:00:00,2014-10-31 00:00:00,2014-11-30 00:00:00,2014-12-31 00:00:00,2015-01-31 00:00:00,2015-02-28 00:00:00,2015-03-31 00:00:00,2015-04-30 00:00:00,...,2018-12-31 00:00:00,2019-01-31 00:00:00,2019-02-28 00:00:00,2019-03-31 00:00:00,2019-04-30 00:00:00,2019-05-31 00:00:00,2019-06-30 00:00:00,2019-07-31 00:00:00,2019-08-31 00:00:00,2019-09-30 00:00:00
email,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
03vichy@gmail.com,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
06alexisbrunet@gmail.com,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
070paulus070@gmail.com,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1.NLD.MINUSMA.GAO.AHDET.COGP.S1@B-SC1.TTN.MINDEF.NL,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1.vorsitzende.gv.muenster@gmail.com,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [24]:
# Per month: the share of customers active in that month (at least one order)
# who placed more than one order in that month.
# A dedicated name is used so the overall scalar `repeat_rate` computed earlier
# is not overwritten with a Series.
repeat_rate_month = (df_monthly > 1).sum() / (df_monthly > 0).sum()
df_churn = repeat_rate_month.to_frame(name='repeat_rate')

In [46]:
# The monthly churn rate is the complement of the monthly repeat rate.
df_churn = df_churn.assign(churn_rate=lambda frame: 1 - frame['repeat_rate'])
df_churn.head()


Unnamed: 0_level_0,repeat_rate,churn_rate
Orderdate,Unnamed: 1_level_1,Unnamed: 2_level_1
2014-04-30,0.0,1.0
2014-07-31,0.0,1.0
2014-09-30,0.0,1.0
2014-10-31,0.0,1.0
2014-11-30,0.0,1.0


# Churn per 6 maanden en per jaar

In [26]:
# Churn per 6 months: count the orders per customer per 6-month bin
# (rows: customers, columns: bins; combinations without an order are filled with 0).
df_6_months = pd.pivot_table(
    df_month,
    values='OrderID',
    index=['email'],
    columns=pd.Grouper(freq='6M'),
    aggfunc='count',
    fill_value=0,
)

df_6_months.head()

Orderdate,2014-04-30 00:00:00,2014-10-31 00:00:00,2015-04-30 00:00:00,2015-10-31 00:00:00,2016-04-30 00:00:00,2016-10-31 00:00:00,2017-04-30 00:00:00,2017-10-31 00:00:00,2018-04-30 00:00:00,2018-10-31 00:00:00,2019-04-30 00:00:00,2019-10-31 00:00:00
email,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
03vichy@gmail.com,0,0,0,0,0,0,0,0,0,0,1,0
06alexisbrunet@gmail.com,0,0,0,0,0,0,0,0,1,0,0,0
070paulus070@gmail.com,0,0,0,0,0,0,0,0,0,0,0,1
1.NLD.MINUSMA.GAO.AHDET.COGP.S1@B-SC1.TTN.MINDEF.NL,0,0,0,0,0,1,0,0,0,0,0,0
1.vorsitzende.gv.muenster@gmail.com,0,0,0,0,0,0,0,0,0,1,0,0


In [27]:
# Per 6-month bin: the share of customers active in that bin (at least one
# order) who placed more than one order in it.
# A dedicated name is used so the overall scalar `repeat_rate` computed earlier
# is not overwritten with a Series.
repeat_rate_6_months = (df_6_months > 1).sum() / (df_6_months > 0).sum()
df_churn_6_months = repeat_rate_6_months.to_frame(name='repeat_rate_6_months')

In [28]:
# The 6-month churn rate is the complement of the 6-month repeat rate.
df_churn_6_months = df_churn_6_months.assign(
    churn_rate_6_months=lambda frame: 1 - frame['repeat_rate_6_months']
)
df_churn_6_months

Unnamed: 0_level_0,repeat_rate_6_months,churn_rate_6_months
Orderdate,Unnamed: 1_level_1,Unnamed: 2_level_1
2014-04-30,0.0,1.0
2014-10-31,0.0,1.0
2015-04-30,0.063094,0.936906
2015-10-31,0.086375,0.913625
2016-04-30,0.090199,0.909801
2016-10-31,0.102916,0.897084
2017-04-30,0.110682,0.889318
2017-10-31,0.124986,0.875014
2018-04-30,0.110779,0.889221
2018-10-31,0.117209,0.882791


In [30]:
# Churn per year: count the orders per customer per calendar year
# (rows: customers, columns: years; combinations without an order are filled with 0).
df_year = pd.pivot_table(
    df_month,
    values='OrderID',
    index=['email'],
    columns=pd.Grouper(freq='Y'),
    aggfunc='count',
    fill_value=0,
)

df_year.head()

Orderdate,2014-12-31 00:00:00,2015-12-31 00:00:00,2016-12-31 00:00:00,2017-12-31 00:00:00,2018-12-31 00:00:00,2019-12-31 00:00:00
email,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
03vichy@gmail.com,0,0,0,0,1,0
06alexisbrunet@gmail.com,0,0,0,0,1,0
070paulus070@gmail.com,0,0,0,0,0,1
1.NLD.MINUSMA.GAO.AHDET.COGP.S1@B-SC1.TTN.MINDEF.NL,0,0,1,0,0,0
1.vorsitzende.gv.muenster@gmail.com,0,0,0,0,1,0


In [31]:
# Per year: the share of customers active in that year (at least one order)
# who placed more than one order in it.
# A dedicated name is used so the overall scalar `repeat_rate` computed earlier
# is not overwritten with a Series.
repeat_rate_year = (df_year > 1).sum() / (df_year > 0).sum()
df_churn_year = repeat_rate_year.to_frame(name='repeat_rate_year')

In [32]:
# The yearly churn rate is the complement of the yearly repeat rate.
df_churn_year = df_churn_year.assign(
    churn_rate_year=lambda frame: 1 - frame['repeat_rate_year']
)
df_churn_year

Unnamed: 0_level_0,repeat_rate_year,churn_rate_year
Orderdate,Unnamed: 1_level_1,Unnamed: 2_level_1
2014-12-31,0.0,1.0
2015-12-31,0.114538,0.885462
2016-12-31,0.130083,0.869917
2017-12-31,0.151944,0.848056
2018-12-31,0.148535,0.851465
2019-12-31,0.131676,0.868324


In [33]:
# Label every row with its four-digit year as a string.
# Taking the first four characters of the stringified label works for the
# original dates and for labels that are already year strings, so the cell can
# be re-run without failing.
df_churn_year.index = pd.Index(df_churn_year.index).astype(str).str[:4]
df_churn_year

Unnamed: 0,repeat_rate_year,churn_rate_year
2014,0.0,1.0
2015,0.114538,0.885462
2016,0.130083,0.869917
2017,0.151944,0.848056
2018,0.148535,0.851465
2019,0.131676,0.868324


# CLV per klant uitrekenen

In [34]:
# CLV per customer: the customer's own average order value, combined with the
# overall purchase frequency, churn rate and margin.

order_value_per_customer = df_clv['total_spent'].div(df_clv['total_transactions'])
df_clv['customer_order_value'] = order_value_per_customer
df_clv['customer_clv'] = df_clv['customer_order_value'].mul(purchase_frequency).div(churn_rate)
df_clv['customer_clv_margin'] = df_clv['customer_clv'].mul(margin)

df_clv.head()

Unnamed: 0_level_0,days_customer,total_transactions,total_spent,customer_order_value,customer_clv,customer_clv_margin
email,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
03vichy@gmail.com,0,1,259.0,259.0,467.975749,191.794888
06alexisbrunet@gmail.com,0,1,180.0,180.0,325.234111,133.293745
070paulus070@gmail.com,0,1,33.5,33.5,60.529682,24.807447
1.NLD.MINUSMA.GAO.AHDET.COGP.S1@B-SC1.TTN.MINDEF.NL,0,1,1230.5,1230.5,2223.336519,911.21085
1.vorsitzende.gv.muenster@gmail.com,0,1,344.5,344.5,622.461951,255.109417


In [35]:
# Summary statistics of the per-customer table, now including the CLV columns.
df_clv.describe()

Unnamed: 0,days_customer,total_transactions,total_spent,customer_order_value,customer_clv,customer_clv_margin
count,67777.0,67777.0,67777.0,67777.0,67777.0,67777.0
mean,76.580684,1.443336,886.926759,537.520097,971.221505,398.044815
std,225.263499,1.724549,2695.22434,1009.096235,1823.29176,747.256756
min,0.0,1.0,-392.0,-392.0,-708.287619,-290.284156
25%,0.0,1.0,173.0,162.89,294.318802,120.623434
50%,0.0,1.0,332.0,290.0,523.98829,214.751033
75%,0.0,1.0,757.6,567.6,1025.571563,420.319609
max,1686.0,112.0,167285.4,61600.0,111302.340186,45616.08158


In [36]:
# Write the per-customer CLV table to an Excel file.
df_clv.to_excel('CLV_per_klant.xlsx')

In [37]:
# Use Orderdate as the index of df.
# The guard keeps the cell re-runnable: once Orderdate is the index it is no
# longer a column, and calling set_index again would raise a KeyError.
if 'Orderdate' in df.columns:
    df = df.set_index('Orderdate')

In [38]:
# Number of orders, total turnover and number of unique customers per calendar month.
# String aliases are used instead of the builtins len/sum and pd.Series.nunique:
# passing builtin callables such as `sum` to agg is deprecated in recent pandas.
# 'count' equals the row count here because OrderID has no missing values
# after the cleaning step.

df_transacties = df.groupby(pd.Grouper(freq='M')).agg(
    {
        'OrderID': 'count',
        'TurnoverLead': 'sum',
        'email': 'nunique',
    }
)

df_transacties.head()

Unnamed: 0_level_0,OrderID,TurnoverLead,email
Orderdate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014-04-30,1,840.0,1
2014-05-31,0,0.0,0
2014-06-30,0,0.0,0
2014-07-31,1,235.0,1
2014-08-31,0,0.0,0


In [39]:
# Show the dtypes and the date range covered by the monthly overview.
df_transacties.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 66 entries, 2014-04-30 to 2019-09-30
Freq: M
Data columns (total 3 columns):
OrderID         66 non-null int64
TurnoverLead    66 non-null float64
email           66 non-null int64
dtypes: float64(1), int64(2)
memory usage: 4.6 KB


In [40]:
# Give the monthly aggregates descriptive names.
df_transacties = df_transacties.rename(columns={
    'OrderID': 'total_transactions',
    'TurnoverLead': 'total_spent',
    'email': 'total_unique_customers',
})
df_transacties.head(5)

Unnamed: 0_level_0,total_transactions,total_spent,total_unique_customers
Orderdate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014-04-30,1,840.0,1
2014-05-31,0,0.0,0
2014-06-30,0,0.0,0
2014-07-31,1,235.0,1
2014-08-31,0,0.0,0


In [41]:
# Summary statistics of the monthly overview.
df_transacties.describe()

Unnamed: 0,total_transactions,total_spent,total_unique_customers
count,66.0,66.0,66.0
mean,1482.19697,910806.6,1393.166667
std,922.956741,571263.5,856.84598
min,0.0,0.0,0.0
25%,897.25,480599.5,871.25
50%,1395.5,914666.4,1314.5
75%,2106.25,1291857.0,1955.75
max,3810.0,2391474.0,3538.0


In [42]:
# Write the monthly overview to an Excel file.
df_transacties.to_excel('Overzicht_per_maand.xlsx')

# CLV per maand berekenen met churn per maand

In [43]:
# CLV per month, using the monthly churn rate.
# `margin` is computed over the whole data set; it could also be computed per month.
df_transacties['purchase_frequency_month'] = df_transacties['total_transactions'].div(
    df_transacties['total_unique_customers']
)
df_transacties['avg_order_value_month'] = df_transacties['total_spent'].div(
    df_transacties['total_transactions']
)
monthly_value_per_customer = df_transacties['avg_order_value_month'].mul(
    df_transacties['purchase_frequency_month']
)
# The division aligns on the month-end dates shared by both indexes.
df_transacties['CLV'] = monthly_value_per_customer.div(df_churn['churn_rate'])
df_transacties['CLV_margin'] = df_transacties['CLV'].mul(margin)
df_transacties.head()

Unnamed: 0_level_0,total_transactions,total_spent,total_unique_customers,purchase_frequency_month,avg_order_value_month,CLV,CLV_margin
Orderdate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2014-04-30,1,840.0,1,1.0,840.0,840.0,344.265075
2014-05-31,0,0.0,0,,,,
2014-06-30,0,0.0,0,,,,
2014-07-31,1,235.0,1,1.0,235.0,235.0,96.312253
2014-08-31,0,0.0,0,,,,


In [44]:
# Running averages of the monthly CLV figures.
# Months without a CLV value are dropped first; 'Index' then numbers the
# remaining months 1..n, and each running average is the cumulative sum
# divided by that running count.
df_clv_cum = df_transacties[['CLV', 'CLV_margin']].dropna().copy()
df_clv_cum['Index'] = np.arange(1, len(df_clv_cum) + 1)
for source_column in ('CLV', 'CLV_margin'):
    df_clv_cum[source_column + '_AVG'] = df_clv_cum[source_column].cumsum() / df_clv_cum['Index']
df_clv_cum.head()

Unnamed: 0_level_0,CLV,CLV_margin,Index,CLV_AVG,CLV_margin_AVG
Orderdate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-04-30,840.0,344.265075,1,840.0,344.265075
2014-07-31,235.0,96.312253,2,537.5,220.288664
2014-09-30,219.0,89.754823,3,431.333333,176.777384
2014-10-31,564.0,231.149408,4,464.5,190.37039
2014-11-30,675.251111,276.744494,5,506.650222,207.645211


# CLV per maand met churn per 6 maanden

In [45]:
# CLV per month, using the 6-month churn rate.
# df_churn_6_months is labelled with the end date of each 6-month bin, while
# df_transacties has one row per month. Dividing the two directly aligns only
# on the months that coincide with a bin end and leaves NaN everywhere else.
# Back-filling gives every month the churn rate of the bin that contains it
# (the first bin label on or after the month end).
churn_6m_per_month = df_churn_6_months['churn_rate_6_months'].reindex(
    df_transacties.index, method='bfill'
)
df_transacties['CLV_churn_6M'] = (df_transacties['avg_order_value_month'] * df_transacties['purchase_frequency_month']) / churn_6m_per_month
df_transacties['CLV_margin_churn_6M'] = df_transacties['CLV_churn_6M'] * margin
df_transacties

TypeError: list indices must be integers or slices, not str

# CLV per maand met churn per jaar

In [None]:
# CLV per month, using the yearly churn rate.
# df_churn_year is labelled per year (as year strings once its index has been
# reformatted), so it cannot be aligned with the monthly DatetimeIndex of
# df_transacties directly. Both sides are keyed by four-digit year string and
# every month is given the churn rate of its year.
churn_by_year = df_churn_year['churn_rate_year'].copy()
churn_by_year.index = pd.Index(churn_by_year.index).astype(str).str[:4]
# Positional array with one churn value per row of df_transacties.
churn_year_per_month = churn_by_year.reindex(df_transacties.index.strftime('%Y')).values

df_transacties['CLV_churn_Y'] = (df_transacties['avg_order_value_month'] * df_transacties['purchase_frequency_month']) / churn_year_per_month
df_transacties['CLV_margin_churn_Y'] = df_transacties['CLV_churn_Y'] * margin
df_transacties