# How to use this code?

1. Make a copy of this notebook and save the copy in the same folder as the original
2. Save your client's file in the incoming folder
3. Edit the code where necessary. The comment lines above each piece of code tell you where you have to change it

# Choose file to process

In [1]:
# Run this cell and answer both prompts.
# Leave the filename empty to process the test data; an empty client name becomes 'Test'.
filename = input('Filename of file in /incoming/ folder: ')
client = input('Client\'s name: ') or 'Test'

Filename of file in /incoming/ folder: max_website.csv
Client's name: Max-Website


# Import needed libraries

In [2]:
from datetime import datetime
from pathlib import Path
import helpers  # custom helper functions
import numpy as np
import pandas as pd
import pandas.api.types as ptypes

# Need Pandas version 0.25.0 or higher.
# Compare the numeric (major, minor) components: comparing the raw version strings is
# lexicographic, so an old release such as '0.9.0' would wrongly pass against '0.24.9'.
MIN_PANDAS_VERSION = (0, 25)
assert tuple(int(part) for part in pd.__version__.split('.')[:2]) >= MIN_PANDAS_VERSION, \
    f'pandas >= 0.25.0 is required, found {pd.__version__}'

# Import file and filter data

In [3]:
# Project root, resolved relative to the current working directory.
# Each '.parent' goes one level up - vary as required.
path = Path().absolute().parent

# Read the entered file from /incoming/; without a filename use the sample dataset.
relative_file = f'incoming/{filename}' if filename else 'samples/sample-dataset.csv'
file = f'{path}/{relative_file}'

# Folder for outgoing files (ends with a slash: file names are appended directly to it)
path_outgoing = f'{path}/outgoing/'

print('Processing: ', file)
print('For client: ', client)

Processing:  /Users/chiwang/Google Drive/1. IWB/Python/GitHub/incoming/max_website.csv
For client:  Max-Website


In [4]:
# File import
# To do: choose the right columns from the dataset
# for excel: data = pd.read_excel(file) 
# df = pd.DataFrame(data, columns = [''])

# The file is semicolon separated and uses a decimal comma; dates are day-first.
# Only `sep` is passed: `sep` and `delimiter` are aliases for the same option and
# newer pandas versions raise a ValueError when both are specified.
df = pd.read_csv(
    file,
    sep=';',
    decimal=',',
    usecols=['CustomerID', 'Orderdate', 'OrderID', 'Revenue', 'COGS'],
    parse_dates=['Orderdate'],
    infer_datetime_format=True,
    dayfirst=True,
    # ids are read as text, amounts as floats
    dtype={'CustomerID': str, 'OrderID': str, 'Revenue': float, 'COGS': float},
)
df.head()

Unnamed: 0,CustomerID,Orderdate,OrderID,Revenue,COGS
0,121341,2020-02-03,36853,53.27,33.37
1,121333,2020-02-03,36845,28.84,18.28
2,121331,2020-02-03,36843,82.6,41.5
3,121327,2020-02-03,36839,61.94,40.4
4,121322,2020-02-03,36832,61.98,29.5


In [5]:
# Show the column dtypes and non-null counts of the imported data
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3727 entries, 0 to 3726
Data columns (total 5 columns):
CustomerID    3727 non-null object
Orderdate     3727 non-null datetime64[ns]
OrderID       3726 non-null object
Revenue       3727 non-null float64
COGS          3727 non-null float64
dtypes: datetime64[ns](1), float64(2), object(2)
memory usage: 145.7+ KB


In [6]:
# Clean the orders:
#  - drop the rows that have no customer id
#  - keep only the first row of every OrderID -> change the name to the name of your column
# Optional steps (disabled below), enable them when they fit your dataset:
#  - delete every row that contains an empty value such as 'NaT'
#  - keep only rows whose email contains '@' -> adjust when email is not used as client id

#df = df.dropna(axis=0, how='any')
#df = df[df['email'].str.contains('@')]
df = (
    df
    .dropna(subset=['CustomerID'])
    .drop_duplicates(subset='OrderID', keep='first')
)
df.head()

Unnamed: 0,CustomerID,Orderdate,OrderID,Revenue,COGS
0,121341,2020-02-03,36853,53.27,33.37
1,121333,2020-02-03,36845,28.84,18.28
2,121331,2020-02-03,36843,82.6,41.5
3,121327,2020-02-03,36839,61.94,40.4
4,121322,2020-02-03,36832,61.98,29.5


In [7]:
# Show the dtypes and non-null counts of the data that is left after cleaning
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3694 entries, 0 to 3726
Data columns (total 5 columns):
CustomerID    3694 non-null object
Orderdate     3694 non-null datetime64[ns]
OrderID       3693 non-null object
Revenue       3694 non-null float64
COGS          3694 non-null float64
dtypes: datetime64[ns](1), float64(2), object(2)
memory usage: 173.2+ KB


In [8]:
# Summary statistics of the numeric columns
df.describe()

Unnamed: 0,Revenue,COGS
count,3694.0,3694.0
mean,45.583904,26.533979
std,29.530862,13.009123
min,1.61,7.75
25%,24.71,16.95
50%,41.28,24.975
75%,57.81,31.75
max,645.38,166.85


In [9]:
# Filtering the data (these rules are specific for Maxilia -> delete if not necessary or change to needed values)
# Leadphase = order
# Leadstatus = gefactureerd
#df = df[df['LeadPhase'].str.contains('Order')]
#df = df[df['LeadStatus'].str.contains('Gefactureerd')]
#df.head()

In [10]:
# Show the dtypes and non-null counts again after the (optional) filter step
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3694 entries, 0 to 3726
Data columns (total 5 columns):
CustomerID    3694 non-null object
Orderdate     3694 non-null datetime64[ns]
OrderID       3693 non-null object
Revenue       3694 non-null float64
COGS          3694 non-null float64
dtypes: datetime64[ns](1), float64(2), object(2)
memory usage: 173.2+ KB


In [11]:
# Check if the data meets certain conditions; the notebook stops with an
# AssertionError when a check fails.
# Active check: the 'Orderdate' column must have a datetime dtype.
assert ptypes.is_datetime64_any_dtype(df['Orderdate'])
# Optional checks (disabled), enable the ones that apply to your dataset:
#assert df['email'].str.contains('@').all()
#assert df['LeadPhase'].eq('Order').all()
#assert df['LeadStatus'].eq('Gefactureerd').all()

# Group data per customer

In [12]:
# Here the data is being grouped per customer (the 'CustomerID' column).
# To do: change the variables into the right column names of the data set

# The first and last transaction are taken to calculate how many days someone is a customer
# The number of orders (how many rows per customer) is calculated
# The sum of all spend is calculated

# Aggregations are passed by name: builtin callables such as `sum` are deprecated
# inside `agg` in recent pandas versions. 'size' counts every row of the group,
# including rows with a missing OrderID, exactly like `len` did.
df_clv = df.groupby('CustomerID').agg(
    first_transaction=('Orderdate', 'min'),
    last_transaction=('Orderdate', 'max'),
    total_transactions=('OrderID', 'size'),
    total_spent=('Revenue', 'sum'),
)
# Whole days between the first and the last order of the customer
df_clv['days_customer'] = (df_clv['last_transaction'] - df_clv['first_transaction']).dt.days
# The two helper columns are no longer needed in the overview
df_clv = df_clv.drop(columns=['first_transaction', 'last_transaction'])
df_clv.head()

Unnamed: 0_level_0,total_transactions,total_spent,days_customer
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
101787,2,129.6,151
101811,4,518.4,268
101874,5,324.0,262
102016,1,70.95,0
102080,3,200.56,140


In [13]:
#df_clv.columns = ['total_transactions', 'total_spent', 'days_customer']
#df_clv.head()

In [14]:
# Summary statistics of the per-customer figures
df_clv.describe()

Unnamed: 0,total_transactions,total_spent,days_customer
count,3271.0,3271.0,3271.0
mean,1.129318,51.478734,6.080404
std,0.566738,44.630044,30.200228
min,1.0,1.61,0.0
25%,1.0,26.36,0.0
50%,1.0,41.83,0.0
75%,1.0,61.98,0.0
max,19.0,1086.77,298.0


In [15]:
# Export the per-customer overview to the outgoing folder
# For an Excel file use: df_clv.to_excel(f'{path_outgoing}{client} - Overzicht_per_klant.xlsx')
overview_per_customer_csv = f'{path_outgoing}{client} - Overzicht_per_klant.csv'
df_clv.to_csv(overview_per_customer_csv)

# Calculating the total CLV

In [16]:
# Start with an empty dictionary; every calculated value is stored in it under its own label
calculated = dict()

In [17]:
# Calculate the average order value: total revenue divided by the total number of orders.
# The vectorised Series.sum() replaces the builtin sum(), which loops over the values
# one by one in Python; the revenue total is calculated once and reused.
# Note: Series.sum() skips missing values, the builtin sum() would return NaN for them.

total_spent_all = df_clv['total_spent'].sum()
avg_order_value = total_spent_all / df_clv['total_transactions'].sum()
calculated['Gemiddelde orderwaarde'] = avg_order_value
print(total_spent_all)
#print('total_transactions')
print(avg_order_value)

168386.9399999998
45.58390362750401


In [18]:
# Purchase frequency: the average number of orders per customer

number_of_orders = sum(df_clv['total_transactions'])
number_of_customers = df_clv.shape[0]  # one row per customer
purchase_frequency = number_of_orders / number_of_customers
calculated['Frequentie aankoop'] = purchase_frequency
print(purchase_frequency)

1.1293182512992967


In [19]:
# Repeat rate: the share of customers that placed more than one order
# Output * 100 is the rate in %

returning_customers = df_clv.loc[df_clv['total_transactions'] > 1]
repeat_rate = len(returning_customers) / len(df_clv)
calculated['Repeat rate'] = repeat_rate
print(repeat_rate)

0.09202078874961786


In [20]:
# Churn rate: the share of customers that does not return after 1 order,
# i.e. everything that is not covered by the repeat rate

churn_rate = 1 - repeat_rate
calculated.update({'Churn rate': churn_rate})
print(churn_rate)

0.9079792112503822


In [21]:
# Calculating the average margin of the dataset
# To do: choose the right columns from the dataset
# The vectorised Series.sum() replaces the builtin sum(), and the total revenue is
# calculated once and reused for both the profit and the margin.
# Note: Series.sum() skips missing values, the builtin sum() would return NaN for them.

total_revenue = df['Revenue'].sum()
profit = total_revenue - df['COGS'].sum()  # revenue minus cost of goods sold
calculated['Profit'] = profit
margin = profit / total_revenue            # profit as a fraction of the revenue
calculated['Margin'] = margin
absolut_margin = profit * margin
calculated['Absolut margin'] = absolut_margin

print(profit)
print(margin)
print(absolut_margin)

70370.42000000011
0.41790901360877586
29408.432809435322


In [22]:
# Calculating the total CLV and the CLV with margin
# If you cannot calculate the margin (as is done in the above cell) you can replace
# `margin` by the margin number that the company provides you with

value_per_customer = avg_order_value * purchase_frequency
total_clv = value_per_customer / churn_rate
calculated['CLV'] = total_clv
total_clv_margin = total_clv * margin
calculated['CLV marge'] = total_clv_margin

print(total_clv)  # the first number is the total CLV
print(total_clv_margin)  # the second number is the CLV that takes the calculated margin into account

56.69593939393932
23.69374410774412


In [23]:
# Put the calculated numbers in a dataset with a single row labelled 'Waarde'
# (a dataset is necessary to use them in datastudio)
clv_rows = [calculated]
df_clv_values = pd.DataFrame(data=clv_rows, index=['Waarde'])
df_clv_values.head()

Unnamed: 0,Gemiddelde orderwaarde,Frequentie aankoop,Repeat rate,Churn rate,Profit,Margin,Absolut margin,CLV,CLV marge
Waarde,45.583904,1.129318,0.092021,0.907979,70370.42,0.417909,29408.432809,56.695939,23.693744


In [24]:
# Export the calculated CLV values to a CSV file in the outgoing folder
clv_values_csv = f'{path_outgoing}{client} - CLV_waardes.csv'
df_clv_values.to_csv(clv_values_csv)
# To export to an Excel file use:
# df_clv_values.to_excel(f'{path_outgoing}{client} - CLV_waardes.xlsx')

# Churn rate per month

In [25]:
# Count the orders per customer per calendar month (0 when the customer did not order);
# this table is used to calculate the churn rate per month
monthly_periods = pd.Grouper(key='Orderdate', freq='M')
df_monthly = df.pivot_table(
    values='OrderID',
    index=['CustomerID'],
    columns=monthly_periods,
    aggfunc='count',
    fill_value=0,
)

df_monthly.head()

Orderdate,2019-04-30,2019-05-31,2019-06-30,2019-07-31,2019-08-31,2019-09-30,2019-10-31,2019-11-30,2019-12-31,2020-01-31,2020-02-29
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
101787,1,0,0,0,0,1,0,0,0,0,0
101811,1,0,0,1,0,0,1,0,0,1,0
101874,1,0,1,0,1,0,0,1,0,1,0
102016,0,0,0,1,0,0,0,0,0,0,0
102080,0,1,0,0,1,0,1,0,0,0,0


In [26]:
# Repeat rate per month: the customers with more than one order in the month divided by
# the customers with at least one order in that month.
# The result gets its own name so that the overall `repeat_rate` number calculated
# earlier in the notebook is not overwritten by this per-month Series.
repeat_rate_monthly = (df_monthly > 1).sum() / (df_monthly > 0).sum()
df_churn = repeat_rate_monthly.to_frame(name='repeat_rate')

In [27]:
# The churn rate per month is what remains after the repeat rate: 1 - repeat rate
df_churn['churn_rate'] = df_churn['repeat_rate'].rsub(1)
df_churn.head()


Unnamed: 0_level_0,repeat_rate,churn_rate
Orderdate,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-04-30,0.042636,0.957364
2019-05-31,0.048346,0.951654
2019-06-30,0.067073,0.932927
2019-07-31,0.058047,0.941953
2019-08-31,0.029605,0.970395


# Churn per 6 months and per year

In [28]:
# Count the orders per customer per period of 6 months (0 when the customer did not order);
# this table is used to calculate the churn rate per 6 months
half_year_periods = pd.Grouper(key='Orderdate', freq='6M')
df_6_months = df.pivot_table(
    values='OrderID',
    index=['CustomerID'],
    columns=half_year_periods,
    aggfunc='count',
    fill_value=0,
)

df_6_months.head()

Orderdate,2019-04-30,2019-10-31,2020-04-30
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
101787,1,1,0
101811,1,2,1
101874,1,2,2
102016,0,1,0
102080,0,3,0


In [29]:
# Repeat rate per 6 months: the customers with more than one order in the period divided
# by the customers with at least one order in that period.
# The result gets its own name so that the overall `repeat_rate` number calculated
# earlier in the notebook is not overwritten by this per-period Series.
repeat_rate_6_months = (df_6_months > 1).sum() / (df_6_months > 0).sum()
df_churn_6_months = repeat_rate_6_months.to_frame(name='repeat_rate_6_months')

In [30]:
# The churn rate per 6 months is what remains after the repeat rate: 1 - repeat rate
df_churn_6_months['churn_rate_6_months'] = df_churn_6_months['repeat_rate_6_months'].rsub(1)
df_churn_6_months

Unnamed: 0_level_0,repeat_rate_6_months,churn_rate_6_months
Orderdate,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-04-30,0.042636,0.957364
2019-10-31,0.084608,0.915392
2020-04-30,0.05503,0.94497


In [31]:
# Count the orders per customer per calendar year (0 when the customer did not order);
# this table is used to calculate the churn rate per year
yearly_periods = pd.Grouper(key='Orderdate', freq='Y')
df_year = df.pivot_table(
    values='OrderID',
    index=['CustomerID'],
    columns=yearly_periods,
    aggfunc='count',
    fill_value=0,
)

df_year.head()

Orderdate,2019-12-31,2020-12-31
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1
101787,2,0
101811,3,1
101874,4,1
102016,1,0
102080,3,0


In [32]:
# Repeat rate per year: the customers with more than one order in the year divided by
# the customers with at least one order in that year.
# The result gets its own name so that the overall `repeat_rate` number calculated
# earlier in the notebook is not overwritten by this per-year Series.
repeat_rate_year = (df_year > 1).sum() / (df_year > 0).sum()
df_churn_year = repeat_rate_year.to_frame(name='repeat_rate_year')

In [33]:
# The churn rate per year is what remains after the repeat rate: 1 - repeat rate
df_churn_year['churn_rate_year'] = df_churn_year['repeat_rate_year'].rsub(1)
df_churn_year

Unnamed: 0_level_0,repeat_rate_year,churn_rate_year
Orderdate,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-12-31,0.091102,0.908898
2020-12-31,0.040486,0.959514


# Transactions, spend and unique customers per month

In [34]:
# Grouping total transactions, turnover and unique clients per month

# Aggregations are passed by name: builtin callables such as `sum` are deprecated
# inside `agg` in recent pandas versions. 'size' counts every row of the month,
# including rows with a missing OrderID, exactly like `len` did.
df_transacties = df.groupby(pd.Grouper(key='Orderdate', freq='M')).agg(
    {
        'OrderID': 'size',        # number of orders in the month
        'Revenue': 'sum',         # turnover of the month
        'CustomerID': 'nunique',  # number of different customers in the month
    }
)

df_transacties.head()

Unnamed: 0_level_0,OrderID,Revenue,CustomerID
Orderdate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-04-30,270,12896.88,259
2019-05-31,415,19455.57,393
2019-06-30,351,16909.93,328
2019-07-31,404,19229.29,379
2019-08-31,326,15399.55,304


In [35]:
# Show the index range, dtypes and non-null counts of the monthly overview
df_transacties.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 11 entries, 2019-04-30 to 2020-02-29
Freq: M
Data columns (total 3 columns):
OrderID       11 non-null int64
Revenue       11 non-null float64
CustomerID    11 non-null int64
dtypes: float64(1), int64(2)
memory usage: 672.0 bytes


In [36]:
# Give the three aggregated columns descriptive names
# (assigned by position: orders, revenue, unique customers)
monthly_column_names = ['total_transactions', 'total_spent', 'total_unique_customers']
df_transacties.columns = monthly_column_names
df_transacties.head()

Unnamed: 0_level_0,total_transactions,total_spent,total_unique_customers
Orderdate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-04-30,270,12896.88,259
2019-05-31,415,19455.57,393
2019-06-30,351,16909.93,328
2019-07-31,404,19229.29,379
2019-08-31,326,15399.55,304


In [37]:
# Summary statistics of the monthly figures
df_transacties.describe()

Unnamed: 0,total_transactions,total_spent,total_unique_customers
count,11.0,11.0,11.0
mean,335.818182,15307.903636,321.272727
std,111.318299,4891.25473,105.963287
min,40.0,1904.14,40.0
25%,333.5,15000.82,316.0
50%,352.0,15986.52,343.0
75%,382.5,18107.035,365.5
max,475.0,19455.57,456.0


In [38]:
# Export the monthly overview to the outgoing folder
# For an Excel file use: df_transacties.to_excel(f'{path_outgoing}{client} - Overzicht_per_maand.xlsx')
overview_per_month_csv = f'{path_outgoing}{client} - Overzicht_per_maand.csv'
df_transacties.to_csv(overview_per_month_csv)

# Calculate CLV per month with churn per month

In [39]:
# Calculating the CLV per month with the churn rate of that same month
orders_per_month = df_transacties['total_transactions']
# Orders per unique customer and revenue per order, both per month
df_transacties['purchase_frequency_month'] = orders_per_month / df_transacties['total_unique_customers']
df_transacties['avg_order_value_month'] = df_transacties['total_spent'] / orders_per_month
value_per_customer_month = df_transacties['avg_order_value_month'] * df_transacties['purchase_frequency_month']
# The division matches the months of both datasets on their index
df_transacties['CLV'] = value_per_customer_month / df_churn['churn_rate']
df_transacties['CLV_margin'] = df_transacties['CLV'] * margin
df_transacties.head()

Unnamed: 0_level_0,total_transactions,total_spent,total_unique_customers,purchase_frequency_month,avg_order_value_month,CLV,CLV_margin
Orderdate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-04-30,270,12896.88,259,1.042471,47.766222,52.01249,21.736489
2019-05-31,415,19455.57,393,1.05598,46.880892,52.020241,21.739727
2019-06-30,351,16909.93,328,1.070122,48.176439,55.261209,23.094157
2019-07-31,404,19229.29,379,1.065963,47.597252,53.863557,22.510066
2019-08-31,326,15399.55,304,1.072368,47.237883,52.201864,21.81563


In [40]:
# New dataset to calculate the aggregated (running) average CLV
# Rows with empty values are dropped
# The 'Index' column numbers the months 1, 2, 3, ... and is the divisor of the running total
df_clv_cum = df_transacties[['CLV', 'CLV_margin']].dropna().copy()
df_clv_cum['Index'] = np.arange(1, len(df_clv_cum) + 1)
running_totals = df_clv_cum[['CLV', 'CLV_margin']].cumsum()
df_clv_cum['CLV_AVG'] = running_totals['CLV'] / df_clv_cum['Index']
df_clv_cum['CLV_margin_AVG'] = running_totals['CLV_margin'] / df_clv_cum['Index']
df_clv_cum.head()

Unnamed: 0_level_0,CLV,CLV_margin,Index,CLV_AVG,CLV_margin_AVG
Orderdate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-04-30,52.01249,21.736489,1,52.01249,21.736489
2019-05-31,52.020241,21.739727,2,52.016365,21.738108
2019-06-30,55.261209,23.094157,3,53.09798,22.190124
2019-07-31,53.863557,22.510066,4,53.289374,22.27011
2019-08-31,52.201864,21.81563,5,53.071872,22.179214


# Calculate CLV per month with churn per 6 months

In [41]:
# Code used to find the churn that connects to the right months
def find_churn_rate_6_months(date, churn_rates=None):
    """
    Return the 6-month churn rate that applies to `date`.

    The periods are visited in index order and the churn rate of the first
    period whose end date is on or after `date` is returned, so the index is
    expected to be sorted ascending.
    Eg: July <= Oct, so return October's churn rate

    Parameters
    ----------
    date : value comparable with the index labels of `churn_rates`
    churn_rates : pandas.Series, optional
        Churn rate per period, indexed by the end date of the period.
        Defaults to df_churn_6_months['churn_rate_6_months'].

    Returns
    -------
    The matching churn rate, or None when `date` lies after every period end date.
    """
    if churn_rates is None:
        churn_rates = df_churn_6_months['churn_rate_6_months']
    # Series.items() replaces Series.iteritems(), which was removed in pandas 2.0
    for period_end, rate in churn_rates.items():
        if date <= period_end:
            return rate
    return None
        
def find_churn_rate_1_year(date, churn_rates=None):
    """
    Return the yearly churn rate that applies to `date`.

    The years are visited in index order and the churn rate of the first year
    whose end date is on or after `date` is returned, so the index is expected
    to be sorted ascending.

    Parameters
    ----------
    date : value comparable with the index labels of `churn_rates`
    churn_rates : pandas.Series, optional
        Churn rate per year, indexed by the end date of the year.
        Defaults to df_churn_year['churn_rate_year'].

    Returns
    -------
    The matching churn rate, or None when `date` lies after every year end date.
    """
    if churn_rates is None:
        churn_rates = df_churn_year['churn_rate_year']
    # Series.items() replaces Series.iteritems(), which was removed in pandas 2.0
    for year_end, rate in churn_rates.items():
        if date <= year_end:
            return rate
    return None

In [42]:
# Calculate CLV with churn per 6 months
# First look up, for every month, the churn rate of the 6-month period the month belongs to
month_ends = df_transacties.index.to_series()
df_transacties['churn_rate_6_months'] = month_ends.apply(find_churn_rate_6_months)
value_per_customer_month = df_transacties['avg_order_value_month'] * df_transacties['purchase_frequency_month']
df_transacties['CLV_churn_6M'] = value_per_customer_month / df_transacties['churn_rate_6_months']
df_transacties['CLV_margin_churn_6M'] = df_transacties['CLV_churn_6M'] * margin
df_transacties.head(10)

Unnamed: 0_level_0,total_transactions,total_spent,total_unique_customers,purchase_frequency_month,avg_order_value_month,CLV,CLV_margin,churn_rate_6_months,CLV_churn_6M,CLV_margin_churn_6M
Orderdate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2019-04-30,270,12896.88,259,1.042471,47.766222,52.01249,21.736489,0.957364,52.01249,21.736489
2019-05-31,415,19455.57,393,1.05598,46.880892,52.020241,21.739727,0.915392,54.080921,22.600904
2019-06-30,351,16909.93,328,1.070122,48.176439,55.261209,23.094157,0.915392,56.319739,23.536527
2019-07-31,404,19229.29,379,1.065963,47.597252,53.863557,22.510066,0.915392,55.426405,23.163194
2019-08-31,326,15399.55,304,1.072368,47.237883,52.201864,21.81563,0.915392,55.338466,23.126444
2019-09-30,352,17170.5,343,1.026239,48.77983,51.408683,21.484152,0.915392,54.686672,22.854053
2019-10-31,341,14602.09,330,1.033333,42.821378,45.774577,19.129608,0.915392,48.338565,20.201122
2019-11-30,361,15986.52,352,1.025568,44.283989,46.60793,19.477874,0.94497,48.061054,20.085148
2019-12-31,359,15788.9,350,1.025714,43.980223,46.30176,19.349923,0.94497,47.738179,19.950215
2020-01-31,475,19043.57,456,1.041667,40.091726,43.47847,18.170045,0.94497,44.194227,18.469166


# Calculate CLV per month with churn per year

In [43]:
# Calculate CLV with churn per year
# First look up, for every month, the churn rate of the year the month belongs to
month_ends = df_transacties.index.to_series()
df_transacties['churn_rate_1_year'] = month_ends.apply(find_churn_rate_1_year)
value_per_customer_month = df_transacties['avg_order_value_month'] * df_transacties['purchase_frequency_month']
df_transacties['CLV_churn_Y'] = value_per_customer_month / df_transacties['churn_rate_1_year']
df_transacties['CLV_margin_churn_Y'] = df_transacties['CLV_churn_Y'] * margin
df_transacties.head(10)

Unnamed: 0_level_0,total_transactions,total_spent,total_unique_customers,purchase_frequency_month,avg_order_value_month,CLV,CLV_margin,churn_rate_6_months,CLV_churn_6M,CLV_margin_churn_6M,churn_rate_1_year,CLV_churn_Y,CLV_margin_churn_Y
Orderdate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2019-04-30,270,12896.88,259,1.042471,47.766222,52.01249,21.736489,0.957364,52.01249,21.736489,0.908898,54.786046,22.895583
2019-05-31,415,19455.57,393,1.05598,46.880892,52.020241,21.739727,0.915392,54.080921,22.600904,0.908898,54.467379,22.762408
2019-06-30,351,16909.93,328,1.070122,48.176439,55.261209,23.094157,0.915392,56.319739,23.536527,0.908898,56.722195,23.704717
2019-07-31,404,19229.29,379,1.065963,47.597252,53.863557,22.510066,0.915392,55.426405,23.163194,0.908898,55.822477,23.328716
2019-08-31,326,15399.55,304,1.072368,47.237883,52.201864,21.81563,0.915392,55.338466,23.126444,0.908898,55.73391,23.291703
2019-09-30,352,17170.5,343,1.026239,48.77983,51.408683,21.484152,0.915392,54.686672,22.854053,0.908898,55.077458,23.017366
2019-10-31,341,14602.09,330,1.033333,42.821378,45.774577,19.129608,0.915392,48.338565,20.201122,0.908898,48.683988,20.345477
2019-11-30,361,15986.52,352,1.025568,44.283989,46.60793,19.477874,0.94497,48.061054,20.085148,0.908898,49.968503,20.882288
2019-12-31,359,15788.9,350,1.025714,43.980223,46.30176,19.349923,0.94497,47.738179,19.950215,0.908898,49.632814,20.742
2020-01-31,475,19043.57,456,1.041667,40.091726,43.47847,18.170045,0.94497,44.194227,18.469166,0.959514,43.524334,18.189211


# Aggregated average CLV

In [44]:
# Calculate the aggregated (running) average CLV with churn of 6 months and add it to the dataset:
# the running total of the monthly values divided by the month number in 'Index'
df_clv_cum_6M = df_transacties[['CLV_churn_6M', 'CLV_margin_churn_6M']].dropna().copy()
running_totals_6M = df_clv_cum_6M.cumsum()
df_clv_cum['CLV_AVG_6M'] = running_totals_6M['CLV_churn_6M'] / df_clv_cum['Index']
df_clv_cum['CLV_margin_AVG_6M'] = running_totals_6M['CLV_margin_churn_6M'] / df_clv_cum['Index']
df_clv_cum.head()

Unnamed: 0_level_0,CLV,CLV_margin,Index,CLV_AVG,CLV_margin_AVG,CLV_AVG_6M,CLV_margin_AVG_6M
Orderdate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-04-30,52.01249,21.736489,1,52.01249,21.736489,52.01249,21.736489
2019-05-31,52.020241,21.739727,2,52.016365,21.738108,53.046706,22.168696
2019-06-30,55.261209,23.094157,3,53.09798,22.190124,54.137717,22.62464
2019-07-31,53.863557,22.510066,4,53.289374,22.27011,54.459889,22.759278
2019-08-31,52.201864,21.81563,5,53.071872,22.179214,54.635604,22.832712


In [45]:
# Calculate the aggregated (running) average CLV with churn of a year and add it to the dataset:
# the running total of the monthly values divided by the month number in 'Index'
df_clv_cum_Y = df_transacties[['CLV_churn_Y', 'CLV_margin_churn_Y']].dropna().copy()
running_totals_Y = df_clv_cum_Y.cumsum()
df_clv_cum['CLV_AVG_Y'] = running_totals_Y['CLV_churn_Y'] / df_clv_cum['Index']
df_clv_cum['CLV_margin_AVG_Y'] = running_totals_Y['CLV_margin_churn_Y'] / df_clv_cum['Index']
df_clv_cum.head()

Unnamed: 0_level_0,CLV,CLV_margin,Index,CLV_AVG,CLV_margin_AVG,CLV_AVG_6M,CLV_margin_AVG_6M,CLV_AVG_Y,CLV_margin_AVG_Y
Orderdate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2019-04-30,52.01249,21.736489,1,52.01249,21.736489,52.01249,21.736489,54.786046,22.895583
2019-05-31,52.020241,21.739727,2,52.016365,21.738108,53.046706,22.168696,54.626712,22.828996
2019-06-30,55.261209,23.094157,3,53.09798,22.190124,54.137717,22.62464,55.325207,23.120903
2019-07-31,53.863557,22.510066,4,53.289374,22.27011,54.459889,22.759278,55.449524,23.172856
2019-08-31,52.201864,21.81563,5,53.071872,22.179214,54.635604,22.832712,55.506401,23.196625


In [46]:
# Keep a dataset with only the running averages
# CLV_AVG uses the churn per month, *_6M the churn over 6 months, *_Y the churn over a year
average_columns = [
    'CLV_AVG', 'CLV_margin_AVG',
    'CLV_AVG_6M', 'CLV_margin_AVG_6M',
    'CLV_AVG_Y', 'CLV_margin_AVG_Y',
]
df_clv_avg = df_clv_cum[average_columns].copy()
df_clv_avg.head(10)

Unnamed: 0_level_0,CLV_AVG,CLV_margin_AVG,CLV_AVG_6M,CLV_margin_AVG_6M,CLV_AVG_Y,CLV_margin_AVG_Y
Orderdate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-04-30,52.01249,21.736489,52.01249,21.736489,54.786046,22.895583
2019-05-31,52.016365,21.738108,53.046706,22.168696,54.626712,22.828996
2019-06-30,53.09798,22.190124,54.137717,22.62464,55.325207,23.120903
2019-07-31,53.289374,22.27011,54.459889,22.759278,55.449524,23.172856
2019-08-31,53.071872,22.179214,54.635604,22.832712,55.506401,23.196625
2019-09-30,52.794674,22.06337,54.644116,22.836268,55.434911,23.166749
2019-10-31,51.791803,21.644261,53.743323,22.459819,54.470493,22.76371
2019-11-30,51.143819,21.373463,53.033039,22.162985,53.907745,22.528532
2019-12-31,50.605812,21.148625,52.444721,21.917122,53.432752,22.330029
2020-01-31,49.893078,20.850767,51.619672,21.572326,52.44191,21.915947


In [47]:
# Export the running CLV averages per month to the outgoing folder
# For an Excel file use: df_clv_avg.to_excel(f'{path_outgoing}{client} - Gemiddeldes CLV per maand.xlsx')
clv_averages_csv = f'{path_outgoing}{client} - Gemiddeldes CLV per maand.csv'
df_clv_avg.to_csv(clv_averages_csv)