# CUSTOMER LIFETIME VALUE CALCULATION

In [14]:
import pandas as pd
pd.set_option('display.max_columns', 20)
pd.set_option('display.max_rows', 20)
pd.set_option('display.float_format', lambda x: '%.5f' % x)
from sklearn.preprocessing import MinMaxScaler

In [15]:
df_ = pd.read_excel("online_retail_II.xlsx", sheet_name="Year 2010-2011")
df = df_.copy()

In [16]:
df = df[~df["Invoice"].str.contains("C", na=False)]
df = df[(df['Quantity'] > 0)]
df.dropna(inplace=True)
df["TotalPrice"] = df["Quantity"] * df["Price"]

In [17]:
# Unique data by customer id 
cltv = df.groupby("Customer ID").agg({"Invoice": lambda x: len(x),
                                      "Quantity": lambda x: x.sum(),
                                      "TotalPrice": lambda x: x.sum()})
cltv.columns = ['total_transaction', 'total_unit', 'total_price']
cltv.head()

Unnamed: 0_level_0,total_transaction,total_unit,total_price
Customer ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
12346.0,1,74215,77183.6
12347.0,182,2458,4310.0
12348.0,31,2341,1797.24
12349.0,73,631,1757.55
12350.0,17,197,334.4


In [18]:
# Calculation of Customer Lifetime Metrics
cltv['avg_order_value'] = cltv["total_price"] / cltv['total_transaction']
cltv["purchase_frequency"] = cltv['total_transaction'] / cltv.shape[0]
repeat_rate = cltv[cltv["total_transaction"] > 1].shape[0] / cltv.shape[0]
churn_rate = 1 - repeat_rate
cltv['profit_margin'] = cltv['total_price'] * 0.05
cltv['customer_value'] = cltv['avg_order_value'] * cltv["purchase_frequency"]
cltv['cltv_c'] = (cltv['customer_value'] / churn_rate) * cltv['profit_margin']
cltv.head()

Unnamed: 0_level_0,total_transaction,total_unit,total_price,avg_order_value,purchase_frequency,profit_margin,customer_value,cltv_c
Customer ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
12346.0,1,74215,77183.6,77183.6,0.00023,3859.18,17.78834,4137019.52011
12347.0,182,2458,4310.0,23.68132,0.04195,215.5,0.99332,12900.06944
12348.0,31,2341,1797.24,57.97548,0.00714,89.862,0.41421,2243.10529
12349.0,73,631,1757.55,24.07603,0.01682,87.8775,0.40506,2145.12639
12350.0,17,197,334.4,19.67059,0.00392,16.72,0.07707,77.65511


In [19]:
# Scaling 
scaler = MinMaxScaler(feature_range=(1, 100))
scaler.fit(cltv[["cltv_c"]])
cltv["SCALED_CLTV_C"] = scaler.transform(cltv[["cltv_c"]])
cltv.head()

Unnamed: 0_level_0,total_transaction,total_unit,total_price,avg_order_value,purchase_frequency,profit_margin,customer_value,cltv_c,SCALED_CLTV_C
Customer ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
12346.0,1,74215,77183.6,77183.6,0.00023,3859.18,17.78834,4137019.52011,8.51156
12347.0,182,2458,4310.0,23.68132,0.04195,215.5,0.99332,12900.06944,1.02342
12348.0,31,2341,1797.24,57.97548,0.00714,89.862,0.41421,2243.10529,1.00407
12349.0,73,631,1757.55,24.07603,0.01682,87.8775,0.40506,2145.12639,1.00389
12350.0,17,197,334.4,19.67059,0.00392,16.72,0.07707,77.65511,1.00014


In [20]:
# Segmentation of scaled_cltv_c with using qcut functions
cltv["segment"] = pd.qcut(cltv["SCALED_CLTV_C"], 4, labels=["D", "C", "B", "A"])
cltv.head()

Unnamed: 0_level_0,total_transaction,total_unit,total_price,avg_order_value,purchase_frequency,profit_margin,customer_value,cltv_c,SCALED_CLTV_C,segment
Customer ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
12346.0,1,74215,77183.6,77183.6,0.00023,3859.18,17.78834,4137019.52011,8.51156,A
12347.0,182,2458,4310.0,23.68132,0.04195,215.5,0.99332,12900.06944,1.02342,A
12348.0,31,2341,1797.24,57.97548,0.00714,89.862,0.41421,2243.10529,1.00407,A
12349.0,73,631,1757.55,24.07603,0.01682,87.8775,0.40506,2145.12639,1.00389,A
12350.0,17,197,334.4,19.67059,0.00392,16.72,0.07707,77.65511,1.00014,C


In [21]:
cltv.groupby("segment")[["total_transaction", "total_unit",
                         "total_price", "cltv_c", "SCALED_CLTV_C"]].agg({"count", "mean", "sum"})

Unnamed: 0_level_0,total_transaction,total_transaction,total_transaction,total_unit,total_unit,total_unit,total_price,total_price,total_price,cltv_c,cltv_c,cltv_c,SCALED_CLTV_C,SCALED_CLTV_C,SCALED_CLTV_C
Unnamed: 0_level_1,sum,mean,count,sum,mean,count,sum,mean,count,sum,mean,count,sum,mean,count
segment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
D,18682,17.21843,1085,140493,129.48664,1085,194130.09,178.92174,1085,28106.99627,25.90507,1085,1085.05103,1.00005,1085
C,40122,36.9788,1085,310632,286.29677,1085,503836.852,464.36576,1085,171620.23375,158.17533,1085,1085.31161,1.00029,1085
B,82856,76.43542,1084,703995,649.44188,1084,1161913.311,1071.87575,1084,924739.47491,853.0807,1084,1085.67904,1.00155,1084
A,256265,236.18894,1085,4026577,3711.13088,1085,7051545.651,6499.12042,1085,254961091.91196,234987.18149,1085,1547.93144,1.42666,1085


In [22]:
# Create CLTV_C Metrics
def cltv_df(dataframe):
    cltv = dataframe.groupby("Customer ID").agg({
        "Invoice": lambda x: len(x),
        "Quantity": lambda x: x.sum(),
        "TotalPrice": lambda x: x.sum()})
    cltv.columns = ['total_transaction', 'total_unit', 'total_price']

    return cltv

cltv_df = cltv_df(df)


def cltv_c(dataframe):
    dataframe['avg_order_value'] = dataframe["total_price"] / cltv_df['total_transaction']
    dataframe["purchase_frequency"] = cltv_df['total_transaction'] / dataframe.shape[0]
    repeat_rate = dataframe[dataframe["total_transaction"] > 1].shape[0] / dataframe.shape[0]
    churn_rate = 1 - repeat_rate
    dataframe['profit_margin'] = dataframe['total_price'] * 0.05
    dataframe['customer_value'] = dataframe['avg_order_value'] * dataframe["purchase_frequency"]
    dataframe['cltv_c'] = (dataframe['customer_value'] / churn_rate) * dataframe['profit_margin']

    return dataframe


cltv_c = cltv_c(cltv_df)