In [3]:
#Importing Libraries
import pandas as pd
import numpy as np
import seaborn as sns
import datetime as dt
import re 
import matplotlib.pyplot as plt 
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format',lambda x: '%.3f' % x)
df_ = pd.read_csv("flo_data_20k.csv")
df = df_.copy()

In [42]:
from lifetimes import BetaGeoFitter
from lifetimes import GammaGammaFitter
from lifetimes.plotting import plot_period_transactions
from sklearn.preprocessing import MinMaxScaler

In [4]:
df.head()

Unnamed: 0,master_id,order_channel,last_order_channel,first_order_date,last_order_date,last_order_date_online,last_order_date_offline,order_num_total_ever_online,order_num_total_ever_offline,customer_value_total_ever_offline,customer_value_total_ever_online,interested_in_categories_12
0,cc294636-19f0-11eb-8d74-000d3a38a36f,Android App,Offline,2020-10-30,2021-02-26,2021-02-21,2021-02-26,4.0,1.0,139.99,799.38,[KADIN]
1,f431bd5a-ab7b-11e9-a2fc-000d3a38a36f,Android App,Mobile,2017-02-08,2021-02-16,2021-02-16,2020-01-10,19.0,2.0,159.97,1853.58,"[ERKEK, COCUK, KADIN, AKTIFSPOR]"
2,69b69676-1a40-11ea-941b-000d3a38a36f,Android App,Android App,2019-11-27,2020-11-27,2020-11-27,2019-12-01,3.0,2.0,189.97,395.35,"[ERKEK, KADIN]"
3,1854e56c-491f-11eb-806e-000d3a38a36f,Android App,Android App,2021-01-06,2021-01-17,2021-01-17,2021-01-06,1.0,1.0,39.99,81.98,"[AKTIFCOCUK, COCUK]"
4,d6ea1074-f1f5-11e9-9346-000d3a38a36f,Desktop,Desktop,2019-08-03,2021-03-07,2021-03-07,2019-08-03,1.0,1.0,49.99,159.99,[AKTIFSPOR]


In [5]:
def outlier_threshold(dataframe, variable):
    quartile1 = dataframe[variable].quantile(0.01)
    quartile3 = dataframe[variable].quantile(0.99)
    interquantile_range = quartile3-quartile1
    up_limit = quartile3 + 1.5 * interquantile_range
    low_limit = quartile1 - 1.5 * interquantile_range
    return low_limit, up_limit

def replace_with_threshold(dataframe,variable):
    low_limit, up_limit = outlier_threshold(dataframe,variable)
    dataframe.loc[(dataframe[variable] < low_limit),variable] = low_limit.round()
    dataframe.loc[(dataframe[variable] > up_limit),variable] = up_limit.round()


In [6]:
replace_with_threshold(df,"order_num_total_ever_online")
replace_with_threshold(df,"order_num_total_ever_offline")
replace_with_threshold(df,"customer_value_total_ever_offline")
replace_with_threshold(df,"customer_value_total_ever_online")

In [7]:
#Total for each customer number of purchases and spending.
#Total total_transaction = order_num_total_ever_online + order_num_total_ever_offline
#Total Price for each customer: (order_num_total_ever_online * customer_value_total_ever_online) + (order_num_total_ever_offline*customer_value_total_ever_offline)
df["total_price"] =  (df["order_num_total_ever_online"]*df["customer_value_total_ever_online"]) + (df["order_num_total_ever_offline"]*df["customer_value_total_ever_offline"])
df["total_transaction"] = df["order_num_total_ever_online"] + df["order_num_total_ever_offline"]

In [8]:
#Converting String types to date type.
df["first_order_date"] = pd.to_datetime(df["first_order_date"])
df["last_order_date"] = pd.to_datetime(df["last_order_date"])
df["last_order_date_online"] = pd.to_datetime(df["last_order_date_online"])
df["last_order_date_offline"] = pd.to_datetime(df["last_order_date_offline"])

In [9]:
today_date = dt.datetime(2021,6,1)

In [12]:
df["recency_cltv_weekly"] = (df["last_order_date"] - df["first_order_date"]) / dt.timedelta(weeks = 1)

In [14]:
df["T_weekly"] = (today_date - df["first_order_date"]) / dt.timedelta(weeks = 1)

In [23]:
df = df[(df["frequency"] > 1)]

In [24]:
df["monetary_cltv_avg"] = df["total_price"] / df["frequency"]

In [44]:
cltv_df = df.groupby("master_id").agg({"recency_cltv_weekly": lambda x: x,"T_weekly": lambda y: y,"frequency":lambda a: a,"monetary_cltv_avg":lambda z:z})

In [45]:
cltv_df

Unnamed: 0_level_0,recency_cltv_weekly,T_weekly,frequency,monetary_cltv_avg
master_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
00016786-2f5a-11ea-bb80-000d3a38a36f,78.571,80.000,5.000,395.866
00034aaa-a838-11e9-a2fc-000d3a38a36f,247.571,290.143,3.000,139.983
000be838-85df-11ea-a90b-000d3a38a36f,28.286,58.714,4.000,359.158
000c1fe2-a8b7-11ea-8479-000d3a38a36f,47.429,51.286,7.000,450.524
000f5e3e-9dde-11ea-80cd-000d3a38a36f,43.286,46.143,7.000,1028.816
...,...,...,...,...
fff1db94-afd9-11ea-b736-000d3a38a36f,85.429,111.571,5.000,469.544
fff4736a-60a4-11ea-8dd8-000d3a38a36f,46.714,56.286,2.000,159.435
fffacd34-ae14-11e9-a2fc-000d3a38a36f,100.571,137.286,6.000,779.947
fffacecc-ddc3-11e9-a848-000d3a38a36f,69.714,88.571,9.000,703.510


In [43]:
bgf = BetaGeoFitter(penalizer_coef = 0.001)

In [46]:
bgf.fit(cltv_df['frequency'],
        cltv_df['recency_cltv_weekly'],
        cltv_df['T_weekly'])

  result = getattr(ufunc, method)(*inputs, **kwargs)


<lifetimes.BetaGeoFitter: fitted with 19945 subjects, a: 0.00, alpha: 76.17, b: 0.00, r: 3.66>

In [47]:
cltv_df["exp_sales_3_month"] = bgf.predict(4*3,cltv_df['frequency'],
                                    cltv_df['recency_cltv_weekly'],
                                    cltv_df['T_weekly'])

In [48]:
cltv_df["exp_sales_6_month"] = bgf.predict(4*6,cltv_df['frequency'],
                                    cltv_df['recency_cltv_weekly'],
                                    cltv_df['T_weekly'])

In [51]:
cltv_df

Unnamed: 0_level_0,recency_cltv_weekly,T_weekly,frequency,monetary_cltv_avg,exp_sales_3_month,exp_sales_6_month
master_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
00016786-2f5a-11ea-bb80-000d3a38a36f,78.571,80.000,5.000,395.866,0.666,1.331
00034aaa-a838-11e9-a2fc-000d3a38a36f,247.571,290.143,3.000,139.983,0.218,0.437
000be838-85df-11ea-a90b-000d3a38a36f,28.286,58.714,4.000,359.158,0.682,1.364
000c1fe2-a8b7-11ea-8479-000d3a38a36f,47.429,51.286,7.000,450.524,1.004,2.008
000f5e3e-9dde-11ea-80cd-000d3a38a36f,43.286,46.143,7.000,1028.816,1.046,2.092
...,...,...,...,...,...,...
fff1db94-afd9-11ea-b736-000d3a38a36f,85.429,111.571,5.000,469.544,0.554,1.107
fff4736a-60a4-11ea-8dd8-000d3a38a36f,46.714,56.286,2.000,159.435,0.513,1.026
fffacd34-ae14-11e9-a2fc-000d3a38a36f,100.571,137.286,6.000,779.947,0.543,1.086
fffacecc-ddc3-11e9-a848-000d3a38a36f,69.714,88.571,9.000,703.510,0.922,1.845


In [52]:
ggf = GammaGammaFitter(penalizer_coef = 0.01)

ggf.fit(cltv_df['frequency'], cltv_df['monetary_cltv_avg'])

<lifetimes.GammaGammaFitter: fitted with 19945 subjects, p: 3.73, q: 0.33, v: 3.67>

In [54]:
cltv = ggf.customer_lifetime_value(bgf,
                                   cltv_df['frequency'],
                                   cltv_df['recency_cltv_weekly'],
                                   cltv_df['T_weekly'],
                                   cltv_df['monetary_cltv_avg'],
                                   time= 6, 
                                   freq="W",  
                                   discount_rate=0.01)

In [55]:
cltv.reset_index()

Unnamed: 0,master_id,clv
0,00016786-2f5a-11ea-bb80-000d3a38a36f,574.689
1,00034aaa-a838-11e9-a2fc-000d3a38a36f,68.807
2,000be838-85df-11ea-a90b-000d3a38a36f,539.403
3,000c1fe2-a8b7-11ea-8479-000d3a38a36f,975.304
4,000f5e3e-9dde-11ea-80cd-000d3a38a36f,2319.332
...,...,...
19940,fff1db94-afd9-11ea-b736-000d3a38a36f,566.857
19941,fff4736a-60a4-11ea-8dd8-000d3a38a36f,190.797
19942,fffacd34-ae14-11e9-a2fc-000d3a38a36f,917.330
19943,fffacecc-ddc3-11e9-a848-000d3a38a36f,1390.314


In [56]:
cltv_final = cltv_df.merge(cltv, on = "master_id", how = "left")

In [57]:
cltv_final

Unnamed: 0_level_0,recency_cltv_weekly,T_weekly,frequency,monetary_cltv_avg,exp_sales_3_month,exp_sales_6_month,clv
master_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
00016786-2f5a-11ea-bb80-000d3a38a36f,78.571,80.000,5.000,395.866,0.666,1.331,574.689
00034aaa-a838-11e9-a2fc-000d3a38a36f,247.571,290.143,3.000,139.983,0.218,0.437,68.807
000be838-85df-11ea-a90b-000d3a38a36f,28.286,58.714,4.000,359.158,0.682,1.364,539.403
000c1fe2-a8b7-11ea-8479-000d3a38a36f,47.429,51.286,7.000,450.524,1.004,2.008,975.304
000f5e3e-9dde-11ea-80cd-000d3a38a36f,43.286,46.143,7.000,1028.816,1.046,2.092,2319.332
...,...,...,...,...,...,...,...
fff1db94-afd9-11ea-b736-000d3a38a36f,85.429,111.571,5.000,469.544,0.554,1.107,566.857
fff4736a-60a4-11ea-8dd8-000d3a38a36f,46.714,56.286,2.000,159.435,0.513,1.026,190.797
fffacd34-ae14-11e9-a2fc-000d3a38a36f,100.571,137.286,6.000,779.947,0.543,1.086,917.330
fffacecc-ddc3-11e9-a848-000d3a38a36f,69.714,88.571,9.000,703.510,0.922,1.845,1390.314


In [62]:
cltv_df["expected_average_profit"] = ggf.conditional_expected_average_profit(cltv_df['frequency'],cltv_df['monetary_cltv_avg'])

In [64]:
cltv = ggf.customer_lifetime_value(bgf,
                                   cltv_df['frequency'],
                                   cltv_df['recency_cltv_weekly'],
                                   cltv_df['T_weekly'],
                                   cltv_df['monetary_cltv_avg'],
                                   time= 6, 
                                   freq="W",  
                                   discount_rate=0.01)

In [66]:
cltv.reset_index()

Unnamed: 0,master_id,clv
0,00016786-2f5a-11ea-bb80-000d3a38a36f,574.689
1,00034aaa-a838-11e9-a2fc-000d3a38a36f,68.807
2,000be838-85df-11ea-a90b-000d3a38a36f,539.403
3,000c1fe2-a8b7-11ea-8479-000d3a38a36f,975.304
4,000f5e3e-9dde-11ea-80cd-000d3a38a36f,2319.332
...,...,...
19940,fff1db94-afd9-11ea-b736-000d3a38a36f,566.857
19941,fff4736a-60a4-11ea-8dd8-000d3a38a36f,190.797
19942,fffacd34-ae14-11e9-a2fc-000d3a38a36f,917.330
19943,fffacecc-ddc3-11e9-a848-000d3a38a36f,1390.314


In [67]:
cltv_final = cltv_df.merge(cltv, on = "master_id", how = "left")

In [68]:
cltv_final

Unnamed: 0_level_0,recency_cltv_weekly,T_weekly,frequency,monetary_cltv_avg,exp_sales_3_month,exp_sales_6_month,expected_average_profit,clv
master_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
00016786-2f5a-11ea-bb80-000d3a38a36f,78.571,80.000,5.000,395.866,0.666,1.331,411.406,574.689
00034aaa-a838-11e9-a2fc-000d3a38a36f,247.571,290.143,3.000,139.983,0.218,0.437,150.216,68.807
000be838-85df-11ea-a90b-000d3a38a36f,28.286,58.714,4.000,359.158,0.682,1.364,377.036,539.403
000c1fe2-a8b7-11ea-8479-000d3a38a36f,47.429,51.286,7.000,450.524,1.004,2.008,462.949,975.304
000f5e3e-9dde-11ea-80cd-000d3a38a36f,43.286,46.143,7.000,1028.816,1.046,2.092,1056.500,2319.332
...,...,...,...,...,...,...,...,...
fff1db94-afd9-11ea-b736-000d3a38a36f,85.429,111.571,5.000,469.544,0.554,1.107,487.835,566.857
fff4736a-60a4-11ea-8dd8-000d3a38a36f,46.714,56.286,2.000,159.435,0.513,1.026,177.213,190.797
fffacd34-ae14-11e9-a2fc-000d3a38a36f,100.571,137.286,6.000,779.947,0.543,1.086,804.692,917.330
fffacecc-ddc3-11e9-a848-000d3a38a36f,69.714,88.571,9.000,703.510,0.922,1.845,718.279,1390.314


In [69]:
cltv_final["segment"] = pd.qcut(cltv_final["clv"], 4, labels = ["D","C","B","A"])

In [70]:
cltv_final

Unnamed: 0_level_0,recency_cltv_weekly,T_weekly,frequency,monetary_cltv_avg,exp_sales_3_month,exp_sales_6_month,expected_average_profit,clv,segment
master_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
00016786-2f5a-11ea-bb80-000d3a38a36f,78.571,80.000,5.000,395.866,0.666,1.331,411.406,574.689,B
00034aaa-a838-11e9-a2fc-000d3a38a36f,247.571,290.143,3.000,139.983,0.218,0.437,150.216,68.807,D
000be838-85df-11ea-a90b-000d3a38a36f,28.286,58.714,4.000,359.158,0.682,1.364,377.036,539.403,B
000c1fe2-a8b7-11ea-8479-000d3a38a36f,47.429,51.286,7.000,450.524,1.004,2.008,462.949,975.304,A
000f5e3e-9dde-11ea-80cd-000d3a38a36f,43.286,46.143,7.000,1028.816,1.046,2.092,1056.500,2319.332,A
...,...,...,...,...,...,...,...,...,...
fff1db94-afd9-11ea-b736-000d3a38a36f,85.429,111.571,5.000,469.544,0.554,1.107,487.835,566.857,B
fff4736a-60a4-11ea-8dd8-000d3a38a36f,46.714,56.286,2.000,159.435,0.513,1.026,177.213,190.797,C
fffacd34-ae14-11e9-a2fc-000d3a38a36f,100.571,137.286,6.000,779.947,0.543,1.086,804.692,917.330,A
fffacecc-ddc3-11e9-a848-000d3a38a36f,69.714,88.571,9.000,703.510,0.922,1.845,718.279,1390.314,A


In [71]:
#Yönetime Öneri: 
#Tavsiye verilecek segmentler A ve D 

# A grubu segmentin CLV değerleri oldukça yüksektir. Müşterilere satın alma alışkanlıkları yöneliminde kampanya ve indirimler sağlanmalı, böylece toplam 
#getiri arttırılma yoluna gidilmeli. 

#D Segment grubu, CLV değerlerinin en düşük olduğu grup. Satın alma alışkanlıkları üzerinde etkili olan faktörler incelenip, müşterinin satın alım 
#frekansını arttırmaya ve birim bazda pahalı ürünler almaya yönlendirecek kampanya ve indirimler sağlanmalı.