# BG-NBD ve Gamma-Gamma ile CLTV Prediction

---

**İş Problemi (Business Problem)**

FLO, satış ve pazarlama faaliyetleri için bir yol haritası (roadmap) belirlemek istemektedir. Şirketin orta ve uzun vadeli plan yapabilmesi için mevcut müşterilerin gelecekte şirkete sağlayacakları potansiyel değerin tahmin edilmesi gerekmektedir.


**Veri Seti Hikayesi**

Veri seti, son alışverişlerini 2020 - 2021 yıllarında OmniChannel (hem online hem offline alışveriş yapan) olarak yapan müşterilerin geçmiş alışveriş davranışlarından elde edilen bilgilerden oluşmaktadır.

In [None]:
# Değişkenler;

# master_id: Eşsiz müşteri numarası
# order_channel : Alışveriş yapılan platforma ait hangi kanalın kullanıldığı (Android, ios, Desktop, Mobile, Offline)
# last_order_channel : En son alışverişin yapıldığı kanal
# first_order_date : Müşterinin yaptığı ilk alışveriş tarihi
# last_order_date : Müşterinin yaptığı son alışveriş tarihi
# last_order_date_online : Müşterinin online platformda yaptığı son alışveriş tarihi
# last_order_date_offline : Müşterinin offline platformda yaptığı son alışveriş tarihi
# order_num_total_ever_online : Müşterinin online platformda yaptığı toplam alışveriş sayısı
# order_num_total_ever_offline : Müşterinin offline'da yaptığı toplam alışveriş sayısı
# customer_value_total_ever_offline : Müşterinin offline alışverişlerinde ödediği toplam ücret
# customer_value_total_ever_online : Müşterinin online alışverişlerinde ödediği toplam ücret
# interested_in_categories_12 : Müşterinin son 12 ayda alışveriş yaptığı kategorilerin listesi

---------

## Veriyi Hazırlama

In [2]:
# kütüphanelerin import işlemlerini yapalım;
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
from lifetimes import BetaGeoFitter
from lifetimes import GammaGammaFitter
from lifetimes.plotting import plot_period_transactions

# Configure pandas output: no column/row truncation, floats shown with 3 decimals.
for unlimited_option in ("display.max_columns", "display.max_rows"):
    pd.set_option(unlimited_option, None)
pd.set_option("display.float_format", lambda value: "%.3f" % value)

In [3]:
# Load the data set; df_ keeps the raw frame and df is the working copy.
data_path = r"C:\Users\kkakt\Desktop\Flo\dataset\flo_data_20k.csv"
df_ = pd.read_csv(data_path)
df = df_.copy()

df.head()

Unnamed: 0,master_id,order_channel,last_order_channel,first_order_date,last_order_date,last_order_date_online,last_order_date_offline,order_num_total_ever_online,order_num_total_ever_offline,customer_value_total_ever_offline,customer_value_total_ever_online,interested_in_categories_12
0,cc294636-19f0-11eb-8d74-000d3a38a36f,Android App,Offline,2020-10-30,2021-02-26,2021-02-21,2021-02-26,4.0,1.0,139.99,799.38,[KADIN]
1,f431bd5a-ab7b-11e9-a2fc-000d3a38a36f,Android App,Mobile,2017-02-08,2021-02-16,2021-02-16,2020-01-10,19.0,2.0,159.97,1853.58,"[ERKEK, COCUK, KADIN, AKTIFSPOR]"
2,69b69676-1a40-11ea-941b-000d3a38a36f,Android App,Android App,2019-11-27,2020-11-27,2020-11-27,2019-12-01,3.0,2.0,189.97,395.35,"[ERKEK, KADIN]"
3,1854e56c-491f-11eb-806e-000d3a38a36f,Android App,Android App,2021-01-06,2021-01-17,2021-01-17,2021-01-06,1.0,1.0,39.99,81.98,"[AKTIFCOCUK, COCUK]"
4,d6ea1074-f1f5-11e9-9346-000d3a38a36f,Desktop,Desktop,2019-08-03,2021-03-07,2021-03-07,2019-08-03,1.0,1.0,49.99,159.99,[AKTIFSPOR]


In [4]:
# aykırı değerleri belirlemek ve baskılamak için fonksiyonlarımızı hazırlayalım;

#aykırı değer belirlemek fonksiyonu oluşturalım;
def outlier_thresholds(dataframe, columns, q1=0.01, q3=0.99, whisker=1.5):
    """Return the (upper, lower) outlier limits for ``dataframe[columns]``.

    The limits are fences built around the ``q1`` and ``q3`` quantiles:
    ``upper = Q3 + whisker * (Q3 - Q1)`` and
    ``lower = Q1 - whisker * (Q3 - Q1)``.

    Args:
        dataframe: pandas DataFrame holding the data.
        columns: Column label (or labels) to evaluate.
        q1: Lower quantile in [0, 1). Defaults to 0.01.
        q3: Upper quantile in (0, 1]; must be greater than ``q1``.
            Defaults to 0.99.
        whisker: Multiplier applied to the inter-quantile range.
            Defaults to 1.5.

    Returns:
        Tuple ``(outlier_max, outlier_min)`` -- the upper limit comes first.

    Raises:
        ValueError: If the quantiles are not ordered as 0 <= q1 < q3 <= 1.
    """
    if not 0 <= q1 < q3 <= 1:
        raise ValueError("quantiles must satisfy 0 <= q1 < q3 <= 1")

    lower_quantile = dataframe[columns].quantile(q1)
    upper_quantile = dataframe[columns].quantile(q3)
    inter_quantile_range = upper_quantile - lower_quantile

    outlier_max = upper_quantile + whisker * inter_quantile_range
    outlier_min = lower_quantile - whisker * inter_quantile_range
    return (outlier_max, outlier_min)

#aykırı değer baskılama fonksiyonu oluşturalım;
def replace_with_thresholds(dataframe,columns):
    """Cap the outliers of ``dataframe[columns]`` in place.

    Values above the upper limit from ``outlier_thresholds`` are replaced
    with the rounded upper limit, then values below the lower limit are
    replaced with the rounded lower limit. Nothing is returned; the frame
    passed in is modified.
    """
    upper_limit, lower_limit = outlier_thresholds(dataframe, columns)

    # Cap the high side first, then re-evaluate the column for the low side.
    above_upper = dataframe[columns] > upper_limit
    dataframe.loc[above_upper, columns] = round(upper_limit)

    below_lower = dataframe[columns] < lower_limit
    dataframe.loc[below_lower, columns] = round(lower_limit)
    
    

In [5]:
# Cap outliers in each order-count and spend column.
for capped_column in (
    "order_num_total_ever_online",
    "order_num_total_ever_offline",
    "customer_value_total_ever_offline",
    "customer_value_total_ever_online",
):
    replace_with_thresholds(df, capped_column)


In [6]:
# Add online and offline figures into per-customer totals.
df["total_transactions"] = df["order_num_total_ever_online"].add(
    df["order_num_total_ever_offline"]
)
df["total_price"] = df["customer_value_total_ever_online"].add(
    df["customer_value_total_ever_offline"]
)

df.head()

Unnamed: 0,master_id,order_channel,last_order_channel,first_order_date,last_order_date,last_order_date_online,last_order_date_offline,order_num_total_ever_online,order_num_total_ever_offline,customer_value_total_ever_offline,customer_value_total_ever_online,interested_in_categories_12,total_transactions,total_price
0,cc294636-19f0-11eb-8d74-000d3a38a36f,Android App,Offline,2020-10-30,2021-02-26,2021-02-21,2021-02-26,4.0,1.0,139.99,799.38,[KADIN],5.0,939.37
1,f431bd5a-ab7b-11e9-a2fc-000d3a38a36f,Android App,Mobile,2017-02-08,2021-02-16,2021-02-16,2020-01-10,19.0,2.0,159.97,1853.58,"[ERKEK, COCUK, KADIN, AKTIFSPOR]",21.0,2013.55
2,69b69676-1a40-11ea-941b-000d3a38a36f,Android App,Android App,2019-11-27,2020-11-27,2020-11-27,2019-12-01,3.0,2.0,189.97,395.35,"[ERKEK, KADIN]",5.0,585.32
3,1854e56c-491f-11eb-806e-000d3a38a36f,Android App,Android App,2021-01-06,2021-01-17,2021-01-17,2021-01-06,1.0,1.0,39.99,81.98,"[AKTIFCOCUK, COCUK]",2.0,121.97
4,d6ea1074-f1f5-11e9-9346-000d3a38a36f,Desktop,Desktop,2019-08-03,2021-03-07,2021-03-07,2019-08-03,1.0,1.0,49.99,159.99,[AKTIFSPOR],2.0,209.98


In [7]:
# Inspect the column dtypes (the date columns are still plain objects here).

df.dtypes

master_id                             object
order_channel                         object
last_order_channel                    object
first_order_date                      object
last_order_date                       object
last_order_date_online                object
last_order_date_offline               object
order_num_total_ever_online          float64
order_num_total_ever_offline         float64
customer_value_total_ever_offline    float64
customer_value_total_ever_online     float64
interested_in_categories_12           object
total_transactions                   float64
total_price                          float64
dtype: object

In [8]:
# Convert every column whose name contains "date" to datetime.

is_date_column = df.columns.str.contains("date")
data_columns = df.columns[is_date_column]
df[data_columns] = df[data_columns].apply(pd.to_datetime)

df.dtypes

master_id                                    object
order_channel                                object
last_order_channel                           object
first_order_date                     datetime64[ns]
last_order_date                      datetime64[ns]
last_order_date_online               datetime64[ns]
last_order_date_offline              datetime64[ns]
order_num_total_ever_online                 float64
order_num_total_ever_offline                float64
customer_value_total_ever_offline           float64
customer_value_total_ever_online            float64
interested_in_categories_12                  object
total_transactions                          float64
total_price                                 float64
dtype: object

-------

## CLTV Veri Yapısının Oluşturulması

In [9]:
# Analysis date: two days after the most recent order in the data.
today_date = df["last_order_date"].max() + dt.timedelta(days=2)

today_date

Timestamp('2021-06-01 00:00:00')

In [10]:
# Build the CLTV input metrics: recency, T, frequency and monetary.
#   recency_cltv_weekly: whole weeks between a customer's first and last order
#   T_weekly:            whole weeks between the first order and the analysis date
#   frequency:           total number of transactions
#   monetary_cltv_avg:   average spend per transaction

cltv = pd.DataFrame()

cltv["customer_id"] = df["master_id"]
# `.astype("timedelta64[W]")` raises on pandas >= 2.0 (only s/ms/us/ns are
# supported), so derive whole weeks from the day count instead. The float cast
# keeps the same whole-week float values the old conversion produced.
cltv["recency_cltv_weekly"] = ((df["last_order_date"] - df["first_order_date"]).dt.days // 7).astype(float)
cltv["T_weekly"] = ((today_date - df["first_order_date"]).dt.days // 7).astype(float)
cltv["frequency"] = df["total_transactions"]
cltv["monetary_cltv_avg"] = df["total_price"] / df["total_transactions"]

cltv.head()

Unnamed: 0,customer_id,recency_cltv_weekly,T_weekly,frequency,monetary_cltv_avg
0,cc294636-19f0-11eb-8d74-000d3a38a36f,17.0,30.0,5.0,187.874
1,f431bd5a-ab7b-11e9-a2fc-000d3a38a36f,209.0,224.0,21.0,95.883333
2,69b69676-1a40-11ea-941b-000d3a38a36f,52.0,78.0,5.0,117.064
3,1854e56c-491f-11eb-806e-000d3a38a36f,1.0,20.0,2.0,60.985
4,d6ea1074-f1f5-11e9-9346-000d3a38a36f,83.0,95.0,2.0,104.99


-----------

## BG/NBD, Gamma-Gamma Kurulması ve CLTV'nin Hesaplanması

In [11]:
# Fit the BG/NBD model, then predict expected purchases over 12 and 24 weeks
# (stored as exp_sales_3 and exp_sales_6).
bgf = BetaGeoFitter(penalizer_coef=0.001)
purchase_history = (cltv["frequency"], cltv["recency_cltv_weekly"], cltv["T_weekly"])
bgf.fit(*purchase_history)
cltv["exp_sales_3"] = bgf.predict(12, *purchase_history)
cltv["exp_sales_6"] = bgf.predict(24, *purchase_history)

  result = getattr(ufunc, method)(*inputs, **kwargs)


In [12]:
# Fit the Gamma-Gamma model and store the expected average value per customer.

ggf = GammaGammaFitter(penalizer_coef=0.01)
spend_inputs = (cltv["frequency"], cltv["monetary_cltv_avg"])
ggf.fit(*spend_inputs)
cltv["exp_average_value"] = ggf.conditional_expected_average_profit(*spend_inputs)
cltv.head()

Unnamed: 0,customer_id,recency_cltv_weekly,T_weekly,frequency,monetary_cltv_avg,exp_sales_3,exp_sales_6,exp_average_value
0,cc294636-19f0-11eb-8d74-000d3a38a36f,17.0,30.0,5.0,187.874,0.983894,1.967788,193.632679
1,f431bd5a-ab7b-11e9-a2fc-000d3a38a36f,209.0,224.0,21.0,95.883333,0.987649,1.975298,96.665048
2,69b69676-1a40-11ea-941b-000d3a38a36f,52.0,78.0,5.0,117.064,0.676212,1.352425,120.967619
3,1854e56c-491f-11eb-806e-000d3a38a36f,1.0,20.0,2.0,60.985,0.709935,1.419871,67.320145
4,d6ea1074-f1f5-11e9-9346-000d3a38a36f,83.0,95.0,2.0,104.99,0.397634,0.795268,114.325108


In [13]:
# Compute the 6-month CLTV from the fitted BG/NBD and Gamma-Gamma models.

cltv["clv"] = ggf.customer_lifetime_value(
    bgf,
    cltv["frequency"],
    cltv["recency_cltv_weekly"],
    cltv["T_weekly"],
    cltv["monetary_cltv_avg"],
    time=6,
    freq="W",
    discount_rate=0.01,
)

In [14]:
# Show the 20 customers with the highest CLV.
cltv.sort_values(by="clv", ascending=False).head(20)

Unnamed: 0,customer_id,recency_cltv_weekly,T_weekly,frequency,monetary_cltv_avg,exp_sales_3,exp_sales_6,exp_average_value,clv
9055,47a642fe-975b-11eb-8c2a-000d3a38a36f,2.0,7.0,4.0,1401.8,1.112748,2.225496,1449.060468,3383.616305
13880,7137a5c0-7aad-11ea-8f20-000d3a38a36f,6.0,13.0,11.0,758.085455,1.986531,3.973063,767.360602,3198.840545
17323,f59053e2-a503-11e9-a2fc-000d3a38a36f,51.0,101.0,7.0,1106.467143,0.724073,1.448147,1127.611525,1713.324813
12438,625f40a2-5bd2-11ea-98b0-000d3a38a36f,74.0,74.0,16.0,501.87375,1.577291,3.154583,506.166665,1675.340842
7330,a4d534a2-5b1b-11eb-8dbd-000d3a38a36f,62.0,67.0,52.0,166.224615,4.686508,9.373017,166.712253,1639.51203
8868,9ce6e520-89b0-11ea-a6e7-000d3a38a36f,3.0,34.0,8.0,601.22625,1.276739,2.553478,611.492616,1638.290804
6402,851de3b4-8f0c-11eb-8cb8-000d3a38a36f,8.0,9.0,2.0,862.69,0.802361,1.604722,923.679965,1555.209722
6666,53fe00d4-7b7a-11eb-960b-000d3a38a36f,9.0,13.0,17.0,259.865294,2.800157,5.600314,262.072907,1539.934181
19538,55d54d9e-8ac7-11ea-8ec0-000d3a38a36f,52.0,58.0,31.0,228.53,3.114728,6.229455,229.606946,1500.73075
14858,031b2954-6d28-11eb-99c4-000d3a38a36f,14.0,15.0,3.0,743.586667,0.881769,1.763537,778.05037,1439.660456


In [15]:
# Split customers into four CLV quantile segments; "D" is the lowest, "A" the highest.

segment_labels = ["D", "C", "B", "A"]
cltv["segment"] = pd.qcut(cltv["clv"], q=4, labels=segment_labels)
cltv

Unnamed: 0,customer_id,recency_cltv_weekly,T_weekly,frequency,monetary_cltv_avg,exp_sales_3,exp_sales_6,exp_average_value,clv,segment
0,cc294636-19f0-11eb-8d74-000d3a38a36f,17.0,30.0,5.0,187.874000,0.983894,1.967788,193.632679,399.783245,A
1,f431bd5a-ab7b-11e9-a2fc-000d3a38a36f,209.0,224.0,21.0,95.883333,0.987649,1.975298,96.665048,200.340943,B
2,69b69676-1a40-11ea-941b-000d3a38a36f,52.0,78.0,5.0,117.064000,0.676212,1.352425,120.967619,171.652401,B
3,1854e56c-491f-11eb-806e-000d3a38a36f,1.0,20.0,2.0,60.985000,0.709935,1.419871,67.320145,100.290907,D
4,d6ea1074-f1f5-11e9-9346-000d3a38a36f,83.0,95.0,2.0,104.990000,0.397634,0.795268,114.325108,95.394366,D
...,...,...,...,...,...,...,...,...,...,...
19940,727e2b6e-ddd4-11e9-a848-000d3a38a36f,41.0,88.0,3.0,133.986667,0.488056,0.976113,141.360373,144.775624,C
19941,25cd53d4-61bf-11ea-8dd8-000d3a38a36f,42.0,65.0,2.0,195.235000,0.482542,0.965085,210.722402,213.375123,B
19942,8aea4c2a-d6fc-11e9-93bc-000d3a38a36f,88.0,89.0,3.0,210.980000,0.485089,0.970179,221.775211,225.752465,B
19943,e50bb46c-ff30-11e9-a5e8-000d3a38a36f,98.0,113.0,6.0,168.295000,0.614314,1.228628,172.647458,222.560510,B


In [76]:
# tüm sürecin fonksiyonlaştırılması

def create_cltv(dataframe,month=3,csv=False):
    """Run the whole CLTV pipeline and return the per-customer result frame.

    Steps: drop rows with missing values, cap outliers in the online/offline
    order-count and spend columns, build the totals, convert the date
    columns, derive weekly recency / T plus frequency and average monetary
    value, fit BG/NBD and Gamma-Gamma, compute CLV and split it into four
    segments.

    The input frame is not modified; all work is done on a copy.

    Args:
        dataframe: Raw customer DataFrame with the online/offline order-count
            and spend columns plus ``first_order_date`` / ``last_order_date``.
        month: Prediction horizon in months. Expected sales are predicted for
            ``month * 4`` weeks and CLV is computed with ``time=month``.
        csv: When true, also write the result to ``cltv.csv``.

    Returns:
        DataFrame with the columns ``frequency``, ``recency_week``,
        ``T_week``, ``monetary_avg``, ``exp_sales_{month}``,
        ``exp_average_value``, ``clv`` and ``segment``. It keeps the row
        index of ``dataframe`` after dropping rows with missing values.
    """
    # Copy so that dropna, capping and the new columns do not leak into the
    # caller's frame (repeated calls would otherwise re-cap capped data).
    dataframe = dataframe.dropna().copy()

    # Cap outliers BEFORE building the totals; otherwise the totals (and so
    # frequency / monetary) would still contain the uncapped values.
    outlier_columns = ["order_num_total_ever_online", "order_num_total_ever_offline", "customer_value_total_ever_offline","customer_value_total_ever_online"]
    for col in outlier_columns:
        replace_with_thresholds(dataframe,col)

    dataframe["order_num_total"] = dataframe["order_num_total_ever_online"] + dataframe["order_num_total_ever_offline"]
    dataframe["customer_value_total"] = dataframe["customer_value_total_ever_online"] + dataframe["customer_value_total_ever_offline"]

    # Convert every column whose name contains "date" to datetime.
    date_columns = dataframe.columns[dataframe.columns.str.contains("date")]
    dataframe[date_columns] = dataframe[date_columns].apply(pd.to_datetime)

    # Analysis date: two days after the most recent order.
    today_date = dataframe["last_order_date"].max() + dt.timedelta(days=2)

    # Metrics. `.astype("timedelta64[W]")` raises on pandas >= 2.0, so whole
    # weeks are derived from the day count and kept as floats.
    cltv = pd.DataFrame()
    cltv["frequency"] = dataframe["order_num_total"]
    cltv["recency_week"] = ((dataframe["last_order_date"] - dataframe["first_order_date"]).dt.days // 7).astype(float)
    cltv["T_week"] = ((today_date - dataframe["first_order_date"]).dt.days // 7).astype(float)
    cltv["monetary_avg"] = dataframe["customer_value_total"] / dataframe["order_num_total"]

    # BG/NBD: expected number of purchases over the horizon (month * 4 weeks).
    bgf = BetaGeoFitter(penalizer_coef=0.001)
    bgf.fit(cltv["frequency"],cltv["recency_week"],cltv["T_week"])
    cltv[f"exp_sales_{month}"] = bgf.predict(month*4,cltv["frequency"],cltv["recency_week"],cltv["T_week"])

    # Gamma-Gamma: expected average value per transaction.
    ggf = GammaGammaFitter(penalizer_coef=0.01)
    ggf.fit(cltv["frequency"],cltv["monetary_avg"])
    cltv["exp_average_value"] = ggf.conditional_expected_average_profit(cltv["frequency"],cltv["monetary_avg"])

    # Customer lifetime value over `month` months on weekly data.
    cltv["clv"] = ggf.customer_lifetime_value(bgf,cltv["frequency"],cltv["recency_week"],cltv["T_week"],cltv["monetary_avg"],
                                              time=month,freq="W",discount_rate=0.01)

    # Four CLV quantile segments; "D" is the lowest, "A" the highest.
    cltv["segment"] = pd.qcut(cltv["clv"],4,labels=("D","C","B","A"))

    # NOTE(review): the result carries no customer id column and the CSV is
    # written without the index, so exported rows cannot be traced back to
    # customers -- confirm whether an id column should be added.
    if csv:
        cltv.to_csv("cltv.csv",index=False)

    return cltv

In [79]:
# Run the full pipeline on df with the default arguments (month=3, csv=False).
create_cltv(df)

  result = getattr(ufunc, method)(*inputs, **kwargs)


Unnamed: 0,frequency,recency_week,T_week,monetary_avg,exp_sales_3,exp_average_value,clv,segment
0,5.0,17.0,30.0,187.874000,0.983894,193.632679,202.874882,A
1,21.0,209.0,224.0,95.883333,0.987649,96.665048,101.665455,B
2,5.0,52.0,78.0,117.064000,0.676212,120.967619,87.107104,B
3,2.0,1.0,20.0,60.985000,0.709935,67.320145,50.893844,D
4,2.0,83.0,95.0,104.990000,0.397634,114.325108,48.409034,D
...,...,...,...,...,...,...,...,...
19940,3.0,41.0,88.0,133.986667,0.488056,141.360373,73.468156,C
19941,2.0,42.0,65.0,195.235000,0.482542,210.722402,108.279808,B
19942,3.0,88.0,89.0,210.980000,0.485089,221.775211,114.560841,B
19943,6.0,98.0,113.0,168.295000,0.614314,172.647458,112.941045,B
