<a href="https://colab.research.google.com/github/dezineskv/shareUI/blob/main/Customer_Lifetime_Value_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [26]:
import lifetimes

import pandas as pd
import numpy as np
import datetime as dt

import matplotlib.pyplot as plt
import seaborn as sns

from lifetimes import BetaGeoFitter, GammaGammaFitter
from sklearn.preprocessing import MinMaxScaler

data = pd.read_csv('online_retail.csv')

data.head()
data.info()
data.describe()
data = data[data['Quantity'] > 0 ]
data = data[data['UnitPrice'] > 0]
data = data[~data['InvoiceNo'].str.contains("C",na=False)]
# Removing missing values from the data
data.dropna(inplace=True)
# Defining a function to remove outliers .
def cap_outliers(dataframe, variable, q1=0.05, q2=0.95):
    lower_bound = dataframe[variable].quantile(q1)
    upper_bound = dataframe[variable].quantile(q2)
    dataframe[variable] = np.clip(dataframe[variable], lower_bound, upper_bound)

# Calling cap_outliers for UnitPrice and Quantity
cap_outliers(data,'UnitPrice')
cap_outliers(data,'Quantity')
data.describe()

data['Total Price'] = data['UnitPrice'] * data['Quantity']
RFM = lifetimes.utils.summary_data_from_transaction_data(data,'CustomerID','InvoiceDate','Total Price',observation_period_end='2011-12-09')

RFM.head()

# we want only customers shopped more than 2 times
RFM = RFM[RFM['frequency']>1]
RFM.head()

bgf = BetaGeoFitter(penalizer_coef=0.0)  # Create a BG/NBD model instance
bgf.fit(RFM['frequency'], RFM['recency'], RFM['T'])  # Fit the model with your data

# Predict future customer transactions (let's say for the next 6 months)
predicted_purchases = bgf.predict(6, RFM['frequency'], RFM['recency'], RFM['T'])

# Display the predicted purchases for each customer
RFM['predicted_purchases'] = predicted_purchases
print(RFM[['frequency', 'recency', 'T', 'predicted_purchases']].head())

# Fit the Gamma-Gamma model to your data (assuming you have 'monetary_value' column)
ggf = GammaGammaFitter(penalizer_coef=0.0)  # Adjust the penalizer_coef if needed
ggf.fit(RFM['frequency'], RFM['monetary_value'])

# Predict the expected monetary value for each transaction
predicted_monetary_value = ggf.conditional_expected_average_profit(RFM['frequency'], RFM['monetary_value'])

# Display the predicted monetary value for each transaction
RFM['predicted_monetary_value'] = predicted_monetary_value
print(RFM[['frequency', 'monetary_value', 'predicted_monetary_value']].head())

# Calculate the CLV prediction for each customer
RFM['predicted_CLV'] = RFM['predicted_purchases'] * RFM['predicted_monetary_value']

# Display the predicted CLV for each customer
print(RFM[['predicted_CLV']].head())

# Segment customers into quantiles based on predicted CLV
RFM['Segment'] = pd.qcut(RFM['predicted_CLV'], q=4, labels=['Hibernating', 'Need Attention', 'Loyal Customers', 'Champions'])

# Display the CLV quantile for each customer
RFM.head()

RFM.groupby('Segment').mean()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 541909 entries, 0 to 541908
Data columns (total 8 columns):
 #   Column       Non-Null Count   Dtype  
---  ------       --------------   -----  
 0   InvoiceNo    541909 non-null  object 
 1   StockCode    541909 non-null  object 
 2   Description  540455 non-null  object 
 3   Quantity     541909 non-null  int64  
 4   InvoiceDate  541909 non-null  object 
 5   UnitPrice    541909 non-null  float64
 6   CustomerID   406829 non-null  float64
 7   Country      541909 non-null  object 
dtypes: float64(2), int64(1), object(5)
memory usage: 33.1+ MB
            frequency  recency      T  predicted_purchases
CustomerID                                                
12347.0           6.0    365.0  367.0             0.105275
12348.0           3.0    283.0  358.0             0.069025
12352.0           6.0    260.0  296.0             0.123399
12356.0           2.0    303.0  325.0             0.060655
12359.0           3.0    274.0  331.0      

Unnamed: 0_level_0,frequency,recency,T,monetary_value,predicted_purchases,predicted_monetary_value,predicted_CLV
Segment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Hibernating,2.887265,236.34238,314.776618,184.460644,0.0741,272.986569,19.634967
Need Attention,3.995825,240.048017,282.870564,288.097725,0.097324,315.073218,28.962636
Loyal Customers,5.087683,224.202505,255.139875,387.433829,0.124657,368.076314,42.133573
Champions,12.14405,265.471816,279.501044,565.476698,0.215625,501.891978,104.77734
