<a href="https://colab.research.google.com/github/inbalv/tictactoe/blob/master/life_time.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install lifetimes

In [None]:
import pandas as pd
import numpy as np
from datetime import timedelta
from lifetimes import BetaGeoFitter, ParetoNBDFitter, GammaGammaFitter
from lifetimes.utils import summary_data_from_transaction_data

# ------------------------------------------------------------------------------
# 1. Load or Simulate Transaction Data
# ------------------------------------------------------------------------------

# To run this on real data, load a CSV that contains at least the columns
# 'customer_id', 'date' and 'amount', for example:
#
# data = pd.read_csv("transactions.csv", parse_dates=["date"])
#
# This demonstration builds a synthetic transaction log instead.

# Fixed seed so the simulated log is the same on every run.
np.random.seed(42)

# 200 customers buying on days drawn from a 100-day daily calendar.
n_customers = 200
date_range = pd.date_range(start="2023-01-01", periods=100, freq='D')
simulated_data = []

for cust in range(1, n_customers + 1):
    # The transaction count is a Poisson draw with mean 2: it can be 0 and
    # has no hard upper bound.
    n_transactions = np.random.poisson(lam=2)
    if n_transactions <= 0:
        # Customers without purchases contribute no rows.
        continue

    # Purchase days are sampled with replacement, so the same day can repeat.
    transaction_dates = np.random.choice(date_range, size=n_transactions)

    # One amount per transaction, drawn uniformly between $5 and $50.
    simulated_data.extend(
        {"customer_id": cust, "date": purchase_date, "amount": np.random.uniform(5, 50)}
        for purchase_date in transaction_dates
    )

# One row per transaction; make sure 'date' is a pandas datetime column.
data = pd.DataFrame(simulated_data)
data = data.assign(date=pd.to_datetime(data['date']))

print("Head of the simulated transaction data:")
print(data.head())

# ------------------------------------------------------------------------------
# 2. Create RFM Summary Data
# ------------------------------------------------------------------------------

# Collapse the raw transaction log into one row per customer with these columns:
#   frequency      - number of repeat purchases (>= 0; it is 0 for a customer
#                    with a single purchase)
#   recency        - days between the customer's first and last purchase
#   T              - days between the first purchase and the end of the
#                    observation period
#   monetary_value - average transaction value
# NOTE(review): lifetimes appears to count purchases per `freq` period (so
# same-day purchases collapse into one) and to leave the first purchase out of
# monetary_value by default - confirm against the library documentation.

# Column mapping and time unit ('D' = days) for the aggregation helper.
rfm_options = {
    'customer_id_col': 'customer_id',
    'datetime_col': 'date',
    'monetary_value_col': 'amount',
    'freq': 'D',
}
summary = summary_data_from_transaction_data(data, **rfm_options)

print("\nSummary data (first five rows):")
print(summary.head())

# ------------------------------------------------------------------------------
# 3. Fit the BG/NBD Model to Predict Future Purchase Frequency
# ------------------------------------------------------------------------------

# BG/NBD (Beta-Geometric/Negative Binomial Distribution) is a common choice for
# non-contractual settings. It is fitted on the three RFM columns below.
rfm_inputs = (summary['frequency'], summary['recency'], summary['T'])

bgf = BetaGeoFitter(penalizer_coef=0.0)
bgf.fit(*rfm_inputs)

# To use the Pareto/NBD model instead, uncomment these lines:
# pnbd = ParetoNBDFitter(penalizer_coef=0.0)
# pnbd.fit(summary['frequency'], summary['recency'], summary['T'])

# Expected number of transactions per customer over the next 30 days, stored as
# a new column on the summary table.
time_horizon = 30
summary['predicted_purchases_30'] = bgf.conditional_expected_number_of_purchases_up_to_time(
    time_horizon, *rfm_inputs)

print("\nPredicted number of purchases in the next 30 days:")
print(summary[['predicted_purchases_30']].head())

# ------------------------------------------------------------------------------
# 4. Fit the Gamma-Gamma Model to Predict Monetary Value per Transaction
# ------------------------------------------------------------------------------

# The Gamma-Gamma model estimates the average monetary value of future
# transactions. It should be fit only on customers with at least one repeat
# purchase (frequency > 0), so the summary is filtered first.
#
# The explicit .copy() makes `summary_positive` an independent DataFrame.
# Without it, the column assignment below writes into a slice of `summary`,
# which triggers pandas' SettingWithCopyWarning.
summary_positive = summary[summary['frequency'] > 0].copy()

ggf = GammaGammaFitter(penalizer_coef=0.0)
ggf.fit(summary_positive['frequency'], summary_positive['monetary_value'])

# For those customers, predict the expected average monetary value.
summary_positive['predicted_avg_value'] = ggf.conditional_expected_average_profit(
    summary_positive['frequency'], summary_positive['monetary_value'])

print("\nPredicted average value per transaction:")
print(summary_positive[['predicted_avg_value']].head())

# ------------------------------------------------------------------------------
# 5. Calculate Customer Lifetime Value (CLV)
# ------------------------------------------------------------------------------

# The lifetimes library provides a helper that combines the fitted purchase
# model and the Gamma-Gamma model into a discounted CLV estimate.
#
# Units matter here. Per the lifetimes documentation, customer_lifetime_value
# expects:
#   time          - the forecast horizon in MONTHS (not days)
#   discount_rate - the discount rate per MONTH
#   freq          - the time unit of T/recency in the summary ('D' = days);
#                   with 'D' the library treats one month as 30 days
# Passing the 30-day horizon straight through as `time` would therefore
# produce a 30-month CLV, so the horizon is converted to months first.

discount_rate = 0.01  # monthly discount rate

# Convert the day-based horizon to whole months (at least one), because the
# library accumulates CLV in whole-month steps.
clv_horizon_months = max(1, round(time_horizon / 30))

# assign() returns a new DataFrame, so no value is written into a slice of
# `summary`.
summary_positive = summary_positive.assign(
    ltv_30=ggf.customer_lifetime_value(
        bgf,  # or use pnbd if using ParetoNBDFitter instead
        summary_positive['frequency'],
        summary_positive['recency'],
        summary_positive['T'],
        summary_positive['monetary_value'],
        time=clv_horizon_months,  # forecast horizon in months (1 month = 30 days)
        discount_rate=discount_rate,  # discount rate per month
        freq='D',  # T and recency were computed in days
    )
)

print("\nEstimated 30-day CLV (first few rows):")
print(summary_positive[['ltv_30']].head())

# ------------------------------------------------------------------------------
# Optional: Save the summary with predicted LTVs
# ------------------------------------------------------------------------------

# Write the repeat-customer table, including its index, to a CSV file at a
# relative path.
ltv_output_path = "customer_ltv_estimates.csv"
summary_positive.to_csv(ltv_output_path)
