In [1]:
# Copyright (c) Meta Platforms, Inc. and affiliates.

# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

In [2]:
# pip install kaleido

In [3]:
from src import LTVSyntheticData
from src import LTVexploratory
from src.graph import save_plot

### Generate synthetic data 

In [4]:
# Build the synthetic demo dataset; a fixed random_seed is passed so reruns use the same seed
synth_data_gen = LTVSyntheticData(
    n_users=20000,
    random_seed=42,
)
customer_table = synth_data_gen.get_customers_data()
event_table = synth_data_gen.get_events_data()

# Create the exploratory-analysis helper from both tables,
# naming the columns that hold registration time, event time, event name and value
da = LTVexploratory(
    customer_table,
    event_table,
    registration_time_col='registration_date',
    event_time_col='event_date',
    event_name_col='event_name',
    value_col='value',
)

In [None]:
# The 'customer' table is a user-level table that defines day 0 for each user who has engaged with
# the business, i.e. the initial point of interaction (the anchor event),
# such as installing the app or making a first purchase.

customer_table.head()

In [None]:
# The 'event' table is a transaction-level record of all revenue-generating events completed
# for the advertising unit.

event_table.head()

### Run analysis

In [7]:
# The plot_customers_intersection function shows what percentage of customers are actually purchasers
# and whether there are customers who are present in the customer table but not present in the events table
# and who therefore need to be excluded from the subsequent analysis.


In [None]:
# Intersection between users in the two datasets
fig, data = da.plot_customers_intersection()
save_plot(fig, "images/customer_intersection.png")


# From the demo data output we can see that:
# Upper right: 95.6% of customers do not generate any revenue
# Lower right: 4.4% of all customers are revenue-generating customers (purchasers)
# Lower left: 0.0% means everyone in the customer table is also in the events table; therefore,
# there's no need to exclude any customers from the subsequent analysis.
# Upper left: should always be 0.0%


In [None]:
# The plot_paying_customers_flow function provides further insight into purchasers' buying behavior over time,
# showing how low, medium and high spenders flow to the same or a different class at a later point in time.
# The two main input parameters are:
# early_limit: sets the timestamp on the left axis, which shows each customer's cumulative value by that early point in time,
# categorized into equally sized, ranked groups: no spend, low spend, medium spend and high spend.
# It is set to 7 days here because most digital campaigns have a 7-day optimization window.
# days_limit: sets the timestamp on the right axis, which shows the cumulative value of the same customer at a later point in time,
# again categorized into equally sized, ranked groups: low spend, medium spend and high spend.
# Feel free to experiment with different future timestamps (e.g. 120 days, 180 days, 365 days etc.)
# to get a more nuanced view of your customers' purchasing behavior across different time frames.

# ---------------------------------------------- #### ---------------------------------------------------
# Please note that this visualization includes ALL purchasers defined by the days_limit parameter.
# In this example, early_limit is 7 days and days_limit is 60 days, so the visualization includes
# ALL purchasers up to day 60 after the initial interaction.


fig, data = da.plot_paying_customers_flow(
    days_limit=60,
    early_limit=7,
    spending_breaks={},
    end_spending_breaks={},
)
# Increase dpi to export a higher-resolution image
save_plot(fig, "images/paying_customer_flow.png", dpi=400)
fig

In [None]:
# Display the `data` object returned by the preceding plotting call (plot_paying_customers_flow in notebook order)
data

In [11]:
# The plot_revenue_pareto function visualizes whether a significant portion of revenue is contributed by
# a small group of purchasers.
# Like plot_purchases_distribution, the plot_revenue_pareto function uses the days_limit
# parameter to generate its output, and it operates on the same customer cohorts as
# the plot_purchases_distribution function.
# days_limit: defines the minimum duration since a customer's initial interaction for them
# to be included in this visualization. In this example, the limit is set to 60 days.

In [None]:
# Visualize how concentrated the revenue is by looking at the % of revenue that the highest-spending customers contribute
fig, data = da.plot_revenue_pareto(days_limit=60)
save_plot(fig, "images/revenue_pareto.png")
fig


# From the demo data output we can see that:
# The top 5% highest-spending customers contributed 69% of total revenue
# The top 10% contributed 75% of total revenue
# The top 20% contributed more than 84% of total revenue

In [13]:
# Purchase frequency (i.e. total purchases) in the first N days of a customer

# The plot_purchases_distribution function visualizes the purchase frequency among all purchasers.
# This function has two input parameters:
# days_limit: defines the minimum duration since a customer's initial interaction
# for them to be included in this visualization. In this example, the time frame is set to 60 days,
# but you can change it to whichever time frame makes the most sense for your business
# (e.g. 30 days, 120 days or 365 days).


# truncate_share: defines the share of all purchasers that is shown in this visualization; the remainder
# is treated as 'outliers' and excluded to make the histogram easier to read.
# In this example, truncate_share is set to 0.999, which means that the top 0.1% highest spenders
# are excluded from this visualization.


In [None]:

# Plot the purchase-frequency histogram for a 60-day limit, truncated at a 0.999 share
fig, data = da.plot_purchases_distribution(
    days_limit=60,
    truncate_share=0.999,
)
save_plot(fig, "images/purchases_distribution.png")
fig

# From the demo data output, we can see that:
# 39% of all purchasers purchased only once
# 29% purchased twice
# 14% purchased three times

In [15]:
# The plot_customers_histogram_per_conversion_day function visualizes the time elapsed between the initial
# interaction and the first purchase.
# This function also uses the days_limit parameter.

In [None]:
# Plot when customers convert (i.e. generate a revenue event),
# showing how many customers are actually captured by the optimization window of the marketing campaign
fig, data = da.plot_customers_histogram_per_conversion_day(days_limit=60)
save_plot(fig, "images/customers_histogram_per_conversion_day.png")
fig

# From the demo data output we can see that:
# 55% of first-time purchases happened within 7 days of the initial interaction.
# The remaining 45% of first purchases happen beyond the 7-day optimization window,
# which means that the current digital customer acquisition campaign is missing out on the 45% of
# first purchases that happen outside that window.

In [17]:
# The plot_early_late_revenue_correlation function demonstrates the correlation between short-term and long-term purchase values across various time frames.
# This function also uses the days_limit parameter.

In [None]:
# Show the correlation between the revenue in the first N days of a customer (in this case 7) and the revenue up to M days (e.g. 70).
# The less correlated they are, the less reliable early revenue is as an optimization metric for a marketing campaign.
fig, data = da.plot_early_late_revenue_correlation(days_limit=70)
save_plot(fig, "images/early_late_revenue_correlation.jpeg")
# NOTE(review): unlike the other plotting cells, this one saves a .jpeg and leaves the figure display
# commented out — confirm both are intentional.
# fig

# From the demo data output we can see that:
# There is high correlation in early time frames. For example, the correlation between day-7
# revenue and day-10 revenue is a robust 95%.
# However, as time progresses, the correlation between day-7 revenue and future revenue weakens significantly.
# By day 22, this correlation has already dropped below 40%. This suggests that day-7 revenue is not a
# reliable indicator of revenue on day 22 and beyond.
# This diminishing correlation between early and later revenue is a crucial indicator of the potential
# value a pLTV strategy could bring to a business.


In [None]:
# When spending_breaks is empty, default values are determined automatically. To define your own groups,
# pass a Dict[str, float], e.g. {'No spend': 0, 'Low spend': 10, 'Medium spend': 100, 'High spend': 1000}.
# Set is_mobile=True for a mobile/gaming company, or is_mobile=False for eCommerce.
data = da.estimate_ltv_impact(
    days_limit=60,
    early_limit=7,
    spending_breaks={},
    is_mobile=False,
)
data