In [None]:
# Copyright (c) Meta Platforms, Inc. and affiliates.

# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

In [None]:
from src import LTVSyntheticData
from src import LTVexploratory

### Generate synthetic data 

In [None]:
# Build the synthetic generator with a fixed random seed, then pull the
# customer table and the event table from it (customers first, then events)
synth_data_gen = LTVSyntheticData(n_users=20000, random_seed=42)
customer_table, event_table = (
    synth_data_gen.get_customers_data(),
    synth_data_gen.get_events_data(),
)

# Hand both tables to the exploratory class, naming the columns that hold
# the registration time, the event time, the event name and the value
da = LTVexploratory(
    customer_table,
    event_table,
    registration_time_col="registration_date",
    event_time_col="event_date",
    event_name_col="event_name",
    value_col="value",
)

In [None]:
# Preview the top of the customer table produced by the synthetic data generator
customer_table.head()

In [None]:
# Preview the top of the event table produced by the synthetic data generator
event_table.head()

### Run analysis

In [None]:
# Intersection between the users present in the two datasets (customer table and event table).
# The call returns a (figure, data) pair; the trailing `fig` makes the figure the cell's output.
fig, data = da.plot_customers_intersection()
fig

In [None]:
# How paying customers move from their classification at the beginning to their
# classification after a longer period (presumably the first `early_limit` days
# versus the first `days_limit` days -- TODO confirm).
# Empty spending breaks make the method find default values; to use your own groups,
# pass them in the format Dict[str, float],
# e.g. {'No spend': 0, 'Low spend': 10, 'Medium spend': 100, 'High spend': 1000}
fig, data = da.plot_paying_customers_flow(
    days_limit=60,
    early_limit=7,
    spending_breaks={},
    end_spending_breaks={},
)
fig

In [None]:
# Inspect the data returned alongside the paying-customers-flow figure in the previous cell
data

In [None]:
# Visualize how the revenue is concentrated by looking at % of revenue that the highest spending customers contribute
# (days_limit=60 presumably restricts the revenue to each customer's first 60 days -- TODO confirm).
# The call returns a (figure, data) pair; end the cell with `fig` so the chart becomes the cell's output,
# exactly as every other plotting cell in this notebook does.
fig, data = da.plot_revenue_pareto(days_limit=60)
fig

In [None]:
# Purchase frequency (or just total purchases) in the first N days of a customer (here N = days_limit = 60).
# truncate_share=0.999 presumably trims the extreme tail of the distribution before plotting -- TODO confirm.
fig, data = da.plot_purchases_distribution(days_limit=60, truncate_share=0.999)
fig


In [None]:
# Plot when the customers convert (i.e. generate a revenue event),
# showing how many customers are actually captured by the optimization window of the marketing campaign.
# days_limit=60 presumably caps the conversion days that are considered -- TODO confirm.
fig, data = da.plot_customers_histogram_per_conversion_day(days_limit=60)
fig

In [None]:
# Show the correlation of the revenue in the first N days of a customer with the revenue up to M days
# (here M = 70, set through days_limit). The early window N is not passed in this call, so the method's
# default applies -- presumably 7 days; TODO confirm against LTVexploratory.
# The less correlated they are, the less reliable the early revenue is as an optimization metric for a marketing campaign.
fig, data = da.plot_early_late_revenue_correlation(days_limit=70)
fig

In [None]:
# Estimate the LTV impact (per the method name) of changing the population of some spending groups.
# If spending breaks is empty, it will find default values, you can specify your own groups in the format Dict[str, float],
# e.g. {'No spend': 0, 'Low spend': 10, 'Medium spend': 100, 'High spend': 1000}
# population_increase maps a spending-group name to a fraction -- presumably the relative increase to
# simulate for that group, with keys that must match the group names in use; TODO confirm.
data = da.estimate_ltv_impact(
    days_limit=60, 
    spending_breaks={},
    population_increase={"No spend": 0.2, "Low spend": 0.1, "High spend": 0.05}, 
    )
# Display the estimate as the cell's output
data