In [None]:
# Copyright 2022 Google LLC.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# 6. Media experiment design

This notebook demonstrates the design of a media experiment by using the
[Experimental Design](https://github.com/google/gps_building_blocks/tree/master/py/gps_building_blocks/analysis/exp_design)
module to activate the predictions from an LTV model. It is vital to design and estimate the impact of media campaigns using valid statistical methods to make sure the limited experimentation budget is utilized effectively and to set the right expectations of the campaign outcome.



### Requirements

* An already scored test dataset, or the trained model together with the test dataset to be scored, available in GCP BigQuery.
* This test dataset should contain all the ML instances for at least one snapshot date.

### Install and import required modules

In [None]:
# Uncomment the next line to install the required Python modules
# !sh ../utils/setup.sh

In [None]:
# Add custom utils module to Python environment.
# The parent directory is put on sys.path so that `from utils import helpers`
# resolves. The membership check keeps this cell idempotent: re-running it
# does not append duplicate entries to sys.path.
import os
import sys

_parent_dir = os.path.abspath(os.pardir)
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)


import numpy as np
import pandas as pd

from gps_building_blocks.analysis.exp_design import ab_testing_design
from gps_building_blocks.cloud.utils import bigquery as bigquery_utils

from utils import helpers

### Notebook custom settings

In [None]:
# Show the result of every top-level expression in a cell (not only the last
# one), so several outputs can be inspected without calling display() each time.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

### Set parameters

In [None]:
# Load the notebook settings from config.yaml; its `destination` section
# supplies the GCP project and the BigQuery dataset used by the queries below.
configs = helpers.get_configs('config.yaml')
dest_configs = configs.destination

# GCP project ID and name of the BigQuery dataset
PROJECT_ID, DATASET_NAME = dest_configs.project_id, dest_configs.dataset_name

In [None]:
# To distinguish the separate runs of the training pipeline
RUN_ID = '01'

# BigQuery table name containing the test dataset to be scored. This test
# dataset should contain all the instances at least for one snapshot date
FEATURES_TEST_TABLE = f'features_test_table_{RUN_ID}'
# Output BQML model name to save in BigQuery
BQML_MODEL_NAME = f'ltv_model_bqml_{RUN_ID}'
# BigQuery table name containing the scored test dataset.
# NOTE(review): unlike the two names above, this one carries no RUN_ID suffix;
# confirm it matches the table actually written by the scoring step.
FEATURES_TEST_PREDICTIONS_TABLE = 'predictions_table'
# Selected snapshot date to select the ML instances (reflecting the instances to
# be scored on a given scoring date) to be used for experiment design in
# YYYY-MM-DD format
SELECTED_SNAPSHOT_DATE = '2017-06-15'
# Name of the actual label column
ACTUAL_LABEL_NAME = 'label'
# Name of the prediction column
PREDICTED_LABEL_NAME = 'predicted_label'

# BigQuery helper object created for PROJECT_ID; the cells below call its
# run_query() method to execute SQL and fetch the results.
bq_utils = bigquery_utils.BigQueryUtils(project_id=PROJECT_ID)

### Select the relevant data for experiment design
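Select all the instances for one snapshot date, which resembles the scoring dataset for one day. This dataset is used to design the media experiment. Run only one of the following two sections, depending on whether the test dataset has already been scored.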

### Score the test dataset (if not already scored)


In [None]:
# Prediction sql query: scores the test instances of the selected snapshot
# date with the BQML model.
# NOTE: run either this cell or the "Read the prediction test dataset" cell,
# not both -- each one (re)assigns `df_test_predictions`.
# Only the columns needed for the experiment design are fetched (the same ones
# the read cell selects) instead of downloading every feature column.
prediction_query = f"""
  SELECT
    {ACTUAL_LABEL_NAME},
    {PREDICTED_LABEL_NAME},
    snapshot_ts
  FROM ML.PREDICT(MODEL `{PROJECT_ID}.{DATASET_NAME}.{BQML_MODEL_NAME}`,
                  TABLE `{PROJECT_ID}.{DATASET_NAME}.{FEATURES_TEST_TABLE}`)
  WHERE snapshot_ts = '{SELECTED_SNAPSHOT_DATE}';
"""

# Run prediction and load the scored rows into a DataFrame
print(prediction_query)
df_test_predictions = bq_utils.run_query(prediction_query).to_dataframe()

# Size of the prediction data frame
print(df_test_predictions.shape)

### Read the prediction test dataset (if already scored)

In [None]:
# SQL that fetches the actual label, the predicted label and the snapshot
# timestamp of the already scored instances for the selected snapshot date.
read_query = f"""
SELECT
  {ACTUAL_LABEL_NAME},
  {PREDICTED_LABEL_NAME},
  snapshot_ts
FROM
  `{PROJECT_ID}.{DATASET_NAME}.{FEATURES_TEST_PREDICTIONS_TABLE}`
WHERE
  snapshot_ts = '{SELECTED_SNAPSHOT_DATE}';
"""

# Echo the SQL, then execute it and load the result into a DataFrame
print(read_query)
query_result = bq_utils.run_query(read_query)
df_test_predictions = query_result.to_dataframe()

# Report the (rows, columns) shape of the loaded predictions
print(df_test_predictions.shape)

### Experiment Design I: Different Remarketing LTV Groups

One way to use the output from an LTV Model to optimize marketing is to first define different audience groups based on the predicted LTV value (such as High, Medium and Low LTV groups) and then test the same or different marketing strategies with those. This strategy is more useful to understand how different LTV groups respond to remarketing campaigns.

The following step estimates the statistical sample sizes required for the different groups (bins) of the predicted LTV, for each combination of the expected minimum uplift/effect size, statistical power and statistical confidence level specified as input parameters, using a statistical t-test.

Expected output: a Pandas Dataframe containing statistical sample size for each bin for each combination of minimum uplift_percentage, statistical power and statistical confidence level.

Based on the estimated sample sizes and the available group sizes, one can decide which setting (expected minimum uplift/effect size at a given statistical power and confidence level) to select for the experiment. The selected sample sizes can then be used to set the Test and Control cohorts from each LTV group when implementing the media experiment.

In [None]:
# Actual and predicted LTV values of the selected snapshot, as arrays
actual_ltv = df_test_predictions[ACTUAL_LABEL_NAME].values
predicted_ltv = df_test_predictions[PREDICTED_LABEL_NAME].values

# Sample sizes per predicted-LTV bin for every combination of uplift, power
# and confidence level; the returned value is displayed as the cell output.
ab_testing_design.calc_t_sample_sizes_for_bins(
    labels=actual_ltv,
    numeric_predictions=predicted_ltv,
    number_bins=3,  # High, Medium and Low LTV bins
    uplift_percentages=(10, 15),  # minimum expected effect sizes
    power_percentages=(80, 90),
    confidence_level_percentages=(90, 95),
)

### Experiment Design II: Top LTV Group

Another way to use the output from an LTV Model to optimize marketing is to target the top X% of users having the highest predicted LTV in a remarketing campaign, or in an acquisition campaign with a similar audience strategy.

The following step estimates the statistical sample sizes required for the different cumulative groups (bins) of the predicted LTV (top X%, top 2X% and so on), for each combination of the expected minimum uplift/effect size, statistical power and statistical confidence level specified as input parameters, using a statistical t-test.

Expected output: a Pandas Dataframe containing statistical sample size for each cumulative bin for each combination of minimum uplift_percentage, statistical power and statistical confidence level.

Based on the estimated sample sizes and the available group sizes, one can decide which setting (which top X% of users, with the expected minimum uplift/effect size at a given statistical power and confidence level) to select for the experiment. The selected sample size can then be used to set the Test and Control cohorts from the top X% when implementing the media experiment.

In [None]:
# Inputs for the cumulative-bin design: actual and predicted LTV arrays plus
# the grid of uplift, power and confidence settings to evaluate.
cumulative_design_inputs = dict(
    labels=df_test_predictions[ACTUAL_LABEL_NAME].values,
    numeric_predictions=df_test_predictions[PREDICTED_LABEL_NAME].values,
    number_bins=10,  # top 10%, 20%, ..., 100%
    uplift_percentages=(10, 15),  # minimum expected effect sizes
    power_percentages=(80, 90),
    confidence_level_percentages=(90, 95),
)

# The returned value is displayed as the cell output.
ab_testing_design.calc_t_sample_sizes_for_cumulative_bins(
    **cumulative_design_inputs)