Skip to content

Commit

Permalink
fix a few utils for net profit optimization
Browse files Browse the repository at this point in the history
  • Loading branch information
edwinnglabs committed Dec 12, 2023
1 parent 7352aa0 commit 0fb16cb
Show file tree
Hide file tree
Showing 5 changed files with 899 additions and 278 deletions.
568 changes: 434 additions & 134 deletions docs/examples/net_returns_max.ipynb

Large diffs are not rendered by default.

404 changes: 289 additions & 115 deletions docs/examples/target_max.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion karpiu/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
name = "karpiu"
__version__ = "0.0.1"
__version__ = "0.0.2"
80 changes: 72 additions & 8 deletions karpiu/planning/optim/channel_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def __init__(
for idx in range(len(self.full_channels)):
if ch == self.full_channels[idx]:
self.optim_channels_idx.append(idx)
self.logger.info("Optimizing channels : {}".format(self.optim_channels))

# this is more for calculating all attribution required math
super().__init__(
Expand All @@ -49,8 +50,6 @@ def __init__(
self.budget_start = self.start
self.budget_end = self.end

self.logger.info("Optimizing channels : {}".format(self.optim_channels))

self.response_scaler = response_scaler
self.spend_scaler = spend_scaler
self.constraints = list()
Expand Down Expand Up @@ -100,7 +99,10 @@ def __init__(
# spend allocation on time dimension
# (n_budget_steps, )
if weight is None:
self.weight = self.base_comp_input / np.sum(self.base_comp_input)
# self.weight = self.base_comp_input / np.sum(self.base_comp_input)
self.weight = np.sum(self.init_spend_matrix, -1) / np.sum(
self.init_spend_matrix
)
else:
self.weight = weight

Expand Down Expand Up @@ -256,8 +258,10 @@ def __init__(self, attributor: AttributorGamma, ltv_arr: np.ndarray, **kwargs):
self.attributor = attributor

def objective_func(self, spend: np.ndarray, extra_info: bool = False):
# spend(n_optim_channels, ) -> (broadcast) -> spend matrix (n_budget_steps, n_optim_channels)
# time weight(n_budget_steps, ) -> (expand_dim) -> spend matrix (n_budget_steps, n_optim_channels)
# spend(n_optim_channels, ) -> (broadcast) -> input spend matrix (n_budget_steps, n_optim_channels)
# time weight(n_budget_steps, ) -> (expand_dim) -> time weight(n_budget_steps, 1)
# input spend matrix (n_budget_steps, n_optim_channels) * time weight(n_budget_steps, 1)
# -> (multiply) -> distributed spend matrix
input_channel_spend_matrix = (
spend
* np.ones((self.n_budget_steps, self.n_optim_channels))
Expand All @@ -273,8 +277,7 @@ def objective_func(self, spend: np.ndarray, extra_info: bool = False):
spend_matrix = np.concatenate(
[zero_paddings.copy(), spend_matrix, zero_paddings.copy()], axis=0
)

spend_matrix += self.target_regressor_bkg_matrix
spend_matrix += self.target_regressor_bkg_matrix

target_coef_array = self.target_coef_array
target_transformed_matrix = self.attributor._derive_target_transformed_matrix(
Expand Down Expand Up @@ -303,7 +306,8 @@ def objective_func(self, spend: np.ndarray, extra_info: bool = False):
attr_marketing=attr_marketing,
)

# For attribution, revenue, and cost are calculated with all channels spend (not just the two we are optimizing) as the input
# For attribution, revenue, and cost are calculated
# with all channels spend (not just the two we are optimizing) as the shape
# (n_optim_channels, )
revenue = self.ltv_arr * np.sum(spend_attr_matrix, 0)
# (n_optim_channels, )
Expand Down Expand Up @@ -331,3 +335,63 @@ def optim_callback(self, xk: np.ndarray, *_):
revs, costs = self.objective_func(xk, extra_info=True)
self.callback_metrics["optim_revenues"].append(revs)
self.callback_metrics["optim_costs"].append(costs)

def ch_based_net_profit_response_curve(
    ch_npm: ChannelNetProfitMaximizer, model: MMM, n_iters=10
):
    """Evaluate net profit over a 2-D grid of spend totals for two channels.

    A ``n_iters x n_iters`` grid of (x1, x2) channel totals, each ranging from
    0 to ``ch_npm.total_budget``, is built with ``np.meshgrid``. For every grid
    point the spend of ``ch_npm.optim_channels`` inside the budget window is
    overwritten with the channel total distributed over time by
    ``ch_npm.weight``; a fresh ``AttributorGamma`` is then run on the modified
    data frame and the net profit is computed as attributed spend times
    ``ch_npm.ltv_arr`` minus the total spend of all spend columns in the
    window.

    Args:
        ch_npm: optimizer providing ``total_budget``, ``date_col``,
            ``budget_start``, ``budget_end``, ``weight``, ``df``,
            ``optim_channels`` and ``ltv_arr``.
            NOTE(review): exactly two optim channels are assumed, since the
            grid is two-dimensional -- confirm against callers.
        model: model passed to ``AttributorGamma``; its ``get_spend_cols()``
            selects the columns used for both revenue and cost.
        n_iters: number of grid points per axis.

    Returns:
        Tuple ``(x1s, x2s, net_profits)``; each element is an array of shape
        ``(n_iters, n_iters)``.
    """
    total_budget = ch_npm.total_budget
    date_col = ch_npm.date_col
    budget_start = ch_npm.budget_start
    budget_end = ch_npm.budget_end
    base_spend_df = ch_npm.df
    optim_channels = ch_npm.optim_channels
    ltv_arr = ch_npm.ltv_arr
    spend_cols = model.get_spend_cols()

    # Force the time weight into a column vector so that multiplying by the
    # (2,) vector of channel totals yields a (n_steps, 2) matrix. Multiplying
    # the 1-D weight directly against the (2,) vector either fails to
    # broadcast or silently pairs channel i with time step i.
    time_steps_weight = np.reshape(np.asarray(ch_npm.weight), (-1, 1))

    # The date column is never modified below, so the budget-window mask is
    # the same for every grid point and can be computed once.
    budget_mask = (base_spend_df[date_col] >= budget_start) & (
        base_spend_df[date_col] <= budget_end
    )

    # Dedicated logger so that the repeated attribution runs only report
    # warnings and above.
    logger = logging.getLogger("karpiu-planning-test")
    logger.setLevel(logging.WARNING)

    def ch_based_net_profit_response(x1, x2):
        """Return the net profit for channel totals ``x1`` and ``x2``."""
        # (n_steps, n_channels)
        input_spend_matrix = time_steps_weight * np.array([x1, x2])
        temp_spend_df = base_spend_df.copy()
        temp_spend_df.loc[budget_mask, optim_channels] = input_spend_matrix

        attributor = AttributorGamma(
            model=model,
            df=temp_spend_df,
            start=budget_start,
            end=budget_end,
            logger=logger,
        )
        _, spend_attr, _, _ = attributor.make_attribution()

        # Revenue and cost are both computed over all spend channels, not
        # just the two being varied.
        cost = np.sum(temp_spend_df.loc[budget_mask, spend_cols].values)
        revenue = np.sum(spend_attr.loc[:, spend_cols].values * ltv_arr)
        return revenue - cost

    grid_axis = total_budget * np.linspace(0, 1, n_iters)
    x1s, x2s = np.meshgrid(grid_axis, grid_axis)

    net_profits = np.empty((n_iters, n_iters))
    for i, j in np.ndindex(n_iters, n_iters):
        net_profits[i, j] = ch_based_net_profit_response(x1s[i, j], x2s[i, j])

    return x1s, x2s, net_profits
123 changes: 103 additions & 20 deletions karpiu/planning/optim/time_base_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,19 @@ def __init__(
else:
self.logger = logger

self.optim_channels = optim_channels
self.full_channels = model.get_spend_cols()
self.optim_channels_idx = list()
for ch in optim_channels:
for idx in range(len(self.full_channels)):
if ch == self.full_channels[idx]:
self.optim_channels_idx.append(idx)
self.logger.info("Optimizing channels : {}".format(self.optim_channels))

# this is more for calculating all attribution required math
super().__init__(
model=model,
target_regressors=optim_channels,
target_regressors=self.full_channels,
start=budget_start,
end=budget_end,
)
Expand All @@ -49,8 +59,6 @@ def __init__(

self.budget_start = self.start
self.budget_end = self.end
self.optim_channels = optim_channels
self.logger.info("Optimizing channels : {}".format(self.optim_channels))

self.response_scaler = response_scaler
self.spend_scaler = spend_scaler
Expand Down Expand Up @@ -93,6 +101,10 @@ def __init__(
ub=np.ones(self.n_budget_steps) * np.inf,
)

self.full_channels_spend_matrix = self.df.loc[
self.budget_mask, self.full_channels
].values

# spend allocation on time dimension
# (1, n_channels)
if weight is None:
Expand Down Expand Up @@ -209,8 +221,8 @@ def set_bounds_and_constraints(self, df: pd.DataFrame) -> None:
"""_summary_
Args:
df (pd.DataFrame): must contain column named as "channel" which can map with the channel index; a special
channel can be specified once as "total" which will be used as budget constraints instead of bounds
df (pd.DataFrame): df assumes each index sequentially represents the time step in optimization period;
a special index can be specified once as "total" which will be used as budget constraints instead of bounds
"""
# "date" is a reserved keyword
self.bounds_and_constraints_df = df
Expand Down Expand Up @@ -263,19 +275,24 @@ def __init__(
self.attributor = attributor

def objective_func(self, spend: np.ndarray, extra_info: bool = False):
# spend weight(n_optim_channels, ) -> (broadcast) -> spend matrix (n_budget_steps, n_optim_channels)
# time-based spend (n_budget_steps, ) -> (expand_dim) -> spend matrix (n_budget_steps, n_optim_channels)
spend_matrix = (
# spend(n_budget_steps, ) -> (expand) -> spend(n_budget_steps, 1)
# (multiply channels weight(n_optim_channels)
# -> (broadcast) -> input_channel_spend_matrix (n_budget_steps, n_optim_channels)
input_channel_spend_matrix = (
np.expand_dims(spend, -1)
# * np.ones((self.n_budget_steps, self.n_optim_channels))
* self.weight
)
zero_paddings = np.zeros((self.max_adstock, self.n_optim_channels))
# (n_calc_steps, n_optim_channels)
spend_matrix = np.concatenate(
[zero_paddings.copy(), spend_matrix, zero_paddings.copy()], axis=0
)
spend_matrix += self.target_regressor_bkg_matrix
# the full spend matrix pass into attribution calculation
spend_matrix = self.full_channels_spend_matrix.copy()
spend_matrix[:, self.optim_channels_idx] = input_channel_spend_matrix

if self.max_adstock > 0:
zero_paddings = np.zeros((self.max_adstock, self.n_optim_channels))
# (n_calc_steps, n_optim_channels)
spend_matrix = np.concatenate(
[zero_paddings.copy(), spend_matrix, zero_paddings.copy()], axis=0
)
spend_matrix += self.target_regressor_bkg_matrix

target_coef_array = self.target_coef_array
target_transformed_matrix = self.attributor._derive_target_transformed_matrix(
Expand Down Expand Up @@ -303,17 +320,18 @@ def objective_func(self, spend: np.ndarray, extra_info: bool = False):
attr_marketing=attr_marketing,
)

# For attribution, revenue, and cost are calculated
# with all channels spend (not just the two we are optimizing) as the shape
# (n_optim_channels, )
# ignore first column which is organic
revenue = np.sum(self.ltv_arr * np.sum(spend_attr_matrix, 0))
revenue = self.ltv_arr * np.sum(spend_attr_matrix, 0)
# (n_optim_channels, )
cost = np.sum(spend_matrix)
net_profit = revenue - cost
cost = np.sum(spend_matrix, 0)
net_profit = np.sum(revenue - cost)
loss = -1 * net_profit / self.response_scaler
# add punishment of variance of spend; otherwise may risk of identifiability issue with adstock
loss += self.variance_penalty * np.var(spend)
if extra_info:
return revenue, cost
return np.sum(revenue), np.sum(cost)
else:
return loss

Expand All @@ -333,3 +351,68 @@ def optim_callback(self, xk: np.ndarray, *_):
revs, costs = self.objective_func(xk, extra_info=True)
self.callback_metrics["optim_revenues"].append(revs)
self.callback_metrics["optim_costs"].append(costs)


# NOTE: assumes the budget window (budget_start to budget_end) spans exactly two time steps
def time_based_net_profit_response_curve(
    t_npm: TimeNetProfitMaximizer, model: MMM, n_iters=10
):
    """Evaluate net profit over a 2-D grid of per-time-step spend totals.

    A ``n_iters x n_iters`` grid of (x1, x2) totals, each ranging from 0 to
    ``t_npm.total_budget``, is built with ``np.meshgrid``. For every grid
    point the two totals are stacked as two rows, multiplied by
    ``t_npm.weight`` to split them across channels, and written into
    ``t_npm.optim_channels`` inside the budget window. A fresh
    ``AttributorGamma`` is then run on the modified data frame and the net
    profit is computed as attributed spend times ``t_npm.ltv_arr`` minus the
    total spend of all spend columns in the window.

    Args:
        t_npm: optimizer providing ``total_budget``, ``date_col``,
            ``budget_start``, ``budget_end``, ``weight``, ``df``,
            ``optim_channels`` and ``ltv_arr``.
            NOTE(review): the budget window is assumed to contain exactly two
            time steps, matching the two stacked rows -- confirm against
            callers.
        model: model passed to ``AttributorGamma``; its ``get_spend_cols()``
            selects the columns used for both revenue and cost.
        n_iters: number of grid points per axis.

    Returns:
        Tuple ``(x1s, x2s, net_profits)``; each element is an array of shape
        ``(n_iters, n_iters)``.
    """
    total_budget = t_npm.total_budget
    date_col = t_npm.date_col
    budget_start = t_npm.budget_start
    budget_end = t_npm.budget_end
    base_spend_df = t_npm.df
    optim_channels = t_npm.optim_channels
    channels_weight = t_npm.weight
    ltv_arr = t_npm.ltv_arr
    spend_cols = model.get_spend_cols()

    # The date column is never modified below, so the budget-window mask is
    # the same for every grid point and can be computed once.
    budget_mask = (base_spend_df[date_col] >= budget_start) & (
        base_spend_df[date_col] <= budget_end
    )

    # Dedicated logger so that the repeated attribution runs only report
    # warnings and above.
    logger = logging.getLogger("karpiu-planning-test")
    logger.setLevel(logging.WARNING)

    def time_based_net_profit_response(x1, x2):
        """Return the net profit for time-step totals ``x1`` and ``x2``."""
        # (n_steps, n_channels): the (2, 1) column of step totals broadcast
        # against the channel weights
        input_spend_matrix = np.vstack([x1, x2]) * channels_weight
        temp_spend_df = base_spend_df.copy()
        temp_spend_df.loc[budget_mask, optim_channels] = input_spend_matrix

        attributor = AttributorGamma(
            model=model,
            df=temp_spend_df,
            start=budget_start,
            end=budget_end,
            logger=logger,
        )
        _, spend_attr, _, _ = attributor.make_attribution()

        # Revenue and cost are both computed over all spend channels, not
        # just the ones being varied.
        cost = np.sum(temp_spend_df.loc[budget_mask, spend_cols].values)
        revenue = np.sum(spend_attr.loc[:, spend_cols].values * ltv_arr)
        return revenue - cost

    grid_axis = total_budget * np.linspace(0, 1, n_iters)
    x1s, x2s = np.meshgrid(grid_axis, grid_axis)

    net_profits = np.empty((n_iters, n_iters))
    for i, j in np.ndindex(n_iters, n_iters):
        net_profits[i, j] = time_based_net_profit_response(
            x1s[i, j], x2s[i, j]
        )

    return x1s, x2s, net_profits



0 comments on commit 0fb16cb

Please sign in to comment.