In [None]:
import sys

sys.path.append("/Users/yijuilee/robynpy_release_reviews/Robyn/python/src")

In [None]:
# Test Pareto Optimizer

import pandas as pd
import json
from typing import Dict, Any, List
import numpy as np
from datetime import datetime, timedelta
from robyn.data.entities.mmmdata import MMMData
from robyn.modeling.entities.modeloutputs import ModelOutputs, Trial
from robyn.modeling.pareto.pareto_optimizer import ParetoOptimizer, ParetoData
from robyn.data.entities.enums import DependentVarType, PaidMediaSigns, OrganicSigns, ContextSigns

from utils.data_mapper import import_data, load_data_from_json

In [None]:
# Load data from JSON
loaded_data = load_data_from_json(
    "/Users/yijuilee/robynpy_release_reviews/Robyn/python/src/tutorials/utils/test_data.json"
)
imported_data = import_data(loaded_data)
model_outputs = imported_data["model_outputs"]
display((model_outputs.hyper_bound_ng))

In [None]:
mmm_data = imported_data["mmm_data"]
# display(mmm_data.data.head())
# Display Model Outputs

model_outputs = imported_data["model_outputs"]
# display((model_outputs.trials[0].result_hyp_param))

hyperparameters = imported_data["hyperparameters"]
# display(hyperparameters)

featurized_mmm_data = imported_data["featurized_mmm_data"]

holidays_data = imported_data["holidays_data"]

In [None]:
# 3. Create ParetoOptimizer instance
pareto_optimizer = ParetoOptimizer(mmm_data, model_outputs, hyperparameters, featurized_mmm_data, holidays_data)

In [None]:
# 4. Run optimize function
pareto_result = pareto_optimizer.optimize(pareto_fronts="auto", min_candidates=100)

In [None]:
# 5. Check results
print("Pareto Optimization Results:")
print(f"Number of Pareto fronts: {len(pareto_result.pareto_solutions)}")
print(f"MediaVecCollect: {pareto_result.media_vec_collect.shape, pareto_result.media_vec_collect}")
print("\Hyper parameter solutions:")
print(pareto_result.result_hyp_param)

print("\nAggregated decomposition results:")
print(pareto_result.x_decomp_agg)
print("\result Calibration:")
print(pareto_result.result_calibration)
print("\nx Decomp Vec Collect:")
print(pareto_result.x_decomp_vec_collect.shape, pareto_result.x_decomp_vec_collect)
print("\nCarryover percentage all:")
print(pareto_result.df_caov_pct_all.shape, pareto_result.df_caov_pct_all)
print("\Plot Data Collected")
print("NUMBER OF PLOTS Data collected for:", len(pareto_result.plot_data_collect["2_85_2"]))
print("Plot data for solid 2_85_2", pareto_result.plot_data_collect["2_85_2"])

# 6. Validate logic
assert pareto_result.pareto_fronts == "auto" or isinstance(
    pareto_result.pareto_fronts, int
), "Invalid pareto_fronts value"
assert not pareto_result.result_hyp_param.empty, "Empty result_hyp_param DataFrame"
assert not pareto_result.x_decomp_agg.empty, "Empty x_decomp_agg DataFrame"

print("\nAll assertions passed. The optimize function is working as expected.")

## Allocator

In [24]:
from typing import Optional

print("Step 1: Debug ModelOutputs Data")
print("------------------------------")

# Check ModelOutputs contents
print("\nModelOutputs Structure:")
print(f"Number of trials: {len(model_outputs.trials)}")
print(f"Selected model ID: {model_outputs.select_id}")

# Check first trial data
if model_outputs.trials:
    first_trial = model_outputs.trials[0]
    print("\nFirst Trial Data:")
    print(f"Solution ID: {first_trial.sol_id}")
    print("\nx_decomp_agg columns:")
    print(first_trial.x_decomp_agg.columns.tolist())
    print("\nx_decomp_agg first few rows:")
    print(first_trial.x_decomp_agg.head())

# Check aggregated data
print("\nAggregated Data Status:")
print(f"all_x_decomp_agg empty? {model_outputs.all_x_decomp_agg.empty}")
if not model_outputs.all_x_decomp_agg.empty:
    print("all_x_decomp_agg columns:")
    print(model_outputs.all_x_decomp_agg.columns.tolist())
    print("\nFirst few rows:")
    print(model_outputs.all_x_decomp_agg.head())

# Let's try to aggregate the data if it's not already done
print("\nAttempting to aggregate trial data...")
try:
    # Combine x_decomp_agg from all trials
    all_decomp = pd.concat([trial.x_decomp_agg for trial in model_outputs.trials])
    print("\nManually aggregated data:")
    print(f"Shape: {all_decomp.shape}")
    print("Columns:", all_decomp.columns.tolist())
    if "solID" not in all_decomp.columns:
        print("\nNOTE: 'solID' column missing from trial data")
        # Check what might be the identifier column
        print("Available columns that might be identifiers:")
        id_columns = [
            col for col in all_decomp.columns if any(x in col.lower() for x in ["id", "solution", "sol", "model"])
        ]
        print(id_columns)
except Exception as e:
    print(f"\nError aggregating trial data: {str(e)}")

print("\nStep 2: Attempting to find model identifier")
print("----------------------------------------")

# Try different potential column names
potential_id_columns = ["solID", "sol_id", "solution_id", "model_id", "trial_id"]


def check_column_in_df(df: pd.DataFrame, column_names: List[str]) -> Optional[str]:
    """Check which of the potential column names exists in the DataFrame."""
    for col in column_names:
        if col in df.columns:
            return col
    return None


for trial in model_outputs.trials:
    id_column = check_column_in_df(trial.x_decomp_agg, potential_id_columns)
    if id_column:
        print(f"Found ID column in trial data: {id_column}")
        print(f"Unique IDs: {trial.x_decomp_agg[id_column].unique()}")
        break

# Print full trial structure of first trial
if model_outputs.trials:
    print("\nComplete structure of first trial:")
    trial = model_outputs.trials[0]
    print(f"Trial attributes:")
    for attr in vars(trial):
        if isinstance(getattr(trial, attr), pd.DataFrame):
            df = getattr(trial, attr)
            print(f"\n{attr}:")
            print(f"Shape: {df.shape}")
            print(f"Columns: {df.columns.tolist()}")
            print(f"First row: {df.iloc[0].to_dict()}")

Step 1: Debug ModelOutputs Data
------------------------------

ModelOutputs Structure:
Number of trials: 2
Selected model ID: 

First Trial Data:
Solution ID: 

x_decomp_agg columns:
['rn', 'coef', 'xDecompAgg', 'xDecompPerc', 'xDecompMeanNon0', 'xDecompMeanNon0Perc', 'xDecompAggRF', 'xDecompPercRF', 'xDecompMeanNon0RF', 'xDecompMeanNon0PercRF', 'pos', 'train_size', 'rsq_train', 'rsq_val', 'rsq_test', 'nrmse_train', 'nrmse_val', 'nrmse_test', 'nrmse', 'decomp.rssd', 'mape', 'lambda', 'lambda_hp', 'lambda_max', 'lambda_min_ratio', 'solID', 'trial', 'iterNG', 'iterPar']

x_decomp_agg first few rows:
                   rn          coef    xDecompAgg  xDecompPerc  \
0         (Intercept)  1.490415e+06  2.339952e+08     0.838459   
1               trend  8.526674e-02  2.385925e+07     0.085493   
2              season  3.145922e-02  1.151622e+04     0.000041   
3             holiday  2.209385e-02  1.538306e+05     0.000551   
4  competitor_sales_B  1.061375e-02  9.247092e+06     0.033134  

IndexError: single positional indexer is out-of-bounds

In [25]:
import pandas as pd
import numpy as np
from datetime import datetime
from typing import Dict, Any

from robyn.data.entities.mmmdata import MMMData
from robyn.modeling.entities.modeloutputs import ModelOutputs
from robyn.data.entities.hyperparameters import Hyperparameters
from robyn.modeling.pareto.pareto_optimizer import ParetoOptimizer, ParetoData
from robyn.allocator.entities.enums import OptimizationScenario, ConstrMode
from robyn.allocator.budget_allocator import BudgetAllocator
from robyn.allocator.entities.allocation_config import AllocationConfig
from robyn.allocator.entities.allocation_constraints import AllocationConstraints

print("Step 1: Prepare Model Data")
print("------------------------")

# Aggregate trial data into all_x_decomp_agg
all_decomp_list = []
for trial in model_outputs.trials:
    all_decomp_list.append(trial.x_decomp_agg)

# Update model_outputs with aggregated data
model_outputs.all_x_decomp_agg = pd.concat(all_decomp_list, axis=0)

print(f"Data loaded successfully:")
print(
    f"- Data timeframe: {mmm_data.data[mmm_data.mmmdata_spec.date_var].min()} to {mmm_data.data[mmm_data.mmmdata_spec.date_var].max()}"
)
print(f"- Number of paid media channels: {len(mmm_data.mmmdata_spec.paid_media_spends)}")
print(f"- Channels: {mmm_data.mmmdata_spec.paid_media_spends}")

print("\nStep 2: Setup Budget Allocator")
print("------------------------------")

# Get available models from aggregated data
available_models = model_outputs.all_x_decomp_agg["solID"].unique()
print(f"Available model IDs: {available_models}")

# Select first available model or specific one
select_model = model_outputs.select_id if model_outputs.select_id else available_models[0]
print(f"Using model ID: {select_model}")

# Initialize budget allocator
try:
    allocator = BudgetAllocator(
        mmm_data=mmm_data, model_outputs=model_outputs, hyperparameter=hyperparameters, select_model=select_model
    )
    print(f"Successfully initialized allocator for model {select_model}")
except Exception as e:
    print(f"Error initializing allocator: {str(e)}")
    print("Available model IDs:", available_models)
    raise e

# Rest of your notebook remains the same...
print("\nStep 3: Configure Allocation")
print("---------------------------")

# Set channel constraints
channel_constraints_low = {channel: 0.7 for channel in mmm_data.mmmdata_spec.paid_media_spends}
channel_constraints_up = {channel: 1.2 for channel in mmm_data.mmmdata_spec.paid_media_spends}

# Create constraints object
constraints = AllocationConstraints(
    channel_constr_low=channel_constraints_low,
    channel_constr_up=channel_constraints_up,
    channel_constr_multiplier=3.0,  # optional, will default to 3.0 if not provided
)

# Create allocation config
max_response_config = AllocationConfig(
    scenario=OptimizationScenario.MAX_RESPONSE,
    total_budget=5000000,  # Example budget
    target_value=None,  # Not needed for max_response scenario
    date_range="last_10",
    constraints=constraints,
    maxeval=100000,  # optional
    optim_algo="SLSQP_AUGLAG",  # optional
    constr_mode=ConstrMode.EQUALITY,  # optional
    plots=True,
    export=True,
    quiet=False,
)

print("Max Response Configuration:")
print(f"- Total Budget: ${max_response_config.total_budget:,.0f}")
print(f"- Date Range: {max_response_config.date_range}")
print(f"- Optimization Algorithm: {max_response_config.optim_algo}")
print(f"- Constraint Mode: {max_response_config.constr_mode}")

print("\nChannel Constraints:")
for channel in mmm_data.mmmdata_spec.paid_media_spends:
    print(
        f"- {channel}: {constraints.channel_constr_low[channel]:.1f}x to {constraints.channel_constr_up[channel]:.1f}x"
    )

print("\nStep 4: Run Allocation Optimization")
print("---------------------------------")

# Run allocation
result = allocator.allocate(max_response_config)

# Display results
print("\nOptimization Results:")
print("--------------------")
print(f"Total Spend: ${result.metrics['total_spend']:,.0f}")
print(f"Total Response: {result.metrics['total_response']:,.0f}")
print(f"Response Lift: {result.metrics['response_lift']*100:.1f}%")
print("\nOptimal Channel Allocations:")

allocations_df = result.optimal_allocations
for channel in mmm_data.mmmdata_spec.paid_media_spends:
    current = allocations_df.loc[allocations_df["channel"] == channel, "current_spend"].iloc[0]
    optimal = allocations_df.loc[allocations_df["channel"] == channel, "optimal_spend"].iloc[0]
    change = (optimal / current - 1) * 100

    print(f"{channel:>10}: ${current:,.0f} -> ${optimal:,.0f} ({change:+.1f}%)")

print("\nStep 5: Target Efficiency Scenario")
print("--------------------------------")

# Configure target efficiency scenario
target_config = AllocationConfig(
    scenario=OptimizationScenario.TARGET_EFFICIENCY,
    total_budget=None,  # Will be determined by optimization
    target_value=2.0,  # Target ROAS/CPA
    date_range="last_10",
    constraints=constraints,
    constr_mode=ConstrMode.EQUALITY,
    plots=True,
    export=True,
    quiet=False,
)

# Run target efficiency optimization
target_result = allocator.allocate(target_config)

print("\nTarget Efficiency Results:")
print("-------------------------")
print(f"Total Spend: ${target_result.metrics['total_spend']:,.0f}")
print(f"Total Response: ${target_result.metrics['total_response']:,.0f}")
print(f"Achieved Efficiency: {target_result.metrics['efficiency']:.2f}")

print("\nStep 6: Available Visualizations")
print("------------------------------")

if result.plots or target_result.plots:
    print("\nAvailable Plots:")
    for plot_type, plot_data in result.plots.items():
        print(f"- {plot_type}")

print("\nDemo Complete")

Step 1: Prepare Model Data
------------------------
Data loaded successfully:
- Data timeframe: DATE    2015-11-23
dtype: object to DATE    2019-11-11
dtype: object
- Number of paid media channels: 5
- Channels: ['tv_S', 'ooh_S', 'print_S', 'facebook_S', 'search_S']

Step 2: Setup Budget Allocator
------------------------------
Available model IDs: ['1_1_1' '1_1_2' '1_1_3' ... '2_112_7' '2_112_8' '2_112_9']
Using model ID: 1_1_1
Error initializing allocator: 'ModelOutputs' object has no attribute 'x_decomp_agg'
Available model IDs: ['1_1_1' '1_1_2' '1_1_3' ... '2_112_7' '2_112_8' '2_112_9']


AttributeError: 'ModelOutputs' object has no attribute 'x_decomp_agg'