In [1]:
# Test Pareto Optimizer

import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from robyn.data.entities.mmmdata import MMMData
from robyn.modeling.entities.modeloutputs import ModelOutputs, Trial
from robyn.modeling.pareto.pareto_optimizer import ParetoOptimizer
from robyn.data.entities.enums import DependentVarType, PaidMediaSigns, OrganicSigns, ContextSigns


In [2]:

# 1. Build a synthetic daily dataset and wrap it in an MMMData instance

# One row per calendar day of 2022
date_range = pd.date_range(start='2022-01-01', end='2022-12-31', freq='D')
n_days = len(date_range)

# Seed NumPy's global RNG so the dummy values are reproducible.
# The columns are drawn in a fixed order; reordering the draws changes the data.
np.random.seed(42)
dummy_columns = {'date': date_range}
dummy_columns['sales'] = np.random.randint(1000, 2000, size=n_days)
dummy_columns['tv_spend'] = np.random.randint(100, 500, size=n_days)
dummy_columns['radio_spend'] = np.random.randint(50, 200, size=n_days)
dummy_columns['social_media_spend'] = np.random.randint(200, 600, size=n_days)
dummy_columns['competitor_sales'] = np.random.randint(800, 1500, size=n_days)
# Binary flag, set on roughly 10% of days
dummy_columns['holiday'] = np.random.choice([0, 1], size=n_days, p=[0.9, 0.1])
data = pd.DataFrame(dummy_columns)

# Describe which column plays which role in the model
paid_media_channels = ['tv_spend', 'radio_spend', 'social_media_spend']
mmmdata_spec = MMMData.MMMDataSpec(
    dep_var='sales',
    dep_var_type=DependentVarType.REVENUE,
    date_var='date',
    paid_media_spends=paid_media_channels,
    paid_media_vars=None,
    # One sign per paid media channel
    paid_media_signs=[PaidMediaSigns.POSITIVE] * len(paid_media_channels),
    organic_vars=['competitor_sales'],
    organic_signs=[OrganicSigns.NEGATIVE],
    context_vars=['holiday'],
    context_signs=[ContextSigns.POSITIVE],
    factor_vars=None,
    # Modelling window spans the full generated date range
    window_start=datetime(2022, 1, 1),
    window_end=datetime(2022, 12, 31)
)

# Bundle the raw frame with its specification
mmm_data = MMMData(data, mmmdata_spec)


In [3]:

# 2. Create dummy ModelOutputs

def create_dummy_trial(trial_num, num_solutions=10, media_channels=None):
    """Build a ``Trial`` filled with random placeholder results.

    Random values are drawn from NumPy's global RNG, so results depend on
    the current global seed state.

    Args:
        trial_num: Index of the trial. Used in the generated solution ids
            (``sol_<trial_num>_<i>``), as ``iterPar`` and as ``pos``/``trial``.
            Lift calibration data is only generated for even trial numbers;
            odd trials get ``lift_calibration=None``.
        num_solutions: Number of candidate solutions to generate (>= 1).
        media_channels: Names of the media channels to generate decomposition
            rows for. Defaults to the three paid media spend columns of the
            dummy dataset (``tv_spend``, ``radio_spend``,
            ``social_media_spend``) so that every channel referenced here
            also exists in the data.

    Returns:
        A ``Trial`` whose scalar metrics are taken from the first generated
        solution.

    Raises:
        ValueError: If ``num_solutions`` is less than 1 or ``media_channels``
            is empty.
    """
    if media_channels is None:
        # Keep in sync with the paid_media_spends of the dummy dataset;
        # channels that are missing from the data cannot be looked up later.
        media_channels = ['tv_spend', 'radio_spend', 'social_media_spend']
    else:
        media_channels = list(media_channels)

    if num_solutions < 1:
        raise ValueError("num_solutions must be at least 1")
    if not media_channels:
        raise ValueError("media_channels must not be empty")

    solution_ids = [f'sol_{trial_num}_{i}' for i in range(num_solutions)]

    # Long-format layout shared by the per-channel frames:
    # one row per (solution, channel), channels cycling fastest.
    n_rows = num_solutions * len(media_channels)
    row_solution_ids = [sol_id for sol_id in solution_ids for _ in media_channels]
    row_channels = media_channels * num_solutions
    row_iter_ng = [1] * n_rows
    row_iter_par = [trial_num] * n_rows

    result_hyp_param = pd.DataFrame({
        'solID': solution_ids,
        'nrmse': np.random.uniform(0.1, 0.3, num_solutions),
        'nrmse_train': np.random.uniform(0.1, 0.3, num_solutions),
        'nrmse_test': np.random.uniform(0.1, 0.3, num_solutions),
        'decomp.rssd': np.random.uniform(0.1, 0.3, num_solutions),
        'mape': np.random.uniform(5, 15, num_solutions),
        'rsq_train': np.random.uniform(0.7, 0.9, num_solutions),
        'rsq_val': np.random.uniform(0.6, 0.8, num_solutions),
        'rsq_test': np.random.uniform(0.5, 0.7, num_solutions),
        'iterNG': [1] * num_solutions,
        'iterPar': [trial_num] * num_solutions,
        'lambda': np.random.uniform(0.01, 0.1, num_solutions),
        'lambda_hp': np.random.uniform(0.01, 0.1, num_solutions),
        'lambda_max': np.random.uniform(0.1, 0.5, num_solutions),
        'lambda_min_ratio': np.random.uniform(0.01, 0.1, num_solutions)
    })

    # Ensure some solutions are Pareto-optimal: nrmse ascending against
    # decomp.rssd descending means a lower nrmse always comes with a higher
    # decomp.rssd, so on these two metrics no solution beats another on both.
    result_hyp_param['nrmse'] = np.sort(result_hyp_param['nrmse'])
    result_hyp_param['decomp.rssd'] = np.sort(result_hyp_param['decomp.rssd'])[::-1]

    x_decomp_agg = pd.DataFrame({
        'solID': row_solution_ids,
        'rn': row_channels,
        'coef': np.random.uniform(0.1, 0.5, n_rows),
        'boot_mean': np.random.uniform(0.1, 0.5, n_rows),
        'iterNG': row_iter_ng,
        'iterPar': row_iter_par
    })

    decomp_spend_dist = pd.DataFrame({
        'solID': row_solution_ids,
        'rn': row_channels,
        'mean_spend': np.random.randint(100, 500, n_rows),
        'total_spend': np.random.randint(10000, 50000, n_rows),
        'xDecompAgg': np.random.randint(5000, 20000, n_rows),
        'spend_share': np.random.uniform(0, 1, n_rows),
        'effect_share': np.random.uniform(0, 1, n_rows),
        'iterNG': row_iter_ng,
        'iterPar': row_iter_par
    })

    # Only even-numbered trials carry lift calibration data
    if trial_num % 2 == 0:
        lift_calibration = pd.DataFrame({
            'liftMedia': row_channels,
            'lift': np.random.uniform(1, 2, n_rows),
            'iterNG': row_iter_ng,
            'iterPar': row_iter_par,
            'solID': row_solution_ids
        })
    else:
        lift_calibration = None

    def _first(column):
        # Trial-level scalars mirror the first generated solution
        return result_hyp_param[column].values[0]

    return Trial(
        result_hyp_param=result_hyp_param,
        x_decomp_agg=x_decomp_agg,
        decomp_spend_dist=decomp_spend_dist,
        lift_calibration=lift_calibration,
        nrmse=_first('nrmse'),
        decomp_rssd=_first('decomp.rssd'),
        mape=_first('mape'),
        rsq_train=_first('rsq_train'),
        rsq_val=_first('rsq_val'),
        rsq_test=_first('rsq_test'),
        lambda_=_first('lambda'),
        lambda_hp=_first('lambda_hp'),
        lambda_max=_first('lambda_max'),
        lambda_min_ratio=_first('lambda_min_ratio'),
        pos=trial_num,
        elapsed=np.random.uniform(10, 30),
        elapsed_accum=np.random.uniform(100, 300),
        trial=trial_num,
        iter_ng=1,
        iter_par=trial_num,
        train_size=0.7,
        sol_id=solution_ids[0]
    )

# Build the dummy trials; raise NUM_TRIALS to generate more of them.
NUM_TRIALS = 1
trials = [create_dummy_trial(i) for i in range(NUM_TRIALS)]

# Collect the trials together with placeholder run metadata.
model_outputs = ModelOutputs(
    trials=trials,
    train_timestamp='2023-05-01 10:00:00',
    cores=1,
    iterations=10,
    intercept=True,
    intercept_sign='positive',
    nevergrad_algo='NGOpt',
    ts_validation=True,
    add_penalty_factor=True,
    hyper_updated={},
    hyper_fixed=False,
    convergence={},
    # Must name a solution that actually exists: create_dummy_trial generates
    # ids of the form 'sol_<trial>_<index>', so the old value 'sol_5' matched none.
    select_id='sol_0_5',
    seed=42,
    hyper_bound_ng={},
    hyper_bound_fixed={}
)

In [4]:

# 3. Wire the dummy data and dummy model outputs into a ParetoOptimizer
pareto_optimizer = ParetoOptimizer(mmm_data, model_outputs)

# 4. Run the optimization, letting it choose the number of Pareto fronts
#    and asking for at least one candidate model
pareto_result = pareto_optimizer.optimize(
    pareto_fronts="auto",
    min_candidates=1,
)


>> Automatically selected 1 Pareto-fronts to contain at least 1 pareto-optimal models (1)
>>> Calculating response curves for all models' media variables (0)...
resp_collect Empty DataFrame
Columns: []
Index: []
pareto_data.decomp_spend_dist Empty DataFrame
Columns: [solID, rn, mean_spend, total_spend, xDecompAgg, spend_share, effect_share, iterNG, iterPar, trial, robynPareto]
Index: []


  pareto_fronts_df = pd.concat([pareto_fronts_df, pareto_front])


KeyError: 'solID'

In [None]:

# 5. Report what the optimizer returned
print("Pareto Optimization Results:")
print(f"Number of Pareto solutions: {len(pareto_result.pareto_solutions)}")
print(f"Number of Pareto fronts: {pareto_result.pareto_fronts}")

summary_columns = ['solID', 'nrmse', 'decomp.rssd', 'mape', 'robynPareto']
print("\nPareto-optimal solutions:")
print(pareto_result.result_hyp_param[summary_columns])

# Preview the first rows of each collected frame. Attributes are resolved
# lazily inside the loop so the output order matches a statement-by-statement run.
preview_sections = (
    ("\nAggregated decomposition results:", 'x_decomp_agg'),
    ("\nMedia vector collection:", 'media_vec_collect'),
    ("\nDecomposition vector collection:", 'x_decomp_vec_collect'),
)
for heading, attr_name in preview_sections:
    print(heading)
    print(getattr(pareto_result, attr_name).head())

# 6. Sanity-check the result: something was selected and no frame is empty
assert len(pareto_result.pareto_solutions) > 0, "No Pareto-optimal solutions found"
assert pareto_result.pareto_fronts > 0, "Invalid number of Pareto fronts"

non_empty_checks = (
    ('result_hyp_param', "Empty result_hyp_param DataFrame"),
    ('x_decomp_agg', "Empty x_decomp_agg DataFrame"),
    ('media_vec_collect', "Empty media_vec_collect DataFrame"),
    ('x_decomp_vec_collect', "Empty x_decomp_vec_collect DataFrame"),
)
for attr_name, failure_message in non_empty_checks:
    assert not getattr(pareto_result, attr_name).empty, failure_message

print("\nAll assertions passed. The optimize function is working as expected.")