In [2]:
# %% [markdown]
# # Testing Pareto Optimizer

# %% [markdown]
# First, let's import the necessary modules and classes:

# %%
import pandas as pd
import numpy as np
from typing import List, Dict, Any
from dataclasses import dataclass
from robyn.data.entities.mmmdata import MMMData
from robyn.modeling.entities.modeloutputs import ModelOutputs
from robyn.modeling.pareto.pareto_optimizer import ParetoOptimizer
from robyn.modeling.pareto.pareto_utils import ParetoUtils

# %% [markdown]
# Now, let's define lightweight mock dataclasses that stand in for the real Robyn entities:

# %%
@dataclass
class MockMMMDataSpec:
    """Minimal stand-in for the spec object carried by the mock MMM data container.

    Only two window bounds are mocked. Both have defaults, so the spec can be
    built with no arguments.
    """
    # Lower window bound; defaults to 0.
    # NOTE(review): presumably a row index into the data frame — confirm
    # against what ParetoOptimizer actually reads.
    window_start: int = 0
    # Upper window bound; defaults to 100.
    # NOTE(review): whether this bound is inclusive is not visible here — confirm.
    window_end: int = 100

@dataclass
class MockMMMData:
    """Mock of the MMM data container passed to ``ParetoOptimizer`` in this notebook.

    Both fields are required (neither has a default).
    """
    # Spec object holding the window bounds.
    mmmdata_spec: MockMMMDataSpec
    # Input data frame; this notebook fills it with a date column, a sales
    # column and one spend column per channel.
    data: pd.DataFrame

@dataclass
class MockModelOutputs:
    """Mock of the model-outputs object passed to ``ParetoOptimizer`` in this notebook.

    ``media_vec_collect`` is declared before ``cores`` on purpose: a dataclass
    field without a default may not follow a field with one, otherwise
    ``@dataclass`` raises ``TypeError`` while the class is being created.
    Construct instances with keyword arguments so the field order never matters
    to callers.
    """
    # Required. Media vectors per solution; this notebook supplies the columns
    # 'solID', 'type' and one column per spend channel.
    media_vec_collect: pd.DataFrame
    # Optional. Number of cores requested for processing; defaults to 1.
    cores: int = 1

@dataclass
class MockParetoData:
    """Mock of the Pareto input bundle passed to ``ParetoOptimizer.optimize`` in this notebook.

    All three fields are required (none has a default).
    """
    # Spend/decomposition rows; this notebook supplies the columns 'solID',
    # 'rn', 'mean_spend', 'total_spend' and 'xDecompAgg'.
    decomp_spend_dist: pd.DataFrame
    # One row per solution with '<channel>_alphas' / '<channel>_gammas' columns
    # in this notebook.
    result_hyp_param: pd.DataFrame
    # Coefficient rows; this notebook supplies the columns 'solID', 'rn' and 'coef'.
    x_decomp_agg: pd.DataFrame

# %% [markdown]
# Let's create some mock data:

# %%
# Seeded generator shared by all mock frames below, so that every
# Restart & Run All reproduces exactly the same mock values.
rng = np.random.default_rng(seed=42)

# Create mock MMMData: 100 daily rows of sales plus three spend channels.
mock_mmm_data = MockMMMData(
    mmmdata_spec=MockMMMDataSpec(),
    data=pd.DataFrame({
        'date': pd.date_range(start='2020-01-01', periods=100),
        'sales': rng.random(100) * 1000,
        'tv_spend': rng.random(100) * 100,
        'radio_spend': rng.random(100) * 50,
        'online_spend': rng.random(100) * 200
    })
)

# Create mock ModelOutputs: three media-vector rows for each of two solutions.
mock_model_outputs = MockModelOutputs(
    cores=2,
    media_vec_collect=pd.DataFrame({
        'solID': ['model1', 'model1', 'model1', 'model2', 'model2', 'model2'],
        'type': ['adstockedMedia'] * 6,
        'tv_spend': rng.random(6) * 100,
        'radio_spend': rng.random(6) * 50,
        'online_spend': rng.random(6) * 200
    })
)

# Create mock ParetoData: one row per (solution, channel) pair in the two long
# frames, and one hyperparameter row per solution.
mock_pareto_data = MockParetoData(
    decomp_spend_dist=pd.DataFrame({
        'solID': ['model1', 'model1', 'model1', 'model2', 'model2', 'model2'],
        'rn': ['tv_spend', 'radio_spend', 'online_spend'] * 2,
        'mean_spend': rng.random(6) * 100,
        'total_spend': rng.random(6) * 1000,
        'xDecompAgg': rng.random(6) * 500
    }),
    result_hyp_param=pd.DataFrame({
        'solID': ['model1', 'model2'],
        'tv_spend_alphas': [0.5, 0.6],
        'tv_spend_gammas': [0.3, 0.4],
        'radio_spend_alphas': [0.4, 0.5],
        'radio_spend_gammas': [0.2, 0.3],
        'online_spend_alphas': [0.6, 0.7],
        'online_spend_gammas': [0.4, 0.5]
    }),
    x_decomp_agg=pd.DataFrame({
        'solID': ['model1', 'model1', 'model1', 'model2', 'model2', 'model2'],
        'rn': ['tv_spend', 'radio_spend', 'online_spend'] * 2,
        'coef': rng.random(6) * 0.1
    })
)

# %% [markdown]
# Now, let's create an instance of ParetoOptimizer and call the optimize function:

# %%
# Build the optimizer from the mock data container and the mock model outputs.
# NOTE(review): the mocks are plain dataclasses, not the imported MMMData /
# ModelOutputs entities — confirm ParetoOptimizer only reads the attributes
# the mocks define.
pareto_optimizer = ParetoOptimizer(mock_mmm_data, mock_model_outputs)
# Run the optimizer on the mock Pareto inputs. The cells below index `result`
# like a dict, using the keys 'decomp_spend_dist' and 'x_decomp_agg'.
result = pareto_optimizer.optimize(mock_pareto_data)

# %% [markdown]
# Let's examine the results:

# %%
# Top-level overview of what the optimizer returned.
print("Keys in the result dictionary:")
print(result.keys())

# For each returned frame, report its shape and then its column index.
# Each entry is (shape header, columns header, key into `result`).
for shape_header, columns_header, frame_key in (
    ("\nShape of decomp_spend_dist:", "\nColumns in decomp_spend_dist:", 'decomp_spend_dist'),
    ("\nShape of x_decomp_agg:", "\nColumns in x_decomp_agg:", 'x_decomp_agg'),
):
    print(shape_header)
    print(result[frame_key].shape)

    print(columns_header)
    print(result[frame_key].columns)

# %% [markdown]
# Let's check if the required calculations were performed:

# %%
# Columns the optimizer is expected to have added to its output frames.
required_columns = [
    'roi_mean',
    'roi_total',
    'cpa_mean',
    'cpa_total',
    'mean_response',
    'mean_spend_adstocked',
    'mean_carryover',
]

# Report, frame by frame, whether each expected column is present.
# Each entry is (header line to print, key into `result`).
for header, frame_key in (
    ("Required columns present in decomp_spend_dist:", 'decomp_spend_dist'),
    ("\nRequired columns present in x_decomp_agg:", 'x_decomp_agg'),
):
    print(header)
    for col in required_columns:
        print(f"{col}: {col in result[frame_key].columns}")

# %% [markdown]
# Let's verify that the calculations are correct for a few rows:

# %%
def verify_calculations(row):
    """Assert that the ROI and CPA values of one row match their source columns.

    The checks run in this order, each compared with ``np.isclose`` at its
    default tolerances:

    * ``roi_mean``  against ``mean_response / mean_spend``
    * ``roi_total`` against ``xDecompAgg / total_spend``
    * ``cpa_mean``  against ``mean_spend / mean_response``
    * ``cpa_total`` against ``total_spend / xDecompAgg``

    Args:
        row: Record indexable by column name (for example a DataFrame row
            Series) that exposes all of the columns listed above.

    Raises:
        AssertionError: For the first metric whose stored value does not match
            the recomputed ratio.
    """
    # Each entry: (metric column, numerator column, denominator column, failure message).
    ratio_checks = (
        ('roi_mean', 'mean_response', 'mean_spend', "ROI mean calculation is incorrect"),
        ('roi_total', 'xDecompAgg', 'total_spend', "ROI total calculation is incorrect"),
        ('cpa_mean', 'mean_spend', 'mean_response', "CPA mean calculation is incorrect"),
        ('cpa_total', 'total_spend', 'xDecompAgg', "CPA total calculation is incorrect"),
    )
    for metric, numerator, denominator, failure_message in ratio_checks:
        reported = row[metric]
        recomputed = row[numerator] / row[denominator]
        assert np.isclose(reported, recomputed), failure_message

# Spot-check the ratio columns on the leading rows only (DataFrame.head()
# defaults to the first five); any mismatch raises AssertionError and stops here.
rows_to_check = result['decomp_spend_dist'].head()
for _, row in rows_to_check.iterrows():
    verify_calculations(row)

print("Calculations verified for the first few rows.")

# %% [markdown]
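# Finally, let's report which processing mode the configured core count implies. Note that this only inspects the mock configuration; it does not measure whether the optimizer actually ran in parallel: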

# %%
# Pick the message from the configured core count alone; nothing here measures
# how the optimizer actually executed.
processing_message = (
    f"Parallel processing with {mock_model_outputs.cores} cores should have been used."
    if mock_model_outputs.cores > 1
    else "Serial processing should have been used."
)
print(processing_message)

# %% [markdown]
# This notebook provides a basic test of the ParetoOptimizer's optimize function. It creates mock data, calls the optimize function, and performs some basic checks on the output. You may want to add more specific tests based on your expected behavior and edge cases.

ModuleNotFoundError: No module named 'robyn'