In [None]:
# Item-size experiment for Figure 4c (corrected version)
import warnings

# Install a blanket "ignore" filter so warnings do not flood the cell output.
warnings.filterwarnings("ignore")

# FAST mode settings
item_sizes = [20, 40, 60, 80]  # catalogue sizes swept by the experiment
num_experiments = 10  # repetitions per catalogue size
num_cv = 3  # not referenced in this cell; presumably a cross-validation fold count

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from sklearn.metrics.pairwise import rbf_kernel
from scipy.spatial.distance import pdist
from Environment import AvgEnvironment
from Estimator import (
    # IPSEstimator,
    SlateEstimator,
    DirectEstimator,
    DoublyRobustEstimator,
    CMEstimator,
)
from Estimator_CPME import (
    IPSEstimator,
    CMEbis,
    DoublyRobustbis,
    BehaviorPolicyEstimator
)
from Policy import MultinomialPolicy
from ParameterSelector import ParameterSelector

# Seed NumPy's global RNG so the np.random draws made further down are repeatable.
np.random.seed(2)


# Helper functions for simulation
def _simulate_observations(
    environment, logging_policy, target_policy, item_vectors, n_observation
):
    """Draw ``n_observation`` logged interactions and return them as a DataFrame.

    For every observation a user is taken from ``environment.get_context()``,
    both policies are asked for a recommendation for that user, and the reward
    of each recommendation is read from ``environment.get_reward``.

    The order of the calls is kept fixed (context, logging recommendation,
    target recommendation, logging reward, target reward) so that a given
    state of the global NumPy RNG always produces the same data.
    """
    rows = []
    for _ in range(n_observation):
        user = environment.get_context()

        logging_reco, logging_multinomial, logging_user_vector = (
            logging_policy.recommend(user)
        )
        # The user vector returned by the target policy is not used: both
        # context columns below hold the logging policy's user vector.
        target_reco, target_multinomial, _ = target_policy.recommend(user)

        rows.append(
            {
                "logging_context_vec": logging_user_vector,
                "target_context_vec": logging_user_vector,
                "logging_reco": tuple(logging_reco),
                "logging_reco_vec": np.concatenate(item_vectors[logging_reco]),
                "logging_reward": environment.get_reward(user, logging_reco),
                "target_reco": tuple(target_reco),
                "target_multinomial": target_multinomial,
                "target_reco_vec": np.concatenate(item_vectors[target_reco]),
                "target_reward": environment.get_reward(user, target_reco),
                "logging_multinomial": logging_multinomial,
                "user": user,
            }
        )

    return pd.DataFrame(rows)


def simulate_item_size(item_size, config, num_iter):
    """Run the off-policy evaluation experiment for one catalogue size.

    Random user and item vectors are drawn once, a logging policy and a target
    policy are built from them, and then ``num_iter`` independent data sets
    are simulated. On each data set every estimator is evaluated against the
    empirical mean of the simulated target rewards.

    Parameters
    ----------
    item_size : int
        Number of items; used as the number of item vectors and passed to the
        policies and to ``BehaviorPolicyEstimator``.
    config : dict
        Must provide the keys ``"n_users"``, ``"context_dim"``, ``"n_reco"``
        and ``"n_observation"``.
    num_iter : int
        Number of repetitions (one freshly seeded data set per repetition).

    Returns
    -------
    pandas.DataFrame
        One row per (repetition, estimator) with the columns ``"Estimator"``,
        ``"MSE"`` and ``"Item Size"``. ``"MSE"`` holds the squared error of
        that single repetition. The columns are present even when
        ``num_iter`` is 0.

    Notes
    -----
    Draws from, and repeatedly reseeds, NumPy's global RNG.
    """
    result_columns = ["Estimator", "MSE", "Item Size"]
    results = []

    user_vectors = np.random.normal(
        0, 1, size=(config["n_users"], config["context_dim"])
    )
    # Target user vectors: the user vectors with each coordinate kept or
    # zeroed by an independent Bernoulli(0.5) mask.
    target_user_vectors = user_vectors * np.random.binomial(
        1, 0.5, size=user_vectors.shape
    )
    item_vectors = np.random.normal(0, 1, size=(item_size, config["context_dim"]))

    # The logging ("null") policy is built from a negatively scaled copy of
    # the target user vectors.
    alpha = -0.3
    null_user_vectors = alpha * target_user_vectors

    null_policy = MultinomialPolicy(
        item_vectors,
        null_user_vectors,
        item_size,
        config["n_reco"],
        temperature=0.5,
        cal_gamma=True,
    )
    logging_policy = null_policy
    target_policy = MultinomialPolicy(
        item_vectors,
        target_user_vectors,
        item_size,
        config["n_reco"],
        temperature=1.0,
        cal_gamma=False,
    )
    environment = AvgEnvironment(item_vectors, user_vectors)

    # Initial kernel-estimator parameters: [regularisation, bandwidth, bandwidth].
    reg_pow = -1
    reg_params = (10.0**reg_pow) / config["n_observation"]
    bw_params = 10.0**0
    params = [reg_params, bw_params, bw_params]

    # One seed per repetition, drawn up front from the global RNG.
    seeds = np.random.randint(np.iinfo(np.int32).max, size=num_iter)

    for seed in tqdm(seeds, desc=f"Item size {item_size}"):
        np.random.seed(seed)

        sim_data = _simulate_observations(
            environment,
            logging_policy,
            target_policy,
            item_vectors,
            config["n_observation"],
        )

        # === Prepare estimators ===
        behavior_estimator = BehaviorPolicyEstimator(item_size)
        user_features = np.vstack(sim_data["logging_context_vec"].values)
        # Taking first item of each logged slate as the action
        actions = [reco[0] for reco in sim_data["logging_reco"].values]
        behavior_estimator.fit(user_features, actions)

        # Constructed in the same order in which they are reported below.
        ips = IPSEstimator(behavior_estimator, target_policy)
        slate = SlateEstimator(config["n_reco"], null_policy)
        direct = DirectEstimator()
        doubly_robust = DoublyRobustEstimator(
            config["n_reco"], null_policy, target_policy
        )
        cme = CMEbis(rbf_kernel, rbf_kernel, params)
        # No grid search is run for this estimator; it keeps the initial `params`.
        dr_cme = DoublyRobustbis(
            rbf_kernel, rbf_kernel, params, behavior_estimator, target_policy
        )

        # Tune the direct estimator; the doubly robust estimator reuses the
        # parameters selected for it.
        direct_selector = ParameterSelector(direct)
        direct_grid = [(n_hiddens, 1024, 100) for n_hiddens in [50, 100, 150, 200]]
        direct_selector.select_from_propensity(
            sim_data, direct_grid, behavior_estimator, target_policy
        )
        direct = direct_selector.estimator
        doubly_robust.params = direct_selector.parameters

        # Tune the regularisation of the kernel estimator, bandwidths fixed at 1.
        cme_selector = ParameterSelector(cme)
        cme_grid = [
            [(10.0**p) / config["n_observation"], 1.0, 1.0] for p in np.arange(-6, 0, 1)
        ]
        cme_selector.select_from_propensity(
            sim_data, cme_grid, behavior_estimator, target_policy
        )
        cme = cme_selector.estimator

        estimators = [ips, slate, direct, doubly_robust, cme, dr_cme]

        # Reference value: empirical mean reward of the target policy's own
        # recommendations on this data set.
        actual_value = np.mean(sim_data["target_reward"])

        for estimator in estimators:
            est_value = estimator.estimate(sim_data)
            mse = (est_value - actual_value) ** 2
            results.append(
                {"Estimator": estimator.name, "MSE": mse, "Item Size": item_size}
            )

    return pd.DataFrame(results, columns=result_columns)


# Simulation configuration (updated values)
config = dict(
    n_users=50,  # rows of the user-vector matrix
    context_dim=10,  # length of every user and item vector
    n_reco=4,  # passed to the policies and slate-based estimators
    n_observation=500,  # simulated observations per repetition
)

# Running the simulation: one result frame per item size, stacked into a
# single long-form table.
# ignore_index=True gives the combined frame a fresh 0..n-1 index. Without it
# each per-size frame contributes the same index labels again, and duplicate
# labels are a common source of reindexing errors in downstream alignment and
# plotting code.
full_results = pd.concat(
    [simulate_item_size(s, config, num_experiments) for s in item_sizes],
    ignore_index=True,
)

# Plotting results: one line per estimator, item size on the x axis and the
# per-run squared errors (column "MSE") on a logarithmic y axis.
sns.set(style="whitegrid")
plt.figure(figsize=(8, 5))
sns.lineplot(data=full_results, x="Item Size", y="MSE", hue="Estimator", marker="o")
plt.yscale("log")
plt.title("Item Size vs MSE (FAST Mode)")
plt.tight_layout()
plt.show()

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:    5.4s
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    5.5s
[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:    5.5s
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:    5.5s
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    5.5s
[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:    5.5s
[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:    5.6s
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    5.6s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    7.0s
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    7.0s
[Parallel(n_jobs=-1)]: Done  11 tasks      | elapsed:    7.1s
[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:    7.1s
[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:    7.1s
[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed:    7.2s
[Parallel(n_jobs=-1)]: Done  15 tasks      | elapsed:   

Item size 20:   0%|                                                                         | 0/10 [00:00<?, ?it/s]

20
(500, 10)
<Estimator_CPME.BehaviorPolicyEstimator object at 0x104008730>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x104008730>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x104008730>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x104008730>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x104008730>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x104008730>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x104008730>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x104008730>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x104008730>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x104008730>
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


Item size 20:  10%|██████▌                                                          | 1/10 [00:11<01:45, 11.69s/it]

20
(500, 10)
<Estimator_CPME.BehaviorPolicyEstimator object at 0x3044d41c0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x3044d41c0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x3044d41c0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x3044d41c0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x3044d41c0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x3044d41c0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x3044d41c0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x3044d41c0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x3044d41c0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x3044d41c0>
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


Item size 20:  20%|█████████████                                                    | 2/10 [00:23<01:34, 11.86s/it]

20
(500, 10)
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30a728070>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30a728070>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30a728070>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30a728070>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30a728070>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30a728070>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30a728070>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30a728070>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30a728070>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30a728070>
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


Item size 20:  30%|███████████████████▌                                             | 3/10 [00:35<01:23, 11.87s/it]

20
(500, 10)
<Estimator_CPME.BehaviorPolicyEstimator object at 0x306283ca0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x306283ca0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x306283ca0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x306283ca0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x306283ca0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x306283ca0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x306283ca0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x306283ca0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x306283ca0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x306283ca0>
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


Item size 20:  40%|██████████████████████████                                       | 4/10 [00:47<01:11, 11.90s/it]

20
(500, 10)
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30be5d360>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30be5d360>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30be5d360>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30be5d360>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30be5d360>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30be5d360>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30be5d360>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30be5d360>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30be5d360>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30be5d360>
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


Item size 20:  50%|████████████████████████████████▌                                | 5/10 [00:59<00:59, 11.97s/it]

20
(500, 10)
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30be8ee30>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30be8ee30>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30be8ee30>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30be8ee30>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30be8ee30>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30be8ee30>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30be8ee30>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30be8ee30>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30be8ee30>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30be8ee30>
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


Item size 20:  60%|███████████████████████████████████████                          | 6/10 [01:11<00:48, 12.02s/it]

20
(500, 10)
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30bbf0160>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30bbf0160>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30bbf0160>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30bbf0160>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30bbf0160>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30bbf0160>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30bbf0160>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30bbf0160>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30bbf0160>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30bbf0160>
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


Item size 20:  70%|█████████████████████████████████████████████▌                   | 7/10 [01:23<00:36, 12.08s/it]

20
(500, 10)
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30d658970>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30d658970>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30d658970>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30d658970>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30d658970>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30d658970>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30d658970>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30d658970>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30d658970>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30d658970>
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


Item size 20:  80%|████████████████████████████████████████████████████             | 8/10 [01:36<00:24, 12.13s/it]

20
(500, 10)
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30a92f4f0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30a92f4f0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30a92f4f0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30a92f4f0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30a92f4f0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30a92f4f0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30a92f4f0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30a92f4f0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30a92f4f0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30a92f4f0>
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


Item size 20:  90%|██████████████████████████████████████████████████████████▌      | 9/10 [01:48<00:12, 12.18s/it]

20
(500, 10)
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30d65b580>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30d65b580>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30d65b580>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30d65b580>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30d65b580>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30d65b580>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30d65b580>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30d65b580>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30d65b580>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30d65b580>
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


Item size 20: 100%|████████████████████████████████████████████████████████████████| 10/10 [02:00<00:00, 12.07s/it]


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   59.2s
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:   59.2s
[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:   59.6s
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:   59.7s
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:   59.8s
[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:   59.9s
[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done  11 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done  15 tasks      | elapsed:  1

Item size 40:   0%|                                                                         | 0/10 [00:00<?, ?it/s]

40
(500, 10)
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30d249330>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30d249330>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30d249330>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30d249330>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30d249330>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30d249330>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30d249330>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30d249330>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30d249330>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30d249330>
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step


Item size 40:  10%|██████▌                                                          | 1/10 [00:13<02:03, 13.72s/it]

40
(500, 10)
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30bee3550>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30bee3550>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30bee3550>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30bee3550>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30bee3550>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30bee3550>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30bee3550>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30bee3550>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30bee3550>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30bee3550>
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


Item size 40:  20%|█████████████                                                    | 2/10 [00:26<01:44, 13.06s/it]

40
(500, 10)
<Estimator_CPME.BehaviorPolicyEstimator object at 0x31fa291b0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x31fa291b0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x31fa291b0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x31fa291b0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x31fa291b0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x31fa291b0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x31fa291b0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x31fa291b0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x31fa291b0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x31fa291b0>
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


Item size 40:  30%|██████████████████▌                                           | 3/10 [30:42<1:37:35, 836.47s/it]

40
(500, 10)
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30af5d6f0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30af5d6f0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30af5d6f0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30af5d6f0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30af5d6f0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30af5d6f0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30af5d6f0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30af5d6f0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30af5d6f0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30af5d6f0>
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


Item size 40:  40%|███████████████████████▌                                   | 4/10 [1:19:52<2:47:04, 1670.83s/it]

40
(500, 10)
<Estimator_CPME.BehaviorPolicyEstimator object at 0x31ed2b2e0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x31ed2b2e0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x31ed2b2e0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x31ed2b2e0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x31ed2b2e0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x31ed2b2e0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x31ed2b2e0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x31ed2b2e0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x31ed2b2e0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x31ed2b2e0>
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


Item size 40:  50%|█████████████████████████████▌                             | 5/10 [1:51:09<2:25:26, 1745.30s/it]

40
(500, 10)
<Estimator_CPME.BehaviorPolicyEstimator object at 0x31f9e38e0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x31f9e38e0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x31f9e38e0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x31f9e38e0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x31f9e38e0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x31f9e38e0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x31f9e38e0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x31f9e38e0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x31f9e38e0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x31f9e38e0>
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step


Item size 40:  60%|███████████████████████████████████▍                       | 6/10 [2:14:43<1:48:50, 1632.54s/it]

40
(500, 10)
<Estimator_CPME.BehaviorPolicyEstimator object at 0x31da2e5f0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x31da2e5f0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x31da2e5f0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x31da2e5f0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x31da2e5f0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x31da2e5f0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x31da2e5f0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x31da2e5f0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x31da2e5f0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x31da2e5f0>
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


Item size 40:  70%|█████████████████████████████████████████▎                 | 7/10 [2:30:13<1:10:08, 1402.69s/it]

40
(500, 10)
<Estimator_CPME.BehaviorPolicyEstimator object at 0x31c83b160>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x31c83b160>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x31c83b160>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x31c83b160>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x31c83b160>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x31c83b160>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x31c83b160>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x31c83b160>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x31c83b160>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x31c83b160>
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


Item size 40:  80%|████████████████████████████████████████████████▊            | 8/10 [2:53:03<46:24, 1392.41s/it]

40
(500, 10)
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30be8db40>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30be8db40>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30be8db40>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30be8db40>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30be8db40>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x30be8db40>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30be8db40>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30be8db40>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30be8db40>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x30be8db40>
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


Item size 40:  90%|██████████████████████████████████████████████████████▉      | 9/10 [3:42:04<31:16, 1876.53s/it]

40
(500, 10)
<Estimator_CPME.BehaviorPolicyEstimator object at 0x31dd67cd0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x31dd67cd0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x31dd67cd0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x31dd67cd0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x31dd67cd0>


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<Estimator_CPME.BehaviorPolicyEstimator object at 0x31dd67cd0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x31dd67cd0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x31dd67cd0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x31dd67cd0>
<Estimator_CPME.BehaviorPolicyEstimator object at 0x31dd67cd0>
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


Item size 40: 100%|████████████████████████████████████████████████████████████| 10/10 [4:12:25<00:00, 1514.59s/it]


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
