In [1]:
# Experiment-level settings. NOTE(review): none of these three names is read by
# the cells visible in this notebook (the cells below use obs_size / num_iter) --
# confirm they are still needed.
observation_sizes = [100, 1_000, 5_000]
num_experiments, num_cv = 10, 5

import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from sklearn.metrics.pairwise import rbf_kernel
from scipy.spatial.distance import pdist

from Environment import AvgEnvironment
from Policy import MultinomialPolicy
from ParameterSelector import ParameterSelector
# from kernel import Exponential, Gaussian, Linear, Polynomial, RationalQuadratic
from Estimator_CPME import *

2025-05-18 17:49:14.733288: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747590555.689942 2316174 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747590556.050538 2316174 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1747590558.338777 2316174 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1747590558.338879 2316174 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1747590558.338885 2316174 computation_placer.cc:177] computation placer alr

In [2]:
import tensorflow as tf
# Show which GPUs TensorFlow can see (an empty list means CPU only).
gpu_devices = tf.config.list_physical_devices('GPU')
print(gpu_devices)

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [3]:
# Dimensions of the simulated recommendation problem; read through config[...]
# when the user/item vectors and the two policies are built.
config = dict(
    n_users=50,
    n_items=20,
    context_dim=10,
    n_reco=4,
)

# Number of simulated interactions drawn by the data-generation loop.
obs_size = 5_000
# NOTE(review): num_iter is not read by any cell visible in this notebook.
num_iter = 5

In [4]:
# === Generate environment ===
# Seed NumPy's global RNG *before* the first random draw.  The seed used to be
# set only after the user/item vectors and the policies had been created, so
# the environment itself changed on every fresh kernel and results could not be
# reproduced with Restart & Run All.
seed = 0
np.random.seed(seed)

user_vectors = np.random.normal(0, 1, size=(config["n_users"], config["context_dim"]))
# Each coordinate of user_vectors is kept or zeroed by an independent Bernoulli(0.5) mask.
target_user_vectors = user_vectors * np.random.binomial(1, 0.5, size=user_vectors.shape)
item_vectors = np.random.normal(0, 1, size=(config["n_items"], config["context_dim"]))

# The logging policy sees the target user vectors rescaled by alpha.
alpha = -0.3
logging_user_vectors = alpha * target_user_vectors

logging_policy = MultinomialPolicy(item_vectors, logging_user_vectors, config["n_items"], config["n_reco"], temperature=0.5, cal_gamma=True)
target_policy = MultinomialPolicy(item_vectors, target_user_vectors, config["n_items"], config["n_reco"], temperature=1.0, cal_gamma=False)
environment = AvgEnvironment(item_vectors, user_vectors)

# === Generate simulation data ===
# One record per simulated interaction: the same user is shown to both policies
# and the reward of each recommendation is recorded.
sim_records = []
for _ in range(obs_size):
    user = environment.get_context()

    logging_reco, logging_multinomial, logging_user_vector = logging_policy.recommend(user)
    target_reco, target_multinomial, _ = target_policy.recommend(user)

    sim_records.append({
        "null_context_vec": logging_user_vector,
        # NOTE(review): the user vector returned by target_policy.recommend is
        # discarded and the logging policy's vector is stored here as well --
        # confirm this is intended.
        "target_context_vec": logging_user_vector,
        "null_reco": tuple(logging_reco),
        # A recommendation is represented by concatenating its item vectors.
        "null_reco_vec": np.concatenate(item_vectors[logging_reco]),
        "null_reward": environment.get_reward(user, logging_reco),
        "target_reco": tuple(target_reco),
        "target_multinomial": target_multinomial,
        "target_reco_vec": np.concatenate(item_vectors[target_reco]),
        "target_reward": environment.get_reward(user, target_reco),
        "null_multinomial": logging_multinomial,
        "user": user,
    })

# Build the frame once from the list of records (kept under a separate name so
# sim_data is only ever a DataFrame).
sim_data = pd.DataFrame(sim_records)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.


2025-05-18 17:49:44.226759: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-05-18 17:49:44.248971: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747590584.261513 2316242 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747590584.272633 2316242 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
E0000 00:00:1747590584.284686 2316243 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747590584.295953 2316243 cuda_blas.cc:1

[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   13.8s
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:   13.8s
[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:   18.8s
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:   18.9s
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:   23.6s
[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:   23.7s
[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:   28.4s
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:   28.5s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:   33.2s
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:   33.4s
[Parallel(n_jobs=-1)]: Done  11 tasks      | elapsed:   38.0s
[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:   38.3s
[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:   42.9s
[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed:   43.1s
[Parallel(n_jobs=-1)]: Done  15 tasks      | elapsed:   47.7s
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:   47.8s
[Paralle

In [54]:
# === Calculate results ===
# Reference value: mean reward obtained by the target policy's own
# recommendations in the simulated data.
actual_value = np.mean(sim_data["target_reward"])

estimator_name = "CMEbis"

if estimator_name == "IPS":
    estimator = IPSEstimator(logging_policy, target_policy, null_propensity_known=True)
elif estimator_name == "DirectEstimator":
    estimator = DirectEstimator()
    # parameter selection
    direct_selector = ParameterSelector(estimator)  # direct estimator
    params_grid = [(n_hiddens, 1024, 100) for n_hiddens in [50, 100, 150, 200]]
    direct_selector.select_from_propensity(sim_data, params_grid, logging_policy, target_policy)
    estimator = direct_selector.estimator
elif estimator_name == "DoublyRobustEstimator":
    estimator = DoublyRobustEstimator(logging_policy, target_policy, null_propensity_known=True)
    # Best effort: reuse the parameters tuned by an earlier "DirectEstimator"
    # run of this cell.  direct_selector only exists if that run happened in
    # the current kernel, so a failure is reported instead of silently ignored.
    try:
        estimator.params = direct_selector.parameters  # doubly robust estimator
    except Exception as err:
        warnings.warn(
            "Could not reuse DirectEstimator parameters ({!r}); the "
            "DoublyRobustEstimator parameters are left as constructed.".format(err)
        )
elif estimator_name == "CMEbis":
    estimator = CMEbis(rbf_kernel, rbf_kernel, params=[5e-8, 1.0, 1.0])
    cme_selector = ParameterSelector(estimator)  # cme estimator
    # Grid over the first parameter only: 10**p / obs_size for p = -9, ..., -3.
    params_grid = [[(10.0 ** p) / obs_size, 1.0, 1.0] for p in np.arange(-9, -2, 1)]
    cme_selector.select_from_propensity(sim_data, params_grid, logging_policy, target_policy)
    estimator = cme_selector.estimator
elif estimator_name == "DoublyRobustbis":
    estimator = DoublyRobustbis(rbf_kernel, rbf_kernel, [5e-5, 1.0, 1.0], logging_policy, target_policy, null_propensity_known=True)
    # This branch depends on cme_selector from a previous "CMEbis" run of this
    # cell; fail with an actionable message when it is missing.
    try:
        tuned_params = cme_selector.estimator._params
    except NameError as err:
        raise NameError(
            "cme_selector is not defined: run this cell with "
            "estimator_name = 'CMEbis' before selecting 'DoublyRobustbis'."
        ) from err
    # NOTE(review): the value is read from `_params` but written to `params` --
    # confirm DoublyRobustbis actually reads `params`.
    estimator.params = tuned_params
else:
    # Without this branch a typo would silently evaluate whatever `estimator`
    # was left in the kernel by a previous run.
    raise ValueError("Unknown estimator_name: {!r}".format(estimator_name))

# === Prepare features for reward estimators ===
# Drop incomplete rows once, jointly, so features and rewards stay row-aligned
# (dropping per column could leave arrays of different lengths).
logged = sim_data.dropna(subset=["null_context_vec", "null_reco_vec", "null_reward"])
logging_context_vec = np.vstack(logged["null_context_vec"].values)
logging_reco_vec = np.vstack(logged["null_reco_vec"].values)
logging_reward = logged["null_reward"].values

X_logging = np.hstack([logging_context_vec, logging_reco_vec])
# === Train reward models where needed ===
if isinstance(estimator, DirectEstimator):
    estimator.fit(X_logging, logging_reward, n_hidden_units=estimator.params[0], batch_size=1024, epochs=100)
if isinstance(estimator, DoublyRobustEstimator):
    estimator.fit(X_logging, logging_reward)

# Squared error of the off-policy estimate against the simulated reference value.
est_value = estimator.estimate(sim_data)
mse = (est_value - actual_value) ** 2
print("MSE : {}".format(mse))

MSE : 0.0005904208689124463


In [27]:
# Inspect the parameter list currently held by the estimator (private attribute).
# NOTE(review): the displayed first entry, 2e-13, equals 1e-9 / 5000, i.e. the
# smallest value of the CMEbis grid (10**p / obs_size, p = -9..-3) -- if this
# output comes from that search, the optimum sits on the grid boundary and the
# grid may need extending downwards.
estimator._params

[np.float64(2e-13), 1.0, 1.0]

In [49]:
# Scratch arithmetic: absolute gap between 2e-13 (the first entry shown for
# estimator._params) and 1e-7.  NOTE(review): looks like leftover exploration.
abs(2e-13 - 10 ** -7)

9.99998e-08