### Check whether the expected interaction reward matches the environment reward

In [None]:
import json
from Environment import Environment
from Model.Evaluator.GraphEvaluator import GraphEvaluator
from Model.Evaluator.Baseline import Baseline
from Model.Evaluator.OneStepEvaluator import OneStepEvaluator

from Model.Product import *
import numpy as np

# ==== PARAMETERS TO CHANGE ====
config_path = "./Configs/config1.json"
RANDOM_ARM = False
arm = [1, 2, 0, 1, 0]
n_experiments = 500
# ==============================

# Load the raw configuration. The context manager guarantees the file is
# closed even if JSON parsing fails.
with open(config_path) as f:
    config = json.load(f)

env = Environment(config_path=config_path)
marginsPerPrice = config["margins"]

if RANDOM_ARM:
    # Draw one price level per product, uniformly at random. The number of
    # levels of the first product is used for every product.
    arm = np.floor(np.random.rand(len(marginsPerPrice)) * len(marginsPerPrice[0])).astype(int)

# Margin of each product at the price level selected by the arm.
margins = [marginsPerPrice[prod][level] for prod, level in enumerate(arm)]

# Build the three theoretical evaluators for every user class.
# NOTE(review): the evaluators are overwritten at each iteration, so the
# theoretical values printed below refer to the LAST class in the config only,
# while the empirical mean pools every interaction returned by the environment.
# With more than one class the two are presumably not comparable - confirm.
for uc in config["classes"]:
    armConvRates = [uc["conversionRates"][prod][level] for prod, level in enumerate(arm)]
    productList = [Product(int(key), secondary) for key, secondary in uc["secondary"].items()]
    shared_kwargs = dict(
        products_list=productList,
        click_prob_matrix=uc["clickProbability"],
        lambda_prob=uc["lambda"],
        conversion_rates=armConvRates,
        alphas=uc["alphas"],
        margins=margins,
        units_mean=uc["unitsShape"],
        verbose=False,
    )
    graph_eval = GraphEvaluator(convert_units=True, **shared_kwargs)
    baseline = Baseline(convert_units=True, **shared_kwargs)
    oneStep = OneStepEvaluator(**shared_kwargs)

# Simulate the environment with the chosen arm and record the margin of every
# single interaction (not the per-round average).
env.setPriceLevels(arm)
obtained_margins = []
for _ in range(n_experiments):
    # A round that yields no interactions simply contributes no samples.
    obtained_margins.extend(inter.linearizeMargin(marginsPerPrice) for inter in env.round())

print("SINGLE ITERATION/SESSION REWARDS FOR CONFIG {}:".format(arm))
print("   - [EMPIRICAL] Mean reward ({} experiments from env): {}".format(n_experiments, np.array(obtained_margins).mean()))
print("   - [THEORETICAL] Graph expected reward: {}".format(graph_eval.computeMargin()))
print("   - [THEORETICAL] Baseline expected reward: {}".format(baseline.computeMargin()))
print("   - [THEORETICAL] One-Step expected reward: {}".format(oneStep.computeMargin()))


### Compute the best arm for each class by brute force

In [None]:
import json
from Environment import Environment
import numpy as np
from Learner.BruteForce import *
from Model.UserClass import *
from Model.Product import *
from Model.GraphProbabilities import *
from Model.Evaluator.GraphEvaluator import GraphEvaluator
from Model.Evaluator.Baseline import Baseline
from tqdm import tqdm
from IPython.display import clear_output



# ==== PARAMETERS TO CHANGE ====
CONFIG_PATH = './Configs/configuration4.json'
# ==============================


# Load the raw configuration. The context manager guarantees the file is
# closed even if JSON parsing fails.
with open(CONFIG_PATH) as f:
    config = json.load(f)

opt_arms = []
opt_margins = []
daily_users = []
print("Starting the analysis ...\n")
for class_idx, uc in enumerate(config["classes"]):
    productList = [Product(int(key), secondary) for key, secondary in uc["secondary"].items()]

    conversionRateLevels = uc["conversionRates"]
    marginsPerPrice = config["margins"]
    click_prob = np.array(uc["clickProbability"])
    lambda_p = uc["lambda"]
    alphas = uc["alphas"]

    # Early transform for efficiency reasons: estimate once, by Monte Carlo,
    # the mean of ceil(Gamma(shape, 1)) for each product and truncate it to two
    # decimals, so the evaluator is given ready-made means
    # (convert_units=False below) for every arm.
    units_mean = [
        int(np.ceil(np.random.gamma(shape, 1, size=1000000)).mean() * 100) / 100
        for shape in uc["unitsShape"]
    ]

    daily_users.append(uc["usersMean"])
    num_prices = len(conversionRateLevels[0])
    num_prods = len(alphas)

    print("Brute forcing class {}".format(class_idx))
    bf = BruteForce(num_prices=num_prices, num_products=num_prods)
    # One iteration per possible price configuration.
    for _ in tqdm(range(num_prices**num_prods)):
        pulledArm = bf.pull_arm()
        margins = [marginsPerPrice[prod][level] for prod, level in enumerate(pulledArm)]
        convRates = [conversionRateLevels[prod][level] for prod, level in enumerate(pulledArm)]

        graph_eval = GraphEvaluator(products_list=productList, click_prob_matrix=click_prob, lambda_prob=lambda_p, conversion_rates=convRates,
                        alphas=alphas, margins=margins, units_mean=units_mean, convert_units=False, verbose=False)

        # Only the graph evaluator's margin drives the search.
        bf.update(graph_eval.computeMargin())

    opt_arms.append(bf.get_optima())
    opt_margins.append(bf.get_optima_margin())
clear_output(wait=True)
print("BRUTE FORCE OF CONFIG {} CLASSES:".format(CONFIG_PATH))
for class_idx, (best_arm, best_margin) in enumerate(zip(opt_arms, opt_margins)):
    print("   - [CLASS {}] Optimal arm is {} with margin {}".format(class_idx, best_arm, best_margin))

# Weight each class by its share of the mean daily users.
daily_users = np.array(daily_users)
classes_weights = daily_users / daily_users.sum()
opt_margins = np.array(opt_margins)
print("\nThe optimal weighted mean expected margin given the mean daily users {} is {}".format(daily_users, np.multiply(classes_weights, opt_margins).sum()))

# Best single arm possible: apply each class-optimal arm to ALL classes and
# report its class-weighted expected margin.
print("\nWhich arm among the best ones gives the better results in non contextual optmization?")
equal_arm_rew = []
for arm in opt_arms:
    class_rewards = []
    class_baselines = []
    for uc in config["classes"]:
        productList = [Product(int(key), secondary) for key, secondary in uc["secondary"].items()]
        conversionRateLevels = uc["conversionRates"]
        marginsPerPrice = config["margins"]
        click_prob = np.array(uc["clickProbability"])
        lambda_p = uc["lambda"]
        alphas = uc["alphas"]
        # NOTE(review): the raw "unitsShape" values are passed here with the
        # evaluators' default convert_units, whereas the per-class brute force
        # pre-computes the means and passes convert_units=False - confirm that
        # the default makes the two paths equivalent.
        units_mean = uc["unitsShape"]
        # Not used below; kept as notebook-level globals in case other cells
        # read them.
        num_prices = len(conversionRateLevels[0])
        num_prods = len(alphas)

        margins = [marginsPerPrice[prod][level] for prod, level in enumerate(arm)]
        convRates = [conversionRateLevels[prod][level] for prod, level in enumerate(arm)]

        graph_eval = GraphEvaluator(products_list=productList, click_prob_matrix=click_prob, lambda_prob=lambda_p, conversion_rates=convRates,
                        alphas=alphas, margins=margins, units_mean=units_mean, verbose=False)
        baseline_eval = Baseline(products_list=productList, click_prob_matrix=click_prob, lambda_prob=lambda_p, conversion_rates=convRates,
                        alphas=alphas, margins=margins, units_mean=units_mean, verbose=False)

        class_rewards.append(graph_eval.computeMargin())
        class_baselines.append(baseline_eval.computeMargin())

    weighted_reward = np.multiply(classes_weights, np.array(class_rewards)).sum()
    # Weight the baseline exactly like the reward so that the comparison below
    # is between like quantities, rather than between one class's reward and
    # whichever class's baseline happened to be evaluated last.
    weighted_baseline = np.multiply(classes_weights, np.array(class_baselines)).sum()
    equal_arm_rew.append(weighted_reward)
    if weighted_reward < weighted_baseline:
        print("   - [ARM {}] Class-weighted expected margin is {}, but baseline is greater than weighted".format(arm, weighted_reward))
    else:
        print("   - [ARM {}] Class-weighted expected margin is {}".format(arm, weighted_reward))


### Average distance between Environment data and GraphEvaluator

In [5]:
import json
from Environment import Environment
from Model.Evaluator.GraphEvaluator import GraphEvaluator
from Model.Evaluator.Baseline import Baseline
from Model.Evaluator.OneStepEvaluator import OneStepEvaluator

from Model.Product import *
import numpy as np

# This cell's own import list does not provide these two names; import them
# here so the cell does not depend on another cell having been run first.
from Learner.BruteForce import BruteForce
from tqdm import tqdm

# ==== PARAMETERS TO CHANGE ====
config_path = "./Configs/config1.json"
RANDOM_ARM = False  # not read by this cell: every arm is enumerated below
n_experiments = 250
# ==============================

# Load the raw configuration. The context manager guarantees the file is
# closed even if JSON parsing fails.
with open(config_path) as f:
    config = json.load(f)

env = Environment(config_path=config_path)
marginsPerPrice = config["margins"]

# Size the enumeration from the configuration loaded in THIS cell instead of
# relying on globals left behind by a cell that may have used another config.
# The number of levels of the first product is used for every product.
num_prods = len(marginsPerPrice)
num_prices = len(marginsPerPrice[0])

evaluatorEnvDifference = []

bf = BruteForce(num_prices=num_prices, num_products=num_prods)
# One iteration per possible price configuration.
for _ in tqdm(range(num_prices**num_prods)):
    arm = bf.pull_arm()
    margins = [marginsPerPrice[prod][level] for prod, level in enumerate(arm)]

    # NOTE(review): the evaluator is overwritten at each iteration, so the
    # theoretical margin used below refers to the LAST class in the config
    # only, while the empirical mean pools every interaction returned by the
    # environment - confirm this is intended for multi-class configs.
    for uc in config["classes"]:
        armConvRates = [uc["conversionRates"][prod][level] for prod, level in enumerate(arm)]
        productList = [Product(int(key), secondary) for key, secondary in uc["secondary"].items()]
        graph_eval = GraphEvaluator(products_list=productList, click_prob_matrix=uc["clickProbability"], lambda_prob=uc["lambda"], conversion_rates=armConvRates,
                    alphas=uc["alphas"], margins=margins, units_mean=uc["actualUnitsMean"], convert_units=False, verbose=False)

    # Simulate the environment with this arm and record the margin of every
    # single interaction (not the per-round average).
    env.setPriceLevels(arm)
    obtained_margins = []
    for _round in range(n_experiments):
        # A round that yields no interactions simply contributes no samples.
        obtained_margins.extend(inter.linearizeMargin(marginsPerPrice) for inter in env.round())

    environmentMean = np.array(obtained_margins).mean()
    # Relative error of the graph evaluator with respect to the environment
    evaluatorEnvDifference.append(abs(environmentMean - graph_eval.computeMargin()) / environmentMean)

evaluatorEnvDifference = np.array(evaluatorEnvDifference)
print("PERCENTAGE ERROR OF GRAPH EVALUATOR FROM ENVIRONMENT ({}):".format(config_path))
print("   - [MEAN] {}".format(evaluatorEnvDifference.mean()))
print("   - [STD] {}".format(evaluatorEnvDifference.std()))
print("   - [MAX] {}".format(evaluatorEnvDifference.max()))
print("   - [MIN] {}".format(evaluatorEnvDifference.min()))


100%|██████████| 1024/1024 [24:00<00:00,  1.41s/it]

PERCENTAGE ERROR OF GRAPH EVALUATOR FROM ENVIRONMENT (./Configs/config1.json):
   - [MEAN] 0.022692850664425425
   - [STD] 0.01767915360945945
   - [MAX] 0.09019686027980131
   - [MIN] 4.097997882295597e-06



