### Check wheter expected interaction reward matches with environment reward

In [7]:
import json
from Environment import Environment
from Model.Evaluator.GraphEvaluator import GraphEvaluator
from Model.Evaluator.Baseline import Baseline
from Model.Evaluator.OneStepEvaluator import OneStepEvaluator

from Model.Product import *
import numpy as np

# ==== PARAMETERS TO CHANGE ====
config_path = "./Configs/config3.json"
RANDOM_ARM = True
arm = [1, 2, 0, 1, 0]
n_experiments = 500
# ==============================

f = open(config_path)
config = json.load(f)
f.close()

env = Environment(config_path=config_path)
marginsPerPrice = config["margins"]

if RANDOM_ARM:
    arm = np.floor(np.random.rand(len(marginsPerPrice)) * len(marginsPerPrice[0]))
    arm = np.array(arm.tolist(), dtype=int)

margins = [marginsPerPrice[i][arm[i]] for i in range(0,len(arm))]
# print(margins)
obtained_margins = []

conf_classes = config["classes"]
for uc in conf_classes:
    armConvRates = [uc["conversionRates"][i][arm[i]] for i in range(0,len(arm))]
    productList = [Product(int(key), uc["secondary"][key]) for key in uc["secondary"]]
    eval = GraphEvaluator(products_list=productList, click_prob_matrix=uc["clickProbability"], lambda_prob=uc["lambda"], conversion_rates=armConvRates,
                alphas=uc["alphas"], margins=margins, units_mean=uc["unitsShape"], convert_units=True, verbose=False)
    baseline = Baseline(products_list=productList, click_prob_matrix=uc["clickProbability"], lambda_prob=uc["lambda"], conversion_rates=armConvRates,
                alphas=uc["alphas"], margins=margins, units_mean=uc["unitsShape"], convert_units=True, verbose=False)
    oneStep = OneStepEvaluator(products_list=productList, click_prob_matrix=uc["clickProbability"], lambda_prob=uc["lambda"], conversion_rates=armConvRates,
                alphas=uc["alphas"], margins=margins, units_mean=uc["unitsShape"], verbose=False)

env.setPriceLevels(arm)
for i in range(0,n_experiments):
  inters = env.round()
  total = 0
  for inter in inters:
    total += inter.linearizeMargin(marginsPerPrice)
    obtained_margins.append(inter.linearizeMargin(marginsPerPrice))
  total = total / len(inters)
  # obtained_margins.append(total)

print("SINGLE ITERATION/SESSION REWARDS FOR CONFIG {}:".format(arm))
print("   - [EMPIRICAL] Mean reward ({} experiments from env): {}".format(n_experiments, np.array(obtained_margins).mean()))
print("   - [THEORETICAL] Graph expected reward: {}".format(eval.computeMargin()))
print("   - [THEORETICAL] Baseline expected reward: {}".format(baseline.computeMargin()))
print("   - [THEORETICAL] One-Step expected reward: {}".format(oneStep.computeMargin()))


SINGLE ITERATION/SESSION REWARDS FOR CONFIG [3 2 0 1 0]:
   - [EMPIRICAL] Mean reward (500 experiments from env): 22.06977460450791
   - [THEORETICAL] Graph expected reward: 38.8
   - [THEORETICAL] Baseline expected reward: 19.290399999999998
   - [THEORETICAL] One-Step expected reward: 32.800000000000004


### Compute the best arm for each class by brute force

In [1]:
import json
from Environment import Environment
import numpy as np
from Learner.BruteForce import *
from Model.UserClass import *
from Model.Product import *
from Model.GraphProbabilities import *
from Model.Evaluator.GraphEvaluator import GraphEvaluator
from Model.Evaluator.Baseline import Baseline
from tqdm import tqdm
from IPython.display import clear_output



# ==== PARAMETERS TO CHANGE ====
CONFIG_PATH = './Configs/configuration4.json'
# ==============================


f = open(CONFIG_PATH)
config = json.load(f)
f.close()

opt_arms = []
opt_margins = []
daily_users = []
print("Starting the analysis ...\n")
for k in range(0, len(config["classes"])):
    uc = config["classes"][k]

    productList = [Product(int(key), uc["secondary"][key]) for key in uc["secondary"]]

    conversionRateLevels = uc["conversionRates"]
    marginsPerPrice = config["margins"]
    click_prob = np.array(uc["clickProbability"])
    lambda_p = uc["lambda"]
    alphas = uc["alphas"]
    units_mean = uc["actualUnitsMean"]
    # Early transform for efficiency reason
    # actual_means = []
    # for i in range(0,len(units_mean)):
    #    empiric_mean = np.ceil(np.random.gamma(units_mean[i], 1, size=1000000)).mean()
    #     actual_means.append(int(empiric_mean*100) / 100)
    # units_mean = actual_means

    daily_users.append(uc["usersMean"])
    num_prices = len(conversionRateLevels[0])
    num_prods = len(alphas)

    print("Brute forcing class {}".format(k))
    bf = BruteForce(num_prices=num_prices, num_products=num_prods)
    for i in tqdm(range(0, num_prices**num_prods)):
        pulledArm = bf.pull_arm()
        margins = []
        convRates = []
        for k in range(0,len(pulledArm)):
            margins.append(marginsPerPrice[k][pulledArm[k]])
            convRates.append(conversionRateLevels[k][pulledArm[k]])

        price_configuration_margin = 0
        eval = GraphEvaluator(products_list=productList, click_prob_matrix=click_prob, lambda_prob=lambda_p, conversion_rates=convRates,
                        alphas=alphas, margins=margins, units_mean=units_mean, convert_units=False, verbose=False)
        eval2 = Baseline(products_list=productList, click_prob_matrix=click_prob, lambda_prob=lambda_p, conversion_rates=convRates,
                        alphas=alphas, margins=margins, units_mean=units_mean, convert_units=False, verbose=False)

        overall_margin = eval.computeMargin()
        baseline = eval2.computeMargin()
        # print("Configuration {}; ConvRates {}; Margins {}; Overall Margin {}; Baseline {}".format(pulledArm,convRates,margins,int(overall_margin*100)/100,int(baseline*100)/100))
        # if overall_margin < baseline:
            # print("VAFFANCULOOOOO {} - {} = {}".format(overall_margin,baseline,overall_margin-baseline))
        bf.update(overall_margin)

    opt_arms.append(bf.get_optima())
    opt_margins.append(bf.get_optima_margin())
clear_output(wait=True)
print("BRUTE FORCE OF CONFIG {} CLASSES:".format(CONFIG_PATH))
for i in range(0,len(opt_arms)):
    print("   - [CLASS {}] Optimal arm is {} with margin {}".format(i,opt_arms[i], opt_margins[i]))

daily_users = np.array(daily_users)
classes_weights = daily_users / daily_users.sum()
opt_margins = np.array(opt_margins)
print("\nThe optimal weighted mean expected margin given the mean daily users {} is {}".format(daily_users, np.multiply(classes_weights, opt_margins).sum()))

# Best single arm possible
print("\nWhich arm among the best ones gives the better results in non contextual optmization?")
equal_arm_rew = []
for i in range(0,len(opt_arms)):
    arm = opt_arms[i]
    class_rewards = []
    for k in range(0, len(config["classes"])):
        uc = config["classes"][k]
        productList = [Product(int(key), uc["secondary"][key]) for key in uc["secondary"]]
        conversionRateLevels = uc["conversionRates"]
        marginsPerPrice = config["margins"]
        click_prob = np.array(uc["clickProbability"])
        lambda_p = uc["lambda"]
        alphas = uc["alphas"]
        units_mean = uc["unitsShape"]
        num_prices = len(conversionRateLevels[0])
        num_prods = len(alphas)

        pulledArm = arm
        margins = []
        convRates = []
        for k in range(0,len(pulledArm)):
            margins.append(marginsPerPrice[k][pulledArm[k]])
            convRates.append(conversionRateLevels[k][pulledArm[k]])

        eval = GraphEvaluator(products_list=productList, click_prob_matrix=click_prob, lambda_prob=lambda_p, conversion_rates=convRates,
                        alphas=alphas, margins=margins, units_mean=units_mean, verbose=False)
        eval2 = Baseline(products_list=productList, click_prob_matrix=click_prob, lambda_prob=lambda_p, conversion_rates=convRates,
                        alphas=alphas, margins=margins, units_mean=units_mean, verbose=False)
                    
        class_rewards.append(eval.computeMargin())
    
    weighted_reward = np.multiply(classes_weights, np.array(class_rewards)).sum()
    equal_arm_rew.append(weighted_reward)
    if class_rewards[i] < eval2.computeMargin():
        print("   - [ARM {}] Class-weighted expected margin is {}, but baseline is greater than weighted".format(arm, weighted_reward))
    else:
        print("   - [ARM {}] Class-weighted expected margin is {}".format(arm, weighted_reward))


BRUTE FORCE OF CONFIG ./Configs/configuration4.json CLASSES:
   - [CLASS 0] Optimal arm is [3, 3, 3, 3, 3] with margin 34.5
   - [CLASS 1] Optimal arm is [0, 3, 0, 3, 3] with margin 25.150000000000002
   - [CLASS 2] Optimal arm is [3, 3, 3, 3, 3] with margin 32.800000000000004

The optimal weighted mean expected margin given the mean daily users [30 30 20] is 30.56875

Which arm among the best ones gives the better results in non contextual optmization?
   - [ARM [3, 3, 3, 3, 3]] Class-weighted expected margin is 32.35625
   - [ARM [0, 3, 0, 3, 3]] Class-weighted expected margin is 30.50625
   - [ARM [3, 3, 3, 3, 3]] Class-weighted expected margin is 32.35625


### Average distance from Environment data and GraphEvaluator

In [14]:
import json
from Environment import Environment
from Model.Evaluator.GraphEvaluator import GraphEvaluator
from Model.Evaluator.Baseline import Baseline
from Model.Evaluator.OneStepEvaluator import OneStepEvaluator
from Learner.BruteForce import *
from Model.Product import *
import numpy as np
from tqdm import tqdm
from IPython.display import clear_output

# ==== PARAMETERS TO CHANGE ====
config_path = "./Configs/config3.json"
RANDOM_ARM = False
n_experiments = 100
# ==============================

f = open(config_path)
config = json.load(f)
f.close()

env = Environment(config_path=config_path)
marginsPerPrice = config["margins"]
num_prods = len(config["margins"])
num_prices = len(config["margins"][0])

evaluatorEnvDifference = []

bf = BruteForce(num_prices=num_prices, num_products=num_prods)
for i in tqdm(range(0, num_prices**num_prods)):
  arm = bf.pull_arm() 
  margins = [marginsPerPrice[i][arm[i]] for i in range(0,len(arm))]
  # print(margins)
  obtained_margins = []

  conf_classes = config["classes"]
  for uc in conf_classes:
      armConvRates = [uc["conversionRates"][i][arm[i]] for i in range(0,len(arm))]
      productList = [Product(int(key), uc["secondary"][key]) for key in uc["secondary"]]
      eval = GraphEvaluator(products_list=productList, click_prob_matrix=uc["clickProbability"], lambda_prob=uc["lambda"], conversion_rates=armConvRates,
                  alphas=uc["alphas"], margins=margins, units_mean=uc["actualUnitsMean"], convert_units=False, verbose=False)
      baseline = Baseline(products_list=productList, click_prob_matrix=uc["clickProbability"], lambda_prob=uc["lambda"], conversion_rates=armConvRates,
                  alphas=uc["alphas"], margins=margins, units_mean=uc["actualUnitsMean"], convert_units=False, verbose=False)
      oneStep = OneStepEvaluator(products_list=productList, click_prob_matrix=uc["clickProbability"], lambda_prob=uc["lambda"], conversion_rates=armConvRates,
                  alphas=uc["alphas"], margins=margins, units_mean=uc["actualUnitsMean"], verbose=False)

  env.setPriceLevels(arm)
  for i in range(0,n_experiments):
    inters = env.round()
    total = 0
    for inter in inters:
      total += inter.linearizeMargin(marginsPerPrice)
      obtained_margins.append(inter.linearizeMargin(marginsPerPrice))
    total = total / len(inters)
    # obtained_margins.append(total)
  
  environmentMean = np.array(obtained_margins).mean()
  # Percentage error of graph eval from environment
  evaluatorEnvDifference.append(abs(environmentMean - eval.computeMargin()) / environmentMean)

clear_output(wait=True)
evaluatorEnvDifference = np.array(evaluatorEnvDifference)
print("PERCENTAGE ERROR OF GRAPH EVALUATOR FROM ENVIRONMENT ({}):".format(config_path))
print("   - [MEAN] {}".format(evaluatorEnvDifference.mean()))
print("   - [STD] {}".format(evaluatorEnvDifference.std()))
print("   - [MAX] {}".format(evaluatorEnvDifference.max()))
print("   - [MIN] {}".format(evaluatorEnvDifference.min()))


PERCENTAGE ERROR OF GRAPH EVALUATOR FROM ENVIRONMENT (./Configs/config3.json):
   - [MEAN] 0.5032961892238179
   - [STD] 0.23131999345319565
   - [MAX] 1.363867790668559
   - [MIN] 0.03304399780923764


### Graph Evaluator vs Environment visiting probability

In [30]:
import json
from Environment import Environment
from Model.Evaluator.GraphEvaluator import GraphEvaluator
from Model.Evaluator.Baseline import Baseline
from Model.Evaluator.OneStepEvaluator import OneStepEvaluator
from Learner.BruteForce import *
from Model.Product import *
import numpy as np
from tqdm import tqdm
from IPython.display import clear_output

# ==== PARAMETERS TO CHANGE ====
config_path = "./Configs/config3.json"
RANDOM_ARM = True
arm = [0, 0, 0, 0, 0]
n_experiments = 250
# ==============================

f = open(config_path)
config = json.load(f)
f.close()

env = Environment(config_path=config_path)
marginsPerPrice = config["margins"]
num_prods = len(config["margins"])
num_prices = len(config["margins"][0])

if RANDOM_ARM:
    arm = np.floor(np.random.rand(len(marginsPerPrice)) * len(marginsPerPrice[0]))
    arm = np.array(arm.tolist(), dtype=int)
env.setPriceLevels(arm)

cumulative_visits = np.zeros(num_prods)
cumulative_interactions = 0
for i in tqdm(range(0,n_experiments)):
    interactions = env.round()
    cumulative_interactions += len(interactions)
    for inter in interactions:
        cumulative_visits = cumulative_visits + inter.linearizeVisits()
clear_output(wait=True)

environment_probs = cumulative_visits / cumulative_interactions
print("DIFFERENCE FROM ENVIRONMENT AND GE VISITING PROBABILITY, ARM {} ({}):".format(arm, config_path))
print("   - [ENV] {}".format(environment_probs))


for j in range(0,num_prods):
    for k in range(0,len(config["classes"])):
        env.classes[k].alphas = np.full(num_prods, 0.000001).tolist()
        env.classes[k].alphas[j] = 1.00004
    cumulative_visits = np.zeros(num_prods)
    cumulative_interactions = 0
    for i in range(0,n_experiments):
        interactions = env.round()
        cumulative_interactions += len(interactions)
        for inter in interactions:
            cumulative_visits = cumulative_visits + inter.linearizeVisits()
    print("      - [STARTING {}] {}".format(j, cumulative_visits / cumulative_interactions))
print("")
conf_classes = config["classes"]
i = 0
weighted_classes = np.zeros((num_prods))
tot_users_daily = 0
for uc in conf_classes:
    armConvRates = [uc["conversionRates"][i][arm[i]] for i in range(0,len(arm))]
    productList = [Product(int(key), uc["secondary"][key]) for key in uc["secondary"]]
    eval = GraphEvaluator(products_list=productList, click_prob_matrix=uc["clickProbability"], lambda_prob=uc["lambda"], conversion_rates=armConvRates,
                alphas=uc["alphas"], margins=marginsPerPrice, units_mean=uc["actualUnitsMean"], convert_units=False, verbose=False)
    visit_prob = eval.getVisitingProbability()
    weighted_classes = weighted_classes + visit_prob * uc["usersMean"]
    tot_users_daily += uc["usersMean"]
    print("   - [G/E CLASS {}] {}".format(i, visit_prob))
    for k in range(0,num_prods):
        print("      - [STARTING {}] {}".format(k, eval.computeSingleProduct(k)))
    i += 1

print("")
ge_weighted_probs = weighted_classes / tot_users_daily
print("   - [G/E TOTAL] {}\n".format(ge_weighted_probs))

print("PERFORMANCE INDEX => {} => {}".format(np.abs(ge_weighted_probs - environment_probs), np.abs(ge_weighted_probs - environment_probs).mean()))



DIFFERENCE FROM ENVIRONMENT AND GE VISITING PROBABILITY, ARM [0 1 1 0 2] (./Configs/config3.json):
   - [ENV] [0.34301645 0.40334541 0.41683858 0.3604126  0.37914692]
      - [STARTING 0] [0.99994403 0.3489869  0.36986455 0.18319713 0.17625658]
      - [STARTING 1] [0.03267791 1.         0.24220757 0.12842141 0.18489554]
      - [STARTING 2] [0.13689744 0.10888044 1.         0.51219103 0.4247847 ]
      - [STARTING 3] [0.2581871  0.12982005 0.1066838  1.         0.29523863]
      - [STARTING 4] [0.01475042 0.17134044 0.04273696 0.06029164 1.        ]

   - [G/E CLASS 0] [0.52158577 0.64772205 0.38838941 0.19775595 0.24047966]
      - [STARTING 0] [1.         0.46615872 0.493344   0.21619248 0.23390821]
      - [STARTING 1] [0.02490137 1.         0.2403631  0.10350908 0.17698683]
      - [STARTING 2] [0.09891594 0.10980953 1.         0.408064   0.35468739]
      - [STARTING 3] [0.24       0.15742597 0.13659287 1.         0.28477755]
      - [STARTING 4] [0.01561942 0.18425088 0.04914614