# Step 7
Context generation

In [29]:
import copy

import numpy as np
import matplotlib.pyplot as plt

import Code.utils as utils
import Code.environment.settings as settings
from Code.environment.EnvironmentContextual import EnvironmentContextual
from Code.environment.Customer import Customer
from Code.UCBLearner4 import UCBLearner4
from Code.TSLearner4 import TSLearner4
from Code.ContextManager import ContextManager

%matplotlib inline

In [30]:
# Build the contextual environment and compute its clairvoyant baseline.
cust_json = "customer_classes.json"  # file name suggests the customer-class definitions
today_customer = 100  # presumably the number of customers per simulated day — TODO confirm
var_cust = 0  # presumably the variance of the daily customer count — TODO confirm
p_lambda = 1.0  # NOTE(review): meaning not visible here; confirm against EnvironmentContextual
graph_json = "business_full_graph.json"  # also handed to ContextManager in the learner cells
arms = settings.prices  # candidate prices taken from the project settings module

environment = EnvironmentContextual(cust_json,today_customer,var_cust,p_lambda, graph_json, arms)

# estimate_clairvoyant() returns four values; clairvoyant_arms, clairvoyant_reward and
# rew_per_arm are indexed by feature tuples such as (0, 0) in the cells that follow.
clairvoyant_arms, clairvoyant_reward, exp_c, rew_per_arm = environment.estimate_clairvoyant()

# NOTE(review): this prints every value held in rew_per_arm, which may flood the output.
print("\n", clairvoyant_arms, clairvoyant_reward, exp_c, rew_per_arm.values())

[array([[0.3, 0.8, 0.5, 0.4],
       [0.5, 0.9, 0.6, 0.5],
       [0.2, 0.7, 0.1, 0.4],
       [0.1, 0.5, 0.8, 0.4],
       [0.9, 0.1, 0.6, 0.6]]), array([[0.3, 0.8, 0.5, 0.4],
       [0.5, 0.9, 0.6, 0.5],
       [0.2, 0.7, 0.1, 0.4],
       [0.1, 0.5, 0.8, 0.4],
       [0.9, 0.1, 0.6, 0.6]]), array([[0.3, 0.8, 0.5, 0.4],
       [0.5, 0.9, 0.6, 0.5],
       [0.2, 0.7, 0.1, 0.4],
       [0.1, 0.5, 0.8, 0.4],
       [0.9, 0.1, 0.6, 0.6]]), array([[0.3, 0.8, 0.5, 0.4],
       [0.5, 0.9, 0.6, 0.5],
       [0.2, 0.7, 0.1, 0.4],
       [0.1, 0.5, 0.8, 0.4],
       [0.9, 0.1, 0.6, 0.6]])]
loading: [                              ] 0.0%
loading: [=                             ] 1.0%
loading: [=                             ] 2.0%
loading: [=                             ] 2.9%
loading: [==                            ] 3.9%
loading: [==                            ] 4.9%
loading: [==                            ] 5.9%
loading: [===                           ] 6.8%
loading: [===                      

In [31]:
# Display the clairvoyant arm choice for each feature tuple.
clairvoyant_arms

{(0, 0): [3, 3, 1, 2, 0],
 (0, 1): [3, 3, 1, 2, 0],
 (1, 0): [3, 3, 1, 2, 0],
 (1, 1): [3, 3, 1, 2, 0]}

In [32]:
# Display the clairvoyant reward for each feature tuple.
clairvoyant_reward

{(0, 0): 922.5322227200003,
 (0, 1): 576.5826392000001,
 (1, 0): 230.63305568000007,
 (1, 1): 576.5826392000001}

In [33]:
# Spot-check: value stored for feature tuple (1, 0) under the key (3, 3, 3, 3, 3)
# (presumably arm index 3 chosen for each of the five products — TODO confirm).
rew_per_arm[(1, 0)][(3, 3, 3, 3, 3)]

200.04131776000006

In [34]:
# Sum the per-feature clairvoyant rewards into a single aggregated baseline,
# used as the reference line in the aggregated-reward plots.
aggregated_clairvoyant_reward = sum(clairvoyant_reward.values())
aggregated_clairvoyant_reward

2306.3305568000005

In [35]:
# Experiment configuration shared by the UCB and TS sections.
n_arms = 4  # passed to ContextManager; also the column count of prods_dist
n_products = 5  # passed to ContextManager; also the row count of prods_dist

# NOTE(review): feature1/feature2 are not referenced anywhere else in the visible notebook.
feature1 = 0
feature2 = 0

CONTEXT_INTERVAL = 14  # the UCB loop rebuilds the context when t % CONTEXT_INTERVAL == 0
TIME_HORIZON = 100  # number of rounds in each iteration of the training loops
NUM_ITERATION = 1  # number of times each training loop is repeated

In [36]:
# Prepare the customers handed to the learners: a deep copy of the environment's customers
# whose alpha distribution and product counts are overwritten with flat placeholder values.
#unknown parameters
alphas = np.array([0.2, 0.2, 0.2, 0.2, 0.2]) # uniform
prods_dist = np.ones((n_products,n_arms))   # all 1

# Deep copy so the setters below do not touch environment.customers itself.
customers = copy.deepcopy(environment.customers)

for customer in customers:
    customer.set_distribution_alpha(alphas)
    customer.set_num_prods(prods_dist)

UCB

In [38]:
# Run the UCB-based contextual learner for NUM_ITERATION independent iterations of
# TIME_HORIZON rounds each, collecting per-round rewards and aggregated rewards.
rewards_iterations = []
aggregated_rewards_iterations  = []

for i in range(NUM_ITERATION):
    # A fresh ContextManager per iteration so every run starts from scratch.
    ucb4_learner = ContextManager(UCBLearner4, n_arms, n_products, customers, graph_json, arms, environment.customers_distribution)
    print('Iteration #', i+1)
    for t in range(TIME_HORIZON):
        # Rebuild the context every CONTEXT_INTERVAL rounds and show the resulting tree.
        if t % CONTEXT_INTERVAL == 0:
            ucb4_learner.build_context()
            print(str(ucb4_learner.tree))
        if t % 10 == 0:
            utils.progress_bar(t, TIME_HORIZON)
        # The per-round debug print of the super arm was removed: it emitted one line per
        # round, interleaved with the progress bar, and the TS loop does not print it.
        super_arm = ucb4_learner.select_superarm()
        report = environment.round(super_arm)
        # Pass a copy so the learner cannot alias the super arm played in this round.
        ucb4_learner.update(super_arm.copy(), report)
    rewards = ucb4_learner.history_rewards
    rewards_iterations.append(np.array(rewards))
    aggregated_rewards_iterations.append(ucb4_learner.aggregate_rewards)
rewards_iterations = np.array(rewards_iterations)
rewards_iterations.shape

Iteration # 1


Process ForkPoolWorker-12425:
Process ForkPoolWorker-12430:
Process ForkPoolWorker-12427:
Process ForkPoolWorker-12428:
Process ForkPoolWorker-12426:
Process ForkPoolWorker-12432:
Traceback (most recent call last):
Process ForkPoolWorker-12431:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):


KeyboardInterrupt: 

In [None]:
# Ad-hoc inspection: rebuild the context once more and show the tree as a string.
# NOTE(review): build_context() is also what the training loop calls, so running this cell
# presumably changes the learner's state — confirm before relying on later results.
ucb4_learner.build_context()
str(ucb4_learner.tree)

In [None]:
# Ad-hoc inspection: conversion-rate estimates of the learner reached via `.l.l` on the tree
# (presumably the left child of the left child — verify; fails if the tree is not that deep).
ucb4_learner.tree.l.l.learner.estimate_conversion_rates()

In [None]:
# UCB: mean aggregated reward across iterations (with a +/- one standard deviation band)
# compared against the aggregated clairvoyant reward.
plt.figure(figsize=(10, 5))

mean = np.mean(aggregated_rewards_iterations, axis=0)
stdev = np.std(aggregated_rewards_iterations, axis=0)

plt.plot(range(len(mean)), mean, label="UCB mean reward")
plt.fill_between(range(len(mean)), (mean-stdev), (mean+stdev), color='b', alpha=.1, label="+/- 1 std")

# Constant reference line, drawn over as many points as the last iteration's reward history.
plt.plot(range(len(rewards)), [aggregated_clairvoyant_reward] * len(rewards), label="clairvoyant")

# Labels and legend so the figure can be read on its own.
plt.title("UCB - aggregated reward")
plt.xlabel("round")
plt.ylabel("reward")
plt.legend()
plt.show()


In [None]:
# Re-index the UCB rewards by feature tuple: rewards_iterations goes from one entry per
# (iteration, round) to a dict mapping each feature tuple to a nested list
# [iteration][round] of that feature's reward.
features_list = list(clairvoyant_arms.keys())

# Idempotence guard: once rewards_iterations has been converted to a dict, re-running this
# cell would iterate over the dict's keys and fail, so convert only the first time.
if not isinstance(rewards_iterations, dict):
    rewards_iterations_tmp = {}
    for features in features_list:
        rewards_iterations_tmp[features] = [[val[features] for val in row] for row in rewards_iterations]
    rewards_iterations = rewards_iterations_tmp

In [None]:
# UCB: one figure per feature tuple showing the mean reward across iterations
# (with a +/- one standard deviation band) against that tuple's clairvoyant reward.
for features in features_list:
    plt.figure(figsize=(10, 5))

    mean = np.mean(rewards_iterations[features], axis=0)
    stdev = np.std(rewards_iterations[features], axis=0)

    plt.plot(range(len(mean)), mean, label="UCB mean reward")
    plt.fill_between(range(len(mean)), (mean-stdev), (mean+stdev), color='b', alpha=.1, label="+/- 1 std")

    # Constant reference line at the clairvoyant reward of this feature tuple.
    plt.plot(range(len(rewards)), [clairvoyant_reward[features]] * len(rewards), label="clairvoyant")

    # The title names the feature tuple; otherwise the figures cannot be told apart.
    plt.title("UCB - reward for features {}".format(features))
    plt.xlabel("round")
    plt.ylabel("reward")
    plt.legend()
    plt.show()


In [None]:
# UCB: per-feature instantaneous regret (clairvoyant reward minus obtained reward),
# averaged across iterations.
for features in features_list:
    plt.figure(figsize=(8,5))

    # rewards_iterations[features] is a nested Python list; converting it explicitly keeps the
    # subtraction element-wise even when the clairvoyant reward is a plain Python float
    # (float - list would raise TypeError).
    regret = clairvoyant_reward[features] - np.asarray(rewards_iterations[features])
    mean_regret = np.mean(regret, axis=0)

    plt.plot(range(len(mean_regret)), mean_regret)
    plt.title("UCB - mean regret for features {}".format(features))
    plt.xlabel("round")
    plt.ylabel("regret")
    plt.show()

In [None]:
# UCB: per-feature cumulative regret, accumulated along axis 1 within each iteration and
# then averaged across iterations.
for features in features_list:
    plt.figure(figsize=(8,5))
    # Explicit array conversion: rewards_iterations[features] is a nested Python list, and
    # subtracting a list from a plain Python float would raise TypeError.
    regret = clairvoyant_reward[features] - np.asarray(rewards_iterations[features])
    cumulative_regret = np.cumsum(regret, axis=1)
    cum_reg_mean = np.mean(cumulative_regret, axis=0)

    plt.plot(range(len(cum_reg_mean)), cum_reg_mean)
    plt.title("UCB - mean cumulative regret for features {}".format(features))
    plt.xlabel("round")
    plt.ylabel("cumulative regret")
    plt.show()

TS

In [None]:
# Run the TS-based contextual learner for NUM_ITERATION independent iterations of
# TIME_HORIZON rounds each, collecting per-round rewards and aggregated rewards.
rewards_iterations = []
aggregated_rewards_iterations = []
for i in range(NUM_ITERATION):
    # A fresh ContextManager per iteration so every run starts from scratch.
    ts_learner4 = ContextManager(TSLearner4, n_arms, n_products, customers, graph_json, arms, environment.customers_distribution)
    print('\nIteration #', i+1)
    for t in range(TIME_HORIZON):
        # Use the shared CONTEXT_INTERVAL constant (same value as the former literal 14) so
        # the TS and UCB experiments always rebuild their context on the same schedule.
        if t % CONTEXT_INTERVAL == 0:
            ts_learner4.build_context()
            print(str(ts_learner4.tree))

        if t % 10 == 0:
            utils.progress_bar(t, TIME_HORIZON)
        super_arm = ts_learner4.select_superarm()
        report = environment.round(super_arm)
        # Pass a copy so the learner cannot alias the super arm played in this round.
        ts_learner4.update(super_arm.copy(), report)
    rewards = ts_learner4.history_rewards
    rewards_iterations.append(np.array(rewards))
    # Fix: record the TS learner's aggregated rewards. The original appended
    # ucb4_learner.aggregate_rewards, so the TS aggregated plot showed UCB results.
    aggregated_rewards_iterations.append(ts_learner4.aggregate_rewards)

rewards_iterations = np.array(rewards_iterations)
rewards_iterations.shape

In [None]:
# Re-index the TS rewards by feature tuple: rewards_iterations goes from one entry per
# (iteration, round) to a dict mapping each feature tuple to a nested list
# [iteration][round] of that feature's reward.
features_list = list(clairvoyant_arms.keys())

# Idempotence guard: once rewards_iterations has been converted to a dict, re-running this
# cell would iterate over the dict's keys and fail, so convert only the first time.
if not isinstance(rewards_iterations, dict):
    rewards_iterations_tmp = {}
    for features in features_list:
        rewards_iterations_tmp[features] = [[val[features] for val in row] for row in rewards_iterations]
    rewards_iterations = rewards_iterations_tmp

In [None]:
# TS: mean aggregated reward across iterations (with a +/- one standard deviation band)
# compared against the aggregated clairvoyant reward.
plt.figure(figsize=(10, 5))

mean = np.mean(aggregated_rewards_iterations, axis=0)
stdev = np.std(aggregated_rewards_iterations, axis=0)

plt.plot(range(len(mean)), mean, label="TS mean reward")
plt.fill_between(range(len(mean)), (mean-stdev), (mean+stdev), color='b', alpha=.1, label="+/- 1 std")

# Constant reference line, drawn over as many points as the last iteration's reward history.
plt.plot(range(len(rewards)), [aggregated_clairvoyant_reward] * len(rewards), label="clairvoyant")

# Labels and legend so the figure can be read on its own.
plt.title("TS - aggregated reward")
plt.xlabel("round")
plt.ylabel("reward")
plt.legend()
plt.show()


In [None]:
# TS: one figure per feature tuple showing the mean reward across iterations
# (with a +/- one standard deviation band) against that tuple's clairvoyant reward.
for features in features_list:
    plt.figure(figsize=(10, 5))
    mean = np.mean(rewards_iterations[features], axis=0)
    stdev = np.std(rewards_iterations[features], axis=0)

    plt.plot(range(len(mean)), mean, label="TS mean reward")
    plt.fill_between(range(len(mean)), (mean-stdev), (mean+stdev), color='b', alpha=.1, label="+/- 1 std")

    # Constant reference line at the clairvoyant reward of this feature tuple.
    plt.plot(range(len(rewards)), [clairvoyant_reward[features]] * len(rewards), label="clairvoyant")

    # The title names the feature tuple; otherwise the figures cannot be told apart.
    plt.title("TS - reward for features {}".format(features))
    plt.xlabel("round")
    plt.ylabel("reward")
    plt.legend()
    plt.show()

In [None]:
# TS: per-feature instantaneous regret (clairvoyant reward minus obtained reward),
# averaged across iterations.
for features in features_list:
    plt.figure(figsize=(8,5))

    # rewards_iterations[features] is a nested Python list; converting it explicitly keeps the
    # subtraction element-wise even when the clairvoyant reward is a plain Python float
    # (float - list would raise TypeError).
    regret = clairvoyant_reward[features] - np.asarray(rewards_iterations[features])
    mean_regret = np.mean(regret, axis=0)

    plt.plot(range(len(mean_regret)), mean_regret)
    plt.title("TS - mean regret for features {}".format(features))
    plt.xlabel("round")
    plt.ylabel("regret")
    plt.show()

In [None]:
# Average regret over the columns from index 200 onward: mean over axis 0 first, then the
# mean of what remains.
# NOTE(review): `regret` is whatever the last pass of the preceding per-feature loop left
# behind, so this presumably covers only the final entry of features_list — confirm intent.
# NOTE(review): TIME_HORIZON is 100 in the configuration cell; if regret has one column per
# round, the slice [:, 200:] is empty and the printed value is NaN — confirm the burn-in.
a = np.mean(regret[:,200:], axis=0)
np.shape(a)  # value is discarded: it is not the cell's last expression and is not printed
print(np.mean(a))

In [None]:
# TS: per-feature cumulative regret, accumulated along axis 1 within each iteration and
# then averaged across iterations.
for features in features_list:
    plt.figure(figsize=(8,5))
    # Explicit array conversion: rewards_iterations[features] is a nested Python list, and
    # subtracting a list from a plain Python float would raise TypeError.
    regret = clairvoyant_reward[features] - np.asarray(rewards_iterations[features])
    cumulative_regret = np.cumsum(regret, axis=1)
    cum_reg_mean = np.mean(cumulative_regret, axis=0)

    plt.plot(range(len(cum_reg_mean)), cum_reg_mean)
    plt.title("TS - mean cumulative regret for features {}".format(features))
    plt.xlabel("round")
    plt.ylabel("cumulative regret")
    plt.show()