In [None]:
import pandas as pd
import numpy as np
from vowpalwabbit import pyvw
import matplotlib.pyplot as plt
import math
import slates
%matplotlib inline

In [None]:
# Two slots, each offering the integer actions 0..9.
actions = [list(range(10)) for _ in range(2)]

# Per-slot action strings: "x=<n>" for the first slot, "y=<n>" for the second.
slate_action_sets = [
    ["x=" + str(value) for value in actions[0]],
    ["y=" + str(value) for value in actions[1]],
]

# Flattened action set for the contextual-bandit baseline, built by the
# project helper `slates.combine` (presumably one entry per (x, y) pair such
# as "x=3 y=3" -- confirm against the helper).
cb_action_set = slates.combine(actions, ["x", "y"])
print(len(cb_action_set))

# VowpalWabbit command lines for the slates learner and the CB baseline.
slate_args = "--ccb_explore_adf --cb_type ips --power_t 0 -l 0.000001 --slate --quiet --epsilon 0.2 --interactions :: UAS"
cb_args = "--cb_explore_adf --cb_type ips --power_t 0 -l 0.000001 --quiet --epsilon 0.2 --interactions ::"

In [None]:
# Number of examples each model predicts on and learns from within one round.
NUM_ITER = 10000
# Number of independent rounds; every round creates a new model.
NUM_ROUNDS = 10
# Number of examples between checkpoints; at each checkpoint the count of
# cost -1 examples is recorded and the running cost histogram is reset.
EXAMPLES_IN_EACH_CHECKPOINT=100

In [None]:
# Results keyed by the VW argument string of each experiment.
outcomes = {}

In [None]:
def cost_func_slates(prediction):
    """Return the cost of a slate prediction.

    The cost is -1 when the first entry of slot 0 and the first entry of
    slot 1 both carry action 3, and 1 otherwise. Slot 1 is only inspected
    when slot 0 already matches.
    """
    hit_target = prediction[0][0][0] == 3 and prediction[1][0][0] == 3
    return -1 if hit_target else 1
    
def cost_func_cb(chosen_action):
    """Return -1 when the chosen action is exactly "x=3 y=3", otherwise 1."""
    return -1 if chosen_action == "x=3 y=3" else 1

# Slates

In [None]:
from collections import defaultdict

# Maps a checkpoint (number of examples seen so far in a round) to a list
# with one entry per round: how many of the preceding
# EXAMPLES_IN_EACH_CHECKPOINT examples received cost -1.
cost_dicts = defaultdict(list)

for num in range(NUM_ROUNDS):
    print('Iter:',num)
    # A new model is created for every round.
    model = pyvw.vw(slate_args)
    slates_outcomes = []
    # Start every round with an empty cost histogram so that a partially
    # filled checkpoint window cannot carry over from the previous round.
    current_cost_dict = defaultdict(int)
    for i in range(1,1+NUM_ITER):
        shared_context = "constant"
        examples = slates.create_slates_example(model, shared_context, slate_action_sets)
        pred = slates.slate_pred_conv(model.predict(examples, prediction_type=pyvw.pylibvw.vw.pDECISION_SCORES))
        model.finish_example(examples)

        # Choose the slot to sample
        chosen_slot = np.random.choice(len(pred))
        slot_to_sample = pred[chosen_slot]
        # Sample an index from this slot
        index = slates.sample_index(slot_to_sample)
        # Swap sampled action if it was not the 0th item.
        if index != 0:
            slot_to_sample[0], slot_to_sample[index] = slot_to_sample[index], slot_to_sample[0]
        # Assign the potentially modified slot back into the prediction
        pred[chosen_slot] = slot_to_sample

        cost = cost_func_slates(pred)
        # One (action, cost, probability) outcome per slot, each built from the
        # first entry of that slot's own prediction.
        x_outcome = (pred[0][0][0], cost, pred[0][0][1])
        y_outcome = (pred[1][0][0], cost, pred[1][0][1])
        # Only outcomes where both slots were chosen with probability > 0.5
        # are kept for the rolling-cost plot.
        if pred[0][0][1] > 0.5 and pred[1][0][1] > 0.5:
            slates_outcomes.append((x_outcome, y_outcome))

        current_cost_dict[cost] += 1
        if i % EXAMPLES_IN_EACH_CHECKPOINT == 0:
            print(i, current_cost_dict)
            # Record how many examples in this window had cost -1, then
            # start a new window.
            cost_dicts[i].append(current_cost_dict[-1])
            current_cost_dict = defaultdict(int)

        # Rebuild the example with the observed outcomes attached and learn from it.
        examples = slates.create_slates_example(model, shared_context, slate_action_sets,  [x_outcome,y_outcome])
        model.learn(examples)
        model.finish_example(examples)

outcomes[slate_args] = cost_dicts

# Combinatorial CB

In [None]:
from collections import defaultdict

# Maps a checkpoint (number of examples seen so far in a round) to a list
# with one entry per round: the number of cost -1 examples in that window.
cost_dicts = defaultdict(list)
current_cost_dict = defaultdict(int)

for round_index in range(NUM_ROUNDS):

    print('Iter:',round_index)
    # A new model is created for every round.
    cb_model = pyvw.vw(cb_args)

    cb_outcomes = []
    current_cost_dict = defaultdict(int)

    for example_count in range(1, NUM_ITER + 1):
        shared_context = "constant"

        # Predict a score for every combined action.
        examples = slates.create_cb_example(cb_model, shared_context, cb_action_set)
        action_scores = cb_model.predict(examples, prediction_type=pyvw.pylibvw.vw.pACTION_SCORES)
        cb_model.finish_example(examples)

        # Sample an action index using the normalized scores as probabilities.
        sampling_probs = slates.normalize(action_scores)
        chosen_action_index = np.random.choice(len(action_scores), p=sampling_probs)
        cost = cost_func_cb(cb_action_set[chosen_action_index])
        chosen_prob = action_scores[chosen_action_index]

        outcome = (chosen_action_index, cost, chosen_prob)
        current_cost_dict[cost] += 1
        # Only outcomes chosen with score > 0.5 are kept for the rolling-cost plot.
        if chosen_prob > 0.5:
            cb_outcomes.append(outcome)

        reached_checkpoint = example_count % EXAMPLES_IN_EACH_CHECKPOINT == 0
        if reached_checkpoint:
            print(example_count, current_cost_dict)
            # Record the number of cost -1 examples in this window, then reset.
            cost_dicts[example_count].append(current_cost_dict[-1])
            current_cost_dict = defaultdict(int)

        # Rebuild the example with the observed outcome attached and learn from it.
        examples = slates.create_cb_example(cb_model, shared_context, cb_action_set, outcome=outcome)
        cb_model.learn(examples)
        cb_model.finish_example(examples)

outcomes[cb_args] = cost_dicts

In [None]:
# Rolling mean (window of 100) of the cost of the outcomes retained from the
# most recent slates and combinatorial-CB rounds.
plt.figure(figsize=(20, 8))
number_of_samples = max(len(cb_outcomes), len(slates_outcomes))

# Each slates outcome is a pair of (idx, cost, prob) triples; the cost is read
# from the first triple. Each CB outcome is a single (idx, cost, prob) triple.
slate_costs = pd.Series([x_part[1] for (x_part, _y_part) in slates_outcomes])
cb_costs = pd.Series([single[1] for single in cb_outcomes])
plt.plot(slate_costs.rolling(100, min_periods=0).mean(), label="slate")
plt.plot(cb_costs.rolling(100, min_periods=0).mean(), label="combinatorial")

plt.ylabel("Cost")
# Reference line: a constant cost of -1 for every sample.
optimal_policy_results = [-1] * number_of_samples
best_series = pd.Series(optimal_policy_results).rolling(5, min_periods=0).mean()
plt.plot(best_series, color='b', linestyle=':', label="best")

plt.legend(loc="upper right")
plt.show()

In [None]:
# For every experiment, plot the mean (with standard deviation across rounds)
# fraction of examples in each checkpoint window that received cost -1.
plt.figure(figsize=(20, 10))
for args, results in outcomes.items():
    checkpoints = list(results.keys())
    mean_fraction = [np.average(counts)/EXAMPLES_IN_EACH_CHECKPOINT for counts in results.values()]
    std_fraction = [np.std(counts)/EXAMPLES_IN_EACH_CHECKPOINT for counts in results.values()]

    # Kept ahead of errorbar on purpose: calling ylim first fixes the y-limits
    # before any data is drawn, which is what the original figure relied on.
    plt.ylim(top=1.1)

    plt.errorbar(x=checkpoints, y=mean_fraction, yerr=std_fraction, label=args)
    plt.xticks(np.arange(min(checkpoints), max(checkpoints)+1, EXAMPLES_IN_EACH_CHECKPOINT*10))

# Dotted horizontal reference line at 0.8 spanning the trained range. The
# extent is derived from the run constants so it follows configuration changes.
# NOTE(review): 0.8 presumably corresponds to 1 - epsilon for --epsilon 0.2; confirm.
plt.plot([EXAMPLES_IN_EACH_CHECKPOINT, NUM_ITER], [0.8, 0.8], color='b', linestyle=':')
plt.legend(loc="upper right")
plt.ylabel("Proportion of right actions chosen in last {} examples".format(EXAMPLES_IN_EACH_CHECKPOINT))
plt.xlabel("Number of examples trained")
plt.savefig('plot.svg', bbox_inches='tight')
plt.show()