In [1]:
import numpy as np
import pandas as pd

from mabwiser.mab import MAB, LearningPolicy

######################################################################################
#
# MABWiser
# Scenario: A/B Testing for Website Layout Design
#
# An e-commerce website experiments with 2 different layout options for its homepage.
# Each layout decision generates a different revenue.
#
# What should the choice of layouts be based on historical data?
#
######################################################################################


In [2]:
# Arms: the two homepage layouts under test
options = [1, 2]

# Historical log kept as two parallel lists: layouts[i] is the layout that was
# shown and revenues[i] is the revenue that decision generated.
layouts = [1, 1, 1, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1, 2, 1]
revenues = [10, 17, 22, 9, 4, 0, 7, 8, 20, 9, 50, 5, 7, 12, 10]

# Feature vector per arm. Arm 3 is not in `options`; it is added to the model
# later and these features are passed to warm_start.
arm_to_features = {
    1: [0, 0, 1],
    2: [1, 1, 0],
    3: [1, 0, 1],
}

In [3]:
# ---------------------------------
# Epsilon Greedy learning policy
# ---------------------------------

# Bandit over the layout arms; random exploration is set to 15% (epsilon=0.15)
# and the seed is fixed so repeated runs give the same results.
greedy = MAB(
    arms=options,
    learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0.15),
    seed=123456,
)

# Train on the historical layout decisions and the revenue each one generated
greedy.fit(decisions=layouts, rewards=revenues)

In [4]:
# Inspect the neighborhood policy: none was passed to MAB(...) above, and this prints None.
print(greedy.neighborhood_policy)

None


In [5]:
# Ask the fitted model for the next best layout decision
prediction = greedy.predict()

In [6]:
# Expected revenue of each layout, learnt from the historical data under the epsilon greedy policy
expectations = greedy.predict_expectations()

In [7]:
# Show the chosen layout together with the per-layout expectations
print("Epsilon Greedy: ", prediction, " ", expectations)

# Layout 2 has the higher expected revenue in the output below, so it should be the pick
assert prediction == 2

Epsilon Greedy:  2   {1: 10.875, 2: 14.714285714285714}


In [8]:
# Sanity check: the expectation reported for layout 1 should equal the mean of its
# historical revenues. The values are derived from `layouts`/`revenues` instead of a
# hand-copied list, so this check cannot drift out of sync with the data.
np.average([revenue for layout, revenue in zip(layouts, revenues) if layout == 1])

10.875

In [9]:
# Sanity check: the expectation reported for layout 2 should equal the mean of its
# historical revenues. The values are derived from `layouts`/`revenues` instead of a
# hand-copied list, so this check cannot drift out of sync with the data.
np.average([revenue for layout, revenue in zip(layouts, revenues) if layout == 2])

14.714285714285714

In [10]:
# New observations become available after the initial fit, which allows online learning
additional_layouts = [1, 2, 1, 2]
additional_revenues = [0, 12, 7, 19]

# Fold the new observations into the already-fitted model
greedy.partial_fit(decisions=additional_layouts, rewards=additional_revenues)

In [11]:
# NOTE(review): the original author's note here asked "what is this?". The values in the
# output below lie between 0 and 1 and are not the per-layout mean revenues that the
# other predict_expectations() outputs in this notebook show. Presumably this call fell
# into the epsilon (15%) exploration branch, which returns random expectations instead of
# the learnt ones -- confirm against the MABWiser EpsilonGreedy documentation.
greedy.predict_expectations()

{1: 0.9552527396157818, 2: 0.906050936603124}

In [12]:
# Add a new layout option (arm 3); none of the logged decisions above use this arm
greedy.add_arm(3)

arm:3


In [13]:
# Warm start the new arm using the per-arm feature vectors in arm_to_features.
# The output below reports cold arm 3 being paired with warm arm 1.
# NOTE(review): distance_quantile=0.5 presumably limits how far apart (in feature space)
# a cold/warm pair may be -- confirm against the MABWiser warm_start documentation.
greedy.warm_start(arm_to_features, distance_quantile=0.5)

copy
cold_arm_to_warm_arm:{3: 1}
cold:3, warm:1


In [14]:
# Expectations after the online update and the warm start: in the output below, arm 3
# shows the same value as arm 1, the arm it was paired with during warm start.
greedy.predict_expectations()

{1: 9.4, 2: 14.88888888888889, 3: 9.4}

In [15]:
# Sanity check: after the online update, the expectation for layout 1 should equal the
# mean of its revenues across the historical and the additional observations. The values
# are derived from the data variables instead of a hand-copied list, so this check
# cannot drift out of sync with the data.
np.average([
    revenue
    for layout, revenue in zip(layouts + additional_layouts, revenues + additional_revenues)
    if layout == 1
])

9.4

In [16]:
# Sanity check: after the online update, the expectation for layout 2 should equal the
# mean of its revenues across the historical and the additional observations. The values
# are derived from the data variables instead of a hand-copied list, so this check
# cannot drift out of sync with the data.
np.average([
    revenue
    for layout, revenue in zip(layouts + additional_layouts, revenues + additional_revenues)
    if layout == 2
])

14.88888888888889