In [4]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

from mabwiser.mab import MAB, LearningPolicy, NeighborhoodPolicy

######################################################################################
#
# MABWiser
# Scenario: Advertisement Optimization
#
# An e-commerce website needs to solve the problem of which ad to display to online users
# Each advertisement decision leads to generating different revenues
#
# What should the choice of advertisement be given the context of an online user
# based on customer data such as age, click rate, subscriber?
#
######################################################################################

# Arms
ads = [1, 2, 3, 4, 5]

# Historical data of ad decisions with corresponding revenues and context information
train_df = pd.DataFrame({'ad': [1, 1, 1, 2, 4, 5, 3, 3, 2, 1, 4, 5, 3, 2, 5],
                                                     'revenues': [10, 17, 22, 9, 4, 20, 7, 8, 20, 9, 50, 5, 7, 12, 10],
                                                     'age': [22, 27, 39, 48, 21, 20, 19, 37, 52, 26, 18, 42, 55, 57, 38],
                                                     'click_rate': [0.2, 0.6, 0.99, 0.68, 0.15, 0.23, 0.75, 0.17,
                                                                                    0.33, 0.65, 0.56, 0.22, 0.19, 0.11, 0.83],
                                                     'subscriber': [1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0]}
                                                     )

# Arm features for warm start
arm_to_features = {1: [0, 1, 1], 2: [0, 0.5, 0.5], 3: [1, 1, 0.5], 4: [0.2, 1, 0], 5: [0, 1, 0.1], 6: [0, 0.5, 0.5]}

# Test data to for new prediction
test_df = pd.DataFrame({'age': [37, 52], 'click_rate': [0.5, 0.6], 'subscriber': [0, 1]})
test_df_revenue = pd.Series([7, 13])

# Scale the training and test data
scaler = StandardScaler()
train = scaler.fit_transform(train_df[['age', 'click_rate', 'subscriber']])
test = scaler.transform(test_df)

##################################################
# Linear Upper Confidence Bound Learning Policy
##################################################

# LinUCB learning policy with alpha 1.25 and l2_lambda 1
linucb = MAB(arms=ads,
                             learning_policy=LearningPolicy.LinUCB(alpha=1.25, l2_lambda=1))

# Learn from previous ads shown and revenues generated
linucb.fit(decisions=train_df['ad'], rewards=train_df['revenues'], contexts=train)

# Predict the next best ad to show
prediction = linucb.predict(test)

# Expectation of each ad based on learning from past ad revenues
expectations = linucb.predict_expectations(test)

# Results
print("LinUCB: ", prediction, " ", expectations)
assert(prediction == [5, 2])

# Online update of model
linucb.partial_fit(decisions=prediction, rewards=test_df_revenue, contexts=test)

# Update the model with new arm
linucb.add_arm(6)

# Warm start new arm
linucb.warm_start(arm_to_features, distance_quantile=0.75)

LinUCB:  [5, 2]   [{1: -1.3094012976959437, 2: 0.10514406010286892, 3: -1.9129767639752258, 4: -10.291130147595512, 5: 9.66516106096636}, {1: -4.524478702267455, 2: 14.652897134463046, 3: 4.52468441018687, 4: -3.257354449781908, 5: -8.684806295018186}]


In [43]:
from mabwiser.mab import MAB, LearningPolicy
from mabwiser.simulator import Simulator

arms = ['Arm1', 'Arm2']
decisions = ['Arm1', 'Arm1', 'Arm2', 'Arm1']
rewards = [20, 17, 25, 9]
mab1 = MAB(arms, LearningPolicy.EpsilonGreedy(epsilon=0.25), seed=123456)
mab2 = MAB(arms, LearningPolicy.EpsilonGreedy(epsilon=0.30), seed=123456)
bandits = [('EG 25%', mab1), ('EG 30%', mab2)]
offline_sim = Simulator(bandits, decisions, rewards, test_size=0.5, batch_size=1)
offline_sim.run()
offline_sim

2023-02-02 16:48:57,680 INFO Simulation Parameters
2023-02-02 16:48:57,680 INFO Simulation Parameters
2023-02-02 16:48:57,680 INFO Simulation Parameters
2023-02-02 16:48:57,680 INFO Simulation Parameters
2023-02-02 16:48:57,680 INFO Simulation Parameters
2023-02-02 16:48:57,680 INFO Simulation Parameters
2023-02-02 16:48:57,680 INFO Simulation Parameters
2023-02-02 16:48:57,680 INFO Simulation Parameters
2023-02-02 16:48:57,685 INFO 	 bandits: [('EG 25%', <mabwiser.mab.MAB object at 0x00000267BAD47DF0>), ('EG 30%', <mabwiser.mab.MAB object at 0x00000267BAD47A30>)]
2023-02-02 16:48:57,685 INFO 	 bandits: [('EG 25%', <mabwiser.mab.MAB object at 0x00000267BAD47DF0>), ('EG 30%', <mabwiser.mab.MAB object at 0x00000267BAD47A30>)]
2023-02-02 16:48:57,685 INFO 	 bandits: [('EG 25%', <mabwiser.mab.MAB object at 0x00000267BAD47DF0>), ('EG 30%', <mabwiser.mab.MAB object at 0x00000267BAD47A30>)]
2023-02-02 16:48:57,685 INFO 	 bandits: [('EG 25%', <mabwiser.mab.MAB object at 0x00000267BAD47DF0>), (

2023-02-02 16:48:57,764 INFO Train/Test Split
2023-02-02 16:48:57,764 INFO Train/Test Split
2023-02-02 16:48:57,764 INFO Train/Test Split
2023-02-02 16:48:57,764 INFO Train/Test Split
2023-02-02 16:48:57,764 INFO Train/Test Split
2023-02-02 16:48:57,764 INFO Train/Test Split
2023-02-02 16:48:57,764 INFO Train/Test Split
2023-02-02 16:48:57,764 INFO Train/Test Split
2023-02-02 16:48:57,769 INFO Train size: 2
2023-02-02 16:48:57,769 INFO Train size: 2
2023-02-02 16:48:57,769 INFO Train size: 2
2023-02-02 16:48:57,769 INFO Train size: 2
2023-02-02 16:48:57,769 INFO Train size: 2
2023-02-02 16:48:57,769 INFO Train size: 2
2023-02-02 16:48:57,769 INFO Train size: 2
2023-02-02 16:48:57,769 INFO Train size: 2
2023-02-02 16:48:57,774 INFO Test size: 2
2023-02-02 16:48:57,774 INFO Test size: 2
2023-02-02 16:48:57,774 INFO Test size: 2
2023-02-02 16:48:57,774 INFO Test size: 2
2023-02-02 16:48:57,774 INFO Test size: 2
2023-02-02 16:48:57,774 INFO Test size: 2
2023-02-02 16:48:57,774 INFO Test si

2023-02-02 16:48:57,861 INFO EG 25% batch 0 confusion matrix: [[0 1]
 [0 0]]
2023-02-02 16:48:57,861 INFO EG 25% batch 0 confusion matrix: [[0 1]
 [0 0]]
2023-02-02 16:48:57,861 INFO EG 25% batch 0 confusion matrix: [[0 1]
 [0 0]]
2023-02-02 16:48:57,861 INFO EG 25% batch 0 confusion matrix: [[0 1]
 [0 0]]
2023-02-02 16:48:57,867 INFO EG 25% {'Arm1': {'count': 0, 'sum': nan, 'min': nan, 'max': nan, 'mean': nan, 'std': nan}, 'Arm2': {'count': 1, 'sum': 25, 'min': 25, 'max': 25, 'mean': 25.0, 'std': 0.0}}
2023-02-02 16:48:57,867 INFO EG 25% {'Arm1': {'count': 0, 'sum': nan, 'min': nan, 'max': nan, 'mean': nan, 'std': nan}, 'Arm2': {'count': 1, 'sum': 25, 'min': 25, 'max': 25, 'mean': 25.0, 'std': 0.0}}
2023-02-02 16:48:57,867 INFO EG 25% {'Arm1': {'count': 0, 'sum': nan, 'min': nan, 'max': nan, 'mean': nan, 'std': nan}, 'Arm2': {'count': 1, 'sum': 25, 'min': 25, 'max': 25, 'mean': 25.0, 'std': 0.0}}
2023-02-02 16:48:57,867 INFO EG 25% {'Arm1': {'count': 0, 'sum': nan, 'min': nan, 'max': 

2023-02-02 16:48:57,896 INFO EG 30% {'Arm1': {'count': 0, 'sum': nan, 'min': nan, 'max': nan, 'mean': nan, 'std': nan}, 'Arm2': {'count': 1, 'sum': 25.0, 'min': 25.0, 'max': 25.0, 'mean': 25.0, 'std': 0.0}}
2023-02-02 16:48:57,896 INFO EG 30% {'Arm1': {'count': 0, 'sum': nan, 'min': nan, 'max': nan, 'mean': nan, 'std': nan}, 'Arm2': {'count': 1, 'sum': 25.0, 'min': 25.0, 'max': 25.0, 'mean': 25.0, 'std': 0.0}}
2023-02-02 16:48:57,896 INFO EG 30% {'Arm1': {'count': 0, 'sum': nan, 'min': nan, 'max': nan, 'mean': nan, 'std': nan}, 'Arm2': {'count': 1, 'sum': 25.0, 'min': 25.0, 'max': 25.0, 'mean': 25.0, 'std': 0.0}}
2023-02-02 16:48:57,896 INFO EG 30% {'Arm1': {'count': 0, 'sum': nan, 'min': nan, 'max': nan, 'mean': nan, 'std': nan}, 'Arm2': {'count': 1, 'sum': 25.0, 'min': 25.0, 'max': 25.0, 'mean': 25.0, 'std': 0.0}}
2023-02-02 16:48:57,896 INFO EG 30% {'Arm1': {'count': 0, 'sum': nan, 'min': nan, 'max': nan, 'mean': nan, 'std': nan}, 'Arm2': {'count': 1, 'sum': 25.0, 'min': 25.0, 'max'

2023-02-02 16:48:57,929 INFO EG 25% {'Arm1': {'count': 0, 'sum': nan, 'min': nan, 'max': nan, 'mean': nan, 'std': nan}, 'Arm2': {'count': 1, 'sum': 25, 'min': 25, 'max': 25, 'mean': 25.0, 'std': 0.0}}
2023-02-02 16:48:57,929 INFO EG 25% {'Arm1': {'count': 0, 'sum': nan, 'min': nan, 'max': nan, 'mean': nan, 'std': nan}, 'Arm2': {'count': 1, 'sum': 25, 'min': 25, 'max': 25, 'mean': 25.0, 'std': 0.0}}
2023-02-02 16:48:57,929 INFO EG 25% {'Arm1': {'count': 0, 'sum': nan, 'min': nan, 'max': nan, 'mean': nan, 'std': nan}, 'Arm2': {'count': 1, 'sum': 25, 'min': 25, 'max': 25, 'mean': 25.0, 'std': 0.0}}
2023-02-02 16:48:57,934 INFO EG 25% updated
2023-02-02 16:48:57,934 INFO EG 25% updated
2023-02-02 16:48:57,934 INFO EG 25% updated
2023-02-02 16:48:57,934 INFO EG 25% updated
2023-02-02 16:48:57,934 INFO EG 25% updated
2023-02-02 16:48:57,934 INFO EG 25% updated
2023-02-02 16:48:57,934 INFO EG 25% updated
2023-02-02 16:48:57,934 INFO EG 25% updated
2023-02-02 16:48:57,939 INFO EG 30% batch 1 c

2023-02-02 16:48:57,969 INFO EG 25% {'Arm1': {'count': 0, 'sum': nan, 'min': nan, 'max': nan, 'mean': nan, 'std': nan}, 'Arm2': {'count': 2, 'sum': 50, 'min': 25, 'max': 25, 'mean': 25.0, 'std': 0.0}}
2023-02-02 16:48:57,969 INFO EG 25% {'Arm1': {'count': 0, 'sum': nan, 'min': nan, 'max': nan, 'mean': nan, 'std': nan}, 'Arm2': {'count': 2, 'sum': 50, 'min': 25, 'max': 25, 'mean': 25.0, 'std': 0.0}}
2023-02-02 16:48:57,969 INFO EG 25% {'Arm1': {'count': 0, 'sum': nan, 'min': nan, 'max': nan, 'mean': nan, 'std': nan}, 'Arm2': {'count': 2, 'sum': 50, 'min': 25, 'max': 25, 'mean': 25.0, 'std': 0.0}}
2023-02-02 16:48:57,969 INFO EG 25% {'Arm1': {'count': 0, 'sum': nan, 'min': nan, 'max': nan, 'mean': nan, 'std': nan}, 'Arm2': {'count': 2, 'sum': 50, 'min': 25, 'max': 25, 'mean': 25.0, 'std': 0.0}}
2023-02-02 16:48:57,972 INFO EG 25% {'Arm1': {'count': 0, 'sum': nan, 'min': nan, 'max': nan, 'mean': nan, 'std': nan}, 'Arm2': {'count': 2, 'sum': 50.0, 'min': 25.0, 'max': 25.0, 'mean': 25.0, 's

2023-02-02 16:48:57,995 INFO EG 30% {'Arm1': {'count': 0, 'sum': nan, 'min': nan, 'max': nan, 'mean': nan, 'std': nan}, 'Arm2': {'count': 2, 'sum': 50, 'min': 25, 'max': 25, 'mean': 25.0, 'std': 0.0}}
2023-02-02 16:48:57,995 INFO EG 30% {'Arm1': {'count': 0, 'sum': nan, 'min': nan, 'max': nan, 'mean': nan, 'std': nan}, 'Arm2': {'count': 2, 'sum': 50, 'min': 25, 'max': 25, 'mean': 25.0, 'std': 0.0}}
2023-02-02 16:48:57,995 INFO EG 30% {'Arm1': {'count': 0, 'sum': nan, 'min': nan, 'max': nan, 'mean': nan, 'std': nan}, 'Arm2': {'count': 2, 'sum': 50, 'min': 25, 'max': 25, 'mean': 25.0, 'std': 0.0}}
2023-02-02 16:48:57,995 INFO EG 30% {'Arm1': {'count': 0, 'sum': nan, 'min': nan, 'max': nan, 'mean': nan, 'std': nan}, 'Arm2': {'count': 2, 'sum': 50, 'min': 25, 'max': 25, 'mean': 25.0, 'std': 0.0}}
2023-02-02 16:48:57,995 INFO EG 30% {'Arm1': {'count': 0, 'sum': nan, 'min': nan, 'max': nan, 'mean': nan, 'std': nan}, 'Arm2': {'count': 2, 'sum': 50, 'min': 25, 'max': 25, 'mean': 25.0, 'std': 0

<mabwiser.simulator.Simulator at 0x267bad479d0>

In [7]:
import pandas as pd
df = pd.read_csv('dat/choice_data.csv')
print(df)

     action   DB  DBVar  ND   AU  No  rain_1_3_c  rain_4_10_c  temp_1_3_c  \
0       SRC  800     15  24  200   0   319.44146   366.180853   -0.497128   
1        AC  800     30  24  200   1   319.44146   366.180853   -0.497128   
2        SQ  400     15   3    0   1   319.44146   366.180853   -0.497128   
3       SRC  600     15  20  200   0   319.44146   366.180853   -0.497128   
4        AC  800     30  20  100   0   319.44146   366.180853   -0.497128   
...     ...  ...    ...  ..  ...  ..         ...          ...         ...   
7123     AC  400     30  24    0   0   -90.34188   -83.341093    0.841481   
7124     SQ  400     15   3    0   1   -90.34188   -83.341093    0.841481   
7125    SRC  600     30  16  100   1   -90.34188   -83.341093    0.841481   
7126     AC  800     30  24  100   1   -90.34188   -83.341093    0.841481   
7127     SQ  400     15   3    0   1   -90.34188   -83.341093    0.841481   

      temp_4_10_c   dd_1_3_c  dd_4_10_c  r20_1_3_c  r20_4_10_c   hd_1_3_c  

In [6]:
# Arms
ads = ['SRC', 'AC', 'SQ']

In [23]:
from sklearn.preprocessing import StandardScaler
# Scale the training and test data
scaler = StandardScaler()
train = scaler.fit_transform(df[['DB',  'DBVar',  'ND',   'AU',  'No', 'rain_1_3_c',  'rain_4_10_c',  'temp_1_3_c', 'temp_4_10_c',   'dd_1_3_c',  'dd_4_10_c',  'r20_1_3_c',  'r20_4_10_c',   'hd_1_3_c', 'hd_4_10_c']])

In [24]:
from mabwiser.mab import MAB, LearningPolicy, NeighborhoodPolicy

##################################################
# Linear Upper Confidence Bound Learning Policy
##################################################

# LinUCB learning policy with alpha 1.25 and l2_lambda 1
linucb = MAB(arms=ads,
                             learning_policy=LearningPolicy.LinUCB(alpha=1.25, l2_lambda=1))

# Learn from previous ads shown and revenues generated
linucb.fit(decisions=df['action'], rewards=df['linpred'], contexts=train)

prediction = linucb.predict(train)


In [28]:
prediction.count('AC')

617