# Contextual Bandit using Vowpal Wabbit

Reference tutorial: <https://vowpalwabbit.org/docs/vowpal_wabbit/python/latest/tutorials/python_Contextual_bandits_and_Vowpal_Wabbit.html>

In [1]:
import pandas as pd

In [2]:
# Logged interactions for training, one tuple per observation; the values
# are listed in TRAIN_FIELDS order.
TRAIN_FIELDS = ("action", "cost", "probability", "feature1", "feature2", "feature3")
train_rows = [
    (1, 2, 0.4, "a", "c", ""),
    (3, 0, 0.2, "b", "d", ""),
    (4, 1, 0.5, "a", "b", ""),
    (2, 1, 0.3, "a", "b", "c"),
    (3, 1, 0.7, "a", "d", ""),
]
train_data = [dict(zip(TRAIN_FIELDS, row)) for row in train_rows]

# Build the frame and number the rows 1..N through a dedicated "index" column
# that then becomes the frame's index.
train_df = (
    pd.DataFrame(train_data)
    .assign(index=lambda frame: range(1, len(frame) + 1))
    .set_index("index")
)
train_df

Unnamed: 0_level_0,action,cost,probability,feature1,feature2,feature3
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,1,2,0.4,a,c,
2,3,0,0.2,b,d,
3,4,1,0.5,a,b,
4,2,1,0.3,a,b,c
5,3,1,0.7,a,d,


In [3]:
# Unlabelled contexts to score after training, one tuple per example; the
# values are listed in TEST_FIELDS order.
TEST_FIELDS = ("feature1", "feature2", "feature3")
test_rows = [
    ("b", "c", ""),
    ("a", "", "b"),
    ("b", "b", ""),
    ("a", "", "b"),
]
test_data = [dict(zip(TEST_FIELDS, row)) for row in test_rows]

# Build the frame and number the rows 1..N through a dedicated "index" column
# that then becomes the frame's index.
test_df = (
    pd.DataFrame(test_data)
    .assign(index=lambda frame: range(1, len(frame) + 1))
    .set_index("index")
)
test_df

Unnamed: 0_level_0,feature1,feature2,feature3
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,b,c,
2,a,,b
3,b,b,
4,a,,b


In [4]:
import vowpalwabbit

# Create the Vowpal Wabbit workspace that the cells below train and query.
# "--cb 4" selects contextual-bandit mode; the 4 is presumably the number of
# available actions (the training data uses action ids 1-4) - confirm against
# the VW documentation.
# quiet=False keeps VW's diagnostic output on, so the configuration banner and
# progress table are printed under the cells.
vw = vowpalwabbit.Workspace("--cb 4", quiet=False)

using no cache
Reading datafile = none
num sources = 0
Num weight bits = 18
learning rate = 0.5
initial_t = 0
power_t = 0.5
cb_type = mtr
Enabled learners: gd, scorer-identity, csoaa_ldf-rank, cb_adf, shared_feature_merger, cb_to_cbadf
Input label = CB
Output pred = MULTICLASS
average  since         example        example        current        current  current
loss     last          counter         weight          label        predict features


In [5]:
# Feed every logged interaction to the learner, one example at a time.
for row in train_df.itertuples(index=False):
    # VW contextual-bandit text format: "action:cost:probability | features".
    # Fields are read by column name, so the example does not depend on the
    # order of the columns in train_df.
    label = f"{row.action}:{row.cost}:{row.probability}"
    features = f"{row.feature1} {row.feature2} {row.feature3}"
    learn_example = f"{label} | {features}"

    # Update the model with this single example.
    vw.learn(learn_example)

5.000000 5.000000            1            1.0        0:2:0.4            0:0       12
2.500000 0.000000            2            2.0        2:0:0.2            1:0       12
2.083333 1.666667            4            4.0        1:1:0.3            1:0       16


In [6]:
# Score every unlabelled context and print "<row index> <predicted action>".
for row in test_df.itertuples():
    # Prediction examples carry features only, so nothing precedes the "|".
    test_example = f"| {row.feature1} {row.feature2} {row.feature3}"
    print(row.Index, vw.predict(test_example))

1 3
2 3
3 3
4 3


1.952381 1.428571            8            8.0        unknown         2:0.13       12


In [7]:
# Round-trip the trained model through disk: save it, discard the in-memory
# workspace, then build a new workspace from the saved file.
# "cb.model" is a relative path, so the file lands in the kernel's current
# working directory.
vw.save("cb.model")
# Drop this notebook's reference to the trained workspace so the prediction
# below can only come from the reloaded one.
del vw

# "-i cb.model" points the new workspace at the file written above
# (presumably VW's initial-regressor option - confirm against the VW docs).
# quiet=True turns the diagnostic output off for this workspace.
vw = vowpalwabbit.Workspace("--cb 4 -i cb.model", quiet=True)
# Sanity check: the reloaded workspace can score a context.
print(vw.predict("| a b"))

3



finished run
number of examples = 9
weighted example sum = 9.000000
weighted label sum = 0.000000
average loss = 1.952381
total feature number = 112


In [8]:
# Probe the model: which action does it choose when only feature "a" is present?
print(vw.predict("| a"))

3


In [9]:
# Probe the model: which action does it choose when only feature "b" is present?
print(vw.predict("| b"))

3


In [10]:
# Probe the model with three features at once: "a", "b" and "c".
print(vw.predict("| a b c"))

3


In [11]:
# Probe the model with the feature pair "a" and "d".
print(vw.predict("| a d"))

4


In [12]:
# Interactive online learning on the fixed context "| a b".
#
# Each round reads an action id from the user, teaches the model that this
# action was taken with cost 1 and logging probability 0.25, then prints the
# action the model now predicts for the same context.
#
# Input handling:
#   * empty / whitespace-only input -> defaults to action 1
#   * negative integer              -> leaves the loop
#   * integer in 1..4               -> used for one learning step
#   * anything else                 -> rejected with a message and re-prompted,
#                                      so the learner never receives an action
#                                      id outside the range the prompt promises
#                                      and a typo cannot crash the cell
while True:
    n = input("enter a number between 1 to 4:").strip() or "1"
    try:
        action = int(n)
    except ValueError:
        print(f"{n!r} is not an integer; enter 1-4, or a negative number to stop")
        continue
    if action < 0:
        break
    if not 1 <= action <= 4:
        print(f"{action} is out of range; enter 1-4, or a negative number to stop")
        continue
    cost, probability = 1, 0.25

    # Construct the example in the required vw format.
    learn_example = f"{action}:{cost}:{probability} | a b"

    # Here we do the actual learning.
    vw.learn(learn_example)

    # Show how the prediction for this context responds to the update.
    print(vw.predict("| a b"))

enter a number between 1 to 4: -1
