In [1]:
# Run this cell for every experiment
import pandas as pd
import numpy as np
import pickle
import platform
from sklearn.preprocessing import StandardScaler

from mabwiser.mab import MAB
from mabwiser.linear import _RidgeRegression, _Linear

class LinTSExample(_RidgeRegression):
    """Per-arm ridge regression model that predicts via Thompson sampling.

    Instead of using the fitted coefficients directly, ``predict`` draws a
    coefficient vector from a multivariate normal centred on ``self.beta``
    and scores the context with that draw.
    """

    def predict(self, x):
        """Return the dot product of ``x`` with a freshly sampled coefficient vector.

        Parameters
        ----------
        x : array-like
            Context(s) to score. Passed through ``self._scale_predict_context``
            first when ``self.scaler`` is set.

        Returns
        -------
        The result of ``np.dot(x, beta_sampled)``.
        """
        if self.scaler is not None:
            x = self._scale_predict_context(x)

        # Covariance of the sampling distribution: A_inv scaled by alpha squared
        covar = (self.alpha ** 2) * self.A_inv

        # Sample from the generator given to this model's constructor rather than
        # from a notebook-level global named `rng`; the global only exists after a
        # later cell has run and can be rebound without this model noticing.
        # NOTE(review): relies on _RidgeRegression.__init__ storing its first
        # argument as `self.rng` — confirm against the installed mabwiser version.
        beta_sampled = self.rng.multivariate_normal(self.beta, covar)
        return np.dot(x, beta_sampled)
    
class LinearExample(_Linear):
    """Linear bandit policy that builds its per-arm models from this class's factory."""

    # Maps the `regression` keyword to the per-arm model class to instantiate
    factory = {"ts": LinTSExample}

    def __init__(self, rng, arms, n_jobs=1, backend=None, l2_lambda=1, alpha=1, regression='ts', arm_to_scaler=None):
        """Initialise the base policy, then install one model per arm.

        Parameters
        ----------
        rng : random generator forwarded to the base class and to every arm model.
        arms : iterable of arm identifiers.
        n_jobs, backend : forwarded unchanged to the base constructor.
        l2_lambda, alpha : stored on the instance and forwarded to every arm model.
        regression : key looked up in ``LinearExample.factory``.
        arm_to_scaler : optional mapping of arm -> scaler; when omitted every
            arm is given ``None`` as its scaler.
        """
        super().__init__(rng, arms, n_jobs, backend, l2_lambda, alpha, regression)

        self.l2_lambda = l2_lambda
        self.alpha = alpha
        self.regression = regression
        self.num_features = None

        # No scalers supplied: every arm maps to None
        if arm_to_scaler is None:
            arm_to_scaler = dict.fromkeys(arms)

        # One model per arm; this assignment replaces any arm_to_model attribute
        # the base constructor may already have set
        self.arm_to_model = {
            arm: LinearExample.factory.get(regression)(rng, l2_lambda, alpha, arm_to_scaler[arm])
            for arm in arms
        }

# Arm identifiers available to the bandit
arms = [1, 2, 3]

# Training contexts: ten rows of five features each
context_history = np.array([
    [0, 1, 2, 3, 5],
    [1, 1, 1, 1, 1],
    [0, 0, 1, 0, 0],
    [0, 2, 2, 3, 5],
    [1, 3, 1, 1, 1],
    [0, 0, 0, 0, 0],
    [0, 1, 4, 3, 5],
    [0, 1, 2, 4, 5],
    [1, 2, 1, 1, 3],
    [0, 2, 1, 0, 0],
])

# Decision and reward arrays passed to fit() alongside context_history
decisions = np.array([1, 1, 1, 2, 2, 3, 3, 3, 3, 3])
rewards = np.array([0, 0, 1, 0, 0, 0, 0, 1, 1, 1])

# Contexts passed to predict() / predict_expectations()
new_contexts = np.array([
    [0, 1, 2, 3, 5],
    [1, 1, 1, 1, 1],
])

# Mac OS

In [28]:
# Record which platform produced the outputs in this section
platform.platform()

'macOS-10.15.7-x86_64-i386-64bit'

In [29]:
# Record the NumPy version used for this section's run
np.__version__

'1.18.5'

In [30]:
# Fixed seed so this run can be compared with the other platforms' runs
rng = np.random.RandomState(seed=11)
mab = LinearExample(
    rng=rng,
    arms=arms,
    n_jobs=1,
    backend=None,
    l2_lambda=1,
    alpha=1,
    regression='ts',
)
mab.fit(decisions, rewards, context_history)

expectations = mab.predict_expectations(new_contexts)
print(expectations)

[{1: -0.04482350855651185, 2: 0.8333060877337737, 3: 0.29807186705074135}, {1: -0.7293017823743778, 2: -0.45329259123180277, 3: 0.5506491290763366}]


In [31]:
# Five consecutive predict() calls on the same contexts, printed one per line
for _ in range(5):
    chosen_arms = mab.predict(new_contexts)
    print(chosen_arms)

[1, 3]
[1, 2]
[3, 3]
[3, 2]
[3, 3]


In [32]:
# Print the singular values of each arm model's A_inv matrix
for arm in arms:
    arm_model = mab.arm_to_model[arm]
    _, singular_values, _ = np.linalg.svd(arm_model.A_inv)
    print(singular_values)

[1.         1.         0.57734082 0.34455356 0.02305974]
[1.         1.         1.         0.14477694 0.01996294]
[1.         0.48149479 0.24964331 0.13360284 0.00922232]


# Windows

In [2]:
# Record which platform produced the outputs in this section
platform.platform()

'Windows-10-10.0.18362-SP0'

In [3]:
# Record the NumPy version used for this section's run
np.__version__

'1.19.4'

In [4]:
# Fixed seed so this run can be compared with the other platforms' runs
rng = np.random.RandomState(seed=11)
mab = LinearExample(
    rng=rng,
    arms=arms,
    n_jobs=1,
    backend=None,
    l2_lambda=1,
    alpha=1,
    regression='ts',
)
mab.fit(decisions, rewards, context_history)

expectations = mab.predict_expectations(new_contexts)
print(expectations)

[{1: -0.04482350855651229, 2: 0.8333060877337741, 3: 0.29807186705074135}, {1: -0.7293017823743778, 2: -0.45329259123180277, 3: 0.5506491290763366}]


In [5]:
# Five consecutive predict() calls on the same contexts, printed one per line
for _ in range(5):
    chosen_arms = mab.predict(new_contexts)
    print(chosen_arms)

[1, 3]
[1, 2]
[3, 3]
[3, 2]
[3, 3]


# SageMaker

In [2]:
# Record which platform produced the outputs in this section
platform.platform()

'Linux-4.14.225-121.362.amzn1.x86_64-x86_64-with-glibc2.9'

In [3]:
# Record the NumPy version used for this section's run
np.__version__

'1.19.5'

In [4]:
# Fixed seed so this run can be compared with the other platforms' runs
rng = np.random.RandomState(seed=11)
mab = LinearExample(
    rng=rng,
    arms=arms,
    n_jobs=1,
    backend=None,
    l2_lambda=1,
    alpha=1,
    regression='ts',
)
mab.fit(decisions, rewards, context_history)

expectations = mab.predict_expectations(new_contexts)
print(expectations)

[{1: -0.04482350855651185, 2: 0.8333060877337737, 3: 0.29807186705074135}, {1: -0.7293017823743778, 2: -0.45329259123180277, 3: 0.5506491290763366}]


In [5]:
# Five consecutive predict() calls on the same contexts, printed one per line
for _ in range(5):
    chosen_arms = mab.predict(new_contexts)
    print(chosen_arms)

[1, 3]
[1, 2]
[3, 3]
[3, 2]
[3, 3]


# Red Hat

In [2]:
# Record which platform produced the outputs in this section
platform.platform()

'Linux-3.10.0-1160.15.2.el7.x86_64-x86_64-with-glibc2.10'

In [3]:
# Record the NumPy version used for this section's run
np.__version__

'1.19.2'

In [4]:
# Fixed seed so this run can be compared with the other platforms' runs
rng = np.random.RandomState(seed=11)
mab = LinearExample(
    rng=rng,
    arms=arms,
    n_jobs=1,
    backend=None,
    l2_lambda=1,
    alpha=1,
    regression='ts',
)
mab.fit(decisions, rewards, context_history)

expectations = mab.predict_expectations(new_contexts)
print(expectations)

[{1: -0.04482350855651185, 2: 0.8333060877337737, 3: 0.29807186705074135}, {1: -0.7293017823743778, 2: -0.45329259123180277, 3: 0.5506491290763366}]


In [5]:
# Five consecutive predict() calls on the same contexts, printed one per line
for _ in range(5):
    chosen_arms = mab.predict(new_contexts)
    print(chosen_arms)

[1, 3]
[1, 2]
[3, 3]
[3, 2]
[3, 3]
