In [2]:
import pandas as pd
import numpy as np
import os
import pickle
import platform
from sklearn.preprocessing import StandardScaler

from mabwiser.mab import MAB, LearningPolicy
from mabwiser.linear import _RidgeRegression, _Linear

class LinTSExample(_RidgeRegression):
    """Per-arm Linear Thompson Sampling model that samples coefficients directly.

    Coefficients are drawn with a single ``multivariate_normal`` call whose mean is
    ``self.beta`` and whose covariance is ``alpha**2 * A_inv``.
    """

    def predict(self, x):
        """Return the sampled expected reward for one context.

        Parameters
        ----------
        x : array-like
            Context to score. It is passed through ``self._scale_predict_context``
            first when the model has a scaler.

        Returns
        -------
        The dot product of the (possibly scaled) context with a freshly sampled
        coefficient vector.
        """
        if self.scaler is not None:
            x = self._scale_predict_context(x)

        # Covariance of the sampling distribution: alpha^2 * A_inv.
        covar = np.dot(self.alpha**2, self.A_inv)

        # Sample from the generator owned by this model (the one handed to the
        # constructor and stored by the mabwiser base class as ``self.rng``)
        # instead of a notebook-level global named ``rng``. The global is only
        # created in a much later cell, so relying on it made this class fail on
        # a fresh kernel and ignore whatever generator mabwiser assigns to the model.
        # NOTE(review): outputs recorded with the global generator may differ.
        beta_sampled = self.rng.multivariate_normal(self.beta, covar)
        return np.dot(x, beta_sampled)
    
class LinearExample(_Linear):
    """Linear bandit that uses ``LinTSExample`` as the model behind every arm.

    ``factory`` maps a regression name to the model class built per arm; only
    ``"ts"`` is registered here.
    """

    factory = {"ts": LinTSExample}

    def __init__(self, rng, arms, n_jobs=1, backend=None, l2_lambda=1, alpha=1, regression='ts', arm_to_scaler=None):
        """Initialise the parent policy, then replace its per-arm models.

        Parameters
        ----------
        rng
            Random generator forwarded to the parent and to every arm model.
        arms
            Iterable of arm identifiers; one model is created per arm.
        n_jobs, backend
            Forwarded unchanged to the parent constructor.
        l2_lambda, alpha
            Forwarded to the parent and to every arm model.
        regression
            Key looked up in ``LinearExample.factory`` to pick the model class.
        arm_to_scaler
            Optional mapping arm -> scaler. When omitted, every arm gets ``None``.
            It is not forwarded to the parent constructor.
        """
        super().__init__(rng, arms, n_jobs, backend, l2_lambda, alpha, regression)

        # Re-assigned after the parent call, exactly as the original did.
        self.l2_lambda = l2_lambda
        self.alpha = alpha
        self.regression = regression
        self.num_features = None

        # Default to "no scaler" for every arm when no mapping is supplied.
        scalers = arm_to_scaler if arm_to_scaler is not None else {arm: None for arm in arms}

        # Overwrite the parent's models with one factory-built model per arm.
        model_cls = LinearExample.factory.get(regression)
        self.arm_to_model = {
            arm: model_cls(rng, l2_lambda, alpha, scalers[arm])
            for arm in arms
        }


# SageMaker: custom LinTS (`LinTSExample`, direct `multivariate_normal` sampling)

In [18]:
# Show the platform string of the machine running this notebook, so the
# recorded results can be tied to an environment.
platform.platform()

'Linux-4.14.225-121.362.amzn1.x86_64-x86_64-with-glibc2.9'

In [19]:
# Record the NumPy version used for this run.
print(np.__version__)

1.19.5


In [20]:
# Load the user table and the response table from CSV files in the working directory.
# Later cells read the 'set' and 'user id' columns of `users` and the
# 'user id', 'item id' and 'rated' columns after merging with `responses`.
users = pd.read_csv('movielens_users.csv')
responses = pd.read_csv('movielens_responses.csv')

In [21]:
# Split users into train/test partitions using the 'set' column.
train = users[users['set']=='train']
test = users[users['set']=='test']

# Attach responses to the training users (left join on 'user id').
train = train.merge(responses, how='left', on='user id')

# Every user column except the identifier and the split marker is a context feature.
context_features = [c for c in users.columns if c not in ['user id', 'set']]

# Convert training data to the array forms used by mabwiser.
decisions = MAB._convert_array(train['item id'])
rewards = MAB._convert_array(train['rated'])
contexts = MAB._convert_matrix(train[context_features]).astype('float')

test_contexts = MAB._convert_matrix(test[context_features]).astype('float')

# Load the pre-fitted scaler. The file is opened in a `with` block so the handle
# is closed deterministically instead of being left to the garbage collector.
# NOTE: pickle.load can execute arbitrary code; only load a scaler file you trust.
with open('movielens_scaler.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

# Apply the same scaling to train and test contexts.
contexts = scaler.transform(contexts)
test_contexts = scaler.transform(test_contexts)

In [22]:
# Seeded generator; it is passed to LinearExample, which hands it to every arm model.
rng = np.random.RandomState(seed=11)
# One arm per distinct item id found in the responses.
arms = list(responses['item id'].unique())

# Build the custom linear bandit with the 'ts' regression (LinTSExample per arm).
mab = LinearExample(rng=rng, arms=arms, l2_lambda=10, alpha=1, regression='ts', n_jobs=1, backend=None)
# Display the model object for arm 1 to confirm which model class is in use.
mab.arm_to_model[1]

<__main__.LinTSExample at 0x7f72aeb0ec50>

In [23]:
# Train the custom bandit on the training decisions/rewards/contexts,
# then compute expectations for the scaled test contexts.
mab.fit(decisions, rewards, contexts)
expectations = mab.predict_expectations(test_contexts)

# Spot-check a single value: index 0, key 1
# (presumably first test context, arm id 1 — confirm against mabwiser's return type).
expectations[0][1]

-0.03593678774680008

In [24]:
# Persist the fitted custom bandit and its expectations under ./output.
# Create the directory first so the cell also works on a fresh checkout.
os.makedirs('output', exist_ok=True)

# `with` closes each file as soon as the dump finishes, so the pickles are
# fully flushed to disk and no file handle is left open.
with open(os.path.join('output', 'sgm_ml_mab2.pkl'), 'wb') as mab_file:
    pickle.dump(mab, mab_file)
with open(os.path.join('output', 'sgm_ml_expectations2.pkl'), 'wb') as expectations_file:
    pickle.dump(expectations, expectations_file)

# Cholesky: mabwiser's built-in `LearningPolicy.LinTS`

In [9]:
# One arm per distinct item id found in the responses.
arms = list(responses['item id'].unique())
# Built-in mabwiser LinTS policy with the same l2_lambda, alpha and seed values
# as the custom LinearExample run.
mab = MAB(arms=arms, learning_policy=LearningPolicy.LinTS(l2_lambda=10, alpha=1), n_jobs=1, backend=None, seed=11)
# Display the model object for arm 1 to confirm which model class is in use.
mab._imp.arm_to_model[1]

<mabwiser.linear._LinTS at 0x7f164a00fda0>

In [10]:
# Train the built-in LinTS bandit on the same training data,
# then compute expectations for the same scaled test contexts.
mab.fit(decisions, rewards, contexts)
expectations = mab.predict_expectations(test_contexts)

# Spot-check the same entry as in the custom run: index 0, key 1
# (presumably first test context, arm id 1 — confirm against mabwiser's return type).
expectations[0][1]

-0.2992644622589859

In [11]:
# Persist the fitted built-in LinTS bandit and its expectations under ./output.
# Create the directory first so the cell also works on a fresh checkout.
os.makedirs('output', exist_ok=True)

# `with` closes each file as soon as the dump finishes, so the pickles are
# fully flushed to disk and no file handle is left open.
with open(os.path.join('output', 'sgm_ml_ch_mab2.pkl'), 'wb') as mab_file:
    pickle.dump(mab, mab_file)
with open(os.path.join('output', 'sgm_ml_ch_expectations2.pkl'), 'wb') as expectations_file:
    pickle.dump(expectations, expectations_file)

In [12]:
# Inspect the `beta` array of arm 1's fitted model.
mab._imp.arm_to_model[1].beta

array([-0.07936336, -0.00871014,  0.03361507, -0.02956334, -0.02518599,
        0.02451638,  0.00301226,  0.05208961,  0.01395249,  0.01485094,
        0.01148366, -0.02269606,  0.01052057, -0.01819606, -0.02941985,
        0.01537429, -0.00853869,  0.00354568, -0.01047156,  0.00412024,
       -0.01159282,  0.00889091,  0.01726125, -0.01977439, -0.00274761])