In [1]:
# Run this cell for every experiment
import pandas as pd
import numpy as np
import pickle
import platform
from sklearn.preprocessing import StandardScaler

from mabwiser.mab import MAB, LearningPolicy

# Windows

In [2]:
# Display the platform identifier string of the machine this section was run on.
platform.platform()

'Windows-10-10.0.18362-SP0'

In [3]:
# Print the NumPy version installed in this environment.
print(np.__version__)

1.19.4


In [4]:
# Read the user table and the response table from CSV files in the working directory.
users_csv, responses_csv = 'movielens_users.csv', 'movielens_responses.csv'
users = pd.read_csv(users_csv)
responses = pd.read_csv(responses_csv)

In [5]:
# Split the users table into train and test partitions using its 'set' column.
train = users[users['set']=='train']
test = users[users['set']=='test']

# Left-join the responses onto the training users by 'user id'.
# NOTE(review): a left join keeps training users that have no response row; their
# 'item id' / 'rated' values would be NaN — confirm no such users exist.
train = train.merge(responses, how='left', on='user id')
# Context features are every users column except the id and the split label.
context_features = [c for c in users.columns if c not in ['user id', 'set']]

# NOTE(review): _convert_array / _convert_matrix are underscore-prefixed (private)
# MABWiser helpers; presumably they return NumPy arrays — confirm for the installed version.
decisions = MAB._convert_array(train['item id'])
rewards = MAB._convert_array(train['rated'])
contexts = MAB._convert_matrix(train[context_features]).astype('float')

test_contexts = MAB._convert_matrix(test[context_features]).astype('float')

# Load the pickled scaler inside a context manager so the file handle is closed
# deterministically instead of being left open until garbage collection.
# NOTE: pickle.load can execute arbitrary code — only load a trusted file here.
# Presumably this is an already-fitted scaler, since only transform() is called below.
with open('movielens_scaler.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

# Apply the same scaling to the training and test contexts.
contexts = scaler.transform(contexts)
test_contexts = scaler.transform(test_contexts)



In [6]:
# Every distinct item id in the responses table becomes one arm.
arms = list(responses['item id'].unique())

# LinTS learning policy with l2_lambda=1 and alpha=1; the bandit uses n_jobs=1,
# backend=None and a fixed seed of 11.
lin_ts = LearningPolicy.LinTS(l2_lambda=1, alpha=1)
mab = MAB(
    arms=arms,
    learning_policy=lin_ts,
    n_jobs=1,
    backend=None,
    seed=11,
)
# Display the per-arm model object for arm 1 (reached through private attributes).
mab._imp.arm_to_model[1]

<mabwiser.linear._LinTS at 0x28ab60709e8>

In [7]:
# Train the bandit on the scaled training data, then score the scaled test contexts.
mab.fit(decisions=decisions, rewards=rewards, contexts=contexts)
expectations = mab.predict_expectations(contexts=test_contexts)

# Show the expectation stored under key 1 of the first prediction entry
# (presumably arm 1 for the first test context — confirm the return structure).
expectations[0][1]

-0.3714850355006511

In [8]:
# Persist the fitted bandit and its expectations. Each file is opened in a context
# manager so the handle is flushed and closed as soon as the dump completes,
# rather than whenever the anonymous file object happens to be garbage collected.
with open('win_ml_ch_mab.pkl', 'wb') as mab_file:
    pickle.dump(mab, mab_file)
with open('win_ml_ch_expectations.pkl', 'wb') as expectations_file:
    pickle.dump(expectations, expectations_file)

In [9]:
# LinTS learning policy with l2_lambda=10 and alpha=1; the bandit uses n_jobs=1,
# backend=None and a fixed seed of 11.
lin_ts = LearningPolicy.LinTS(l2_lambda=10, alpha=1)
mab = MAB(
    arms=arms,
    learning_policy=lin_ts,
    n_jobs=1,
    backend=None,
    seed=11,
)
# Display the per-arm model object for arm 1 (reached through private attributes).
mab._imp.arm_to_model[1]

<mabwiser.linear._LinTS at 0x28ab6020ba8>

In [10]:
# Train the bandit on the scaled training data, then score the scaled test contexts.
mab.fit(decisions=decisions, rewards=rewards, contexts=contexts)
expectations = mab.predict_expectations(contexts=test_contexts)

# Show the expectation stored under key 1 of the first prediction entry
# (presumably arm 1 for the first test context — confirm the return structure).
expectations[0][1]

-0.2992644622589789

In [11]:
# Persist the fitted bandit and its expectations. Each file is opened in a context
# manager so the handle is flushed and closed as soon as the dump completes,
# rather than whenever the anonymous file object happens to be garbage collected.
with open('win_ml_ch_mab2.pkl', 'wb') as mab_file:
    pickle.dump(mab, mab_file)
with open('win_ml_ch_expectations2.pkl', 'wb') as expectations_file:
    pickle.dump(expectations, expectations_file)

In [12]:
# Display the `beta` array of the model for arm 1 (presumably its fitted coefficient
# vector — confirm); `_imp` and `arm_to_model` are private MABWiser attributes.
mab._imp.arm_to_model[1].beta

array([-0.07936336, -0.00871014,  0.03361507, -0.02956334, -0.02518599,
        0.02451638,  0.00301226,  0.05208961,  0.01395249,  0.01485094,
        0.01148366, -0.02269606,  0.01052057, -0.01819606, -0.02941985,
        0.01537429, -0.00853869,  0.00354568, -0.01047156,  0.00412024,
       -0.01159282,  0.00889091,  0.01726125, -0.01977439, -0.00274761])

# Mac OS

In [2]:
# Display the platform identifier string of the machine this section was run on.
platform.platform()

'macOS-10.15.7-x86_64-i386-64bit'

In [3]:
# Print the NumPy version installed in this environment.
print(np.__version__)

1.18.5


In [4]:
# Read the user table and the response table from CSV files in the working directory.
users_csv, responses_csv = 'movielens_users.csv', 'movielens_responses.csv'
users = pd.read_csv(users_csv)
responses = pd.read_csv(responses_csv)

In [5]:
# Split the users table into train and test partitions using its 'set' column.
train = users[users['set']=='train']
test = users[users['set']=='test']

# Left-join the responses onto the training users by 'user id'.
# NOTE(review): a left join keeps training users that have no response row; their
# 'item id' / 'rated' values would be NaN — confirm no such users exist.
train = train.merge(responses, how='left', on='user id')
# Context features are every users column except the id and the split label.
context_features = [c for c in users.columns if c not in ['user id', 'set']]

# NOTE(review): _convert_array / _convert_matrix are underscore-prefixed (private)
# MABWiser helpers; presumably they return NumPy arrays — confirm for the installed version.
decisions = MAB._convert_array(train['item id'])
rewards = MAB._convert_array(train['rated'])
contexts = MAB._convert_matrix(train[context_features]).astype('float')

test_contexts = MAB._convert_matrix(test[context_features]).astype('float')

# Load the pickled scaler inside a context manager so the file handle is closed
# deterministically instead of being left open until garbage collection.
# NOTE: pickle.load can execute arbitrary code — only load a trusted file here.
# Presumably this is an already-fitted scaler, since only transform() is called below.
with open('movielens_scaler.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

# Apply the same scaling to the training and test contexts.
contexts = scaler.transform(contexts)
test_contexts = scaler.transform(test_contexts)

In [7]:
# Every distinct item id in the responses table becomes one arm.
arms = list(responses['item id'].unique())
# LinTS learning policy with l2_lambda=1 and alpha=1; the bandit uses n_jobs=1,
# backend=None and a fixed seed of 11.
lin_ts = LearningPolicy.LinTS(l2_lambda=1, alpha=1)
mab = MAB(
    arms=arms,
    learning_policy=lin_ts,
    n_jobs=1,
    backend=None,
    seed=11,
)
# Display the per-arm model object for arm 1 (reached through private attributes).
mab._imp.arm_to_model[1]

<mabwiser.linear._LinTS at 0x7fb86b93a580>

In [8]:
# Train the bandit on the scaled training data, then score the scaled test contexts.
mab.fit(decisions=decisions, rewards=rewards, contexts=contexts)
expectations = mab.predict_expectations(contexts=test_contexts)

# Show the expectation stored under key 1 of the first prediction entry
# (presumably arm 1 for the first test context — confirm the return structure).
expectations[0][1]

-0.37148503550093875

In [9]:
# Persist the fitted bandit and its expectations. Each file is opened in a context
# manager so the handle is flushed and closed as soon as the dump completes,
# rather than whenever the anonymous file object happens to be garbage collected.
with open('mac_ml_ch_mab.pkl', 'wb') as mab_file:
    pickle.dump(mab, mab_file)
with open('mac_ml_ch_expectations.pkl', 'wb') as expectations_file:
    pickle.dump(expectations, expectations_file)

In [10]:
# LinTS learning policy with l2_lambda=10 and alpha=1; the bandit uses n_jobs=1,
# backend=None and a fixed seed of 11.
lin_ts = LearningPolicy.LinTS(l2_lambda=10, alpha=1)
mab = MAB(
    arms=arms,
    learning_policy=lin_ts,
    n_jobs=1,
    backend=None,
    seed=11,
)
# Display the per-arm model object for arm 1 (reached through private attributes).
mab._imp.arm_to_model[1]

<mabwiser.linear._LinTS at 0x7fb814576100>

In [11]:
# Train the bandit on the scaled training data, then score the scaled test contexts.
mab.fit(decisions=decisions, rewards=rewards, contexts=contexts)
expectations = mab.predict_expectations(contexts=test_contexts)

# Show the expectation stored under key 1 of the first prediction entry
# (presumably arm 1 for the first test context — confirm the return structure).
expectations[0][1]

-0.2992644622589859

In [12]:
# Persist the fitted bandit and its expectations. Each file is opened in a context
# manager so the handle is flushed and closed as soon as the dump completes,
# rather than whenever the anonymous file object happens to be garbage collected.
with open('mac_ml_ch_mab2.pkl', 'wb') as mab_file:
    pickle.dump(mab, mab_file)
with open('mac_ml_ch_expectations2.pkl', 'wb') as expectations_file:
    pickle.dump(expectations, expectations_file)

In [14]:
# Display the `beta` array of the model for arm 1 (presumably its fitted coefficient
# vector — confirm); `_imp` and `arm_to_model` are private MABWiser attributes.
mab._imp.arm_to_model[1].beta

array([-0.07936336, -0.00871014,  0.03361507, -0.02956334, -0.02518599,
        0.02451638,  0.00301226,  0.05208961,  0.01395249,  0.01485094,
        0.01148366, -0.02269606,  0.01052057, -0.01819606, -0.02941985,
        0.01537429, -0.00853869,  0.00354568, -0.01047156,  0.00412024,
       -0.01159282,  0.00889091,  0.01726125, -0.01977439, -0.00274761])

# SageMaker

In [2]:
# Display the platform identifier string of the machine this section was run on.
platform.platform()

'Linux-4.14.225-121.362.amzn1.x86_64-x86_64-with-glibc2.9'

In [3]:
# Print the NumPy version installed in this environment.
print(np.__version__)

1.19.5


In [4]:
# Read the user table and the response table from CSV files in the working directory.
users_csv, responses_csv = 'movielens_users.csv', 'movielens_responses.csv'
users = pd.read_csv(users_csv)
responses = pd.read_csv(responses_csv)

In [5]:
# Split the users table into train and test partitions using its 'set' column.
train = users[users['set']=='train']
test = users[users['set']=='test']

# Left-join the responses onto the training users by 'user id'.
# NOTE(review): a left join keeps training users that have no response row; their
# 'item id' / 'rated' values would be NaN — confirm no such users exist.
train = train.merge(responses, how='left', on='user id')
# Context features are every users column except the id and the split label.
context_features = [c for c in users.columns if c not in ['user id', 'set']]

# NOTE(review): _convert_array / _convert_matrix are underscore-prefixed (private)
# MABWiser helpers; presumably they return NumPy arrays — confirm for the installed version.
decisions = MAB._convert_array(train['item id'])
rewards = MAB._convert_array(train['rated'])
contexts = MAB._convert_matrix(train[context_features]).astype('float')

test_contexts = MAB._convert_matrix(test[context_features]).astype('float')

# Load the pickled scaler inside a context manager so the file handle is closed
# deterministically instead of being left open until garbage collection.
# NOTE: pickle.load can execute arbitrary code — only load a trusted file here.
# Presumably this is an already-fitted scaler, since only transform() is called below.
with open('movielens_scaler.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

# Apply the same scaling to the training and test contexts.
contexts = scaler.transform(contexts)
test_contexts = scaler.transform(test_contexts)

In [6]:
# Every distinct item id in the responses table becomes one arm.
arms = list(responses['item id'].unique())
# LinTS learning policy with l2_lambda=1 and alpha=1; the bandit uses n_jobs=1,
# backend=None and a fixed seed of 11.
lin_ts = LearningPolicy.LinTS(l2_lambda=1, alpha=1)
mab = MAB(
    arms=arms,
    learning_policy=lin_ts,
    n_jobs=1,
    backend=None,
    seed=11,
)
# Display the per-arm model object for arm 1 (reached through private attributes).
mab._imp.arm_to_model[1]

<mabwiser.linear._LinTS at 0x7f164b2f5cc0>

In [7]:
# Train the bandit on the scaled training data, then score the scaled test contexts.
mab.fit(decisions=decisions, rewards=rewards, contexts=contexts)
expectations = mab.predict_expectations(contexts=test_contexts)

# Show the expectation stored under key 1 of the first prediction entry
# (presumably arm 1 for the first test context — confirm the return structure).
expectations[0][1]

-0.37148503550094386

In [8]:
# Persist the fitted bandit and its expectations. Each file is opened in a context
# manager so the handle is flushed and closed as soon as the dump completes,
# rather than whenever the anonymous file object happens to be garbage collected.
with open('sgm_ml_ch_mab.pkl', 'wb') as mab_file:
    pickle.dump(mab, mab_file)
with open('sgm_ml_ch_expectations.pkl', 'wb') as expectations_file:
    pickle.dump(expectations, expectations_file)

In [9]:
# LinTS learning policy with l2_lambda=10 and alpha=1; the bandit uses n_jobs=1,
# backend=None and a fixed seed of 11.
lin_ts = LearningPolicy.LinTS(l2_lambda=10, alpha=1)
mab = MAB(
    arms=arms,
    learning_policy=lin_ts,
    n_jobs=1,
    backend=None,
    seed=11,
)
# Display the per-arm model object for arm 1 (reached through private attributes).
mab._imp.arm_to_model[1]

<mabwiser.linear._LinTS at 0x7f164a00fda0>

In [10]:
# Train the bandit on the scaled training data, then score the scaled test contexts.
mab.fit(decisions=decisions, rewards=rewards, contexts=contexts)
expectations = mab.predict_expectations(contexts=test_contexts)

# Show the expectation stored under key 1 of the first prediction entry
# (presumably arm 1 for the first test context — confirm the return structure).
expectations[0][1]

-0.2992644622589859

In [11]:
# Persist the fitted bandit and its expectations. Each file is opened in a context
# manager so the handle is flushed and closed as soon as the dump completes,
# rather than whenever the anonymous file object happens to be garbage collected.
with open('sgm_ml_ch_mab2.pkl', 'wb') as mab_file:
    pickle.dump(mab, mab_file)
with open('sgm_ml_ch_expectations2.pkl', 'wb') as expectations_file:
    pickle.dump(expectations, expectations_file)

In [12]:
# Display the `beta` array of the model for arm 1 (presumably its fitted coefficient
# vector — confirm); `_imp` and `arm_to_model` are private MABWiser attributes.
mab._imp.arm_to_model[1].beta

array([-0.07936336, -0.00871014,  0.03361507, -0.02956334, -0.02518599,
        0.02451638,  0.00301226,  0.05208961,  0.01395249,  0.01485094,
        0.01148366, -0.02269606,  0.01052057, -0.01819606, -0.02941985,
        0.01537429, -0.00853869,  0.00354568, -0.01047156,  0.00412024,
       -0.01159282,  0.00889091,  0.01726125, -0.01977439, -0.00274761])

# Red Hat

In [2]:
# Display the platform identifier string of the machine this section was run on.
platform.platform()

'Linux-3.10.0-1160.15.2.el7.x86_64-x86_64-with-glibc2.10'

In [3]:
# Print the NumPy version installed in this environment.
print(np.__version__)

1.19.2


In [4]:
# Read the user table and the response table from CSV files in the working directory.
users_csv, responses_csv = 'movielens_users.csv', 'movielens_responses.csv'
users = pd.read_csv(users_csv)
responses = pd.read_csv(responses_csv)

In [5]:
# Split the users table into train and test partitions using its 'set' column.
train = users[users['set']=='train']
test = users[users['set']=='test']

# Left-join the responses onto the training users by 'user id'.
# NOTE(review): a left join keeps training users that have no response row; their
# 'item id' / 'rated' values would be NaN — confirm no such users exist.
train = train.merge(responses, how='left', on='user id')
# Context features are every users column except the id and the split label.
context_features = [c for c in users.columns if c not in ['user id', 'set']]

# NOTE(review): _convert_array / _convert_matrix are underscore-prefixed (private)
# MABWiser helpers; presumably they return NumPy arrays — confirm for the installed version.
decisions = MAB._convert_array(train['item id'])
rewards = MAB._convert_array(train['rated'])
contexts = MAB._convert_matrix(train[context_features]).astype('float')

test_contexts = MAB._convert_matrix(test[context_features]).astype('float')

# Load the pickled scaler inside a context manager so the file handle is closed
# deterministically instead of being left open until garbage collection.
# NOTE: pickle.load can execute arbitrary code — only load a trusted file here.
# Presumably this is an already-fitted scaler, since only transform() is called below.
with open('movielens_scaler.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

# Apply the same scaling to the training and test contexts.
contexts = scaler.transform(contexts)
test_contexts = scaler.transform(test_contexts)

In [6]:
# Every distinct item id in the responses table becomes one arm.
arms = list(responses['item id'].unique())
# LinTS learning policy with l2_lambda=1 and alpha=1; the bandit uses n_jobs=1,
# backend=None and a fixed seed of 11.
lin_ts = LearningPolicy.LinTS(l2_lambda=1, alpha=1)
mab = MAB(
    arms=arms,
    learning_policy=lin_ts,
    n_jobs=1,
    backend=None,
    seed=11,
)
# Display the per-arm model object for arm 1 (reached through private attributes).
mab._imp.arm_to_model[1]

<mabwiser.linear._LinTS at 0x7f3718e43cd0>

In [7]:
# Train the bandit on the scaled training data, then score the scaled test contexts.
mab.fit(decisions=decisions, rewards=rewards, contexts=contexts)
expectations = mab.predict_expectations(contexts=test_contexts)

# Show the expectation stored under key 1 of the first prediction entry
# (presumably arm 1 for the first test context — confirm the return structure).
expectations[0][1]

-0.37148503550093875

In [8]:
# Persist the fitted bandit and its expectations. Each file is opened in a context
# manager so the handle is flushed and closed as soon as the dump completes,
# rather than whenever the anonymous file object happens to be garbage collected.
with open('rh_ml_ch_mab.pkl', 'wb') as mab_file:
    pickle.dump(mab, mab_file)
with open('rh_ml_ch_expectations.pkl', 'wb') as expectations_file:
    pickle.dump(expectations, expectations_file)

In [9]:
# LinTS learning policy with l2_lambda=10 and alpha=1; the bandit uses n_jobs=1,
# backend=None and a fixed seed of 11.
lin_ts = LearningPolicy.LinTS(l2_lambda=10, alpha=1)
mab = MAB(
    arms=arms,
    learning_policy=lin_ts,
    n_jobs=1,
    backend=None,
    seed=11,
)
# Display the per-arm model object for arm 1 (reached through private attributes).
mab._imp.arm_to_model[1]

<mabwiser.linear._LinTS at 0x7f3718e0df70>

In [10]:
# Train the bandit on the scaled training data, then score the scaled test contexts.
mab.fit(decisions=decisions, rewards=rewards, contexts=contexts)
expectations = mab.predict_expectations(contexts=test_contexts)

# Show the expectation stored under key 1 of the first prediction entry
# (presumably arm 1 for the first test context — confirm the return structure).
expectations[0][1]

-0.2992644622589859

In [11]:
# Persist the fitted bandit and its expectations. Each file is opened in a context
# manager so the handle is flushed and closed as soon as the dump completes,
# rather than whenever the anonymous file object happens to be garbage collected.
with open('rh_ml_ch_mab2.pkl', 'wb') as mab_file:
    pickle.dump(mab, mab_file)
with open('rh_ml_ch_expectations2.pkl', 'wb') as expectations_file:
    pickle.dump(expectations, expectations_file)

In [12]:
# Display the `beta` array of the model for arm 1 (presumably its fitted coefficient
# vector — confirm); `_imp` and `arm_to_model` are private MABWiser attributes.
mab._imp.arm_to_model[1].beta

array([-0.07936336, -0.00871014,  0.03361507, -0.02956334, -0.02518599,
        0.02451638,  0.00301226,  0.05208961,  0.01395249,  0.01485094,
        0.01148366, -0.02269606,  0.01052057, -0.01819606, -0.02941985,
        0.01537429, -0.00853869,  0.00354568, -0.01047156,  0.00412024,
       -0.01159282,  0.00889091,  0.01726125, -0.01977439, -0.00274761])

# Mac OS 2

In [2]:
# Display the platform identifier string of the machine this section was run on.
platform.platform()

'Darwin-19.5.0-x86_64-i386-64bit'

In [3]:
# Print the NumPy version installed in this environment.
print(np.__version__)

1.18.1


In [4]:
# Read the user table and the response table from CSV files in the working directory.
users_csv, responses_csv = 'movielens_users.csv', 'movielens_responses.csv'
users = pd.read_csv(users_csv)
responses = pd.read_csv(responses_csv)

In [5]:
# Split the users table into train and test partitions using its 'set' column.
train = users[users['set']=='train']
test = users[users['set']=='test']

# Left-join the responses onto the training users by 'user id'.
# NOTE(review): a left join keeps training users that have no response row; their
# 'item id' / 'rated' values would be NaN — confirm no such users exist.
train = train.merge(responses, how='left', on='user id')
# Context features are every users column except the id and the split label.
context_features = [c for c in users.columns if c not in ['user id', 'set']]

# NOTE(review): _convert_array / _convert_matrix are underscore-prefixed (private)
# MABWiser helpers; presumably they return NumPy arrays — confirm for the installed version.
decisions = MAB._convert_array(train['item id'])
rewards = MAB._convert_array(train['rated'])
contexts = MAB._convert_matrix(train[context_features]).astype('float')

test_contexts = MAB._convert_matrix(test[context_features]).astype('float')

# Load the pickled scaler inside a context manager so the file handle is closed
# deterministically instead of being left open until garbage collection.
# NOTE: pickle.load can execute arbitrary code — only load a trusted file here.
# Presumably this is an already-fitted scaler, since only transform() is called below.
with open('movielens_scaler.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

# Apply the same scaling to the training and test contexts.
contexts = scaler.transform(contexts)
test_contexts = scaler.transform(test_contexts)



In [6]:
# Every distinct item id in the responses table becomes one arm.
arms = list(responses['item id'].unique())
# LinTS learning policy with l2_lambda=1 and alpha=1; the bandit uses n_jobs=1,
# backend=None and a fixed seed of 11.
lin_ts = LearningPolicy.LinTS(l2_lambda=1, alpha=1)
mab = MAB(
    arms=arms,
    learning_policy=lin_ts,
    n_jobs=1,
    backend=None,
    seed=11,
)
# Display the per-arm model object for arm 1 (reached through private attributes).
mab._imp.arm_to_model[1]

<mabwiser.linear._LinTS at 0x7fd4eba3eb90>

In [7]:
# Train the bandit on the scaled training data, then score the scaled test contexts.
mab.fit(decisions=decisions, rewards=rewards, contexts=contexts)
expectations = mab.predict_expectations(contexts=test_contexts)

# Show the expectation stored under key 1 of the first prediction entry
# (presumably arm 1 for the first test context — confirm the return structure).
expectations[0][1]

-0.37148503550127704

In [8]:
# Persist the fitted bandit and its expectations. Each file is opened in a context
# manager so the handle is flushed and closed as soon as the dump completes,
# rather than whenever the anonymous file object happens to be garbage collected.
with open('dar_ml_ch_mab.pkl', 'wb') as mab_file:
    pickle.dump(mab, mab_file)
with open('dar_ml_ch_expectations.pkl', 'wb') as expectations_file:
    pickle.dump(expectations, expectations_file)

In [9]:
# LinTS learning policy with l2_lambda=10 and alpha=1; the bandit uses n_jobs=1,
# backend=None and a fixed seed of 11.
lin_ts = LearningPolicy.LinTS(l2_lambda=10, alpha=1)
mab = MAB(
    arms=arms,
    learning_policy=lin_ts,
    n_jobs=1,
    backend=None,
    seed=11,
)
# Display the per-arm model object for arm 1 (reached through private attributes).
mab._imp.arm_to_model[1]

<mabwiser.linear._LinTS at 0x7fd493730dd0>

In [10]:
# Train the bandit on the scaled training data, then score the scaled test contexts.
mab.fit(decisions=decisions, rewards=rewards, contexts=contexts)
expectations = mab.predict_expectations(contexts=test_contexts)

# Show the expectation stored under key 1 of the first prediction entry
# (presumably arm 1 for the first test context — confirm the return structure).
expectations[0][1]

-0.2992644622589947

In [11]:
# Persist the fitted bandit and its expectations. Each file is opened in a context
# manager so the handle is flushed and closed as soon as the dump completes,
# rather than whenever the anonymous file object happens to be garbage collected.
with open('dar_ml_ch_mab2.pkl', 'wb') as mab_file:
    pickle.dump(mab, mab_file)
with open('dar_ml_ch_expectations2.pkl', 'wb') as expectations_file:
    pickle.dump(expectations, expectations_file)

In [12]:
# Display the `beta` array of the model for arm 1 (presumably its fitted coefficient
# vector — confirm); `_imp` and `arm_to_model` are private MABWiser attributes.
mab._imp.arm_to_model[1].beta

array([-0.07936336, -0.00871014,  0.03361507, -0.02956334, -0.02518599,
        0.02451638,  0.00301226,  0.05208961,  0.01395249,  0.01485094,
        0.01148366, -0.02269606,  0.01052057, -0.01819606, -0.02941985,
        0.01537429, -0.00853869,  0.00354568, -0.01047156,  0.00412024,
       -0.01159282,  0.00889091,  0.01726125, -0.01977439, -0.00274761])