In [1]:
# Run this cell for every experiment
import pandas as pd
import numpy as np
import pickle
import platform
from sklearn.preprocessing import StandardScaler

from mabwiser.mab import MAB, LearningPolicy

# Mac OS

In [2]:
# Display the platform identifier string of the machine running this section.
platform.platform()

'macOS-10.15.7-x86_64-i386-64bit'

In [3]:
# Print the NumPy version installed in this environment.
print(np.__version__)

1.18.5


In [4]:
# Read the simulated data; the 'set' column labels each row as 'train' or 'test'.
data = pd.read_csv('simulated_data.csv')

in_train = data['set'].eq('train')
in_test = data['set'].eq('test')
train = data.loc[in_train]
test = data.loc[in_test]

# Every column other than the split label, the arm and the reward is a context feature.
non_context = ('set', 'arm', 'reward')
context_features = [col for col in data.columns if col not in non_context]

# Convert the training columns with MAB's underscore-prefixed converter helpers.
decisions = MAB._convert_array(train['arm'])
rewards = MAB._convert_array(train['reward'])
contexts = MAB._convert_matrix(train[context_features]).astype('float')

test_contexts = MAB._convert_matrix(test[context_features]).astype('float')

print(context_features)

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19']


In [5]:
# Peek at the first ten training decisions (values taken from the 'arm' column).
decisions[:10]

array([2, 1, 1, 3, 1, 3, 2, 0, 3, 2])

In [6]:
# LinTS learning policy with l2_lambda=1 and alpha=1, on four arms with a fixed seed of 11.
lin_ts_policy = LearningPolicy.LinTS(l2_lambda=1, alpha=1)
mab = MAB(
    arms=[0, 1, 2, 3],
    learning_policy=lin_ts_policy,
    n_jobs=1,
    backend=None,
    seed=11,
)
# Display the model object stored for arm 1.
mab._imp.arm_to_model[1]

<mabwiser.linear._LinTS at 0x7fe12afe3e80>

In [7]:
# Train on the training split, then compute expectations for the test contexts.
mab.fit(decisions=decisions, rewards=rewards, contexts=contexts)
expectations = mab.predict_expectations(contexts=test_contexts)

# Show one value for comparison across platforms
# (presumably first test row, arm 1 -- confirm against the MABWiser return format).
expectations[0][1]

1.178443781792346

In [8]:
# Persist the fitted bandit and its test-set expectations for this platform.
# The context managers close (and therefore flush) each file as soon as the
# dump finishes, instead of leaving the handles open until garbage collection.
with open('mac_ch_mab.pkl', 'wb') as mab_file:
    pickle.dump(mab, mab_file)
with open('mac_ch_expectations.pkl', 'wb') as expectations_file:
    pickle.dump(expectations, expectations_file)

# Windows

In [2]:
# Display the platform identifier string of the machine running this section.
platform.platform()

'Windows-10-10.0.18362-SP0'

In [3]:
# Print the NumPy version installed in this environment.
print(np.__version__)

1.19.4


In [4]:
# Read the simulated data; the 'set' column labels each row as 'train' or 'test'.
data = pd.read_csv('simulated_data.csv')

in_train = data['set'].eq('train')
in_test = data['set'].eq('test')
train = data.loc[in_train]
test = data.loc[in_test]

# Every column other than the split label, the arm and the reward is a context feature.
non_context = ('set', 'arm', 'reward')
context_features = [col for col in data.columns if col not in non_context]

# Convert the training columns with MAB's underscore-prefixed converter helpers.
decisions = MAB._convert_array(train['arm'])
rewards = MAB._convert_array(train['reward'])
contexts = MAB._convert_matrix(train[context_features]).astype('float')

test_contexts = MAB._convert_matrix(test[context_features]).astype('float')

print(context_features)

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19']


In [5]:
# LinTS learning policy with l2_lambda=1 and alpha=1, on four arms with a fixed seed of 11.
lin_ts_policy = LearningPolicy.LinTS(l2_lambda=1, alpha=1)
mab = MAB(
    arms=[0, 1, 2, 3],
    learning_policy=lin_ts_policy,
    n_jobs=1,
    backend=None,
    seed=11,
)
# Display the model object stored for arm 1.
mab._imp.arm_to_model[1]

<mabwiser.linear._LinTS at 0x1a22c07cd30>

In [6]:
# Train on the training split, then compute expectations for the test contexts.
mab.fit(decisions=decisions, rewards=rewards, contexts=contexts)
expectations = mab.predict_expectations(contexts=test_contexts)

# Show one value for comparison across platforms
# (presumably first test row, arm 1 -- confirm against the MABWiser return format).
expectations[0][1]

1.1784437817923858

In [7]:
# Persist the fitted bandit and its test-set expectations for this platform.
# The context managers close (and therefore flush) each file as soon as the
# dump finishes, instead of leaving the handles open until garbage collection.
with open('win_ch_mab.pkl', 'wb') as mab_file:
    pickle.dump(mab, mab_file)
with open('win_ch_expectations.pkl', 'wb') as expectations_file:
    pickle.dump(expectations, expectations_file)

# SageMaker

In [2]:
# Display the platform identifier string of the machine running this section.
platform.platform()

'Linux-4.14.225-121.362.amzn1.x86_64-x86_64-with-glibc2.9'

In [3]:
# Print the NumPy version installed in this environment.
print(np.__version__)

1.19.5


In [4]:
# Read the simulated data; the 'set' column labels each row as 'train' or 'test'.
data = pd.read_csv('simulated_data.csv')

in_train = data['set'].eq('train')
in_test = data['set'].eq('test')
train = data.loc[in_train]
test = data.loc[in_test]

# Every column other than the split label, the arm and the reward is a context feature.
non_context = ('set', 'arm', 'reward')
context_features = [col for col in data.columns if col not in non_context]

# Convert the training columns with MAB's underscore-prefixed converter helpers.
decisions = MAB._convert_array(train['arm'])
rewards = MAB._convert_array(train['reward'])
contexts = MAB._convert_matrix(train[context_features]).astype('float')

test_contexts = MAB._convert_matrix(test[context_features]).astype('float')

print(context_features)

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19']


In [5]:
# LinTS learning policy with l2_lambda=1 and alpha=1, on four arms with a fixed seed of 11.
lin_ts_policy = LearningPolicy.LinTS(l2_lambda=1, alpha=1)
mab = MAB(
    arms=[0, 1, 2, 3],
    learning_policy=lin_ts_policy,
    n_jobs=1,
    backend=None,
    seed=11,
)
# Display the model object stored for arm 1.
mab._imp.arm_to_model[1]

<mabwiser.linear._LinTS at 0x7f2c3c7da0b8>

In [6]:
# Train on the training split, then compute expectations for the test contexts.
mab.fit(decisions=decisions, rewards=rewards, contexts=contexts)
expectations = mab.predict_expectations(contexts=test_contexts)

# Show one value for comparison across platforms
# (presumably first test row, arm 1 -- confirm against the MABWiser return format).
expectations[0][1]

1.1784437817923858

In [8]:
# Persist the fitted bandit and its test-set expectations for this platform.
# The context managers close (and therefore flush) each file as soon as the
# dump finishes, instead of leaving the handles open until garbage collection.
with open('sgm_ch_mab.pkl', 'wb') as mab_file:
    pickle.dump(mab, mab_file)
with open('sgm_ch_expectations.pkl', 'wb') as expectations_file:
    pickle.dump(expectations, expectations_file)

# Red Hat

In [2]:
# Display the platform identifier string of the machine running this section.
platform.platform()

'Linux-3.10.0-1160.15.2.el7.x86_64-x86_64-with-glibc2.10'

In [3]:
# Print the NumPy version installed in this environment.
print(np.__version__)

1.19.2


In [4]:
# Read the simulated data; the 'set' column labels each row as 'train' or 'test'.
data = pd.read_csv('simulated_data.csv')

in_train = data['set'].eq('train')
in_test = data['set'].eq('test')
train = data.loc[in_train]
test = data.loc[in_test]

# Every column other than the split label, the arm and the reward is a context feature.
non_context = ('set', 'arm', 'reward')
context_features = [col for col in data.columns if col not in non_context]

# Convert the training columns with MAB's underscore-prefixed converter helpers.
decisions = MAB._convert_array(train['arm'])
rewards = MAB._convert_array(train['reward'])
contexts = MAB._convert_matrix(train[context_features]).astype('float')

test_contexts = MAB._convert_matrix(test[context_features]).astype('float')

print(context_features)

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19']


In [5]:
# LinTS learning policy with l2_lambda=1 and alpha=1, on four arms with a fixed seed of 11.
lin_ts_policy = LearningPolicy.LinTS(l2_lambda=1, alpha=1)
mab = MAB(
    arms=[0, 1, 2, 3],
    learning_policy=lin_ts_policy,
    n_jobs=1,
    backend=None,
    seed=11,
)
# Display the model object stored for arm 1.
mab._imp.arm_to_model[1]

<mabwiser.linear._LinTS at 0x7f85b3b1af40>

In [6]:
# Train on the training split, then compute expectations for the test contexts.
mab.fit(decisions=decisions, rewards=rewards, contexts=contexts)
expectations = mab.predict_expectations(contexts=test_contexts)

# Show one value for comparison across platforms
# (presumably first test row, arm 1 -- confirm against the MABWiser return format).
expectations[0][1]

1.178443781792346

In [7]:
# Persist the fitted bandit and its test-set expectations for this platform.
# The context managers close (and therefore flush) each file as soon as the
# dump finishes, instead of leaving the handles open until garbage collection.
with open('rh_ch_mab.pkl', 'wb') as mab_file:
    pickle.dump(mab, mab_file)
with open('rh_ch_expectations.pkl', 'wb') as expectations_file:
    pickle.dump(expectations, expectations_file)