In [1]:
# Run this cell for every experiment
import pandas as pd
import numpy as np
import pickle
import platform
from sklearn.preprocessing import StandardScaler

from mabwiser.mab import MAB
from mabwiser.linear import _RidgeRegression, _Linear

class LinTSExample(_RidgeRegression):
    def predict(self, x):
        if self.scaler is not None:
            x = self._scale_predict_context(x) 
        beta_sampled = rng.multivariate_normal(self.beta, self.A_inv)
        return np.dot(x, beta_sampled)
    
class LinearExample(_Linear):
    factory = {"ts": LinTSExample}

    def __init__(self, rng, arms, n_jobs=1, backend=None, l2_lambda=1, alpha=1, regression='ts', arm_to_scaler = None):
        super().__init__(rng, arms, n_jobs, backend, l2_lambda, alpha, regression)
       
        self.l2_lambda = l2_lambda
        self.alpha = alpha
        self.regression = regression

        # Create ridge regression model for each arm
        self.num_features = None

        if arm_to_scaler is None:
            arm_to_scaler = dict((arm, None) for arm in arms)

        self.arm_to_model = dict((arm, LinearExample.factory.get(regression)(rng, l2_lambda,
                                                                       alpha, arm_to_scaler[arm])) for arm in arms)


# Create Data Set

In [2]:
from sklearn.datasets import make_classification

dfs = []

for i in range(4):
    X, y = make_classification(n_samples=100, n_features=20, n_classes=2, n_informative=15, random_state=i)
    df = pd.DataFrame(X)
    df['arm'] = i
    df['reward'] = y
    dfs.append(df)

In [3]:
data = pd.concat(dfs)

In [4]:
from sklearn.model_selection import train_test_split

train, test = train_test_split(data, random_state=43, test_size=0.3)

In [5]:
train.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12,13,14,15,16,17,18,19,arm,reward
65,7.540774,-0.221831,-3.919337,5.258477,2.940734,1.002549,-0.466184,0.265691,2.335075,-0.760819,...,-1.365283,5.473917,0.980097,3.790203,-1.019954,1.58716,2.09628,-1.206127,2,0
70,2.061396,3.037662,-1.115289,-0.492968,-2.571289,-6.85665,-0.944877,2.612648,-2.18005,-3.281273,...,-3.568702,-0.866955,0.578315,-1.581841,1.164671,-1.383679,2.810146,-1.066569,1,1
30,3.717529,0.984694,1.234988,-0.063337,-3.725663,-1.704918,1.066633,0.147504,-1.397789,-4.930524,...,-3.388638,0.512434,1.690352,0.200777,0.349084,0.404068,0.264601,0.762271,1,1
14,-0.86083,-0.34111,-1.548911,-3.554408,-2.774293,1.516058,0.337577,-0.090652,-0.175691,-1.877193,...,-0.102403,-0.415136,-1.545302,1.982714,0.560556,0.021631,0.710947,-1.854943,3,1
63,0.070855,-0.733082,0.850703,-1.869372,-0.531692,-1.227601,-1.257254,1.685047,1.679314,-2.429365,...,-3.333774,0.92782,-2.260054,1.922119,0.981994,0.438562,-1.033361,-0.654288,1,1


In [6]:
context_features = [c for c in data.columns if c not in ['arm', 'reward']]

decisions = MAB._convert_array(train['arm'])
rewards = MAB._convert_array(train['reward'])
contexts = MAB._convert_matrix(train[context_features]).astype('float')

test_contexts = MAB._convert_matrix(test[context_features]).astype('float')

In [7]:
rng = np.random.RandomState(seed=11)
mab = LinearExample(rng=rng, arms=[0, 1, 2, 3], l2_lambda=1, alpha=1, regression='ts', n_jobs=1, backend=None)

mab.fit(decisions, rewards, contexts)

for arm in mab.arms:
    u, s, vh = np.linalg.svd(mab.arm_to_model[arm].A_inv)
    print(s)

[1.00000000e+00 1.00000000e+00 2.86302665e-02 2.49631876e-02
 1.67183560e-02 1.14625948e-02 7.48548520e-03 6.94226983e-03
 6.24752368e-03 4.86211058e-03 4.30483507e-03 4.09512136e-03
 2.64346291e-03 2.41356688e-03 2.17645881e-03 1.74739584e-03
 1.56565185e-03 1.16327123e-03 4.26029986e-04 2.91526816e-04]
[1.00000000e+00 1.00000000e+00 2.35621785e-02 1.80508449e-02
 1.53421918e-02 1.10594297e-02 7.25757949e-03 6.76718760e-03
 6.14979642e-03 4.10534885e-03 3.65685288e-03 3.29945335e-03
 2.98752262e-03 2.62949453e-03 2.24223003e-03 1.68395285e-03
 1.17455957e-03 9.53555929e-04 4.60938566e-04 2.78960532e-04]
[1.00000000e+00 1.00000000e+00 2.05213365e-02 1.59086895e-02
 1.26671284e-02 1.13250703e-02 9.33134542e-03 5.71547399e-03
 4.92269898e-03 3.46588619e-03 3.40709704e-03 2.72711582e-03
 2.42891150e-03 2.19617576e-03 2.02383136e-03 1.48586695e-03
 1.13183645e-03 9.18100640e-04 5.38788820e-04 1.42572960e-04]
[1.00000000e+00 1.00000000e+00 3.14178107e-02 2.23480639e-02
 1.63204058e-02 1.398

The data has duplicate singular values, and will thus be able to reproduce the non-deterministic behavior

In [8]:
context_features

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]

In [9]:
train['set'] = 'train'
test['set'] = 'test'

data = pd.concat([train, test])
data.to_csv('simulated_data.csv', index=False)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


# Mac OS 

In [2]:
platform.platform()

'macOS-10.15.7-x86_64-i386-64bit'

In [3]:
print(np.__version__)

1.18.5


In [4]:
data = pd.read_csv('simulated_data.csv')

train = data[data['set']=='train']
test = data[data['set']=='test']

context_features = [c for c in data.columns if c not in ['set', 'arm', 'reward']]

decisions = MAB._convert_array(train['arm'])
rewards = MAB._convert_array(train['reward'])
contexts = MAB._convert_matrix(train[context_features]).astype('float')

test_contexts = MAB._convert_matrix(test[context_features]).astype('float')

print(context_features)

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19']


In [5]:
rng = np.random.RandomState(seed=11)
mab = LinearExample(rng=rng, arms=[0, 1, 2, 3], l2_lambda=1, alpha=1, regression='ts', n_jobs=1, backend=None)
mab.arm_to_model[1]

<__main__.LinTSExample at 0x7ffbeddea790>

In [6]:
mab.fit(decisions, rewards, contexts)
expectations = mab.predict_expectations(test_contexts)

expectations[0][1]

-3.842999860229387

In [7]:
pickle.dump(mab, open('mac_mab.pkl', 'wb'))
pickle.dump(expectations, open('mac_expectations.pkl', 'wb'))

# Windows

In [10]:
platform.platform()

'Windows-10-10.0.18362-SP0'

In [11]:
print(np.__version__)

1.19.4


In [12]:
data = pd.read_csv('simulated_data.csv')

train = data[data['set']=='train']
test = data[data['set']=='test']

context_features = [c for c in data.columns if c not in ['set', 'arm', 'reward']]

decisions = MAB._convert_array(train['arm'])
rewards = MAB._convert_array(train['reward'])
contexts = MAB._convert_matrix(train[context_features]).astype('float')

test_contexts = MAB._convert_matrix(test[context_features]).astype('float')

print(context_features)

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19']


In [13]:
rng = np.random.RandomState(seed=11)
mab = LinearExample(rng=rng, arms=[0, 1, 2, 3], l2_lambda=1, alpha=1, regression='ts', n_jobs=1, backend=None)
mab.arm_to_model[1]

<__main__.LinTSExample at 0x1ef0acd92e8>

In [14]:
mab.fit(decisions, rewards, contexts)
expectations = mab.predict_expectations(test_contexts)

expectations[0][1]

-3.8432664691656653

In [15]:
pickle.dump(mab, open('win_mab.pkl', 'wb'))
pickle.dump(expectations, open('win_expectations.pkl', 'wb'))

# SageMaker

In [2]:
platform.platform()

'Linux-4.14.225-121.362.amzn1.x86_64-x86_64-with-glibc2.9'

In [3]:
print(np.__version__)

1.19.5


In [4]:
data = pd.read_csv('simulated_data.csv')

train = data[data['set']=='train']
test = data[data['set']=='test']

context_features = [c for c in data.columns if c not in ['set', 'arm', 'reward']]

decisions = MAB._convert_array(train['arm'])
rewards = MAB._convert_array(train['reward'])
contexts = MAB._convert_matrix(train[context_features]).astype('float')

test_contexts = MAB._convert_matrix(test[context_features]).astype('float')

print(context_features)

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19']


In [5]:
rng = np.random.RandomState(seed=11)
mab = LinearExample(rng=rng, arms=[0, 1, 2, 3], l2_lambda=1, alpha=1, regression='ts', n_jobs=1, backend=None)
mab.arm_to_model[1]

<__main__.LinTSExample at 0x7fbe3444dcc0>

In [6]:
mab.fit(decisions, rewards, contexts)
expectations = mab.predict_expectations(test_contexts)

expectations[0][1]

-3.842611094122434

In [7]:
pickle.dump(mab, open('sgm_mab.pkl', 'wb'))
pickle.dump(expectations, open('sgm_expectations.pkl', 'wb'))

# Red Hat

In [2]:
platform.platform()

'Linux-3.10.0-1160.15.2.el7.x86_64-x86_64-with-glibc2.10'

In [3]:
print(np.__version__)

1.19.2


In [4]:
data = pd.read_csv('simulated_data.csv')

train = data[data['set']=='train']
test = data[data['set']=='test']

context_features = [c for c in data.columns if c not in ['set', 'arm', 'reward']]

decisions = MAB._convert_array(train['arm'])
rewards = MAB._convert_array(train['reward'])
contexts = MAB._convert_matrix(train[context_features]).astype('float')

test_contexts = MAB._convert_matrix(test[context_features]).astype('float')

print(context_features)

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19']


In [5]:
rng = np.random.RandomState(seed=11)
mab = LinearExample(rng=rng, arms=[0, 1, 2, 3], l2_lambda=1, alpha=1, regression='ts', n_jobs=1, backend=None)
mab.arm_to_model[1]

<__main__.LinTSExample at 0x7ff224be3220>

In [6]:
mab.fit(decisions, rewards, contexts)
expectations = mab.predict_expectations(test_contexts)

expectations[0][1]

-3.842999860229387

In [7]:
pickle.dump(mab, open('rh_mab.pkl', 'wb'))
pickle.dump(expectations, open('rh_expectations.pkl', 'wb'))