In [1]:
# Run this cell for every experiment
import pandas as pd
import numpy as np
import pickle
import platform
from sklearn.preprocessing import StandardScaler

from mabwiser.mab import MAB
from mabwiser.linear import _RidgeRegression, _Linear

class LinTSExample(_RidgeRegression):
    """Ridge-regression arm model that predicts by sampling coefficients (Thompson Sampling).

    NOTE(review): the covariance passed to the sampler is ``A_inv`` as-is; ``alpha`` is not
    applied in this class — confirm that is intended.
    """

    def predict(self, x):
        """Return ``x . beta_sampled`` where ``beta_sampled ~ N(self.beta, self.A_inv)``.

        ``x`` is scaled first when the model carries a scaler.
        """
        if self.scaler is not None:
            x = self._scale_predict_context(x)

        # Sample with the generator held by the model instead of a notebook-level `rng`:
        # the global is only created in a later cell and does not exist when a pickled
        # model is loaded in another session.
        # Assumes `_RidgeRegression.__init__` stores its first argument as `self.rng`
        # (as mabwiser does) — confirm against the installed version.
        beta_sampled = self.rng.multivariate_normal(self.beta, self.A_inv)
        return np.dot(x, beta_sampled)
    
class LinearExample(_Linear):
    """`_Linear` policy whose per-arm models are built from this class's own `factory`."""

    # Maps the `regression` argument to the arm-model class to instantiate.
    factory = {"ts": LinTSExample}

    def __init__(self, rng, arms, n_jobs=1, backend=None, l2_lambda=1, alpha=1, regression='ts', arm_to_scaler=None):
        """Initialise the parent policy, then replace `arm_to_model` with example models.

        rng, arms, n_jobs, backend, l2_lambda, alpha and regression are forwarded to
        `_Linear.__init__`; `arm_to_scaler` optionally maps each arm to a scaler and
        defaults to no scaler for every arm.
        """
        super().__init__(rng, arms, n_jobs, backend, l2_lambda, alpha, regression)

        self.regression = regression
        self.alpha = alpha
        self.l2_lambda = l2_lambda

        # Number of context features is not known at construction time
        self.num_features = None

        # Without explicit scalers, every arm gets None
        if arm_to_scaler is None:
            arm_to_scaler = {arm: None for arm in arms}

        # One regression model per arm, looked up in this class's factory
        self.arm_to_model = {
            arm: LinearExample.factory.get(regression)(rng, l2_lambda, alpha, arm_to_scaler[arm])
            for arm in arms
        }


# Create Data Set

In [2]:
from sklearn.datasets import make_multilabel_classification

# Simulated multi-label data: 1000 samples, 25 features, 20 label columns.
# The fixed random_state keeps the generated data set the same on every run.
X, y = make_multilabel_classification(
    n_samples=1000,
    n_features=25,
    n_classes=20,
    n_labels=3,
    length=50,
    allow_unlabeled=True,
    sparse=False,
    return_indicator='dense',
    return_distributions=False,
    random_state=11,
)

In [3]:
# Label indicator vector of the first sample (one 0/1 entry per class)
y[0]

array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0])

In [4]:
# Wrap the generated arrays in labelled frames: feature_0..feature_24 and item_0..item_19
data = pd.DataFrame(X, columns=[f'feature_{i}' for i in range(25)])
responses = pd.DataFrame(y, columns=[f'item_{i}' for i in range(20)])

In [5]:
# Attach the item_* label columns to the features, aligning rows on the index
data = pd.merge(data, responses, how='left', left_index=True, right_index=True)
data.head()

Unnamed: 0,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,...,item_10,item_11,item_12,item_13,item_14,item_15,item_16,item_17,item_18,item_19
0,0.0,1.0,1.0,2.0,3.0,3.0,2.0,1.0,4.0,6.0,...,0,0,0,0,0,1,0,0,0,0
1,3.0,2.0,1.0,4.0,1.0,2.0,3.0,1.0,3.0,2.0,...,0,0,0,0,0,1,0,0,0,1
2,2.0,4.0,3.0,4.0,3.0,5.0,3.0,2.0,1.0,3.0,...,1,0,0,0,0,1,0,0,0,0
3,0.0,3.0,4.0,0.0,1.0,1.0,1.0,1.0,1.0,6.0,...,0,0,1,0,0,1,0,1,0,0
4,3.0,1.0,1.0,1.0,3.0,3.0,0.0,1.0,1.0,1.0,...,1,0,0,0,0,0,0,0,0,0


In [6]:
# One synthetic user id per row; derived from the frame length so it stays in sync
# with the number of generated samples instead of repeating the literal 1000.
data['user'] = range(len(data))

In [7]:
# Confirm the `user` column is now present as the last column
data.head()

Unnamed: 0,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,...,item_11,item_12,item_13,item_14,item_15,item_16,item_17,item_18,item_19,user
0,0.0,1.0,1.0,2.0,3.0,3.0,2.0,1.0,4.0,6.0,...,0,0,0,0,1,0,0,0,0,0
1,3.0,2.0,1.0,4.0,1.0,2.0,3.0,1.0,3.0,2.0,...,0,0,0,0,1,0,0,0,1,1
2,2.0,4.0,3.0,4.0,3.0,5.0,3.0,2.0,1.0,3.0,...,0,0,0,0,1,0,0,0,0,2
3,0.0,3.0,4.0,0.0,1.0,1.0,1.0,1.0,1.0,6.0,...,0,1,0,0,1,0,1,0,0,3
4,3.0,1.0,1.0,1.0,3.0,3.0,0.0,1.0,1.0,1.0,...,0,0,0,0,0,0,0,0,0,4


In [8]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the seed fixes the split
train, test = train_test_split(data, test_size=0.3, random_state=43)

In [9]:
# Columns kept as identifiers while the item_* columns are unpivoted
id_vars = ['user', *(f'feature_{i}' for i in range(25))]

# Long format: one row per (user, item) with the label stored in `response`
train = pd.melt(train, id_vars=id_vars, var_name='item_id', value_name='response')
train.head()

Unnamed: 0,user,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,...,feature_17,feature_18,feature_19,feature_20,feature_21,feature_22,feature_23,feature_24,item_id,response
0,651,3.0,2.0,3.0,2.0,2.0,2.0,5.0,0.0,2.0,...,2.0,1.0,0.0,6.0,2.0,4.0,2.0,1.0,item_0,0
1,789,3.0,0.0,2.0,3.0,5.0,2.0,1.0,1.0,1.0,...,0.0,5.0,5.0,3.0,2.0,2.0,0.0,1.0,item_0,0
2,609,1.0,5.0,4.0,3.0,3.0,4.0,0.0,1.0,4.0,...,2.0,2.0,2.0,3.0,3.0,4.0,3.0,0.0,item_0,0
3,506,2.0,8.0,5.0,2.0,3.0,1.0,1.0,1.0,2.0,...,0.0,3.0,2.0,4.0,1.0,3.0,2.0,1.0,item_0,0
4,847,4.0,0.0,2.0,7.0,2.0,2.0,1.0,1.0,4.0,...,1.0,3.0,0.0,0.0,5.0,3.0,1.0,0.0,item_0,0


In [10]:
# Same unpivot for the test split, reusing the `id_vars` list defined for the train split
test = pd.melt(test, id_vars=id_vars, var_name='item_id', value_name='response')
test.head()

Unnamed: 0,user,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,...,feature_17,feature_18,feature_19,feature_20,feature_21,feature_22,feature_23,feature_24,item_id,response
0,858,3.0,1.0,2.0,2.0,3.0,3.0,2.0,3.0,2.0,...,3.0,4.0,1.0,1.0,2.0,2.0,1.0,2.0,item_0,0
1,986,0.0,1.0,2.0,0.0,1.0,1.0,2.0,0.0,1.0,...,3.0,2.0,2.0,2.0,5.0,2.0,1.0,2.0,item_0,1
2,183,0.0,2.0,0.0,2.0,0.0,2.0,1.0,2.0,6.0,...,5.0,0.0,3.0,4.0,7.0,1.0,1.0,0.0,item_0,0
3,502,0.0,0.0,1.0,0.0,2.0,1.0,2.0,1.0,1.0,...,0.0,2.0,2.0,1.0,2.0,1.0,1.0,2.0,item_0,1
4,710,0.0,3.0,0.0,0.0,0.0,3.0,3.0,3.0,1.0,...,4.0,2.0,1.0,4.0,0.0,1.0,1.0,2.0,item_0,0


In [11]:
# Count the positive responses per item in the training split
train['response'].groupby(train['item_id']).sum()

item_id
item_0      40
item_1       8
item_10    191
item_11     27
item_12    209
item_13    211
item_14     43
item_15    135
item_16      5
item_17     26
item_18     84
item_19     47
item_2     131
item_3     177
item_4     111
item_5     129
item_6       7
item_7     124
item_8     210
item_9     200
Name: response, dtype: int32

In [12]:
# Count the positive responses per item in the test split
test['response'].groupby(test['item_id']).sum()

item_id
item_0     28
item_1      2
item_10    78
item_11    16
item_12    95
item_13    80
item_14    24
item_15    67
item_16     2
item_17     8
item_18    32
item_19    20
item_2     54
item_3     69
item_4     45
item_5     47
item_6      1
item_7     46
item_8     91
item_9     80
Name: response, dtype: int32

In [13]:
context_features = [f'feature_{i}' for i in range(25)]

# Bandit inputs via MABWiser's private converters
# (presumably these return NumPy arrays — the `.astype` calls rely on that)
decisions = MAB._convert_array(train['item_id'])
rewards = MAB._convert_array(train['response'])
contexts = MAB._convert_matrix(train[context_features]).astype(float)

test_contexts = MAB._convert_matrix(test[context_features]).astype(float)

In [14]:
# StandardScaler is already imported in the first cell, so it is not re-imported here.
# Standardise the training contexts; note this overwrites `contexts`, so the cell
# is not idempotent when re-run.
scaler = StandardScaler()
contexts = scaler.fit_transform(contexts)

In [15]:
rng = np.random.RandomState(seed=11)
arms = list(range(20))
mab = LinearExample(rng=rng, arms=arms, l2_lambda=1, alpha=1, regression='ts', n_jobs=1, backend=None)

# NOTE(review): `decisions` is built from the string-valued `item_id` column
# ('item_0', ...) while `arms` are the integers 0-19. Confirm that fit matches any
# training rows to an arm — singular values that are all exactly 1 would be
# consistent with `A_inv` never being updated.
mab.fit(decisions, rewards, contexts)

# Only the singular values of each arm's A_inv are inspected, so skip computing
# the unused U and V^H factors.
for arm in mab.arms:
    print(np.linalg.svd(mab.arm_to_model[arm].A_inv, compute_uv=False))

[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 

The `A_inv` matrix of each arm has duplicate singular values (all equal to 1 in the output above), so this data set will be able to reproduce the non-deterministic behavior.

In [16]:
# Show the list of context feature column names
context_features

['feature_0',
 'feature_1',
 'feature_2',
 'feature_3',
 'feature_4',
 'feature_5',
 'feature_6',
 'feature_7',
 'feature_8',
 'feature_9',
 'feature_10',
 'feature_11',
 'feature_12',
 'feature_13',
 'feature_14',
 'feature_15',
 'feature_16',
 'feature_17',
 'feature_18',
 'feature_19',
 'feature_20',
 'feature_21',
 'feature_22',
 'feature_23',
 'feature_24']

In [17]:
# Tag each split so it can be recovered after the two frames are stacked
train['set'], test['set'] = 'train', 'test'

# Single CSV shared by every platform section
data = pd.concat([train, test])
data.to_csv('simulated_multi_label_data.csv', index=False)

# Mac OS

In [2]:
# Record the platform string of the machine running this section
platform.platform()

'macOS-10.15.7-x86_64-i386-64bit'

In [3]:
# Record the NumPy version used for this platform's run
print(np.__version__)

1.18.5


In [4]:
# Reload the simulated data set written by the data-creation section
data = pd.read_csv('simulated_multi_label_data.csv')

# Recover the two splits from the `set` marker column
train = data.loc[data['set'] == 'train']
test = data.loc[data['set'] == 'test']

context_features = [f'feature_{i}' for i in range(25)]

# Bandit training inputs via MABWiser's private converters
decisions = MAB._convert_array(train['item_id'])
rewards = MAB._convert_array(train['response'])
contexts = MAB._convert_matrix(train[context_features]).astype(float)

# Keep a single row per distinct (user, context) combination for prediction
test_unique = test.drop_duplicates(subset=['user', *context_features]).reset_index(drop=True)
test_contexts = MAB._convert_matrix(test_unique[context_features]).astype(float)


In [5]:
# Learn the scaling statistics on the training contexts only, then apply them to
# both splits. The variables are overwritten, so re-running this cell re-scales.
scaler = StandardScaler().fit(contexts)
contexts = scaler.transform(contexts)
test_contexts = scaler.transform(test_contexts)

In [6]:
# Same seed in every platform section so each run starts from the same generator state
rng = np.random.RandomState(seed=11)
# NOTE(review): arms are the integers 0-19 while `decisions` comes from the
# string-valued `item_id` column — confirm the labels are meant to differ.
arms = list(range(20))

mab = LinearExample(
    rng=rng,
    arms=arms,
    n_jobs=1,
    backend=None,
    l2_lambda=1,
    alpha=1,
    regression='ts',
)
# Display the model object of arm 1
mab.arm_to_model[1]

<__main__.LinTSExample at 0x7feeff6c21f0>

In [7]:
# Fit on the scaled training data, then compute per-arm expectations for every
# unique test context. Order matters: both calls may consume random state.
mab.fit(decisions, rewards, contexts)
expectations = mab.predict_expectations(test_contexts)

# Spot check: arm 1 for the first test context — the value compared across platforms
expectations[0][1]

-0.3597765333917093

In [8]:
# One column per arm, one row per unique test context
expectations = pd.DataFrame(expectations)

# Align expectations with their users on the row index, then keep only `user` and the arm columns
test_unique = pd.merge(test_unique, expectations, how='left', left_index=True, right_index=True)
test_unique = test_unique.drop(columns=context_features + ['item_id', 'response', 'set'])

In [9]:
# One row per test user with the expectation of each arm in columns 0-19
test_unique.head()

Unnamed: 0,user,0,1,2,3,4,5,6,7,8,...,10,11,12,13,14,15,16,17,18,19
0,858,-2.789692,-0.359777,-1.416557,-3.104305,-1.340856,-2.777399,6.720906,0.972022,3.975339,...,2.038669,3.96199,-3.062116,-1.769683,-2.729035,-5.401761,4.720122,-3.214521,-2.623517,-3.079849
1,986,-4.322288,4.521618,1.60323,-0.936059,-3.317534,-15.015161,9.279325,4.113445,-6.931853,...,-7.991521,-7.937471,0.128051,-1.994588,-0.13005,-8.582392,5.28761,-1.311621,-1.016588,-5.937226
2,183,6.342404,7.617242,-1.55702,1.274322,4.855252,9.782143,5.432605,2.093283,3.491232,...,-8.639939,-4.958396,-5.514446,-4.875965,-1.578464,11.329032,9.429463,-5.120949,-4.762511,-4.644297
3,502,-3.401476,-0.454245,2.725258,-10.855894,-6.124133,2.456331,-2.08913,-4.973794,-2.1119,...,-6.753823,6.027732,1.648415,-0.37699,2.34257,-1.50868,0.376729,-2.599851,-5.81741,-0.68172
4,710,3.111428,5.471607,11.548088,6.113824,-11.630026,12.377598,0.655006,-9.735331,-2.758627,...,4.101555,-1.473654,-4.822333,0.591486,-3.970587,1.830636,6.140734,6.510888,-0.732754,10.513313


In [10]:
# Persist this platform's results. Context managers guarantee that each pickle file
# is flushed and closed instead of leaving the handle to the garbage collector.
with open('mac_multi_mab.pkl', 'wb') as fh:
    pickle.dump(mab, fh)
test_unique.to_csv('mac_multi_expectations.csv', index=False)

# The fitted scaler is saved so the other platform sections can load it
with open('multi_scaler.pkl', 'wb') as fh:
    pickle.dump(scaler, fh)

# Windows

In [18]:
# Record the platform string of the machine running this section
platform.platform()

'Windows-10-10.0.18362-SP0'

In [19]:
# Record the NumPy version used for this platform's run
print(np.__version__)

1.19.4


In [20]:
# Reload the simulated data set written by the data-creation section
data = pd.read_csv('simulated_multi_label_data.csv')

# Recover the two splits from the `set` marker column
train = data.loc[data['set'] == 'train']
test = data.loc[data['set'] == 'test']

context_features = [f'feature_{i}' for i in range(25)]

# Bandit training inputs via MABWiser's private converters
decisions = MAB._convert_array(train['item_id'])
rewards = MAB._convert_array(train['response'])
contexts = MAB._convert_matrix(train[context_features]).astype(float)

# Keep a single row per distinct (user, context) combination for prediction
test_unique = test.drop_duplicates(subset=['user', *context_features]).reset_index(drop=True)
test_contexts = MAB._convert_matrix(test_unique[context_features]).astype(float)


In [21]:
# Reuse the scaler fitted and saved by the Mac OS section so every platform applies
# the identical transformation. The context manager closes the file after loading.
# NOTE: unpickling can execute arbitrary code — only load a file you produced yourself.
with open('multi_scaler.pkl', 'rb') as fh:
    scaler = pickle.load(fh)

contexts = scaler.transform(contexts)
test_contexts = scaler.transform(test_contexts)



In [22]:
# Same seed in every platform section so each run starts from the same generator state
rng = np.random.RandomState(seed=11)
# NOTE(review): arms are the integers 0-19 while `decisions` comes from the
# string-valued `item_id` column — confirm the labels are meant to differ.
arms = list(range(20))

mab = LinearExample(
    rng=rng,
    arms=arms,
    n_jobs=1,
    backend=None,
    l2_lambda=1,
    alpha=1,
    regression='ts',
)
# Display the model object of arm 1
mab.arm_to_model[1]

<__main__.LinTSExample at 0x1c3fd66e208>

In [23]:
# Fit on the scaled training data, then compute per-arm expectations for every
# unique test context. Order matters: both calls may consume random state.
mab.fit(decisions, rewards, contexts)
expectations = mab.predict_expectations(test_contexts)

# Spot check: arm 1 for the first test context — the value compared across platforms
expectations[0][1]

-0.3597765333917093

In [24]:
# One column per arm, one row per unique test context
expectations = pd.DataFrame(expectations)

# Align expectations with their users on the row index, then keep only `user` and the arm columns
test_unique = pd.merge(test_unique, expectations, how='left', left_index=True, right_index=True)
test_unique = test_unique.drop(columns=context_features + ['item_id', 'response', 'set'])

In [25]:
# Persist this platform's results; the context manager guarantees the pickle file
# is flushed and closed instead of leaving the handle to the garbage collector.
with open('win_multi_mab.pkl', 'wb') as fh:
    pickle.dump(mab, fh)
test_unique.to_csv('win_multi_expectations.csv', index=False)


# SageMaker

In [2]:
# Record the platform string of the machine running this section
platform.platform()

'Linux-4.14.225-121.362.amzn1.x86_64-x86_64-with-glibc2.9'

In [3]:
# Record the NumPy version used for this platform's run
print(np.__version__)

1.19.5


In [4]:
# Reload the simulated data set written by the data-creation section
data = pd.read_csv('simulated_multi_label_data.csv')

# Recover the two splits from the `set` marker column
train = data.loc[data['set'] == 'train']
test = data.loc[data['set'] == 'test']

context_features = [f'feature_{i}' for i in range(25)]

# Bandit training inputs via MABWiser's private converters
decisions = MAB._convert_array(train['item_id'])
rewards = MAB._convert_array(train['response'])
contexts = MAB._convert_matrix(train[context_features]).astype(float)

# Keep a single row per distinct (user, context) combination for prediction
test_unique = test.drop_duplicates(subset=['user', *context_features]).reset_index(drop=True)
test_contexts = MAB._convert_matrix(test_unique[context_features]).astype(float)

In [5]:
# Reuse the scaler fitted and saved by the Mac OS section so every platform applies
# the identical transformation. The context manager closes the file after loading.
# NOTE: unpickling can execute arbitrary code — only load a file you produced yourself.
with open('multi_scaler.pkl', 'rb') as fh:
    scaler = pickle.load(fh)

contexts = scaler.transform(contexts)
test_contexts = scaler.transform(test_contexts)

In [6]:
# Same seed in every platform section so each run starts from the same generator state
rng = np.random.RandomState(seed=11)
# NOTE(review): arms are the integers 0-19 while `decisions` comes from the
# string-valued `item_id` column — confirm the labels are meant to differ.
arms = list(range(20))

mab = LinearExample(
    rng=rng,
    arms=arms,
    n_jobs=1,
    backend=None,
    l2_lambda=1,
    alpha=1,
    regression='ts',
)
# Display the model object of arm 1
mab.arm_to_model[1]

<__main__.LinTSExample at 0x7ff1546be160>

In [7]:
# Fit on the scaled training data, then compute per-arm expectations for every
# unique test context. Order matters: both calls may consume random state.
mab.fit(decisions, rewards, contexts)
expectations = mab.predict_expectations(test_contexts)

# Spot check: arm 1 for the first test context — the value compared across platforms
expectations[0][1]

-0.3597765333917093

In [8]:
# One column per arm, one row per unique test context
expectations = pd.DataFrame(expectations)

# Align expectations with their users on the row index, then keep only `user` and the arm columns
test_unique = pd.merge(test_unique, expectations, how='left', left_index=True, right_index=True)
test_unique = test_unique.drop(columns=context_features + ['item_id', 'response', 'set'])

In [9]:
# Persist this platform's results; the context manager guarantees the pickle file
# is flushed and closed instead of leaving the handle to the garbage collector.
with open('sgm_multi_mab.pkl', 'wb') as fh:
    pickle.dump(mab, fh)
test_unique.to_csv('sgm_multi_expectations.csv', index=False)


# Red Hat

In [2]:
# Record the platform string of the machine running this section
platform.platform()

'Linux-3.10.0-1160.15.2.el7.x86_64-x86_64-with-glibc2.10'

In [3]:
# Record the NumPy version used for this platform's run
print(np.__version__)

1.19.2


In [4]:
# Reload the simulated data set written by the data-creation section
data = pd.read_csv('simulated_multi_label_data.csv')

# Recover the two splits from the `set` marker column
train = data.loc[data['set'] == 'train']
test = data.loc[data['set'] == 'test']

context_features = [f'feature_{i}' for i in range(25)]

# Bandit training inputs via MABWiser's private converters
decisions = MAB._convert_array(train['item_id'])
rewards = MAB._convert_array(train['response'])
contexts = MAB._convert_matrix(train[context_features]).astype(float)

# Keep a single row per distinct (user, context) combination for prediction
test_unique = test.drop_duplicates(subset=['user', *context_features]).reset_index(drop=True)
test_contexts = MAB._convert_matrix(test_unique[context_features]).astype(float)

In [5]:
# Reuse the scaler fitted and saved by the Mac OS section so every platform applies
# the identical transformation. The context manager closes the file after loading.
# NOTE: unpickling can execute arbitrary code — only load a file you produced yourself.
with open('multi_scaler.pkl', 'rb') as fh:
    scaler = pickle.load(fh)

contexts = scaler.transform(contexts)
test_contexts = scaler.transform(test_contexts)

In [6]:
# Same seed in every platform section so each run starts from the same generator state
rng = np.random.RandomState(seed=11)
# NOTE(review): arms are the integers 0-19 while `decisions` comes from the
# string-valued `item_id` column — confirm the labels are meant to differ.
arms = list(range(20))

mab = LinearExample(
    rng=rng,
    arms=arms,
    n_jobs=1,
    backend=None,
    l2_lambda=1,
    alpha=1,
    regression='ts',
)
# Display the model object of arm 1
mab.arm_to_model[1]

<__main__.LinTSExample at 0x7f0158650d90>

In [7]:
# Fit on the scaled training data, then compute per-arm expectations for every
# unique test context. Order matters: both calls may consume random state.
mab.fit(decisions, rewards, contexts)
expectations = mab.predict_expectations(test_contexts)

# Spot check: arm 1 for the first test context — the value compared across platforms
expectations[0][1]

-0.3597765333917093

In [8]:
# One column per arm, one row per unique test context
expectations = pd.DataFrame(expectations)

# Align expectations with their users on the row index, then keep only `user` and the arm columns
test_unique = pd.merge(test_unique, expectations, how='left', left_index=True, right_index=True)
test_unique = test_unique.drop(columns=context_features + ['item_id', 'response', 'set'])

In [9]:
# Persist this platform's results; the context manager guarantees the pickle file
# is flushed and closed instead of leaving the handle to the garbage collector.
with open('rh_multi_mab.pkl', 'wb') as fh:
    pickle.dump(mab, fh)
test_unique.to_csv('rh_multi_expectations.csv', index=False)


# Mac Darwin OS

In [2]:
# Record the platform string of the machine running this section
platform.platform()

'Darwin-19.5.0-x86_64-i386-64bit'

In [3]:
# Record the NumPy version used for this platform's run
print(np.__version__)

1.18.1


In [4]:
# Reload the simulated data set written by the data-creation section
data = pd.read_csv('simulated_multi_label_data.csv')

# Recover the two splits from the `set` marker column
train = data.loc[data['set'] == 'train']
test = data.loc[data['set'] == 'test']

context_features = [f'feature_{i}' for i in range(25)]

# Bandit training inputs via MABWiser's private converters
decisions = MAB._convert_array(train['item_id'])
rewards = MAB._convert_array(train['response'])
contexts = MAB._convert_matrix(train[context_features]).astype(float)

# Keep a single row per distinct (user, context) combination for prediction
test_unique = test.drop_duplicates(subset=['user', *context_features]).reset_index(drop=True)
test_contexts = MAB._convert_matrix(test_unique[context_features]).astype(float)

In [5]:
# Reuse the scaler fitted and saved by the Mac OS section so every platform applies
# the identical transformation. The context manager closes the file after loading.
# NOTE: unpickling can execute arbitrary code — only load a file you produced yourself.
with open('multi_scaler.pkl', 'rb') as fh:
    scaler = pickle.load(fh)

contexts = scaler.transform(contexts)
test_contexts = scaler.transform(test_contexts)



In [6]:
# Same seed in every platform section so each run starts from the same generator state
rng = np.random.RandomState(seed=11)
# NOTE(review): arms are the integers 0-19 while `decisions` comes from the
# string-valued `item_id` column — confirm the labels are meant to differ.
arms = list(range(20))

mab = LinearExample(
    rng=rng,
    arms=arms,
    n_jobs=1,
    backend=None,
    l2_lambda=1,
    alpha=1,
    regression='ts',
)
# Display the model object of arm 1
mab.arm_to_model[1]

<__main__.LinTSExample at 0x7fe7273ff9d0>

In [7]:
# Fit on the scaled training data, then compute per-arm expectations for every
# unique test context. Order matters: both calls may consume random state.
mab.fit(decisions, rewards, contexts)
expectations = mab.predict_expectations(test_contexts)

# Spot check: arm 1 for the first test context — the value compared across platforms
expectations[0][1]

-0.3597765333917091

In [8]:
# One column per arm, one row per unique test context
expectations = pd.DataFrame(expectations)

# Align expectations with their users on the row index, then keep only `user` and the arm columns
test_unique = pd.merge(test_unique, expectations, how='left', left_index=True, right_index=True)
test_unique = test_unique.drop(columns=context_features + ['item_id', 'response', 'set'])

In [9]:
# Persist this platform's results; the context manager guarantees the pickle file
# is flushed and closed instead of leaving the handle to the garbage collector.
with open('dar_multi_mab.pkl', 'wb') as fh:
    pickle.dump(mab, fh)
test_unique.to_csv('dar_multi_expectations.csv', index=False)
