In [1]:
# Run this cell for every experiment
import pandas as pd
import numpy as np
import pickle
import platform
from sklearn.preprocessing import StandardScaler

from mabwiser.mab import MAB
from mabwiser.linear import _RidgeRegression, _Linear

class LinTSExample(_RidgeRegression):
    """Linear Thompson Sampling arm model built on mabwiser's ridge regression.

    Each call to ``predict`` draws one coefficient vector from a multivariate
    normal centred on ``self.beta`` and scores the context with that draw.
    """

    def predict(self, x):
        """Return the sampled expected reward for context ``x``.

        Parameters
        ----------
        x : np.ndarray
            Context to score. It is passed through
            ``self._scale_predict_context`` first when ``self.scaler`` is set.

        Returns
        -------
        The dot product of ``x`` with one sampled coefficient vector.
        """
        if self.scaler is not None:
            x = self._scale_predict_context(x)

        # Covariance of the sampling distribution: A_inv scaled by alpha squared.
        covar = np.dot(self.alpha**2, self.A_inv)

        # Sample with the generator owned by this model instead of a
        # notebook-level global `rng`, which only exists after a later cell has
        # run and is not available after unpickling or in worker processes.
        # NOTE(review): relies on _RidgeRegression.__init__ storing its first
        # argument as self.rng -- confirm against the installed mabwiser version.
        beta_sampled = self.rng.multivariate_normal(self.beta, covar)
        return np.dot(x, beta_sampled)
    
class LinearExample(_Linear):
    """``_Linear`` bandit whose per-arm models come from this class's factory."""

    # Regression name -> class used to build each arm's model.
    factory = {"ts": LinTSExample}

    def __init__(self, rng, arms, n_jobs=1, backend=None, l2_lambda=1, alpha=1, regression='ts', arm_to_scaler = None):
        """Initialise the parent bandit, then install one factory-built model per arm.

        ``rng``, ``arms``, ``n_jobs``, ``backend``, ``l2_lambda``, ``alpha`` and
        ``regression`` are forwarded positionally to ``_Linear.__init__``.
        ``arm_to_scaler`` optionally maps each arm to the scaler handed to that
        arm's model; when omitted every arm gets ``None``.
        """
        super().__init__(rng, arms, n_jobs, backend, l2_lambda, alpha, regression)

        self.l2_lambda = l2_lambda
        self.alpha = alpha
        self.regression = regression

        # Initialised to None here; this class does not assign it again.
        self.num_features = None

        # Default: no scaler for any arm.
        if arm_to_scaler is None:
            arm_to_scaler = {arm: None for arm in arms}

        # One model per arm, each constructed with the same rng object.
        self.arm_to_model = {
            arm: LinearExample.factory.get(regression)(rng, l2_lambda, alpha, arm_to_scaler[arm])
            for arm in arms
        }


# Data Pre-Processing

In [None]:
# Tab-separated ratings file; it has no header row, so column names are supplied here.
data = pd.read_csv(
    'ml-100k/u.data',
    sep="\t",
    header=None,
    names=['user id', 'item id', 'rating', 'timestamp'],
)

# Pipe-separated user file, also without a header row.
users = pd.read_csv(
    'ml-100k/u.user',
    sep="|",
    header=None,
    names=['user id', 'age', 'gender', 'occupation', 'zip code'],
)

In [None]:
# Preview the first rows of the ratings table.
data.head()

In [None]:
# Preview the first rows of the user table.
users.head()

In [None]:
# Inspect the distinct gender values before encoding them.
users['gender'].value_counts()

In [None]:
# Encode gender as an integer flag: 'M' -> 0, every other value -> 1.
# A vectorized comparison replaces the row-wise apply; int64 is the dtype
# pandas inferred from the Python ints the lambda returned.
# Run once only: after encoding, no value equals 'M', so a re-run yields all 1s.
users['gender'] = users['gender'].ne('M').astype('int64')

In [None]:
# Inspect the distinct occupation values before one-hot encoding them.
users['occupation'].value_counts()

In [None]:
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder

# Replace each zip code with the ordinal code assigned by the encoder;
# reshape(-1, 1) turns the column into a single-feature 2-D array.
ord_en = OrdinalEncoder()
users['zip code'] = ord_en.fit_transform(users['zip code'].values.reshape(-1, 1))

In [None]:
# One-hot encode occupation, then wrap the encoder's sparse output in a
# sparse-backed DataFrame.
oh_en = OneHotEncoder()
occupations = oh_en.fit_transform(users['occupation'].values.reshape(-1, 1))
occupations = pd.DataFrame.sparse.from_spmatrix(occupations)

In [None]:
# Inspect the occupation categories found by the encoder.
oh_en.categories_

In [None]:
# Name the one-hot columns after the categories the encoder found.
occupations.columns = list(oh_en.categories_[0])

In [None]:
# Combined indicator: per-row sum of the 'none' and 'other' occupation columns.
occupations['uncategorized'] = occupations.apply(lambda x: x['none'] + x['other'], axis=1)

In [None]:
# Attach the one-hot occupation columns by row index and drop the raw
# occupation column in the same expression.
users = users.merge(
    occupations, how='left', left_index=True, right_index=True
).drop(columns='occupation')

users.head()

In [None]:
from sklearn.model_selection import train_test_split

# Fixed random_state so the 70/30 user split is the same on every run.
train, test = train_test_split(users, random_state=43, test_size=0.3)

# assign() returns new frames instead of writing into the frames handed back by
# train_test_split, which pandas can flag with SettingWithCopyWarning.
train = train.assign(set='train')
test = test.assign(set='test')

# Save the labelled users so later runs reload exactly the same partitions.
users = pd.concat([train, test])
users.to_csv('movielens_users.csv', index=False)

In [None]:
# Every row of the ratings table is an observed interaction.
data['rated'] = 1

user_ids = users['user id'].to_list()
item_ids = list(data['item id'].unique())

# All (user, item) pairs with rated=0, built as a Cartesian product in the same
# user-major order as a nested loop, without appending one Python list per pair.
unrated = pd.MultiIndex.from_product(
    [user_ids, item_ids], names=['user id', 'item id']
).to_frame(index=False)
unrated['rated'] = 0

# Stack observed and placeholder rows; max() keeps 1 wherever a rating exists.
responses = pd.concat([data[['user id', 'item id', 'rated']], unrated])
responses = responses.groupby(['user id', 'item id'])['rated'].max().reset_index()

responses.head()

In [None]:
# Count rated (1) versus unrated (0) user-item pairs.
responses['rated'].value_counts()

In [None]:
# Save the response table to CSV without the index column.
responses.to_csv('movielens_responses.csv', index=False)

In [None]:
# Derive both partitions from `users` (labelled through its 'set' column) so
# this cell can be re-run safely; merging into an already-merged `train` would
# duplicate the response columns.
train = users[users['set'] == 'train'].merge(responses, how='left', on='user id')
test = users[users['set'] == 'test']

# Every user column except the identifier and the partition label is a feature.
context_features = [c for c in users.columns if c not in ['user id', 'set']]

decisions = MAB._convert_array(train['item id'])
rewards = MAB._convert_array(train['rated'])
contexts = MAB._convert_matrix(train[context_features]).astype('float')

test_contexts = MAB._convert_matrix(test[context_features]).astype('float')

# Fit the scaler on the training contexts only, then apply it to both sets.
scaler = StandardScaler()
contexts = scaler.fit_transform(contexts)
test_contexts = scaler.transform(test_contexts)

In [None]:
# Seeded generator shared by the bandit and its arm models.
rng = np.random.RandomState(seed=11)
mab = LinearExample(rng=rng, arms=item_ids, l2_lambda=1, alpha=1, regression='ts', n_jobs=1, backend=None)

mab.fit(decisions, rewards, contexts)


# Print the singular values of arm 1's A_inv matrix; only `s` is displayed.
u, s, vh = np.linalg.svd(mab.arm_to_model[1].A_inv)
print(s)


In [None]:
# Persist the fitted scaler; the context manager closes the file handle, which
# the bare open() call left to the garbage collector.
with open('movielens_scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

# Mac OS

In [2]:
# Record the platform string of the machine running this section.
platform.platform()

'macOS-10.15.7-x86_64-i386-64bit'

In [3]:
# Record the NumPy version used for this run.
print(np.__version__)

1.18.5


In [4]:
# Reload the user and response tables written by the pre-processing cells.
users = pd.read_csv('movielens_users.csv')
responses = pd.read_csv('movielens_responses.csv')

In [5]:
# Split the reloaded user table into its saved train/test partitions.
train = users[users['set']=='train']
test = users[users['set']=='test']

# Join each training user to their response rows.
train = train.merge(responses, how='left', on='user id')
context_features = [c for c in users.columns if c not in ['user id', 'set']]

decisions = MAB._convert_array(train['item id'])
rewards = MAB._convert_array(train['rated'])
contexts = MAB._convert_matrix(train[context_features]).astype('float')

test_contexts = MAB._convert_matrix(test[context_features]).astype('float')

# Load the previously fitted scaler; the context manager closes the file handle.
# NOTE: pickle.load can execute arbitrary code -- only load a pickle you created.
with open('movielens_scaler.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

contexts = scaler.transform(contexts)
test_contexts = scaler.transform(test_contexts)

In [6]:
# Same seed in every platform section so the runs are comparable.
rng = np.random.RandomState(seed=11)
arms = list(responses['item id'].unique())

mab = LinearExample(rng=rng, arms=arms, l2_lambda=1, alpha=1, regression='ts', n_jobs=1, backend=None)
# Display the model object created for arm 1.
mab.arm_to_model[1]

<__main__.LinTSExample at 0x7f873c7f87f0>

In [10]:
# Train on the scaled training contexts, then compute expectations for the test contexts.
mab.fit(decisions, rewards, contexts)
expectations = mab.predict_expectations(test_contexts)

# Presumably the expectation of arm 1 for the first test context -- confirm the return layout.
expectations[0][1]

-0.3367554577366122

In [11]:
# Show arm 1's beta vector (the mean used when LinTSExample.predict samples coefficients).
mab.arm_to_model[1].beta

array([-0.08095699, -0.00854225,  0.03406988, -0.02977242, -0.02564018,
        0.0249423 ,  0.00339479,  0.05286781,  0.01410376,  0.01511638,
        0.01176439, -0.02297439,  0.010684  , -0.01826324, -0.02972517,
        0.0154348 , -0.00859744,  0.0035536 , -0.01022135,  0.00420363,
       -0.01174913,  0.00831367,  0.01745217, -0.02003198, -0.00278235])

In [12]:
# Save the fitted bandit and its expectations; each file handle is closed on exit.
with open('mac_ml_mab.pkl', 'wb') as mab_file:
    pickle.dump(mab, mab_file)
with open('mac_ml_expectations.pkl', 'wb') as expectations_file:
    pickle.dump(expectations, expectations_file)

# Windows

In [2]:
# Record the platform string of the machine running this section.
platform.platform()

'Windows-10-10.0.18362-SP0'

In [3]:
# Record the NumPy version used for this run.
print(np.__version__)

1.19.4


In [4]:
# Reload the user and response tables written by the pre-processing cells.
users = pd.read_csv('movielens_users.csv')
responses = pd.read_csv('movielens_responses.csv')

In [5]:
# Split the reloaded user table into its saved train/test partitions.
train = users[users['set']=='train']
test = users[users['set']=='test']

# Join each training user to their response rows.
train = train.merge(responses, how='left', on='user id')
context_features = [c for c in users.columns if c not in ['user id', 'set']]

decisions = MAB._convert_array(train['item id'])
rewards = MAB._convert_array(train['rated'])
contexts = MAB._convert_matrix(train[context_features]).astype('float')

test_contexts = MAB._convert_matrix(test[context_features]).astype('float')

# Load the previously fitted scaler; the context manager closes the file handle.
# NOTE: pickle.load can execute arbitrary code -- only load a pickle you created.
with open('movielens_scaler.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

contexts = scaler.transform(contexts)
test_contexts = scaler.transform(test_contexts)



In [6]:
# Same seed in every platform section so the runs are comparable.
rng = np.random.RandomState(seed=11)
arms = list(responses['item id'].unique())

mab = LinearExample(rng=rng, arms=arms, l2_lambda=1, alpha=1, regression='ts', n_jobs=1, backend=None)
# Display the model object created for arm 1.
mab.arm_to_model[1]

<__main__.LinTSExample at 0x237853b9358>

In [7]:
# Train on the scaled training contexts, then compute expectations for the test contexts.
mab.fit(decisions, rewards, contexts)
expectations = mab.predict_expectations(test_contexts)

# Presumably the expectation of arm 1 for the first test context -- confirm the return layout.
expectations[0][1]

-0.2090335388485438

In [8]:
# Show arm 1's beta vector (the mean used when LinTSExample.predict samples coefficients).
mab.arm_to_model[1].beta

array([-0.08095699, -0.00854225,  0.03406988, -0.02977242, -0.02564018,
        0.0249423 ,  0.00339479,  0.05286781,  0.01410376,  0.01511638,
        0.01176439, -0.02297439,  0.010684  , -0.01826324, -0.02972517,
        0.0154348 , -0.00859744,  0.0035536 , -0.01022135,  0.00420363,
       -0.01174913,  0.00831367,  0.01745217, -0.02003198, -0.00278235])

In [9]:
# Save the fitted bandit and its expectations; each file handle is closed on exit.
with open('win_ml_mab.pkl', 'wb') as mab_file:
    pickle.dump(mab, mab_file)
with open('win_ml_expectations.pkl', 'wb') as expectations_file:
    pickle.dump(expectations, expectations_file)

# SageMaker

In [2]:
# Record the platform string of the machine running this section.
platform.platform()

'Linux-4.14.225-121.362.amzn1.x86_64-x86_64-with-glibc2.9'

In [3]:
# Record the NumPy version used for this run.
print(np.__version__)

1.19.5


In [4]:
# Reload the user and response tables written by the pre-processing cells.
users = pd.read_csv('movielens_users.csv')
responses = pd.read_csv('movielens_responses.csv')

In [5]:
# Split the reloaded user table into its saved train/test partitions.
train = users[users['set']=='train']
test = users[users['set']=='test']

# Join each training user to their response rows.
train = train.merge(responses, how='left', on='user id')
context_features = [c for c in users.columns if c not in ['user id', 'set']]

decisions = MAB._convert_array(train['item id'])
rewards = MAB._convert_array(train['rated'])
contexts = MAB._convert_matrix(train[context_features]).astype('float')

test_contexts = MAB._convert_matrix(test[context_features]).astype('float')

# Load the previously fitted scaler; the context manager closes the file handle.
# NOTE: pickle.load can execute arbitrary code -- only load a pickle you created.
with open('movielens_scaler.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

contexts = scaler.transform(contexts)
test_contexts = scaler.transform(test_contexts)

In [6]:
# Same seed in every platform section so the runs are comparable.
rng = np.random.RandomState(seed=11)
arms = list(responses['item id'].unique())

mab = LinearExample(rng=rng, arms=arms, l2_lambda=1, alpha=1, regression='ts', n_jobs=1, backend=None)
# Display the model object created for arm 1.
mab.arm_to_model[1]

<__main__.LinTSExample at 0x7fe343811748>

In [7]:
# Train on the scaled training contexts, then compute expectations for the test contexts.
mab.fit(decisions, rewards, contexts)
expectations = mab.predict_expectations(test_contexts)

# Presumably the expectation of arm 1 for the first test context -- confirm the return layout.
expectations[0][1]

-0.1432304537644518

In [8]:
# Show arm 1's beta vector (the mean used when LinTSExample.predict samples coefficients).
mab.arm_to_model[1].beta

array([-0.08095699, -0.00854225,  0.03406988, -0.02977242, -0.02564018,
        0.0249423 ,  0.00339479,  0.05286781,  0.01410376,  0.01511638,
        0.01176439, -0.02297439,  0.010684  , -0.01826324, -0.02972517,
        0.0154348 , -0.00859744,  0.0035536 , -0.01022135,  0.00420363,
       -0.01174913,  0.00831367,  0.01745217, -0.02003198, -0.00278235])

In [9]:
# Save the fitted bandit and its expectations; each file handle is closed on exit.
with open('sgm_ml_mab.pkl', 'wb') as mab_file:
    pickle.dump(mab, mab_file)
with open('sgm_ml_expectations.pkl', 'wb') as expectations_file:
    pickle.dump(expectations, expectations_file)

# Red Hat

In [3]:
# Record the platform string of the machine running this section.
platform.platform()

'Linux-3.10.0-1160.15.2.el7.x86_64-x86_64-with-glibc2.10'

In [4]:
# Record the NumPy version used for this run.
print(np.__version__)

1.19.2


In [5]:
# Reload the user and response tables written by the pre-processing cells.
users = pd.read_csv('movielens_users.csv')
responses = pd.read_csv('movielens_responses.csv')

In [6]:
# Split the reloaded user table into its saved train/test partitions.
train = users[users['set']=='train']
test = users[users['set']=='test']

# Join each training user to their response rows.
train = train.merge(responses, how='left', on='user id')
context_features = [c for c in users.columns if c not in ['user id', 'set']]

decisions = MAB._convert_array(train['item id'])
rewards = MAB._convert_array(train['rated'])
contexts = MAB._convert_matrix(train[context_features]).astype('float')

test_contexts = MAB._convert_matrix(test[context_features]).astype('float')

# Load the previously fitted scaler; the context manager closes the file handle.
# NOTE: pickle.load can execute arbitrary code -- only load a pickle you created.
with open('movielens_scaler.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

contexts = scaler.transform(contexts)
test_contexts = scaler.transform(test_contexts)

In [7]:
# Same seed in every platform section so the runs are comparable.
rng = np.random.RandomState(seed=11)
arms = list(responses['item id'].unique())

mab = LinearExample(rng=rng, arms=arms, l2_lambda=1, alpha=1, regression='ts', n_jobs=1, backend=None)
# Display the model object created for arm 1.
mab.arm_to_model[1]

<__main__.LinTSExample at 0x7f41d283dac0>

In [8]:
# Train on the scaled training contexts, then compute expectations for the test contexts.
mab.fit(decisions, rewards, contexts)
expectations = mab.predict_expectations(test_contexts)

# Presumably the expectation of arm 1 for the first test context -- confirm the return layout.
expectations[0][1]

-0.1432304537642795

In [9]:
# Show arm 1's beta vector (the mean used when LinTSExample.predict samples coefficients).
mab.arm_to_model[1].beta

array([-0.08095699, -0.00854225,  0.03406988, -0.02977242, -0.02564018,
        0.0249423 ,  0.00339479,  0.05286781,  0.01410376,  0.01511638,
        0.01176439, -0.02297439,  0.010684  , -0.01826324, -0.02972517,
        0.0154348 , -0.00859744,  0.0035536 , -0.01022135,  0.00420363,
       -0.01174913,  0.00831367,  0.01745217, -0.02003198, -0.00278235])

In [10]:
# Save the fitted bandit and its expectations; each file handle is closed on exit.
with open('rh_ml_mab.pkl', 'wb') as mab_file:
    pickle.dump(mab, mab_file)
with open('rh_ml_expectations.pkl', 'wb') as expectations_file:
    pickle.dump(expectations, expectations_file)

# Mac OS 2

In [2]:
# Record the platform string of the machine running this section.
platform.platform()

'Darwin-19.5.0-x86_64-i386-64bit'

In [3]:
# Record the NumPy version used for this run.
print(np.__version__)

1.18.1


In [4]:
# Reload the user and response tables written by the pre-processing cells.
users = pd.read_csv('movielens_users.csv')
responses = pd.read_csv('movielens_responses.csv')

In [5]:
# Split the reloaded user table into its saved train/test partitions.
train = users[users['set']=='train']
test = users[users['set']=='test']

# Join each training user to their response rows.
train = train.merge(responses, how='left', on='user id')
context_features = [c for c in users.columns if c not in ['user id', 'set']]

decisions = MAB._convert_array(train['item id'])
rewards = MAB._convert_array(train['rated'])
contexts = MAB._convert_matrix(train[context_features]).astype('float')

test_contexts = MAB._convert_matrix(test[context_features]).astype('float')

# Load the previously fitted scaler; the context manager closes the file handle.
# NOTE: pickle.load can execute arbitrary code -- only load a pickle you created.
with open('movielens_scaler.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

contexts = scaler.transform(contexts)
test_contexts = scaler.transform(test_contexts)



In [6]:
# Same seed in every platform section so the runs are comparable.
rng = np.random.RandomState(seed=11)
arms = list(responses['item id'].unique())

mab = LinearExample(rng=rng, arms=arms, l2_lambda=1, alpha=1, regression='ts', n_jobs=1, backend=None)
# Display the model object created for arm 1.
mab.arm_to_model[1]

<__main__.LinTSExample at 0x7fac77eb1210>

In [7]:
# Train on the scaled training contexts, then compute expectations for the test contexts.
mab.fit(decisions, rewards, contexts)
expectations = mab.predict_expectations(test_contexts)

# Presumably the expectation of arm 1 for the first test context -- confirm the return layout.
expectations[0][1]

-0.14323045376436855

In [8]:
# Show arm 1's beta vector (the mean used when LinTSExample.predict samples coefficients).
mab.arm_to_model[1].beta

array([-0.08095699, -0.00854225,  0.03406988, -0.02977242, -0.02564018,
        0.0249423 ,  0.00339479,  0.05286781,  0.01410376,  0.01511638,
        0.01176439, -0.02297439,  0.010684  , -0.01826324, -0.02972517,
        0.0154348 , -0.00859744,  0.0035536 , -0.01022135,  0.00420363,
       -0.01174913,  0.00831367,  0.01745217, -0.02003198, -0.00278235])

In [9]:
# Save the fitted bandit and its expectations; each file handle is closed on exit.
with open('dar_ml_mab.pkl', 'wb') as mab_file:
    pickle.dump(mab, mab_file)
with open('dar_ml_expectations.pkl', 'wb') as expectations_file:
    pickle.dump(expectations, expectations_file)