In [2]:
import numpy as np
import sklearn

# Record the library versions of the kernel running this notebook, one per line.
print(np.__version__, sklearn.__version__, sep="\n")

1.18.5
0.24.1


In [122]:
%%writefile example_lints.py

# Run this cell for every experiment
from datetime import datetime
import json
import pandas as pd
import numpy as np
import os
import platform
import pickle
from sklearn.preprocessing import StandardScaler
import sys

from mabwiser.mab import MAB
from mabwiser.linear import _RidgeRegression, _Linear

from utils import contains_repeated_eigenvalues, all_positive_definite

# Sampling variant under test, taken from the first command-line argument.
# The rest of this script recognises 'randomstate', 'svd' and 'cholesky'.
if len(sys.argv) < 2:
    # Fail with a usage message rather than a bare IndexError on sys.argv[1].
    sys.exit("usage: python example_lints.py {randomstate|svd|cholesky}")
random_option = sys.argv[1]

class LinTSExample(_RidgeRegression):
    """Ridge-regression arm model whose prediction uses randomly drawn coefficients.

    Depends on two module-level names bound elsewhere in this script:
    ``random_option`` (selects the sampling call) and ``rng2`` (the generator
    the coefficients are drawn from).
    """

    def predict(self, x):
        """Return ``x`` dotted with one coefficient draw from N(self.beta, self.A_inv).

        The context is first passed through ``self._scale_predict_context``
        when a scaler is attached to this model.
        """
        context = x if self.scaler is None else self._scale_predict_context(x)

        # Only the 'cholesky' run requests an explicit factorisation method;
        # every other run calls multivariate_normal with its defaults.
        sampling_kwargs = {'method': 'cholesky'} if random_option == 'cholesky' else {}
        beta_sampled = rng2.multivariate_normal(self.beta, self.A_inv, **sampling_kwargs)

        return np.dot(context, beta_sampled)
    
class LinearExample(_Linear):
    """Linear bandit policy that builds its per-arm models from ``LinTSExample``."""

    # Maps the `regression` argument to the per-arm model class.
    factory = {"ts": LinTSExample}

    def __init__(self, rng, arms, n_jobs=1, backend=None, l2_lambda=1, alpha=1, regression='ts', arm_to_scaler=None):
        """Initialise the parent policy, then install one example model per arm.

        ``arm_to_scaler`` maps each arm to the scaler handed to its model;
        when omitted, every arm gets ``None``.
        """
        super().__init__(rng, arms, n_jobs, backend, l2_lambda, alpha, regression)

        self.l2_lambda = l2_lambda
        self.alpha = alpha
        self.regression = regression
        self.num_features = None

        if arm_to_scaler is None:
            arm_to_scaler = {arm: None for arm in arms}

        # One regression model per arm, built by the class registered in `factory`.
        model_cls = LinearExample.factory.get(regression)
        self.arm_to_model = {
            arm: model_cls(rng, l2_lambda, alpha, arm_to_scaler[arm])
            for arm in arms
        }


# Dataset 1: user features and item responses read from the movielens/ CSV files.
users = pd.read_csv('movielens/users.csv')
# Derived feature: element-wise OR of the 'other' and 'none' columns. It is added
# before context_features is computed below, so it becomes part of the context.
users['unknown'] = np.logical_or(users['other'], users['none'])
# users['joint'] = users['other'] + users['none']
responses = pd.read_csv('movielens/responses.csv')
# Split the users by the value of their 'set' column.
train = users[users['set']=='train']
test = users[users['set']=='test']

# Attach the response rows to the training users.
# NOTE(review): with how='left', a training user without any response row is kept
# with missing values in the response columns -- confirm that cannot happen here.
train = train.merge(responses, how='left', on='user id')
# extra_user = train[train['user id'] == 748].copy()
# uid = train['user id'].max() + 1
# extra_user['user id'] = uid
# train = pd.concat([train, extra_user])
# Context = every users column except the id and the split marker.
context_features = [c for c in users.columns if c not in ['user id', 'set']]
# These two indices are only referenced by the commented-out hstack lines below.
none_ind = context_features.index('none')
other_ind = context_features.index('other')

# MAB._convert_array / MAB._convert_matrix are private mabwiser helpers;
# presumably they return NumPy arrays -- TODO confirm against the installed version.
decisions = MAB._convert_array(train['item id'])
rewards = MAB._convert_array(train['rated'])
contexts = MAB._convert_matrix(train[context_features]).astype('float')

# contexts = np.hstack([contexts, np.logical_or(contexts[:, none_ind], contexts[:, other_ind]).reshape(-1, 1)])

test_contexts = MAB._convert_matrix(test[context_features]).astype('float')
# test_contexts = np.hstack([test_contexts, np.logical_or(test_contexts[:, none_ind], test_contexts[:, other_ind]).reshape(-1, 1)])

# The scaler is fitted on the training contexts only and then applied to the test contexts.
scaler = StandardScaler()
contexts = scaler.fit_transform(contexts)
test_contexts = scaler.transform(test_contexts)
# The arms are the distinct item ids found in the responses file.
# NOTE(review): list() over the unique() result presumably keeps NumPy scalar
# elements, which the later .item() call on the chosen arm appears to rely on --
# verify before replacing this with .tolist().
item_ids = list(responses['item id'].unique())


# Dataset 2
# data = pd.read_csv('simulated_data.csv')

# train = data[data['set']=='train']
# test = data[data['set']=='test']

# context_features = [c for c in data.columns if c not in ['set', 'arm', 'reward']]

# decisions = MAB._convert_array(train['arm'])
# rewards = MAB._convert_array(train['reward'])
# contexts = MAB._convert_matrix(train[context_features]).astype('float')

# test_contexts = MAB._convert_matrix(test[context_features]).astype('float')

# item_ids = np.array([0, 1, 2, 3])

# Dataset 3
# data = pd.read_csv('simulated_multi_label_data.csv')

# train = data[data['set']=='train']
# test = data[data['set']=='test']

# context_features = ['feature_'+str(i) for i in range(25)]

# decisions = MAB._convert_array(train['item_id'])
# rewards = MAB._convert_array(train['response'])
# contexts = MAB._convert_matrix(train[context_features]).astype('float')

# # Drop duplicate rows from test for prediction
# test_unique = test.drop_duplicates(subset=['user']+context_features).reset_index(drop=True)
# test_contexts = MAB._convert_matrix(test_unique[context_features]).astype('float')

# scaler = StandardScaler()
# contexts = scaler.fit_transform(contexts)
# test_contexts = scaler.transform(test_contexts)
# item_ids = np.array([i for i in range(20)])

# Two generators are in play: `rng` is handed to LinearExample below, while
# `rng2` is the module-level generator LinTSExample.predict draws from.
# Every option uses the same legacy RandomState for `rng`.
rng = np.random.RandomState(seed=11)
if random_option == 'randomstate':
    # Legacy path: sampling reuses the very same RandomState object.
    rng2 = rng
elif random_option in ('svd', 'cholesky'):
    # New-style Generator; LinTSExample.predict chooses the sampling call per option.
    rng2 = np.random.default_rng(11)
else:
    # Without this branch an unrecognised option left the generators unbound
    # and only surfaced further down as a NameError.
    raise ValueError(
        f"Unknown random option {random_option!r}; "
        "expected 'randomstate', 'svd' or 'cholesky'"
    )

# Build the bandit over all item ids using the example policy defined above.
mab = LinearExample(
    rng=rng,
    arms=item_ids,
    l2_lambda=10,
    alpha=1,
    regression='ts',
    n_jobs=1,
    backend=None,
)

# Seed NumPy's global generator as well before training.
np.random.seed(42)
mab.fit(decisions, rewards, contexts)

# Diagnostics on the fitted policy: these are the first two lines written to stdout.
for diagnostic in (contains_repeated_eigenvalues, all_positive_definite):
    print(diagnostic(mab))

exps = mab.predict_expectations(test_contexts)

# For every test context keep the arm with the largest expectation, converted
# to a plain Python scalar via .item().
recs = []
for arm_to_expectation in exps:
    best_arm = max(arm_to_expectation, key=arm_to_expectation.get)
    recs.append(best_arm.item())

# dt_str = datetime.now().strftime("%Y-%m-%d")
# env_str = os.path.basename(os.path.dirname(os.path.dirname(sys.executable)))
# pth = os.path.join('data', f"{dt_str}_{env_str}")
# os.makedirs(pth, exist_ok=True)
# with open(os.path.join(pth, 'mab.pkl'), 'wb') as fp:
#     pickle.dump(mab, fp)
# with open(os.path.join(pth, 'recs.json'), 'w') as fp:
#     json.dump(recs, fp)

# Third and last stdout line: the recommendation list.
print(recs)

Overwriting example_lints.py


In [123]:
import ast
import os
import subprocess

# subprocess.run() with an argument list does not go through a shell, so the
# leading '~' must be expanded here or the interpreter path is taken literally.
python_exec = os.path.expanduser('~/Tools/miniconda3/envs/reprod/bin/python')
# check=True raises CalledProcessError (with the captured stderr attached) when
# the script fails, instead of leaving an empty stdout to be unpacked below.
proc = subprocess.run([python_exec, 'example_lints.py', 'randomstate'], capture_output=True, check=True)
# The script's last three stdout lines are the two diagnostics and the recommendations.
contains_repeated_eigenvalues, all_positive_definite, res = proc.stdout.decode('utf-8').strip().splitlines()[-3:]
print(f"Contains repeated eigenvalues: {contains_repeated_eigenvalues}")
print(f"All covariances positive definite: {all_positive_definite}")
# The last line is the repr of a plain list; literal_eval parses it without
# executing arbitrary code the way eval() would.
recs_lis = ast.literal_eval(res)

Contains repeated eigenvalues: False
All covariances positive definite: True


In [124]:
import ast
import os
import subprocess

# subprocess.run() with an argument list does not go through a shell, so the
# leading '~' must be expanded here or the interpreter path is taken literally.
python_exec2 = os.path.expanduser('~/Tools/miniconda3/envs/reprod2/bin/python')
# check=True raises CalledProcessError (with the captured stderr attached) when
# the script fails, instead of leaving an empty stdout to be unpacked below.
proc = subprocess.run([python_exec2, 'example_lints.py', 'randomstate'], capture_output=True, check=True)
# The script's last three stdout lines are the two diagnostics and the recommendations.
contains_repeated_eigenvalues, all_positive_definite, res = proc.stdout.decode('utf-8').strip().splitlines()[-3:]
print(f"Contains repeated eigenvalues: {contains_repeated_eigenvalues}")
print(f"All covariances positive definite: {all_positive_definite}")
# The last line is the repr of a plain list; literal_eval parses it without
# executing arbitrary code the way eval() would.
recs_lis2 = ast.literal_eval(res)

Contains repeated eigenvalues: True
All covariances positive definite: True


In [125]:
# Compare the recommendation lists parsed from the two 'randomstate' runs
# (one per conda environment); True means they are identical.
recs_lis == recs_lis2

True

In [116]:
import ast
import os
import subprocess

# subprocess.run() with an argument list does not go through a shell, so the
# leading '~' must be expanded here or the interpreter path is taken literally.
python_exec = os.path.expanduser('~/Tools/miniconda3/envs/reprod/bin/python')
# check=True raises CalledProcessError (with the captured stderr attached) when
# the script fails, instead of leaving an empty stdout to be unpacked below.
proc = subprocess.run([python_exec, 'example_lints.py', 'svd'], capture_output=True, check=True)
# The script's last three stdout lines are the two diagnostics and the recommendations.
contains_repeated_eigenvalues, all_positive_definite, res = proc.stdout.decode('utf-8').strip().splitlines()[-3:]
print(f"Contains repeated eigenvalues: {contains_repeated_eigenvalues}")
print(f"All covariances positive definite: {all_positive_definite}")
# The last line is the repr of a plain list; literal_eval parses it without
# executing arbitrary code the way eval() would.
recs_lis = ast.literal_eval(res)

Contains repeated eigenvalues: False
All covariances positive definite: True


In [117]:
import ast
import os
import subprocess

# subprocess.run() with an argument list does not go through a shell, so the
# leading '~' must be expanded here or the interpreter path is taken literally.
python_exec2 = os.path.expanduser('~/Tools/miniconda3/envs/reprod2/bin/python')
# check=True raises CalledProcessError (with the captured stderr attached) when
# the script fails, instead of leaving an empty stdout to be unpacked below.
proc = subprocess.run([python_exec2, 'example_lints.py', 'svd'], capture_output=True, check=True)
# The script's last three stdout lines are the two diagnostics and the recommendations.
contains_repeated_eigenvalues, all_positive_definite, res = proc.stdout.decode('utf-8').strip().splitlines()[-3:]
print(f"Contains repeated eigenvalues: {contains_repeated_eigenvalues}")
print(f"All covariances positive definite: {all_positive_definite}")
# The last line is the repr of a plain list; literal_eval parses it without
# executing arbitrary code the way eval() would.
recs_lis2 = ast.literal_eval(res)

Contains repeated eigenvalues: True
All covariances positive definite: True


In [118]:
# Compare the recommendation lists parsed from the two 'svd' runs
# (one per conda environment); True means they are identical.
recs_lis == recs_lis2

False

In [119]:
import ast
import os
import subprocess

# subprocess.run() with an argument list does not go through a shell, so the
# leading '~' must be expanded here or the interpreter path is taken literally.
python_exec = os.path.expanduser('~/Tools/miniconda3/envs/reprod/bin/python')
# check=True raises CalledProcessError (with the captured stderr attached) when
# the script fails, instead of leaving an empty stdout to be unpacked below.
proc = subprocess.run([python_exec, 'example_lints.py', 'cholesky'], capture_output=True, check=True)
# The script's last three stdout lines are the two diagnostics and the recommendations.
contains_repeated_eigenvalues, all_positive_definite, res = proc.stdout.decode('utf-8').strip().splitlines()[-3:]
print(f"Contains repeated eigenvalues: {contains_repeated_eigenvalues}")
print(f"All covariances positive definite: {all_positive_definite}")
# The last line is the repr of a plain list; literal_eval parses it without
# executing arbitrary code the way eval() would.
recs_lis = ast.literal_eval(res)

Contains repeated eigenvalues: False
All covariances positive definite: True


In [120]:
import ast
import os
import subprocess

# subprocess.run() with an argument list does not go through a shell, so the
# leading '~' must be expanded here or the interpreter path is taken literally.
python_exec2 = os.path.expanduser('~/Tools/miniconda3/envs/reprod2/bin/python')
# check=True raises CalledProcessError (with the captured stderr attached) when
# the script fails, instead of leaving an empty stdout to be unpacked below.
proc = subprocess.run([python_exec2, 'example_lints.py', 'cholesky'], capture_output=True, check=True)
# The script's last three stdout lines are the two diagnostics and the recommendations.
contains_repeated_eigenvalues, all_positive_definite, res = proc.stdout.decode('utf-8').strip().splitlines()[-3:]
print(f"Contains repeated eigenvalues: {contains_repeated_eigenvalues}")
print(f"All covariances positive definite: {all_positive_definite}")
# The last line is the repr of a plain list; literal_eval parses it without
# executing arbitrary code the way eval() would.
recs_lis2 = ast.literal_eval(res)

Contains repeated eigenvalues: True
All covariances positive definite: True


In [121]:
# Compare the recommendation lists parsed from the two 'cholesky' runs
# (one per conda environment); True means they are identical.
recs_lis == recs_lis2

True