# Experiments on the Reproducibility of Results with LinTS
## Imports and Custom Class

In [1]:
# Run this cell for every experiment
import pandas as pd
import numpy as np
import platform
import pickle
from sklearn.preprocessing import StandardScaler

from mabwiser.mab import MAB
from mabwiser.linear import _RidgeRegression, _Linear

class LinTSExample(_RidgeRegression):
    def predict(self, x):
        beta_sampled = rng.multivariate_normal(x, self.A_inv)
        return np.dot(x, beta_sampled)
    
class LinearExample(_Linear):
    factory = {"ts": LinTSExample}

    def __init__(self, rng, arms, n_jobs=1, backend=None, l2_lambda=1, alpha=1, regression='ts', arm_to_scaler = None):
        super().__init__(rng, arms, n_jobs, backend, l2_lambda, alpha, regression)
       
        self.l2_lambda = l2_lambda
        self.alpha = alpha
        self.regression = regression

        # Create ridge regression model for each arm
        self.num_features = None

        if arm_to_scaler is None:
            arm_to_scaler = dict((arm, None) for arm in arms)

        self.arm_to_model = dict((arm, LinearExample.factory.get(regression)(rng, l2_lambda,
                                                                       alpha, arm_to_scaler[arm])) for arm in arms)
 

## Preprocessing
For these experiments we are using the MovieLens 100k data set, available for download at [https://grouplens.org/datasets/movielens/](https://grouplens.org/datasets/movielens/)

In [2]:
data = pd.read_csv('ml-100k/u.data', sep="\t", header=None)

In [3]:
data.head()

Unnamed: 0,0,1,2,3
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [4]:
data.columns = ['user id', 'item id', 'rating', 'timestamp']

In [5]:
users = pd.read_csv('ml-100k/u.user', sep="|", header=None)
users.columns = ['user id', 'age', 'gender', 'occupation', 'zip code']

In [6]:
users.head()

Unnamed: 0,user id,age,gender,occupation,zip code
0,1,24,M,technician,85711
1,2,53,F,other,94043
2,3,23,M,writer,32067
3,4,24,M,technician,43537
4,5,33,F,other,15213


In [7]:
users['gender'].value_counts()

M    670
F    273
Name: gender, dtype: int64

In [8]:
users['gender'] = users.apply(lambda x: 0 if x['gender']=='M' else 1, axis=1)

In [9]:
users['occupation'].value_counts()

student          196
other            105
educator          95
administrator     79
engineer          67
programmer        66
librarian         51
writer            45
executive         32
scientist         31
artist            28
technician        27
marketing         26
entertainment     18
healthcare        16
retired           14
salesman          12
lawyer            12
none               9
homemaker          7
doctor             7
Name: occupation, dtype: int64

In [10]:
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder

oh_en = OneHotEncoder()
occupations = oh_en.fit_transform(users['occupation'].values.reshape(-1, 1))

In [11]:
occupations[0]

<1x21 sparse matrix of type '<class 'numpy.float64'>'
	with 1 stored elements in Compressed Sparse Row format>

In [12]:
occupations

<943x21 sparse matrix of type '<class 'numpy.float64'>'
	with 943 stored elements in Compressed Sparse Row format>

In [13]:
occupations = pd.DataFrame.sparse.from_spmatrix(occupations)

In [14]:
occupations.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,11,12,13,14,15,16,17,18,19,20
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
occupations.columns = oh_en.categories_

In [16]:
occupations.head()

Unnamed: 0,administrator,artist,doctor,educator,engineer,entertainment,executive,healthcare,homemaker,lawyer,...,marketing,none,other,programmer,retired,salesman,scientist,student,technician,writer
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
users = users.merge(occupations, how='left', left_index=True, right_index=True)

In [18]:
users.drop('occupation', axis=1, inplace=True)

In [19]:
ord_en = OrdinalEncoder()
users['zip code'] = ord_en.fit_transform(users['zip code'].values.reshape(-1, 1))

In [20]:
users.head()

Unnamed: 0,user id,age,gender,zip code,"(administrator,)","(artist,)","(doctor,)","(educator,)","(engineer,)","(entertainment,)",...,"(marketing,)","(none,)","(other,)","(programmer,)","(retired,)","(salesman,)","(scientist,)","(student,)","(technician,)","(writer,)"
0,1,24,0,622.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,2,53,1,689.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3,23,0,270.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,4,24,0,331.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,5,33,1,133.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [21]:
cols = [c[0] if isinstance(c, tuple) else c for c in users.columns]

In [22]:
cols

['user id',
 'age',
 'gender',
 'zip code',
 'administrator',
 'artist',
 'doctor',
 'educator',
 'engineer',
 'entertainment',
 'executive',
 'healthcare',
 'homemaker',
 'lawyer',
 'librarian',
 'marketing',
 'none',
 'other',
 'programmer',
 'retired',
 'salesman',
 'scientist',
 'student',
 'technician',
 'writer']

In [23]:
users.columns = cols

In [24]:
from sklearn.model_selection import train_test_split

train, test = train_test_split(users, random_state=43, test_size=0.3)

In [25]:
train.head()

Unnamed: 0,user id,age,gender,zip code,administrator,artist,doctor,educator,engineer,entertainment,...,marketing,none,other,programmer,retired,salesman,scientist,student,technician,writer
747,748,28,0,710.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
210,211,66,0,275.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
394,395,43,0,333.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
236,237,49,0,502.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
769,770,28,0,125.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [26]:
train['set'] = 'train'
test['set'] = 'test'

users = pd.concat([train, test])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train['set'] = 'train'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test['set'] = 'test'


In [27]:
users.to_csv('users.csv', index=False)

In [28]:
data.head()

Unnamed: 0,user id,item id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [29]:
data['rated'] = 1

user_ids = users['user id'].to_list()
item_ids = list(data['item id'].unique())

unrated = list()
for u in user_ids:
    for i in item_ids:
        unrated.append([u, i, 0])

unrated = pd.DataFrame(unrated, columns = ['user id', 'item id', 'rated'])

In [30]:
responses = pd.concat([data[['user id', 'item id', 'rated']], unrated])
responses = responses.groupby(['user id', 'item id'])['rated'].max().reset_index()

In [31]:
responses.head()

Unnamed: 0,user id,item id,rated
0,1,1,1
1,1,2,1
2,1,3,1
3,1,4,1
4,1,5,1


In [32]:
responses['rated'].value_counts()

0    1486126
1     100000
Name: rated, dtype: int64

In [33]:
responses.to_csv('responses.csv', index=False)

## Mac OS

In [34]:
platform.platform()

'macOS-10.15.7-x86_64-i386-64bit'

In [35]:
np.__version__

'1.18.5'

In [36]:
train = train.merge(responses, how='left', on='user id')

In [37]:
train.shape

(1110120, 28)

In [38]:
train.head()

Unnamed: 0,user id,age,gender,zip code,administrator,artist,doctor,educator,engineer,entertainment,...,programmer,retired,salesman,scientist,student,technician,writer,set,item id,rated
0,748,28,0,710.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,train,1,1
1,748,28,0,710.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,train,2,0
2,748,28,0,710.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,train,3,0
3,748,28,0,710.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,train,4,1
4,748,28,0,710.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,train,5,0


In [39]:
context_features = [c for c in users.columns if c not in ['user id', 'set']]

In [40]:
rng = np.random.RandomState(seed=11)
mab = LinearExample(rng=rng, arms=item_ids, l2_lambda=1, alpha=1, regression='ts', n_jobs=1, backend=None)

In [41]:
mab.regression

'ts'

In [42]:
mab.arm_to_model[1]

<__main__.LinTSExample at 0x7fdab5739430>

In [43]:
decisions = MAB._convert_array(train['item id'])
rewards = MAB._convert_array(train['rated'])
contexts = MAB._convert_matrix(train[context_features]).astype('float')

test_contexts = MAB._convert_matrix(test[context_features]).astype('float')

In [44]:
scaler = StandardScaler()
contexts = scaler.fit_transform(contexts)
test_contexts = scaler.transform(test_contexts)

In [45]:
mab.fit(decisions, rewards, contexts)

In [46]:
mac_expectations = mab.predict_expectations(test_contexts)

In [47]:
mac_expectations[0][1]

13.975185451078701

In [48]:
mac_expectations[0][2]

13.972781787550622

In [49]:
pickle.dump(scaler, open('scaler.pkl', 'wb'))
pickle.dump(mab, open('mac_mab.pkl', 'wb'))
pickle.dump(mac_expectations, open('mac_expectations.pkl', 'wb'))

## Sagemaker

In [2]:
platform.platform()

'Linux-4.14.225-121.357.amzn1.x86_64-x86_64-with-glibc2.9'

In [3]:
np.__version__

'1.19.5'

In [4]:
users = pd.read_csv('users.csv')
responses = pd.read_csv('responses.csv')

In [5]:
train = users[users['set']=='train']
test = users[users['set']=='test']

train = train.merge(responses, how='left', on='user id')
context_features = [c for c in users.columns if c not in ['user id', 'set']]

In [6]:
decisions = MAB._convert_array(train['item id'])
rewards = MAB._convert_array(train['rated'])
contexts = MAB._convert_matrix(train[context_features]).astype('float')

test_contexts = MAB._convert_matrix(test[context_features]).astype('float')

In [7]:
rng = np.random.RandomState(seed=11)
item_ids = list(responses['item id'].unique())
mab = LinearExample(rng=rng, arms=item_ids, l2_lambda=1, alpha=1, regression='ts', n_jobs=1, backend=None)
mab.arm_to_model[1]

<__main__.LinTSExample at 0x7f538b4170f0>

In [8]:
scaler = pickle.load(open('scaler.pkl', 'rb'))
contexts = scaler.fit_transform(contexts)
test_contexts = scaler.transform(test_contexts)

In [9]:
mab.fit(decisions, rewards, contexts)

In [10]:
sgm_expectations = mab.predict_expectations(test_contexts)

In [11]:
sgm_expectations[0][1]

13.999455973138849

In [12]:
sgm_expectations[0][2]

13.619175409413986

In [13]:
pickle.dump(mab, open('sgm_mab.pkl', 'wb'))
pickle.dump(sgm_expectations, open('sgm_expectations.pkl', 'wb'))

## Red Hat Enterprise Linux 7

In [2]:
platform.platform()

'Linux-3.10.0-1160.11.1.el7.x86_64-x86_64-with-glibc2.10'

In [3]:
np.__version__

'1.19.3'

In [4]:
users = pd.read_csv('users.csv')
responses = pd.read_csv('responses.csv')

In [5]:
train = users[users['set']=='train']
test = users[users['set']=='test']

train = train.merge(responses, how='left', on='user id')
context_features = [c for c in users.columns if c not in ['user id', 'set']]

In [6]:
decisions = MAB._convert_array(train['item id'])
rewards = MAB._convert_array(train['rated'])
contexts = MAB._convert_matrix(train[context_features]).astype('float')

test_contexts = MAB._convert_matrix(test[context_features]).astype('float')

In [7]:
rng = np.random.RandomState(seed=11)
item_ids = list(responses['item id'].unique())
mab = LinearExample(rng=rng, arms=item_ids, l2_lambda=1, alpha=1, regression='ts', n_jobs=1, backend=None)
mab.arm_to_model[1]

<__main__.LinTSExample at 0x7fd11c76ca30>

In [8]:
scaler = pickle.load(open('scaler.pkl', 'rb'))
contexts = scaler.fit_transform(contexts)
test_contexts = scaler.transform(test_contexts)

In [9]:
mab.fit(decisions, rewards, contexts)

In [10]:
lin_expectations = mab.predict_expectations(test_contexts)

In [11]:
lin_expectations[0][1]

13.999455973138641

In [12]:
lin_expectations[0][2]

13.619175409413836

In [13]:
pickle.dump(mab, open('lin_mab.pkl', 'wb'))
pickle.dump(lin_expectations, open('lin_expectations.pkl', 'wb'))

## Windows
For Windows run, see LinTS Reproducibility Windows.

# Analysis

In [2]:
mac_expectations = pickle.load(open('mac_expectations.pkl', 'rb'))
sgm_expectations = pickle.load(open('sgm_expectations.pkl', 'rb'))
lin_expectations = pickle.load(open('lin_expectations.pkl', 'rb'))
win_expectations = pickle.load(open('win_expectations.pkl', 'rb'))


In [3]:
users = pd.read_csv('users.csv')
responses = pd.read_csv('responses.csv')
test = users[users['set']=='test']


In [4]:
test.head()

Unnamed: 0,user id,age,gender,zip code,administrator,artist,doctor,educator,engineer,entertainment,...,none,other,programmer,retired,salesman,scientist,student,technician,writer,set
660,383,42,0,538.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,test
661,684,28,0,439.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,test
662,693,43,1,615.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,test
663,417,27,1,361.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,test
664,358,40,0,95.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,test


In [5]:
test.reset_index(inplace=True, drop=True)

In [6]:
test_responses = test.merge(responses, how='left', on='user id')

In [7]:
test_responses = test_responses[['user id', 'item id', 'rated']]
test_responses.shape

(476006, 3)

In [8]:
test_responses.head()

Unnamed: 0,user id,item id,rated
0,383,1,0
1,383,2,0
2,383,3,0
3,383,4,0
4,383,5,0


In [9]:
mac_expectations_df = pd.DataFrame(mac_expectations)
mac_expectations_df.head()

Unnamed: 0,242,302,377,51,346,474,265,465,451,86,...,1594,1626,1645,1659,1682,1674,1640,1637,1630,1641
0,13.999456,13.619175,14.040468,14.011834,14.001781,13.839774,13.913537,13.784381,14.143585,13.702502,...,13.998745,13.817869,14.085177,13.810363,14.011573,13.889405,13.943352,14.107219,14.07176,13.959409
1,5.515976,5.420092,5.43156,5.375886,5.51584,5.515619,5.445676,5.405522,5.430701,5.362415,...,5.556219,5.496448,5.446256,5.503418,5.42909,5.276094,5.518622,5.489035,5.51764,5.368379
2,70.787536,69.833831,70.017768,69.849716,70.029382,70.104242,69.677341,69.76045,69.830625,69.988987,...,70.325775,70.065303,70.266413,69.741081,69.981905,69.846734,69.537533,70.406474,70.002406,70.551944
3,11.734666,11.90929,11.946744,11.500852,11.610487,11.582492,11.534506,11.564748,11.763328,11.705403,...,11.753791,11.833207,11.617623,11.618735,11.823277,11.713923,11.529636,11.570257,11.644823,11.964746
4,11.322969,11.026973,11.140362,11.109445,11.20126,11.116516,11.247489,11.280999,11.13701,10.885901,...,11.251817,11.170174,11.002162,11.026821,11.2039,11.088557,11.173237,11.121288,10.83554,11.103578


In [10]:
mac_expectations_df.shape

(283, 1682)

In [11]:
test.shape

(283, 26)

In [12]:
mac_expectations_df['user id'] = test['user id']

In [13]:
mac_expectations_df.head()

Unnamed: 0,242,302,377,51,346,474,265,465,451,86,...,1626,1645,1659,1682,1674,1640,1637,1630,1641,user id
0,13.999456,13.619175,14.040468,14.011834,14.001781,13.839774,13.913537,13.784381,14.143585,13.702502,...,13.817869,14.085177,13.810363,14.011573,13.889405,13.943352,14.107219,14.07176,13.959409,383
1,5.515976,5.420092,5.43156,5.375886,5.51584,5.515619,5.445676,5.405522,5.430701,5.362415,...,5.496448,5.446256,5.503418,5.42909,5.276094,5.518622,5.489035,5.51764,5.368379,684
2,70.787536,69.833831,70.017768,69.849716,70.029382,70.104242,69.677341,69.76045,69.830625,69.988987,...,70.065303,70.266413,69.741081,69.981905,69.846734,69.537533,70.406474,70.002406,70.551944,693
3,11.734666,11.90929,11.946744,11.500852,11.610487,11.582492,11.534506,11.564748,11.763328,11.705403,...,11.833207,11.617623,11.618735,11.823277,11.713923,11.529636,11.570257,11.644823,11.964746,417
4,11.322969,11.026973,11.140362,11.109445,11.20126,11.116516,11.247489,11.280999,11.13701,10.885901,...,11.170174,11.002162,11.026821,11.2039,11.088557,11.173237,11.121288,10.83554,11.103578,358


In [14]:
sgm_expectations_df = pd.DataFrame(sgm_expectations)
sgm_expectations_df['user id'] = test['user id']

lin_expectations_df = pd.DataFrame(lin_expectations)
lin_expectations_df['user id'] = test['user id']

win_expectations_df = pd.DataFrame(win_expectations)
win_expectations_df['user id'] = test['user id']

In [15]:
sgm_expectations_df.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,1674,1675,1676,1677,1678,1679,1680,1681,1682,user id
0,13.999456,13.619175,14.040468,14.011834,14.001781,13.839774,13.913537,13.784381,14.143585,13.702502,...,13.817869,14.085177,13.810363,14.011573,13.889405,13.943352,14.107219,14.07176,13.959409,383
1,5.515976,5.420092,5.43156,5.375886,5.51584,5.515619,5.445676,5.405522,5.430701,5.362415,...,5.496448,5.446256,5.503418,5.42909,5.276094,5.518622,5.489035,5.51764,5.368379,684
2,70.787536,69.833831,70.017768,69.849716,70.029382,70.104242,69.677341,69.76045,69.830625,69.988987,...,70.065303,70.266413,69.741081,69.981905,69.846734,69.537533,70.406474,70.002406,70.551944,693
3,11.734666,11.90929,11.946744,11.500852,11.610487,11.582492,11.534506,11.564748,11.763328,11.705403,...,11.833207,11.617623,11.618735,11.823277,11.713923,11.529636,11.570257,11.644823,11.964746,417
4,11.322969,11.026973,11.140362,11.109445,11.20126,11.116516,11.247489,11.280999,11.13701,10.885901,...,11.170174,11.002162,11.026821,11.2039,11.088557,11.173237,11.121288,10.83554,11.103578,358


In [16]:
lin_expectations_df.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,1674,1675,1676,1677,1678,1679,1680,1681,1682,user id
0,13.999456,13.619175,14.040468,14.011834,14.001781,13.839774,13.913537,13.784381,14.143585,13.702502,...,13.817869,14.085177,13.810363,14.011573,13.889405,13.943352,14.107219,14.07176,13.959409,383
1,5.515976,5.420092,5.43156,5.375886,5.51584,5.515619,5.445676,5.405522,5.430701,5.362415,...,5.496448,5.446256,5.503418,5.42909,5.276094,5.518622,5.489035,5.51764,5.368379,684
2,70.787536,69.833831,70.017768,69.849716,70.029382,70.104242,69.677341,69.76045,69.830625,69.988987,...,70.065303,70.266413,69.741081,69.981905,69.846734,69.537533,70.406474,70.002406,70.551944,693
3,11.734666,11.90929,11.946744,11.500852,11.610487,11.582492,11.534506,11.564748,11.763328,11.705403,...,11.833207,11.617623,11.618735,11.823277,11.713923,11.529636,11.570257,11.644823,11.964746,417
4,11.322969,11.026973,11.140362,11.109445,11.20126,11.116516,11.247489,11.280999,11.13701,10.885901,...,11.170174,11.002162,11.026821,11.2039,11.088557,11.173237,11.121288,10.83554,11.103578,358


In [17]:
win_expectations_df.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,1674,1675,1676,1677,1678,1679,1680,1681,1682,user id
0,13.999456,13.619175,14.040468,14.011834,14.001781,13.839774,13.913537,13.784381,14.143585,13.702502,...,13.817869,14.085177,13.810363,14.011573,13.889405,13.943352,14.107219,14.07176,13.959409,383
1,5.515976,5.420092,5.43156,5.375886,5.51584,5.515619,5.445676,5.405522,5.430701,5.362415,...,5.496448,5.446256,5.503418,5.42909,5.276094,5.518622,5.489035,5.51764,5.368379,684
2,70.787536,69.833831,70.017768,69.849716,70.029382,70.104242,69.677341,69.76045,69.830625,69.988987,...,70.065303,70.266413,69.741081,69.981905,69.846734,69.537533,70.406474,70.002406,70.551944,693
3,11.734666,11.90929,11.946744,11.500852,11.610487,11.582492,11.534506,11.564748,11.763328,11.705403,...,11.833207,11.617623,11.618735,11.823277,11.713923,11.529636,11.570257,11.644823,11.964746,417
4,11.322969,11.026973,11.140362,11.109445,11.20126,11.116516,11.247489,11.280999,11.13701,10.885901,...,11.170174,11.002162,11.026821,11.2039,11.088557,11.173237,11.121288,10.83554,11.103578,358


In [18]:
mac_expectations_df = pd.melt(mac_expectations_df, id_vars=['user id'], var_name = 'item id', value_name='score')
mac_expectations_df.head()

Unnamed: 0,user id,item id,score
0,383,242,13.999456
1,684,242,5.515976
2,693,242,70.787536
3,417,242,11.734666
4,358,242,11.322969


In [19]:
mac_expectations_df.columns = ['user id', 'item id', 'Mac score']

In [20]:
mac_expectations_df.head()

Unnamed: 0,user id,item id,Mac score
0,383,242,13.999456
1,684,242,5.515976
2,693,242,70.787536
3,417,242,11.734666
4,358,242,11.322969


In [21]:
sgm_expectations_df = pd.melt(sgm_expectations_df, id_vars=['user id'], var_name = 'item id', value_name='SageMaker score')
lin_expectations_df = pd.melt(lin_expectations_df, id_vars=['user id'], var_name = 'item id', value_name='Linux score')
win_expectations_df = pd.melt(win_expectations_df, id_vars=['user id'], var_name = 'item id', value_name='Windows score')


In [22]:
sgm_expectations_df.head()

Unnamed: 0,user id,item id,SageMaker score
0,383,1,13.999456
1,684,1,5.515976
2,693,1,70.787536
3,417,1,11.734666
4,358,1,11.322969


In [23]:
lin_expectations_df.head()

Unnamed: 0,user id,item id,Linux score
0,383,1,13.999456
1,684,1,5.515976
2,693,1,70.787536
3,417,1,11.734666
4,358,1,11.322969


In [24]:
all_expectations = mac_expectations_df.merge(sgm_expectations_df, how='left', on=['user id', 'item id'])

In [25]:
all_expectations.head()

Unnamed: 0,user id,item id,Mac score,SageMaker score
0,383,242,13.999456,13.843137
1,684,242,5.515976,5.597402
2,693,242,70.787536,69.812079
3,417,242,11.734666,11.744721
4,358,242,11.322969,11.124028


In [26]:
all_expectations = all_expectations.merge(lin_expectations_df, how='left', on=['user id', 'item id'])
all_expectations = all_expectations.merge(win_expectations_df, how='left', on=['user id', 'item id'])

In [27]:
all_expectations.head()

Unnamed: 0,user id,item id,Mac score,SageMaker score,Linux score,Windows score
0,383,242,13.999456,13.843137,13.843137,13.843137
1,684,242,5.515976,5.597402,5.597402,5.597402
2,693,242,70.787536,69.812079,69.812079,69.812079
3,417,242,11.734666,11.744721,11.744721,11.744721
4,358,242,11.322969,11.124028,11.124028,11.124028


In [28]:
all_expectations['mean'] = all_expectations.apply(lambda x: np.mean([x['Mac score'], 
                                                                     x['SageMaker score'], 
                                                                     x['Linux score'],
                                                                     x['Windows score']]), axis=1)

all_expectations['std'] = all_expectations.apply(lambda x: np.std([x['Mac score'], 
                                                                     x['SageMaker score'], 
                                                                     x['Linux score'],
                                                                     x['Windows score']]), axis=1)

In [29]:
all_expectations.head()

Unnamed: 0,user id,item id,Mac score,SageMaker score,Linux score,Windows score,mean,std
0,383,242,13.999456,13.843137,13.843137,13.843137,13.882217,0.067688
1,684,242,5.515976,5.597402,5.597402,5.597402,5.577045,0.035259
2,693,242,70.787536,69.812079,69.812079,69.812079,70.055943,0.422385
3,417,242,11.734666,11.744721,11.744721,11.744721,11.742207,0.004354
4,358,242,11.322969,11.124028,11.124028,11.124028,11.173763,0.086144


In [30]:
all_expectations['std'].describe()

count    4.760060e+05
mean     9.001244e-02
std      9.508550e-02
min      4.165926e-15
25%      2.934038e-02
50%      6.392610e-02
75%      1.178328e-01
max      1.931255e+00
Name: std, dtype: float64

In [31]:
all_expectations['ML'] = all_expectations['Mac score'] == all_expectations['Linux score']
all_expectations['SL'] = all_expectations['SageMaker score'] == all_expectations['Linux score']
all_expectations['WL'] = all_expectations['Windows score'] == all_expectations['Linux score']

In [32]:
all_expectations.head()

Unnamed: 0,user id,item id,Mac score,SageMaker score,Linux score,Windows score,mean,std,ML,SL,WL
0,383,242,13.999456,13.843137,13.843137,13.843137,13.882217,0.067688,False,False,False
1,684,242,5.515976,5.597402,5.597402,5.597402,5.577045,0.035259,False,False,False
2,693,242,70.787536,69.812079,69.812079,69.812079,70.055943,0.422385,False,False,False
3,417,242,11.734666,11.744721,11.744721,11.744721,11.742207,0.004354,False,False,False
4,358,242,11.322969,11.124028,11.124028,11.124028,11.173763,0.086144,False,False,False


In [33]:
all_expectations['ML'].sum()

9

In [34]:
all_expectations['SL'].sum()

5645

In [35]:
all_expectations['WL'].sum()

2571

In [37]:
all_expectations.iloc[0]['Mac score']

13.999455973138751

In [38]:
all_expectations['MLclose'] = np.isclose(all_expectations['Mac score'], all_expectations['Linux score'], atol=1e-10)
all_expectations['SLclose'] = np.isclose(all_expectations['SageMaker score'], all_expectations['Linux score'], atol=1e-10)
all_expectations['WLclose'] = np.isclose(all_expectations['Windows score'], all_expectations['Linux score'], atol=1e-10)

In [40]:
all_expectations.head()

Unnamed: 0,user id,item id,Mac score,SageMaker score,Linux score,Windows score,mean,std,ML,SL,WL,MLclose,SLclose,WLclose
0,383,242,13.999456,13.843137,13.843137,13.843137,13.882217,0.067688,False,False,False,False,True,True
1,684,242,5.515976,5.597402,5.597402,5.597402,5.577045,0.035259,False,False,False,False,True,True
2,693,242,70.787536,69.812079,69.812079,69.812079,70.055943,0.422385,False,False,False,False,True,True
3,417,242,11.734666,11.744721,11.744721,11.744721,11.742207,0.004354,False,False,False,False,True,True
4,358,242,11.322969,11.124028,11.124028,11.124028,11.173763,0.086144,False,False,False,False,True,True


In [41]:
all_expectations['MLclose'].sum()

905

In [42]:
all_expectations['SLclose'].sum()

476006

In [43]:
all_expectations['WLclose'].sum()

476006

In [44]:
all_expectations.shape

(476006, 14)

In [45]:
all_expectations['MLclose'].sum()/all_expectations.shape[0]

0.0019012365390352222

All of the Windows and Linux environment values are within a rounding error of each other, but the Mac only has the same values as the other operating systems 0.2% of the time

In [46]:
all_expectations['MLdiff'] = abs(all_expectations['Mac score'] - all_expectations['Linux score'])

In [47]:
all_expectations['MLdiff'].describe()

count    476006.000000
mean          0.207875
std           0.219591
min           0.000000
25%           0.067759
50%           0.147631
75%           0.272123
max           4.460042
Name: MLdiff, dtype: float64

In [48]:
all_expectations['diff_magnitude'] = all_expectations['MLdiff'] / all_expectations['Linux score']

In [49]:
all_expectations['diff_magnitude'].describe()

count    476006.000000
mean          0.010422
std           0.009033
min           0.000000
25%           0.003577
50%           0.008025
75%           0.014820
max           0.102782
Name: diff_magnitude, dtype: float64

## Impact on Model Performance

In [50]:
mac_expectations = pickle.load(open('mac_expectations.pkl', 'rb'))
sgm_expectations = pickle.load(open('sgm_expectations.pkl', 'rb'))
lin_expectations = pickle.load(open('lin_expectations.pkl', 'rb'))
win_expectations = pickle.load(open('win_expectations.pkl', 'rb'))

mac_expectations_df = pd.DataFrame(mac_expectations)
sgm_expectations_df = pd.DataFrame(sgm_expectations)
lin_expectations_df = pd.DataFrame(lin_expectations)
win_expectations_df = pd.DataFrame(win_expectations)

In [51]:
win_expectations_df.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,1673,1674,1675,1676,1677,1678,1679,1680,1681,1682
0,13.999456,13.619175,14.040468,14.011834,14.001781,13.839774,13.913537,13.784381,14.143585,13.702502,...,13.998745,13.817869,14.085177,13.810363,14.011573,13.889405,13.943352,14.107219,14.07176,13.959409
1,5.515976,5.420092,5.43156,5.375886,5.51584,5.515619,5.445676,5.405522,5.430701,5.362415,...,5.556219,5.496448,5.446256,5.503418,5.42909,5.276094,5.518622,5.489035,5.51764,5.368379
2,70.787536,69.833831,70.017768,69.849716,70.029382,70.104242,69.677341,69.76045,69.830625,69.988987,...,70.325775,70.065303,70.266413,69.741081,69.981905,69.846734,69.537533,70.406474,70.002406,70.551944
3,11.734666,11.90929,11.946744,11.500852,11.610487,11.582492,11.534506,11.564748,11.763328,11.705403,...,11.753791,11.833207,11.617623,11.618735,11.823277,11.713923,11.529636,11.570257,11.644823,11.964746
4,11.322969,11.026973,11.140362,11.109445,11.20126,11.116516,11.247489,11.280999,11.13701,10.885901,...,11.251817,11.170174,11.002162,11.026821,11.2039,11.088557,11.173237,11.121288,10.83554,11.103578


In [52]:
# ensure column order is consistent
cols = list(win_expectations_df.columns)

In [53]:
mac_expectations_df = mac_expectations_df[cols]
sgm_expectations_df = sgm_expectations_df[cols]
lin_expectations_df = lin_expectations_df[cols]

In [64]:
def top_k(df, k=10):
    expectations = df.values
    # sort top k in descending order
    rec_arm_idx_list = np.flip(np.argsort(expectations)[:, -k:], axis=1)

    recommendations = [[]] * df.shape[0]
    for idx in range(df.shape[0]):
        arms = [cols[arm_idx] for arm_idx in rec_arm_idx_list[idx]]
        exp = [expectations[idx, arm_idx] for arm_idx in rec_arm_idx_list[idx]]
        recommendations[idx] =([arms, exp])
    return recommendations

In [65]:
top_k(mac_expectations_df.head(5))

[[[815, 998, 718, 1543, 190, 1188, 1285, 1379, 23, 460],
  [14.475755940945593,
   14.457983450570131,
   14.419136868477059,
   14.415681852634087,
   14.3657102950288,
   14.347865132059916,
   14.344206417880132,
   14.328140955200782,
   14.318805997198707,
   14.309569753046905]],
 [[1585, 388, 72, 1309, 761, 303, 650, 1673, 483, 1010],
  [5.708847378362735,
   5.705749275200288,
   5.698142579339742,
   5.688928413971866,
   5.685484240401584,
   5.684586177082181,
   5.683055558969014,
   5.679664225244708,
   5.674119263727232,
   5.666190091273977]],
 [[218, 1310, 406, 778, 1178, 180, 1585, 313, 1317, 809],
  [71.0967129105133,
   70.97564367895512,
   70.9281947441411,
   70.87882475241753,
   70.87470762308278,
   70.85820981150485,
   70.83606275305546,
   70.82307050617673,
   70.8152445467606,
   70.81468372655647]],
 [[105, 589, 1536, 918, 1603, 809, 950, 1303, 1110, 363],
  [12.165743831489737,
   12.088163001099556,
   12.0812095583853,
   12.074071535611154,
   12.068

In [66]:
mac_top = top_k(mac_expectations_df)
sgm_top = top_k(sgm_expectations_df)
lin_top = top_k(lin_expectations_df)
win_top = top_k(win_expectations_df)

In [70]:
mac_top[0][0]

[815, 998, 718, 1543, 190, 1188, 1285, 1379, 23, 460]

In [72]:
df = pd.DataFrame({'user id': test['user id'], 'item id': [a[0] for a in mac_top], 'score': [a[1] for a in mac_top]})


In [73]:
df.head()

Unnamed: 0,user id,item id,score
0,383,"[815, 998, 718, 1543, 190, 1188, 1285, 1379, 2...","[14.475755940945593, 14.457983450570131, 14.41..."
1,684,"[1585, 388, 72, 1309, 761, 303, 650, 1673, 483...","[5.708847378362735, 5.705749275200288, 5.69814..."
2,693,"[218, 1310, 406, 778, 1178, 180, 1585, 313, 13...","[71.0967129105133, 70.97564367895512, 70.92819..."
3,417,"[105, 589, 1536, 918, 1603, 809, 950, 1303, 11...","[12.165743831489737, 12.088163001099556, 12.08..."
4,358,"[651, 117, 1158, 734, 1268, 679, 1054, 855, 84...","[11.50192293539069, 11.501722844333615, 11.488..."


In [74]:
df1 = df.explode('item id').reset_index(drop=True)
df1['score'] = df.explode('score')['score'].values

In [75]:
df1.head(10)

Unnamed: 0,user id,item id,score
0,383,815,14.4758
1,383,998,14.458
2,383,718,14.4191
3,383,1543,14.4157
4,383,190,14.3657
5,383,1188,14.3479
6,383,1285,14.3442
7,383,1379,14.3281
8,383,23,14.3188
9,383,460,14.3096


In [76]:
sgm_df = pd.DataFrame({'user id': test['user id'], 'item id': [a[0] for a in sgm_top], 'score': [a[1] for a in sgm_top]})
sgm_l_df = sgm_df.explode('item id').reset_index(drop=True)
sgm_l_df['score'] = sgm_df.explode('score')['score'].values

lin_df = pd.DataFrame({'user id': test['user id'], 'item id': [a[0] for a in lin_top], 'score': [a[1] for a in lin_top]})
lin_l_df = lin_df.explode('item id').reset_index(drop=True)
lin_l_df['score'] = lin_df.explode('score')['score'].values

win_df = pd.DataFrame({'user id': test['user id'], 'item id': [a[0] for a in win_top], 'score': [a[1] for a in win_top]})
win_l_df = win_df.explode('item id').reset_index(drop=True)
win_l_df['score'] = win_df.explode('score')['score'].values