# Label Preference
This is a dataset from

    @article{muller2020analysis,
      title={Analysis of active school transportation in hilly urban environments: A case study of Dresden},
      author={M{\"u}ller, Sven and Mejia-Dorantes, Lucia and Kersten, Elisa},
      journal={Journal of transport geography},
      volume={88},
      pages={102872},
      year={2020},
      publisher={Elsevier}
    }


In [17]:
%load_ext autoreload
%autoreload 2
import sys,os
sys.path.append('../')   
import numpy as np
import pandas as pd
from model.erroneousPreference import erroneousPreference
from kernel import BlockRBF
from utility import  paramz
from sklearn.model_selection import train_test_split
# for plotting
import matplotlib.pyplot as plt
import arviz as az


#label preference dataset
df = pd.read_csv("datasets/DDModeChoice.txt", sep="\t", index_col=0)
df.drop_duplicates(inplace=True)#.reset_index(inplace=True)
df = df[(df.Season==0)&(df.CarAvail==0)&(df.Choice!=4) & (df.Grade>9)]#subselect data for homogeneity
df.loc[:,'Choice']=df.loc[:,'Choice']-1
#df.Choice:0:walk, 1:bike, 2:public transport

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [18]:
CovCol=['Distance', 'School_location', 'Grade', 'Age', 'Gender',
         'CB_location','Leistung']
Labels=[0,1,2]#possible labels
latent_dim= len(Labels)

df_train,df_test = train_test_split(df, test_size=0.3)

# train 
X_tr = df_train[CovCol].values
n = X_tr.shape[0]

pairs = []
for ii in range(df_train.shape[0]):
    choice = df_train.iloc[ii,0]
    diff = np.setdiff1d(Labels,choice)
    for d in diff:
        pairs.append([ii+n*choice,ii+n*d])
pairs_tr = np.vstack(pairs)


In [19]:
from sklearn.preprocessing import StandardScaler
scalerx = StandardScaler().fit(X_tr)
X_tr_n = scalerx.transform(X_tr)
Xaugm_tr = np.tile(X_tr_n,(latent_dim,1))
Xaugm_tr.shape

(3171, 7)

In [20]:
#fitted hyperparameters
lengthscales=np.array([
    [1.22218717, 1.87808606, 4.        , 2.93232677, 0.27056331,
         0.17910861, 4.        ],
    [0.1       , 1.50303818, 4.        , 0.13099755, 0.18989127,
         0.90070552, 0.62961128],
    [2.60749252, 0.22110211, 4.        , 4.        , 0.16076371,
         2.08030129, 4.        ]
])
variances=np.array([[5.50174791,0.1,0.89]])

In [26]:
# data dictionary
data = {}
data["Pairs"] = pairs_tr
data["X"] = Xaugm_tr

# define kernel and hyperparams
Kernel = BlockRBF

# kernel parameter dictionary

params={}

for i in range(latent_dim):
    params['lengthscale_'+str(i)]={'value':lengthscales[i,:], 
                                'range':np.vstack([[0.1, 4.0]]*Xaugm_tr.shape[1]),
                                'transform': paramz.logexp()}
    params['variance_'+str(i)]={'value':variances[0,[i]], 
                                'range':np.vstack([[0.1, 200.0]]),
                                'transform': paramz.logexp()}



# define preference model 
model = erroneousPreference(data,Kernel,params,inf_method="laplace")
# compute hyperparameters
#model.optimize_hyperparams(num_restarts=1,niterations=5) 
print(model.params)
# sample from posterior
model.sample(nsamples=5000, tune=500)

{'lengthscale_0': {'value': array([1.22218717, 1.87808606, 4.        , 2.93232677, 0.27056331,
       0.17910861, 4.        ]), 'range': array([[0.1, 4. ],
       [0.1, 4. ],
       [0.1, 4. ],
       [0.1, 4. ],
       [0.1, 4. ],
       [0.1, 4. ],
       [0.1, 4. ]]), 'transform': <utility.paramz.logexp object at 0x7f4b7dcf9d30>}, 'variance_0': {'value': array([5.50174791]), 'range': array([[1.e-01, 2.e+02]]), 'transform': <utility.paramz.logexp object at 0x7f4b7dcf9970>}, 'lengthscale_1': {'value': array([0.1       , 1.50303818, 4.        , 0.13099755, 0.18989127,
       0.90070552, 0.62961128]), 'range': array([[0.1, 4. ],
       [0.1, 4. ],
       [0.1, 4. ],
       [0.1, 4. ],
       [0.1, 4. ],
       [0.1, 4. ],
       [0.1, 4. ]]), 'transform': <utility.paramz.logexp object at 0x7f4b7dcf9ca0>}, 'variance_1': {'value': array([0.1]), 'range': array([[1.e-01, 2.e+02]]), 'transform': <utility.paramz.logexp object at 0x7f4b7dcf9b20>}, 'lengthscale_2': {'value': array([2.60749252, 

100%|██████████| 5500/5500 [11:39<00:00,  7.86it/s]


In [10]:
{'lengthscale_0': {'value': array([1.22218717, 1.87808606, 4.        , 2.93232677, 0.27056331,
         0.17910861, 4.        ]),
  'range': array([[0.1, 4. ],
         [0.1, 4. ],
         [0.1, 4. ],
         [0.1, 4. ],
         [0.1, 4. ],
         [0.1, 4. ],
         [0.1, 4. ]]),
  'transform': <utility.paramz.logexp at 0x7f4b7e7e8790>},
 'variance_0': {'value': array([5.50174791]),
  'range': array([[1.e-01, 2.e+02]]),
  'transform': <utility.paramz.logexp at 0x7f4b7e7e85b0>},
 'lengthscale_1': {'value': array([0.1       , 1.50303818, 4.        , 0.13099755, 0.18989127,
         0.90070552, 0.62961128]),
  'range': array([[0.1, 4. ],
         [0.1, 4. ],
         [0.1, 4. ],
         [0.1, 4. ],
         [0.1, 4. ],
         [0.1, 4. ],
         [0.1, 4. ]]),
  'transform': <utility.paramz.logexp at 0x7f4b7e7e8460>},
 'variance_1': {'value': array([0.1]),
  'range': array([[1.e-01, 2.e+02]]),
  'transform': <utility.paramz.logexp at 0x7f4b7e7e8610>},
 'lengthscale_2': {'value': array([2.60749252, 0.22110211, 4.        , 4.        , 0.16076371,
         2.08030129, 4.        ]),
  'range': array([[0.1, 4. ],
         [0.1, 4. ],
         [0.1, 4. ],
         [0.1, 4. ],
         [0.1, 4. ],
         [0.1, 4. ],
         [0.1, 4. ]]),
  'transform': <utility.paramz.logexp at 0x7f4b7e7e8670>},
 'variance_2': {'value': array([0.89495831]),
  'range': array([[1.e-01, 2.e+02]]),
  'transform': <utility.paramz.logexp at 0x7f4b7e7e86d0>}}

{'lengthscale_0': {'value': array([1.22218717, 1.87808606, 4.        , 2.93232677, 0.27056331,
         0.17910861, 4.        ]),
  'range': array([[0.1, 4. ],
         [0.1, 4. ],
         [0.1, 4. ],
         [0.1, 4. ],
         [0.1, 4. ],
         [0.1, 4. ],
         [0.1, 4. ]]),
  'transform': <utility.paramz.logexp at 0x7f4b7e7e8790>},
 'variance_0': {'value': array([5.50174791]),
  'range': array([[1.e-01, 2.e+02]]),
  'transform': <utility.paramz.logexp at 0x7f4b7e7e85b0>},
 'lengthscale_1': {'value': array([0.1       , 1.50303818, 4.        , 0.13099755, 0.18989127,
         0.90070552, 0.62961128]),
  'range': array([[0.1, 4. ],
         [0.1, 4. ],
         [0.1, 4. ],
         [0.1, 4. ],
         [0.1, 4. ],
         [0.1, 4. ],
         [0.1, 4. ]]),
  'transform': <utility.paramz.logexp at 0x7f4b7e7e8460>},
 'variance_1': {'value': array([0.1]),
  'range': array([[1.e-01, 2.e+02]]),
  'transform': <utility.paramz.logexp at 0x7f4b7e7e8610>},
 'lengthscale_2': {'value':

In [22]:
# predicted samples
X_te = df_test[CovCol].values
X_te_n = scalerx.transform(X_te)
predictions = model.predict(np.tile(X_te_n,(latent_dim,1)))

In [23]:
meanUtil = np.mean(predictions,axis=1).reshape(latent_dim,X_te.shape[0]).T
predChoice = np.argmax(meanUtil,axis=1)
predChoice

array([2, 0, 2, 0, 0, 2, 2, 2, 2, 1, 2, 1, 2, 2, 0, 1, 1, 1, 2, 0, 2, 0,
       2, 2, 2, 1, 1, 0, 0, 1, 1, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 0,
       0, 2, 0, 1, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 0, 1, 2, 1, 0,
       2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 0, 0, 2, 1, 1, 2, 1, 2, 2, 2, 1,
       1, 2, 1, 2, 0, 1, 0, 0, 2, 2, 2, 2, 1, 2, 0, 1, 0, 2, 2, 2, 1, 2,
       2, 0, 0, 2, 0, 2, 1, 2, 2, 1, 1, 2, 1, 2, 0, 2, 0, 2, 2, 0, 2, 1,
       2, 2, 1, 2, 2, 1, 1, 1, 2, 2, 0, 2, 2, 2, 2, 0, 2, 1, 2, 1, 2, 2,
       2, 2, 0, 2, 0, 0, 2, 2, 2, 1, 2, 1, 1, 2, 1, 1, 2, 2, 1, 2, 1, 1,
       1, 0, 0, 2, 2, 2, 0, 2, 1, 2, 2, 2, 1, 2, 0, 2, 0, 2, 2, 1, 1, 0,
       2, 1, 2, 0, 1, 2, 1, 2, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 2, 0, 0, 1,
       0, 2, 2, 0, 2, 2, 0, 2, 2, 1, 1, 2, 0, 0, 2, 2, 2, 1, 0, 2, 2, 2,
       2, 2, 2, 1, 0, 2, 0, 2, 0, 1, 2, 2, 0, 2, 2, 2, 0, 2, 1, 0, 1, 2,
       2, 1, 1, 2, 0, 0, 0, 1, 0, 1, 0, 1, 2, 2, 0, 2, 0, 2, 1, 1, 2, 2,
       2, 0, 0, 0, 2, 2, 2, 1, 2, 2, 1, 1, 1, 1, 0,

In [13]:
meanUtil[0:5,:]

array([[-0.78096769, -0.04209084,  0.37133358],
       [-0.47098297, -0.00262421,  0.77250189],
       [-0.49195134,  0.16335981, -0.03827292],
       [-0.33281191,  0.2104188 ,  0.07194174],
       [-0.91107575,  0.14774056,  0.3813131 ]])

In [14]:
df_test.Choice.values

array([2, 2, 2, 1, 2, 1, 0, 2, 0, 1, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 1, 2, 1, 2, 0, 0, 2, 2, 0, 1, 2, 2, 2, 2, 2, 1, 1, 1,
       2, 2, 2, 2, 1, 2, 0, 1, 2, 1, 2, 1, 0, 2, 2, 2, 2, 1, 0, 0, 1, 1,
       2, 2, 1, 0, 1, 2, 0, 1, 2, 1, 0, 1, 2, 2, 1, 1, 2, 2, 2, 2, 1, 2,
       0, 1, 2, 2, 2, 0, 2, 1, 2, 0, 2, 0, 2, 1, 2, 2, 1, 2, 1, 1, 2, 1,
       1, 0, 2, 2, 0, 0, 0, 2, 2, 1, 2, 1, 2, 2, 1, 0, 0, 1, 1, 0, 0, 2,
       2, 1, 1, 0, 2, 2, 0, 1, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 0, 0, 2, 0,
       2, 2, 1, 0, 2, 2, 1, 0, 2, 1, 1, 2, 2, 0, 2, 1, 0, 2, 1, 1, 2, 1,
       2, 1, 2, 0, 2, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 0, 1, 1, 2, 0, 2,
       2, 1, 1, 2, 0, 2, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 0, 2, 1, 0, 0, 1,
       0, 1, 2, 2, 0, 1, 2, 1, 2, 0, 2, 1, 1, 2, 0, 1, 0, 1, 2, 2, 0, 1,
       0, 0, 2, 1, 0, 1, 2, 0, 1, 2, 2, 0, 2, 2, 0, 0, 1, 1, 2, 2, 0, 1,
       2, 1, 2, 0, 0, 1, 2, 1, 2, 2, 1, 0, 0, 0, 2, 0, 2, 2, 0, 1, 0, 1,
       2, 0, 0, 0, 0, 0, 0, 2, 1, 1, 1, 2, 2, 0, 2,

In [24]:
accuracy = np.mean(df_test.Choice.values==predChoice)
accuracy

0.7444933920704846

# One level Plackett-Luce

In [20]:
u3

array([ 6.73795614e-33,  3.06422275e-59,  1.67739701e-04, ...,
        2.27898815e-02,  4.22643721e-10, -5.01154800e-07])

In [24]:
u2-u1

array([ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
        7.04620731e-10, -1.01097046e-06,  2.18395446e-07])