# Application of the dyad ranking model BilinPL to the GA-TSP dataset

## Preliminaries: Download GA-TSP dataset

In [1]:
# Clone the GA-TSP dataset repository into ./ga-tsp-dataset; the CSV files
# loaded further down are read from its data_meta/ folder.
!git clone https://github.com/disc5/ga-tsp-dataset.git

Cloning into 'ga-tsp-dataset'...
remote: Enumerating objects: 276, done.[K
remote: Counting objects: 100% (276/276), done.[K
remote: Compressing objects: 100% (271/271), done.[K
remote: Total 276 (delta 5), reused 269 (delta 3), pack-reused 0[K
Receiving objects: 100% (276/276), 360.98 KiB | 250.00 KiB/s, done.
Resolving deltas: 100% (5/5), done.


## 1 Load GA-TSP Data

In [2]:
import pandas as pd
import numpy as np
from dyadranking import utils

In [3]:
# Read the three comma-separated metadata tables (instances, algorithms and
# rankings) from the cloned dataset repository.
df_instances, df_algorithms, df_rankings = (
    pd.read_csv(csv_path)
    for csv_path in (
        'ga-tsp-dataset/data_meta/instances.csv',
        'ga-tsp-dataset/data_meta/algorithms.csv',
        'ga-tsp-dataset/data_meta/rankings.csv',
    )
)


In [4]:
# Continue with the plain NumPy arrays underlying the three data frames.
inst_ndarr, label_feat_ndarr, rankings_ndarr = (
    frame.values for frame in (df_instances, df_algorithms, df_rankings)
)

## 2 Split data along the instances (i.e. a label-ranking-like scheme): 90% training, 10% test

In [5]:
# Total number of instances = number of rows of the instance matrix.
num_inst = len(inst_ndarr)

In [6]:
# Fix the seed so that the random train/test split is reproducible, then draw
# a random permutation of the instance indices 0 .. num_inst-1.
np.random.seed(42)
rp = np.random.permutation(num_inst)

In [7]:
# 90% of the instances (rounded down) are used for training.
# Integer arithmetic is used on purpose: the float expression
# floor(num_inst/100*90) can come out one short when the product rounds to just
# below a whole number (e.g. 70/100*90 == 62.99999999999999 -> 62 instead of 63).
num_tr_inst = (num_inst * 90) // 100

In [8]:
# Report the sizes of the training and test partitions.
print(
    "Number of tr instances: {}, te instances: {}, total: {}".format(
        num_tr_inst,
        num_inst - num_tr_inst,
        num_inst,
    )
)

Number of tr instances: 221, te instances: 25, total: 246


In [9]:
# Cut the permutation once: the first num_tr_inst indices form the training
# set, the remainder the test set.
tr_indices, te_indices = np.split(rp, [num_tr_inst])

In [10]:
# Training partition: instance features and rankings of the selected rows.
tr_inst, tr_rankings = inst_ndarr[tr_indices], rankings_ndarr[tr_indices]

In [11]:
# Test partition: instance features and rankings of the held-out rows.
te_inst, te_rankings = inst_ndarr[te_indices], rankings_ndarr[te_indices]

## 3 Preprocess data

Standardize the data, i.e. transform it using the following formula
$$
    z_i = \frac{x_i - \bar{x}}{\sigma} \enspace.
$$

In [12]:
# Column-wise mean of the training instance features.
tr_means = tr_inst.mean(axis=0)

In [13]:
# Column-wise standard deviation of the training instance features.
# A feature that is constant over the training set has a standard deviation of
# 0, which would turn that column into NaN/inf when dividing by it (for the
# training data and, later, for the test data). Use a unit scale for such
# columns so they are merely centered.
tr_stds = np.std(tr_inst, axis=0)
tr_stds = np.where(tr_stds == 0, 1.0, tr_stds)

In [14]:
# z-score the training instances: center by the mean, scale by the std.
tr_inst_z = np.divide(tr_inst - tr_means, tr_stds)

Standardize the label features (only the first two columns are standardized; the remaining columns are appended unchanged)

In [15]:
# Column-wise mean of the first two label-feature columns.
label_means = label_feat_ndarr[:, :2].mean(axis=0)

In [16]:
# Column-wise standard deviation of the first two label-feature columns.
# A column that is constant across all labels has a standard deviation of 0;
# dividing by it would yield NaN/inf, so a unit scale is used for such columns.
label_stds = np.std(label_feat_ndarr[:, 0:2], axis=0)
label_stds = np.where(label_stds == 0, 1.0, label_stds)

In [17]:
# Standardize the first two label-feature columns and re-attach the remaining
# columns unchanged, keeping the original column order.
label_feat_z = np.concatenate(
    (
        (label_feat_ndarr[:, :2] - label_means) / label_stds,
        label_feat_ndarr[:, 2:],
    ),
    axis=1,
)

Data preparation for BilinPL model

In [18]:
# N: number of training instances (rows of tr_inst).
# M: number of columns of the training ranking matrix.
N, M = len(tr_inst), tr_rankings.shape[1]

In [19]:
# Augment the standardized training instances with a trailing column of ones
# (presumably acting as a bias/intercept feature for the bilinear model).
tr_inst_bilinpl = np.hstack((tr_inst_z, np.ones((N, 1))))

In [20]:
# Build the joint-feature tensor from the augmented instance features and the
# standardized label features. Judging by the helper's name this combines each
# (instance, label) pair via a Kronecker product -- TODO confirm against
# dyadranking.utils. The labels are still in their original (unordered) order.
tr_jf_unordered_tensor = utils.get_kronecker_feature_map_tensor(tr_inst_bilinpl, label_feat_z)

In [21]:
# Sanity check: display the dimensions of the joint-feature tensor.
tr_jf_unordered_tensor.shape

(221, 72, 25)

In [22]:
# Convert the training rankings to orderings and shift them by one so that the
# entries can be used as 0-based indices when reordering the tensor below
# (the helper presumably returns 1-based label ids -- TODO confirm).
tr_orderings = utils.convert_rankingmat_to_orderingmat(tr_rankings) - 1

In [23]:
# Zero-filled buffer with the same shape and dtype as the unordered tensor;
# it is filled row by row in the next cell.
tr_jf_ordered_tensor = np.zeros(
    shape=tr_jf_unordered_tensor.shape,
    dtype=tr_jf_unordered_tensor.dtype,
)

In [24]:
# For every training instance, rearrange its per-label joint-feature vectors
# into the order given by that instance's ordering.
for inst_idx in range(len(tr_rankings)):
    label_order = tr_orderings[inst_idx]
    tr_jf_ordered_tensor[inst_idx] = tr_jf_unordered_tensor[inst_idx][label_order]

## 4 Train model

In [25]:
from dyadranking.models import jfpl

In [26]:
# Instantiate the joint-feature Plackett-Luce model (the "PL" expansion is
# inferred from the class name -- see dyadranking.models.jfpl) with its defaults.
model = jfpl.JointFeaturePLModel()

In [27]:
# Fit the model on the ordered joint-feature tensor; the ground-truth orderings
# are encoded in the order of the tensor's second axis (see the reordering loop
# above), so no separate target argument is passed.
model.train(tr_jf_ordered_tensor)

In [28]:
# Show the model's string representation, which lists the learned parameters.
print(model)

JointFeaturePLModel with parameters: [  0.41951989   5.11314112  -1.00542703   5.6986259   -4.69591214
   1.24328633   3.99284829  -4.25946087   3.83850267   0.42122587
  -3.26779588  -5.80884928   4.46730851 -12.236432     7.76618312
   1.8464012   -1.71123935   2.34662265   2.57509315  -4.92604211
   0.48110146   1.33532984   2.16666454   1.27295127  -3.44336674]


## 5 Evaluate Model

In [29]:
# Standardize the test instances with the *training* statistics so that no
# information from the test set leaks into the preprocessing.
te_inst_z = np.divide(te_inst - tr_means, tr_stds)
N_test = len(te_inst_z)

In [30]:
# Append the same trailing column of ones that was added to the training data.
te_inst_z_bilinpl = np.hstack((te_inst_z, np.ones((N_test, 1))))

In [31]:
# Joint-feature tensor for the test instances, built with the same helper and
# the same standardized label features as for the training data.
te_jf_unordered_tensor = utils.get_kronecker_feature_map_tensor(te_inst_z_bilinpl, label_feat_z)

In [32]:
# Integer buffer for the predicted orderings, sized by the first two
# dimensions of the test joint-feature tensor.
pred_orderings = np.zeros(te_jf_unordered_tensor.shape[:2], int)

In [33]:
# Predict an ordering for every test instance; the +1 shifts the model output
# by one before it is passed to the ordering-to-ranking conversion (mirroring
# the -1 applied to the training orderings).
for test_idx in range(N_test):
    predicted = model.predict(te_jf_unordered_tensor[test_idx])
    pred_orderings[test_idx] = predicted + 1

In [34]:
# Convert the predicted orderings back into rankings so they can be compared
# with te_rankings.
pred_rankings = utils.convert_orderingmat_to_rankingmat(pred_orderings)

In [35]:
# Kendall's tau between true and predicted test rankings. The displayed result
# is a pair: a single overall value followed by an array with one value per
# test instance (the overall value looks like their mean -- TODO confirm).
utils.kendallstau_on_rankingmat(te_rankings, pred_rankings)

(0.7950860719874805,
 array([0.60954617, 0.8458529 , 0.83098592, 0.86071987, 0.72456964,
        0.64632238, 0.83020344, 0.87010955, 0.66979656, 0.9029734 ,
        0.76995305, 0.86384977, 0.65258216, 0.88654147, 0.86071987,
        0.86463224, 0.53442879, 0.78482003, 0.76134585, 0.87167449,
        0.80985915, 0.89045383, 0.76682316, 0.89671362, 0.87167449]))