In [1]:
import pickle
from sklearn.model_selection import RepeatedKFold
from LabelRanking import *
import numpy as np

In [2]:
# Sub-directory of results/ that the experiment cells write their pickled summaries into
# (they open f'results/{dir}/<name>.pickle').
# NOTE(review): this name shadows the builtin dir() for the rest of the session; it is kept
# because the writer cells further down reference it by this exact name.
dir = 'REAL'

In [3]:
import pandas as pd

# Benchmark dataset names; the experiment cells read each one from LR_DATASETS/<name>.txt.
datasets = [
  'authorship', 'bodyfat', 'calhousing', 'cold', 'cpu-small', 'diau', 'dtt',
  'elevators', 'fried', 'glass', 'heat', 'housing', 'iris', 'pendigits',
  'segment', 'spo', 'stock', 'vehicle', 'vowel', 'wine', 'wisconsin',
]

In [None]:
# Experiment: labelwise label ranking with the 'Linear' base learner.
# Each dataset is scored with seeded 10-fold cross-validation repeated 5 times;
# the per-fold mean_KTcorrelation values are reduced to [mean, std] per dataset
# and the summary dict is pickled under results/{dir}/.
results = {}

for dataset_name in datasets:
  table = pd.read_csv(f'LR_DATASETS/{dataset_name}.txt')

  # Columns left of 'ranking' form X; columns right of 'ranking' form P (cast to int).
  from_ranking = table.loc[:, 'ranking':].columns
  up_to_ranking = table.loc[:, :'ranking'].columns
  X = table.drop(columns=from_ranking).to_numpy()
  P = table.drop(columns=up_to_ranking).to_numpy().astype('int')

  splitter = RepeatedKFold(n_splits=10, n_repeats=5, random_state=42)
  fold_scores = []

  for train_idx, test_idx in splitter.split(X):
    X_train, P_train = X[train_idx], P[train_idx]
    X_test, P_test = X[test_idx], P[test_idx]
    clf = LabelwiseLabelRanking('Linear', dict(n_jobs=-1)).fit(X_train, P_train)
    fold_scores.append(mean_KTcorrelation(P_test, clf.predict(X_test)))

  results[dataset_name] = [np.mean(fold_scores), np.std(fold_scores)]

with open(f'results/{dir}/labelwise_lr.pickle', 'wb') as handle:
  pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)

results

In [None]:
# Reload the summary pickled by the labelwise 'Linear' experiment and display it
# with every [mean, std] pair rounded to three decimals.
# NOTE: pickle.load can execute arbitrary code; only open files this notebook produced.
with open(f'results/REAL/labelwise_lr.pickle', 'rb') as handle:
  k = pickle.load(handle)

# Fix: the values were read from `labelwise_lr`, a name that is never defined
# (NameError). They come from the dict that was just loaded, as in the other
# reload cells.
v = np.round(list(k.values()), 3)
dict(zip(k.keys(), v))

In [None]:
# Experiment: labelwise label ranking with the 'Decision Tree' base learner
# (criterion='squared_error', max_features=None).
# Each dataset is scored with seeded 10-fold cross-validation repeated 5 times;
# the per-fold mean_KTcorrelation values are reduced to [mean, std] per dataset
# and the summary dict is pickled under results/{dir}/.
results = {}

for dataset_name in datasets:
  table = pd.read_csv(f'LR_DATASETS/{dataset_name}.txt')

  # Columns left of 'ranking' form X; columns right of 'ranking' form P (cast to int).
  from_ranking = table.loc[:, 'ranking':].columns
  up_to_ranking = table.loc[:, :'ranking'].columns
  X = table.drop(columns=from_ranking).to_numpy()
  P = table.drop(columns=up_to_ranking).to_numpy().astype('int')

  splitter = RepeatedKFold(n_splits=10, n_repeats=5, random_state=42)
  fold_scores = []

  for train_idx, test_idx in splitter.split(X):
    X_train, P_train = X[train_idx], P[train_idx]
    X_test, P_test = X[test_idx], P[test_idx]
    clf = LabelwiseLabelRanking('Decision Tree', dict(criterion='squared_error', max_features=None)).fit(X_train, P_train)
    fold_scores.append(mean_KTcorrelation(P_test, clf.predict(X_test)))

  results[dataset_name] = [np.mean(fold_scores), np.std(fold_scores)]

with open(f'results/{dir}/labelwise_dt.pickle', 'wb') as handle:
  pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)

results

In [None]:
# Reload the labelwise 'Decision Tree' summary and display it with each
# [mean, std] pair rounded to three decimals.
with open(f'results/REAL/labelwise_dt.pickle', 'rb') as handle:
  saved = pickle.load(handle)

rounded = np.round(list(saved.values()), 3)
{name: stats for name, stats in zip(saved.keys(), rounded)}

In [None]:
# Experiment: labelwise label ranking with the 'Random Forest' base learner.
# Each dataset is scored with seeded 10-fold cross-validation repeated 5 times;
# the per-fold mean_KTcorrelation values are reduced to [mean, std] per dataset
# and the summary dict is pickled under results/{dir}/.
results = {}

for dataset_name in datasets:
  table = pd.read_csv(f'LR_DATASETS/{dataset_name}.txt')

  # Columns left of 'ranking' form X; columns right of 'ranking' form P (cast to int).
  from_ranking = table.loc[:, 'ranking':].columns
  up_to_ranking = table.loc[:, :'ranking'].columns
  X = table.drop(columns=from_ranking).to_numpy()
  P = table.drop(columns=up_to_ranking).to_numpy().astype('int')

  splitter = RepeatedKFold(n_splits=10, n_repeats=5, random_state=42)
  fold_scores = []

  for train_idx, test_idx in splitter.split(X):
    X_train, P_train = X[train_idx], P[train_idx]
    X_test, P_test = X[test_idx], P[test_idx]
    clf = LabelwiseLabelRanking('Random Forest', dict(n_jobs=-1)).fit(X_train, P_train)
    fold_scores.append(mean_KTcorrelation(P_test, clf.predict(X_test)))

  results[dataset_name] = [np.mean(fold_scores), np.std(fold_scores)]

with open(f'results/{dir}/labelwise_rf.pickle', 'wb') as handle:
  pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)

results

In [None]:
# Reload the labelwise 'Random Forest' summary and display it with each
# [mean, std] pair rounded to three decimals.
with open(f'results/REAL/labelwise_rf.pickle', 'rb') as handle:
  saved = pickle.load(handle)

rounded = np.round(list(saved.values()), 3)
{name: stats for name, stats in zip(saved.keys(), rounded)}

In [None]:
# Experiment: pairwise label ranking with the 'Decision Tree' base learner
# (criterion='gini', max_features=None) and 'tournament' aggregation.
# Each dataset is scored with seeded 10-fold cross-validation repeated 5 times;
# the per-fold mean_KTcorrelation values are reduced to [mean, std] per dataset
# and the summary dict is pickled under results/{dir}/.
results = {}

for dataset_name in datasets:
  table = pd.read_csv(f'LR_DATASETS/{dataset_name}.txt')

  # Columns left of 'ranking' form X; columns right of 'ranking' form P (cast to int).
  from_ranking = table.loc[:, 'ranking':].columns
  up_to_ranking = table.loc[:, :'ranking'].columns
  X = table.drop(columns=from_ranking).to_numpy()
  P = table.drop(columns=up_to_ranking).to_numpy().astype('int')

  splitter = RepeatedKFold(n_splits=10, n_repeats=5, random_state=42)
  fold_scores = []

  for train_idx, test_idx in splitter.split(X):
    X_train, P_train = X[train_idx], P[train_idx]
    X_test, P_test = X[test_idx], P[test_idx]
    clf = PairwiseLabelRanking('Decision Tree', dict(criterion='gini', max_features=None), aggregation='tournament').fit(X_train, P_train)
    fold_scores.append(mean_KTcorrelation(P_test, clf.predict(X_test)))

  results[dataset_name] = [np.mean(fold_scores), np.std(fold_scores)]

with open(f'results/{dir}/pairwise_dt.pickle', 'wb') as handle:
  pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)

results

In [None]:
# Reload the pairwise 'Decision Tree' summary and display it with each
# [mean, std] pair rounded to three decimals.
with open(f'results/REAL/pairwise_dt.pickle', 'rb') as handle:
  saved = pickle.load(handle)

rounded = np.round(list(saved.values()), 3)
{name: stats for name, stats in zip(saved.keys(), rounded)}

In [None]:
# Experiment: pairwise label ranking with the 'Random Forest' base learner
# and 'tournament' aggregation.
# Each dataset is scored with seeded 10-fold cross-validation repeated 5 times;
# the per-fold mean_KTcorrelation values are reduced to [mean, std] per dataset
# and the summary dict is pickled under results/{dir}/.
results = {}

for dataset_name in datasets:
  table = pd.read_csv(f'LR_DATASETS/{dataset_name}.txt')

  # Columns left of 'ranking' form X; columns right of 'ranking' form P (cast to int).
  from_ranking = table.loc[:, 'ranking':].columns
  up_to_ranking = table.loc[:, :'ranking'].columns
  X = table.drop(columns=from_ranking).to_numpy()
  P = table.drop(columns=up_to_ranking).to_numpy().astype('int')

  splitter = RepeatedKFold(n_splits=10, n_repeats=5, random_state=42)
  fold_scores = []

  for train_idx, test_idx in splitter.split(X):
    X_train, P_train = X[train_idx], P[train_idx]
    X_test, P_test = X[test_idx], P[test_idx]
    clf = PairwiseLabelRanking('Random Forest', dict(n_jobs=-1), aggregation='tournament').fit(X_train, P_train)
    fold_scores.append(mean_KTcorrelation(P_test, clf.predict(X_test)))

  results[dataset_name] = [np.mean(fold_scores), np.std(fold_scores)]

with open(f'results/{dir}/pairwise_rf.pickle', 'wb') as handle:
  pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)

results

In [None]:
# Reload the pairwise 'Random Forest' summary and display it with each
# [mean, std] pair rounded to three decimals.
with open(f'results/REAL/pairwise_rf.pickle', 'rb') as handle:
  saved = pickle.load(handle)

rounded = np.round(list(saved.values()), 3)
{name: stats for name, stats in zip(saved.keys(), rounded)}

In [4]:
# Trial run: pairwise label ranking with the 'Homogeneous Halfspace' base learner
# (beta=0.005, sigma=0.1, 'tournament' aggregation) on the 'cold' dataset only,
# scored with a single round of seeded 10-fold cross-validation.
results = {}

for dataset_name in ['cold']:
  table = pd.read_csv(f'LR_DATASETS/{dataset_name}.txt')

  # Columns left of 'ranking' form X; columns right of 'ranking' form P (cast to int).
  from_ranking = table.loc[:, 'ranking':].columns
  up_to_ranking = table.loc[:, :'ranking'].columns
  X = table.drop(columns=from_ranking).to_numpy()
  P = table.drop(columns=up_to_ranking).to_numpy().astype('int')

  splitter = RepeatedKFold(n_splits=10, n_repeats=1, random_state=42)
  fold_scores = []

  for train_idx, test_idx in splitter.split(X):
    X_train, P_train = X[train_idx], P[train_idx]
    X_test, P_test = X[test_idx], P[test_idx]
    clf = PairwiseLabelRanking('Homogeneous Halfspace', dict(beta=0.005, sigma=0.1), aggregation='tournament')
    clf.fit(X_train, P_train)
    fold_scores.append(mean_KTcorrelation(P_test, clf.predict(X_test)))

  results[dataset_name] = [np.mean(fold_scores), np.std(fold_scores)]

# The summary is only displayed, not pickled.
# NOTE(review): if persistence is added, do not reuse results/{dir}/pairwise_rf.pickle --
# that file is written by the pairwise 'Random Forest' cell.

results

{'cold': [0.05696158783450187, 0.03525338353116379]}

In [20]:
# Trial run: pairwise label ranking with the 'Homogeneous Halfspace' base learner
# and 'tournament' aggregation, scored with a single round of seeded 10-fold
# cross-validation. The per-fold mean_KTcorrelation values are reduced to
# [mean, std] per dataset and displayed.
results = {}

# Per-dataset hyper-parameters for the 'Homogeneous Halfspace' learner.
hh_params = {
  'iris': dict(beta=0.01, sigma=0.1),
  'wine': dict(beta=0.012, sigma=0.2),
  'wisconsin': dict(beta=0.01, sigma=0.1),
  'vowel': dict(beta=0.012, sigma=0.1),
  'cold': dict(beta=0.012, sigma=0.1)
}
# Fallback for datasets without an entry in hh_params. These are the values the
# loop used to hard-code, so the 'calhousing' run is unchanged.
default_hh_params = dict(beta=0.01, sigma=0.1)

for dataset_name in ['calhousing']:
  dataset = pd.read_csv(f'LR_DATASETS/{dataset_name}.txt')

  # Columns left of 'ranking' form X; columns right of 'ranking' form P (cast to int).
  X = dataset.drop(columns=dataset.loc[:, 'ranking':].columns).to_numpy()
  P = dataset.drop(columns=dataset.loc[:, :'ranking'].columns).to_numpy().astype('int')

  rkf = RepeatedKFold(n_splits=10, n_repeats=1, random_state=42)

  # Fix: hh_params was built but never consulted; every dataset silently got
  # the same hard-coded values. Look the settings up per dataset instead.
  learner_params = hh_params.get(dataset_name, default_hh_params)

  KTcorr = []

  for train, test in rkf.split(X):
    X_train, X_test, P_train, P_test = X[train], X[test], P[train], P[test]
    # Pass a fresh copy each fold, as the original did with its inline dict(...).
    clf = PairwiseLabelRanking('Homogeneous Halfspace', dict(learner_params), aggregation='tournament')
    clf.fit(X_train, P_train)
    preds = clf.predict(X_test)
    res = mean_KTcorrelation(P_test, preds)
    KTcorr.append(res)

  results[dataset_name] = [np.mean(KTcorr), np.std(KTcorr)]

# The summary is only displayed, not pickled.
# NOTE(review): if persistence is added, do not reuse results/{dir}/pairwise_rf.pickle --
# that file is written by the pairwise 'Random Forest' cell.

results

{'calhousing': [0.16920219638242892, 0.010727952075855056]}