In [1]:
cd SAC2018/

C:\Users\aless\SAC2018


In [1]:
# Data format:
#
# n0, n1, ...
# mu0, k00, k01, ..., k0n
# ...
# mun, kn0, kn1, ..., knn

import csv
import numpy as np

# Dataset selection: exactly one assignment is active, the commented-out
# paths are alternative datasets that can be enabled instead.
data_file_name = 'data/data-tettamanzi-complete.csv'
#data_file_name = 'data/data-tettamanzi-little.csv'
#data_file_name = 'data/axioms-toy.csv'

# Read every CSV row as a list of strings and collect the rows in a NumPy array.
with open(data_file_name) as data_file:
    data = np.array([row for row in csv.reader(data_file)])

# The first row is the header (see the data format above), so it is not
# counted as a data item.
n = data.shape[0] - 1
print('%d data items' % n)

1444 data items


In [2]:
# Number of data items: every row after the header row holds one item.
# Derived from the loaded table instead of being hard-coded, so this cell stays
# correct when data_file_name points to a different dataset. Assign a smaller
# value here to work on the first n items only.
n = len(data) - 1

# Header row: the first cell is skipped, the following n cells are the item names.
names = np.array(data[0])[1:n+1]

# In each data row the first cell is the membership degree mu of the item...
mu = np.array([float(row[0]) for row in data[1:n+1]])

# ...and the following n cells are that item's row of the kernel (Gram) matrix.
# 'NA' entries are read as 0.
gram = np.array([[float(cell.replace('NA', '0')) for cell in row[1:n+1]]
                 for row in data[1:n+1]])

# Sanity checks: names and mu are vectors, gram is a square matrix, and all
# of them agree on the number of items.
assert names.ndim == 1, 'names must be one-dimensional'
assert mu.ndim == 1, 'mu must be one-dimensional'
assert gram.ndim == 2, 'gram must be two-dimensional'
assert names.shape[0] == gram.shape[0] == gram.shape[1] == mu.shape[0], \
    'names, mu and gram disagree on the number of items'

In [3]:
# Spectrum of the Gram matrix, used to check whether it is positive semidefinite.
eigvals = np.linalg.eigvals(gram)

# The eigenvalues may be returned as complex numbers: only a negligible
# overall imaginary component is tolerated.
assert sum(abs(ev.imag) for ev in eigvals) < 1e-4

# Magnitudes of the negative eigenvalues; the largest one is the amount
# reported as the diagonal adjustment (0 when there is no negative eigenvalue).
abs_neg_eigvals = [-ev.real for ev in eigvals if ev < 0]
if abs_neg_eigvals:
    adjustment = max(abs_neg_eigvals)
else:
    adjustment = 0

if adjustment:
    print('non PSD matrix: diagonal adjusment of {0}'.format(adjustment))

non PSD matrix: diagonal adjusment of 292.40530143116314


In [5]:
from possibilearn import *
from possibilearn.kernel import PrecomputedKernel
from possibilearn.fuzzifiers import LinearFuzzifier

import csv

# Number of folds of the external and internal cross-validation loops.
num_external_folds = 5
num_internal_folds = 3

# Items are referred to by their position in mu / gram.
axiom_indices = range(n)

# Shuffling indices (disabled)
# axiom_indices = np.random.permutation(zip(axiom_indices[::2], axiom_indices[1::2])).flatten()

assert len(axiom_indices) == n and len(mu) == n

# Consecutive items (0-1, 2-3, ...) are grouped two by two, together with
# their labels, so that the folds are built on pairs rather than single items.
paired_axioms = [axiom_indices[start:start + 2] for start in range(0, n, 2)]
paired_labels = [mu[start:start + 2] for start in range(0, n, 2)]

# Kernel object wrapping the Gram matrix loaded from file.
k = PrecomputedKernel(gram)

# External cross-validation splits over the paired data.
folded_data = cross_validation(paired_axioms, paired_labels, num_external_folds)

def g(m):
    """Return a random sample of axiom indices.

    The sample has m elements, capped at the number of available indices.
    It is drawn with np.random.choice using its default settings, that is
    uniformly and with replacement.
    """
    available = len(axiom_indices)
    if m <= available:
        sample_size = m
    else:
        sample_size = available
    return np.random.choice(axiom_indices, sample_size)

# Candidate values of the hyperparameter C explored by model selection.
cs = (0.01, .1, .3, .5, .7, 1., 3., 5., 10., 100.)
# Candidate kernels: only the precomputed Gram-matrix kernel.
ks = (k,)

fuzzifier = LinearFuzzifier()

fold_number = 0

# One entry per external fold for which a membership function was obtained:
# RMSE of the membership estimate on the test items, and median / standard
# deviation of the corresponding squared errors.
metrics_membership_rmse = []
metrics_membership_median = []
metrics_membership_stdev = []

for (paired_values_train, paired_values_test, paired_mu_train, paired_mu_test) in folded_data:
    print('external fold {} of {}'.format(fold_number, num_external_folds))
    # we keep values and labels paired, so that internal cross-validation does not break pairs

    best_c, _, result = model_selection_cv(paired_values_train, paired_mu_train,
                                        num_internal_folds, cs, ks,
                                        sample_generator=g,
                                        log=False,
                                        adjustment=adjustment,
                                        fuzzifier=fuzzifier)

    print('in fold {} best C is {}'.format(fold_number, best_c))
    estimated_membership = result[0]

    # values and labels are still paired, we need to flatten them out
    values_test = flatten(paired_values_test)
    mu_test = flatten(paired_mu_test)

    if estimated_membership:
        membership_square_err = [(estimated_membership(v) - m)**2 for v, m in zip(values_test, mu_test)]
        # np.sqrt is used instead of math.sqrt: numpy is imported explicitly by
        # the notebook, whereas math would only be available if the star
        # import of possibilearn happened to expose it.
        membership_rmse = float(np.sqrt(sum(membership_square_err) / len(values_test)))
        metrics_membership_rmse.append(membership_rmse)

        membership_median = np.median(membership_square_err)
        metrics_membership_median.append(membership_median)

        membership_stdev = np.std(membership_square_err)
        metrics_membership_stdev.append(membership_stdev)
    else:
        # Make the omission visible: this fold contributes nothing to the
        # averages printed at the end.
        print('no membership function for external fold {}: '
              'fold excluded from the averages'.format(fold_number))

    fold_number += 1

# Averages are taken over the folds that produced a membership function.
print('Membership average values:')
print('RMSE: {}'.format(np.average(metrics_membership_rmse)))
print('Median: {}'.format(np.average(metrics_membership_median)))
print('STDEV: {}'.format(np.average(metrics_membership_stdev)))

external fold 0 of 5
internal fold 0 of 3
internal fold 1 of 3
internal fold 2 of 3
internal fold 0 of 3
internal fold 1 of 3
internal fold 2 of 3
internal fold 0 of 3
internal fold 1 of 3
internal fold 2 of 3
internal fold 0 of 3
internal fold 1 of 3
internal fold 2 of 3
internal fold 0 of 3
internal fold 1 of 3
internal fold 2 of 3
internal fold 0 of 3
internal fold 1 of 3
optimization did not succeed
internal fold 2 of 3
internal fold 0 of 3
internal fold 1 of 3
internal fold 2 of 3
internal fold 0 of 3
optimization did not succeed
internal fold 1 of 3
optimization did not succeed
internal fold 2 of 3
internal fold 0 of 3
internal fold 1 of 3
internal fold 2 of 3
optimization did not succeed
internal fold 0 of 3
internal fold 1 of 3
internal fold 2 of 3
optimization did not succeed
in fold 0 best C is 5.0
external fold 0 of 5
internal fold 0 of 3
internal fold 1 of 3
internal fold 2 of 3
internal fold 0 of 3
internal fold 1 of 3
internal fold 2 of 3
internal fold 0 of 3
internal fol