In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Nov 26 12:06:53 2020.

@author: malchiodi
"""

from mulearn import FuzzyInductor
from mulearn.kernel import PrecomputedKernel
from mulearn.fuzzifier import *
from mulearn.optimization import GurobiSolver
import csv
import numpy as np
import statistics
from sklearn.model_selection import KFold, RandomizedSearchCV, GridSearchCV
from sklearn.metrics import mean_squared_error, make_scorer
from mulearn.distributions import *

def get_kernel_and_solver(gram):
    eigvals = np.linalg.eigvals(gram)
    assert(sum([abs(e.imag) for e in eigvals]) < 1e-4)
    abs_neg_eigvals = [-l.real for l in eigvals if l < 0]
    adjustment = max(abs_neg_eigvals) if abs_neg_eigvals else 0

    kernel = PrecomputedKernel(gram)
    solver = GurobiSolver(adjustment=adjustment) if adjustment else GurobiSolver()

    return kernel, solver

def get_dataset(filename):
    with open(filename) as data_file:
        data = np.array(list(csv.reader(data_file)))

    n = len(data) - 1
    n = 100

    # ## Extract data names, membership values and Gram matrix

    names = np.array(data[0])[1:n+1]
    mu = np.array([float(row[0]) for row in data[1:n+1]])
    gram = np.array([[float(k.replace('NA', '0')) for k in row[1:n+1]]
                     for row in data[1:n+1]])

    assert(len(names.shape) == 1)
    assert(len(mu.shape) == 1)
    assert(len(gram.shape) == 2)

    assert(names.shape[0] == gram.shape[0] == gram.shape[1] == mu.shape[0])

    X = np.array([[x] for x in np.arange(n)])

    return X, gram, mu


data_file_name = 'data/data-tettamanzi-complete.csv'
X, gram, mu = get_dataset(data_file_name)


out_cv = KFold()

k, solver = get_kernel_and_solver(gram)

fuzzifiers = [CrispFuzzifier(), QuantileConstantPiecewiseFuzzifier(), QuantileLinearPiecewiseFuzzifier(), LinearFuzzifier(), ExponentialFuzzifier(profile = 'alpha', alpha = 0.07)]
mean_scores = []
variance_scores = []

for fuzzifier in fuzzifiers: 
    test_scores = []
    i = 1
    
    fi = FuzzyInductor(k=k, solver=solver, fuzzifier= fuzzifier)

    inner_folds = 5
    rmse = make_scorer(mean_squared_error)
    
    gs = GridSearchCV(fi, {'c': np.logspace(-3, 3, 7)},
                        verbose=0, cv=inner_folds,
                        error_score= np.nan, scoring = rmse, n_jobs= 1,
                        pre_dispatch=10, refit = True)

    for train_idx, test_idx in out_cv.split(X):
        X_train = X[train_idx]
        X_test = X[test_idx]
        mu_train = mu[train_idx]
        mu_test = mu[test_idx]

        try:
            gs.fit(X_train, mu_train)
            print(f"fold {i}: best parameters: {gs.best_params_['c']}")
            #e = rs.estimator.set_params(**rs.best_params_)
            train_score = gs.score(X_train, mu_train)
            test_score = gs.score(X_test, mu_test)
            print(f'fold {i}: train score {train_score:.2f}, test score {test_score:.2f}')
            test_scores.append(test_score)
            i += 1
        except ValueError as e:
            print(e)
            test_scores.append(np.nan)
            i += 1
            continue
        
    mean_scores.append(statistics.mean(test_scores))
    variance_scores.append(statistics.variance(test_scores))

fold 1: best parameters: 0.1
fold 1: train score 0.00, test score 0.00
fold 2: best parameters: 0.1
fold 2: train score 0.00, test score 0.04
fold 3: best parameters: 0.1
fold 3: train score 0.00, test score 0.00
fold 4: best parameters: 0.1
fold 4: train score 0.00, test score 0.00
fold 5: best parameters: 0.1
fold 5: train score 0.00, test score 0.00
fold 1: best parameters: 0.1
fold 1: train score 0.06, test score 0.13
fold 2: best parameters: 0.1
fold 2: train score 0.06, test score 0.12
fold 3: best parameters: 0.1
fold 3: train score 0.06, test score 0.13
fold 4: best parameters: 0.1
fold 4: train score 0.06, test score 0.14
fold 5: best parameters: 0.1
fold 5: train score 0.06, test score 0.11
fold 1: best parameters: 0.1
fold 1: train score 0.05, test score 0.13
fold 2: best parameters: 0.1
fold 2: train score 0.05, test score 0.13
fold 3: best parameters: 0.1
fold 3: train score 0.05, test score 0.13
fold 4: best parameters: 0.1
fold 4: train score 0.05, test score 0.13
fold 5

In [None]:
import pandas as pd

d = {'mean rmse' : mean_scores, 'variance': variance_scores}
df = pd.DataFrame(d, index = ['CrispFuzzifier', 'QuantileConstantPiecewiseFuzzifier', 'QuantileLinearPiecewiseFuzzifier','LinearFuzzifier', 'ExponentialFuzzifier'])
df.head()