In [16]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from mulearn import FuzzyInductor
from mulearn.kernel import PrecomputedKernel, GaussianKernel
from mulearn.fuzzifier import *
from mulearn.optimization import GurobiSolver
import csv
import numpy as np
import statistics
from sklearn.model_selection import KFold, RandomizedSearchCV, GridSearchCV
from sklearn.metrics import mean_squared_error, make_scorer
from mulearn.distributions import *

def get_dataset(filename):
    with open(filename) as data_file:
        data = np.array(list(csv.reader(data_file)))
    n = len(data) - 1
    mu = np.array([float(row[0]) for row in data[1:n+1]])
    X = np.array([[float(k.replace('NA', '0')) for k in row[1:n+1]]
                     for row in data[1:n+1]])
    return X, mu

data_file_name = 'data/data-tettamanzi-complete.csv'
X, mu = get_dataset(data_file_name)

In [21]:
X
X = X[:900,:900]
mu = mu[:900]

In [None]:
out_cv = KFold()

fuzzifiers = [CrispFuzzifier(), QuantileConstantPiecewiseFuzzifier(), QuantileLinearPiecewiseFuzzifier(), LinearFuzzifier(),ExponentialFuzzifier()]
mean_test_scores = []
dev_test_scores = []
mean_train_scores = []
dev_train_scores = []

for fuzzifier in fuzzifiers: 
    test_scores = []
    train_scores = []
    i = 1
    
    fi = FuzzyInductor(k= GaussianKernel(), fuzzifier= fuzzifier)

    inner_folds = 5
    rmse = make_scorer(mean_squared_error)
    
    gs = GridSearchCV(fi, {'c': np.logspace(-1, 1, 7)},
                        verbose=0, cv=inner_folds,
                        error_score= np.nan, scoring = rmse, n_jobs= 1,
                        pre_dispatch=10, refit = True)

    for train_idx, test_idx in out_cv.split(X):
        X_train = X[train_idx]
        X_test = X[test_idx]
        mu_train = mu[train_idx]
        mu_test = mu[test_idx]

        try:
            gs.fit(X_train, mu_train)
            print(f"fold {i}: best parameters: {gs.best_params_['c']}")
            #e = rs.estimator.set_params(**rs.best_params_)
            train_score = gs.score(X_train, mu_train)
            test_score = gs.score(X_test, mu_test)
            print(f'fold {i}: train score {train_score:.2f}, test score {test_score:.2f}')
            test_scores.append(test_score)
            train_scores.append(train_score)
            i += 1
        except ValueError as e:
            print(e)
            test_scores.append(np.nan)
            train_scores.append(np.nan)
            i += 1
            continue
        
    mean_test_scores.append(np.nanmean(test_scores))
    mean_train_scores.append(np.nanmean(train_scores))
    dev_test_scores.append(np.nanstd(test_scores))
    dev_train_scores.append(np.nanstd(train_scores))

In [3]:
import pandas as pd

rmse_test = []
for i in range(len(mean_test_scores)):
    rmse_test.append(str(round(mean_test_scores[i],5)) + " +/- " + str(2*round(dev_test_scores[i],3)))

rmse_train = []
for i in range(len(mean_train_scores)):
    rmse_train.append(str(round(mean_train_scores[i],3)) + ' +/- ' + str(2*round(dev_train_scores[i],3)))
    
d = {'RMSE test' : rmse_test, 'RMSE train' : rmse_train}
df = pd.DataFrame(d, index = ['CrispFuzzifier', 'QuantileConstantPiecewiseFuzzifier', 'QuantileLinearPiecewiseFuzzifier','LinearFuzzifier','ExponentialFuzzifier'])
df.head()

Unnamed: 0,RMSE test,RMSE train
CrispFuzzifier,0.4714 +/- 0.402,0.243 +/- 0.06
QuantileConstantPiecewiseFuzzifier,0.52855 +/- 0.224,0.067 +/- 0.016
QuantileLinearPiecewiseFuzzifier,0.46104 +/- 0.228,0.072 +/- 0.016
LinearFuzzifier,0.45253 +/- 0.33,0.165 +/- 0.028
ExponentialFuzzifier,0.42252 +/- 0.302,0.167 +/- 0.028
