In [1]:
from mulearn import FuzzyInductor
from mulearn.kernel import PrecomputedKernel, GaussianKernel
from mulearn.fuzzifier import *
from mulearn.optimization import GurobiSolver
import csv
import numpy as np
from sklearn.model_selection import KFold, RandomizedSearchCV, GridSearchCV
import random
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

def get_kernel_and_solver(gram):
    eigvals = np.linalg.eigvals(gram)
    assert(sum([abs(e.imag) for e in eigvals]) < 1e-4)
    abs_neg_eigvals = [-l.real for l in eigvals if l < 0]
    adjustment = max(abs_neg_eigvals) if abs_neg_eigvals else 0

    kernel = PrecomputedKernel(gram)
    solver = GurobiSolver(adjustment=adjustment) if adjustment else GurobiSolver()
    return kernel, solver

def get_dataset(filename):
    with open(filename) as data_file:
        data = np.array(list(csv.reader(data_file)))

    n = len(data) - 1

    # ## Extract data names, membership values and Gram matrix

    names = np.array(data[0])[1:n+1]
    mu = np.array([float(row[0]) for row in data[1:n+1]])
    gram = np.array([[float(k.replace('NA', '0')) for k in row[1:n+1]]
                     for row in data[1:n+1]])

    assert(len(names.shape) == 1)
    assert(len(mu.shape) == 1)
    assert(len(gram.shape) == 2)

    assert(names.shape[0] == gram.shape[0] == gram.shape[1] == mu.shape[0])

    X = np.arange(n)

    return X, gram, mu

data_file_name = 'data/data-tettamanzi-complete.csv'
X, gram, mu = get_dataset(data_file_name)
k,s = get_kernel_and_solver(gram)

cs = [0.005, 0.007, 0.01, 0.03, 0.05, 0.07, 0.1, 0.3, 0.5,0.7, 1, 10, 100]

fuzzifiers = [LinearFuzzifier(), ExponentialFuzzifier(), CrispFuzzifier(),
              QuantileConstantPiecewiseFuzzifier(),
              QuantileLinearPiecewiseFuzzifier()]
mean_scores = []
std_scores = []

X = X.reshape(len(X), -1)

In [5]:
for fuzzifier in fuzzifiers:
    test_scores = []

    for i in range(10):
        X_trainval, X_test, mu_trainval, mu_test = \
                train_test_split(X, mu, test_size=0.1)

        
        X_train, X_validation, mu_train, mu_validation = \
                train_test_split(X_trainval, mu_trainval, train_size=8/9)
            
        rmse_min = np.inf
        best_param = None
        
        for c in cs:
            try:
                fi = FuzzyInductor(c=c, fuzzifier=fuzzifier, k=k, solver=s)
                fi.fit(X_train, mu_train)
                predictions = fi.predict(X_validation)
                rmse = mean_squared_error(predictions, mu_validation,
                                          squared=False)
                if  rmse < rmse_min:
                    best_param = c
                    rmse_min = rmse
            except ValueError as e:
                print(e)
                continue
                  
        #print("in experiment " + str(i) + " best c is " + str(best_param))
        print(f"in experiment {i} best c is {best_param}")
        fi = FuzzyInductor(c=best_param, fuzzifier=fuzzifier, k=k, solver=s)
        fi.fit(X_trainval, mu_trainval)
        
        try:
            predictions = fi.predict(X_test)
            rmse = mean_squared_error(predictions, mu_test, squared=False)
            print(f"in experiment {i} test rmse is {rmse}")
            test_scores.append(rmse)
        except ValueError:
            test_scores.append(np.nan)

    mean_scores.append(np.nanmean(test_scores))
    std_scores.append(np.nanstd(test_scores))

Profile fitting returned a negative parameter
Profile fitting returned a negative parameter
Profile fitting returned a negative parameter
Profile fitting returned a negative parameter
Profile fitting returned a negative parameter
Profile fitting returned a negative parameter
Profile fitting returned a negative parameter
Profile fitting returned a negative parameter
Profile fitting returned a negative parameter
Profile fitting returned a negative parameter
Profile fitting returned a negative parameter
Profile fitting returned a negative parameter
Profile fitting returned a negative parameter
in experiment 0 best c is None


TypeError: '<=' not supported between instances of 'NoneType' and 'int'

In [22]:
import pandas as pd

rmse_test = []
for i in range(len(mean_scores)):
    rmse_test.append(str(round(mean_scores[i],5)) + " +/- " + str(2*round(std_scores[i],3)))

d = {'RMSE' : rmse_test}
df = pd.DataFrame(d, index = ['LinearFuzzifier','ExponentialFuzzifier','CrispFuzzifier', 'QuantileConstantPiecewiseFuzzifier', 'QuantileLinearPiecewiseFuzzifier'])
df.head()

Unnamed: 0,RMSE
LinearFuzzifier,nan +/- nan
ExponentialFuzzifier,0.38033 +/- 0.026
CrispFuzzifier,0.45697 +/- 0.09
QuantileConstantPiecewiseFuzzifier,0.33197 +/- 0.028
QuantileLinearPiecewiseFuzzifier,0.29839 +/- 0.034


In [23]:
mu

array([1.        , 0.04920855, 1.        , ..., 0.02828723, 1.        ,
       0.04334869])