In [2]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Nov 26 12:06:53 2020.

@author: malchiodi
"""

from mulearn import FuzzyInductor
from mulearn.kernel import PrecomputedKernel
from mulearn.fuzzifier import *
from mulearn.optimization import GurobiSolver
import csv
import numpy as np
from sklearn.model_selection import KFold, RandomizedSearchCV, GridSearchCV
from alternative_kernel import LengthDistanceKernel

import warnings
warnings.filterwarnings('ignore')

In [3]:
def get_dataset_axioms(filename, n=100):
    """Load item names and membership degrees from a CSV file.

    The first row of the file is treated as a header: its cells, from the
    second one on, are used as the item names. In each following row the
    first cell is parsed as the membership degree of one item.

    Parameters
    ----------
    filename : str or path-like
        Path of the CSV file to read.
    n : int or None, default 100
        Maximum number of items to load. ``None`` loads every item found
        in the file.

    Returns
    -------
    X : numpy.ndarray of shape (m, 1)
        The item names, one per row (a single string column).
    mu : numpy.ndarray of shape (m,)
        The membership degrees, as floats.

    Raises
    ------
    ValueError
        If ``n`` is negative, if the file is empty, if a degree cannot be
        parsed as a float, or if the number of names read from the header
        differs from the number of degrees read from the rows.
    """
    if n is not None and n < 0:
        raise ValueError(f'n must be non-negative or None, got {n}')

    # newline='' is the way the csv module asks files to be opened, so that
    # newlines embedded in quoted fields are handled by the reader itself.
    with open(filename, newline='') as data_file:
        reader = csv.reader(data_file)

        header = next(reader, None)
        if header is None:
            raise ValueError(f'{filename!r} is empty: no header row found')

        # Only the first cell of the first n data rows is needed, so stop
        # reading as soon as enough rows have been collected instead of
        # materializing the whole file.
        degrees = []
        for row in reader:
            if n is not None and len(degrees) >= n:
                break
            if not row:
                # csv.reader yields [] for blank lines; nothing to parse.
                continue
            degrees.append(float(row[0]))

    names = header[1:] if n is None else header[1:n + 1]

    if len(names) != len(degrees):
        raise ValueError(
            f'{filename!r}: found {len(names)} names in the header '
            f'but {len(degrees)} membership degrees in the rows')

    # dtype=str keeps a string array even when there are no names at all.
    X = np.array(names, dtype=str).reshape(-1, 1)
    mu = np.array(degrees, dtype=float)

    return X, mu


# CSV file from which the item names (X) and membership degrees (mu) are read.
data_file_name = 'data/data-tettamanzi-complete.csv'
X, mu = get_dataset_axioms(data_file_name)

# Outer cross-validation, with scikit-learn's default KFold settings.
out_cv = KFold()

# Model under evaluation: fuzzy inductor using the length-distance kernel.
fi = FuzzyInductor(k=LengthDistanceKernel(),
                   solver=GurobiSolver(),
                   fuzzifier=QuantileConstantPiecewiseFuzzifier())

# Inner model selection: grid search over seven log-spaced values of c
# between 0.1 and 10, each evaluated with inner_folds-fold cross-validation.
# error_score=np.nan records a failed fit as NaN instead of aborting the search.
inner_folds = 5
gs = GridSearchCV(fi, {'c': np.logspace(-1, 1, 7)},
                  verbose=0, cv=inner_folds,
                  error_score=np.nan, n_jobs=-1,
                  pre_dispatch=10)

# enumerate(..., start=1) numbers the outer folds, replacing a counter that
# had to be initialised and incremented by hand.
for i, (train_idx, test_idx) in enumerate(out_cv.split(X), start=1):
    X_train, X_test = X[train_idx], X[test_idx]
    mu_train, mu_test = mu[train_idx], mu[test_idx]

    # Select c on the training part of this outer fold only.
    gs.fit(X_train, mu_train)
    print(f"fold {i}: best parameters: {gs.best_params_['c']}")

    # Score the selected model on both parts of the outer fold.
    train_score = gs.score(X_train, mu_train)
    test_score = gs.score(X_test, mu_test)
    print(f'fold {i}: train score {train_score:.2f}, test score {test_score:.2f}')

fold 1: best parameters: 1.0
fold 1: train score -0.53, test score -0.47
fold 2: best parameters: 0.46415888336127786
fold 2: train score -0.55, test score -0.53
fold 3: best parameters: 0.1
fold 3: train score -0.54, test score -0.51
fold 4: best parameters: 0.1
fold 4: train score -0.55, test score -0.64
fold 5: best parameters: 10.0
fold 5: train score -0.57, test score -0.63


In [5]:
from alternative_kernel import LevenshteinKernel

# Nested cross-validation of the fuzzy inductor with the Levenshtein kernel.
# X and mu are expected to be already defined by an earlier cell.
#
# NOTE(review): the output saved with this cell is
#   TypeError: unsupported operand type(s) for *: 'NoneType' and 'float'
# The failing call is not visible in this notebook -- TODO trace it
# (presumably inside LevenshteinKernel or the solver; confirm).

# Outer cross-validation, with scikit-learn's default KFold settings.
out_cv = KFold()

fi = FuzzyInductor(k=LevenshteinKernel(),
                   solver=GurobiSolver(),
                   fuzzifier=QuantileConstantPiecewiseFuzzifier())

# Inner model selection: grid search over seven log-spaced values of c
# between 0.1 and 10, each evaluated with inner_folds-fold cross-validation.
# error_score=np.nan records a failed fit as NaN instead of aborting the search.
inner_folds = 5
gs = GridSearchCV(fi, {'c': np.logspace(-1, 1, 7)},
                  verbose=0, cv=inner_folds,
                  error_score=np.nan, n_jobs=1,
                  pre_dispatch=10)

# enumerate(..., start=1) numbers the outer folds, replacing a counter that
# had to be initialised and incremented by hand.
for i, (train_idx, test_idx) in enumerate(out_cv.split(X), start=1):
    X_train, X_test = X[train_idx], X[test_idx]
    mu_train, mu_test = mu[train_idx], mu[test_idx]

    # Select c on the training part of this outer fold only.
    gs.fit(X_train, mu_train)
    print(f"fold {i}: best parameters: {gs.best_params_['c']}")

    # Score the selected model on both parts of the outer fold.
    train_score = gs.score(X_train, mu_train)
    test_score = gs.score(X_test, mu_test)
    print(f'fold {i}: train score {train_score:.2f}, test score {test_score:.2f}')

TypeError: unsupported operand type(s) for *: 'NoneType' and 'float'

In [6]:
from alternative_kernel import HammingKernel

# Nested cross-validation of the fuzzy inductor with the Hamming kernel.
# X and mu are expected to be already defined by an earlier cell.

# Outer cross-validation, with scikit-learn's default KFold settings.
out_cv = KFold()

fi = FuzzyInductor(k=HammingKernel(),
                   solver=GurobiSolver(),
                   fuzzifier=QuantileConstantPiecewiseFuzzifier())

# Inner model selection: grid search over seven log-spaced values of c
# between 0.1 and 10, each evaluated with inner_folds-fold cross-validation.
# error_score=np.nan records a failed fit as NaN instead of aborting the search.
inner_folds = 5
gs = GridSearchCV(fi, {'c': np.logspace(-1, 1, 7)},
                  verbose=0, cv=inner_folds,
                  error_score=np.nan, n_jobs=1,
                  pre_dispatch=10)

# enumerate(..., start=1) numbers the outer folds, replacing a counter that
# had to be initialised and incremented by hand.
for i, (train_idx, test_idx) in enumerate(out_cv.split(X), start=1):
    X_train, X_test = X[train_idx], X[test_idx]
    mu_train, mu_test = mu[train_idx], mu[test_idx]

    # Select c on the training part of this outer fold only.
    gs.fit(X_train, mu_train)
    print(f"fold {i}: best parameters: {gs.best_params_['c']}")

    # Score the selected model on both parts of the outer fold.
    train_score = gs.score(X_train, mu_train)
    test_score = gs.score(X_test, mu_test)
    print(f'fold {i}: train score {train_score:.2f}, test score {test_score:.2f}')

fold 1: best parameters: 0.1
fold 1: train score -0.56, test score -0.64
fold 2: best parameters: 0.1
fold 2: train score -0.58, test score -0.53
fold 3: best parameters: 0.1
fold 3: train score -0.55, test score -0.56
fold 4: best parameters: 0.1
fold 4: train score -0.56, test score -0.51
fold 5: best parameters: 0.1
fold 5: train score -0.56, test score -0.51
