### Performance Comparision with sklearn.tree.DecisionTreeClassifier

In [1]:
import pandas as pd
import ruleset as rs
from ruleset.base import Timer
import pickle

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.metrics import precision_score, recall_score, accuracy_score

In [59]:
def make_rs_dataset(dataset_filename, testset_filename=None, random_state=42):
    # Load df
    df = pd.read_csv(dataset_filename)
    
    # Split
    if not testset_filename:
        train, test = train_test_split(df, test_size=.33, random_state=random_state)
    else:
        train, test = (df, pd.read_csv(testset_filename))
        
    return train, test

In [4]:
def make_ripper(train, class_feat, pos_class, k=2, random_state=42, verbosity=0):
    # Train
    rip_clf = rs.RIPPER(k=k, verbosity=verbosity)
    rip_clf.fit(train, class_feat=class_feat, pos_class=pos_class, 
                n_discretize_bins=5, random_state=random_state)
    return rip_clf

In [5]:
def make_irep(train, class_feat, pos_class, random_state=42, verbosity=0):
    # Train
    irep_clf = rs.IREP(verbosity=verbosity)
    irep_clf.fit(train, class_feat=class_feat, pos_class=pos_class, n_discretize_bins=5, random_state=random_state)
    return irep_clf

In [6]:
def score_ruleset(clf, test):
    X_test = test.drop(clf.class_feat,axis=1)
    y_test = test[class_feat]

    precision = clf.score(X_test, y_test, precision_score)
    recall = clf.score(X_test, y_test, recall_score)
    total_conds = clf.ruleset_.count_conds()
    return precision, recall, total_conds

In [69]:
from sklearn.tree import DecisionTreeClassifier

def make_tree_dataset(dataset_filename, class_feat, pos_class, testset_filename=None, random_state=42):
    
    # Load df
    df = pd.read_csv(dataset_filename)
    
    # Get number of classes (for X,y splitting when there are dummies)
    n_classes = len(df[class_feat].unique())
    
    # One-hot preprocess the data to make it dummy for sklearn.
    le = LabelEncoder()
    df_le = df.apply(le.fit_transform)
    enc = OneHotEncoder(sparse=False)
    enc.fit(df_le)
    df_hot = enc.transform(df_le)
    
    # Split
    if not testset_filename:
        train, test = train_test_split(df_hot, test_size=.33, random_state=random_state)
    else:
        test_df = pd.read_csv(testset_filename)
        test_le = test_df.apply(le.transform)
        train, test = (df_hot, enc.transform(test_le))
    train_X = train[:,n_classes:]
    train_y = train[:,0]
    test_X = test[:,n_classes:]
    test_y = test[:,0]
    
    return train_X, train_y, test_X, test_y

In [14]:
def make_tree(train_X, train_y, max_depth=None, random_state=42):
    tree = DecisionTreeClassifier(max_depth=max_depth, random_state=random_state)
    tree.fit(train_X, train_y)
    return tree

In [15]:
def score_tree(tree_clf, test_X, test_y):
    predictions = tree_clf.predict(test_X)
    precision = precision_score(test_y, predictions)
    recall = recall_score(test_y, predictions)
    return precision, recall, tree_clf.tree_.node_count

In [57]:
import numpy as np
import math

def run_experiment(filename, class_feat, pos_class, testset_filename=None, k=2, verbosity=0, random_state=42, n_runs=10):
    
    irep_precision_list = []
    irep_recall_list = []
    irep_conds_list = []
    irep_models_list = []
    
    ripper_precision_list = []
    ripper_recall_list = []
    ripper_conds_list = []
    ripper_models_list = []
    
    tree_precision_list = []
    tree_recall_list = []
    tree_nodes_list = []
    tree_models_list = []
    
    max_tree_precision_list = []
    max_tree_recall_list = []
    max_tree_nodes_list = []
    max_tree_models_list = []
    
    for i in range(n_runs):

        # Set random_state
        random_state = i+random_state
        
        # Run IREP
        print(f'irep {i+1} of {n_runs}')
        train, test = make_rs_dataset(filename, testset_filename=testset_filename, random_state=random_state)
        irep_clf = make_irep(train, class_feat, pos_class, verbosity=verbosity, random_state=random_state)
        irep_precision, irep_recall, irep_conds = score_ruleset(irep_clf, test)
        irep_precision_list.append(irep_precision)
        irep_recall_list.append(irep_recall)
        irep_conds_list.append(irep_conds)
        irep_models_list.append(irep_clf)
        #print(f'{irep_precision}, {irep_recall}, {irep_conds}')
        
        # Run RIPPER
        print(f'ripper {i+1} of {n_runs}')
        train, test = make_rs_dataset(filename, testset_filename=testset_filename, random_state=random_state)
        ripper_clf = make_ripper(train, class_feat, pos_class, verbosity=verbosity, random_state=random_state)
        ripper_precision, ripper_recall, ripper_conds = score_ruleset(ripper_clf, test)
        ripper_precision_list.append(ripper_precision)
        ripper_recall_list.append(ripper_recall)
        ripper_conds_list.append(ripper_conds)
        ripper_models_list.append(ripper_clf) 
        #print(f'{ripper_precision}, {ripper_recall}, {ripper_conds}')
        
        # Run tree
        print(f'tree {i+1} of {n_runs}')
        tree_train_X, tree_train_y, tree_test_X, tree_test_y = make_tree_dataset(filename, class_feat, pos_class, testset_filename=testset_filename, random_state=random_state)
        tree_clf = make_tree(tree_train_X, tree_train_y, random_state=random_state)
        tree_precision, tree_recall, tree_nodes = score_tree(tree_clf, tree_test_X, tree_test_y)
        tree_precision_list.append(tree_precision)
        tree_recall_list.append(tree_recall)
        tree_nodes_list.append(tree_nodes)
        tree_models_list.append(tree_clf)
        #print(f'{tree_precision}, {tree_recall}, {tree_nodes}')
        
        # Run max tree
        print(f'max-tree {i+1} of {n_runs}')
        max_depth = max(1,int(math.log2(ripper_conds))) # limit n_nodes to approx n_conds
        max_tree_train_X, max_tree_train_y, max_tree_test_X, max_tree_test_y = make_tree_dataset(filename, class_feat, pos_class, testset_filename=testset_filename, random_state=random_state)
        max_tree_clf = make_tree(max_tree_train_X, max_tree_train_y, max_depth=max_depth, random_state=random_state)
        max_tree_precision, max_tree_recall, max_tree_nodes = score_tree(max_tree_clf, max_tree_test_X, max_tree_test_y)
        max_tree_precision_list.append(max_tree_precision)
        max_tree_recall_list.append(max_tree_recall)
        max_tree_nodes_list.append(max_tree_nodes)
        max_tree_models_list.append(max_tree_clf)
        #print(f'{max_tree_precision}, {max_tree_recall}, {max_tree_nodes}')
    
    irep_means = (np.mean(irep_precision_list), np.mean(irep_recall_list), np.mean(irep_conds_list))
    irep_meds = (np.median(irep_precision_list), np.median(irep_recall_list), np.median(irep_conds_list))
    ripper_means = (np.mean(ripper_precision_list), np.mean(ripper_recall_list), np.mean(ripper_conds_list))
    ripper_meds = (np.median(ripper_precision_list), np.median(ripper_recall_list), np.median(ripper_conds_list))
    
    tree_means = (np.mean(tree_precision_list), np.mean(tree_recall_list), np.mean(tree_nodes_list))
    tree_meds = (np.median(tree_precision_list), np.median(tree_recall_list), np.median(tree_nodes_list))
    max_tree_means = (np.mean(max_tree_precision_list), np.mean(max_tree_recall_list), np.mean(max_tree_nodes_list))
    max_tree_meds = (np.median(max_tree_precision_list), np.median(max_tree_recall_list), np.median(max_tree_nodes_list))

    # Calculate class distribution:
    df = pd.read_csv(filename)
    class_balance = len(df[df[class_feat]==pos_class])/len(df)
    
    
    print()
    print(f'class balance: {class_balance}')
    print()
    print(f'means:')
    print(f'ripper {ripper_means}')
    print(f'irep {irep_means}')
    print(f'tree means {tree_means}')
    print(f'max_tree means {max_tree_means}')
    print()
    print(f'medians')
    print(f'irep medians {irep_meds}')
    print(f'ripper medians {ripper_meds}')
    print(f'tree medians {tree_meds}')
    print(f'max_tree medians {max_tree_meds}')
    
    return {'irep_means':ripper_means,
            'irep_meds':ripper_meds,
            'irep_models':irep_models_list,

            'ripper_means':ripper_means,
            'ripper_meds':ripper_meds,
            'ripper_models':ripper_models_list,
            
            'tree_means':tree_means,
            'tree_meds':tree_meds,
            'tree_models':tree_models_list,
            
            'max_tree_means':max_tree_means,
            'max_tree_meds':max_tree_meds,
            'max_tree_models':max_tree_models_list,
            
            'n':n_runs,
            'class_balance':class_balance
           }

In [11]:
datasets_path = '../datasets/'
random_state = 42

In [80]:
# Set up
dataset = 'house-votes-84.csv'
filename = datasets_path + dataset
class_feat = 'Party'
pos_class = 'democrat'
k = 2
n_classes = 2
verbosity=0
n_runs=10

timer = Timer()
congress_results = run_experiment(filename, class_feat, pos_class, n_classes, k=k, verbosity=verbosity, random_state=42, n_runs=n_runs)
timer.stop()
pickle.dump(congress_results,open(filename.replace('.csv','.pkl'),'wb'))
congress_results

irep 1 of 10
ripper 1 of 10
tree 1 of 10
max-tree 1 of 10
irep 2 of 10
ripper 2 of 10
tree 2 of 10
max-tree 2 of 10
irep 3 of 10
ripper 3 of 10
tree 3 of 10
max-tree 3 of 10
irep 4 of 10
ripper 4 of 10
tree 4 of 10
max-tree 4 of 10
irep 5 of 10
ripper 5 of 10
tree 5 of 10
max-tree 5 of 10
irep 6 of 10
ripper 6 of 10
tree 6 of 10
max-tree 6 of 10
irep 7 of 10
ripper 7 of 10
tree 7 of 10
max-tree 7 of 10
irep 8 of 10
ripper 8 of 10
tree 8 of 10
max-tree 8 of 10
irep 9 of 10
ripper 9 of 10
tree 9 of 10
max-tree 9 of 10
irep 10 of 10
ripper 10 of 10
tree 10 of 10
max-tree 10 of 10
means:
ripper (0.9786908705329758, 0.8321058042468513, 3.2)
irep (0.9411800688487462, 0.8642916901682195, 2.9)
tree means (0.9521518769680185, 0.9616182203222212, 37.6)
max_tree means (0.9825004273580683, 0.9519365186042265, 4.6)

medians
irep medians (0.9406565656565656, 0.8667929292929293, 3.0)
ripper medians (0.9862781954887219, 0.8539714151827553, 3.0)
tree medians (0.95, 0.966471028971029, 38.0)
max_tree med

{'irep_means': (0.9786908705329758, 0.8321058042468513, 3.2),
 'irep_meds': (0.9862781954887219, 0.8539714151827553, 3.0),
 'irep_models': [<IREP object with fit ruleset>,
  <IREP object with fit ruleset>,
  <IREP object with fit ruleset>,
  <IREP object with fit ruleset>,
  <IREP object with fit ruleset>,
  <IREP object with fit ruleset>,
  <IREP object with fit ruleset>,
  <IREP object with fit ruleset>,
  <IREP object with fit ruleset>,
  <IREP object with fit ruleset>],
 'max_tree_means': (0.9825004273580683, 0.9519365186042265, 4.6),
 'max_tree_meds': (0.9870919387075283, 0.9602272727272727, 3.0),
 'max_tree_models': [DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=1,
              max_features=None, max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_samples_split=2,
              min_weight_fraction_leaf=0.0, presort=False, random_state=42,
              splitter='best'),
  DecisionTre

In [47]:
# Set up
dataset = 'balance-scale.csv'
filename = datasets_path + dataset
class_feat = 'class'
pos_class = 'B' #???
n_classes = 3
k=2
verbosity=1
n_runs=1

In [83]:
# Set up
dataset = 'breast-cancer.csv'
filename = datasets_path + dataset
class_feat = 'Recurrence'
pos_class = 'recurrence-events'
n_classes = 2
k=2
n_runs=10

timer = Timer()
breast_cancer_results = run_experiment(filename, class_feat, pos_class, n_classes, k=2, verbosity=verbosity, random_state=42, n_runs=n_runs)
timer.stop()
pickle.dump(breast_cancer_results,open(filename.replace('.csv','.pkl'),'wb'))
breast_cancer_results

irep 1 of 10
ripper 1 of 10
tree 1 of 10
max-tree 1 of 10
irep 2 of 10
ripper 2 of 10
tree 2 of 10
max-tree 2 of 10
irep 3 of 10
ripper 3 of 10
tree 3 of 10
max-tree 3 of 10
irep 4 of 10
ripper 4 of 10
tree 4 of 10
max-tree 4 of 10
irep 5 of 10
ripper 5 of 10
tree 5 of 10
max-tree 5 of 10
irep 6 of 10
ripper 6 of 10
tree 6 of 10
max-tree 6 of 10
irep 7 of 10
ripper 7 of 10
tree 7 of 10
max-tree 7 of 10
irep 8 of 10
ripper 8 of 10
tree 8 of 10
max-tree 8 of 10
irep 9 of 10
ripper 9 of 10
tree 9 of 10
max-tree 9 of 10
irep 10 of 10
ripper 10 of 10
tree 10 of 10
max-tree 10 of 10

means:
ripper (0.6359574571769693, 0.28957228823848735, 3.7)
irep (0.5756503800858639, 0.31708747162271006, 2.4)
tree means (0.7675098970296164, 0.7343734657077312, 127.6)
max_tree means (0.775415667153092, 0.883709599636459, 6.2)

medians
irep medians (0.5, 0.3220046082949309, 2.0)
ripper medians (0.6181818181818182, 0.2596153846153846, 4.0)
tree medians (0.7583333333333333, 0.7503785420722475, 127.0)
max_tree 

{'irep_means': (0.6359574571769693, 0.28957228823848735, 3.7),
 'irep_meds': (0.6181818181818182, 0.2596153846153846, 4.0),
 'irep_models': [<IREP object with fit ruleset>,
  <IREP object with fit ruleset>,
  <IREP object with fit ruleset>,
  <IREP object with fit ruleset>,
  <IREP object with fit ruleset>,
  <IREP object with fit ruleset>,
  <IREP object with fit ruleset>,
  <IREP object with fit ruleset>,
  <IREP object with fit ruleset>,
  <IREP object with fit ruleset>],
 'max_tree_means': (0.775415667153092, 0.883709599636459, 6.2),
 'max_tree_meds': (0.7739648326715826, 0.9095238095238095, 7.0),
 'max_tree_models': [DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=2,
              max_features=None, max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_samples_split=2,
              min_weight_fraction_leaf=0.0, presort=False, random_state=42,
              splitter='best'),
  DecisionTree

In [39]:
# Set up

# Merge good, vgood
dataset = 'car.csv'
filename = datasets_path + dataset
df = pd.read_csv(filename)
df['mod_class'] = df['class'].map(lambda x: 'good-vgood' if x in {'good','vgood'} else x)
#df.to_csv(datasets_path + 'car_mod.csv')

k=2
verbosity=0
n_runs=10

dataset = 'car_mod.csv'
filename = datasets_path + dataset
class_feat = 'mod_class'
pos_class = 'good-vgood'
car_results = run_experiment(filename, class_feat, pos_class, k=k, verbosity=verbosity, random_state=42, n_runs=n_runs)
pickle.dump(car_results,open(filename.replace('.csv','.pkl'),'wb'))

irep 1 of 10
ripper 1 of 10
tree 1 of 10
max-tree 1 of 10
irep 2 of 10
ripper 2 of 10
tree 2 of 10
max-tree 2 of 10
irep 3 of 10
ripper 3 of 10
tree 3 of 10
max-tree 3 of 10
irep 4 of 10
ripper 4 of 10
tree 4 of 10
max-tree 4 of 10
irep 5 of 10
ripper 5 of 10
tree 5 of 10
max-tree 5 of 10
irep 6 of 10
ripper 6 of 10
tree 6 of 10
max-tree 6 of 10
irep 7 of 10
ripper 7 of 10
tree 7 of 10
max-tree 7 of 10
irep 8 of 10
ripper 8 of 10
tree 8 of 10
max-tree 8 of 10
irep 9 of 10
ripper 9 of 10
tree 9 of 10
max-tree 9 of 10
irep 10 of 10
ripper 10 of 10
tree 10 of 10
max-tree 10 of 10

class balance: 0.0775462962962963

means:
ripper (0.6578135727811323, 0.7817602017667922, 70.1)
irep (0.6869360799875506, 0.40549524833785633, 14.4)
tree means (0.372499976001423, 0.24426395328504286, 606.4)
max_tree means (0.5888716139491696, 0.23457488871972113, 28.2)

medians
irep medians (0.6810924369747899, 0.4476744186046512, 12.5)
ripper medians (0.6475155279503106, 0.7866531850353893, 67.5)
tree medians 

In [None]:
# Set up
dataset = 'breast-cancer.csv'
filename = datasets_path + dataset
class_feat = 'Recurrence'
pos_class = 'recurrence-events'
n_classes = 2
random_state=30

In [72]:
# Set up
dataset = 'connect-4.csv'
filename = datasets_path + dataset
class_feat = 'class'
pos_class = 'win'
verbosity=0
n_runs=10

connect4_results = run_experiment(filename, class_feat, pos_class, verbosity=verbosity, random_state=random_state, n_runs=n_runs)
pickle.dump(connect4_results,open(filename.replace('.csv','.pkl'),'wb'))

irep 1 of 10


KeyboardInterrupt: 

In [11]:
# Set up
dataset = 'house-votes-84.csv'
filename = datasets_path + dataset
class_feat = 'Party'
pos_class = 'democrat'
n_classes = 2
verbosity=0
n_runs=10

timer = Timer()
congress_results = run_experiment(filename, class_feat, pos_class, n_classes, verbosity=verbosity, seed=42, n_runs=n_runs)
timer.stop()
pickle.dump(congress_results,open(filename.replace('.csv','.pkl'),'wb'))
congress_results

ripper 1 of 10
tree 1 of 10
ripper 2 of 10
tree 2 of 10
ripper 3 of 10
tree 3 of 10
ripper 4 of 10
tree 4 of 10
ripper 5 of 10
tree 5 of 10
ripper 6 of 10
tree 6 of 10
ripper 7 of 10
tree 7 of 10
ripper 8 of 10
tree 8 of 10
ripper 9 of 10
tree 9 of 10
ripper 10 of 10
tree 10 of 10
ripper means (0.9822343858411969, 0.8574556909273443, 2.9)
ripper medians (0.9869275461380724, 0.8539714151827553, 3.0)
tree means (0.9541566437840467, 0.9564409277290078, 39.8)
tree medians (0.9612237420615535, 0.9662878787878788, 40.0)
max-tree 1 of 10
max-tree 2 of 10
max-tree 3 of 10
max-tree 4 of 10
max-tree 5 of 10
max-tree 6 of 10
max-tree 7 of 10
max-tree 8 of 10
max-tree 9 of 10
max-tree 10 of 10
maxtree means (0.9798652374805726, 0.9582411320379064, 3.0)
maxtree medians (0.9826122672508215, 0.9562828475871954, 3.0)

[<RIPPER object fit ruleset=[physician-fee-freeze=n]>, <RIPPER object fit ruleset=[physician-fee-freeze=n] V [synfuels-corporation-cutback=y^mx-missile=y] V [education-spending=n^physici

{'max_t_means': (0.9798652374805726, 0.9582411320379064, 3.0),
 'max_t_meds': (0.9826122672508215, 0.9562828475871954, 3.0),
 'n': 10,
 'r_means': (0.9822343858411969, 0.8574556909273443, 2.9),
 'r_meds': (0.9869275461380724, 0.8539714151827553, 3.0),
 'r_models': [<RIPPER object fit ruleset=[physician-fee-freeze=n]>,
  <RIPPER object fit ruleset=[physician-fee-freeze=n] V [synfuels-corporation-cutback=y^mx-missile=y] V [education-spending=n^physician-fee-freeze=?]>,
  <RIPPER object fit ruleset=[physician-fee-freeze=n] V [adoption-of-the-budget-resolution=y^anti-satellite-test-ban=n^Water-project-cost-sharing=y] V [synfuels-corporation-cutback=y^mx-missile=y]>,
  <RIPPER object fit ruleset=[physician-fee-freeze=n] V [synfuels-corporation-cutback=y^adoption-of-the-budget-resolution=y^anti-satellite-test-ban=n]>,
  <RIPPER object fit ruleset=[physician-fee-freeze=n] V [synfuels-corporation-cutback=y^adoption-of-the-budget-resolution=y]>,
  <RIPPER object fit ruleset=[physician-fee-freez

In [79]:
# Set up
dataset = 'kr-vs-kp.csv'
filename = datasets_path + dataset
class_feat = 'won/lost'
pos_class = 'won'
n_classes = 2
k=2
verbosity=1
n_runs=10

chess_results = run_experiment(filename, class_feat, pos_class, n_classes, verbosity=verbosity, random_state=random_state, n_runs=n_runs)
pickle.dump(chess_results,open(filename.replace('.csv','.pkl'),'wb'))

irep 1 of 10


ValueError: Invalid file path or buffer object type: <class 'int'>

In [35]:
# Set up
dataset = 'mushroom.csv'
filename = datasets_path + dataset
class_feat = 'Poisonous/Edible'
pos_class = 'p'
k=2
verbosity=0
n_runs=10

mushroom_results = run_experiment(filename, class_feat, pos_class, verbosity=verbosity, random_state=random_state, n_runs=n_runs)
pickle.dump(mushroom_results,open(filename.replace('.csv','.pkl'),'wb'))

irep 1 of 10
0.923458540042523, 1.0, 6
ripper 1 of 10
1.0, 1.0, 33
tree 1 of 10
1.0, 1.0, 27
max-tree 1 of 10
0.9992748368382887, 1.0, 23
irep 2 of 10
0.9358703312191684, 1.0, 10
ripper 2 of 10
1.0, 1.0, 27
tree 2 of 10
1.0, 1.0, 27
max-tree 2 of 10
0.9985029940119761, 0.9859571322985957, 19
irep 3 of 10
0.9261887863733144, 1.0, 6
ripper 3 of 10
0.9938931297709923, 0.9977011494252873, 32
tree 3 of 10
0.9971014492753624, 1.0, 21
max-tree 3 of 10
0.9971014492753624, 1.0, 21
irep 4 of 10
0.9316725978647686, 1.0, 6
ripper 4 of 10
1.0, 1.0, 24
tree 4 of 10
1.0, 1.0, 27
max-tree 4 of 10
0.9992603550295858, 0.9846938775510204, 19
irep 5 of 10
0.9186704384724187, 1.0, 6
ripper 5 of 10
1.0, 1.0, 32
tree 5 of 10
1.0, 1.0, 29
max-tree 5 of 10
0.9992706053975201, 0.9913169319826338, 23
irep 6 of 10
0.9224199288256227, 1.0, 6
ripper 6 of 10
1.0, 1.0, 31
tree 6 of 10
1.0, 1.0, 25
max-tree 6 of 10
0.9985347985347985, 0.984115523465704, 19
irep 7 of 10
0.9283154121863799, 1.0, 6
ripper 7 of 10
1.0, 1.

In [50]:
mushroom_df = pd.read_csv(datasets_path + 'mushroom.csv')
mushroom_results['ripper_models'][2].predict(mushroom_df.drop('Poisonous/Edible',axis=1).head(),give_reasons=True)

([True, False, False, True, False],
 [[<Rule object: [Gill-size=n^Population=s]>,
   <Rule object: [Gill-size=n^Cap-surface=s^Stalk-shape=e]>],
  [],
  [],
  [<Rule object: [Gill-size=n^Population=s]>],
  []])

In [42]:
# Set up


# Merge recommend and very recommend
dataset = 'nursery.csv'
filename = datasets_path + dataset
df = pd.read_csv(filename)
df['mod_class'] = df['class'].map(lambda x: 'recommend-very_recom' if x in {'recommend','very_recom'} else x)
#df.to_csv(datasets_path + 'nursery_mod.csv')

dataset = 'nursery_mod.csv'
filename = datasets_path + dataset
class_feat = 'mod_class'
pos_class = 'recommend-very_recom'
k=2
verbosity=0
n_runs=10

nursery_results = run_experiment(filename, class_feat, pos_class, k=k, verbosity=verbosity, random_state=random_state, n_runs=n_runs)
pickle.dump(nursery_results,open(filename.replace('.csv','.pkl'),'wb'))

irep 1 of 10
ripper 1 of 10


KeyboardInterrupt: 

In [77]:
# Set up

# Has test and train already separated

train_dataset = 'SPECT_train.csv'
test_dataset = 'SPECT_test.csv'

train_filename = datasets_path + train_dataset
test_filename = datasets_path + test_dataset
class_feat = 'class'
pos_class = '1'
k=2
verbosity=0
n_runs=1

spect_results = run_experiment(filename, class_feat, pos_class, k=k, verbosity=verbosity, random_state=random_state, n_runs=n_runs)
pickle.dump(spect_results,open(filename.replace('.csv','.pkl'),'wb'))

irep 1 of 1


KeyError: 'class'

In [74]:
# Set up

# Has test and train

dataset = 'tic-tac-toe.csv'
filename = datasets_path + dataset
class_feat = 'Class'
pos_class = 'positive'
n_classes = 2
k=2
verbosity=0
n_runs=10

tic_results = run_experiment(filename, class_feat, pos_class, verbosity=verbosity, random_state=random_state, n_runs=n_runs)
pickle.dump(tic_results,open(filename.replace('.csv','.pkl'),'wb'))

irep 1 of 10
ripper 1 of 10
tree 1 of 10
max-tree 1 of 10
irep 2 of 10
ripper 2 of 10
tree 2 of 10
max-tree 2 of 10
irep 3 of 10
ripper 3 of 10
tree 3 of 10
max-tree 3 of 10
irep 4 of 10
ripper 4 of 10
tree 4 of 10
max-tree 4 of 10
irep 5 of 10
ripper 5 of 10
tree 5 of 10
max-tree 5 of 10
irep 6 of 10
ripper 6 of 10
tree 6 of 10
max-tree 6 of 10
irep 7 of 10
ripper 7 of 10
tree 7 of 10
max-tree 7 of 10
irep 8 of 10
ripper 8 of 10
tree 8 of 10
max-tree 8 of 10
irep 9 of 10
ripper 9 of 10
tree 9 of 10
max-tree 9 of 10
irep 10 of 10
ripper 10 of 10
tree 10 of 10
max-tree 10 of 10

class balance: 0.6534446764091858

means:
ripper (0.9811209477959754, 1.0, 26.4)
irep (0.7959384786569028, 0.9819516185579058, 11.5)
tree means (0.33346100707831605, 0.3546354186091842, 331.4)
max_tree means (0.49777331644907336, 0.32984091773236657, 17.0)

medians
irep medians (0.8040839757257667, 1.0, 13.5)
ripper medians (0.995260663507109, 1.0, 27.0)
tree medians (0.33946877912395157, 0.3596273291925466, 333