In [1]:
import pandas as pd
from base import Timer

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.metrics import precision_score, recall_score

In [3]:
import ripper

def make_ripper_dataset(dataset_filename, random_state=42, test_size=.33):
    """Load a CSV dataset and split it into train and test DataFrames.

    Parameters
    ----------
    dataset_filename : str or file-like
        Passed straight to ``pandas.read_csv``.
    random_state : int, default 42
        Seed forwarded to ``train_test_split`` so the split is repeatable.
    test_size : float or int, default .33
        Forwarded unchanged to ``train_test_split``. The default matches the
        value that used to be hard-coded here, so existing callers get the
        same split as before.

    Returns
    -------
    tuple
        ``(train, test)`` as returned by ``train_test_split``; the class
        column is still part of both frames.
    """
    df = pd.read_csv(dataset_filename)
    train, test = train_test_split(
        df,
        test_size=test_size,
        random_state=random_state,
    )
    return train, test

In [4]:
def make_ripper(train, class_feat, pos_class, k=1, random_state=42, verbosity=0):
    """Construct a ``ripper.RIPPER`` classifier and fit it on ``train``.

    ``class_feat``, ``pos_class``, ``k`` and ``verbosity`` are handed to the
    ``RIPPER`` constructor as keyword arguments; ``random_state`` is passed to
    ``fit`` as its ``seed``. Returns the fitted classifier object.
    """
    ripper_clf = ripper.RIPPER(
        class_feat=class_feat,
        pos_class=pos_class,
        k=k,
        verbosity=verbosity,
    )
    # The return value of fit() is ignored, so hand back the instance itself.
    ripper_clf.fit(train, seed=random_state)
    return ripper_clf

In [5]:
def score_ripper(ripper_clf, test, class_feat):
    """Score a fitted classifier on ``test``.

    The ``class_feat`` column is split off as the labels and every other
    column is used as the features. The classifier's own ``score`` method is
    called once with ``precision_score`` and once with ``recall_score``.

    Returns
    -------
    tuple
        ``(precision, recall, total_conds)`` where ``total_conds`` is
        ``ripper_clf.ruleset_.count_conds()``.
    """
    features = test.drop(columns=class_feat)
    labels = test[class_feat]

    # Precision is scored before recall.
    precision, recall = [
        ripper_clf.score(features, labels, metric)
        for metric in (precision_score, recall_score)
    ]
    n_conds = ripper_clf.ruleset_.count_conds()
    return precision, recall, n_conds

In [6]:
# Prefix prepended to each dataset file name. It must end with '/', because
# the setup cells join it to the file name with no separator inserted.
datasets_path = '../datasets/'
# Initial seed. NOTE(review): the dataset setup cells and the run cell all
# reassign random_state, so this value is overwritten before it is used.
random_state = 0

In [7]:
# Dataset setup: house-votes-84.
# NOTE(review): the setup cells after this one assign the same global names,
# so on a top-to-bottom run only the last setup cell executed is in effect.
dataset = 'house-votes-84.csv'
filename = f'{datasets_path}{dataset}'
class_feat = 'Party'      # column holding the class label
pos_class = 'democrat'    # label value passed to RIPPER as pos_class
n_classes = 2             # not read by any cell shown in this notebook
random_state = 42         # the run cell reassigns random_state before use
k = 5                     # forwarded to ripper.RIPPER(k=...)

In [8]:
# Dataset setup: breast-cancer.
# NOTE(review): unlike the other setup cells, this one does not assign `k`.
# The run cell passes k=k, so it will use whatever value an earlier executed
# cell left in the kernel. Confirm the intended k and set it here.
dataset = 'breast-cancer.csv'
filename = f'{datasets_path}{dataset}'
class_feat = 'Recurrence'         # column holding the class label
pos_class = 'recurrence-events'   # label value passed to RIPPER as pos_class
n_classes = 2                     # not read by any cell shown in this notebook
random_state = 30                 # the run cell reassigns random_state before use

In [None]:
# Dataset setup: adult.
# This cell assigns the same global names as the setup cells before it, so
# running it replaces their values.
dataset = 'adult.csv'
filename = f'{datasets_path}{dataset}'
class_feat = 'income'   # column holding the class label
pos_class = '>50K'      # label value passed to RIPPER as pos_class
n_classes = 2           # not read by any cell shown in this notebook
random_state = 0        # the run cell reassigns random_state before use
k = 2                   # forwarded to ripper.RIPPER(k=...)

In [None]:
# Run ripper: split the dataset chosen by the last executed setup cell, then
# fit RIPPER on the training split.
# NOTE(review): this reassignment overrides the random_state chosen in the
# setup cell, so both the split and the fit below are seeded with 2.
random_state=2
# Forwarded to ripper.RIPPER(verbosity=...); at this setting the fit prints
# the step-by-step log captured in this cell's output.
verbosity=5
train, test = make_ripper_dataset(filename, random_state=random_state)
# Timer comes from the local `base` module. Presumably buzz() reports the time
# elapsed since the Timer was created, i.e. the duration of the fit; TODO confirm.
timer = Timer()
ripper_clf = make_ripper(train, class_feat, pos_class, k=k, random_state=random_state, verbosity=verbosity)
timer.buzz()
# Scoring is not done here; a separate cell calls
# score_ripper(ripper_clf, test, class_feat).

growing ruleset...

pos_growset 1731 pos_pruneset 3517
neg_growset 5467 neg_pruneset 11100
growing rule
[]
gain 1329 [marital.status=Married-civ-spouse]
negs remaining 1844
gain 253 [marital.status=Married-civ-spouse^education=Bachelors]
negs remaining 205
gain 43.9 [marital.status=Married-civ-spouse^education=Bachelors^occupation=Exec-managerial]
negs remaining 33
gain 6.8 [marital.status=Married-civ-spouse^education=Bachelors^occupation=Exec-managerial^native.country=United-States]
negs remaining 25
gain 3.5 [marital.status=Married-civ-spouse^education=Bachelors^occupation=Exec-managerial^native.country=United-States^workclass=Self-emp-inc]
negs remaining 0
grew rule: [marital.status=Married-civ-spouse^education=Bachelors^occupation=Exec-managerial^native.country=United-States^workclass=Self-emp-inc]
prune value of [marital.status=Married-civ-spouse^education=Bachelors^occupation=Exec-managerial^native.country=United-States^workclass=Self-emp-inc]: 0.62
prune value of [marital.status

In [None]:
# Display the fitted classifier's `ruleset_` attribute (the same object whose
# conditions score_ripper counts via count_conds()).
ripper_clf.ruleset_

In [None]:
# Score on the held-out split; the cell displays the tuple
# (precision, recall, total_conds) returned by score_ripper.
score_ripper(ripper_clf, test, class_feat)