In [1]:
import pandas as pd

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.metrics import precision_score, recall_score

In [3]:
import ripper

def make_ripper_dataset(dataset_filename, random_state=42, test_size=.33):
    """Load a CSV dataset and split it into train and test DataFrames.

    Parameters
    ----------
    dataset_filename
        Location of the CSV file; handed unchanged to ``pd.read_csv``.
    random_state : int, default 42
        Seed forwarded to ``train_test_split`` so the split is reproducible.
    test_size : default .33
        Size of the test split, forwarded to ``train_test_split``. The
        default keeps the previously hard-coded value.

    Returns
    -------
    tuple
        ``(train, test)`` exactly as returned by ``train_test_split``.
    """
    # Load the full dataset
    df = pd.read_csv(dataset_filename)

    # Split into train and test partitions
    train, test = train_test_split(df, test_size=test_size, random_state=random_state)

    return train, test

In [4]:
def make_ripper(train, class_feat, pos_class, random_state=42, prune=True, verbose=True):
    """Build a ``ripper.RIPPER`` classifier and fit it on ``train``.

    Parameters
    ----------
    train
        Training data passed as the first argument to ``RIPPER.fit``.
    class_feat
        Passed to the ``RIPPER`` constructor as ``class_feat``.
    pos_class
        Passed to the ``RIPPER`` constructor as ``pos_class``.
    random_state : int, default 42
        Forwarded to ``fit`` as ``seed``.
    prune : bool, default True
        Forwarded to ``fit`` as ``prune``. The default keeps the previously
        hard-coded value.
    verbose : bool, default True
        Forwarded to ``fit`` as ``verbose``. The default keeps the previously
        hard-coded value; pass ``False`` to avoid the long training log.
        NOTE(review): assumes ``fit`` stays quiet when ``verbose`` is falsy;
        confirm against the ``ripper`` module.

    Returns
    -------
    The fitted ``ripper.RIPPER`` instance.
    """
    # Train
    clf = ripper.RIPPER(class_feat=class_feat, pos_class=pos_class)
    clf.fit(train, seed=random_state, prune=prune, verbose=verbose)
    return clf

In [5]:
def score_ripper(ripper_clf, test, class_feat):
    """Score a fitted classifier on ``test``.

    The ``class_feat`` column is separated from the remaining columns, and
    both parts are handed to ``ripper_clf.score`` once per metric.

    Returns
    -------
    tuple
        ``(precision, recall, total_conds)`` where the first two are the
        results of ``ripper_clf.score`` with ``precision_score`` and
        ``recall_score``, and the last is
        ``ripper_clf.ruleset_.count_conds()``.
    """
    features = test.drop(columns=class_feat)
    labels = test[class_feat]

    def _score_with(metric):
        # Same features/labels for every metric; only the scoring function varies.
        return ripper_clf.score(features, labels, metric)

    return (
        _score_with(precision_score),
        _score_with(recall_score),
        ripper_clf.ruleset_.count_conds(),
    )

In [6]:
# Seed handed to both the train/test split and the RIPPER fit.
random_state = 0

# Directory containing the CSV datasets. Keep the trailing slash: file names
# are appended to it by plain string concatenation.
datasets_path = '../datasets/'

In [7]:
# Dataset to run on, and its full path under the datasets directory
dataset = 'house-votes-84.csv'
filename = f'{datasets_path}{dataset}'

# Target column and the label treated as the positive class
class_feat, pos_class = 'Party', 'democrat'
n_classes = 2

In [8]:
# Split the data, fit RIPPER on the training part, then score it on the test part
train, test = make_ripper_dataset(dataset_filename=filename, random_state=random_state)
ripper_clf = make_ripper(
    train=train,
    class_feat=class_feat,
    pos_class=pos_class,
    random_state=random_state,
)
ripper_precision, ripper_recall, ripper_conds = score_ripper(
    ripper_clf=ripper_clf, test=test, class_feat=class_feat
)

# Last expression of the cell: displayed as (precision, recall, total conditions)
ripper_precision, ripper_recall, ripper_conds

grew rule: [physician-fee-freeze=n^adoption-of-the-budget-resolution=y]
on pruneset iter, p_covered 103 n_covered 0
on pruneset iter, p_covered 113 n_covered 1
pruned rule: [physician-fee-freeze=n^adoption-of-the-budget-resolution=y]
rule_bits| rule [physician-fee-freeze=n^adoption-of-the-budget-resolution=y] k 2 n 48 pr 0.041666666666666664: rule_bits 6.5
exceptions_bits| P 183 p 154 fp 0 fn 29: exceptions_bits 0.0
Rule [physician-fee-freeze=n^adoption-of-the-budget-resolution=y] total_bits 6.5
examples remaining: 29 pos, 108 neg
smallest_dl 6.5

grew rule: [synfuels-corporation-cutback=y^export-administration-act-south-africa=n]
on pruneset iter, p_covered 3 n_covered 2
on pruneset iter, p_covered 12 n_covered 8
pruned rule: [synfuels-corporation-cutback=y^export-administration-act-south-africa=n]
rule_bits| rule [synfuels-corporation-cutback=y^export-administration-act-south-africa=n] k 2 n 48 pr 0.041666666666666664: rule_bits 6.5
exceptions_bits| P 29 p 6 fp 2 fn 23: exceptions_bi

(0.8901098901098901, 0.9642857142857143, 13)

In [9]:
# Display the fitted classifier; its repr lists the learned ruleset
ripper_clf

<IREP object fit ruleset=[[physician-fee-freeze=n^adoption-of-the-budget-resolution=y]v[synfuels-corporation-cutback=y^export-administration-act-south-africa=n]v[el-salvador-aid=n^Water-project-cost-sharing=y]v[mx-missile=y^anti-satellite-test-ban=n]v[physician-fee-freeze=?^Handicapped-infants=n]v[adoption-of-the-budget-resolution=?^mx-missile=y]v[physician-fee-freeze=n]]>