In [1]:
import pandas as pd

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.metrics import precision_score, recall_score

In [3]:
import ruleset_rip

def make_irep_dataset(dataset_filename, random_state=42, test_size=.33):
    """Load a CSV dataset and split its rows into train and test frames.

    Parameters
    ----------
    dataset_filename : str or path-like
        Location of the CSV file, passed straight to ``pd.read_csv``.
    random_state : int, default 42
        Seed forwarded to ``train_test_split`` so the split is repeatable.
    test_size : float or int, default .33
        Size of the held-out portion, forwarded to ``train_test_split``.
        The default matches the value that used to be hard-coded here.

    Returns
    -------
    tuple
        ``(train, test)`` as returned by ``train_test_split`` for the
        loaded DataFrame.
    """
    # Load df
    df = pd.read_csv(dataset_filename)

    # Split
    train, test = train_test_split(
        df, test_size=test_size, random_state=random_state
    )

    return train, test

In [4]:
def make_irep(train, class_feat, pos_class, random_state=42):
    """Build a ``ruleset_rip.IREP`` classifier and fit it on ``train``.

    ``class_feat`` and ``pos_class`` are handed to the ``IREP`` constructor;
    ``random_state`` is passed to ``fit`` as its ``seed``. Fitting is run
    with ``prune=True`` and ``display=False``. The fitted classifier object
    is returned.
    """
    classifier = ruleset_rip.IREP(class_feat=class_feat, pos_class=pos_class)
    fit_options = {"seed": random_state, "prune": True, "display": False}
    classifier.fit(train, **fit_options)
    return classifier

In [5]:
def score_irep(irep_clf, test, class_feat):
    """Score a fitted classifier on the held-out frame.

    The ``class_feat`` column of ``test`` is used as the labels and the
    remaining columns as the features. ``irep_clf.score`` is called once
    with ``precision_score`` and once with ``recall_score``; the number of
    conditions comes from ``irep_clf.ruleset_.count_conds()``.

    Returns a ``(precision, recall, total_conds)`` tuple.
    """
    labels = test[class_feat]
    features = test.drop(columns=class_feat)

    # Same features/labels for both metrics; only the scoring function differs.
    precision, recall = (
        irep_clf.score(features, labels, metric)
        for metric in (precision_score, recall_score)
    )
    return precision, recall, irep_clf.ruleset_.count_conds()

In [6]:
# Relative path prefix that dataset file names are appended to.
datasets_path = "../datasets/"
# Seed value reused wherever the notebook needs a random_state.
random_state = 0

In [7]:
# Set up: the CSV to load and the column/value that define the positive class
dataset = "house-votes-84.csv"
filename = datasets_path + dataset

class_feat, pos_class = "Party", "democrat"
n_classes = 2  # not referenced in the cells visible here; kept for any later use

In [8]:
# Run irep: split the data, fit on the train part, score on the test part
train, test = make_irep_dataset(
    dataset_filename=filename,
    random_state=random_state,
)
irep_clf = make_irep(
    train=train,
    class_feat=class_feat,
    pos_class=pos_class,
    random_state=random_state,
)
irep_precision, irep_recall, total_conds = score_irep(
    irep_clf=irep_clf,
    test=test,
    class_feat=class_feat,
)
# Bare last expression so the notebook displays the three results.
irep_precision, irep_recall, total_conds

(0.9753086419753086, 0.9404761904761905, 3)