In [1]:
import ruleset as rs
from ruleset import RIPPER, ripper, base
import pickle
import pandas as pd

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, accuracy_score

In [3]:
def make_rs_dataset(dataset_filename, random_state=42, test_size=.33):
    """Read a CSV dataset and split it into train and test frames.

    Parameters
    ----------
    dataset_filename : str or path-like
        Location of the CSV file; handed straight to ``pd.read_csv``.
    random_state : int, default 42
        Seed forwarded to ``train_test_split`` so the split is repeatable.
    test_size : float or int, default .33
        Forwarded to ``train_test_split``. The default reproduces the
        hold-out fraction that used to be hard-coded here.

    Returns
    -------
    tuple
        ``(train, test)`` exactly as returned by ``train_test_split``.
    """
    df = pd.read_csv(dataset_filename)
    train, test = train_test_split(df, test_size=test_size, random_state=random_state)
    return train, test

In [4]:
def make_ripper(train, class_feat, pos_class, k=2, random_state=42, verbosity=0,
                n_discretize_bins=5):
    """Build an ``rs.RIPPER`` classifier and fit it on ``train``.

    Parameters
    ----------
    train : DataFrame
        Training data; passed positionally to ``RIPPER.fit``.
    class_feat : str
        Forwarded to ``fit`` as ``class_feat`` (the column holding the label).
    pos_class : object
        Forwarded to ``fit`` as ``pos_class``.
    k : int, default 2
        Forwarded to the ``RIPPER`` constructor.
        NOTE(review): presumably the number of optimization passes -- confirm
        against the ruleset package.
    random_state : int, default 42
        Forwarded to ``fit``.
    verbosity : int, default 0
        Forwarded to the ``RIPPER`` constructor.
    n_discretize_bins : int, default 5
        Forwarded to ``fit``. The default reproduces the value that used to be
        hard-coded here.

    Returns
    -------
    The fitted ``RIPPER`` instance.
    """
    rip_clf = rs.RIPPER(k=k, verbosity=verbosity)
    rip_clf.fit(train, class_feat=class_feat, pos_class=pos_class,
                n_discretize_bins=n_discretize_bins, random_state=random_state)
    return rip_clf

In [5]:
# Directory (relative to the notebook) that is prefixed to the dataset file name below.
datasets_path = '../datasets/'
# Seed value for the run.
# NOTE(review): the later cells shown here pass 42 literally instead of reading this name.
random_state = 42

In [6]:
# Dataset under study, the column holding the label, and the label treated as positive.
dataset = 'mushroom.csv'
filename = datasets_path + dataset
class_feat = 'Poisonous/Edible'
pos_class = 'p'

# Stored results sit next to the CSV, with '.pkl' substituted for '.csv'.
# NOTE(review): pickle.load can execute arbitrary code -- only load pickles you produced yourself.
# The context manager closes the handle; a bare open() inside the call never closed it explicitly.
with open(filename.replace('.csv','.pkl'), "rb") as results_file:
    result = pickle.load(results_file)
# NOTE(review): `model` is rebound from temp_rip.pkl in the next cell.
model = result['ripper_models'][0]

In [7]:
# Reuse the seed from the configuration cell rather than repeating the literal.
train, test = make_rs_dataset(filename, random_state=random_state)

# Load the cached classifier when it exists; otherwise train it once and cache it,
# so the notebook also runs from a fresh checkout where temp_rip.pkl is absent.
# NOTE(review): pickle.load can execute arbitrary code -- only load pickles you produced yourself.
try:
    with open('temp_rip.pkl','rb') as cached_model_file:
        model = pickle.load(cached_model_file)
except FileNotFoundError:
    model = make_ripper(train, class_feat, pos_class, k=0, random_state=random_state, verbosity=0)
    with open('temp_rip.pkl','wb') as cached_model_file:
        pickle.dump(model, cached_model_file)

In [8]:
# Partition the training frame by class, then collect the pieces that are
# handed to `stats.update` in the next cell.
pos_df, neg_df = base.pos_neg_split(train, class_feat, pos_class)
possible_conds = model.possible_conds
ruleset = model.ruleset_
stats = ripper.RulesetStats()

In [16]:
# Refresh `stats` from the fitted ruleset with verbose tracing switched on.
stats.update(
    ruleset,
    possible_conds,
    pos_df,
    neg_df,
    bestsubset_dl=False,
    ret_bestsubset=False,
    verbosity=5,
)

mismatch at 9


In [17]:
# Show each entry of stats.subset_dls next to its index. enumerate() yields the
# same (index, value) tuples as zipping against range(len(...)).
list(enumerate(stats.subset_dls))

[(0, 3377.7235010761033),
 (1, 2152.5167396808397),
 (2, 1778.1709835519496),
 (3, 1142.696786306302),
 (4, 827.9312750686732),
 (5, 734.9581516407509),
 (6, 663.9150892363692),
 (7, 538.3074896667496),
 (8, 412.7397059699782),
 (9, 377.6444895630475),
 (10, 294.9152417352419),
 (11, 250.31921401773693),
 (12, 213.42243213548036),
 (13, 166.730637899361),
 (14, 152.62125713656573)]

In [18]:
# Overwrite two entries in place with large artificial values. The saved output of the
# later dl_pruned_ruleset cell reports exactly rules 4 and 1 as "raised dl -- removing".
# NOTE(review): this mutates `stats`; re-run stats.update to restore the computed values.
stats.subset_dls[1] = 4000
stats.subset_dls[4] = 3000

In [11]:
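# Disabled experiment: build a ruleset in which rule 4 replaces rule 9 and rules 1-2 are
# appended again at the end, then refresh `stats` from it.
# NOTE(review): the saved outputs of the cells with execution counts 11, 14 and 15 appear
# to come from this variant -- re-run the notebook top to bottom before trusting them.
#temprs = base.Ruleset(ruleset.rules[:9]+[ruleset.rules[4]]+ruleset.rules[10:]+ruleset.rules[1:3])
#stats.update(temprs, possible_conds, pos_df, neg_df, bestsubset_dl=False, ret_bestsubset=False, verbosity=5)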

mismatch at 9


In [19]:
# Show each entry of stats.subset_dls next to its index again, now that entries 1 and 4
# have been overwritten. enumerate() yields the same (index, value) tuples as
# zipping against range(len(...)).
list(enumerate(stats.subset_dls))

[(0, 3377.7235010761033),
 (1, 4000),
 (2, 1778.1709835519496),
 (3, 1142.696786306302),
 (4, 3000),
 (5, 734.9581516407509),
 (6, 663.9150892363692),
 (7, 538.3074896667496),
 (8, 412.7397059699782),
 (9, 377.6444895630475),
 (10, 294.9152417352419),
 (11, 250.31921401773693),
 (12, 213.42243213548036),
 (13, 166.730637899361),
 (14, 152.62125713656573)]

In [20]:
# Ask `stats` for a pruned ruleset. The saved output shows rule 4 and then rule 1 being
# removed, i.e. the two subset_dls entries that were overwritten by hand earlier.
# NOTE(review): whether this call also mutates `stats` is not visible here -- confirm.
stats.dl_pruned_ruleset(possible_conds, pos_df, neg_df)

rule 4 raised dl -- removing
mismatch at 4
new ruleset is [Stalk-surface-above-ring=k^Gill-spacing=c] V [Gill-size=n^Sport-print-color=w^Gill-spacing=c] V [Gill-size=n^Population=s] V [Sport-print-color=h^Cap-surface=s] V [Gill-size=n^Habitat=g] V [Gill-size=n^Sport-print-color=k^Stalk-root=b] V [Population=v^Stalk-shape=e^Bruises?=t^Cap-shape=b] V [Gill-size=n^Cap-surface=y^Bruises?=t] V [Gill-size=n^Stalk-root=c] V [Population=v^Stalk-shape=e^Stalk-root=b^Stalk-color-below-ring=w^Cap-surface=f] V [Ring-number=t^Population=v^Cap-shape=f^Gill-color=g] V [Ring-number=t^Population=v^Habitat=g] V [Habitat=m^Cap-shape=f] V [Habitat=l^Cap-color=w]
rule 1 raised dl -- removing
mismatch at 1
new ruleset is [Stalk-surface-above-ring=k^Gill-spacing=c] V [Gill-size=n^Population=s] V [Sport-print-color=h^Cap-surface=s] V [Gill-size=n^Habitat=g] V [Gill-size=n^Sport-print-color=k^Stalk-root=b] V [Population=v^Stalk-shape=e^Bruises?=t^Cap-shape=b] V [Gill-size=n^Cap-surface=y^Bruises?=t] V [Gill-si

In [14]:
# Query `stats.dl_change` for index 14.
# NOTE(review): this cell's execution count (14) is lower than the cells above it (16-20),
# so the saved output predates them; index 14 may also be out of range if the tracked
# ruleset has fewer than 15 rules -- re-run after Restart & Run All and confirm.
stats.dl_change(14)

-8.946580185149173

In [15]:
# Display the ruleset currently held by `stats`.
# NOTE(review): the saved output lists 17 rules with repeats, which matches the
# commented-out `temprs` experiment rather than the 15-entry subset_dls shown earlier;
# the execution count (15) confirms it was produced before the cells above were re-run.
stats.ruleset

<Ruleset object: [Stalk-surface-above-ring=k^Gill-spacing=c] V [Gill-size=n^Sport-print-color=w^Gill-spacing=c] V [Gill-size=n^Population=s] V [Sport-print-color=h^Cap-surface=s] V [Gill-size=n^Cap-surface=s^Stalk-shape=e] V [Gill-size=n^Habitat=g] V [Gill-size=n^Sport-print-color=k^Stalk-root=b] V [Population=v^Stalk-shape=e^Bruises?=t^Cap-shape=b] V [Gill-size=n^Cap-surface=y^Bruises?=t] V [Gill-size=n^Cap-surface=s^Stalk-shape=e] V [Population=v^Stalk-shape=e^Stalk-root=b^Stalk-color-below-ring=w^Cap-surface=f] V [Ring-number=t^Population=v^Cap-shape=f^Gill-color=g] V [Ring-number=t^Population=v^Habitat=g] V [Habitat=m^Cap-shape=f] V [Habitat=l^Cap-color=w] V [Gill-size=n^Sport-print-color=w^Gill-spacing=c] V [Gill-size=n^Population=s]>