In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from imodels import *
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
# Load the cleaned CSV, then separate the 'any_deviance' target from the
# feature columns (the 'id' column is excluded from the features).
df = pd.read_csv('../data/ICPSR_03986/DS0001/data_clean.csv')
y = df['any_deviance']
X = df.drop(columns=['id', 'any_deviance'])

# Different models select different rules

In [None]:
# Fit several models on one fixed train/test split, then report each
# model's test accuracy and the rules it learned (when it exposes any).
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
models = [
    BoostedRulesClassifier(n_estimators=5),
    RuleFitClassifier(n_estimators=10, tree_size=3, max_rules=5),
    DecisionTreeClassifier()
]
for m in tqdm(models):
    if isinstance(m, DecisionTreeClassifier):
        # scikit-learn's fit() has no `feature_names` keyword; passing it
        # raises TypeError, so the plain tree is fit without it.
        m.fit(X_train, y_train)
    else:
        m.fit(X_train, y_train, feature_names=X_train.columns)
    print(m, 'acc', accuracy_score(y_test, m.predict(X_test)))
    # A plain scikit-learn tree has no `rules_` attribute; fall back to a
    # short note instead of raising AttributeError.
    print(getattr(m, 'rules_', 'no rules_ attribute on this model'))

  0%|          | 0/2 [00:00<?, ?it/s]

Mined rules:
	you_gang_fight <= 1.5
	you_broken_+_entering <= 1.5
	any_victimization <= 0.5
	you_attack_intent_kill/maim <= 1.5
	fr_suggest_agnts_law <= 1.5
 acc 0.9428571428571428
[you_gang_fight <= 1.5, you_gang_fight > 1.5, you_broken_+_entering <= 1.5, you_broken_+_entering > 1.5, any_victimization <= 0.5, any_victimization > 0.5, you_attack_intent_kill/maim <= 1.5, you_attack_intent_kill/maim > 1.5, fr_suggest_agnts_law <= 1.5, fr_suggest_agnts_law > 1.5]




# The same model selects different rules under slightly different train/test splits

In [None]:
# Refit the same Bayesian rule list on three different random train/test
# splits and print the accuracy and learned rules for each split.
np.random.seed(13)  # seeds NumPy's global RNG before the fits
for i in tqdm(range(3)):
    # A different random_state per iteration gives a different split.
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=43 + i)
    # `from imodels import *` binds the class names but not the `imodels`
    # package name itself, so the class is referenced directly.
    m = BayesianRuleListClassifier(max_iter=10)
    m.fit(X_train, y_train, feature_names=X_train.columns)
    print('acc', accuracy_score(y_test, m.predict(X_test)))
    print('rules', m.rules_)