# DLS Testing and comparison with pyIDS

In [1]:
# Number of leading mined rules handed to both optimizers (the Lakkaraju DLS
# run and the PyIDS run slice their rule lists with this same cutoff).
RULES_CUTOFF = 20

In [2]:
from pyarc.data_structures import TransactionDB
from pyarc.algorithms import top_rules, createCARs

# Guide to use lvhimabindu/interpretable_decision_sets

* git clone https://github.com/lvhimabindu/interpretable_decision_sets interpretable_decision_sets_lakkaraju
* locate your Python *site-packages* directory
* copy *interpretable_decision_sets_lakkaraju* into *site-packages*
* correct the errors in the code so that it runs (wrong indentation etc.)

# Interpretable Decision Sets - setup

In [3]:
import interpretable_decision_sets_lakkaraju.IDS_smooth_local as sls_lakk
from interpretable_decision_sets_lakkaraju.IDS_deterministic_local import run_apriori, createrules, deterministic_local_search, func_evaluation, rule

In [4]:
import pandas as pd
import numpy as np
import time

## Lakkaraju DLS

In [5]:
def get_IDS_rules_representation(pyARC_rules):
    """Convert pyARC class association rules into Lakkaraju IDS ``rule`` objects.

    Parameters
    ----------
    pyARC_rules : iterable
        Rules exposing ``antecedent`` (anything ``dict()`` accepts, e.g. an
        iterable of ``(attribute, value)`` pairs) and ``consequent`` (an
        indexable whose element at position 1 is the class value).

    Returns
    -------
    list
        One ``rule(feature_list, value_list, class_value)`` per input rule,
        in input order. An empty input yields an empty list.
    """
    IDS_rules = []

    for pyARC_rule in pyARC_rules:
        # Materialize the antecedent once: features and values are then read
        # from the same mapping, so they stay aligned even when the
        # antecedent is a one-shot iterable.
        antecedent = dict(pyARC_rule.antecedent)
        feature_list = list(antecedent.keys())
        value_list = list(antecedent.values())

        class_value = pyARC_rule.consequent[1]

        IDS_rules.append(rule(feature_list, value_list, class_value))

    return IDS_rules

In [6]:
# Load the Titanic data set. NOTE: absolute local path - adjust for your machine.
df = pd.read_csv("C:/code/python/pyids/data/titanic.csv")
# Cast the class column to str and append "_" (labels then read "0_" / "1_").
# NOTE(review): presumably this keeps the labels from being handled as numbers - confirm.
df["Died"] = df["Died"].astype(str) + "_"
# Class-label column, passed separately to deterministic_local_search below.
Y = df["Died"]
txns = TransactionDB.from_DataFrame(df)

# Mine rules with pyARC and turn them into class association rules (CARs).
rules = top_rules(txns.string_representation, appearance=txns.appeardict)
cars = createCARs(rules)

# The same CARs as Lakkaraju `rule` objects, so both implementations are fed
# from one mined rule list.
list_of_rules = get_IDS_rules_representation(cars)


Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=4
Rule count: 52, Iteration: 1
Increasing maxlen 4
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 76, Iteration: 2
Decreasing confidence to 0.45
Running apriori with setting: confidence=0.45, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 76, Iteration: 3
Decreasing confidence to 0.4
Running apriori with setting: confidence=0.4, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 76, Iteration: 4
Decreasing confidence to 0.35000000000000003
Running apriori with setting: confidence=0.35000000000000003, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 76, Iteration: 5
Decreasing confidence to 0.30000000000000004
Running apriori with setting: confidence=0.30000000000000004, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 76, Iteration: 6
Decreasing confidence to 0.25000000000000006
Running apriori

In [7]:
# Print the first RULES_CUTOFF converted rules, i.e. the candidates that are
# passed to deterministic_local_search in the next cell.
for r in list_of_rules[:RULES_CUTOFF]:
    r.print_rule()

If Age_Cat == child and Passenger_Cat == 2nd_class, then 0_
If Age_Cat == child and Passenger_Cat == 2nd_class and Gender == female, then 0_
If Gender == male and Age_Cat == child and Passenger_Cat == 2nd_class, then 0_
If Age_Cat == child and Passenger_Cat == 1st_class, then 0_
If Gender == male and Age_Cat == child and Passenger_Cat == 1st_class, then 0_
If Age_Cat == child and Passenger_Cat == 1st_class and Gender == female, then 0_
If Gender == female and Passenger_Cat == 1st_class, then 0_
If Age_Cat == adult and Gender == female and Passenger_Cat == 1st_class, then 0_
If Age_Cat == adult and Gender == male and Passenger_Cat == 2nd_class, then 1_
If Gender == female and Passenger_Cat == 2nd_class, then 0_
If Age_Cat == adult and Gender == female and Passenger_Cat == 2nd_class, then 0_
If Gender == male and Passenger_Cat == 2nd_class, then 1_
If Gender == female and Passenger_Cat == crew, then 0_
If Age_Cat == adult and Gender == female and Passenger_Cat == crew, then 0_
If Age_Cat

In [8]:
# Seven equal weights for the optimizer; these are placeholders, not tuned values.
lambda_array = [0.5]*7     # use a separate hyperparameter search routine
epsilon = 0.05
# Run the Lakkaraju deterministic local search on the first RULES_CUTOFF rules.
# soln_set is used below as a collection of positions into the rule list.
soln_set, obj_val = deterministic_local_search(list_of_rules[:RULES_CUTOFF], df, Y, lambda_array, epsilon)
print(soln_set)
print(obj_val)

722435.5
720350.0
{17}
722435.5


## Final solution set

In [9]:
# soln_set holds positions within list_of_rules[:RULES_CUTOFF]; because that
# slice starts at index 0, the same positions are valid in the full list.
# Wrapping the list in a NumPy array allows selecting by a list of indices.
solution_rules = np.array(list_of_rules)[list(soln_set)]

# Print every rule selected by the Lakkaraju optimizer.
for r in solution_rules:
    r.print_rule()

If Gender == male, then 1_


# PyIDS

## PyIDS setup

In [10]:
import time

%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random

%run C:/code/python/pyids/main.py

from sklearn.metrics import accuracy_score, auc, roc_auc_score

from pyids.ids_rule import IDSRule
from pyids.ids_ruleset import IDSRuleSet
from pyids.ids_objective_function import ObjectiveFunctionParameters, IDSObjectiveFunction
from pyids.ids_optimizer import RSOptimizer, SLSOptimizer
from pyids.ids_cacher import IDSCacher
from pyids.ids_classifier import IDS, mine_CARs


from pyarc.qcba import *

from pyarc.algorithms import createCARs, top_rules
from pyarc import TransactionDB

In [11]:
# Wrap the pandas DataFrame in the structure that IDS.fit / IDS.score_auc
# receive below as their data argument.
quant_df = QuantitativeDataFrame(df)

## PyIDS DLS

In [12]:
# Inspect the candidate CARs given to PyIDS (same cutoff as the Lakkaraju run).
cars[:RULES_CUTOFF]

[CAR {Age_Cat=child,Passenger_Cat=2nd_class} => {Died=0_} sup: 0.01 conf: 1.00 len: 3, id: 74,
 CAR {Age_Cat=child,Passenger_Cat=2nd_class,Gender=female} => {Died=0_} sup: 0.01 conf: 1.00 len: 4, id: 75,
 CAR {Gender=male,Age_Cat=child,Passenger_Cat=2nd_class} => {Died=0_} sup: 0.01 conf: 1.00 len: 4, id: 73,
 CAR {Age_Cat=child,Passenger_Cat=1st_class} => {Died=0_} sup: 0.00 conf: 1.00 len: 3, id: 71,
 CAR {Gender=male,Age_Cat=child,Passenger_Cat=1st_class} => {Died=0_} sup: 0.00 conf: 1.00 len: 4, id: 70,
 CAR {Age_Cat=child,Passenger_Cat=1st_class,Gender=female} => {Died=0_} sup: 0.00 conf: 1.00 len: 4, id: 72,
 CAR {Gender=female,Passenger_Cat=1st_class} => {Died=0_} sup: 0.06 conf: 0.97 len: 3, id: 45,
 CAR {Age_Cat=adult,Gender=female,Passenger_Cat=1st_class} => {Died=0_} sup: 0.06 conf: 0.97 len: 4, id: 44,
 CAR {Age_Cat=adult,Gender=male,Passenger_Cat=2nd_class} => {Died=1_} sup: 0.07 conf: 0.90 len: 4, id: 47,
 CAR {Gender=female,Passenger_Cat=2nd_class} => {Died=0_} sup: 0.04

In [16]:
# Fit PyIDS with its "DLS" algorithm on the same first RULES_CUTOFF CARs and
# the same weights ([0.5]*7) used for the Lakkaraju run above, so that the two
# selected rule sets can be compared directly.
ids = IDS()
ids.fit(class_association_rules=cars[:RULES_CUTOFF], quant_dataframe=quant_df, algorithm="DLS", lambda_array=[0.5]*7)

cover cache prepared
overlap cache prepared
Testing if rule is good to add IDS-CAR {Age_Cat=adult,Gender=male,Passenger_Cat=crew} => {Died=1_} sup: 0.30 conf: 0.77 len: 4, id: 4
Testing if rule is good to add IDS-CAR {Age_Cat=adult,Gender=female,Passenger_Cat=2nd_class} => {Died=0_} sup: 0.04 conf: 0.87 len: 4, id: 56
Testing if rule is good to add IDS-CAR {Age_Cat=child,Passenger_Cat=2nd_class} => {Died=0_} sup: 0.01 conf: 1.00 len: 3, id: 74
Testing if rule is good to add IDS-CAR {Gender=female,Passenger_Cat=2nd_class} => {Died=0_} sup: 0.04 conf: 0.89 len: 3, id: 57
Testing if rule is good to add IDS-CAR {Gender=female,Passenger_Cat=crew} => {Died=0_} sup: 0.01 conf: 0.84 len: 3, id: 28
Testing if rule is good to add IDS-CAR {Age_Cat=adult,Gender=male,Passenger_Cat=2nd_class} => {Died=1_} sup: 0.07 conf: 0.90 len: 4, id: 47
Testing if rule is good to add IDS-CAR {Gender=male,Age_Cat=child,Passenger_Cat=1st_class} => {Died=0_} sup: 0.00 conf: 1.00 len: 4, id: 70
Testing if rule is go

<pyids.ids_classifier.IDS at 0x256f3e37198>

In [17]:
# AUC of the fitted PyIDS model, scored on the same data it was fitted on
# (no held-out split is made in this notebook).
ids.score_auc(quant_df)

0.696455584411042

In [18]:
# Rules selected by PyIDS; compare with the Lakkaraju "Final solution set" above.
ids.clf.rules

{IDS-CAR {Gender=male} => {Died=1_} sup: 0.61 conf: 0.78 len: 2, id: 2}