In [38]:
from pyarc.data_structures import ClassAssocationRule
from pyids.data_structures import (
    IDSRule,
    IDSRuleSet,
    IDSObjectiveFunction,
    ObjectiveFunctionParameters,
    ObjectiveFunctionParameters,
    RSOptimizer,
    DLSOptimizer,
    SLSOptimizer,
    RandomizedUSMOptimizer,
    DeterministicUSMOptimizer
)

In [39]:
def print_cars(ids_cars):
    cars = ids_cars
    is_ids = False
    if type(ids_cars[0]) == IDSRule:
        is_ids = [ r.calc_f1() for r in ids_cars ]
        cars = [ r.car for r in ids_cars ]
    latex = []
    for idx, car in enumerate(cars):
        if is_ids:
            latex.append(car_to_latex(car, is_ids=is_ids[idx]))
        else:
            latex.append(car_to_latex(car))
        
    return "\n\n".join(latex)
        
def car_to_latex(car, is_ids=False):
    trans = {
        "-":  r"\-",
        "_":  r"\_",
        "]":  r"\]",
        "\\": r"\\",
        "^":  r"\^",
        "$":  r"\$",
        "*":  r"\*",
        ".":  r"\."
    }
    latex = []
    
    latex.append("If")
    
    for idx, predicate in enumerate(car.antecedent):
       
        name, interval = predicate
        name = name.translate(str.maketrans(trans))
        
        interval_string = interval.translate(str.maketrans(trans))
        
        if idx != 0:
            latex.append("and")
        latex.append(
            f"\\textcolor{{blue}}{{{name}}}=\\textcolor{{gray}}{{{interval_string}}}"
        )

        
    class_name, class_value = car.consequent
    
    latex.append("then")
    latex.append(
        f"\\textcolor{{red}}{{{class_name}}}=\\textcolor{{gray}}{{{class_value}}}"
    )
    
    if is_ids:
        latex.append(
            f"$\mid$ f1-score: \\textit{{{round(is_ids, 2)}}}"
        )
    
    return " ".join(latex)

# IDS Demonstration

## Importing required libraries

In [40]:
import pandas as pd
import pyarc
from pyids import IDS
from pyids.data_structures import mine_CARs
from pyids.rule_mining import RuleMiner
from pyarc.qcba.data_structures import QuantitativeDataFrame

from sklearn.model_selection import train_test_split

## Loading the test and train data

In [41]:
data = pd.read_csv("../data/titanic.csv").sample(frac=1).reset_index(drop=True)


data_train, data_test = train_test_split(data, test_size=0.8)

In [42]:
data.head()

Unnamed: 0,Passenger_Cat,Age_Cat,Gender,Died
0,crew,adult,male,1
1,crew,adult,male,1
2,1st_class,adult,male,1
3,1st_class,adult,female,0
4,3rd_class,adult,female,1


In [43]:
cars = mine_CARs(data_train, 20)
car = cars[0]

Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=4
Rule count: 51, Iteration: 1
Increasing maxlen 4
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 73, Iteration: 2
Decreasing confidence to 0.45
Running apriori with setting: confidence=0.45, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 73, Iteration: 3
Decreasing confidence to 0.4
Running apriori with setting: confidence=0.4, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 73, Iteration: 4
Decreasing confidence to 0.35000000000000003
Running apriori with setting: confidence=0.35000000000000003, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 73, Iteration: 5
Decreasing confidence to 0.30000000000000004
Running apriori with setting: confidence=0.30000000000000004, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 73, Iteration: 6
Decreasing confidence to 0.25000000000000006
Running apriori

In [44]:
ids_rule = IDSRule(car)

In [45]:
quant_dataframe_train = QuantitativeDataFrame(data_train)
quant_dataframe_test = QuantitativeDataFrame(data_test)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [46]:
type(quant_dataframe_train)

pyarc.qcba.data_structures.quant_dataset.QuantitativeDataFrame

In [47]:
ids_rule.cache_prepared

False

In [48]:
ids_rule.cover_cache

{'cover': None,
 'correct_cover': None,
 'incorrect_cover': None,
 'rule_cover': None}

In [49]:
ids_rule1 = IDSRule(cars[0])
ids_rule1.cover_cache

{'cover': None,
 'correct_cover': None,
 'incorrect_cover': None,
 'rule_cover': None}

In [50]:
ids_rule1 = IDSRule(cars[0])
ids_rule2 = IDSRule(cars[1])

print(ids_rule1)
print(ids_rule2)

IDSRule {Passenger_Cat=crew,Gender=female} => {Died=0} sup: 0.02 conf: 1.00, f1: 0.04, len: 3, id: 166
IDSRule {Passenger_Cat=crew,Age_Cat=adult,Gender=female} => {Died=0} sup: 0.02 conf: 1.00, f1: 0.04, len: 4, id: 165


In [51]:
ids_rule1.calculate_cover(quant_dataframe_train)
ids_rule2.calculate_cover(quant_dataframe_train)

In [52]:
ids_rule1.rule_overlap(ids_rule2, quant_dataframe_train)

array([False, False, False, False, False, False, False, False, False,
       False,  True,  True, False, False, False,  True, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False,  True, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,

In [53]:
type(data_train)

pandas.core.frame.DataFrame

In [54]:
type(mine_CARs(data_train, rule_cutoff=20)[0])

Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=4
Rule count: 51, Iteration: 1
Increasing maxlen 4
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 73, Iteration: 2
Decreasing confidence to 0.45
Running apriori with setting: confidence=0.45, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 73, Iteration: 3
Decreasing confidence to 0.4
Running apriori with setting: confidence=0.4, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 73, Iteration: 4
Decreasing confidence to 0.35000000000000003
Running apriori with setting: confidence=0.35000000000000003, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 73, Iteration: 5
Decreasing confidence to 0.30000000000000004
Running apriori with setting: confidence=0.30000000000000004, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 73, Iteration: 6
Decreasing confidence to 0.25000000000000006
Running apriori

pyarc.data_structures.car.ClassAssocationRule

In [55]:
ruleset = IDSRuleSet.from_cba_rules(cars)

In [56]:
ruleset.sum_rule_length(), ruleset.max_rule_length()

(50, 3)

In [57]:
ids_ruleset = IDSRuleSet.from_cba_rules(cars)

In [58]:
type(ids_ruleset)

pyids.data_structures.ids_ruleset.IDSRuleSet

In [59]:
params = ObjectiveFunctionParameters()

params.params["all_rules"] = ids_ruleset
params.params["len_all_rules"] = len(ids_ruleset.ruleset)
params.params["quant_dataframe"] = quant_dataframe_train
params.params["lambda_array"] = 7 * [1]


In [60]:
obj_func = IDSObjectiveFunction(params)

cover cache prepared
overlap cache prepared


In [61]:
rule_subset = IDSRuleSet(list(ids_ruleset.ruleset)[2:5])

In [62]:
rule_subset.ruleset

{IDSRule {Passenger_Cat=2nd_class,Gender=male,Age_Cat=child} => {Died=0} sup: 0.00 conf: 1.00, f1: 0.01, len: 4, id: 210,
 IDSRule {Passenger_Cat=2nd_class,Age_Cat=child,Gender=female} => {Died=0} sup: 0.00 conf: 1.00, f1: 0.01, len: 4, id: 212,
 IDSRule {Passenger_Cat=crew,Age_Cat=adult,Gender=female} => {Died=0} sup: 0.02 conf: 1.00, f1: 0.04, len: 4, id: 165}

In [63]:
obj_func.evaluate(ids_ruleset)

281799.0

In [64]:
rs_opt = RSOptimizer(input_set=ids_ruleset.ruleset)

solution_set = rs_opt.optimize()

In [65]:
solution_set

{IDSRule {Gender=male,Passenger_Cat=1st_class,Age_Cat=child} => {Died=0} sup: 0.00 conf: 1.00, f1: 0.01, len: 4, id: 208,
 IDSRule {Passenger_Cat=1st_class,Age_Cat=child} => {Died=0} sup: 0.00 conf: 1.00, f1: 0.01, len: 3, id: 209,
 IDSRule {Passenger_Cat=2nd_class,Age_Cat=child} => {Died=0} sup: 0.01 conf: 1.00, f1: 0.01, len: 3, id: 211,
 IDSRule {Gender=male,Age_Cat=child,Passenger_Cat=3rd_class} => {Died=1} sup: 0.02 conf: 0.88, f1: 0.04, len: 4, id: 200,
 IDSRule {Passenger_Cat=crew,Age_Cat=adult,Gender=female} => {Died=0} sup: 0.02 conf: 1.00, f1: 0.04, len: 4, id: 165,
 IDSRule {Passenger_Cat=2nd_class,Age_Cat=adult,Gender=female} => {Died=0} sup: 0.03 conf: 0.90, f1: 0.05, len: 4, id: 194,
 IDSRule {Passenger_Cat=2nd_class,Gender=female} => {Died=0} sup: 0.03 conf: 0.91, f1: 0.06, len: 3, id: 195,
 IDSRule {Passenger_Cat=2nd_class,Age_Cat=adult,Gender=male} => {Died=1} sup: 0.07 conf: 0.89, f1: 0.13, len: 4, id: 185,
 IDSRule {Age_Cat=adult,Passenger_Cat=3rd_class,Gender=male} 

In [66]:
obj_func.evaluate(IDSRuleSet(solution_set))

285239.0

In [69]:
dls_opt = DLSOptimizer(obj_func, params)

In [73]:
solution_set = dls_opt.optimize()

Testing if rule is good to add IDSRule {Passenger_Cat=1st_class,Age_Cat=child} => {Died=0} sup: 0.00 conf: 1.00, f1: 0.01, len: 3, id: 209
Testing if rule is good to add IDSRule {Passenger_Cat=2nd_class,Age_Cat=child} => {Died=0} sup: 0.01 conf: 1.00, f1: 0.01, len: 3, id: 211
Testing if rule is good to add IDSRule {Age_Cat=adult,Passenger_Cat=1st_class,Gender=female} => {Died=0} sup: 0.07 conf: 0.96, f1: 0.13, len: 4, id: 182
Testing if rule is good to add IDSRule {Passenger_Cat=crew,Gender=female} => {Died=0} sup: 0.02 conf: 1.00, f1: 0.04, len: 3, id: 166
Testing if rule is good to add IDSRule {Passenger_Cat=2nd_class,Gender=male} => {Died=1} sup: 0.07 conf: 0.86, f1: 0.13, len: 3, id: 186
Testing if rule is good to add IDSRule {Passenger_Cat=2nd_class,Age_Cat=adult,Gender=male} => {Died=1} sup: 0.07 conf: 0.89, f1: 0.13, len: 4, id: 185
Testing if rule is good to add IDSRule {Passenger_Cat=crew,Gender=male} => {Died=1} sup: 0.30 conf: 0.79, f1: 0.43, len: 3, id: 145
Testing if rule

In [75]:
obj_func.evaluate(IDSRuleSet(solution_set))

288788.0

In [78]:
sls_opt = SLSOptimizer(obj_func, params)
solution_set = sls_opt.optimize()
obj_func.evaluate(IDSRuleSet(solution_set))

INFO - Number of input rules: 20
INFO - RandomOptimizer estimated the OPTIMUM value as: 284125.0
INFO - Threshold value (2/(n*n) * OPT) = 1420.625. This is the standard error treshold value.
INFO - Estimating omega for rule: IDSRule {Passenger_Cat=2nd_class,Gender=male} => {Died=1} sup: 0.07 conf: 0.86, f1: 0.13, len: 3, id: 186
INFO - stardard error of omega estimate: 273.49112965505844
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule {Passenger_Cat=2nd_class,Age_Cat=adult,Gender=male} => {Died=1} sup: 0.07 conf: 0.89, f1: 0.13, len: 4, id: 185
INFO - stardard error of omega estimate: 250.28810199448156
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule {Passenger_Cat=2nd_class,Gender=male,Age_Cat=child} => {Died=0} sup: 0.00 conf: 1.00, f1: 0.01, len: 4, id: 210
INFO - stardard error of omega estimate: 397.3355408216084
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule {Passenger_Cat=crew,Age_Cat=adult,G

INFO - stardard error of omega estimate: 309.11058377221576
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule {Passenger_Cat=crew,Age_Cat=adult,Gender=male} => {Died=1} sup: 0.30 conf: 0.79, f1: 0.43, len: 4, id: 144
INFO - stardard error of omega estimate: 346.72690117728104
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule {Gender=male,Passenger_Cat=3rd_class} => {Died=1} sup: 0.19 conf: 0.81, f1: 0.31, len: 3, id: 156
INFO - stardard error of omega estimate: 306.72874335477593
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule {Passenger_Cat=1st_class,Gender=female} => {Died=0} sup: 0.07 conf: 0.96, f1: 0.13, len: 3, id: 183
INFO - stardard error of omega estimate: 265.92731525738384
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule {Passenger_Cat=2nd_class,Age_Cat=adult,Gender=female} => {Died=0} sup: 0.03 conf: 0.90, f1: 0.05, len: 4, id: 194
INFO - stardard error of omega es

287367.0

In [100]:
import numpy as np

class RandomizedUSMOptimizer:

    def __init__(self, objective_function, objective_func_params, debug=True, random_seed=None):
        self.objective_function_params = objective_func_params 
        self.objective_function = objective_function
        self.debug = debug

        if random_seed:
            np.random.seed(random_seed)

    def optimize(self):
        all_rules = self.objective_function_params.params["all_rules"]

        x0 = IDSRuleSet(set())
        y0 = IDSRuleSet({rule for rule in all_rules.ruleset})

        n = len(y0)

        for rule in all_rules.ruleset:
            a_set = IDSRuleSet(x0.ruleset | {rule})
            b_set = IDSRuleSet(y0.ruleset - {rule})

            print("x0", self.objective_function.evaluate(x0))
            print("a_set", self.objective_function.evaluate(a_set))
            
            a_value = self.objective_function.evaluate(a_set) - self.objective_function.evaluate(x0)
            b_value = self.objective_function.evaluate(b_set) - self.objective_function.evaluate(y0)

            a_max = max(a_value, 0)
            b_max = max(b_value, 0)

            x_probability = 1
            y_probability = 0
            
            print("--------")
            print(a_value)
            print(b_value)
            
            if not (a_max == 0 and b_max == 0):
                print("real_prob")
                x_probability = a_max / (a_max + b_max)
                y_probability = b_max / (a_max + b_max)

            if np.random.uniform() <= x_probability:
                x0.ruleset.add(rule)

            if np.random.uniform() <= y_probability:
                y0.ruleset.remove(rule)                

        x_value = self.objective_function.evaluate(x0)
        y_value = self.objective_function.evaluate(y0)

        if x_value > y_value:
            return x0.ruleset
        else:
            return y0.ruleset

In [102]:
rusm_opt = RandomizedUSMOptimizer(obj_func, params)
solution_set = rusm_opt.optimize()
obj_func.evaluate(IDSRuleSet(solution_set))

-304.0
388.0
real_prob
-305.0
359.0
real_prob
-353.0
356.0
real_prob
-339.0
356.0
real_prob
-353.0
357.0
real_prob
-334.0
358.0
real_prob
68.0
514.0
real_prob
-334.0
356.0
real_prob
-352.0
355.0
real_prob
-350.0
351.0
real_prob
-307.0
357.0
real_prob
-338.0
339.0
real_prob
-144.0
383.0
real_prob
-341.0
357.0
real_prob
-145.0
146.0
real_prob
-220.0
356.0
real_prob
-306.0
307.0
real_prob
-337.0
338.0
real_prob
-353.0
353.0
real_prob
-235.0
235.0
real_prob


288720.0

In [87]:
ids_ruleset.ruleset

{IDSRule {Gender=male,Passenger_Cat=1st_class,Age_Cat=child} => {Died=0} sup: 0.00 conf: 1.00, f1: 0.01, len: 4, id: 208,
 IDSRule {Passenger_Cat=1st_class,Age_Cat=child} => {Died=0} sup: 0.00 conf: 1.00, f1: 0.01, len: 3, id: 209,
 IDSRule {Passenger_Cat=2nd_class,Age_Cat=child,Gender=female} => {Died=0} sup: 0.00 conf: 1.00, f1: 0.01, len: 4, id: 212,
 IDSRule {Passenger_Cat=2nd_class,Gender=male,Age_Cat=child} => {Died=0} sup: 0.00 conf: 1.00, f1: 0.01, len: 4, id: 210,
 IDSRule {Passenger_Cat=2nd_class,Age_Cat=child} => {Died=0} sup: 0.01 conf: 1.00, f1: 0.01, len: 3, id: 211,
 IDSRule {Gender=male,Age_Cat=child,Passenger_Cat=3rd_class} => {Died=1} sup: 0.02 conf: 0.88, f1: 0.04, len: 4, id: 200,
 IDSRule {Passenger_Cat=crew,Gender=female} => {Died=0} sup: 0.02 conf: 1.00, f1: 0.04, len: 3, id: 166,
 IDSRule {Passenger_Cat=crew,Age_Cat=adult,Gender=female} => {Died=0} sup: 0.02 conf: 1.00, f1: 0.04, len: 4, id: 165,
 IDSRule {Passenger_Cat=2nd_class,Age_Cat=adult,Gender=female} => 

In [85]:
solution_set

{IDSRule {Gender=male,Passenger_Cat=1st_class,Age_Cat=child} => {Died=0} sup: 0.00 conf: 1.00, f1: 0.01, len: 4, id: 208,
 IDSRule {Passenger_Cat=2nd_class,Age_Cat=child,Gender=female} => {Died=0} sup: 0.00 conf: 1.00, f1: 0.01, len: 4, id: 212,
 IDSRule {Passenger_Cat=2nd_class,Gender=male,Age_Cat=child} => {Died=0} sup: 0.00 conf: 1.00, f1: 0.01, len: 4, id: 210,
 IDSRule {Passenger_Cat=1st_class,Age_Cat=child} => {Died=0} sup: 0.00 conf: 1.00, f1: 0.01, len: 3, id: 209,
 IDSRule {Passenger_Cat=2nd_class,Age_Cat=child} => {Died=0} sup: 0.01 conf: 1.00, f1: 0.01, len: 3, id: 211,
 IDSRule {Gender=male,Age_Cat=child,Passenger_Cat=3rd_class} => {Died=1} sup: 0.02 conf: 0.88, f1: 0.04, len: 4, id: 200,
 IDSRule {Passenger_Cat=crew,Age_Cat=adult,Gender=female} => {Died=0} sup: 0.02 conf: 1.00, f1: 0.04, len: 4, id: 165,
 IDSRule {Passenger_Cat=crew,Gender=female} => {Died=0} sup: 0.02 conf: 1.00, f1: 0.04, len: 3, id: 166,
 IDSRule {Passenger_Cat=2nd_class,Age_Cat=adult,Gender=female} => 

## Mining the Class Association Rules (CARs)

## Training the IDS Model

In [23]:
ids = IDS()
ids.fit(class_association_rules=rules, quant_dataframe=quant_dataframe_train, debug=True, random_seed=5)

NameError: name 'rules' is not defined

In [None]:
print(print_cars(ids.clf.rules))

In [24]:
ids.clf.default_class

AttributeError: 'NoneType' object has no attribute 'default_class'

In [13]:
ids.clf.default_class_confidence

1

## Evaluating the IDS model

In [30]:
print("Accuracy on train data: ", ids.score(quant_dataframe_train))
print("Accuracy on test data: ", ids.score(quant_dataframe_test))

Accuracy on train data:  0.6590909090909091
Accuracy on test data:  0.681334279630944


In [15]:
print("AUC on train data: ", ids.score_auc(quant_dataframe_train, confidence_based=True))
print("AUC on test data: ", ids.score_auc(quant_dataframe_test, confidence_based=True))

AUC on train data:  0.6735958005249345


IndexError: index 352 is out of bounds for axis 1 with size 352

In [32]:
ids.score_interpretable_metrics(quant_dataframe_train)

{'fraction_overlap': 0.1952651515151515,
 'fraction_classes': 1.0,
 'fraction_uncovered': 0.14204545454545447,
 'average_rule_width': 1.8333333333333333,
 'ruleset_length': 6}

In [67]:
ids.score_interpretable_metrics(quant_dataframe_test)

{'fraction_overlap': 0.32008516678495386,
 'fraction_classes': 1.0,
 'fraction_uncovered': 0.00709723207948898,
 'average_rule_width': 1.4285714285714286,
 'ruleset_length': 7}