In [2]:
from pyarc.data_structures import ClassAssocationRule
from pyids.data_structures import (
    IDSOneVsAll,
    IDSRule,
    IDSRuleSet,
    IDSClassifier,
    IDSObjectiveFunction,
    ObjectiveFunctionParameters,
    ObjectiveFunctionParameters,
    RSOptimizer,
    DLSOptimizer,
    SLSOptimizer,
    RandomizedUSMOptimizer,
    DeterministicUSMOptimizer
)

In [3]:
def print_cars(ids_cars):
    cars = ids_cars
    is_ids = False
    if type(ids_cars[0]) == IDSRule:
        is_ids = [ r.calc_f1() for r in ids_cars ]
        cars = [ r.car for r in ids_cars ]
    latex = []
    for idx, car in enumerate(cars):
        if is_ids:
            latex.append(car_to_latex(car, is_ids=is_ids[idx]))
        else:
            latex.append(car_to_latex(car))
        
    return "\n\n".join(latex)
        
def car_to_latex(car, is_ids=False):
    trans = {
        "-":  r"\-",
        "_":  r"\_",
        "]":  r"\]",
        "\\": r"\\",
        "^":  r"\^",
        "$":  r"\$",
        "*":  r"\*",
        ".":  r"\."
    }
    latex = []
    
    latex.append("If")
    
    for idx, predicate in enumerate(car.antecedent):
       
        name, interval = predicate
        name = name.translate(str.maketrans(trans))
        
        interval_string = interval.translate(str.maketrans(trans))
        
        if idx != 0:
            latex.append("and")
        latex.append(
            f"\\textcolor{{blue}}{{{name}}}=\\textcolor{{gray}}{{{interval_string}}}"
        )

        
    class_name, class_value = car.consequent
    
    latex.append("then")
    latex.append(
        f"\\textcolor{{red}}{{{class_name}}}=\\textcolor{{gray}}{{{class_value}}}"
    )
    
    if is_ids:
        latex.append(
            f"$\mid$ f1-score: \\textit{{{round(is_ids, 2)}}}"
        )
    
    return " ".join(latex)

# IDS Demonstration

## Importing required libraries

In [4]:
import pandas as pd
import pyarc
from pyids import IDS
from pyids.data_structures import mine_CARs
from pyids.rule_mining import RuleMiner
from pyarc.qcba.data_structures import QuantitativeDataFrame

from sklearn.model_selection import train_test_split

## Loading the test and train data

In [5]:
data = pd.read_csv("../data/titanic.csv").sample(frac=1).reset_index(drop=True)


data_train, data_test = train_test_split(data, test_size=0.8)

In [6]:
data.head()

Unnamed: 0,Passenger_Cat,Age_Cat,Gender,Died
0,3rd_class,child,male,0
1,2nd_class,adult,male,1
2,crew,adult,male,1
3,3rd_class,adult,female,1
4,3rd_class,adult,male,1


In [7]:
cars = mine_CARs(data_train, 20)
car = cars[0]

Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=4
Rule count: 52, Iteration: 1
Increasing maxlen 4
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 75, Iteration: 2
Decreasing confidence to 0.45
Running apriori with setting: confidence=0.45, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 75, Iteration: 3
Decreasing confidence to 0.4
Running apriori with setting: confidence=0.4, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 75, Iteration: 4
Decreasing confidence to 0.35000000000000003
Running apriori with setting: confidence=0.35000000000000003, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 75, Iteration: 5
Decreasing confidence to 0.30000000000000004
Running apriori with setting: confidence=0.30000000000000004, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 75, Iteration: 6
Decreasing confidence to 0.25000000000000006
Running apriori

In [8]:
ids_rule = IDSRule(car)

In [9]:
quant_dataframe_train = QuantitativeDataFrame(data_train)
quant_dataframe_test = QuantitativeDataFrame(data_test)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [10]:
type(quant_dataframe_train)

pyarc.qcba.data_structures.quant_dataset.QuantitativeDataFrame

In [11]:
ids_rule.cache_prepared

False

In [12]:
ids_rule.cover_cache

{'cover': None,
 'correct_cover': None,
 'incorrect_cover': None,
 'rule_cover': None}

In [13]:
ids_rule1 = IDSRule(cars[0])
ids_rule1.cover_cache

{'cover': None,
 'correct_cover': None,
 'incorrect_cover': None,
 'rule_cover': None}

In [14]:
ids_rule1 = IDSRule(cars[0])
ids_rule2 = IDSRule(cars[1])

print(ids_rule1)
print(ids_rule2)

IDSRule {Passenger_Cat=2nd_class,Age_Cat=child} => {Died=0} sup: 0.01 conf: 1.00, f1: 0.02, len: 3, id: 73
IDSRule {Passenger_Cat=2nd_class,Gender=male,Age_Cat=child} => {Died=0} sup: 0.01 conf: 1.00, f1: 0.01, len: 4, id: 72


In [15]:
ids_rule1.calculate_cover(quant_dataframe_train)
ids_rule2.calculate_cover(quant_dataframe_train)

In [16]:
ids_rule1.rule_overlap(ids_rule2, quant_dataframe_train)

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,

In [17]:
type(data_train)

pandas.core.frame.DataFrame

In [18]:
type(mine_CARs(data_train, rule_cutoff=20)[0])

Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=4
Rule count: 52, Iteration: 1
Increasing maxlen 4
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 75, Iteration: 2
Decreasing confidence to 0.45
Running apriori with setting: confidence=0.45, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 75, Iteration: 3
Decreasing confidence to 0.4
Running apriori with setting: confidence=0.4, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 75, Iteration: 4
Decreasing confidence to 0.35000000000000003
Running apriori with setting: confidence=0.35000000000000003, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 75, Iteration: 5
Decreasing confidence to 0.30000000000000004
Running apriori with setting: confidence=0.30000000000000004, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 75, Iteration: 6
Decreasing confidence to 0.25000000000000006
Running apriori

pyarc.data_structures.car.ClassAssocationRule

In [19]:
ruleset = IDSRuleSet.from_cba_rules(cars)

In [20]:
ruleset.sum_rule_length(), ruleset.max_rule_length()

(46, 3)

In [21]:
ids_ruleset = IDSRuleSet.from_cba_rules(cars)

In [22]:
type(ids_ruleset)

pyids.data_structures.ids_ruleset.IDSRuleSet

In [23]:
params = ObjectiveFunctionParameters()

params.params["all_rules"] = ids_ruleset
params.params["len_all_rules"] = len(ids_ruleset.ruleset)
params.params["quant_dataframe"] = quant_dataframe_train
params.params["lambda_array"] = 7 * [1]


In [24]:
obj_func = IDSObjectiveFunction(params)

cover cache prepared
overlap cache prepared


In [25]:
rule_subset = IDSRuleSet(list(ids_ruleset.ruleset)[2:5])

In [26]:
rule_subset.ruleset

{IDSRule {Passenger_Cat=2nd_class,Age_Cat=child,Gender=female} => {Died=0} sup: 0.01 conf: 1.00, f1: 0.01, len: 4, id: 74,
 IDSRule {Passenger_Cat=2nd_class,Age_Cat=child} => {Died=0} sup: 0.01 conf: 1.00, f1: 0.02, len: 3, id: 73,
 IDSRule {Gender=male,Passenger_Cat=3rd_class,Age_Cat=adult} => {Died=1} sup: 0.18 conf: 0.84, f1: 0.30, len: 4, id: 15}

In [27]:
obj_func.evaluate(ids_ruleset)

280038.0

In [28]:
rs_opt = RSOptimizer(input_set=ids_ruleset.ruleset)

solution_set = rs_opt.optimize()

In [29]:
solution_set

{IDSRule {Passenger_Cat=2nd_class,Gender=male,Age_Cat=child} => {Died=0} sup: 0.01 conf: 1.00, f1: 0.01, len: 4, id: 72,
 IDSRule {Passenger_Cat=2nd_class,Gender=male} => {Died=1} sup: 0.05 conf: 0.82, f1: 0.10, len: 3, id: 48,
 IDSRule {Passenger_Cat=1st_class,Gender=female} => {Died=0} sup: 0.06 conf: 0.96, f1: 0.12, len: 3, id: 45,
 IDSRule {Passenger_Cat=1st_class,Gender=female,Age_Cat=adult} => {Died=0} sup: 0.06 conf: 0.96, f1: 0.12, len: 4, id: 44,
 IDSRule {Gender=male,Passenger_Cat=3rd_class,Age_Cat=adult} => {Died=1} sup: 0.18 conf: 0.84, f1: 0.30, len: 4, id: 15,
 IDSRule {Gender=male,Passenger_Cat=3rd_class} => {Died=1} sup: 0.20 conf: 0.80, f1: 0.32, len: 3, id: 16,
 IDSRule {Gender=male,Passenger_Cat=crew,Age_Cat=adult} => {Died=1} sup: 0.29 conf: 0.77, f1: 0.42, len: 4, id: 4,
 IDSRule {Passenger_Cat=crew,Age_Cat=adult} => {Died=1} sup: 0.30 conf: 0.75, f1: 0.43, len: 3, id: 3,
 IDSRule {Passenger_Cat=crew} => {Died=1} sup: 0.30 conf: 0.75, f1: 0.43, len: 2, id: 6,
 IDSR

In [30]:
obj_func.evaluate(IDSRuleSet(solution_set))

284039.0

In [31]:
dls_opt = DLSOptimizer(obj_func, params)

In [32]:
solution_set = dls_opt.optimize()

Testing if rule is good to add IDSRule {Passenger_Cat=1st_class,Gender=female,Age_Cat=adult} => {Died=0} sup: 0.06 conf: 0.96, f1: 0.12, len: 4, id: 44
Testing if rule is good to add IDSRule {Gender=male,Age_Cat=adult} => {Died=1} sup: 0.58 conf: 0.78, f1: 0.67, len: 3, id: 1
Testing if rule is good to add IDSRule {Passenger_Cat=2nd_class,Gender=male,Age_Cat=adult} => {Died=1} sup: 0.05 conf: 0.90, f1: 0.10, len: 4, id: 47
Testing if rule is good to add IDSRule {Gender=male,Passenger_Cat=crew,Age_Cat=adult} => {Died=1} sup: 0.29 conf: 0.77, f1: 0.42, len: 4, id: 4
Testing if rule is good to add IDSRule {Passenger_Cat=2nd_class,Age_Cat=child} => {Died=0} sup: 0.01 conf: 1.00, f1: 0.02, len: 3, id: 73
Testing if rule is good to add IDSRule {Passenger_Cat=crew} => {Died=1} sup: 0.30 conf: 0.75, f1: 0.43, len: 2, id: 6
Testing if rule is good to add IDSRule {Gender=male,Passenger_Cat=3rd_class,Age_Cat=adult} => {Died=1} sup: 0.18 conf: 0.84, f1: 0.30, len: 4, id: 15
Testing if rule is good

In [33]:
obj_func.evaluate(IDSRuleSet(solution_set))

288787.0

In [34]:
sls_opt = SLSOptimizer(obj_func, params)
solution_set = sls_opt.optimize()
obj_func.evaluate(IDSRuleSet(solution_set))

INFO - Number of input rules: 20
INFO - RandomOptimizer estimated the OPTIMUM value as: 285747.0
INFO - Threshold value (2/(n*n) * OPT) = 1428.7350000000001. This is the standard error treshold value.
INFO - Estimating omega for rule: IDSRule {Passenger_Cat=2nd_class,Gender=male,Age_Cat=adult} => {Died=1} sup: 0.05 conf: 0.90, f1: 0.10, len: 4, id: 47
INFO - stardard error of omega estimate: 353.1228129135811
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule {Gender=male,Passenger_Cat=crew,Age_Cat=adult} => {Died=1} sup: 0.29 conf: 0.77, f1: 0.42, len: 4, id: 4
INFO - stardard error of omega estimate: 342.43635321034475
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule {Passenger_Cat=2nd_class,Age_Cat=child} => {Died=0} sup: 0.01 conf: 1.00, f1: 0.02, len: 3, id: 73
INFO - stardard error of omega estimate: 242.3867983203706
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule {Gender=male,Passenger_Cat=3rd_cl

INFO - stardard error of omega estimate: 247.92802382949773
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule {Passenger_Cat=2nd_class,Gender=male,Age_Cat=child} => {Died=0} sup: 0.01 conf: 1.00, f1: 0.01, len: 4, id: 72
INFO - stardard error of omega estimate: 341.02530697882236
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule {Passenger_Cat=crew,Age_Cat=adult} => {Died=1} sup: 0.30 conf: 0.75, f1: 0.43, len: 3, id: 3
INFO - stardard error of omega estimate: 355.38055940076407
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule {Passenger_Cat=3rd_class,Age_Cat=adult} => {Died=1} sup: 0.22 conf: 0.76, f1: 0.35, len: 3, id: 14
INFO - stardard error of omega estimate: 338.2313439053217
INFO - omega succesfully estimated


285126.0

In [35]:
ids_clf = IDSClassifier(solution_set)

ids_clf.quant_dataframe_train = quant_dataframe_train

ids_clf.calculate_default_class()

In [36]:
ids_clf.default_class

'0'

In [37]:
ids_clf.default_class_confidence

0.6

In [38]:
import numpy as np

class RandomizedUSMOptimizer:

    def __init__(self, objective_function, objective_func_params, debug=True, random_seed=None):
        self.objective_function_params = objective_func_params 
        self.objective_function = objective_function
        self.debug = debug

        if random_seed:
            np.random.seed(random_seed)

    def optimize(self):
        all_rules = self.objective_function_params.params["all_rules"]

        x0 = IDSRuleSet(set())
        y0 = IDSRuleSet({rule for rule in all_rules.ruleset})

        n = len(y0)

        for rule in all_rules.ruleset:
            a_set = IDSRuleSet(x0.ruleset | {rule})
            b_set = IDSRuleSet(y0.ruleset - {rule})

            a_value = self.objective_function.evaluate(a_set) - self.objective_function.evaluate(x0)
            b_value = self.objective_function.evaluate(b_set) - self.objective_function.evaluate(y0)

            a_max = max(a_value, 0)
            b_max = max(b_value, 0)

            x_probability = 1
            y_probability = 0
            
            if not (a_max == 0 and b_max == 0):
                x_probability = a_max / (a_max + b_max)
                y_probability = b_max / (a_max + b_max)

            if np.random.uniform() <= x_probability:
                x0.ruleset.add(rule)

            if np.random.uniform() <= y_probability:
                y0.ruleset.remove(rule)                

        x_value = self.objective_function.evaluate(x0)
        y_value = self.objective_function.evaluate(y0)

        if x_value > y_value:
            return x0.ruleset
        else:
            return y0.ruleset

In [39]:
params.params["lambda_array"] = [0, 0, 1, 0, 0, 0, 0]

rusm_opt = RandomizedUSMOptimizer(obj_func, params)
solution_set = rusm_opt.optimize()
obj_func.evaluate(IDSRuleSet(solution_set))

140800.0

In [40]:
ids_ruleset.ruleset

{IDSRule {Passenger_Cat=1st_class,Age_Cat=child} => {Died=0} sup: 0.00 conf: 1.00, f1: 0.01, len: 3, id: 71,
 IDSRule {Gender=male,Passenger_Cat=1st_class,Age_Cat=child} => {Died=0} sup: 0.00 conf: 1.00, f1: 0.01, len: 4, id: 70,
 IDSRule {Passenger_Cat=2nd_class,Gender=male,Age_Cat=child} => {Died=0} sup: 0.01 conf: 1.00, f1: 0.01, len: 4, id: 72,
 IDSRule {Passenger_Cat=2nd_class,Age_Cat=child,Gender=female} => {Died=0} sup: 0.01 conf: 1.00, f1: 0.01, len: 4, id: 74,
 IDSRule {Passenger_Cat=2nd_class,Age_Cat=child} => {Died=0} sup: 0.01 conf: 1.00, f1: 0.02, len: 3, id: 73,
 IDSRule {Passenger_Cat=2nd_class,Gender=female,Age_Cat=adult} => {Died=0} sup: 0.04 conf: 0.87, f1: 0.07, len: 4, id: 56,
 IDSRule {Passenger_Cat=2nd_class,Gender=female} => {Died=0} sup: 0.04 conf: 0.88, f1: 0.08, len: 3, id: 57,
 IDSRule {Passenger_Cat=2nd_class,Gender=male} => {Died=1} sup: 0.05 conf: 0.82, f1: 0.10, len: 3, id: 48,
 IDSRule {Passenger_Cat=2nd_class,Gender=male,Age_Cat=adult} => {Died=1} sup: 

In [41]:
solution_set

{IDSRule {Passenger_Cat=1st_class,Age_Cat=child} => {Died=0} sup: 0.00 conf: 1.00, f1: 0.01, len: 3, id: 71,
 IDSRule {Passenger_Cat=2nd_class,Gender=male,Age_Cat=child} => {Died=0} sup: 0.01 conf: 1.00, f1: 0.01, len: 4, id: 72,
 IDSRule {Passenger_Cat=2nd_class,Gender=female} => {Died=0} sup: 0.04 conf: 0.88, f1: 0.08, len: 3, id: 57,
 IDSRule {Passenger_Cat=1st_class,Gender=female} => {Died=0} sup: 0.06 conf: 0.96, f1: 0.12, len: 3, id: 45,
 IDSRule {Passenger_Cat=3rd_class,Age_Cat=adult} => {Died=1} sup: 0.22 conf: 0.76, f1: 0.35, len: 3, id: 14,
 IDSRule {Passenger_Cat=crew,Age_Cat=adult} => {Died=1} sup: 0.30 conf: 0.75, f1: 0.43, len: 3, id: 3}

In [42]:
ids = IDS()
ids.fit(quant_dataframe_train, class_association_rules=cars)

cover cache prepared
overlap cache prepared
INFO - Number of input rules: 20
INFO - RandomOptimizer estimated the OPTIMUM value as: 285428.0
INFO - Threshold value (2/(n*n) * OPT) = 1427.14. This is the standard error treshold value.
INFO - Estimating omega for rule: IDSRule {Passenger_Cat=2nd_class,Gender=male,Age_Cat=adult} => {Died=1} sup: 0.05 conf: 0.90, f1: 0.10, len: 4, id: 47
INFO - stardard error of omega estimate: 314.7247527602492
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule {Gender=male,Passenger_Cat=crew,Age_Cat=adult} => {Died=1} sup: 0.29 conf: 0.77, f1: 0.42, len: 4, id: 4
INFO - stardard error of omega estimate: 350.94909887332665
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule {Passenger_Cat=2nd_class,Age_Cat=child} => {Died=0} sup: 0.01 conf: 1.00, f1: 0.02, len: 3, id: 73
INFO - stardard error of omega estimate: 305.4660128393992
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule 

INFO - stardard error of omega estimate: 334.3166253718172
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule {Passenger_Cat=2nd_class,Gender=male,Age_Cat=child} => {Died=0} sup: 0.01 conf: 1.00, f1: 0.01, len: 4, id: 72
INFO - stardard error of omega estimate: 378.8699684588368
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule {Passenger_Cat=crew,Age_Cat=adult} => {Died=1} sup: 0.30 conf: 0.75, f1: 0.43, len: 3, id: 3
INFO - stardard error of omega estimate: 351.2196421044814
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule {Passenger_Cat=3rd_class,Age_Cat=adult} => {Died=1} sup: 0.22 conf: 0.76, f1: 0.35, len: 3, id: 14
INFO - stardard error of omega estimate: 483.83557951022993
INFO - omega succesfully estimated


<pyids.data_structures.ids_classifier.IDS at 0x109ff2cd438>

In [43]:
ids.score(quant_dataframe_train)

0.7471590909090909

In [44]:
np.where([True, True, True, False, False, True], [1, 1, 1, 0, 0, 1], [0, 0, 0, 1, 1, 0])

array([1, 1, 1, 1, 1, 1])

In [45]:
ids.score_auc(quant_dataframe_train, confidence_based=True)

0.6855568673750493

In [57]:
mutliclass_quant_df = QuantitativeDataFrame(pd.read_csv("../data/iris1.csv"))

ids_one_vs_all = IDSOneVsAll()


ids_one_vs_all.fit(mutliclass_quant_df)

training class: Iris-setosa
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=5
Rule count: 54, Iteration: 1
Increasing maxlen 4
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=5
Rule count: 98, Iteration: 2
Increasing maxlen 5
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=5, MAX_RULE_LEN=5
Rule count: 115, Iteration: 3
Decreasing confidence to 0.45
Running apriori with setting: confidence=0.45, support=0.0, minlen=2, maxlen=5, MAX_RULE_LEN=5
Rule count: 115, Iteration: 4
Decreasing confidence to 0.4
Running apriori with setting: confidence=0.4, support=0.0, minlen=2, maxlen=5, MAX_RULE_LEN=5
Rule count: 115, Iteration: 5
Decreasing confidence to 0.35000000000000003
Running apriori with setting: confidence=0.35000000000000003, support=0.0, minlen=2, maxlen=5, MAX_RULE_LEN=5
Rule count: 115, Iteration: 6
Decreasing confidence to 0.30000000000000004
Running apriori with set

In [59]:
ids_one_vs_all.score_auc(mutliclass_quant_df)

scoring class: Iris-setosa
scoring class: Iris-versicolor
scoring class: Iris-virginica


0.9777777777777779