In [1]:
from pyarc.data_structures import ClassAssocationRule
from pyids.data_structures import (
    IDSRule,
    IDSRuleSet,
    IDSClassifier,
    IDSObjectiveFunction,
    ObjectiveFunctionParameters,
    ObjectiveFunctionParameters,
    RSOptimizer,
    DLSOptimizer,
    SLSOptimizer,
    RandomizedUSMOptimizer,
    DeterministicUSMOptimizer
)

In [3]:
def print_cars(ids_cars):
    """Render a collection of rules as LaTeX, one rule per paragraph.

    Despite its name this function prints nothing; it returns the LaTeX
    source as a string.

    Parameters
    ----------
    ids_cars : iterable
        Either plain class association rules or ``IDSRule`` wrappers. The
        kind is decided from the first element only, so the collection is
        expected to be homogeneous. For ``IDSRule`` inputs the wrapped
        ``car`` is rendered and the value of ``calc_f1()`` is passed to
        ``car_to_latex`` so the score can be appended.

    Returns
    -------
    str
        The rendered rules separated by blank lines, or ``""`` when
        ``ids_cars`` is empty.
    """
    # Materialise first so that sets and generators work as well as lists;
    # indexing the argument directly fails for a set of rules.
    rules = list(ids_cars)
    if not rules:
        return ""

    if isinstance(rules[0], IDSRule):
        rendered = [
            car_to_latex(rule.car, is_ids=rule.calc_f1()) for rule in rules
        ]
    else:
        rendered = [car_to_latex(rule) for rule in rules]

    return "\n\n".join(rendered)
        
# Characters that are special in LaTeX text mode, mapped to commands that
# typeset them literally. Characters such as "-", ".", "*" and "]" need no
# escaping in text mode; prefixing them with a backslash (regex-style) turns
# them into unrelated LaTeX commands (accents, line breaks, math delimiters).
_LATEX_ESCAPES = str.maketrans({
    "\\": r"\textbackslash{}",
    "{": r"\{",
    "}": r"\}",
    "_": r"\_",
    "$": r"\$",
    "%": r"\%",
    "&": r"\&",
    "#": r"\#",
    "^": r"\textasciicircum{}",
    "~": r"\textasciitilde{}",
})


def _escape_latex(value):
    """Return ``str(value)`` with LaTeX special characters escaped."""
    # ``str.translate`` works in a single pass, so the backslashes and braces
    # introduced by a replacement are never escaped a second time.
    return str(value).translate(_LATEX_ESCAPES)


def car_to_latex(car, is_ids=False):
    """Render one class association rule as a coloured LaTeX sentence.

    Parameters
    ----------
    car : object
        Rule exposing ``antecedent`` (an iterable of ``(name, interval)``
        pairs) and ``consequent`` (a ``(class_name, class_value)`` pair).
    is_ids : bool or number, optional
        ``False`` (the default) renders the rule only. Any other value is
        treated as the rule's f1-score and appended, rounded to two decimals.
        A score of ``0.0`` is rendered too.

    Returns
    -------
    str
        ``If <name>=<interval> and ... then <class>=<value>``, optionally
        followed by ``$\\mid$ f1-score: ...``. The output uses ``\\textcolor``,
        so the consuming document needs a package that provides it.
    """
    latex = ["If"]

    for idx, (name, interval) in enumerate(car.antecedent):
        escaped_name = _escape_latex(name)
        escaped_interval = _escape_latex(interval)

        if idx != 0:
            latex.append("and")
        latex.append(
            f"\\textcolor{{blue}}{{{escaped_name}}}=\\textcolor{{gray}}{{{escaped_interval}}}"
        )

    # The consequent is escaped exactly like the antecedent so that class
    # names such as "Passenger_Cat" do not break the LaTeX output.
    class_name, class_value = car.consequent
    escaped_class_name = _escape_latex(class_name)
    escaped_class_value = _escape_latex(class_value)

    latex.append("then")
    latex.append(
        f"\\textcolor{{red}}{{{escaped_class_name}}}=\\textcolor{{gray}}{{{escaped_class_value}}}"
    )

    # Identity checks instead of truthiness: a legitimate score of 0.0 is
    # falsy and would otherwise be dropped silently.
    if is_ids is not False and is_ids is not None:
        latex.append(
            f"$\\mid$ f1-score: \\textit{{{round(is_ids, 2)}}}"
        )

    return " ".join(latex)

# IDS Demonstration

## Importing required libraries

In [4]:
import pandas as pd
import pyarc
from pyids import IDS
from pyids.data_structures import mine_CARs
from pyids.rule_mining import RuleMiner
from pyarc.qcba.data_structures import QuantitativeDataFrame

from sklearn.model_selection import train_test_split

## Loading the test and train data

In [5]:
# Fixed seed so that the shuffle and the train/test split are reproducible
# when the notebook is restarted and run from the top.
RANDOM_SEED = 42

# Shuffle the rows once, then renumber them 0..n-1.
data = (
    pd.read_csv("../data/titanic.csv")
    .sample(frac=1, random_state=RANDOM_SEED)
    .reset_index(drop=True)
)

# NOTE(review): test_size=0.8 leaves only 20 % of the rows for training.
# Confirm this is intentional (e.g. to keep rule mining fast) and not a
# swapped train/test proportion.
data_train, data_test = train_test_split(
    data, test_size=0.8, random_state=RANDOM_SEED
)

In [6]:
data.head()

Unnamed: 0,Passenger_Cat,Age_Cat,Gender,Died
0,3rd_class,adult,female,1
1,crew,adult,male,0
2,3rd_class,adult,female,1
3,1st_class,adult,male,1
4,2nd_class,adult,male,1


In [7]:
# Mine class association rules from the training split. The second positional
# argument is presumably the rule cutoff (a later cell calls
# mine_CARs(data_train, rule_cutoff=20)) — confirm against pyids.
cars = mine_CARs(data_train, 20)
# Keep the first mined rule for the single-rule experiments in the next cells.
car = cars[0]

Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=4
Rule count: 50, Iteration: 1
Increasing maxlen 4
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 71, Iteration: 2
Decreasing confidence to 0.45
Running apriori with setting: confidence=0.45, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 71, Iteration: 3
Decreasing confidence to 0.4
Running apriori with setting: confidence=0.4, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 71, Iteration: 4
Decreasing confidence to 0.35000000000000003
Running apriori with setting: confidence=0.35000000000000003, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 71, Iteration: 5
Decreasing confidence to 0.30000000000000004
Running apriori with setting: confidence=0.30000000000000004, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 71, Iteration: 6
Decreasing confidence to 0.25000000000000006
Running apriori

In [8]:
ids_rule = IDSRule(car)

In [9]:
# Wrap both splits in pyarc's QuantitativeDataFrame; this is the object later
# passed to calculate_cover, rule_overlap, the objective function parameters
# and ids.fit / ids.score.
quant_dataframe_train = QuantitativeDataFrame(data_train)
quant_dataframe_test = QuantitativeDataFrame(data_test)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [10]:
type(quant_dataframe_train)

pyarc.qcba.data_structures.quant_dataset.QuantitativeDataFrame

In [11]:
ids_rule.cache_prepared

False

In [12]:
ids_rule.cover_cache

{'cover': None,
 'correct_cover': None,
 'incorrect_cover': None,
 'rule_cover': None}

In [13]:
ids_rule1 = IDSRule(cars[0])
ids_rule1.cover_cache

{'cover': None,
 'correct_cover': None,
 'incorrect_cover': None,
 'rule_cover': None}

In [14]:
ids_rule1 = IDSRule(cars[0])
ids_rule2 = IDSRule(cars[1])

print(ids_rule1)
print(ids_rule2)

IDSRule {Passenger_Cat=1st_class,Gender=female} => {Died=0} sup: 0.07 conf: 1.00, f1: 0.13, len: 3, id: 43
IDSRule {Passenger_Cat=1st_class,Gender=female,Age_Cat=adult} => {Died=0} sup: 0.07 conf: 1.00, f1: 0.13, len: 4, id: 42


In [15]:
ids_rule1.calculate_cover(quant_dataframe_train)
ids_rule2.calculate_cover(quant_dataframe_train)

In [16]:
ids_rule1.rule_overlap(ids_rule2, quant_dataframe_train)

array([False, False,  True, False, False, False, False,  True, False,
       False, False, False, False, False, False, False, False, False,
        True, False, False, False, False,  True, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
        True,  True, False, False, False,  True, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,  True, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,  True, False, False, False, False, False, False,
       False, False, False, False, False,  True, False, False, False,
       False,  True, False, False, False, False, False, False, False,
       False, False,

In [17]:
type(data_train)

pandas.core.frame.DataFrame

In [18]:
type(mine_CARs(data_train, rule_cutoff=20)[0])

Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=4
Rule count: 50, Iteration: 1
Increasing maxlen 4
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 71, Iteration: 2
Decreasing confidence to 0.45
Running apriori with setting: confidence=0.45, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 71, Iteration: 3
Decreasing confidence to 0.4
Running apriori with setting: confidence=0.4, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 71, Iteration: 4
Decreasing confidence to 0.35000000000000003
Running apriori with setting: confidence=0.35000000000000003, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 71, Iteration: 5
Decreasing confidence to 0.30000000000000004
Running apriori with setting: confidence=0.30000000000000004, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 71, Iteration: 6
Decreasing confidence to 0.25000000000000006
Running apriori

pyarc.data_structures.car.ClassAssocationRule

In [19]:
ruleset = IDSRuleSet.from_cba_rules(cars)

In [20]:
ruleset.sum_rule_length(), ruleset.max_rule_length()

(45, 3)

In [21]:
ids_ruleset = IDSRuleSet.from_cba_rules(cars)

In [22]:
type(ids_ruleset)

pyids.data_structures.ids_ruleset.IDSRuleSet

In [23]:
# Collect the inputs for the objective function in one parameter object.
params = ObjectiveFunctionParameters()

# Candidate rules, their count, and the training data they are scored on.
params.params["all_rules"] = ids_ruleset
params.params["len_all_rules"] = len(ids_ruleset.ruleset)
params.params["quant_dataframe"] = quant_dataframe_train
# Seven weights, all equal to 1 here.
params.params["lambda_array"] = 7 * [1]


In [24]:
obj_func = IDSObjectiveFunction(params)

cover cache prepared
overlap cache prepared


In [25]:
rule_subset = IDSRuleSet(list(ids_ruleset.ruleset)[2:5])

In [26]:
rule_subset.ruleset

{IDSRule {Passenger_Cat=2nd_class,Age_Cat=child,Gender=male} => {Died=0} sup: 0.00 conf: 1.00, f1: 0.01, len: 4, id: 68,
 IDSRule {Passenger_Cat=1st_class,Gender=female,Age_Cat=adult} => {Died=0} sup: 0.07 conf: 1.00, f1: 0.13, len: 4, id: 42,
 IDSRule {Gender=male,Passenger_Cat=crew,Age_Cat=adult} => {Died=1} sup: 0.32 conf: 0.78, f1: 0.45, len: 4, id: 4}

In [27]:
obj_func.evaluate(ids_ruleset)

280276.0

In [28]:
# Run RSOptimizer over the full candidate rule set.
rs_opt = RSOptimizer(input_set=ids_ruleset.ruleset)

# NOTE(review): `solution_set` is reassigned by every optimizer cell further
# down, so its content depends on which of those cells ran last.
solution_set = rs_opt.optimize()

In [29]:
solution_set

{IDSRule {Passenger_Cat=2nd_class,Age_Cat=child,Gender=female} => {Died=0} sup: 0.01 conf: 1.00, f1: 0.02, len: 4, id: 70,
 IDSRule {Passenger_Cat=2nd_class,Age_Cat=child} => {Died=0} sup: 0.01 conf: 1.00, f1: 0.02, len: 3, id: 69,
 IDSRule {Passenger_Cat=3rd_class,Age_Cat=child,Gender=female} => {Died=1} sup: 0.01 conf: 0.83, f1: 0.03, len: 4, id: 66,
 IDSRule {Passenger_Cat=3rd_class,Age_Cat=child} => {Died=1} sup: 0.02 conf: 0.78, f1: 0.04, len: 3, id: 61,
 IDSRule {Passenger_Cat=2nd_class,Gender=female} => {Died=0} sup: 0.05 conf: 0.86, f1: 0.10, len: 3, id: 55,
 IDSRule {Passenger_Cat=3rd_class,Gender=male,Age_Cat=adult} => {Died=1} sup: 0.18 conf: 0.86, f1: 0.30, len: 4, id: 15,
 IDSRule {Passenger_Cat=3rd_class} => {Died=1} sup: 0.23 conf: 0.82, f1: 0.36, len: 2, id: 17,
 IDSRule {Passenger_Cat=crew} => {Died=1} sup: 0.32 conf: 0.76, f1: 0.45, len: 2, id: 6,
 IDSRule {Gender=male,Age_Cat=adult} => {Died=1} sup: 0.62 conf: 0.79, f1: 0.69, len: 3, id: 1}

In [30]:
obj_func.evaluate(IDSRuleSet(solution_set))

285902.0

In [31]:
dls_opt = DLSOptimizer(obj_func, params)

In [32]:
solution_set = dls_opt.optimize()

Testing if rule is good to add IDSRule {Passenger_Cat=2nd_class,Gender=male} => {Died=1} sup: 0.07 conf: 0.86, f1: 0.13, len: 3, id: 46
Testing if rule is good to add IDSRule {Passenger_Cat=3rd_class,Gender=male} => {Died=1} sup: 0.18 conf: 0.86, f1: 0.30, len: 3, id: 16
Testing if rule is good to add IDSRule {Passenger_Cat=2nd_class,Gender=female} => {Died=0} sup: 0.05 conf: 0.86, f1: 0.10, len: 3, id: 55
Testing if rule is good to add IDSRule {Passenger_Cat=2nd_class,Age_Cat=child,Gender=female} => {Died=0} sup: 0.01 conf: 1.00, f1: 0.02, len: 4, id: 70
Testing if rule is good to add IDSRule {Passenger_Cat=3rd_class,Age_Cat=adult} => {Died=1} sup: 0.21 conf: 0.82, f1: 0.33, len: 3, id: 14
Testing if rule is good to add IDSRule {Gender=male,Age_Cat=adult} => {Died=1} sup: 0.62 conf: 0.79, f1: 0.69, len: 3, id: 1
Testing if rule is good to add IDSRule {Passenger_Cat=3rd_class,Age_Cat=child} => {Died=1} sup: 0.02 conf: 0.78, f1: 0.04, len: 3, id: 61
Testing if rule is good to add IDSRul

In [33]:
obj_func.evaluate(IDSRuleSet(solution_set))

288805.0

In [34]:
# Optimise with SLSOptimizer (overwriting `solution_set`) and display the
# objective value of the rule set it returns.
sls_opt = SLSOptimizer(obj_func, params)
solution_set = sls_opt.optimize()
obj_func.evaluate(IDSRuleSet(solution_set))

INFO - Number of input rules: 20
INFO - RandomOptimizer estimated the OPTIMUM value as: 286851.0
INFO - Threshold value (2/(n*n) * OPT) = 1434.255. This is the standard error treshold value.
INFO - Estimating omega for rule: IDSRule {Passenger_Cat=2nd_class,Gender=male} => {Died=1} sup: 0.07 conf: 0.86, f1: 0.13, len: 3, id: 46
INFO - stardard error of omega estimate: 343.61607209209524
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule {Passenger_Cat=3rd_class,Age_Cat=adult} => {Died=1} sup: 0.21 conf: 0.82, f1: 0.33, len: 3, id: 14
INFO - stardard error of omega estimate: 401.4816720598837
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule {Gender=male,Passenger_Cat=crew,Age_Cat=adult} => {Died=1} sup: 0.32 conf: 0.78, f1: 0.45, len: 4, id: 4
INFO - stardard error of omega estimate: 324.60143252918647
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule {Passenger_Cat=2nd_class,Age_Cat=child,Gender=male} => {

INFO - stardard error of omega estimate: 423.4204730524966
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule {Passenger_Cat=3rd_class,Age_Cat=child,Gender=female} => {Died=1} sup: 0.01 conf: 0.83, f1: 0.03, len: 4, id: 66
INFO - stardard error of omega estimate: 247.37878850055031
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule {Passenger_Cat=2nd_class,Gender=male,Age_Cat=adult} => {Died=1} sup: 0.07 conf: 0.89, f1: 0.13, len: 4, id: 45
INFO - stardard error of omega estimate: 387.25856607698165
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule {Gender=male} => {Died=1} sup: 0.62 conf: 0.79, f1: 0.70, len: 2, id: 2
INFO - stardard error of omega estimate: 266.4906546203825
INFO - omega succesfully estimated


286719.0

In [35]:
# Build a classifier from the rule set produced by the preceding optimizer cell.
ids_clf = IDSClassifier(solution_set)

# The training frame is attached by hand before the default class is computed
# — presumably calculate_default_class() reads this attribute; confirm
# against pyids.
ids_clf.quant_dataframe_train = quant_dataframe_train

ids_clf.calculate_default_class()

In [36]:
ids_clf.default_class

'1'

In [37]:
ids_clf.default_class_confidence

0.6060606060606061

In [38]:
import numpy as np

class RandomizedUSMOptimizer:
    """Randomized double-greedy optimizer for unconstrained submodular maximization.

    Two rule sets are maintained: ``x0`` starts empty and ``y0`` starts as the
    full candidate set. Every candidate rule is visited once and, by a single
    random draw, is either added to ``x0`` or removed from ``y0``. After the
    last rule both sets therefore contain the same rules.

    NOTE(review): a class with this name is also imported from
    ``pyids.data_structures`` at the top of the notebook; this definition
    shadows it from here on.
    """

    def __init__(self, objective_function, objective_func_params, debug=True, random_seed=None):
        """Store the collaborators and optionally seed NumPy's global RNG.

        Parameters
        ----------
        objective_function : object
            Must provide ``evaluate(rule_set)`` returning a number.
        objective_func_params : object
            Must provide ``params["all_rules"]`` whose ``ruleset`` attribute
            is the set of candidate rules.
        debug : bool, optional
            Stored on the instance; not used by this class.
        random_seed : int or None, optional
            When not ``None``, passed to ``np.random.seed``. Note that this
            seeds the process-wide NumPy generator.
        """
        self.objective_function_params = objective_func_params
        self.objective_function = objective_function
        self.debug = debug

        # ``is not None`` rather than truthiness so that seed 0 is honoured.
        if random_seed is not None:
            np.random.seed(random_seed)

    def optimize(self):
        """Run one pass of the randomized double greedy.

        Returns
        -------
        set
            The ``ruleset`` of whichever of ``x0`` / ``y0`` has the higher
            objective value (``y0`` on ties).
        """
        all_rules = self.objective_function_params.params["all_rules"]

        x0 = IDSRuleSet(set())
        y0 = IDSRuleSet({rule for rule in all_rules.ruleset})

        for rule in all_rules.ruleset:
            a_set = IDSRuleSet(x0.ruleset | {rule})
            b_set = IDSRuleSet(y0.ruleset - {rule})

            # Marginal gain of adding the rule to x0 / removing it from y0.
            a_value = self.objective_function.evaluate(a_set) - self.objective_function.evaluate(x0)
            b_value = self.objective_function.evaluate(b_set) - self.objective_function.evaluate(y0)

            a_max = max(a_value, 0)
            b_max = max(b_value, 0)

            # When neither move helps, the rule is added to x0 (probability 1).
            if a_max == 0 and b_max == 0:
                x_probability = 1
            else:
                x_probability = a_max / (a_max + b_max)

            # Exactly one of the two moves must happen per rule. Two
            # independent draws could apply both or neither, which breaks
            # the "x0 is a subset of y0" invariant of the algorithm.
            # Strict "<" because uniform() samples from [0, 1): a
            # probability of 0 must never fire, a probability of 1 always.
            if np.random.uniform() < x_probability:
                x0.ruleset.add(rule)
            else:
                y0.ruleset.remove(rule)

        x_value = self.objective_function.evaluate(x0)
        y_value = self.objective_function.evaluate(y0)

        if x_value > y_value:
            return x0.ruleset
        else:
            return y0.ruleset

In [39]:
# Keep only the third of the seven objective weights.
# NOTE(review): this mutates the shared `params` object in place, so every
# later use of `params` sees these weights instead of the all-ones array set
# earlier — consider building a separate ObjectiveFunctionParameters here.
params.params["lambda_array"] = [0, 0, 1, 0, 0, 0, 0]

# Run the randomized USM optimizer and display the objective value of the
# rule set it returns.
rusm_opt = RandomizedUSMOptimizer(obj_func, params)
solution_set = rusm_opt.optimize()
obj_func.evaluate(IDSRuleSet(solution_set))

140800.0

In [40]:
ids_ruleset.ruleset

{IDSRule {Passenger_Cat=2nd_class,Age_Cat=child,Gender=male} => {Died=0} sup: 0.00 conf: 1.00, f1: 0.01, len: 4, id: 68,
 IDSRule {Passenger_Cat=2nd_class,Age_Cat=child,Gender=female} => {Died=0} sup: 0.01 conf: 1.00, f1: 0.02, len: 4, id: 70,
 IDSRule {Passenger_Cat=2nd_class,Age_Cat=child} => {Died=0} sup: 0.01 conf: 1.00, f1: 0.02, len: 3, id: 69,
 IDSRule {Passenger_Cat=3rd_class,Age_Cat=child,Gender=female} => {Died=1} sup: 0.01 conf: 0.83, f1: 0.03, len: 4, id: 66,
 IDSRule {Passenger_Cat=3rd_class,Age_Cat=child} => {Died=1} sup: 0.02 conf: 0.78, f1: 0.04, len: 3, id: 61,
 IDSRule {Passenger_Cat=2nd_class,Gender=female,Age_Cat=adult} => {Died=0} sup: 0.04 conf: 0.83, f1: 0.08, len: 4, id: 54,
 IDSRule {Passenger_Cat=2nd_class,Gender=female} => {Died=0} sup: 0.05 conf: 0.86, f1: 0.10, len: 3, id: 55,
 IDSRule {Passenger_Cat=2nd_class,Gender=male} => {Died=1} sup: 0.07 conf: 0.86, f1: 0.13, len: 3, id: 46,
 IDSRule {Passenger_Cat=2nd_class,Gender=male,Age_Cat=adult} => {Died=1} sup

In [41]:
solution_set

{IDSRule {Passenger_Cat=2nd_class,Age_Cat=child,Gender=female} => {Died=0} sup: 0.01 conf: 1.00, f1: 0.02, len: 4, id: 70,
 IDSRule {Passenger_Cat=3rd_class,Age_Cat=child,Gender=female} => {Died=1} sup: 0.01 conf: 0.83, f1: 0.03, len: 4, id: 66,
 IDSRule {Passenger_Cat=1st_class,Gender=female} => {Died=0} sup: 0.07 conf: 1.00, f1: 0.13, len: 3, id: 43,
 IDSRule {Gender=male} => {Died=1} sup: 0.62 conf: 0.79, f1: 0.70, len: 2, id: 2}

In [42]:
# Fit the high-level IDS model on the training frame, reusing the rules mined
# earlier instead of mining again.
ids = IDS()
ids.fit(quant_dataframe_train, class_association_rules=cars)

cover cache prepared
overlap cache prepared
INFO - Number of input rules: 20
INFO - RandomOptimizer estimated the OPTIMUM value as: 284732.0
INFO - Threshold value (2/(n*n) * OPT) = 1423.66. This is the standard error treshold value.
INFO - Estimating omega for rule: IDSRule {Passenger_Cat=2nd_class,Gender=male} => {Died=1} sup: 0.07 conf: 0.86, f1: 0.13, len: 3, id: 46
INFO - stardard error of omega estimate: 391.4272767705388
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule {Passenger_Cat=3rd_class,Age_Cat=adult} => {Died=1} sup: 0.21 conf: 0.82, f1: 0.33, len: 3, id: 14
INFO - stardard error of omega estimate: 278.15152884713757
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule {Gender=male,Passenger_Cat=crew,Age_Cat=adult} => {Died=1} sup: 0.32 conf: 0.78, f1: 0.45, len: 4, id: 4
INFO - stardard error of omega estimate: 338.7727261749387
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule {Passenger_Cat

INFO - stardard error of omega estimate: 337.6761362607669
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule {Passenger_Cat=3rd_class,Age_Cat=child} => {Died=1} sup: 0.02 conf: 0.78, f1: 0.04, len: 3, id: 61
INFO - stardard error of omega estimate: 408.5011872687765
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule {Passenger_Cat=crew} => {Died=1} sup: 0.32 conf: 0.76, f1: 0.45, len: 2, id: 6
INFO - stardard error of omega estimate: 392.4117187342906
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule {Passenger_Cat=3rd_class} => {Died=1} sup: 0.23 conf: 0.82, f1: 0.36, len: 2, id: 17
INFO - stardard error of omega estimate: 428.5205771021971
INFO - omega succesfully estimated
INFO - Estimating omega for rule: IDSRule {Passenger_Cat=3rd_class,Age_Cat=child,Gender=female} => {Died=1} sup: 0.01 conf: 0.83, f1: 0.03, len: 4, id: 66
INFO - stardard error of omega estimate: 315.6288643327793
INFO - omega succesfu

<pyids.data_structures.ids_classifier.IDS at 0x26410fab208>

In [43]:
ids.score(quant_dataframe_train)

0.7926136363636364

In [44]:
np.where([True, True, True, False, False, True], [1, 1, 1, 0, 0, 1], [0, 0, 0, 1, 1, 0])

array([1, 1, 1, 1, 1, 1])

In [45]:
# NOTE(review): this call currently fails with the TypeError captured in the
# output below ("unsupported operand type(s) for -: 'int' and 'list'"). The
# failing expression `1 - predicted_classes` is not part of the code visible
# in this notebook, so the fix presumably belongs in score_auc itself.
ids.score_auc(quant_dataframe_train, confidence_based=True)

  corrected_confidences = np.where(pred == "1", predicted_classes, 1 - predicted_classes)


TypeError: unsupported operand type(s) for -: 'int' and 'list'