In [1]:
from pyarc.data_structures import ClassAssocationRule
from pyids.data_structures import IDSRule

In [2]:
def print_cars(ids_cars):
    cars = ids_cars
    is_ids = False
    if type(ids_cars[0]) == IDSRule:
        is_ids = [ r.calc_f1() for r in ids_cars ]
        cars = [ r.car for r in ids_cars ]
    latex = []
    for idx, car in enumerate(cars):
        if is_ids:
            latex.append(car_to_latex(car, is_ids=is_ids[idx]))
        else:
            latex.append(car_to_latex(car))
        
    return "\n\n".join(latex)
        
def car_to_latex(car, is_ids=False):
    trans = {
        "-":  r"\-",
        "_":  r"\_",
        "]":  r"\]",
        "\\": r"\\",
        "^":  r"\^",
        "$":  r"\$",
        "*":  r"\*",
        ".":  r"\."
    }
    latex = []
    
    latex.append("If")
    
    for idx, predicate in enumerate(car.antecedent):
       
        name, interval = predicate
        name = name.translate(str.maketrans(trans))
        
        interval_string = interval.translate(str.maketrans(trans))
        
        if idx != 0:
            latex.append("and")
        latex.append(
            f"\\textcolor{{blue}}{{{name}}}=\\textcolor{{gray}}{{{interval_string}}}"
        )

        
    class_name, class_value = car.consequent
    
    latex.append("then")
    latex.append(
        f"\\textcolor{{red}}{{{class_name}}}=\\textcolor{{gray}}{{{class_value}}}"
    )
    
    if is_ids:
        latex.append(
            f"$\mid$ f1-score: \\textit{{{round(is_ids, 2)}}}"
        )
    
    return " ".join(latex)

# IDS Demonstration

## Importing required libraries

In [3]:
import pandas as pd
import pyarc
from pyids import IDS
from pyids.data_structures import mine_CARs
from pyids.rule_mining import RuleMiner
from pyarc.qcba.data_structures import QuantitativeDataFrame

from sklearn.model_selection import train_test_split

## Loading the test and train data

In [4]:
data = pd.read_csv("../data/titanic.csv").sample(frac=1).reset_index(drop=True)


data_train, data_test = train_test_split(data, test_size=0.8)

In [5]:
data.head()

Unnamed: 0,Passenger_Cat,Age_Cat,Gender,Died
0,3rd_class,adult,female,1
1,2nd_class,adult,male,1
2,3rd_class,adult,male,1
3,crew,adult,male,1
4,3rd_class,adult,male,1


In [6]:
rm = RuleMiner()
rules = rm.mine_rules(data_train)

[CAR {Age_Cat=adult} => {Died=1} sup: 0.97 conf: 0.00 len: 2, id: 22, CAR {Age_Cat=adult} => {Died=0} sup: 0.97 conf: 0.00 len: 2, id: 23, CAR {Gender=male} => {Died=1} sup: 0.78 conf: 0.00 len: 2, id: 18, CAR {Gender=male} => {Died=0} sup: 0.78 conf: 0.00 len: 2, id: 19, CAR {Gender=male,Age_Cat=adult} => {Died=1} sup: 0.76 conf: 0.00 len: 3, id: 20, CAR {Gender=male,Age_Cat=adult} => {Died=0} sup: 0.76 conf: 0.00 len: 3, id: 21, CAR {Passenger_Cat=crew} => {Died=1} sup: 0.43 conf: 0.00 len: 2, id: 12, CAR {Passenger_Cat=crew} => {Died=0} sup: 0.43 conf: 0.00 len: 2, id: 13, CAR {Passenger_Cat=crew,Age_Cat=adult} => {Died=1} sup: 0.43 conf: 0.00 len: 3, id: 10, CAR {Passenger_Cat=crew,Age_Cat=adult} => {Died=0} sup: 0.43 conf: 0.00 len: 3, id: 11, CAR {Passenger_Cat=crew,Gender=male} => {Died=1} sup: 0.42 conf: 0.00 len: 3, id: 16, CAR {Passenger_Cat=crew,Gender=male} => {Died=0} sup: 0.42 conf: 0.00 len: 3, id: 17, CAR {Passenger_Cat=crew,Gender=male,Age_Cat=adult} => {Died=1} sup: 0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item_labels[indexer[info_axis]]] = value


In [7]:
print(print_cars(rules))

If \textcolor{blue}{Age\_Cat}=\textcolor{gray}{adult} then \textcolor{red}{Died}=\textcolor{gray}{1}

If \textcolor{blue}{Age\_Cat}=\textcolor{gray}{adult} then \textcolor{red}{Died}=\textcolor{gray}{0}

If \textcolor{blue}{Gender}=\textcolor{gray}{male} then \textcolor{red}{Died}=\textcolor{gray}{1}

If \textcolor{blue}{Gender}=\textcolor{gray}{male} then \textcolor{red}{Died}=\textcolor{gray}{0}

If \textcolor{blue}{Gender}=\textcolor{gray}{male} and \textcolor{blue}{Age\_Cat}=\textcolor{gray}{adult} then \textcolor{red}{Died}=\textcolor{gray}{1}

If \textcolor{blue}{Gender}=\textcolor{gray}{male} and \textcolor{blue}{Age\_Cat}=\textcolor{gray}{adult} then \textcolor{red}{Died}=\textcolor{gray}{0}

If \textcolor{blue}{Passenger\_Cat}=\textcolor{gray}{crew} then \textcolor{red}{Died}=\textcolor{gray}{1}

If \textcolor{blue}{Passenger\_Cat}=\textcolor{gray}{crew} then \textcolor{red}{Died}=\textcolor{gray}{0}

If \textcolor{blue}{Passenger\_Cat}=\textcolor{gray}{crew} and \textcolor{bl

In [8]:
quant_dataframe_train = QuantitativeDataFrame(data_train)
quant_dataframe_test = QuantitativeDataFrame(data_test)

In [9]:
data_train.sample(len(data_train)).head(20)

Unnamed: 0,Passenger_Cat,Age_Cat,Gender,Died
1502,crew,adult,male,1
662,crew,adult,male,1
1458,crew,adult,male,1
1183,2nd_class,adult,female,0
86,crew,adult,male,1
746,crew,adult,male,1
1387,crew,adult,male,1
528,1st_class,adult,female,0
250,crew,adult,male,1
592,3rd_class,adult,female,0


## Mining the Class Association Rules (CARs)

## Training the IDS Model

In [10]:
ids = IDS()
ids.fit(class_association_rules=rules, quant_dataframe=quant_dataframe_train, debug=True, random_seed=5)

cover cache prepared
overlap cache prepared
INFO - Number of input rules: 24
INFO - RandomOptimizer estimated the OPTIMUM value as: 404959.0
INFO - Threshold value (2/(n*n) * OPT) = 1406.107638888889. This is the standard error treshold value.
INFO - Estimating omega for rule: CAR {Age_Cat=adult} => {Died=1} sup: 0.97 conf: 0.65, f1: 0.78, len: 2, id: 22
INFO - stardard error of omega estimate: 666.0749087002151
INFO - omega succesfully estimated
INFO - Estimating omega for rule: CAR {Passenger_Cat=3rd_class,Gender=male} => {Died=1} sup: 0.20 conf: 0.78, f1: 0.32, len: 3, id: 6
INFO - stardard error of omega estimate: 715.3484255941296
INFO - 715.3484255941296 > 703.0538194444445 => omega estimation continues
INFO - stardard error of omega estimate: 487.3468399148598
INFO - omega succesfully estimated
INFO - Estimating omega for rule: CAR {Passenger_Cat=crew} => {Died=1} sup: 0.43 conf: 0.75, f1: 0.55, len: 2, id: 12
INFO - stardard error of omega estimate: 1021.8076164327608
INFO - 10

INFO - stardard error of omega estimate: 715.5630747879602
INFO - 715.5630747879602 > 703.0538194444445 => omega estimation continues
INFO - stardard error of omega estimate: 510.76843825604425
INFO - omega succesfully estimated
INFO - Estimating omega for rule: CAR {Passenger_Cat=crew,Age_Cat=adult} => {Died=0} sup: 0.43 conf: 0.25, f1: 0.32, len: 3, id: 11
INFO - stardard error of omega estimate: 617.816429046687
INFO - omega succesfully estimated
INFO - Number of input rules: 24
INFO - RandomOptimizer estimated the OPTIMUM value as: 409948.0
INFO - Threshold value (2/(n*n) * OPT) = 1423.4305555555554. This is the standard error treshold value.
INFO - Estimating omega for rule: CAR {Age_Cat=adult} => {Died=1} sup: 0.97 conf: 0.65, f1: 0.78, len: 2, id: 22
INFO - stardard error of omega estimate: 642.6780974951613
INFO - omega succesfully estimated
INFO - Estimating omega for rule: CAR {Passenger_Cat=3rd_class,Gender=male} => {Died=1} sup: 0.20 conf: 0.78, f1: 0.32, len: 3, id: 6
INFO

INFO - stardard error of omega estimate: 629.1212990155396
INFO - omega succesfully estimated
INFO - Estimating omega for rule: CAR {Passenger_Cat=3rd_class} => {Died=1} sup: 0.28 conf: 0.73, f1: 0.41, len: 2, id: 4
INFO - stardard error of omega estimate: 1079.1756298212076
INFO - 1079.1756298212076 > 711.7152777777777 => omega estimation continues
INFO - stardard error of omega estimate: 775.4885639872454
INFO - 775.4885639872454 > 711.7152777777777 => omega estimation continues
INFO - stardard error of omega estimate: 584.5074723724747
INFO - omega succesfully estimated
INFO - Estimating omega for rule: CAR {Passenger_Cat=crew,Age_Cat=adult} => {Died=0} sup: 0.43 conf: 0.25, f1: 0.32, len: 3, id: 11
INFO - stardard error of omega estimate: 799.9446955883889
INFO - 799.9446955883889 > 711.7152777777777 => omega estimation continues
INFO - stardard error of omega estimate: 833.5978813252826
INFO - 833.5978813252826 > 711.7152777777777 => omega estimation continues
INFO - stardard erro

<pyids.data_structures.ids_classifier.IDS at 0x14890e20668>

In [11]:
print(print_cars(ids.clf.rules))

If \textcolor{blue}{Passenger\_Cat}=\textcolor{gray}{crew} then \textcolor{red}{Died}=\textcolor{gray}{1} $\mid$ f1-score: \textit{0.55}

If \textcolor{blue}{Gender}=\textcolor{gray}{male} then \textcolor{red}{Died}=\textcolor{gray}{0} $\mid$ f1-score: \textit{0.37}

If \textcolor{blue}{Gender}=\textcolor{gray}{female} then \textcolor{red}{Died}=\textcolor{gray}{0} $\mid$ f1-score: \textit{0.35}

If \textcolor{blue}{Passenger\_Cat}=\textcolor{gray}{3rd\_class} and \textcolor{blue}{Gender}=\textcolor{gray}{male} then \textcolor{red}{Died}=\textcolor{gray}{1} $\mid$ f1-score: \textit{0.32}

If \textcolor{blue}{Passenger\_Cat}=\textcolor{gray}{3rd\_class} and \textcolor{blue}{Age\_Cat}=\textcolor{gray}{adult} then \textcolor{red}{Died}=\textcolor{gray}{0} $\mid$ f1-score: \textit{0.27}

If \textcolor{blue}{Gender}=\textcolor{gray}{female} then \textcolor{red}{Died}=\textcolor{gray}{1} $\mid$ f1-score: \textit{0.23}

If \textcolor{blue}{Passenger\_Cat}=\textcolor{gray}{3rd\_class} and \tex

In [12]:
ids.clf.default_class

'1'

In [13]:
ids.clf.default_class_confidence

1

## Evaluating the IDS model

In [30]:
print("Accuracy on train data: ", ids.score(quant_dataframe_train))
print("Accuracy on test data: ", ids.score(quant_dataframe_test))

Accuracy on train data:  0.6590909090909091
Accuracy on test data:  0.681334279630944


In [15]:
print("AUC on train data: ", ids.score_auc(quant_dataframe_train, confidence_based=True))
print("AUC on test data: ", ids.score_auc(quant_dataframe_test, confidence_based=True))

AUC on train data:  0.6735958005249345


IndexError: index 352 is out of bounds for axis 1 with size 352

In [32]:
ids.score_interpretable_metrics(quant_dataframe_train)

{'fraction_overlap': 0.1952651515151515,
 'fraction_classes': 1.0,
 'fraction_uncovered': 0.14204545454545447,
 'average_rule_width': 1.8333333333333333,
 'ruleset_length': 6}

In [67]:
ids.score_interpretable_metrics(quant_dataframe_test)

{'fraction_overlap': 0.32008516678495386,
 'fraction_classes': 1.0,
 'fraction_uncovered': 0.00709723207948898,
 'average_rule_width': 1.4285714285714286,
 'ruleset_length': 7}