In [9]:
from pyarc.data_structures import ClassAssocationRule
from pyids.data_structures import IDSRule

In [10]:
def print_cars(ids_cars):
    """Render a sequence of rules as LaTeX, one rule per paragraph.

    Despite the name, nothing is printed: the LaTeX source is returned so the
    caller can print or save it.

    Parameters
    ----------
    ids_cars : sequence
        Either ``IDSRule`` objects (each exposing ``.car`` and ``.calc_f1()``)
        or plain rule objects accepted by ``car_to_latex``. The kind is decided
        from the first element, so the sequence is expected to be homogeneous.

    Returns
    -------
    str
        The rendered rules separated by blank lines; an empty string when
        ``ids_cars`` is empty.
    """
    # Guard first: indexing [0] below would raise IndexError on an empty input.
    if len(ids_cars) == 0:
        return ""

    # isinstance (rather than an exact type comparison) also accepts subclasses.
    if isinstance(ids_cars[0], IDSRule):
        # IDS rules wrap the underlying rule and carry an f1 score to display.
        rendered = [
            car_to_latex(rule.car, is_ids=rule.calc_f1()) for rule in ids_cars
        ]
    else:
        rendered = [car_to_latex(car) for car in ids_cars]

    return "\n\n".join(rendered)
        
def car_to_latex(car, is_ids=False):
    """Render one class association rule as a coloured LaTeX sentence.

    The result has the form ``If <attr>=<value> and ... then <class>=<value>``
    using ``\\textcolor`` for attribute names (blue), values (gray) and the
    class name (red).

    Parameters
    ----------
    car : object
        Rule exposing ``antecedent`` (an iterable of ``(name, value)`` pairs)
        and ``consequent`` (a ``(class_name, class_value)`` pair).
    is_ids : float or False, optional
        f1-score to append to the sentence. ``False`` (the default) or ``None``
        means no score is appended; any number, including ``0.0``, is shown
        rounded to two decimals.

    Returns
    -------
    str
        LaTeX source for the rule.
    """
    # Characters that are special in LaTeX text mode. Ordinary punctuation such
    # as "-", ".", "*" and "]" must NOT be backslash-escaped: "\-", "\.", "\*"
    # and "\]" are LaTeX commands (discretionary hyphen, dot accent, ...) and
    # would corrupt the rendered text.
    latex_escapes = str.maketrans({
        "\\": r"\textbackslash{}",
        "_": r"\_",
        "$": r"\$",
        "&": r"\&",
        "%": r"\%",
        "#": r"\#",
        "{": r"\{",
        "}": r"\}",
        "^": r"\^{}",
        "~": r"\textasciitilde{}",
    })

    def escape(value):
        # str() lets non-string values (e.g. numeric class labels) be rendered.
        return str(value).translate(latex_escapes)

    latex = ["If"]

    for idx, (name, interval) in enumerate(car.antecedent):
        if idx != 0:
            latex.append("and")
        latex.append(
            f"\\textcolor{{blue}}{{{escape(name)}}}=\\textcolor{{gray}}{{{escape(interval)}}}"
        )

    class_name, class_value = car.consequent

    latex.append("then")
    latex.append(
        f"\\textcolor{{red}}{{{escape(class_name)}}}=\\textcolor{{gray}}{{{escape(class_value)}}}"
    )

    # Compare against the sentinels explicitly: a plain truthiness test would
    # silently drop a legitimate f1-score of 0.0.
    if is_ids is not False and is_ids is not None:
        latex.append(
            f"$\\mid$ f1-score: \\textit{{{round(is_ids, 2)}}}"
        )

    return " ".join(latex)

# IDS Demonstration

## Importing required libraries

In [11]:
import pandas as pd
import pyarc
from pyids import IDS
from pyids.data_structures import mine_CARs
from pyids.rule_mining import RuleMiner
from pyarc.qcba.data_structures import QuantitativeDataFrame

from sklearn.model_selection import train_test_split

## Loading the test and train data

In [12]:
# Fixed seed so the shuffle and the train/test split (and therefore every
# score further down) are reproducible after Restart Kernel -> Run All.
RANDOM_SEED = 42

data = (
    pd.read_csv("../data/titanic.csv")
    .sample(frac=1, random_state=RANDOM_SEED)  # shuffle all rows
    .reset_index(drop=True)
)

# NOTE(review): test_size=0.8 holds out 80% of the rows for testing and trains
# on only 20% -- confirm this small training split is intentional.
data_train, data_test = train_test_split(
    data, test_size=0.8, random_state=RANDOM_SEED
)

In [13]:
# Preview the first rows of the shuffled dataset to check the column layout.
data.head()

Unnamed: 0,Passenger_Cat,Age_Cat,Gender,Died
0,crew,adult,male,1
1,1st_class,adult,male,1
2,1st_class,adult,male,1
3,2nd_class,child,female,0
4,3rd_class,adult,male,1


In [14]:
# Mine candidate class association rules from the training split only.
# RuleMiner is constructed with no arguments, i.e. with its own defaults.
rm = RuleMiner()
rules = rm.mine_rules(data_train)

[CAR {Age_Cat=adult} => {Died=0} sup: 0.93 conf: 0.00 len: 2, id: 46, CAR {Age_Cat=adult} => {Died=1} sup: 0.93 conf: 0.00 len: 2, id: 47, CAR {Gender=male} => {Died=0} sup: 0.74 conf: 0.00 len: 2, id: 42, CAR {Gender=male} => {Died=1} sup: 0.74 conf: 0.00 len: 2, id: 43, CAR {Age_Cat=adult,Gender=male} => {Died=0} sup: 0.71 conf: 0.00 len: 3, id: 44, CAR {Age_Cat=adult,Gender=male} => {Died=1} sup: 0.71 conf: 0.00 len: 3, id: 45, CAR {Passenger_Cat=crew} => {Died=0} sup: 0.37 conf: 0.00 len: 2, id: 36, CAR {Passenger_Cat=crew} => {Died=1} sup: 0.37 conf: 0.00 len: 2, id: 37, CAR {Passenger_Cat=crew,Age_Cat=adult} => {Died=0} sup: 0.37 conf: 0.00 len: 3, id: 34, CAR {Passenger_Cat=crew,Age_Cat=adult} => {Died=1} sup: 0.37 conf: 0.00 len: 3, id: 35, CAR {Passenger_Cat=crew,Gender=male} => {Died=0} sup: 0.37 conf: 0.00 len: 3, id: 40, CAR {Passenger_Cat=crew,Gender=male} => {Died=1} sup: 0.37 conf: 0.00 len: 3, id: 41, CAR {Passenger_Cat=crew,Age_Cat=adult,Gender=male} => {Died=0} sup: 0

In [15]:
# Render the mined rules as LaTeX source and print it.
# NOTE(review): the stored output of this cell is a TypeError -- re-run on a
# fresh kernel to confirm whether it is stale or a real failure.
print(print_cars(rules))

TypeError: 'bool' object is not subscriptable

In [None]:
# Wrap both splits in QuantitativeDataFrame; these wrapped objects are what
# IDS.fit and the scoring calls later in the notebook receive.
quant_dataframe_train = QuantitativeDataFrame(data_train)
quant_dataframe_test = QuantitativeDataFrame(data_test)

In [None]:
# Display 20 rows of the training split in random order: sample() draws as
# many rows as the frame has, head(20) keeps the first 20 of that draw.
data_train.sample(len(data_train)).head(20)

## Mining the Class Association Rules (CARs)

## Training the IDS Model

In [8]:
# Fit the IDS model on the mined rules and the wrapped training split, with
# debug output enabled and the optimiser's random seed fixed at 5.
# NOTE(review): the stored output is a NameError for quant_dataframe_train and
# the execution count is out of sequence -- this cell was run before the cell
# that defines the QuantitativeDataFrames; re-run the notebook top to bottom.
ids = IDS()
ids.fit(class_association_rules=rules, quant_dataframe=quant_dataframe_train, debug=True, random_seed=5)

NameError: name 'quant_dataframe_train' is not defined

In [65]:
# Print the rules stored on the fitted classifier (ids.clf.rules) as LaTeX;
# the stored output shows each rule with its f1-score appended.
print(print_cars(ids.clf.rules))

If \textcolor{blue}{Gender}=\textcolor{gray}{male} then \textcolor{red}{Died}=\textcolor{gray}{1} $\mid$ f1-score: \textit{0.77}

If \textcolor{blue}{Age\_Cat}=\textcolor{gray}{adult} and \textcolor{blue}{Gender}=\textcolor{gray}{male} then \textcolor{red}{Died}=\textcolor{gray}{1} $\mid$ f1-score: \textit{0.76}

If \textcolor{blue}{Age\_Cat}=\textcolor{gray}{adult} and \textcolor{blue}{Passenger\_Cat}=\textcolor{gray}{crew} then \textcolor{red}{Died}=\textcolor{gray}{1} $\mid$ f1-score: \textit{0.55}

If \textcolor{blue}{Age\_Cat}=\textcolor{gray}{adult} then \textcolor{red}{Died}=\textcolor{gray}{0} $\mid$ f1-score: \textit{0.5}

If \textcolor{blue}{Passenger\_Cat}=\textcolor{gray}{3rd\_class} then \textcolor{red}{Died}=\textcolor{gray}{1} $\mid$ f1-score: \textit{0.45}

If \textcolor{blue}{Passenger\_Cat}=\textcolor{gray}{3rd\_class} and \textcolor{blue}{Age\_Cat}=\textcolor{gray}{adult} then \textcolor{red}{Died}=\textcolor{gray}{1} $\mid$ f1-score: \textit{0.42}

If \textcolor{blu

## Evaluating the IDS model

In [70]:
# Report ids.score on both splits; similar train and test values suggest the
# model is not overfitting the (small) training split.
print("Accuracy on train data: ", ids.score(quant_dataframe_train))
print("Accuracy on test data: ", ids.score(quant_dataframe_test))

Accuracy on train data:  0.7585227272727273
Accuracy on test data:  0.7544357700496807


In [73]:
# Report ids.score_auc on both splits, alongside the accuracy figures above.
print("AUC on train data: ", ids.score_auc(quant_dataframe_train))
print("AUC on test data: ", ids.score_auc(quant_dataframe_test))

AUC on train data:  0.6708796025715955
AUC on test data:  0.6672898110768755


In [67]:
# Interpretability metrics of the fitted rule set evaluated on the test split;
# the stored output is a dict with overlap, class coverage, uncovered fraction,
# average rule width and rule-set length.
ids.score_interpretable_metrics(quant_dataframe_test)

{'fraction_overlap': 0.32008516678495386,
 'fraction_classes': 1.0,
 'fraction_uncovered': 0.00709723207948898,
 'average_rule_width': 1.4285714285714286,
 'ruleset_length': 7}