In [1]:
from pyarc.data_structures import ClassAssocationRule
from pyids.data_structures import IDSRule
from pyids.model_selection.utils import encode_label
from sklearn.metrics import roc_auc_score
import logging
import numpy as np



In [2]:
def print_cars(ids_cars):
    cars = ids_cars
    is_ids = False
    if type(ids_cars[0]) == IDSRule:
        is_ids = [ r.f1 for r in ids_cars ]
        cars = [ r.car for r in ids_cars ]
    latex = []
    for idx, car in enumerate(cars):
        if is_ids:
            latex.append(car_to_latex(car, is_ids=is_ids[idx]))
        else:
            latex.append(car_to_latex(car))
        
    return "\n\n".join(latex)
        
def car_to_latex(car, is_ids=False):
    trans = {
        "-":  r"\-",
        "_":  r"\_",
        "]":  r"\]",
        "\\": r"\\",
        "^":  r"\^",
        "$":  r"\$",
        "*":  r"\*",
        ".":  r"\."
    }
    latex = []
    
    latex.append("If")
    
    for idx, predicate in enumerate(car.antecedent):
       
        name, interval = predicate
        name = name.translate(str.maketrans(trans))
        
        interval_string = interval.translate(str.maketrans(trans))
        
        if idx != 0:
            latex.append("and")
        latex.append(
            f"\\textcolor{{blue}}{{{name}}}=\\textcolor{{gray}}{{{interval_string}}}"
        )

        
    class_name, class_value = car.consequent
    
    latex.append("then")
    latex.append(
        f"\\textcolor{{red}}{{{class_name}}}=\\textcolor{{gray}}{{{class_value}}}"
    )
    
    if is_ids:
        latex.append(
            f"$\mid$ f1-score: \\textit{{{round(is_ids, 2)}}}"
        )
    
    return " ".join(latex)

# IDS Demonstration

## Importing required libraries

In [3]:
import pandas as pd
import pyarc
from pyids import IDS
from pyids.algorithms.ids_classifier import mine_CARs
from pyids.rule_mining import RuleMiner
from pyarc.qcba.data_structures import QuantitativeDataFrame

from sklearn.model_selection import train_test_split

## Loading the test and train data

In [4]:
data = pd.read_csv("../data/titanic.csv").sample(frac=1).reset_index(drop=True)


data_train, data_test = train_test_split(data, test_size=0.8)

In [5]:
data.head()

Unnamed: 0,Passenger_Cat,Age_Cat,Gender,Died
0,1st_class,adult,male,1
1,1st_class,adult,female,0
2,crew,adult,male,0
3,1st_class,adult,male,0
4,3rd_class,adult,female,0


In [6]:
rm = RuleMiner()
rules = rm.mine_rules(data_train)

[CAR {Age_Cat=adult} => {Died=0} sup: 0.94 conf: 0.00 len: 2, id: 24, CAR {Age_Cat=adult} => {Died=1} sup: 0.94 conf: 0.00 len: 2, id: 25, CAR {Gender=male} => {Died=0} sup: 0.76 conf: 0.00 len: 2, id: 20, CAR {Gender=male} => {Died=1} sup: 0.76 conf: 0.00 len: 2, id: 21, CAR {Age_Cat=adult,Gender=male} => {Died=0} sup: 0.73 conf: 0.00 len: 3, id: 22, CAR {Age_Cat=adult,Gender=male} => {Died=1} sup: 0.73 conf: 0.00 len: 3, id: 23, CAR {Passenger_Cat=crew} => {Died=0} sup: 0.36 conf: 0.00 len: 2, id: 14, CAR {Passenger_Cat=crew} => {Died=1} sup: 0.36 conf: 0.00 len: 2, id: 15, CAR {Passenger_Cat=crew,Age_Cat=adult} => {Died=0} sup: 0.36 conf: 0.00 len: 3, id: 12, CAR {Passenger_Cat=crew,Age_Cat=adult} => {Died=1} sup: 0.36 conf: 0.00 len: 3, id: 13, CAR {Passenger_Cat=3rd_class} => {Died=0} sup: 0.35 conf: 0.00 len: 2, id: 4, CAR {Passenger_Cat=3rd_class} => {Died=1} sup: 0.35 conf: 0.00 len: 2, id: 5, CAR {Passenger_Cat=crew,Gender=male} => {Died=0} sup: 0.34 conf: 0.00 len: 3, id: 18,

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item_labels[indexer[info_axis]]] = value


In [7]:
print(print_cars(rules))

If \textcolor{blue}{Age\_Cat}=\textcolor{gray}{adult} then \textcolor{red}{Died}=\textcolor{gray}{0}

If \textcolor{blue}{Age\_Cat}=\textcolor{gray}{adult} then \textcolor{red}{Died}=\textcolor{gray}{1}

If \textcolor{blue}{Gender}=\textcolor{gray}{male} then \textcolor{red}{Died}=\textcolor{gray}{0}

If \textcolor{blue}{Gender}=\textcolor{gray}{male} then \textcolor{red}{Died}=\textcolor{gray}{1}

If \textcolor{blue}{Age\_Cat}=\textcolor{gray}{adult} and \textcolor{blue}{Gender}=\textcolor{gray}{male} then \textcolor{red}{Died}=\textcolor{gray}{0}

If \textcolor{blue}{Age\_Cat}=\textcolor{gray}{adult} and \textcolor{blue}{Gender}=\textcolor{gray}{male} then \textcolor{red}{Died}=\textcolor{gray}{1}

If \textcolor{blue}{Passenger\_Cat}=\textcolor{gray}{crew} then \textcolor{red}{Died}=\textcolor{gray}{0}

If \textcolor{blue}{Passenger\_Cat}=\textcolor{gray}{crew} then \textcolor{red}{Died}=\textcolor{gray}{1}

If \textcolor{blue}{Passenger\_Cat}=\textcolor{gray}{crew} and \textcolor{bl

In [8]:
quant_dataframe_train = QuantitativeDataFrame(data_train)
quant_dataframe_test = QuantitativeDataFrame(data_test)

In [9]:
data_train.sample(len(data_train)).head(20)

Unnamed: 0,Passenger_Cat,Age_Cat,Gender,Died
248,2nd_class,child,female,0
1190,crew,adult,male,1
897,crew,adult,male,1
1682,crew,adult,male,1
1629,2nd_class,adult,male,1
322,crew,adult,male,1
242,crew,adult,male,1
1503,2nd_class,adult,male,1
774,2nd_class,adult,male,1
22,3rd_class,adult,male,1


## Mining the Class Association Rules (CARs)

## Training the IDS Model

In [10]:
ids = IDS()
ids.fit(class_association_rules=rules, quant_dataframe=quant_dataframe_train, random_seed=5)

<pyids.algorithms.ids.IDS at 0x2cbba9afd30>

In [11]:
print(print_cars(ids.clf.rules))

If \textcolor{blue}{Passenger\_Cat}=\textcolor{gray}{crew} then \textcolor{red}{Died}=\textcolor{gray}{1} $\mid$ f1-score: \textit{0.28}

If \textcolor{blue}{Age\_Cat}=\textcolor{gray}{adult} then \textcolor{red}{Died}=\textcolor{gray}{0} $\mid$ f1-score: \textit{0.28}

If \textcolor{blue}{Passenger\_Cat}=\textcolor{gray}{3rd\_class} then \textcolor{red}{Died}=\textcolor{gray}{1} $\mid$ f1-score: \textit{0.27}

If \textcolor{blue}{Age\_Cat}=\textcolor{gray}{adult} and \textcolor{blue}{Passenger\_Cat}=\textcolor{gray}{3rd\_class} and \textcolor{blue}{Gender}=\textcolor{gray}{male} then \textcolor{red}{Died}=\textcolor{gray}{1} $\mid$ f1-score: \textit{0.18}

If \textcolor{blue}{Gender}=\textcolor{gray}{male} then \textcolor{red}{Died}=\textcolor{gray}{0} $\mid$ f1-score: \textit{0.14}

If \textcolor{blue}{Gender}=\textcolor{gray}{female} and \textcolor{blue}{Age\_Cat}=\textcolor{gray}{adult} then \textcolor{red}{Died}=\textcolor{gray}{0} $\mid$ f1-score: \textit{0.14}

If \textcolor{blu

In [12]:
ids.clf.default_class

'0'

In [13]:
ids.clf.default_class_confidence

1.0

In [14]:
pred = np.array(ids.predict(quant_dataframe_train)).astype(float)
real = data_train["Died"].astype(float)

roc_auc_score(pred, real)

0.5568641884431358

## Evaluating the IDS model

In [15]:
print("Accuracy on train data: ", ids.score(quant_dataframe_train))
print("Accuracy on test data: ", ids.score(quant_dataframe_test))

Accuracy on train data:  0.5170454545454546
Accuracy on test data:  0.5500354861603974


In [16]:
print("AUC on train data: ", ids.score_auc(quant_dataframe_train, confidence_based=False))
print("AUC on test data: ", ids.score_auc(quant_dataframe_test, confidence_based=False))

AUC on train data:  0.5655292076344708
AUC on test data:  0.5754345830971469


In [17]:
print("AUC on train data: ", ids.score_auc(quant_dataframe_train, confidence_based=True))
print("AUC on test data: ", ids.score_auc(quant_dataframe_test, confidence_based=True))

AUC on train data:  0.5573742047426258
AUC on test data:  0.571814181985046


In [18]:
ids.score_interpretability_metrics(quant_dataframe_train)

{'fraction_overlap': 0.1590119949494949,
 'fraction_classes': 1.0,
 'fraction_uncovered': 0.0028409090909090606,
 'average_rule_width': 1.7777777777777777,
 'ruleset_length': 9}

In [19]:
ids.score_interpretability_metrics(quant_dataframe_test)

{'fraction_overlap': 0.1555476697421339,
 'fraction_classes': 1.0,
 'fraction_uncovered': 0.008516678495386731,
 'average_rule_width': 1.7777777777777777,
 'ruleset_length': 9}

In [28]:
def train_ids(lambda_array):
    ids = IDS()
    ids.fit(class_association_rules=rules, quant_dataframe=quant_dataframe_train, lambda_array=lambda_array)
    
    print("ids fitted")
    
    score_dict = dict()
    score_dict["acc_train"] = ids.score(quant_dataframe_train)
    score_dict["acc_test"] = ids.score(quant_dataframe_test)
    score_dict["auc_train_classbased"] = ids.score_auc(quant_dataframe_train, confidence_based=False)
    score_dict["auc_train_confbased"] = ids.score_auc(quant_dataframe_train, confidence_based=True)
    score_dict["auc_test_classbased"] = ids.score_auc(quant_dataframe_test, confidence_based=False) 
    score_dict["auc_test_confbased"] = ids.score_auc(quant_dataframe_test, confidence_based=True)
    score_dict.update(ids.score_interpretability_metrics(quant_dataframe_test))
    
    return score_dict
    

In [30]:
logging.basicConfig(level=logging.DEBUG)
train_ids([1, 1, 1, 1, 1000, 1, 1])

ids fitted


{'acc_train': 0.7727272727272727,
 'acc_test': 0.7508871540099361,
 'auc_train_classbased': 0.6765374975901292,
 'auc_train_confbased': 0.7109697320223636,
 'auc_test_classbased': 0.6663618736559864,
 'auc_test_confbased': 0.7114532440773432,
 'fraction_overlap': 0.18872328680703418,
 'fraction_classes': 1.0,
 'fraction_uncovered': 0.029808374733853782,
 'average_rule_width': 2.0,
 'ruleset_length': 10}

In [22]:
print(data_train["Died"].value_counts(True))
print(data_test["Died"].value_counts(True))

1    0.701705
0    0.298295
Name: Died, dtype: float64
1    0.661462
0    0.338538
Name: Died, dtype: float64
