# pyARC QCBA demonstration

This notebook demonstrates how to use the QCBA algorithm in pyARC.

# Import libraries from pyarc

In [60]:
from pyarc import CBA
from pyarc.data_structures import TransactionDB
from pyarc.qcba.data_structures import QuantitativeDataFrame
import pandas as pd
from pyarc.qcba import QCBA

In [61]:
from pyarc.qcba.data_structures import (
    IntervalReader,
    Interval,
    QuantitativeDataFrame,
    QuantitativeCAR
)

## Defining interval reader for reading data

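First, we look at the format of our data and define a matching interval reader. In the discretized files, intervals are written as strings such as `-inf_to_5.55` or `2.95_to_3.35`: the bounds are joined by `_to_`, infinity is spelled `inf`, and there are no bracket characters.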

In [62]:
# Configure how interval strings from the discretized CSV are parsed.
# The data encodes intervals like `-inf_to_5.55` (see the `head()` output
# further down): two bounds joined by `_to_`, with no bracket characters.
interval_reader = IntervalReader()

# Each setting is a pair of tokens.
# NOTE(review): presumably (left, right) markers — confirm against pyARC docs.
interval_reader.closed_bracket = ("", "NULL")
interval_reader.open_bracket = ("NULL", "")
interval_reader.infinity_symbol = ("inf", "inf")
interval_reader.members_separator = "_to_"

# The reader has to be compiled after its settings are assigned.
interval_reader.compile_reader()

# Install the reader on the QuantitativeCAR class itself (class attribute).
QuantitativeCAR.interval_reader = interval_reader

Then we read in our data.

In [63]:
# Root folder holding the iris fold CSVs. It is kept in one place so the
# notebook can be pointed at a different location by editing a single line.
DATA_DIR = "c:/code/python/machine_learning/assoc_rules"

# The training fold is loaded in two forms: discretized (interval strings,
# used by CBA) and undiscretized (used by QCBA further down).
data_train_discretized = pd.read_csv(DATA_DIR + "/train/iris0.csv")
data_train_undiscretized = pd.read_csv(DATA_DIR + "/folds_undiscr/train/iris0.csv")
data_test = pd.read_csv(DATA_DIR + "/test/iris0.csv")

# Transaction databases built from the discretized training frame and the test frame.
txns_train = TransactionDB.from_DataFrame(data_train_discretized)
txns_test = TransactionDB.from_DataFrame(data_test)

In [64]:
# Wrap both views of the training fold (discretized first, undiscretized
# second) in QuantitativeDataFrame objects.
quant_dataframe_train_disc, quant_dataframe_train_undisc = map(
    QuantitativeDataFrame,
    (data_train_discretized, data_train_undiscretized),
)

In [65]:
# Preview the first five rows of the discretized training data.
data_train_discretized.head(n=5)

Unnamed: 0,sepallength,petalwidth,sepalwidth,petallength,class
0,-inf_to_5.55,-inf_to_0.8,3.35_to_inf,-inf_to_2.45,Iris-setosa
1,-inf_to_5.55,-inf_to_0.8,2.95_to_3.35,-inf_to_2.45,Iris-setosa
2,-inf_to_5.55,-inf_to_0.8,2.95_to_3.35,-inf_to_2.45,Iris-setosa
3,-inf_to_5.55,-inf_to_0.8,3.35_to_inf,-inf_to_2.45,Iris-setosa
4,-inf_to_5.55,-inf_to_0.8,3.35_to_inf,-inf_to_2.45,Iris-setosa


In [66]:
# Build a CBA classifier with no arguments, so the library defaults apply.
cba = CBA()
# Fit on the transactions built from the discretized training data.
cba.fit(txns_train)
# Display accuracy on the same training transactions (not a held-out score).
cba.rule_model_accuracy(txns_train)

0.9407407407407408

# Run QCBA optimization

In [67]:
# QCBA takes the undiscretized training frame together with the CBA model
# fitted above.
qcba_cba = QCBA(
    quant_dataframe_train_undisc,
    cba_rule_model=cba,
)

# fit() prints the name of each stage as it runs; the value it returns is
# shown as the cell output (a QuantitativeClassifier in the captured run).
qcba_cba.fit()

applying selected transformations
refitting
literal pruning
trimming
extending
[                                                  ]
[############                                      ]
[#########################                         ]
[#####################################             ]
post pruning
overlap pruning


<pyarc.qcba.classifier.QuantitativeClassifier at 0x20479fedc88>

# Evaluate QCBA model

In [68]:
# Both figures are accuracies on the training fold: CBA is scored on the
# discretized transactions, QCBA on the undiscretized quantitative frame.
print("CBA accuracy:", cba.rule_model_accuracy(txns_train))
# Score the fitted QCBA instance created above as `qcba_cba`; the bare name
# `qcba` is never defined in this notebook and raises NameError on a fresh kernel.
print("QCBA accuracy:", qcba_cba.score(quant_dataframe_train_undisc))

CBA accuracy: 0.9407407407407408
QCBA accuracy: 0.9629629629629629
