In [12]:
import pandas as pd
import numpy as np
from pyids.data_structures import IDS, mine_CARs, IDSRuleSet, mine_IDS_ruleset, IDSOneVsAll
from pyids.model_selection import CoordinateAscentOptimizer

from pyarc.qcba.data_structures import QuantitativeDataFrame

import random 
import time


In [13]:
def encode_label(actual, predicted):
    levels = set(actual)
    
    actual = np.copy(actual)
    predicted = np.copy(predicted)
    
    for idx, level in enumerate(levels):
        actual[actual == level] = idx
        predicted[predicted == level] = idx

    actual = actual.astype(int)
    predicted = predicted.astype(int)
        
    return actual, predicted

In [18]:
actual = ['OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'Iris-virginica', 'Iris-virginica', 'Iris-virginica', 'Iris-virginica', 'Iris-virginica', 'Iris-virginica', 'Iris-virginica']
predicted = ['OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'OTHER', 'Iris-virginica', 'Iris-virginica', 'Iris-virginica', 'Iris-virginica', 'Iris-virginica', 'Iris-virginica', 'Iris-virginica']

encode_label(actual, predicted)

(array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 1, 1, 1, 1, 1, 1]),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 1, 1, 1, 1, 1, 1]))

In [4]:


df = pd.read_csv("../data/titanic.csv")
cars = mine_CARs(df, 15, sample=False)
ids_ruleset = IDSRuleSet.from_cba_rules(cars).ruleset

quant_dataframe = QuantitativeDataFrame(df)

for r in reversed(sorted(cars)):
    print(r)


start = time.time()
ids = IDS()
ids.fit(class_association_rules=cars, quant_dataframe=quant_dataframe, debug=False, algorithm="RUSM", random_seed=None, lambda_array=[1, 1, 1, 1, 1, 1, 1])
end = time.time()

print(end - start)

for r in ids.clf.rules:
    print(r)

auc = ids.score_auc(quant_dataframe)

print("AUC", auc)

Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=4
Rule count: 52, Iteration: 1
Increasing maxlen 4
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 76, Iteration: 2
Decreasing confidence to 0.45
Running apriori with setting: confidence=0.45, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 76, Iteration: 3
Decreasing confidence to 0.4
Running apriori with setting: confidence=0.4, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 76, Iteration: 4
Decreasing confidence to 0.35000000000000003
Running apriori with setting: confidence=0.35000000000000003, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 76, Iteration: 5
Decreasing confidence to 0.30000000000000004
Running apriori with setting: confidence=0.30000000000000004, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 76, Iteration: 6
Decreasing confidence to 0.25000000000000006
Running apriori

AUC 0.6701905336516181


In [11]:
ids_ruleset = mine_IDS_ruleset(df, rule_cutoff=50)

quant_dataframe = QuantitativeDataFrame(df)



coordinate_ascent = CoordinateAscentOptimizer(IDSOneVsAll(), debug=True, maximum_delta_between_iterations=200, maximum_score_estimation_iterations=3, ternary_search_precision=20)
coordinate_ascent.fit(ids_ruleset, quant_dataframe, quant_dataframe, algorithm="RUSM")

Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=4
Rule count: 52, Iteration: 1
Increasing maxlen 4
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 76, Iteration: 2
Decreasing confidence to 0.45
Running apriori with setting: confidence=0.45, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 76, Iteration: 3
Decreasing confidence to 0.4
Running apriori with setting: confidence=0.4, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 76, Iteration: 4
Decreasing confidence to 0.35000000000000003
Running apriori with setting: confidence=0.35000000000000003, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 76, Iteration: 5
Decreasing confidence to 0.30000000000000004
Running apriori with setting: confidence=0.30000000000000004, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=4
Rule count: 76, Iteration: 6
Decreasing confidence to 0.25000000000000006
Running apriori

TypeError: fit() got an unexpected keyword argument 'algorithm'