In [1]:
%run ../../main.py

In [None]:
from cba.algorithms import top_rules
from cba.data_structures import TransactionDB, Consequent, Antecedent, Item, ClassAssocationRule
from cba.algorithms import M1Algorithm, M2Algorithm
import pandas as pd
from sklearn.metrics import accuracy_score

# Root folder of the dataset CSVs; the train/ and test/ files read by func
# sit under this same location.
directory = "c:/code/python/machine_learning/assoc_rules"

def func(datasetname, base_dir=None, max_rules=1000):
    """Train the M1 and M2 classifiers on one dataset split and score both.

    Reads ``<base_dir>/train/<datasetname>.csv`` and
    ``<base_dir>/test/<datasetname>.csv``, mines rules from the training
    transactions with ``top_rules``, wraps them in ``ClassAssocationRule``
    objects, keeps at most ``max_rules`` of them after sorting, builds one
    classifier per algorithm and measures accuracy on the test file.

    Args:
        datasetname: File stem of the split (no ``.csv`` suffix).
        base_dir: Folder holding the ``train`` and ``test`` subfolders.
            Defaults to the module-level ``directory``.
        max_rules: Upper bound on the number of sorted rules handed to the
            algorithms. ``None`` keeps every rule.

    Returns:
        Tuple ``(acc, accM2)``: test accuracy of the M1 classifier and of
        the M2 classifier, in that order.
    """
    if base_dir is None:
        base_dir = directory

    # Plain string formatting (not os.path.join) keeps the forward-slash
    # paths exactly as they were spelled before.
    train_path = "{}/train/{}.csv".format(base_dir, datasetname)
    test_path = "{}/test/{}.csv".format(base_dir, datasetname)

    txns = TransactionDB.from_pandasdf(pd.read_csv(train_path))
    txns_test = TransactionDB.from_pandasdf(pd.read_csv(test_path))

    rules = top_rules(txns.string_representation, appearance=txns.appeardict)

    # list.sort is stable, so this pre-sort determines the relative order of
    # any rules that compare equal in the ClassAssocationRule sort below.
    rules.sort(reverse=True)

    cars = []
    for con_tmp, ant_tmp, support, confidence in rules:
        # Consequent and antecedent items arrive as "attribute=value" strings.
        con = Consequent(*con_tmp.split("="))
        ant = Antecedent([Item(*i.split("=")) for i in ant_tmp])

        # NOTE(review): id_rule receives the antecedent length rather than a
        # unique identifier -- kept as-is, confirm this is intended.
        car = ClassAssocationRule(
            ant, con, support=support, confidence=confidence, id_rule=len(ant)
        )
        cars.append(car)

    cars.sort(reverse=True)

    # Slicing is a no-op when there are fewer than max_rules rules.
    cars = cars[:max_rules]

    print("len(rules)", len(cars))

    # Both algorithm objects are created before either one is built, as in
    # the original flow.
    m1 = M1Algorithm(cars, txns)
    m2 = M2Algorithm(cars, txns)

    m1clf = m1.build()
    m2clf = m2.build()

    actual = [label.value for label in txns_test.class_labels]

    pred = m1clf.predict_all(txns_test)
    predM2 = m2clf.predict_all(txns_test)

    # accuracy_score is documented as (y_true, y_pred); the value is the same
    # either way, but the conventional order avoids confusion.
    acc = accuracy_score(actual, pred)
    accM2 = accuracy_score(actual, predM2)

    return acc, accM2



def mean_func(dataset_name):
    """Average the M1 and M2 accuracies over ten numbered splits of a dataset.

    Runs ``func`` on ``<dataset_name>0`` through ``<dataset_name>9`` and
    prints both accuracies of each split as soon as it finishes.

    Args:
        dataset_name: Common prefix of the split names.

    Returns:
        Tuple ``(mn, mnM2)``: mean M1 accuracy and mean M2 accuracy.
    """
    m1_scores = []
    m2_scores = []

    for fold in range(10):
        split_name = dataset_name + str(fold)
        m1_score, m2_score = func(split_name)

        print("done", split_name, m1_score)
        print("done m2", split_name, m2_score)

        m1_scores.append(m1_score)
        m2_scores.append(m2_score)

    m1_mean = sum(m1_scores) / len(m1_scores)
    m2_mean = sum(m2_scores) / len(m2_scores)

    return m1_mean, m2_mean


                
# Names of the datasets available for evaluation. The loop below currently
# evaluates only "ionosphere".
datasets = [
    "breast-w",
    "anneal",
    "hypothyroid",
    "ionosphere",
    "lymph",
    "vehicle",
    "autos",
    "diabetes",
    "glass",
    "heart-h",
    "tic-tac-toe",
    "australian",
]

# Collects one (dataset, mean M1 accuracy) pair per evaluated dataset.
means = []
for dataset in ["ionosphere"]:
    acc, accM2 = mean_func(dataset)

    # Summary framed by a 5-star opening and a 6-star closing line.
    print("*" * 5)
    print("M1", dataset, acc)
    print("M2", dataset, accM2)
    print("*" * 6)

    means += [(dataset, acc)]


print(means)



Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=35
Rule count: 11176, Iteration: 1
Target rule count satisfied: 1000
len(rules) 1000
done ionosphere0 0.916666666667
done m2 ionosphere0 0.916666666667
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=35
Rule count: 12443, Iteration: 1
Target rule count satisfied: 1000
len(rules) 1000
done ionosphere1 0.916666666667
done m2 ionosphere1 0.916666666667
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=35
Rule count: 11668, Iteration: 1
Target rule count satisfied: 1000
len(rules) 1000
done ionosphere2 0.972222222222
done m2 ionosphere2 0.972222222222
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=35
Rule count: 12162, Iteration: 1
Target rule count satisfied: 1000
len(rules) 1000
done ionosphere3 0.972222222222
done m2 ionosphere3 0.972222222222
Running apriori with set