In [1]:
%run ../../main.py

In [2]:
from cba.algorithms import top_rules
from cba.data_structures import TransactionDB, Consequent, Antecedent, Item, ClassAssocationRule
from cba.algorithms import M1Algorithm, M2Algorithm, createCARs
import pandas as pd
from sklearn.metrics import accuracy_score

# Root folder of the association-rule experiment data; the train/ and test/
# CSV folds loaded by func() live under this same path.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
directory = "c:/code/python/machine_learning/assoc_rules"

def func(datasetname, unique_transactions=True, base_dir=None, max_rules=1000):
    """Train M1 and M2 classifiers on one dataset fold and score both on its test split.

    Parameters
    ----------
    datasetname : str
        File stem of the fold. ``<base_dir>/train/<datasetname>.csv`` is used
        for training and ``<base_dir>/test/<datasetname>.csv`` for evaluation.
    unique_transactions : bool, default True
        Forwarded to ``TransactionDB.from_DataFrame`` for the training split
        only; the test split is loaded with that method's default.
    base_dir : str, optional
        Folder containing the ``train/`` and ``test/`` sub-folders. Defaults
        to the module-level ``directory`` constant.
    max_rules : int or None, default 1000
        Upper bound on the number of class association rules (taken from the
        front of the descending sort) handed to the classifier builders.
        ``None`` disables the cap.

    Returns
    -------
    tuple
        ``(acc, accM2)``: test-set accuracy of the M1-built and the M2-built
        classifier, in that order.
    """
    if base_dir is None:
        # Fall back to the notebook-level data root instead of repeating the
        # absolute path inline.
        base_dir = directory

    train_path = "{}/train/{}.csv".format(base_dir, datasetname)
    test_path = "{}/test/{}.csv".format(base_dir, datasetname)

    txns = TransactionDB.from_DataFrame(
        pd.read_csv(train_path),
        unique_transactions=unique_transactions,
    )
    txns_test = TransactionDB.from_DataFrame(pd.read_csv(test_path))

    # Mine candidate rules from the training transactions only.
    rules = top_rules(txns.string_representation, appearance=txns.appeardict)
    rules.sort(reverse=True)

    cars = createCARs(rules)
    cars.sort(reverse=True)

    # Keep only the leading rules of the descending sort; slicing with None
    # keeps the whole list.
    cars = cars[:max_rules]

    print("len(rules)", len(cars))

    # Both builders receive the same rule list and training transactions.
    m1 = M1Algorithm(cars, txns)
    m2 = M2Algorithm(cars, txns)

    m1clf = m1.build()
    m2clf = m2.build()

    actual = [label.value for label in txns_test.class_labels]

    pred = m1clf.predict_all(txns_test)
    predM2 = m2clf.predict_all(txns_test)

    # scikit-learn's documented argument order is (y_true, y_pred).
    acc = accuracy_score(actual, pred)
    accM2 = accuracy_score(actual, predM2)

    return acc, accM2



def mean_func(dataset_name, start=0, end=10):
    """Average the M1 and M2 accuracies of one dataset over its numbered folds.

    Each fold is named ``dataset_name`` followed by its index, for indices in
    ``range(start, end)``, and is evaluated with ``func``. Per-fold accuracies
    are printed as they are produced.

    Returns
    -------
    tuple
        ``(mean M1 accuracy, mean M2 accuracy)`` across the evaluated folds.
    """
    fold_names = [dataset_name + repr(fold) for fold in range(start, end)]

    # One (M1 accuracy, M2 accuracy) pair per fold, in fold order.
    fold_scores = []
    for fold_name in fold_names:
        m1_acc, m2_acc = func(fold_name)
        print("done", fold_name, m1_acc)
        print("done m2", fold_name, m2_acc)
        fold_scores.append((m1_acc, m2_acc))

    m1_scores = [pair[0] for pair in fold_scores]
    m2_scores = [pair[1] for pair in fold_scores]

    mean_m1 = sum(m1_scores) / len(m1_scores)
    mean_m2 = sum(m2_scores) / len(m2_scores)
    return mean_m1, mean_m2


                
# Benchmark datasets. Each name is the prefix of the numbered fold files
# that mean_func() evaluates.
datasets = [
    "iris", "breast-w", "anneal", "hypothyroid",
    "ionosphere", "lymph", "vehicle", "autos",
    "diabetes", "glass", "heart-h", "tic-tac-toe",
    "australian", "sick", "segment", "spambase",
    "sonar", "vowel", "hepatitis", "credit-a",
    "mushroom", "house-votes-84", "soybean", "primary-tumor",
    "credit-g", "audiology", "breast-cancer", "balance-scale",
    "heart-c", "kr-vs-kp", "pima", "heart-statlog",
]

# (dataset, mean accuracy) pairs, one list per classifier variant.
means, meansM2 = [], []

for dataset in datasets:
    acc, accM2 = mean_func(dataset)

    means.append((dataset, acc))
    meansM2.append((dataset, accM2))

    # Per-dataset summary between star banners.
    print("*****")
    print("M1", dataset, acc)
    print("M2", dataset, accM2)
    print("******")

# Final dump of both result lists.
print("M1")
print(means)
print("\nM2")
print(meansM2)

Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=5
Rule count: 96, Iteration: 1
Increasing maxlen 4
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=5
Rule count: 166, Iteration: 2
Increasing maxlen 5
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=5, MAX_RULE_LEN=5
Rule count: 190, Iteration: 3
Decreasing confidence to 0.45
Running apriori with setting: confidence=0.45, support=0.0, minlen=2, maxlen=5, MAX_RULE_LEN=5
Rule count: 190, Iteration: 4
Decreasing confidence to 0.4
Running apriori with setting: confidence=0.4, support=0.0, minlen=2, maxlen=5, MAX_RULE_LEN=5
Rule count: 190, Iteration: 5
Decreasing confidence to 0.35000000000000003
Running apriori with setting: confidence=0.35000000000000003, support=0.0, minlen=2, maxlen=5, MAX_RULE_LEN=5
Rule count: 190, Iteration: 6
Decreasing confidence to 0.30000000000000004
Running apriori with setting: confidence=0.30000000

Rule count: 4056, Iteration: 2
Target rule count satisfied: 1000
len(rules) 1000
done breast-w3 0.9
done m2 breast-w3 0.9
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=10
Rule count: 708, Iteration: 1
Increasing maxlen 4
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=10
Rule count: 4241, Iteration: 2
Target rule count satisfied: 1000
len(rules) 1000
done breast-w4 0.971428571429
done m2 breast-w4 0.971428571429
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=10
Rule count: 639, Iteration: 1
Increasing maxlen 4
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=10
Rule count: 3718, Iteration: 2
Target rule count satisfied: 1000
len(rules) 1000
done breast-w5 0.957142857143
done m2 breast-w5 0.957142857143
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=10
Rule count: 688

len(rules) 1000
done autos7 1.0
done m2 autos7 1.0
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=26
Rule count: 7920, Iteration: 1
Target rule count satisfied: 1000
len(rules) 1000
done autos8 0.842105263158
done m2 autos8 0.842105263158
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=26
Rule count: 8066, Iteration: 1
Target rule count satisfied: 1000
len(rules) 1000
done autos9 0.611111111111
done m2 autos9 0.611111111111
*****
M1 autos 0.809591823762
M2 autos 0.809591823762
******
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=9
Rule count: 307, Iteration: 1
Increasing maxlen 4
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=9
Rule count: 1376, Iteration: 2
Target rule count satisfied: 1000
len(rules) 1000
done diabetes0 0.753246753247
done m2 diabetes0 0.753246753247
Running apriori with setting: confidence

M2 glass 0.680905953949
******
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=14
Rule count: 736, Iteration: 1
Increasing maxlen 4
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=14
Rule count: 4916, Iteration: 2
Target rule count satisfied: 1000
len(rules) 1000
done heart-h0 0.5
done m2 heart-h0 0.5
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=14
Rule count: 737, Iteration: 1
Increasing maxlen 4
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=14
Rule count: 4920, Iteration: 2
Target rule count satisfied: 1000
len(rules) 1000
done heart-h1 0.633333333333
done m2 heart-h1 0.633333333333
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=14
Rule count: 739, Iteration: 1
Increasing maxlen 4
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=4, MAX

done australian1 0.885714285714
done m2 australian1 0.885714285714
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=15
Rule count: 719, Iteration: 1
Increasing maxlen 4
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=15
Rule count: 5601, Iteration: 2
Target rule count satisfied: 1000
len(rules) 1000
done australian2 0.871428571429
done m2 australian2 0.871428571429
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=15
Rule count: 667, Iteration: 1
Increasing maxlen 4
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=15
Rule count: 5002, Iteration: 2
Target rule count satisfied: 1000
len(rules) 1000
done australian3 0.898550724638
done m2 australian3 0.898550724638
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=15
Rule count: 667, Iteration: 1
Increasing maxlen 4
Running apri

Target rule count satisfied: 1000
len(rules) 1000
done hepatitis5 0.666666666667
done m2 hepatitis5 0.666666666667
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=20
Rule count: 1441, Iteration: 1
Target rule count satisfied: 1000
len(rules) 1000
done hepatitis6 0.8
done m2 hepatitis6 0.8
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=20
Rule count: 1532, Iteration: 1
Target rule count satisfied: 1000
len(rules) 1000
done hepatitis7 0.733333333333
done m2 hepatitis7 0.733333333333
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=20
Rule count: 1511, Iteration: 1
Target rule count satisfied: 1000
len(rules) 1000
done hepatitis8 0.8
done m2 hepatitis8 0.8
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=20
Rule count: 1555, Iteration: 1
Target rule count satisfied: 1000
len(rules) 1000
done hepatitis9 0.733333333333

len(rules) 1000
done house-votes-849 0.904761904762
done m2 house-votes-849 0.904761904762
*****
M1 house-votes-84 0.92640944327
M2 house-votes-84 0.92640944327
******
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=36
Rule count: 28250, Iteration: 1
Target rule count satisfied: 1000
len(rules) 1000
done soybean0 0.783783783784
done m2 soybean0 0.783783783784
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=36
Rule count: 28330, Iteration: 1
Target rule count satisfied: 1000
len(rules) 1000
done soybean1 0.833333333333
done m2 soybean1 0.833333333333
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=36
Rule count: 28220, Iteration: 1
Target rule count satisfied: 1000
len(rules) 1000
done soybean2 0.816901408451
done m2 soybean2 0.816901408451
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=36
Rule count: 28300, Iter

done audiology3 0.521739130435
done m2 audiology3 0.521739130435
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=70
Rule count: 73127, Iteration: 1
Target rule count satisfied: 1000
len(rules) 1000
done audiology4 0.45
done m2 audiology4 0.45
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=70
Rule count: 75283, Iteration: 1
Target rule count satisfied: 1000
len(rules) 1000
done audiology5 0.458333333333
done m2 audiology5 0.458333333333
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=70
Rule count: 71453, Iteration: 1
Target rule count satisfied: 1000
len(rules) 1000
done audiology6 0.47619047619
done m2 audiology6 0.47619047619
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=70
Rule count: 72829, Iteration: 1
Target rule count satisfied: 1000
len(rules) 1000
done audiology7 0.454545454545
done m2 audiology7 0.45

done heart-c0 0.935483870968
done m2 heart-c0 0.935483870968
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=14
Rule count: 817, Iteration: 1
Increasing maxlen 4
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=14
Rule count: 6128, Iteration: 2
Target rule count satisfied: 1000
len(rules) 1000
done heart-c1 0.677419354839
done m2 heart-c1 0.677419354839
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=14
Rule count: 814, Iteration: 1
Increasing maxlen 4
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=14
Rule count: 6113, Iteration: 2
Target rule count satisfied: 1000
len(rules) 1000
done heart-c2 0.774193548387
done m2 heart-c2 0.774193548387
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=14
Rule count: 819, Iteration: 1
Increasing maxlen 4
Running apriori with setting: 

Target rule count satisfied: 1000
len(rules) 1000
done pima6 0.662337662338
done m2 pima6 0.662337662338
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=9
Rule count: 279, Iteration: 1
Increasing maxlen 4
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=9
Rule count: 1236, Iteration: 2
Target rule count satisfied: 1000
len(rules) 1000
done pima7 0.753246753247
done m2 pima7 0.753246753247
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=9
Rule count: 274, Iteration: 1
Increasing maxlen 4
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=4, MAX_RULE_LEN=9
Rule count: 1162, Iteration: 2
Target rule count satisfied: 1000
len(rules) 1000
done pima8 0.723684210526
done m2 pima8 0.723684210526
Running apriori with setting: confidence=0.5, support=0.0, minlen=2, maxlen=3, MAX_RULE_LEN=9
Rule count: 279, Iteration: 1
Increasing maxlen 4
Run

In [3]:

# Turn the (dataset, mean accuracy) pairs into one-column frames indexed by
# dataset name: every dict key becomes a column holding a single value, and
# the transpose flips those columns into rows.
m1df = pd.DataFrame({name: [score] for name, score in means}).T
m1df.columns = ["m1"]

m2df = pd.DataFrame({name: [score] for name, score in meansM2}).T
m2df.columns = ["m2"]

# Side-by-side M1/M2 table, persisted to CSV.
acc_df = m1df.join(m2df)
acc_df.to_csv("../data/accuracies.csv")

#acc_df = pd.read_csv("../data/accuracies.csv")



In [4]:
# Show the per-dataset M1/M2 accuracy table as this cell's output.
acc_df

Unnamed: 0,m1,m2
anneal,0.991061,0.991061
audiology,0.455563,0.459911
australian,0.859381,0.859381
autos,0.809592,0.809592
balance-scale,0.740807,0.739219
breast-cancer,0.706708,0.706708
breast-w,0.951343,0.951343
credit-a,0.831947,0.831947
credit-g,0.733,0.733
diabetes,0.743558,0.743558


In [5]:
# Load the ARC accuracies and line them up with the M1/M2 table by dataset name.
# NOTE(review): the same computation is repeated in two later cells.
ARC_accs = pd.read_csv("../data/arc_accs3.csv")

ARC_accs_mod = (
    ARC_accs.loc[:, ["dataset", "accuracy"]]
    .set_index("dataset")
    .rename(columns={"accuracy": "ARC"})
)

# DataFrame.join defaults to a left join on the index, so every row of acc_df
# is kept. .mean() skips NaN, so a dataset absent from the ARC file would be
# silently left out of the ARC average only.
accs = acc_df.join(ARC_accs_mod)

for heading, column in (("M1", "m1"), ("M2", "m2"), ("ARC", "ARC")):
    print(heading, accs[column].mean())

M1 0.8105017932631686
M2 0.8092027413359664
ARC 0.7821874999999999


In [6]:
# NOTE(review): this cell repeats the ARC comparison already done in an
# earlier cell; re-running it recomputes the same `accs` table.
ARC_accs = pd.read_csv("../data/arc_accs3.csv")

# Keep only the accuracy column, keyed by dataset and labelled "ARC".
ARC_accs_mod = (
    ARC_accs.loc[:, ["dataset", "accuracy"]]
    .set_index("dataset")
    .rename(columns={"accuracy": "ARC"})
)

# Left join on the dataset index: all rows of acc_df are kept.
accs = acc_df.join(ARC_accs_mod)

for heading, column in (("M1", "m1"), ("M2", "m2"), ("ARC", "ARC")):
    print(heading, accs[column].mean())

M1 0.8105017932631686
M2 0.8092027413359664
ARC 0.7821874999999999


In [8]:
# Show the per-dataset M1/M2 accuracy table again (same frame as the earlier display cell).
acc_df

Unnamed: 0,m1,m2
anneal,0.991061,0.991061
audiology,0.455563,0.459911
australian,0.859381,0.859381
autos,0.809592,0.809592
balance-scale,0.740807,0.739219
breast-cancer,0.706708,0.706708
breast-w,0.951343,0.951343
credit-a,0.831947,0.831947
credit-g,0.733,0.733
diabetes,0.743558,0.743558


In [10]:
# NOTE(review): another copy of the ARC comparison from earlier cells. It is
# kept because the `accs` frame it (re)builds is used by the percentage table
# further down.
ARC_accs = pd.read_csv("../data/arc_accs3.csv")

# Keep only the accuracy column, keyed by dataset and labelled "ARC".
ARC_accs_mod = (
    ARC_accs.loc[:, ["dataset", "accuracy"]]
    .set_index("dataset")
    .rename(columns={"accuracy": "ARC"})
)

# Left join on the dataset index: all rows of acc_df are kept.
accs = acc_df.join(ARC_accs_mod)

for heading, column in (("M1", "m1"), ("M2", "m2"), ("ARC", "ARC")):
    print(heading, accs[column].mean())

M1 0.8105017932631686
M2 0.8092027413359664
ARC 0.7821874999999999


In [27]:
# Accuracies of the three methods as percentages, rounded to one decimal place.
printaccs = accs.loc[:, ["m1", "m2", "ARC"]].mul(100).round(1)

In [28]:
# Write the rounded percentage table (m1, m2, ARC per dataset) to CSV.
printaccs.to_csv("../data/all_accuracies.csv")