Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add stats example, fix linter errors #17

Merged
merged 4 commits into from
Feb 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions examples/stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""
Example usage of the Stats class. We perform association rule mining from the basic run example and then print
out a statistical analysis of the mined association rules
"""


from niaarm import NiaARM
from niaarm.dataset import Dataset
from niaarm.stats import Stats
from niapy.algorithms.basic import DifferentialEvolution
from niapy.task import Task, OptimizationType


if __name__ == '__main__':
# Load the dataset and run the algorithm
data = Dataset("datasets/Abalone.csv")
problem = NiaARM(data.dimension, data.features, data.transactions, alpha=1.0, beta=1.0)
task = Task(problem=problem, max_iters=30, optimization_type=OptimizationType.MAXIMIZATION)
algo = DifferentialEvolution(population_size=50, differential_weight=0.5, crossover_probability=0.9)
algo.run(task=task)

# Instantiate Stats object and print basic statistics of mined rules.
stats = Stats(problem.rules)

print('\nSTATS:')
print(f'Total rules: {stats.total_rules}')
print(f'Average fitness: {stats.avg_fitness}')
print(f'Average support: {stats.avg_support}')
print(f'Average confidence: {stats.avg_confidence}')
print(f'Average coverage: {stats.avg_coverage}')
print(f'Average shrinkage: {stats.avg_shrinkage}')
print(f'Average length of antecedent: {stats.avg_ant_len}')
print(f'Average length of consequent: {stats.avg_con_len}')
5 changes: 5 additions & 0 deletions niaarm/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
from niaarm.niaarm import NiaARM
from niaarm.dataset import Dataset
from niaarm.stats import Stats


# Public API of the package.
# Fix: __all__ must contain the *names* as strings, not the objects
# themselves — `from niaarm import *` requires a sequence of strings.
__all__ = ['NiaARM', 'Dataset', 'Stats']

__version__ = "0.1.0"
112 changes: 59 additions & 53 deletions niaarm/association_rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,55 +74,60 @@ def feature_position(self, feature):
position = position + 2
return position

def support_confidence(self, antecedent, consequence, transactions):
def __match_antecedent(self, i, antecedent, transactions):
    """Count how many antecedent attributes transaction *i* satisfies.

    'NO' entries mark unused attributes and are skipped. Numeric attributes
    match when the transaction value falls inside the attribute's interval;
    categorical attributes match on equality with the first element.
    """
    match = 0
    for j, attribute in enumerate(antecedent):
        if attribute == 'NO':
            continue
        dtype = self.features[self.permutation[j]].dtype
        value = transactions[i, self.permutation[j]]
        if dtype in ('float', 'int'):
            if attribute[0] <= value <= attribute[1]:
                match += 1
        elif dtype == 'cat':
            if value == attribute[0]:
                match += 1
    return match

def __match_consequent(self, i, antecedent, consequent, transactions):
    """Count how many consequent attributes transaction *i* satisfies.

    Consequent attributes live in the permutation after the antecedent
    block, so indexing is offset by len(antecedent). 'NO' entries are
    skipped; numeric attributes match by interval, categorical by equality.
    """
    match = 0
    offset = len(antecedent)
    for k, attribute in enumerate(consequent):
        if attribute == 'NO':
            continue
        position = self.permutation[offset + k]
        dtype = self.features[position].dtype
        value = transactions[i, position]
        if dtype in ('float', 'int'):
            if attribute[0] <= value <= attribute[1]:
                match += 1
        elif dtype == 'cat':
            if value == attribute[0]:
                match += 1
    return match

def support_confidence(self, antecedent, consequent, transactions):
supp = 0
conf = 0
conf_counter = 0

# firstly antecedent
for i in range(len(transactions)):
match1 = 0
match2 = 0
for j in range(len(antecedent)):
dtype = self.features[self.permutation[j]].dtype
if dtype == 'float' or dtype == 'int':
if antecedent[j] != 'NO':
border = antecedent[j]
if border[0] <= transactions[i, self.permutation[j]] <= border[1]:
match1 = match1 + 1
elif dtype == 'cat':
if antecedent[j] != 'NO':
ant = antecedent[j]
if transactions[i, self.permutation[j]] == ant[0]:
match1 = match1 + 1

# secondly consequence
con_counter = 0
for ll in range(len(antecedent), len(antecedent) + len(consequence)):
dtype = self.features[self.permutation[ll]].dtype
if dtype == 'float' or dtype == 'int':
if consequence[con_counter] != 'NO':
border = consequence[con_counter]
if border[0] <= transactions[i, self.permutation[ll]] <= border[1]:
match2 = match2 + 1
elif dtype == 'cat':
if consequence[con_counter] != 'NO':
con = consequence[con_counter]

if transactions[i, self.permutation[ll]] == con[0]:
match2 = match2 + 1

con_counter = con_counter + 1
match_antecedent = self.__match_antecedent(i, antecedent, transactions)
match_consequent = self.__match_consequent(i, antecedent, consequent, transactions)

missing_ant = antecedent.count('NO')
missing_con = consequence.count('NO')
missing_con = consequent.count('NO')

if (missing_ant + match1) == len(antecedent):
if (missing_ant + match_antecedent) == len(antecedent):
conf_counter += 1
if (missing_con + match2) == len(consequence):
if (missing_con + match_consequent) == len(consequent):
conf = conf + 1

total = match1 + match2 + missing_ant + missing_con
total = match_antecedent + match_consequent + missing_ant + missing_con

if total == len(self.features):
supp = supp + 1
Expand All @@ -138,11 +143,11 @@ def support_confidence(self, antecedent, consequence, transactions):

return total_supp, total_conf

def coverage(self, antecedent, consequent):
    """Return the fraction of features the rule actually uses.

    Defect fixed: the pasted diff left both the pre-rename
    (``consequence``) and post-rename (``consequent``) definition lines in
    place; collapsed to the single renamed version.

    Attributes marked 'NO' on either side are unused, so coverage is
    1 - (unused attributes / total features).
    """
    missing_total = antecedent.count("NO") + consequent.count("NO")
    return 1 - missing_total / len(self.features)

def shrinkage(self, antecedent, consequence):
def shrinkage(self, antecedent, consequent):
differences = []

for i in range(len(antecedent)):
Expand All @@ -156,11 +161,11 @@ def shrinkage(self, antecedent, consequence):
differences.append(diff)

con_counter = 0
for ll in range(len(antecedent), len(antecedent) + len(consequence)):
for ll in range(len(antecedent), len(antecedent) + len(consequent)):
feature = self.features[self.permutation[ll]]
if feature.dtype == 'float' or feature.dtype == 'int':
if consequence[con_counter] != 'NO':
borders = consequence[con_counter]
if consequent[con_counter] != 'NO':
borders = consequent[con_counter]
diff_borders = borders[1] - borders[0]
total_borders = feature.max_val - feature.min_val
diff = diff_borders / total_borders
Expand All @@ -175,9 +180,9 @@ def shrinkage(self, antecedent, consequence):
return 0.0
return 1 - normalized

def format_rules(self, antecedent, consequence):
def format_rules(self, antecedent, consequent):
antecedent1 = []
consequence1 = []
consequent1 = []

for i in range(len(antecedent)):
if antecedent[i] != "NO":
Expand All @@ -188,19 +193,20 @@ def format_rules(self, antecedent, consequence):
rule = feature.name + "(" + str(antecedent[i]) + ")"
antecedent1.append(rule)

for i in range(len(consequence)):
if consequence[i] != "NO":
for i in range(len(consequent)):
if consequent[i] != "NO":
feature = self.features[self.permutation[i + len(antecedent)]]
if feature.dtype == "cat":
rule = feature.name + "(" + str(consequence[i][0]) + ")"
rule = feature.name + "(" + str(consequent[i][0]) + ")"
else:
rule = feature.name + "(" + str(consequence[i]) + ")"
consequence1.append(rule)
return antecedent1, consequence1
rule = feature.name + "(" + str(consequent[i]) + ")"
consequent1.append(rule)
return antecedent1, consequent1


def _normalize(value, actual_bounds, real_bounds):
return real_bounds[0] + (value - real_bounds[0]) * (real_bounds[1] - real_bounds[0]) / (actual_bounds[1] - actual_bounds[0])
return real_bounds[0] + (value - real_bounds[0]) * (real_bounds[1] - real_bounds[0]) / (
actual_bounds[1] - actual_bounds[0])


def _rule_feasible(ant, con):
Expand Down
2 changes: 1 addition & 1 deletion niaarm/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def __analyse_types(self):
unique_categories = None
else:
dtype = "cat"
unique_categories = sorted(col.astype('string').unique().tolist(), key=str.lower) # convert to str just in case
unique_categories = sorted(col.astype('string').unique().tolist(), key=str.lower)
min_value = None
max_value = None

Expand Down
94 changes: 47 additions & 47 deletions niaarm/niaarm.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,23 @@
class NiaARM(Problem):
r"""Implementation of NiaARM.

Date:
2021

Reference:
The implementation is composed of ideas found in the following papers:

I. Fister Jr., A. Iglesias, A. Gálvez, J. Del Ser, E. Osaba, I Fister. [Differential evolution for association rule mining using categorical and numerical attributes](http://www.iztok-jr-fister.eu/static/publications/231.pdf) In: Intelligent data engineering and automated learning - IDEAL 2018, pp. 79-88, 2018.

I. Fister Jr., V. Podgorelec, I. Fister. Improved Nature-Inspired Algorithms for Numeric Association Rule Mining. In: Vasant P., Zelinka I., Weber GW. (eds) Intelligent Computing and Optimization. ICO 2020. Advances in Intelligent Systems and Computing, vol 1324. Springer, Cham.
I. Fister Jr., A. Iglesias, A. Gálvez, J. Del Ser, E. Osaba, I Fister.
[Differential evolution for association rule mining using categorical and numerical attributes]
(http://www.iztok-jr-fister.eu/static/publications/231.pdf)
In: Intelligent data engineering and automated learning - IDEAL 2018, pp. 79-88, 2018.

License:
MIT
I. Fister Jr., V. Podgorelec, I. Fister.
Improved Nature-Inspired Algorithms for Numeric Association Rule Mining.
In: Vasant P., Zelinka I., Weber GW. (eds) Intelligent Computing and Optimization. ICO 2020.
Advances in Intelligent Systems and Computing, vol 1324. Springer, Cham.

Attributes:
features (list[Feature]): List of features.
transactions (np.ndarray): Data from transaction database.
rules (list[Rule]): Mined association rules.

"""

Expand All @@ -42,30 +45,26 @@ def __init__(self, dimension, features, transactions, alpha=0.0, beta=0.0, gamma
self.rules = []
super().__init__(dimension, 0.0, 1.0)

def rule_exists(self, antecedent, consequent):
    r"""Check if association rule already exists.

    Defect fixed: the pasted diff left both the pre-rename
    (``consequence``) and post-rename (``consequent``) lines interleaved;
    collapsed to the single renamed version.

    Arguments:
        antecedent (list): Antecedent side of the candidate rule.
        consequent (list): Consequent side of the candidate rule.

    Returns:
        bool: True when a mined rule has the same antecedent and consequent.
    """
    for rule in self.rules:
        if rule.antecedent == antecedent and rule.consequent == consequent:
            return True
    return False

def export_rules(self, path):
    r"""Save all association rules found to csv file.

    Defect fixed: the pasted diff left the old try/except variant
    interleaved with the new implementation; collapsed to the new version
    (errors propagate to the caller instead of being printed).

    Arguments:
        path (str): Destination csv file path.
    """
    with open(path, 'w', newline='') as f:
        writer = csv.writer(f)

        # write header
        # NOTE(review): "consequent" is lower-case while the other header
        # cells are capitalized — kept verbatim, but worth confirming.
        writer.writerow(["Antecedent", "consequent", "Fitness", "Support", "Confidence", "Coverage", "Shrinkage"])

        for rule in self.rules:
            writer.writerow(
                [rule.antecedent, rule.consequent, rule.fitness, rule.support, rule.confidence, rule.coverage,
                 rule.shrink])
    print(f"Rules exported to {path}")

def sort_rules(self):
    """Sort the mined rules in place, best (highest) fitness first."""
    self.rules.sort(key=lambda rule: rule.fitness, reverse=True)
Expand All @@ -81,70 +80,71 @@ def _evaluate(self, sol):

rule = arm.build_rule(solution)

# get antecedent and consequence of rule
# get antecedent and consequent of rule
antecedent = rule[:cut]
consequence = rule[cut:]
consequent = rule[cut:]

# check if rule is feasible
if _rule_feasible(antecedent, consequence):
if _rule_feasible(antecedent, consequent):
# get support and confidence of rule
support, confidence = arm.support_confidence(antecedent, consequence, self.transactions)
support, confidence = arm.support_confidence(antecedent, consequent, self.transactions)

if self.gamma == 0.0:
shrinkage = 0
else:
shrinkage = arm.shrinkage(antecedent, consequence)
shrinkage = arm.shrinkage(antecedent, consequent)

if self.delta == 0.0:
coverage = 0
else:
coverage = arm.coverage(antecedent, consequence)
coverage = arm.coverage(antecedent, consequent)

fitness = ((self.alpha * support) + (self.beta * confidence) + (self.gamma * shrinkage) +
(self.delta * coverage)) / (self.alpha + self.beta + self.gamma + self.delta)

# in case no attributes were selected for antecedent or consequence
if antecedent.count("NO") == len(antecedent) or consequence.count("NO") == len(consequence):
# in case no attributes were selected for antecedent or consequent
if antecedent.count("NO") == len(antecedent) or consequent.count("NO") == len(consequent):
fitness = 0.0

if support > 0.0 and confidence > 0.0:
antecedent, consequence = _fix_border(antecedent, consequence)
antecedent, consequent = _fix_border(antecedent, consequent)
# format rule; remove NO; add name of features
antecedent1, consequence1 = arm.format_rules(antecedent, consequence)
antecedent1, consequent1 = arm.format_rules(antecedent, consequent)

# save feasible rule
if not self.rule_exists(antecedent1, consequence1):
self.rules.append(Rule(antecedent1, consequence1, fitness, support, confidence, coverage, shrinkage))
if not self.rule_exists(antecedent1, consequent1):
self.rules.append(
Rule(antecedent1, consequent1, fitness, support, confidence, coverage, shrinkage))

if fitness > self.best_fitness:
self.best_fitness = fitness
print(f'Fitness: {fitness}, Support: {support}, Confidence:{confidence}, Coverage:{coverage}, Shrinkage:{shrinkage}')
print(f'Fitness: {fitness}, Support: {support}, Confidence:{confidence}, Coverage:{coverage}, '
f'Shrinkage:{shrinkage}')
return fitness
else:
return -1.0


def _fix_border(antecedent, consequence):
def _fix_border(antecedent, consequent):
r"""In case lower and upper bounds of interval are the same.
We need this in order to provide clean output.

Arguments:
antecedent (np.ndarray): .
consequence (np.ndarray): .
consequent (np.ndarray): .

Returns:
antecedent (array):
consequence (array):
consequent (array):
"""

for i in range(len(antecedent)):
if len(antecedent[i]) > 1:
if antecedent[i][0] == antecedent[i][1]:
antecedent[i] = antecedent[i][0]

for i in range(len(consequence)):
if len(consequence[i]) > 1:
if consequence[i][0] == consequence[i][1]:
consequence[i] = consequence[i][0]
for i in range(len(consequent)):
if len(consequent[i]) > 1:
if consequent[i][0] == consequent[i][1]:
consequent[i] = consequent[i][0]

return antecedent, consequence
return antecedent, consequent
Loading