Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add stats example, fix linter errors #17

Merged
merged 4 commits into from
Feb 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions examples/stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""
Example usage of the Stats class. We perform association rule mining from the basic run example and then print
out a statistical analysis of the mined association rules
"""


from niaarm import NiaARM
from niaarm.dataset import Dataset
from niaarm.stats import Stats
from niapy.algorithms.basic import DifferentialEvolution
from niapy.task import Task, OptimizationType


if __name__ == '__main__':
# Load the dataset and run the algorithm
data = Dataset("datasets/Abalone.csv")
problem = NiaARM(data.dimension, data.features, data.transactions, alpha=1.0, beta=1.0)
task = Task(problem=problem, max_iters=30, optimization_type=OptimizationType.MAXIMIZATION)
algo = DifferentialEvolution(population_size=50, differential_weight=0.5, crossover_probability=0.9)
algo.run(task=task)

# Instantiate Stats object and print basic statistics of mined rules.
stats = Stats(problem.rules)

print('\nSTATS:')
print(f'Total rules: {stats.total_rules}')
print(f'Average fitness: {stats.avg_fitness}')
print(f'Average support: {stats.avg_support}')
print(f'Average confidence: {stats.avg_confidence}')
print(f'Average coverage: {stats.avg_coverage}')
print(f'Average shrinkage: {stats.avg_shrinkage}')
print(f'Average length of antecedent: {stats.avg_ant_len}')
print(f'Average length of consequent: {stats.avg_con_len}')
5 changes: 5 additions & 0 deletions niaarm/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
from niaarm.niaarm import NiaARM
from niaarm.dataset import Dataset
from niaarm.stats import Stats


# Public API of the package.
# Fix: __all__ must contain the *names* as strings, not the objects
# themselves — `from niaarm import *` requires a sequence of strings.
__all__ = ['NiaARM', 'Dataset', 'Stats']

__version__ = "0.1.0"
112 changes: 59 additions & 53 deletions niaarm/association_rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,55 +74,60 @@ def feature_position(self, feature):
position = position + 2
return position

def support_confidence(self, antecedent, consequence, transactions):
def __match_antecedent(self, i, antecedent, transactions):
    """Count how many antecedent attributes transaction *i* satisfies.

    'NO' entries mark unused attributes and are skipped. Numeric attributes
    match when the transaction value falls inside the attribute's interval;
    categorical attributes match on equality with the first element.
    """
    match = 0
    for j, attribute in enumerate(antecedent):
        if attribute == 'NO':
            continue
        dtype = self.features[self.permutation[j]].dtype
        value = transactions[i, self.permutation[j]]
        if dtype in ('float', 'int'):
            if attribute[0] <= value <= attribute[1]:
                match += 1
        elif dtype == 'cat':
            if value == attribute[0]:
                match += 1
    return match

def __match_consequent(self, i, antecedent, consequent, transactions):
    """Count how many consequent attributes transaction *i* satisfies.

    Consequent attributes live in the permutation after the antecedent
    block, so indexing is offset by len(antecedent). 'NO' entries are
    skipped; numeric attributes match by interval, categorical by equality.
    """
    match = 0
    offset = len(antecedent)
    for k, attribute in enumerate(consequent):
        if attribute == 'NO':
            continue
        position = self.permutation[offset + k]
        dtype = self.features[position].dtype
        value = transactions[i, position]
        if dtype in ('float', 'int'):
            if attribute[0] <= value <= attribute[1]:
                match += 1
        elif dtype == 'cat':
            if value == attribute[0]:
                match += 1
    return match

def support_confidence(self, antecedent, consequent, transactions):
supp = 0
conf = 0
conf_counter = 0

# firstly antecedent
for i in range(len(transactions)):
match1 = 0
match2 = 0
for j in range(len(antecedent)):
dtype = self.features[self.permutation[j]].dtype
if dtype == 'float' or dtype == 'int':
if antecedent[j] != 'NO':
border = antecedent[j]
if border[0] <= transactions[i, self.permutation[j]] <= border[1]:
match1 = match1 + 1
elif dtype == 'cat':
if antecedent[j] != 'NO':
ant = antecedent[j]
if transactions[i, self.permutation[j]] == ant[0]:
match1 = match1 + 1

# secondly consequence
con_counter = 0
for ll in range(len(antecedent), len(antecedent) + len(consequence)):
dtype = self.features[self.permutation[ll]].dtype
if dtype == 'float' or dtype == 'int':
if consequence[con_counter] != 'NO':
border = consequence[con_counter]
if border[0] <= transactions[i, self.permutation[ll]] <= border[1]:
match2 = match2 + 1
elif dtype == 'cat':
if consequence[con_counter] != 'NO':
con = consequence[con_counter]

if transactions[i, self.permutation[ll]] == con[0]:
match2 = match2 + 1

con_counter = con_counter + 1
match_antecedent = self.__match_antecedent(i, antecedent, transactions)
match_consequent = self.__match_consequent(i, antecedent, consequent, transactions)

missing_ant = antecedent.count('NO')
missing_con = consequence.count('NO')
missing_con = consequent.count('NO')

if (missing_ant + match1) == len(antecedent):
if (missing_ant + match_antecedent) == len(antecedent):
conf_counter += 1
if (missing_con + match2) == len(consequence):
if (missing_con + match_consequent) == len(consequent):
conf = conf + 1

total = match1 + match2 + missing_ant + missing_con
total = match_antecedent + match_consequent + missing_ant + missing_con

if total == len(self.features):
supp = supp + 1
Expand All @@ -138,11 +143,11 @@ def support_confidence(self, antecedent, consequence, transactions):

return total_supp, total_conf

def coverage(self, antecedent, consequent):
    """Return the fraction of features the rule actually uses.

    Defect fixed: the pasted diff left both the pre-rename
    (``consequence``) and post-rename (``consequent``) definition lines in
    place; collapsed to the single renamed version.

    Attributes marked 'NO' on either side are unused, so coverage is
    1 - (unused attributes / total features).
    """
    missing_total = antecedent.count("NO") + consequent.count("NO")
    return 1 - missing_total / len(self.features)

def shrinkage(self, antecedent, consequence):
def shrinkage(self, antecedent, consequent):
differences = []

for i in range(len(antecedent)):
Expand All @@ -156,11 +161,11 @@ def shrinkage(self, antecedent, consequence):
differences.append(diff)

con_counter = 0
for ll in range(len(antecedent), len(antecedent) + len(consequence)):
for ll in range(len(antecedent), len(antecedent) + len(consequent)):
feature = self.features[self.permutation[ll]]
if feature.dtype == 'float' or feature.dtype == 'int':
if consequence[con_counter] != 'NO':
borders = consequence[con_counter]
if consequent[con_counter] != 'NO':
borders = consequent[con_counter]
diff_borders = borders[1] - borders[0]
total_borders = feature.max_val - feature.min_val
diff = diff_borders / total_borders
Expand All @@ -175,9 +180,9 @@ def shrinkage(self, antecedent, consequence):
return 0.0
return 1 - normalized

def format_rules(self, antecedent, consequence):
def format_rules(self, antecedent, consequent):
antecedent1 = []
consequence1 = []
consequent1 = []

for i in range(len(antecedent)):
if antecedent[i] != "NO":
Expand All @@ -188,19 +193,20 @@ def format_rules(self, antecedent, consequence):
rule = feature.name + "(" + str(antecedent[i]) + ")"
antecedent1.append(rule)

for i in range(len(consequence)):
if consequence[i] != "NO":
for i in range(len(consequent)):
if consequent[i] != "NO":
feature = self.features[self.permutation[i + len(antecedent)]]
if feature.dtype == "cat":
rule = feature.name + "(" + str(consequence[i][0]) + ")"
rule = feature.name + "(" + str(consequent[i][0]) + ")"
else:
rule = feature.name + "(" + str(consequence[i]) + ")"
consequence1.append(rule)
return antecedent1, consequence1
rule = feature.name + "(" + str(consequent[i]) + ")"
consequent1.append(rule)
return antecedent1, consequent1


def _normalize(value, actual_bounds, real_bounds):
return real_bounds[0] + (value - real_bounds[0]) * (real_bounds[1] - real_bounds[0]) / (actual_bounds[1] - actual_bounds[0])
return real_bounds[0] + (value - real_bounds[0]) * (real_bounds[1] - real_bounds[0]) / (
actual_bounds[1] - actual_bounds[0])


def _rule_feasible(ant, con):
Expand Down
2 changes: 1 addition & 1 deletion niaarm/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def __analyse_types(self):
unique_categories = None
else:
dtype = "cat"
unique_categories = sorted(col.astype('string').unique().tolist(), key=str.lower) # convert to str just in case
unique_categories = sorted(col.astype('string').unique().tolist(), key=str.lower)
min_value = None
max_value = None

Expand Down
94 changes: 47 additions & 47 deletions niaarm/niaarm.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,23 @@
class NiaARM(Problem):
r"""Implementation of NiaARM.

Date:
2021

Reference:
The implementation is composed of ideas found in the following papers:

I. Fister Jr., A. Iglesias, A. Gálvez, J. Del Ser, E. Osaba, I Fister. [Differential evolution for association rule mining using categorical and numerical attributes](http://www.iztok-jr-fister.eu/static/publications/231.pdf) In: Intelligent data engineering and automated learning - IDEAL 2018, pp. 79-88, 2018.

I. Fister Jr., V. Podgorelec, I. Fister. Improved Nature-Inspired Algorithms for Numeric Association Rule Mining. In: Vasant P., Zelinka I., Weber GW. (eds) Intelligent Computing and Optimization. ICO 2020. Advances in Intelligent Systems and Computing, vol 1324. Springer, Cham.
I. Fister Jr., A. Iglesias, A. Gálvez, J. Del Ser, E. Osaba, I Fister.
[Differential evolution for association rule mining using categorical and numerical attributes]
(http://www.iztok-jr-fister.eu/static/publications/231.pdf)
In: Intelligent data engineering and automated learning - IDEAL 2018, pp. 79-88, 2018.

License:
MIT
I. Fister Jr., V. Podgorelec, I. Fister.
Improved Nature-Inspired Algorithms for Numeric Association Rule Mining.
In: Vasant P., Zelinka I., Weber GW. (eds) Intelligent Computing and Optimization. ICO 2020.
Advances in Intelligent Systems and Computing, vol 1324. Springer, Cham.

Attributes:
features (list[Feature]): List of features.
transactions (np.ndarray): Data from transaction database.
rules (list[Rule]): Mined association rules.

"""

Expand All @@ -42,30 +45,26 @@ def __init__(self, dimension, features, transactions, alpha=0.0, beta=0.0, gamma
self.rules = []
super().__init__(dimension, 0.0, 1.0)

def rule_exists(self, antecedent, consequent):
    r"""Check if association rule already exists.

    Defect fixed: the pasted diff left both the pre-rename
    (``consequence``) and post-rename (``consequent``) lines interleaved;
    collapsed to the single renamed version.

    Arguments:
        antecedent (list): Antecedent side of the candidate rule.
        consequent (list): Consequent side of the candidate rule.

    Returns:
        bool: True when a mined rule has the same antecedent and consequent.
    """
    for rule in self.rules:
        if rule.antecedent == antecedent and rule.consequent == consequent:
            return True
    return False

def export_rules(self, path):
    r"""Save all association rules found to csv file.

    Defect fixed: the pasted diff left the old try/except variant
    interleaved with the new implementation; collapsed to the new version
    (errors propagate to the caller instead of being printed).

    Arguments:
        path (str): Destination csv file path.
    """
    with open(path, 'w', newline='') as f:
        writer = csv.writer(f)

        # write header
        # NOTE(review): "consequent" is lower-case while the other header
        # cells are capitalized — kept verbatim, but worth confirming.
        writer.writerow(["Antecedent", "consequent", "Fitness", "Support", "Confidence", "Coverage", "Shrinkage"])

        for rule in self.rules:
            writer.writerow(
                [rule.antecedent, rule.consequent, rule.fitness, rule.support, rule.confidence, rule.coverage,
                 rule.shrink])
    print(f"Rules exported to {path}")

def sort_rules(self):
    """Sort the mined rules in place, best (highest) fitness first."""
    self.rules.sort(key=lambda rule: rule.fitness, reverse=True)
Expand All @@ -81,70 +80,71 @@ def _evaluate(self, sol):

rule = arm.build_rule(solution)

# get antecedent and consequence of rule
# get antecedent and consequent of rule
antecedent = rule[:cut]
consequence = rule[cut:]
consequent = rule[cut:]

# check if rule is feasible
if _rule_feasible(antecedent, consequence):
if _rule_feasible(antecedent, consequent):
# get support and confidence of rule
support, confidence = arm.support_confidence(antecedent, consequence, self.transactions)
support, confidence = arm.support_confidence(antecedent, consequent, self.transactions)

if self.gamma == 0.0:
shrinkage = 0
else:
shrinkage = arm.shrinkage(antecedent, consequence)
shrinkage = arm.shrinkage(antecedent, consequent)

if self.delta == 0.0:
coverage = 0
else:
coverage = arm.coverage(antecedent, consequence)
coverage = arm.coverage(antecedent, consequent)

fitness = ((self.alpha * support) + (self.beta * confidence) + (self.gamma * shrinkage) +
(self.delta * coverage)) / (self.alpha + self.beta + self.gamma + self.delta)

# in case no attributes were selected for antecedent or consequence
if antecedent.count("NO") == len(antecedent) or consequence.count("NO") == len(consequence):
# in case no attributes were selected for antecedent or consequent
if antecedent.count("NO") == len(antecedent) or consequent.count("NO") == len(consequent):
fitness = 0.0

if support > 0.0 and confidence > 0.0:
antecedent, consequence = _fix_border(antecedent, consequence)
antecedent, consequent = _fix_border(antecedent, consequent)
# format rule; remove NO; add name of features
antecedent1, consequence1 = arm.format_rules(antecedent, consequence)
antecedent1, consequent1 = arm.format_rules(antecedent, consequent)

# save feasible rule
if not self.rule_exists(antecedent1, consequence1):
self.rules.append(Rule(antecedent1, consequence1, fitness, support, confidence, coverage, shrinkage))
if not self.rule_exists(antecedent1, consequent1):
self.rules.append(
Rule(antecedent1, consequent1, fitness, support, confidence, coverage, shrinkage))

if fitness > self.best_fitness:
self.best_fitness = fitness
print(f'Fitness: {fitness}, Support: {support}, Confidence:{confidence}, Coverage:{coverage}, Shrinkage:{shrinkage}')
print(f'Fitness: {fitness}, Support: {support}, Confidence:{confidence}, Coverage:{coverage}, '
f'Shrinkage:{shrinkage}')
return fitness
else:
return -1.0


def _fix_border(antecedent, consequence):
def _fix_border(antecedent, consequent):
r"""In case lower and upper bounds of interval are the same.
We need this in order to provide clean output.

Arguments:
antecedent (np.ndarray): .
consequence (np.ndarray): .
consequent (np.ndarray): .

Returns:
antecedent (array):
consequence (array):
consequent (array):
"""

for i in range(len(antecedent)):
if len(antecedent[i]) > 1:
if antecedent[i][0] == antecedent[i][1]:
antecedent[i] = antecedent[i][0]

for i in range(len(consequence)):
if len(consequence[i]) > 1:
if consequence[i][0] == consequence[i][1]:
consequence[i] = consequence[i][0]
for i in range(len(consequent)):
if len(consequent[i]) > 1:
if consequent[i][0] == consequent[i][1]:
consequent[i] = consequent[i][0]

return antecedent, consequence
return antecedent, consequent
Loading