Skip to content

Commit

Permalink
flake8 cleanup
Browse files (browse the repository at this point in the history)
  • Loading branch information
bartdag committed Aug 10, 2015
1 parent b71eb40 commit 3dc92e9
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 70 deletions.
13 changes: 8 additions & 5 deletions pymining/assocrules.py
Expand Up @@ -9,13 +9,15 @@ def mine_assoc_rules(isets, min_support=2, min_confidence=0.5):
for item in key:
left = key.difference([item])
right = frozenset([item])
_mine_assoc_rules(left, right, support, visited, isets,
min_support, min_confidence, rules)
_mine_assoc_rules(
left, right, support, visited, isets,
min_support, min_confidence, rules)

return rules


def _mine_assoc_rules(left, right, rule_support, visited, isets, min_support,
def _mine_assoc_rules(
left, right, rule_support, visited, isets, min_support,
min_confidence, rules):
if (left, right) in visited or len(left) < 1:
return
Expand All @@ -30,5 +32,6 @@ def _mine_assoc_rules(left, right, rule_support, visited, isets, min_support,
for item in left:
new_left = left.difference([item])
new_right = right.union([item])
_mine_assoc_rules(new_left, new_right, rule_support, visited, isets,
min_support, min_confidence, rules)
_mine_assoc_rules(
new_left, new_right, rule_support, visited, isets,
min_support, min_confidence, rules)
2 changes: 1 addition & 1 deletion pymining/compat.py
@@ -1,6 +1,6 @@
import sys

if sys.version_info[0] < 3:
range = xrange
range = xrange # noqa
else:
range = range
64 changes: 34 additions & 30 deletions pymining/itemmining.py
@@ -1,7 +1,8 @@
from collections import defaultdict, deque, OrderedDict


def _sort_transactions_by_freq(transactions, key_func, reverse_int=False,
def _sort_transactions_by_freq(
transactions, key_func, reverse_int=False,
reverse_ext=False, sort_ext=True):
key_seqs = [{key_func(i) for i in sequence} for sequence in transactions]
frequencies = get_frequencies(key_seqs)
Expand Down Expand Up @@ -45,7 +46,8 @@ def get_sam_input(transactions, key_func=None):
'''

if key_func is None:
key_func = lambda e: e
def key_func(e):
return e

(asorted_seqs, _) = _sort_transactions_by_freq(transactions, key_func)

Expand Down Expand Up @@ -113,7 +115,6 @@ def _sam(sam_input, fis, report, min_support):
if s >= min_support:
fis.add(i[1])
report[frozenset(fis)] = s
#print('{0} with support {1}'.format(fis, s))
n = n + 1 + _sam(c, fis, report, min_support)
fis.remove(i[1])
return n
Expand Down Expand Up @@ -160,10 +161,11 @@ def get_relim_input(transactions, key_func=None):
# relim_input[x][1][x][1] = rest of transaction prefixed by key_freq

if key_func is None:
key_func = lambda e: e
def key_func(e):
return e

(asorted_seqs, frequencies) = _sort_transactions_by_freq(transactions,
key_func)
(asorted_seqs, frequencies) = _sort_transactions_by_freq(
transactions, key_func)
key_map = _get_key_map(frequencies)

relim_input = _new_relim_input(len(key_map), key_map)
Expand Down Expand Up @@ -207,15 +209,12 @@ def relim(rinput, min_support=2):
def _relim(rinput, fis, report, min_support):
(relim_input, key_map) = rinput
n = 0
# Maybe this one isn't necessary
#a = deque(relim_input)
a = relim_input
while len(a) > 0:
item = a[-1][0][1]
s = a[-1][0][0]
if s >= min_support:
fis.add(item[1])
#print('Report {0} with support {1}'.format(fis, s))
report[frozenset(fis)] = s
b = _new_relim_input(len(a) - 1, key_map)
rest_lists = a[-1][1]
Expand Down Expand Up @@ -288,7 +287,8 @@ def _create_child(self, child_key, heads, last_insert):

return child

def get_cond_tree(self, child, count, visited, heads, last_insert,
def get_cond_tree(
self, child, count, visited, heads, last_insert,
dont_create=False):

key = self.key
Expand All @@ -300,13 +300,13 @@ def get_cond_tree(self, child, count, visited, heads, last_insert,
try:
cond_node = visited[self]
except Exception:
cond_node = self._create_cond_child(visited, heads,
last_insert)
cond_node = self._create_cond_child(
visited, heads, last_insert)

if self.parent is not None:
# Recursion
parent_node = self.parent.get_cond_tree(cond_node, count, visited,
heads, last_insert, False)
parent_node = self.parent.get_cond_tree(
cond_node, count, visited, heads, last_insert, False)
if cond_node is not None:
cond_node.count += count
heads[key][1] += count
Expand Down Expand Up @@ -339,7 +339,8 @@ def _find_ancestor(self, heads, min_support):
ancestor = ancestor.parent
return ancestor

def prune_me(self, from_head_list, visited_parents, merged_before,
def prune_me(
self, from_head_list, visited_parents, merged_before,
merged_now, heads, min_support):
try:
# Parent was merged
Expand Down Expand Up @@ -367,8 +368,9 @@ def prune_me(self, from_head_list, visited_parents, merged_before,

def __str__(self):
child_str = ','.join([str(key) for key in self.children])
return '{0} ({1}) [{2}] {3}'.format(self.key, self.count, child_str,
self.next_node is not None)
return '{0} ({1}) [{2}] {3}'.format(
self.key, self.count, child_str,
self.next_node is not None)

def __repr__(self):
return self.__str__()
Expand All @@ -385,12 +387,14 @@ def get_fptree(transactions, key_func=None, min_support=2):
'''

if key_func is None:
key_func = lambda e: e
def key_func(e):
return e

asorted_seqs, frequencies = _sort_transactions_by_freq(transactions,
key_func, True, False, False)
transactions = [[item[1] for item in aseq if item[0] >= min_support] for
aseq in asorted_seqs]
asorted_seqs, frequencies = _sort_transactions_by_freq(
transactions, key_func, True, False, False)
transactions = [
[item[1] for item in aseq if item[0] >= min_support] for
aseq in asorted_seqs]

root = FPNode(FPNode.root_key, None)
heads = {}
Expand All @@ -403,7 +407,6 @@ def get_fptree(transactions, key_func=None, min_support=2):
new_heads = OrderedDict()
for (head, head_support) in sorted_heads:
new_heads[head.key] = (head, head_support)
#new_heads = tuple(heads.values())

return (root, new_heads)

Expand All @@ -419,8 +422,9 @@ def _create_cond_tree(head_node, new_heads, pruning):
visited = {}
last_insert = {}
while head_node is not None:
head_node.get_cond_tree(None, head_node.count, visited, new_heads,
last_insert, True)
head_node.get_cond_tree(
None, head_node.count, visited, new_heads,
last_insert, True)
head_node = head_node.next_node
return new_heads

Expand All @@ -436,8 +440,9 @@ def _prune_cond_tree(heads, min_support):
while node is not None:
# If the node is merged, we lose the next_node
next_node = node.next_node
node.prune_me(previous_node, visited_parents, merged_before,
merged_now, heads, min_support)
node.prune_me(
previous_node, visited_parents, merged_before,
merged_now, heads, min_support)
if node.next_node is not None:
# Only change the previous node if it wasn't merged.
previous_node = node
Expand Down Expand Up @@ -470,13 +475,12 @@ def _fpgrowth(fptree, fis, report, min_support=2, pruning=True):
continue

fis.add(head_node.key)
#print('Report {0} with support {1}'.format(fis, head_support))
report[frozenset(fis)] = head_support
new_heads = _init_heads(heads)
_create_cond_tree(head_node, new_heads, pruning)
if pruning:
_prune_cond_tree(new_heads, min_support)
n = n + 1 + _fpgrowth((None, new_heads), fis, report, min_support,
pruning)
n = n + 1 + _fpgrowth(
(None, new_heads), fis, report, min_support, pruning)
fis.remove(head_node.key)
return n
22 changes: 13 additions & 9 deletions pymining/perftesting.py
Expand Up @@ -40,10 +40,11 @@ def get_default_transactions_alt():

def get_default_sequences():
'''Returns a small list of sequences. For testing purpose.'''
return ( 'caabc', 'abcb', 'cabc', 'abbca' )
return ('caabc', 'abcb', 'cabc', 'abbca')


def get_random_transactions(transaction_number=500,
def get_random_transactions(
transaction_number=500,
max_item_per_transaction=100, max_key_length=50,
key_alphabet=string.ascii_letters, universe_size=1000):
'''Generates a random list of `transaction_number` transactions containing
Expand All @@ -62,14 +63,17 @@ def get_random_transactions(transaction_number=500,
words = []
for _ in range(universe_size):

word = ''.join((random.choice(key_alphabet) for x in
word = ''.join((
random.choice(key_alphabet) for x in
range(random.randint(1, max_key_length))))
words.append(word)

transactions = []
for _ in range(transaction_number):
transaction = {word for word in random.sample(words, random.randint(0,
max_item_per_transaction))}
transaction = {
word for word in
random.sample(words, random.randint(0, max_item_per_transaction))
}
transactions.append(transaction)

return transactions
Expand Down Expand Up @@ -146,17 +150,17 @@ def test_itemset_perf(perf_round=10, sparse=True, seed=None):

start = time()
for i in range(perf_round):
(n, report) = test_fpgrowth(False, transactions, support,
pruning=True)
(n, report) = test_fpgrowth(
False, transactions, support, pruning=True)
print('Done round {0}'.format(i))
end = time()
print('FP-Growth (pruning on) took: {0}'.format(end - start))
print('Computed {0} frequent item sets.'.format(n))

start = time()
for i in range(perf_round):
(n, report) = test_fpgrowth(False, transactions, support,
pruning=False)
(n, report) = test_fpgrowth(
False, transactions, support, pruning=False)
print('Done round {0}'.format(i))
end = time()
print('FP-Growth (pruning off) took: {0}'.format(end - start))
Expand Down
45 changes: 22 additions & 23 deletions setup.py
Expand Up @@ -2,31 +2,30 @@

from distutils.core import setup

setup(name='pymining',
version='0.1',
description='Small collection of data mining algorithms',
long_description=
'''
setup(
name='pymining',
version='0.1',
description='Small collection of data mining algorithms',
long_description='''
pymining is a small collection of data mining algorithms implemented in Python
(no C extension). This is mainly useful for environments without support for C.
All algorithms come from the scientific literature.
''',
author='Barthelemy Dagenais',
author_email='barthe@users.sourceforge.net',
license='BSD License',
url='https://github.com/bartdag/pymining',
packages=['pymining'],
classifiers=[
'Intended Audience :: Developers',
'License :: OSI Approved :: BSD License',
'Operating System :: OS Independent',
'Programming Language :: Python',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.2',
'Topic :: Software Development :: Libraries',
],
)

author='Barthelemy Dagenais',
author_email='barthe@users.sourceforge.net',
license='BSD License',
url='https://github.com/bartdag/pymining',
packages=['pymining'],
classifiers=[
'Intended Audience :: Developers',
'License :: OSI Approved :: BSD License',
'Operating System :: OS Independent',
'Programming Language :: Python',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.2',
'Topic :: Software Development :: Libraries',
],
)
5 changes: 3 additions & 2 deletions tests/assocrules_tests.py
@@ -1,6 +1,7 @@
import unittest
from pymining import itemmining, perftesting, assocrules


class TestAssocRule(unittest.TestCase):

def testDefaultSupportConf(self):
Expand All @@ -26,8 +27,8 @@ def testConfidence075(self):
ts1 = perftesting.get_default_transactions()
relim_input = itemmining.get_relim_input(ts1)
report = itemmining.relim(relim_input, 2)
rules = assocrules.mine_assoc_rules(report, min_support=2,
min_confidence=0.75)
rules = assocrules.mine_assoc_rules(
report, min_support=2, min_confidence=0.75)
self.assertEqual(8, len(rules))

a_rule = (frozenset(['b']), frozenset(['d']), 6, 0.75)
Expand Down

0 comments on commit 3dc92e9

Please sign in to comment.