In [1]:
import operator
from collections import Counter
from itertools import combinations

In [2]:
# Minimum number of records an item, pair or triple must appear in to be
# kept as "frequent" by the counting functions below.
N = 100

# Text file opened by browsing_history(); one space-separated record per line.
file_name = 'browsing.txt'

def browsing_history(path=None):
    """Lazily yield one record (a list of item ids) per line of the data file.

    Args:
        path: File to read. Defaults to the module-level ``file_name``.

    Yields:
        list[str]: The whitespace-separated tokens of each line. Blank lines
        yield an empty list; runs of spaces and leading/trailing whitespace
        never produce empty-string tokens.
    """
    source = file_name if path is None else path
    # The ``with`` block closes the file when the generator is exhausted or
    # garbage-collected, so no explicit close() is needed.
    with open(source, 'r') as f:
        for line in f:
            # split() with no argument drops empty tokens, unlike split(' ').
            yield line.split()

In [3]:
def count_history(browsing_reader=None, min_support=None):
    """Count in how many records each item occurs and keep the frequent ones.

    Args:
        browsing_reader: Iterable of records (each an iterable of item ids).
            When omitted, a fresh ``browsing_history()`` reader is created on
            every call. A generator used as a default value would be built
            once at definition time and be empty on the second call.
        min_support: Minimum record count for an item to be kept. Defaults to
            the module-level ``N``.

    Returns:
        tuple[set, dict]: The set of frequent item ids, and a dict mapping
        each frequent item id to the number of records containing it.
    """
    if browsing_reader is None:
        browsing_reader = browsing_history()
    threshold = N if min_support is None else min_support

    # dict.fromkeys() de-duplicates a record while keeping first-seen order,
    # so an item repeated inside one record is counted once. This matches the
    # per-record counting used for pairs and triples.
    count = Counter(
        item
        for record in browsing_reader
        for item in dict.fromkeys(record)
    )

    frequent_count = {
        item_id: value for item_id, value in count.items() if value >= threshold
    }
    return set(frequent_count), frequent_count

In [4]:
def count_pairs(frequent_items, browsing_reader=None, min_support=None):
    """Count co-occurrences of frequent items and keep the frequent pairs.

    Args:
        frequent_items: Container of item ids; only these items are paired.
        browsing_reader: Iterable of records (each an iterable of item ids).
            When omitted, a fresh ``browsing_history()`` reader is created on
            every call instead of sharing one definition-time generator.
        min_support: Minimum record count for a pair to be kept. Defaults to
            the module-level ``N``.

    Returns:
        dict: Maps each frequent pair, as a sorted 2-tuple of item ids, to
        the number of records containing both items.
    """
    if browsing_reader is None:
        browsing_reader = browsing_history()
    threshold = N if min_support is None else min_support

    pairs_count = Counter()
    for record in browsing_reader:
        # Unique, sorted items: combinations() then yields each pair exactly
        # once, already in sorted order, and never an (x, x) pair.
        items_in_record = sorted(
            {item for item in record if item in frequent_items}
        )
        pairs_count.update(combinations(items_in_record, 2))

    return {
        pair: count for pair, count in pairs_count.items() if count >= threshold
    }

In [5]:
# Give each pass its own fresh reader over the data file so that this cell
# produces the same result every time it is run.
frequent_items, frequent_count = count_history(browsing_reader=browsing_history())
filtered_pair_count = count_pairs(frequent_items, browsing_reader=browsing_history())

In [6]:
# Confidence of every single-item rule X -> Y drawn from a frequent pair:
# conf(X -> Y) = count(X, Y) / count(X). Keys are (X, Y) tuples.
association_rule = {}
for (left, right), support in filtered_pair_count.items():
    association_rule[(left, right)] = support / frequent_count[left]
    association_rule[(right, left)] = support / frequent_count[right]

# Rank the rules from highest to lowest confidence: sort ascending, then
# flip the whole list.
sorted_association_rule = sorted(
    association_rule.items(), key=operator.itemgetter(1)
)[::-1]

In [7]:
# Display the ((X, Y), confidence) pair rules, highest confidence first.
sorted_association_rule

[(('DAI93865', 'FRO40251'), 1.0),
 (('GRO85051', 'FRO40251'), 0.999176276771005),
 (('GRO38636', 'FRO40251'), 0.9906542056074766),
 (('ELE12951', 'FRO40251'), 0.9905660377358491),
 (('DAI88079', 'FRO40251'), 0.9867256637168141),
 (('FRO92469', 'FRO40251'), 0.983510011778563),
 (('DAI43868', 'SNA82528'), 0.972972972972973),
 (('DAI23334', 'DAI62779'), 0.9545454545454546),
 (('ELE92920', 'DAI62779'), 0.7326649958228906),
 (('DAI53152', 'FRO40251'), 0.717948717948718),
 (('SNA18336', 'DAI62779'), 0.7136812411847673),
 (('ELE55848', 'GRO32086'), 0.7094594594594594),
 (('GRO89004', 'ELE25077'), 0.698051948051948),
 (('GRO81647', 'GRO73461'), 0.6775510204081633),
 (('DAI37288', 'ELE32164'), 0.6464088397790055),
 (('SNA18336', 'ELE92920'), 0.6417489421720733),
 (('ELE32244', 'ELE66600'), 0.6403508771929824),
 (('FRO47962', 'DAI75645'), 0.6176470588235294),
 (('FRO73056', 'GRO44993'), 0.6016483516483516),
 (('FRO19221', 'DAI62779'), 0.5976714100905562),
 (('SNA44451', 'DAI18527'), 0.5828571428

In [37]:
# Candidate triples: the union of any two frequent pairs that share exactly
# one item, stored as a sorted tuple so each candidate has one canonical form.
triples = {
    tuple(sorted(set(first) | set(second)))
    for first, second in combinations(filtered_pair_count, 2)
    if len(set(first) & set(second)) == 1
}

In [38]:
def count_triples(triple_set, frequent_items, browsing_reader=None, min_support=None):
    """Count occurrences of candidate triples and keep the frequent ones.

    Args:
        triple_set: Iterable of candidate triples, each a sorted 3-tuple of
            item ids.
        frequent_items: Container of item ids; records are restricted to
            these items before triples are formed.
        browsing_reader: Iterable of records (each an iterable of item ids).
            When omitted, a fresh ``browsing_history()`` reader is created on
            every call instead of sharing one definition-time generator.
        min_support: Minimum record count for a triple to be kept. Defaults
            to the module-level ``N``.

    Returns:
        dict: Maps each frequent candidate triple to the number of records
        containing all three of its items.
    """
    if browsing_reader is None:
        browsing_reader = browsing_history()
    threshold = N if min_support is None else min_support

    # Start every candidate at zero. The dict doubles as the membership
    # index, so lookups are O(1) even if ``triple_set`` is a list.
    triples_count = dict.fromkeys(triple_set, 0)

    for record in browsing_reader:
        # Unique, sorted items: combinations() then yields each triple once,
        # already in the sorted form used by the candidates.
        items_in_record = sorted(
            {item for item in record if item in frequent_items}
        )
        for triple in combinations(items_in_record, 3):
            if triple in triples_count:
                triples_count[triple] += 1

    return {
        triple: count
        for triple, count in triples_count.items()
        if count >= threshold
    }

In [39]:
# Pass a fresh reader over the data file so that this cell produces the same
# result every time it is run.
filtered_triples_count = count_triples(
    triples, frequent_items, browsing_reader=browsing_history()
)

In [62]:
# Confidence of every rule (X, Y) -> Z drawn from a frequent triple:
# conf((X, Y) -> Z) = count(X, Y, Z) / count(X, Y).
# Keys are ((X, Y), Z) with the antecedent pair stored in sorted order.
association_rule_for_triple = {}
for triple, support in filtered_triples_count.items():
    first, second, third = triple[0], triple[1], triple[2]
    splits = (
        ((first, second), third),
        ((third, second), first),
        ((third, first), second),
    )
    for antecedent, consequent in splits:
        # Frequent pairs are keyed by sorted tuple, so normalise before lookup.
        basis = tuple(sorted(antecedent))
        association_rule_for_triple[(basis, consequent)] = (
            support / filtered_pair_count[basis]
        )

# Rank by confidence, highest first; rules with equal confidence are ordered
# by their key.
sorted_association_rule_for_triple = sorted(
    association_rule_for_triple.items(),
    key=lambda rule: (-rule[1], rule[0]),
)

In [63]:
# Display the (((X, Y), Z), confidence) triple rules, highest confidence first.
sorted_association_rule_for_triple

[((('DAI23334', 'ELE92920'), 'DAI62779'), 1.0),
 ((('DAI31081', 'GRO85051'), 'FRO40251'), 1.0),
 ((('DAI55911', 'GRO85051'), 'FRO40251'), 1.0),
 ((('DAI62779', 'DAI88079'), 'FRO40251'), 1.0),
 ((('DAI75645', 'GRO85051'), 'FRO40251'), 1.0),
 ((('ELE17451', 'GRO85051'), 'FRO40251'), 1.0),
 ((('ELE20847', 'FRO92469'), 'FRO40251'), 1.0),
 ((('ELE20847', 'GRO85051'), 'FRO40251'), 1.0),
 ((('ELE26917', 'GRO85051'), 'FRO40251'), 1.0),
 ((('FRO53271', 'GRO85051'), 'FRO40251'), 1.0),
 ((('GRO21487', 'GRO85051'), 'FRO40251'), 1.0),
 ((('GRO38814', 'GRO85051'), 'FRO40251'), 1.0),
 ((('GRO73461', 'GRO85051'), 'FRO40251'), 1.0),
 ((('GRO85051', 'SNA45677'), 'FRO40251'), 1.0),
 ((('GRO85051', 'SNA80324'), 'FRO40251'), 1.0),
 ((('DAI62779', 'GRO85051'), 'FRO40251'), 0.9973821989528796),
 ((('DAI75645', 'DAI88079'), 'FRO40251'), 0.9932885906040269),
 ((('DAI88079', 'GRO73461'), 'FRO40251'), 0.993103448275862),
 ((('DAI88079', 'ELE17451'), 'FRO40251'), 0.9919354838709677),
 ((('FRO92469', 'GRO73461'), 

In [42]:
# Sanity check: record count of one frequent triple (the numerator of its
# rule confidences).
filtered_triples_count[tuple(sorted(['DAI23334', 'ELE92920', 'DAI62779']))]

143

In [30]:
# Sanity check: record count of the pair ('DAI23334', 'ELE92920'), the
# denominator used for rules whose antecedent is this pair.
filtered_pair_count[tuple(sorted(['DAI23334', 'ELE92920']))]

143