##### Load Libraries

In [1]:
import numpy as np
# Default minimum support, expressed as an absolute number of transactions:
# run_apriori keeps an itemset only when its transaction count is >= this value.
MIN_SUPPORT = 25

##### Load Input.txt

In [2]:
def load_transactions(filename):
    """Read a tab-separated transaction file into a list of item lists.

    Each line of the file is one transaction; the items on a line are
    separated by a tab character. Items are kept as strings.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list with one entry per line of the file, each entry being the
        list of item strings found on that line. A blank line yields ``['']``.
    """
    transactions = []
    with open(filename, "r") as file:
        for line in file:
            # Strip only the line terminator. Slicing with line[:-1] would
            # chop the last character of the final item whenever the file
            # does not end with a newline.
            items = line.rstrip('\n').split('\t')
            transactions.append(items)
    return transactions

In [3]:
# Input file: one transaction per line, items separated by tabs
# (that is the format load_transactions splits on).
filename = 'input.txt'
transactions = load_transactions(filename)

##### Build the Initial Itemsets (one item per set)

In [4]:
def init_sets(transactions):
    """Build the single-item itemsets together with their support data.

    Args:
        transactions: Sequence of transactions, each an iterable of
            hashable items.

    Returns:
        A list of dicts, one per distinct item in first-seen order, with keys:
            'items': a one-element set holding the item,
            'nums':  number of transactions that contain the item,
            'idxs':  ascending list of the indices of those transactions.
        An empty input yields an empty list.
    """
    # item -> indices of the transactions that contain it
    occurrences = {}
    for idx, transaction in enumerate(transactions):
        # dict.fromkeys() drops repeats while keeping first-seen order, so an
        # item listed twice in one transaction is counted once and its index
        # is recorded once; otherwise the support count would be inflated.
        for item in dict.fromkeys(transaction):
            occurrences.setdefault(item, []).append(idx)

    return [
        {'items': {item}, 'nums': len(idxs), 'idxs': idxs}
        for item, idxs in occurrences.items()
    ]

In [5]:
# Build the single-item itemsets at module level. Note that run_apriori()
# calls init_sets() on its own, so it does not read this `sets` variable.
sets = init_sets(transactions)

##### Run Apriori

In [6]:
def run_apriori(transactions, min_support=None):
    """Mine frequent itemsets and derive association rules from them.

    Starting from the single-item itemsets returned by ``init_sets``, the
    function repeatedly joins every itemset created in the previous round
    with every disjoint itemset that precedes it in the working list. A
    joined itemset is kept when at least ``min_support`` transactions
    contain it, and each successful join of A and B emits the two rules
    A -> B and B -> A.

    Args:
        transactions: Sequence of transactions, each an iterable of
            hashable items.
        min_support: Minimum number of transactions (absolute count) that
            must contain a joined itemset. Defaults to the module-level
            ``MIN_SUPPORT`` when omitted.

    Returns:
        A list of rule dicts with keys:
            'items_1':    antecedent itemset (set),
            'items_2':    consequent itemset (set),
            'support':    fraction of transactions containing 'items_1'.
                          NOTE(review): this is the antecedent's support,
                          not the support of items_1 | items_2 -- confirm
                          this is the intended definition.
            'confidence': count(items_1 | items_2) / count(items_1),
            'lift':       count(items_1 | items_2) * N
                          / (count(items_1) * count(items_2)),
                          where N is the number of transactions.
    """
    if min_support is None:
        min_support = MIN_SUPPORT

    # get sets
    sets = init_sets(transactions)
    rules = []
    total = len(transactions)

    # Build each transaction's item set once instead of rebuilding it inside
    # the innermost loop for every candidate pair.
    transaction_sets = [set(transaction) for transaction in transactions]

    # Itemsets already stored in `sets`. The same itemset can be reached via
    # several disjoint splits (e.g. {a,b}|{c} and {a,c}|{b}); storing it more
    # than once would make later rounds emit identical rules repeatedly and
    # blow up the number of candidates.
    seen = {frozenset(entry['items']) for entry in sets}

    start_point = 0
    end_point = len(sets)
    while start_point < end_point:

        # Both slices are snapshots, so itemsets appended during this round
        # are first visited in the next round.
        for idx_A, set_A in enumerate(sets[start_point:], start_point):
            for set_B in sets[:idx_A]:

                # only join itemsets that share no item
                if not set_A['items'].isdisjoint(set_B['items']):
                    continue

                # transactions containing A that also contain all of B
                idxs = [
                    idx for idx in set_A['idxs']
                    if set_B['items'] <= transaction_sets[idx]
                ]
                nums = len(idxs)

                # MIN_SUPPORT. Zero-support joins are always dropped: with a
                # non-positive threshold they would otherwise be stored and
                # later cause a division by zero in the confidence/lift.
                if nums == 0 or nums < min_support:
                    continue

                new_set = set_A['items'] | set_B['items']
                key = frozenset(new_set)
                if key not in seen:
                    seen.add(key)
                    sets.append({'items': new_set, 'nums': nums, 'idxs': idxs})

                # lift is symmetric, so it is shared by both directions
                lift = (nums * total) / (set_A['nums'] * set_B['nums'])
                rules.append({
                    'items_1': set_A['items'],
                    'items_2': set_B['items'],
                    'support': set_A['nums'] / total,
                    'confidence': nums / set_A['nums'],
                    'lift': lift
                })
                rules.append({
                    'items_1': set_B['items'],
                    'items_2': set_A['items'],
                    'support': set_B['nums'] / total,
                    'confidence': nums / set_B['nums'],
                    'lift': lift
                })
        start_point = end_point
        end_point = len(sets)

    return rules


In [7]:
# Mine the association rules from the loaded transactions; the support
# threshold used is the module-level MIN_SUPPORT.
rules = run_apriori(transactions)

In [8]:
# Print at most the first 100 rules (each is a dict with items_1, items_2,
# support, confidence and lift).
print(rules[:100])

[{'items_1': {'14'}, 'items_2': {'7'}, 'support': 0.256, 'confidence': 0.296875, 'lift': 1.2369791666666667}, {'items_1': {'7'}, 'items_2': {'14'}, 'support': 0.24, 'confidence': 0.31666666666666665, 'lift': 1.2369791666666667}, {'items_1': {'9'}, 'items_2': {'7'}, 'support': 0.278, 'confidence': 0.2446043165467626, 'lift': 1.0191846522781776}, {'items_1': {'7'}, 'items_2': {'9'}, 'support': 0.24, 'confidence': 0.2833333333333333, 'lift': 1.0191846522781776}, {'items_1': {'9'}, 'items_2': {'14'}, 'support': 0.278, 'confidence': 0.30935251798561153, 'lift': 1.2084082733812949}, {'items_1': {'14'}, 'items_2': {'9'}, 'support': 0.256, 'confidence': 0.3359375, 'lift': 1.2084082733812949}, {'items_1': {'18'}, 'items_2': {'7'}, 'support': 0.276, 'confidence': 0.2536231884057971, 'lift': 1.0567632850241546}, {'items_1': {'7'}, 'items_2': {'18'}, 'support': 0.24, 'confidence': 0.2916666666666667, 'lift': 1.0567632850241546}, {'items_1': {'18'}, 'items_2': {'14'}, 'support': 0.276, 'confidence'