##### Load Libraries

In [1]:
import sys

##### Load Input.txt

In [2]:
def load_transactions(filename='input.txt'):
    """Read a tab-separated transaction file into a list of integer lists.

    Each non-blank line of the file is one transaction whose item ids are
    separated by tab characters.

    Args:
        filename: path of the text file to read.

    Returns:
        A list with one entry per non-blank line; each entry is the list of
        item ids (as ``int``) found on that line, in file order.

    Raises:
        ValueError: if a field on a non-blank line is not an integer.
    """
    transactions = []
    with open(filename, "r") as file:
        for line in file:
            # Strip surrounding whitespace instead of blindly dropping the last
            # character: the final line of a file may have no trailing newline,
            # in which case slicing would cut off part of the last item id.
            record = line.strip()
            if not record:
                # Blank lines carry no transaction; skipping them avoids int('').
                continue
            transactions.append([int(field) for field in record.split('\t')])
    return transactions

In [3]:
# Parse the input file into a list of transactions (one list of item ids each).
filename = 'input.txt'
transactions = load_transactions(filename)

In [4]:
# Display the first parsed transaction as a quick sanity check.
transactions[0]

[7, 14]

##### Build the Initial Set List (one item per set)

In [5]:
def init_sets(transactions):
    """Build the single-item sets together with their occurrence data.

    Args:
        transactions: list of transactions, each an iterable of hashable items.

    Returns:
        A list of dicts, one per distinct item in first-seen order, each with:
            'items': a one-element set holding the item,
            'nums':  the number of transactions that contain the item,
            'idxs':  the indices of those transactions, in ascending order.
    """
    # item -> indices of the transactions that contain it
    occurrences = {}
    for idx, transaction in enumerate(transactions):
        # dict.fromkeys drops repeats while keeping first-seen order, so an item
        # listed twice in one transaction is counted (and indexed) only once.
        for item in dict.fromkeys(transaction):
            occurrences.setdefault(item, []).append(idx)

    return [
        {'items': {item}, 'nums': len(idxs), 'idxs': idxs}
        for item, idxs in occurrences.items()
    ]

In [6]:
# Build the one-item sets (with counts and transaction indices) for inspection.
sets = init_sets(transactions)

In [7]:
# Display the first one-item set: its item, its count and its transaction indices.
sets[0]

{'items': {7},
 'nums': 120,
 'idxs': [0,
  3,
  5,
  8,
  19,
  20,
  21,
  28,
  29,
  32,
  51,
  53,
  55,
  60,
  63,
  65,
  68,
  69,
  70,
  73,
  76,
  77,
  83,
  84,
  97,
  100,
  105,
  108,
  109,
  111,
  115,
  138,
  141,
  145,
  149,
  150,
  168,
  171,
  174,
  178,
  180,
  183,
  186,
  188,
  190,
  192,
  195,
  198,
  201,
  202,
  209,
  214,
  216,
  217,
  218,
  219,
  225,
  231,
  238,
  239,
  242,
  249,
  254,
  257,
  259,
  260,
  264,
  269,
  271,
  282,
  288,
  292,
  293,
  295,
  301,
  305,
  309,
  317,
  328,
  329,
  337,
  338,
  340,
  341,
  343,
  347,
  350,
  357,
  366,
  372,
  378,
  381,
  386,
  387,
  389,
  394,
  395,
  405,
  406,
  407,
  412,
  417,
  422,
  425,
  428,
  434,
  443,
  448,
  455,
  457,
  458,
  461,
  462,
  469,
  480,
  486,
  487,
  490,
  492,
  497]}

##### Run Apriori

In [8]:
def _make_rule(antecedent, consequent, joint_count, total):
    """Build the rule record ``antecedent -> consequent`` from two itemset dicts."""
    return {
        'items_1': antecedent['items'],
        'items_2': consequent['items'],
        # NOTE: 'support' is the support of the antecedent alone; the support
        # of the combined itemset is stored under 'probability_both'.
        'support': antecedent['nums'] / total,
        'confidence': joint_count / antecedent['nums'],
        'probability_both': joint_count / total,
        'lift': (joint_count * total) / (antecedent['nums'] * consequent['nums']),
    }


def run_apriori(transactions, min_support=0.05):
    """Mine association rules between disjoint frequent itemsets.

    Starting from the single-item sets returned by ``init_sets``, every pair of
    disjoint known itemsets is merged. When the merged itemset reaches
    ``min_support``, one rule is recorded in each direction and the merged
    itemset joins the pool that later rounds combine further.

    Args:
        transactions: list of transactions, each an iterable of hashable items.
        min_support: minimum fraction of transactions (between 0 and 1) that
            must contain an itemset for it to be kept. Defaults to 0.05.

    Returns:
        A list of rule dicts with the keys 'items_1' (antecedent set),
        'items_2' (consequent set), 'support' (antecedent support),
        'confidence', 'probability_both' (support of the union) and 'lift'.
        An empty list is returned when there are no transactions.
    """
    total = len(transactions)
    if total == 0:
        return []

    def is_frequent(count):
        # Compare the exact ratio instead of a truncated integer threshold, so
        # an itemset just below min_support is not let through. The count > 0
        # guard keeps never-observed itemsets out of the pool even for tiny
        # inputs or min_support == 0; they would later divide by zero.
        return count > 0 and count / total >= min_support

    # Hoisted out of the loops: set view of every transaction for subset tests.
    transaction_sets = [set(transaction) for transaction in transactions]

    # Apriori pruning: an infrequent item can never be part of a frequent itemset.
    sets = [s for s in init_sets(transactions) if is_frequent(s['nums'])]
    # Itemsets already in the pool. A merged itemset can be reached through
    # several different splits; it must be stored once, otherwise later rounds
    # would emit the same rule repeatedly.
    known = {frozenset(s['items']) for s in sets}
    rules = []

    start_point = 0
    end_point = len(sets)
    while start_point < end_point:
        # Pair each itemset added in the previous round with everything before it.
        for idx_A in range(start_point, end_point):
            set_A = sets[idx_A]
            # Iterate over a slice copy: `sets` grows while we loop.
            for set_B in sets[:idx_A]:
                # Only disjoint itemsets form a rule.
                if set_A['items'] & set_B['items']:
                    continue

                # Transactions containing A that also contain all of B.
                idxs = [
                    idx for idx in set_A['idxs']
                    if set_B['items'] <= transaction_sets[idx]
                ]
                nums = len(idxs)
                if not is_frequent(nums):
                    continue

                rules.append(_make_rule(set_A, set_B, nums, total))
                rules.append(_make_rule(set_B, set_A, nums, total))

                new_set = set_A['items'] | set_B['items']
                key = frozenset(new_set)
                if key not in known:
                    known.add(key)
                    sets.append({'items': new_set, 'nums': nums, 'idxs': idxs})
        start_point = end_point
        end_point = len(sets)

    return rules

In [9]:
# Mine the association rules from the loaded transactions.
rules = run_apriori(transactions)

In [10]:
# Show the first mined rule to check the record layout.
print(rules[0])

{'items_1': {14}, 'items_2': {7}, 'support': 0.256, 'confidence': 0.296875, 'probability_both': 0.076, 'lift': 1.2369791666666667}


##### Save results to output.txt

In [11]:
def save_rules(rules, filename='output.txt'):
    """Write the rules to a tab-separated text file, one rule per line.

    Each line holds four fields: the string form of 'items_1', the string form
    of 'items_2', then 'probability_both' and 'confidence' expressed as
    percentages with two decimals.

    Args:
        rules: iterable of rule dicts providing the keys 'items_1', 'items_2',
            'probability_both' and 'confidence'.
        filename: path of the file to (over)write.
    """
    def as_percent(fraction):
        # Fraction -> percentage text with exactly two decimals.
        return '%.2f' % round(fraction * 100, 2)

    with open(filename, "w") as out:
        out.writelines(
            '\t'.join((
                str(rule['items_1']),
                str(rule['items_2']),
                as_percent(rule['probability_both']),
                as_percent(rule['confidence']),
            )) + '\n'
            for rule in rules
        )

In [12]:
# Write the mined rules to the output file (note: rebinds `filename`).
filename='output.txt'
save_rules(rules, filename)