In [1]:
from collections import defaultdict
from itertools import chain, combinations

In [2]:
def getUnion(itemSet, length):
    """Join itemsets pairwise and keep the unions that contain exactly `length` items.

    Every member of `itemSet` is unioned with every member (itself included).
    The result is a set, so identical unions produced by different pairs
    collapse into one entry.
    """
    joined = set()
    for left in itemSet:
        for right in itemSet:
            merged = left.union(right)
            if len(merged) == length:
                joined.add(merged)
    return joined

def getAprioriAlg(itemSetList, minSup, minConf=.7):
    """Run the level-wise Apriori search and derive association rules.

    Parameters
    ----------
    itemSetList : sequence of iterables
        The transactions; each one is a collection of items.
    minSup : float
        Minimum support (fraction of transactions) an itemset needs to be kept.
    minConf : float, optional
        Confidence threshold handed to `associationRule`, which keeps only
        rules whose confidence is strictly greater than this value.
        Defaults to 0.7, the value that used to be hard-coded here.

    Returns
    -------
    (dict, list)
        `globalFreqItemSet` maps an itemset size k to the set of frequent
        itemsets of that size. `rules` is a list of
        `[antecedent, consequent, confidence]` entries sorted by confidence,
        highest first.
    """
    C1List = getItemSetFromList(itemSetList)  # candidate 1-itemsets

    # Frequent itemsets found so far, keyed by itemset size.
    globalFreqItemSet = dict()

    # Occurrence count of every candidate that was evaluated; kept across
    # levels so confidences can be computed once the search is finished.
    globalItemSetWithSup = defaultdict(int)

    # First level: the 1-itemsets that reach the minimum support.
    L1ItemSet = getMinSup(C1List, itemSetList, minSup, globalItemSetWithSup)
    currentLSet = L1ItemSet
    k = 2
    while currentLSet:
        # currentLSet holds the frequent itemsets of size k-1.
        globalFreqItemSet[k-1] = currentLSet
        # Build size-k candidates by joining the current frequent itemsets.
        candidateSet = getUnion(currentLSet, k)
        # Drop candidates that have a (k-1)-subset which is not frequent.
        candidateSet = pruning(candidateSet, currentLSet, k-1)
        # Keep only the candidates that reach the minimum support.
        currentLSet = getMinSup(candidateSet, itemSetList, minSup, globalItemSetWithSup)
        k = k + 1

    rules = associationRule(globalFreqItemSet, globalItemSetWithSup, minConf)
    rules.sort(key=lambda x: x[2], reverse=True)  # sort by confidence, descending
    return globalFreqItemSet, rules

def pruning(candidateSet, prevFreqSet, length):
    """Discard candidates that contain an infrequent subset of size `length`.

    A candidate survives only if every `length`-sized combination of its items
    is present (as a frozenset) in `prevFreqSet`. The input `candidateSet` is
    left untouched; a pruned copy is returned.
    """
    survivors = candidateSet.copy()
    for candidate in candidateSet:
        hasInfrequentSubset = any(
            frozenset(part) not in prevFreqSet
            for part in combinations(candidate, length)
        )
        if hasInfrequentSubset:
            survivors.remove(candidate)
    return survivors

def powerset(s):
    """Iterate over the non-empty proper subsets of `s`, smallest sizes first.

    Despite the name, neither the empty combination nor `s` itself is
    produced: sizes run from 1 up to len(s) - 1. Each subset is yielded as a
    tuple, e.g. for (a, b, c): (a,), (b,), (c,), (a, b), (a, c), (b, c).
    """
    sizes = range(1, len(s))
    perSize = (combinations(s, size) for size in sizes)
    return chain.from_iterable(perSize)

def associationRule(freqItemSet, itemSetWithSup, minConf):
    """Build association rules from the frequent itemsets.

    For every frequent itemset and every non-empty proper subset of it
    (as produced by `powerset`), the confidence is
    count(itemset) / count(subset), both counts read from `itemSetWithSup`.
    A rule is kept only when its confidence is strictly greater than
    `minConf`.

    Returns a list of `[antecedent, consequent, confidence]` entries where
    antecedent and consequent are plain sets.
    """
    found = []
    for levelSets in freqItemSet.values():
        for fullSet in levelSets:
            for antecedent in powerset(fullSet):
                antecedentCount = itemSetWithSup[frozenset(antecedent)]
                confidence = float(itemSetWithSup[fullSet] / antecedentCount)
                if confidence > minConf:
                    consequent = set(fullSet.difference(antecedent))
                    found.append([set(antecedent), consequent, confidence])
    return found

def getItemSetFromList(itemSetList):
    """Collect every distinct item in the transactions as a one-element frozenset."""
    return {
        frozenset([single])
        for transaction in itemSetList
        for single in transaction
    }

def getMinSup(itemSet, itemSetList, minSup, globalItemSetWithSup):
    """Return the candidates in `itemSet` whose support is at least `minSup`.

    Support is the number of transactions in `itemSetList` that contain the
    candidate, divided by the total number of transactions. Every hit is also
    added to `globalItemSetWithSup` (mutated in place) so the counts are
    available later, e.g. for confidence calculation. Candidates that occur
    in no transaction are not recorded at all.
    """
    hitsPerCandidate = defaultdict(int)

    # Count, for each candidate, the transactions that contain it.
    for candidate in itemSet:
        for transaction in itemSetList:
            if not candidate.issubset(transaction):
                continue
            globalItemSetWithSup[candidate] += 1
            hitsPerCandidate[candidate] += 1

    # Keep the candidates whose relative frequency reaches the threshold.
    return {
        candidate
        for candidate, hits in hitsPerCandidate.items()
        if float(hits / len(itemSetList)) >= minSup
    }



In [3]:
# Toy market-basket data: each inner list is one transaction.
dataset = [
    ['Milk', 'Onion', 'Bread', 'Kidney Beans', 'Eggs', 'Yoghurt'],
    ['Fish', 'Onion', 'Bread', 'Eggs', 'Yoghurt'],
    ['Milk', 'Apples', 'Kidney Beans', 'Eggs'],
    ['Milk', 'Sugar', 'Tea Leaves', 'Kidney Beans', 'Yoghurt'],
    ['Tea Leaves', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],
]

# Mine the frequent itemsets with a minimum support of 0.3, then list the rules.
globalFreqItemSet, rules = getAprioriAlg(dataset, 0.3)

print("Association rules")
for r in rules:
    print(r)

Association rules
[{'Bread'}, {'Yoghurt'}, 1.0]
[{'Tea Leaves'}, {'Kidney Beans'}, 1.0]
[{'Onion'}, {'Eggs'}, 1.0]
[{'Bread'}, {'Eggs'}, 1.0]
[{'Bread'}, {'Onion'}, 1.0]
[{'Milk'}, {'Kidney Beans'}, 1.0]
[{'Bread'}, {'Eggs', 'Onion'}, 1.0]
[{'Eggs', 'Bread'}, {'Onion'}, 1.0]
[{'Onion', 'Bread'}, {'Eggs'}, 1.0]
[{'Eggs', 'Milk'}, {'Kidney Beans'}, 1.0]
[{'Bread'}, {'Onion', 'Yoghurt'}, 1.0]
[{'Onion', 'Yoghurt'}, {'Bread'}, 1.0]
[{'Onion', 'Bread'}, {'Yoghurt'}, 1.0]
[{'Yoghurt', 'Bread'}, {'Onion'}, 1.0]
[{'Bread'}, {'Eggs', 'Yoghurt'}, 1.0]
[{'Eggs', 'Yoghurt'}, {'Bread'}, 1.0]
[{'Eggs', 'Bread'}, {'Yoghurt'}, 1.0]
[{'Yoghurt', 'Bread'}, {'Eggs'}, 1.0]
[{'Onion', 'Kidney Beans'}, {'Eggs'}, 1.0]
[{'Eggs', 'Yoghurt'}, {'Onion'}, 1.0]
[{'Onion', 'Yoghurt'}, {'Eggs'}, 1.0]
[{'Milk', 'Yoghurt'}, {'Kidney Beans'}, 1.0]
[{'Kidney Beans', 'Yoghurt'}, {'Milk'}, 1.0]
[{'Bread'}, {'Eggs', 'Onion', 'Yoghurt'}, 1.0]
[{'Eggs', 'Bread'}, {'Onion', 'Yoghurt'}, 1.0]
[{'Eggs', 'Yoghurt'}, {'Onion', 'Br

In [7]:
# Load the input file line by line into `baskets`.
# The previous readline() loop appended each line *before* testing for
# end-of-file, so the empty-string EOF sentinel ended up as the last element
# of the list; readlines() returns only the real lines.
# NOTE(review): the captured output of this cell shows HTML markup, so the
# file named "true" looks like a downloaded web page rather than basket
# data - confirm the path points at the intended dataset.
with open("true", "r") as f:
    baskets = f.readlines()

# Preview a slice of the loaded lines.
baskets[10:100]

['  \n',
 '  <link rel="icon" type="image/x-icon" href="https://instructure-uploads-eu.s3-eu-west-1.amazonaws.com/account_87790000000000001/attachments/2/favicon.ico?AWSAccessKeyId=AKIAJE5F5SJN3PUW3VNQ&amp;Expires=1935715252&amp;Signature=5iVLmF0ytONDSnXy8I3F8sntbk4%3D&amp;response-cache-control=Cache-Control%3Amax-age%3D473364000%2C%20public&amp;response-expires=473364000" />\n',
 '  <link rel="apple-touch-icon" href="https://instructure-uploads-eu.s3-eu-west-1.amazonaws.com/account_87790000000000001/attachments/3/kth_180.png?AWSAccessKeyId=AKIAJE5F5SJN3PUW3VNQ&amp;Expires=1935715252&amp;Signature=XSlSY%2FfwjBM%2F%2F%2BsLh93hxhlfBH4%3D&amp;response-cache-control=Cache-Control%3Amax-age%3D473364000%2C%20public&amp;response-expires=473364000" />\n',
 '  <link rel="stylesheet" href="https://du11hjcvx0uqb.cloudfront.net/dist/brandable_css/61214b82fbdb8adc98308672edcc2c3e/variables-7dd4b80918af0e0218ec0229e4bd5873.css" media="all" />\n',
 '  <link rel="stylesheet" href="https://du11hjcvx0u