In [11]:
from csv import reader
from collections import defaultdict
from itertools import chain, combinations
from optparse import OptionParser
import sys
from time import perf_counter



def getAboveMinSup(itemSet, itemSetList, minSup, globalItemSetWithSup):
    """Return the candidates in ``itemSet`` whose support is at least ``minSup``.

    Parameters
    ----------
    itemSet : iterable of set-like objects
        Candidate itemsets; each must provide ``issubset`` and be hashable.
    itemSetList : sequence of collections
        The transactions every candidate is tested against.
    minSup : float
        Minimum support, compared against (matching transactions / number of
        transactions) with ``>=``.
    globalItemSetWithSup : mapping of candidate -> int
        Running support counts. It is updated in place: every candidate that
        matches at least one transaction has its count increased by the
        number of matching transactions.

    Returns
    -------
    set
        The candidates whose support reaches ``minSup``. Candidates that
        match no transaction are never returned, whatever ``minSup`` is.
    """
    matchCounts = defaultdict(int)

    for candidate in itemSet:
        # Number of transactions that contain every element of the candidate.
        hits = sum(1 for transaction in itemSetList if candidate.issubset(transaction))
        if hits:
            # Only candidates seen at least once are recorded in either mapping.
            globalItemSetWithSup[candidate] += hits
            matchCounts[candidate] += hits

    return {
        candidate
        for candidate, hits in matchCounts.items()
        if float(hits / len(itemSetList)) >= minSup
    }



def getRules(freqItemSet, itemSetWithSup, minConf):
    """Build the association rules that meet the minimum confidence.

    For every frequent itemset, each non-empty proper subset is tried as an
    antecedent; the remaining elements form the consequent. The confidence of
    ``antecedent -> consequent`` is ``support(itemset) / support(antecedent)``.

    Parameters
    ----------
    freqItemSet : mapping of key -> iterable of frozenset
        Frequent itemsets grouped under arbitrary keys; only the values are
        used.
    itemSetWithSup : mapping of frozenset -> int
        Support counts for the itemsets and their subsets. It is only read
        (via ``get``), so a ``defaultdict`` does not gain new keys here.
    minConf : float
        Minimum confidence. The bound is inclusive, consistent with the
        inclusive minimum-support test in ``getAboveMinSup``.

    Returns
    -------
    list of [set, set, float]
        One ``[antecedent, consequent, confidence]`` entry per accepted rule.
    """
    rules = []
    for itemSets in freqItemSet.values():
        for item in itemSets:
            itemSupport = itemSetWithSup.get(item, 0)
            # All non-empty proper subsets; single-element itemsets yield none.
            antecedents = chain.from_iterable(
                combinations(item, r) for r in range(1, len(item))
            )
            for antecedent in antecedents:
                antecedentSupport = itemSetWithSup.get(frozenset(antecedent), 0)
                if not antecedentSupport:
                    # No recorded support: confidence is undefined, so skip
                    # the rule instead of dividing by zero.
                    continue
                confidence = float(itemSupport / antecedentSupport)
                if confidence >= minConf:
                    rules.append(
                        [set(antecedent), set(item.difference(antecedent)), confidence]
                    )
    return rules



def apriori(fname, minSup, minConf):
    """Run Apriori on a transaction file and derive association rules.

    The file is read with ``csv.reader``. For each row, the first non-empty
    field is split on single spaces and its non-empty tokens form one
    transaction. Rows that contain no items (blank lines, rows with only
    empty fields, or a field made only of spaces) are skipped so they neither
    raise ``IndexError`` nor add an empty-string item.

    Frequent 1-itemsets are computed first; then, level by level, candidates
    of size ``k`` are built by joining the frequent ``(k-1)``-itemsets, pruned
    when any ``(k-1)``-subset is not frequent, and filtered by support through
    ``getAboveMinSup`` until no frequent itemset remains.

    Parameters
    ----------
    fname : str or path-like
        Path of the transaction file.
    minSup : float
        Minimum support passed to ``getAboveMinSup``.
    minConf : float
        Minimum confidence passed to ``getRules``.

    Returns
    -------
    tuple
        ``(globalFreqItemSet, rules)``: a dict mapping itemset size to the set
        of frequent itemsets of that size, and the list returned by
        ``getRules``.
    """
    itemSetList = []
    C1ItemSet = set()

    # newline='' lets the csv module do its own line-ending handling.
    with open(fname, 'r', newline='') as file:
        for row in reader(file):
            fields = [field for field in row if field]
            if not fields:
                continue  # blank line or row with only empty fields
            # Drop the empty tokens produced by consecutive spaces.
            tokens = [token for token in fields[0].strip(" ").split(" ") if token]
            if not tokens:
                continue  # field held nothing but spaces
            record = set(tokens)
            C1ItemSet.update(frozenset([item]) for item in record)
            itemSetList.append(record)

    globalFreqItemSet = dict()
    globalItemSetWithSup = defaultdict(int)

    currentLSet = getAboveMinSup(C1ItemSet, itemSetList, minSup, globalItemSetWithSup)
    k = 2

    while currentLSet:
        globalFreqItemSet[k - 1] = currentLSet

        # Join step: unions of two frequent (k-1)-itemsets that have size k.
        joined = {
            i.union(j)
            for i in currentLSet
            for j in currentLSet
            if len(i.union(j)) == k
        }
        # Prune step: keep a candidate only if all its (k-1)-subsets are frequent.
        candidateSet = {
            candidate
            for candidate in joined
            if all(
                frozenset(subset) in currentLSet
                for subset in combinations(candidate, k - 1)
            )
        }

        currentLSet = getAboveMinSup(candidateSet, itemSetList, minSup, globalItemSetWithSup)
        k += 1

    rules = getRules(globalFreqItemSet, globalItemSetWithSup, minConf)

    return globalFreqItemSet, rules

if __name__ == "__main__":
    # Single Apriori run on dataset.txt with fixed thresholds, timed end to
    # end (file reading included), followed by one line per mined rule.
    inpfile = "dataset.txt"
    minSup, minConf = 0.2, 0.5

    t1_start = perf_counter()
    freqItemSet, rules = apriori(inpfile, minSup, minConf)
    t1_stop = perf_counter()

    print(f"Elapsed time during the whole program in seconds: {t1_stop - t1_start}")
    for rule in rules:
        # Each rule is [antecedent, consequent, confidence].
        print(f"{rule[0]} ----> {rule[1]} with confidence {rule[2]}")
        

Elapsed time during the whole program in seconds: 0.0011774999998124258
{'38'} ----> {'39'} with confidence 1.0
{'39'} ----> {'38'} with confidence 0.5454545454545454
{'48'} ----> {'39'} with confidence 0.75
{'39'} ----> {'48'} with confidence 0.5454545454545454


In [12]:

if __name__ == "__main__":
    # Sweep every (minimum support, minimum confidence) pair over 0.1 .. 1.0
    # and print one line per run with the columns:
    #   support  confidence  number-of-rules  elapsed-time-in-milliseconds
    # A blank line separates the groups that share a confidence value.
    inpfile = "dataset.txt"
    minSup = [tenth / 10 for tenth in range(1, 11)]  # 0.1, 0.2, ..., 1.0
    minConf = list(minSup)

    for j, confThreshold in enumerate(minConf):
        for i, supThreshold in enumerate(minSup):
            t1_start = perf_counter()
            freqItemSet, rules = apriori(inpfile, supThreshold, confThreshold)
            t1_stop = perf_counter()
            print(minSup[i], minConf[j], len(rules), round((t1_stop - t1_start) * 1000, 2))
        print()
            
            

0.1 0.1 32 1.33
0.2 0.1 4 1.35
0.3 0.1 0 1.61
0.4 0.1 0 1.27
0.5 0.1 0 0.94
0.6 0.1 0 1.91
0.7 0.1 0 1.15
0.8 0.1 0 1.14
0.9 0.1 0 1.09
1.0 0.1 0 1.43

0.1 0.2 32 1.9
0.2 0.2 4 1.25
0.3 0.2 0 1.2
0.4 0.2 0 1.02
0.5 0.2 0 0.99
0.6 0.2 0 1.0
0.7 0.2 0 0.99
0.8 0.2 0 0.99
0.9 0.2 0 1.0
1.0 0.2 0 1.0

0.1 0.3 29 1.28
0.2 0.3 4 1.04
0.3 0.3 0 1.48
0.4 0.3 0 1.83
0.5 0.3 0 0.9
0.6 0.3 0 1.0
0.7 0.3 0 1.02
0.8 0.3 0 0.99
0.9 0.3 0 1.0
1.0 0.3 0 0.98

0.1 0.4 27 1.25
0.2 0.4 4 1.04
0.3 0.4 0 1.05
0.4 0.4 0 1.01
0.5 0.4 0 1.02
0.6 0.4 0 0.99
0.7 0.4 0 1.05
0.8 0.4 0 1.07
0.9 0.4 0 1.01
1.0 0.4 0 1.03

0.1 0.5 17 1.23
0.2 0.5 4 1.07
0.3 0.5 0 1.01
0.4 0.5 0 1.01
0.5 0.5 0 0.97
0.6 0.5 0 1.0
0.7 0.5 0 0.97
0.8 0.5 0 0.99
0.9 0.5 0 0.99
1.0 0.5 0 1.0

0.1 0.6 15 1.27
0.2 0.6 2 1.11
0.3 0.6 0 1.03
0.4 0.6 0 1.0
0.5 0.6 0 1.0
0.6 0.6 0 0.98
0.7 0.6 0 0.96
0.8 0.6 0 1.0
0.9 0.6 0 0.98
1.0 0.6 0 0.96

0.1 0.7 10 1.22
0.2 0.7 2 1.06
0.3 0.7 0 1.02
0.4 0.7 0 0.99
0.5 0.7 0 1.07
0.6 0.7 0 0.98
0.7 0.7 0 