In [1]:


import sys

from itertools import chain, combinations
from collections import defaultdict



def subsets(arr):
    """ Returns non empty subsets of arr"""
    return chain(*[combinations(arr, i + 1) for i, a in enumerate(arr)])


def returnItemsWithMinSupport(itemSet, transactionList, minSupport, freqSet):
   
    _itemSet = set()
    localSet = defaultdict(int)

    for item in itemSet:
        for transaction in transactionList:
            if item.issubset(transaction):
                freqSet[item] += 1
                localSet[item] += 1
    # print("Frequency Set")
    # for key, value in freqSet.items():
    #     print(key,"->",value)

    for item, count in localSet.items():
        support = count

        if support >= minSupport:
            _itemSet.add(item)

    return _itemSet


def joinSet(itemSet, length):
   
    return set(
        [i.union(j) for i in itemSet for j in itemSet if len(i.union(j)) == length]
    )


def getItemSetTransactionList(data_iterator):
    transactionList = list()
    itemSet = set()
    for record in data_iterator:
       
        transaction = record
        
        transactionList.append(transaction)
        for item in transaction:
            itemSet.add(frozenset([item]))  # Generate 1-itemSets
   
    return itemSet, transactionList

def intermediate_results(table,index_val,flg):
    L_list=[]
    C_list=[]

    if flg==0:
        
        for index,val in enumerate(table):

            C_list.append(list(val))
        print()
        print("C{} Table".format(index_val))
        print(C_list)

    else:
        for index,val in enumerate(table):

            L_list.append(list(val))

        print()
        print("L{} Table".format(index_val))
        print(L_list)



    
    

def runApriori(data_iter, minSupport, minConfidence):
  
    itemSet, transactionList = getItemSetTransactionList(data_iter)
    

    freqSet = defaultdict(int)
    largeSet = dict()

    assocRules = dict()


    oneCSet = returnItemsWithMinSupport(itemSet, transactionList, minSupport, freqSet)
    C1_list=[]
    L1_list=[]
    for index,val in enumerate(oneCSet):

        C1_list.append(list(val))
    print()
    print("*********  Intermediate Results  *************")
    print()
    print("C1 Table")
    print(C1_list)

    currentLSet = oneCSet
    for index,val in enumerate(currentLSet):

        L1_list.append(list(val))

    print()
    print("L1 Table")
    print(L1_list)

    k = 2
    while currentLSet != set([]):
        print()
        print("***********************************************")
        largeSet[k - 1] = currentLSet
        currentLSet = joinSet(currentLSet, k)
        intermediate_results(currentLSet,k,0)
        currentCSet = returnItemsWithMinSupport(
            currentLSet, transactionList, minSupport, freqSet
        )
       
        currentLSet = currentCSet
        print("{} steps completed".format(k))
        intermediate_results(currentCSet,k,1)
        k = k + 1

    def getSupport(item):
        """local function which Returns the support of an item"""
        return freqSet[item]

    toRetItems = []
    for key, value in largeSet.items():
        toRetItems.extend([(tuple(item), getSupport(item)) for item in value])

    toRetRules = []
    for key, value in list(largeSet.items())[1:]:
        for item in value:
            _subsets = map(frozenset, [x for x in subsets(item)])
            for element in _subsets:
                remain = item.difference(element)
                if len(remain) > 0:
                    confidence = getSupport(item) / getSupport(element)
                    if confidence >= minConfidence:
                        toRetRules.append(((tuple(element), tuple(remain)), confidence))
    return toRetItems, toRetRules


def printResults(items, rules):
 
    print()
    print("***** Items, Support is described as ************")
    print()
    for item, support in sorted(items, key=lambda x: x[1]):
        print("item: %s , %d" % (str(item), support))
    print()
    print("\nAssociative Rules of length 4 is:")
    print()
    
    for rule, confidence in sorted(rules, key=lambda x: x[1]):
        pre, post = rule
        if len(pre) + len(post)==4:
            print("Rule: %s ==> %s , %d" % (str(pre), str(post), confidence*100))


def to_str_results(items, rules):

    i, r = [], []
    for item, support in sorted(items, key=lambda x: x[1]):
        x = "item: %s , %.3f" % (str(item), support)
        i.append(x)

    for rule, confidence in sorted(rules, key=lambda x: x[1]):
        pre, post = rule
        x = "Rule: %s ==> %s , %.3f" % (str(pre), str(post), confidence)
        r.append(x)

    return i, r


def dataFromFile(fname):
    
    flg=0

    with open(fname, "r") as file_iter:
        iter=0
        for line in file_iter:
          if flg==0:
            flg=1
            continue
          else:
            if iter ==100:
              break
            line = line.strip().rstrip(",")  # Remove trailing comma
            # l.append(line)
            record = frozenset(line.split(","))
            iter=iter+1
            
            yield record
   


if __name__ == "__main__":

    minSupport = int(input("Enter the Minimum Support:"))
    minConfidence = float(input("Enter the Min Confidence:"))
    # minSupport=2
    # minConfidence=0.7
    dataset_name="Goods_service_dataset.csv"
   
    inFile=dataFromFile(dataset_name)
 
  
   

    items, rules = runApriori(inFile, minSupport, minConfidence)

    printResults(items, rules)


Enter the Minimum Support:3
Enter the Min Confidence:0.7

*********  Intermediate Results  *************

C1 Table
[['41'], ['310'], ['340'], ['237'], ['36'], ['179'], ['60'], ['37'], ['79'], ['242'], ['32'], ['65'], ['147'], ['39'], ['161'], ['170'], ['225'], ['38'], ['152'], ['105'], ['89'], ['110'], ['186'], ['258'], ['48']]

L1 Table
[['41'], ['310'], ['340'], ['237'], ['36'], ['179'], ['60'], ['37'], ['79'], ['242'], ['32'], ['65'], ['147'], ['39'], ['161'], ['170'], ['225'], ['38'], ['152'], ['105'], ['89'], ['110'], ['186'], ['258'], ['48']]

***********************************************

C2 Table
[['161', '41'], ['37', '186'], ['310', '110'], ['340', '36'], ['179', '258'], ['39', '225'], ['225', '89'], ['48', '170'], ['38', '170'], ['340', '38'], ['186', '89'], ['225', '65'], ['110', '38'], ['105', '110'], ['161', '38'], ['152', '170'], ['161', '105'], ['340', '237'], ['105', '60'], ['147', '48'], ['32', '147'], ['161', '237'], ['39', '242'], ['41', '258'], ['258', '310'], ['

In [None]:
[]