In [421]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Suppress all Python warnings for cleaner output (this filter is global, not pandas-specific)
import warnings
warnings.filterwarnings('ignore')

## **Book Dataset**

In [422]:
# Toy transaction data: each 'T...' key maps to the list of 'I...' item labels
# recorded under it.
# NOTE: despite the name, `df` is a plain dict, not a pandas DataFrame.
df = {'T100': ['I1', 'I2', 'I5'],
      'T200': ['I2', 'I4'],
      'T300': ['I2', 'I3'],
      'T400': ['I1', 'I2', 'I4'],
      'T500': ['I1', 'I3'],
      'T600': ['I2', 'I3'],
      'T700': ['I1', 'I3'],
      'T800': ['I1', 'I2', 'I3', 'I5'],
      'T900': ['I1', 'I2', 'I3']}

In [423]:
# Will hold one list of item labels per entry of `df` (keys dropped).
itemList = []

In [424]:
# Collect the item lists of `df` in dictionary order, discarding the keys.
itemList.extend(df.values())

In [425]:
itemList

[['I1', 'I2', 'I5'],
 ['I2', 'I4'],
 ['I2', 'I3'],
 ['I1', 'I2', 'I4'],
 ['I1', 'I3'],
 ['I2', 'I3'],
 ['I1', 'I3'],
 ['I1', 'I2', 'I3', 'I5'],
 ['I1', 'I2', 'I3']]

In [426]:
# Absolute support threshold: an item set is kept when its count is
# greater than or equal to this value.
min_support = 2

## **Function to calculate first Frequent Item Set**

In [427]:
def firstFrequentItemSet(itemList, min_support):
  """Return the single items whose support reaches ``min_support``.

  Support is the number of transactions (inner lists of ``itemList``) that
  contain the item. An item repeated inside one transaction is counted once
  for that transaction, which matches how the pair and k-item steps count
  support (one increment per containing transaction).

  Args:
    itemList: iterable of transactions, each an iterable of hashable items.
    min_support: minimum count (inclusive) for an item to be kept.

  Returns:
    A list of ``([item], count)`` tuples, ordered by the first time each
    item is seen while scanning ``itemList``.
  """
  support_by_item = {}
  for items in itemList:
    # dict.fromkeys removes repeats within the transaction while keeping
    # first-seen order, so the result order is stable.
    for item in dict.fromkeys(items):
      support_by_item[item] = support_by_item.get(item, 0) + 1

  return [([item], count)
          for item, count in support_by_item.items()
          if count >= min_support]

In [428]:
firstFrequentList = firstFrequentItemSet(itemList, min_support)

In [429]:
firstFrequentList

[(['I1'], 6), (['I2'], 7), (['I5'], 2), (['I4'], 2), (['I3'], 6)]

**All Frequent Item Sets**

In [430]:
# Running results, extended as each item-set size is processed:
# the item sets alone, and the same item sets paired with their counts.
allFrequentItems = []
allFrequentItemsWithSupport = []

In [431]:
# Record the 1-item sets: bare item lists in one list, (items, count) tuples in the other.
allFrequentItems.extend(entry[0] for entry in firstFrequentList)
allFrequentItemsWithSupport.extend(firstFrequentList)

In [432]:
allFrequentItems

[['I1'], ['I2'], ['I5'], ['I4'], ['I3']]

In [433]:
allFrequentItemsWithSupport

[(['I1'], 6), (['I2'], 7), (['I5'], 2), (['I4'], 2), (['I3'], 6)]

## **Function to calculate Second Frequent Item Set**

In [434]:
def secondFrequentItemSet(firstFrequentList, itemList, min_support):
  """Return the 2-item sets whose support reaches ``min_support``.

  Every unordered pair of items taken from ``firstFrequentList`` is counted
  against ``itemList``; a transaction contributes one to the count when it
  contains both items.

  Args:
    firstFrequentList: list of ``([item], count)`` tuples.
    itemList: list of transactions (each a collection of items).
    min_support: minimum count (inclusive) for a pair to be kept.

  Returns:
    A list of ``([item_a, item_b], count)`` tuples, each pair sorted.
  """
  singles = [entry[0][0] for entry in firstFrequentList]

  frequent_pairs = []
  for position, left in enumerate(singles):
    for right in singles[position + 1:]:
      support = sum(
          1 for transaction in itemList
          if left in transaction and right in transaction
      )
      if support >= min_support:
        frequent_pairs.append((sorted([left, right]), support))

  return frequent_pairs

In [435]:
secondFrequentList = secondFrequentItemSet(firstFrequentList, itemList, min_support)

In [436]:
secondFrequentList

[(['I1', 'I2'], 4),
 (['I1', 'I5'], 2),
 (['I1', 'I3'], 4),
 (['I2', 'I5'], 2),
 (['I2', 'I4'], 2),
 (['I2', 'I3'], 4)]

**Appending to All Frequent Item Sets**

In [437]:
# Record the 2-item sets: bare item lists in one list, (items, count) tuples in the other.
allFrequentItems.extend(entry[0] for entry in secondFrequentList)
allFrequentItemsWithSupport.extend(secondFrequentList)

In [438]:
allFrequentItems

[['I1'],
 ['I2'],
 ['I5'],
 ['I4'],
 ['I3'],
 ['I1', 'I2'],
 ['I1', 'I5'],
 ['I1', 'I3'],
 ['I2', 'I5'],
 ['I2', 'I4'],
 ['I2', 'I3']]

In [439]:
allFrequentItemsWithSupport

[(['I1'], 6),
 (['I2'], 7),
 (['I5'], 2),
 (['I4'], 2),
 (['I3'], 6),
 (['I1', 'I2'], 4),
 (['I1', 'I5'], 2),
 (['I1', 'I3'], 4),
 (['I2', 'I5'], 2),
 (['I2', 'I4'], 2),
 (['I2', 'I3'], 4)]

**Unique Items**

In [440]:
# Will hold the individual item labels taken from the frequent 1-item sets.
unique = []

In [441]:
# Each entry is ([item], count); keep just the bare item label.
unique.extend(entry[0][0] for entry in firstFrequentList)

In [442]:
unique

['I1', 'I2', 'I5', 'I4', 'I3']

## **Pruning**  
**Checking whether every subset of a candidate (one item removed) is already among the frequent item sets**

In [443]:
def is_apriori(item_sets, union):
    """Check that every one-item-smaller subset of ``union`` is in ``item_sets``.

    The comparison ignores item order: ``['I4', 'I3']`` matches a stored
    ``['I3', 'I4']``. A plain list-equality lookup would reject valid
    candidates whenever the candidate's item order differs from the order in
    which its subsets were stored.

    Args:
        item_sets: iterable of item collections already known to be frequent.
            Items must be hashable.
        union: sequence of items forming the candidate item set. It is not
            modified.

    Returns:
        True if each subset obtained by dropping exactly one element of
        ``union`` is present in ``item_sets``; False otherwise. An empty
        ``union`` yields True because there is nothing to check.
    """
    known = {frozenset(item_set) for item_set in item_sets}
    candidate = list(union)
    for position in range(len(candidate)):
        # Drop one element at a time without touching the caller's list.
        subset = candidate[:position] + candidate[position + 1:]
        if frozenset(subset) not in known:
            return False
    return True

## **Frequent Item Set for 3 or greater**

In [444]:
from itertools import combinations

In [445]:
def threeOrGreater(unique, itemList, allFrequentItems, min_support):
  """Find and print frequent item sets of size 3 and larger, level by level.

  For each size from 3 up to ``len(unique)``, every combination of items
  from ``unique`` is considered. A candidate is kept when ``is_apriori``
  accepts it against ``allFrequentItems`` and at least ``min_support``
  transactions in ``itemList`` contain all of its items. The loop stops
  after the first size that yields no frequent item set.

  Args:
    unique: list of individual items to combine.
    itemList: list of transactions (each a collection of items).
    allFrequentItems: list of already-found frequent item sets; extended
      in place with the item sets found here.
    min_support: minimum count (inclusive) for an item set to be kept.

  Returns:
    None. Results are printed per size and appended to ``allFrequentItems``
    and to the module-level ``allFrequentItemsWithSupport`` list, which must
    exist before this function is called.
  """
  for size in range(3, len(unique)+1):

    frequent_of_size = []

    for item_set in combinations(unique, size):
      # Store candidates in sorted order, like the sorted pairs built by
      # secondFrequentItemSet, so subsets of a candidate line up with the
      # item sets already recorded in allFrequentItems.
      candidate = sorted(item_set)

      # Prune before counting: a candidate with a missing subset is
      # rejected without scanning the transactions.
      if not is_apriori(allFrequentItems, candidate):
        continue

      candidate_items = set(candidate)
      pair_count = sum(1 for items in itemList if candidate_items.issubset(items))

      if pair_count >= min_support:
        frequent_of_size.append((candidate, pair_count))

    print(f"{size} Frequent Item Set")
    print("---------------------")
    print(frequent_of_size)
    print("---------------------")
    print("---------------------")

    for found in frequent_of_size:
      allFrequentItems.append(found[0])
      allFrequentItemsWithSupport.append(found)

    # Nothing frequent at this size means nothing larger can be frequent.
    if not frequent_of_size:
      break

In [446]:
# Prints the frequent item sets found for each size and extends
# allFrequentItems / allFrequentItemsWithSupport in place (returns nothing).
threeOrGreater(unique, itemList, allFrequentItems, min_support)

3 Frequent Item Set
---------------------
[(['I1', 'I2', 'I5'], 2), (['I1', 'I2', 'I3'], 2)]
---------------------
---------------------
4 Frequent Item Set
---------------------
[]
---------------------
---------------------


In [447]:
allFrequentItems

[['I1'],
 ['I2'],
 ['I5'],
 ['I4'],
 ['I3'],
 ['I1', 'I2'],
 ['I1', 'I5'],
 ['I1', 'I3'],
 ['I2', 'I5'],
 ['I2', 'I4'],
 ['I2', 'I3'],
 ['I1', 'I2', 'I5'],
 ['I1', 'I2', 'I3']]

In [448]:
allFrequentItemsWithSupport

[(['I1'], 6),
 (['I2'], 7),
 (['I5'], 2),
 (['I4'], 2),
 (['I3'], 6),
 (['I1', 'I2'], 4),
 (['I1', 'I5'], 2),
 (['I1', 'I3'], 4),
 (['I2', 'I5'], 2),
 (['I2', 'I4'], 2),
 (['I2', 'I3'], 4),
 (['I1', 'I2', 'I5'], 2),
 (['I1', 'I2', 'I3'], 2)]