---
### Importing Libraries


In [None]:
import numpy as np
import pandas as pd
import math
from itertools import combinations
from tabulate import tabulate

---
### Importing Datasets

In [None]:
# Load the raw transactions. header=None tells pandas the file has no header
# row, so the first line is kept as data and columns are labelled 0, 1, 2, ...
df = pd.read_csv("store_data.csv", header=None)

# Show the loaded frame.
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
2,chutney,,,,,,,,,,,,,,,,,,,
3,turkey,avocado,,,,,,,,,,,,,,,,,,
4,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7496,butter,light mayo,fresh bread,,,,,,,,,,,,,,,,,
7497,burgers,frozen vegetables,eggs,french fries,magazines,green tea,,,,,,,,,,,,,,
7498,chicken,,,,,,,,,,,,,,,,,,,
7499,escalope,green tea,,,,,,,,,,,,,,,,,,


---
### Data Cleaning Procedure:
    
    Removing NaN values


In [None]:
# Turn the frame into one list of item names per transaction (row).
#
# Every cell is stringified and the cells whose text is 'nan' (the padding
# pandas puts in rows shorter than the widest one) are dropped in the same
# pass. Iterating over the rows of df.values, instead of indexing with the
# hard-coded bounds 7501 x 20, makes the cell work for any number of rows
# and columns.
records = [
    [str(value) for value in row if str(value) != 'nan']
    for row in df.values
]


In [None]:
# Rebuild a frame from the cleaned per-transaction lists (one row per transaction).
df2 = pd.DataFrame(records)

In [None]:
df2

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
2,chutney,,,,,,,,,,,,,,,,,,,
3,turkey,avocado,,,,,,,,,,,,,,,,,,
4,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7496,butter,light mayo,fresh bread,,,,,,,,,,,,,,,,,
7497,burgers,frozen vegetables,eggs,french fries,magazines,green tea,,,,,,,,,,,,,,
7498,chicken,,,,,,,,,,,,,,,,,,,
7499,escalope,green tea,,,,,,,,,,,,,,,,,,


---

### Finding the Count of Unique Values


In [None]:
# Flatten the per-transaction lists into a single flat list of item names,
# rewriting ' asparagus' (with a leading space) to 'asparagus' on the way.
# Despite its name, this list still contains one entry per occurrence.
unique_items_list = [
    item.replace(' asparagus', 'asparagus')
    for transaction in records
    for item in transaction
]

In [None]:
# Collapse the flattened item list into the set of distinct item names.
unique_items_set = set(unique_items_list)
#unique_items_set.remove("")
# Number of distinct items.
len(unique_items_set)

119

In [None]:
unique_items_set

{'almonds',
 'antioxydant juice',
 'asparagus',
 'avocado',
 'babies food',
 'bacon',
 'barbecue sauce',
 'black tea',
 'blueberries',
 'body spray',
 'bramble',
 'brownies',
 'bug spray',
 'burger sauce',
 'burgers',
 'butter',
 'cake',
 'candy bars',
 'carrots',
 'cauliflower',
 'cereals',
 'champagne',
 'chicken',
 'chili',
 'chocolate',
 'chocolate bread',
 'chutney',
 'cider',
 'clothes accessories',
 'cookies',
 'cooking oil',
 'corn',
 'cottage cheese',
 'cream',
 'dessert wine',
 'eggplant',
 'eggs',
 'energy bar',
 'energy drink',
 'escalope',
 'extra dark chocolate',
 'flax seed',
 'french fries',
 'french wine',
 'fresh bread',
 'fresh tuna',
 'fromage blanc',
 'frozen smoothie',
 'frozen vegetables',
 'gluten free bar',
 'grated cheese',
 'green beans',
 'green grapes',
 'green tea',
 'ground beef',
 'gums',
 'ham',
 'hand protein bar',
 'herb & pepper',
 'honey',
 'hot dogs',
 'ketchup',
 'light cream',
 'light mayo',
 'low fat yogurt',
 'magazines',
 'mashed potato',
 'ma

---
### Taking Input from User regarding Minimum Support and confidence

In [None]:
# Read the two thresholds interactively; each answer is parsed as a float
# (a non-numeric answer makes float() raise ValueError).
# NOTE(review): input() blocks "Restart Kernel -> Run All"; consider plain
# constants in a configuration cell so the results are reproducible.
min_support = float(input("Enter the Minimum Support Value: "))
min_confidence = float(input("Enter the Minimum Confidence Value: "))

Enter the Minimum Support Value: 0.002
Enter the Minimum Confidence Value: 0.001


In [None]:
# Tally how many times each item name occurs across all transactions.
counting = {}
for item in unique_items_list:
    counting[item] = counting.get(item, 0) + 1

print("")
print("Count of elements: ")

# One left-aligned "name : count" row per distinct item.
for name, occurrences in counting.items():
    print("{:<18} : {:<10}".format(name, occurrences))

# c is the number of rows printed above, i.e. the number of distinct items.
c = len(counting)
print("Total Number of Elements : {}".format(c))

#print(tabulate([k + v for k,v in counting.items()], headers = ["Items","Frequency"]))  


Count of elements: 
shrimp             : 536       
almonds            : 153       
avocado            : 250       
vegetables mix     : 193       
green grapes       : 68        
whole weat flour   : 70        
yams               : 86        
cottage cheese     : 239       
energy drink       : 200       
tomato juice       : 228       
low fat yogurt     : 574       
green tea          : 991       
honey              : 356       
salad              : 37        
mineral water      : 1788      
salmon             : 319       
antioxydant juice  : 67        
frozen smoothie    : 475       
spinach            : 53        
olive oil          : 494       
burgers            : 654       
meatballs          : 157       
eggs               : 1348      
chutney            : 31        
turkey             : 469       
milk               : 972       
energy bar         : 203       
whole wheat rice   : 439       
whole wheat pasta  : 221       
french fries       : 1282      
soup               

In [None]:
len(unique_items_list)

29363

---
### 1st Round

In [None]:
# Round 1: keep the single items whose support is above min_support.
#
# Support is the fraction of *transactions* that contain the item, so the
# denominator is the number of transactions, len(records). Dividing by
# len(unique_items_list) (the total number of item occurrences) understates
# every support value by the average basket size.
# NOTE(review): the numerator is the occurrence count from `counting`; it
# equals the number of transactions containing the item only if no item is
# repeated inside a single transaction - confirm for this dataset.
n_transactions = len(records)

counting1 = {}
for key, value in counting.items():
    if value / n_transactions > min_support:
        counting1[key] = value
        print("{:<18} : {:<10}".format(key, value))

# ci is the number of items that passed the threshold.
ci = len(counting1)
print("")
print("Total elements = {}".format(ci))

shrimp             : 536       
almonds            : 153       
avocado            : 250       
vegetables mix     : 193       
green grapes       : 68        
whole weat flour   : 70        
yams               : 86        
cottage cheese     : 239       
energy drink       : 200       
tomato juice       : 228       
low fat yogurt     : 574       
green tea          : 991       
honey              : 356       
mineral water      : 1788      
salmon             : 319       
antioxydant juice  : 67        
frozen smoothie    : 475       
olive oil          : 494       
burgers            : 654       
meatballs          : 157       
eggs               : 1348      
turkey             : 469       
milk               : 972       
energy bar         : 203       
whole wheat rice   : 439       
whole wheat pasta  : 221       
french fries       : 1282      
soup               : 379       
light cream        : 117       
frozen vegetables  : 715       
spaghetti          : 1306      
cookies 

In [None]:
# Print the round-1 survivors, one left-aligned "name : count" row each.
for item, occurrences in counting1.items():
    print("{:<18} : {:<10}".format(item, occurrences))

# c1 is the number of rows printed above.
c1 = len(counting1)
print("\nNumber of elements = {}".format(c1))

shrimp             : 536       
almonds            : 153       
avocado            : 250       
vegetables mix     : 193       
green grapes       : 68        
whole weat flour   : 70        
yams               : 86        
cottage cheese     : 239       
energy drink       : 200       
tomato juice       : 228       
low fat yogurt     : 574       
green tea          : 991       
honey              : 356       
mineral water      : 1788      
salmon             : 319       
antioxydant juice  : 67        
frozen smoothie    : 475       
olive oil          : 494       
burgers            : 654       
meatballs          : 157       
eggs               : 1348      
turkey             : 469       
milk               : 972       
energy bar         : 203       
whole wheat rice   : 439       
whole wheat pasta  : 221       
french fries       : 1282      
soup               : 379       
light cream        : 117       
frozen vegetables  : 715       
spaghetti          : 1306      
cookies 

In [None]:
# Compare the number of distinct items before and after the round-1 filter.
print("Number of unique items: {}".format(len(counting)))
print("Number of unique items in Round 1: {}".format(len(counting1)))

Number of unique items: 119
Number of unique items in Round 1: 88


### Making tuples of 2 from all items in records (total)

In [None]:
# Collect every ordered pair of items that share a transaction.
#
# For each transaction, every two different positions a != b contribute the
# pair (item_a, item_b), so both (x, y) and (y, x) are emitted. Pairs are
# deliberately NOT de-duplicated across transactions: the pair tally that is
# built from r1 needs one entry per co-occurrence.
#
# The former guard `(i[a],i[b]) or (i[b],i[a]) not in r1` was always true
# (a non-empty tuple is truthy), so it never filtered anything. It is removed
# rather than turned into a real membership test, which would collapse every
# pair count to 1 and rescan the whole list on each append.
r1 = [
    (transaction[a], transaction[b])
    for transaction in records
    for a in range(len(transaction))
    for b in range(len(transaction))
    if a != b
]

          

In [None]:
r1[0:5]

[('shrimp', 'almonds'),
 ('shrimp', 'avocado'),
 ('shrimp', 'vegetables mix'),
 ('shrimp', 'green grapes'),
 ('shrimp', 'whole weat flour')]

t1


In [None]:
# Tally how often each ordered item pair occurs in r1.
counting2 = {}
c = 0
for pair in r1:
    counting2[pair] = counting2.get(pair, 0) + 1

print("")

# Dump every pair together with its count.
for pair, occurrences in counting2.items():
    print("{} : {}".format(pair, occurrences))

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
('french wine', 'cereals') : 9
('strong cheese', 'french wine') : 6
('strong cheese', 'eggs') : 6
('eggs', 'strong cheese') : 6
('light cream', 'french wine') : 3
('cereals', 'shrimp') : 24
('cereals', 'french wine') : 9
('pancakes', 'yogurt cake') : 27
('yogurt cake', 'pancakes') : 27
('burgers', 'bug spray') : 8
('ham', 'bug spray') : 2
('bug spray', 'burgers') : 8
('bug spray', 'ham') : 2
('fresh tuna', 'vegetables mix') : 10
('fresh tuna', 'rice') : 6
('fresh tuna', 'chili') : 1
('fresh tuna', 'green beans') : 1
('fresh tuna', 'salad') : 3
('french wine', 'chili') : 1
('french wine', 'extra dark chocolate') : 4
('french wine', 'green beans') : 8
('french wine', 'salad') : 2
('vegetables mix', 'fresh tuna') : 10
('vegetables mix', 'chili') : 2
('vegetables mix', 'extra dark chocolate') : 2
('vegetables mix', 'green beans') : 2
('rice', 'fresh tuna') : 6
('rice', 'chili') : 4
('rice', 'extra dark chocolate') : 6
('rice'

In [None]:
# Print every cleaned transaction, one list of item names per line.
for transaction in records:
    print(transaction)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
['cereals']
['escalope', 'pasta', 'mushroom cream sauce']
['olive oil', 'light mayo']
['cottage cheese']
['herb & pepper', 'ground beef', 'champagne', 'green tea']
['hot dogs']
['spaghetti', 'french fries', 'energy drink']
['ham', 'shrimp', 'ground beef', 'mineral water', 'avocado', 'milk', 'brownies', 'protein bar']
['mineral water', 'almonds', 'cottage cheese', 'light mayo']
['protein bar']
['red wine', 'candy bars']
['mineral water', 'avocado', 'milk', 'almonds', 'eggs', 'french fries', 'melons']
['herb & pepper']
['vegetables mix', 'escalope']
['tomatoes', 'milk']
['milk', 'salmon']
['eggs']
['mineral water', 'vegetables mix', 'rice', 'light mayo']
['vegetables mix']
['muffins', 'cookies', 'green tea']
['grated cheese', 'spinach', 'eggs', 'cooking oil', 'green tea', 'salad']
['strawberries']
['mineral water', 'cake', 'chicken', 'french fries', 'tomato juice']
['mineral water', 'cereals']
['chocolate', 'french fries']


### Now converting counting1 dict into list

In [None]:
# Item names that passed round 1, in the insertion order of counting1.
qualified_round_1_list = list(counting1)

In [None]:
qualified_round_1_list

### Making tuples of 2 of qualified round 1

In [None]:
# Every ordered pair (a, b) with a != b of items that passed round 1.
#
# The items are de-duplicated once up front (dict.fromkeys keeps the first
# occurrence and the original order), which yields the same list as checking
# `(a, b) not in tup_qualified_r1` before each append but without rescanning
# the growing result list every time.
distinct_items = list(dict.fromkeys(qualified_round_1_list))
tup_qualified_r1 = [
    (a, b)
    for a in distinct_items
    for b in distinct_items
    if a != b
]

# counting2 = total tuple of 2
# Tally of every ordered pair in r1 (all items, not only round-1 survivors).
counting2 = {}
for pair in r1:
    counting2[pair] = counting2.get(pair, 0) + 1


In [None]:
counting2

{('shrimp', 'almonds'): 17,
 ('shrimp', 'avocado'): 23,
 ('shrimp', 'vegetables mix'): 20,
 ('shrimp', 'green grapes'): 8,
 ('shrimp', 'whole weat flour'): 7,
 ('shrimp', 'yams'): 8,
 ('shrimp', 'cottage cheese'): 20,
 ('shrimp', 'energy drink'): 21,
 ('shrimp', 'tomato juice'): 27,
 ('shrimp', 'low fat yogurt'): 47,
 ('shrimp', 'green tea'): 86,
 ('shrimp', 'honey'): 33,
 ('shrimp', 'salad'): 6,
 ('shrimp', 'mineral water'): 177,
 ('shrimp', 'salmon'): 35,
 ('shrimp', 'antioxydant juice'): 5,
 ('shrimp', 'frozen smoothie'): 55,
 ('shrimp', 'spinach'): 5,
 ('shrimp', 'olive oil'): 61,
 ('almonds', 'shrimp'): 17,
 ('almonds', 'avocado'): 13,
 ('almonds', 'vegetables mix'): 8,
 ('almonds', 'green grapes'): 6,
 ('almonds', 'whole weat flour'): 3,
 ('almonds', 'yams'): 4,
 ('almonds', 'cottage cheese'): 6,
 ('almonds', 'energy drink'): 6,
 ('almonds', 'tomato juice'): 7,
 ('almonds', 'low fat yogurt'): 15,
 ('almonds', 'green tea'): 38,
 ('almonds', 'honey'): 11,
 ('almonds', 'salad'): 2,


In [None]:
len(tup_qualified_r1)

7656

In [None]:
tup_qualified_r1.count(('green tea', 'soup'))

1

In [None]:
# Restrict the pair tally to pairs built from round-1 items; candidate pairs
# that never occur in r1 (absent from counting2) are left out.
c = 0
counting2_c = {
    pair: counting2[pair]
    for pair in tup_qualified_r1
    if pair in counting2
}

print("")




In [None]:
counting2_c

## Summary:
Count of individual elements/items = counting dict

Count of Round 1 qualifiers = counting1 dict

Count of tuple of 2 - TOTAL = counting2 dict

Count of tuple of 2 - QUALIFIED = counting2_c dict


In [None]:
"""
for i in counting2_c:
  print(i[0])
"""

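### Confidence for 1st stage: $\text{confidence}(A \rightarrow B) = \dfrac{n(A \cup B)}{n(A)}$, where $n(A \cup B)$ is the number of transactions containing both $A$ and $B$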

In [None]:
# Inputs used by this cell:
#   counting1   -> occurrence count of each round-1 item
#   counting2_c -> count of each ordered pair made of round-1 items
#
# For every pair (X, Y) the confidence of the rule X -> Y is printed as
# count(X, Y) / count(X), framed by two separator lines.
separator = "------------------------------------------------------------"
for (antecedent, consequent), pair_count in counting2_c.items():
    confidence = pair_count / counting1[antecedent]
    print(separator)
    print("Confidence for {} -> {}: {}".format(antecedent, consequent, confidence))
    print(separator)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
------------------------------------------------------------
------------------------------------------------------------
Confidence for muffins -> extra dark chocolate: 0.0055248618784530384
------------------------------------------------------------
------------------------------------------------------------
Confidence for muffins -> protein bar: 0.027624309392265192
------------------------------------------------------------
------------------------------------------------------------
Confidence for muffins -> red wine: 0.03314917127071823
------------------------------------------------------------
------------------------------------------------------------
Confidence for muffins -> pasta: 0.016574585635359115
------------------------------------------------------------
------------------------------------------------------------
Confidence for muffins -> pepper: 0.022099447513812154
------------------------------

---

### Making a tuple of 3 for qualified items


In [None]:
qualified_round_1_list[0:5]

['shrimp', 'almonds', 'avocado', 'vegetables mix', 'green grapes']

In [None]:
# Every ordered triple (a, b, c) of three *different* round-1 items.
#
# Two points matter here:
#   * A candidate needs three pairwise-distinct items. Testing only
#     `a == b == c` rejects just the all-equal case and lets triples such as
#     ('shrimp', 'shrimp', 'almonds') through.
#   * The items are de-duplicated once up front (order preserved), so no
#     `not in` scan over the growing result list is needed before each
#     append; with this many candidates that scan is prohibitively slow.
distinct_items = list(dict.fromkeys(qualified_round_1_list))
tup_of_3_qualified_r1 = [
    (a, b, c)
    for a in distinct_items
    for b in distinct_items
    if a != b
    for c in distinct_items
    if c != a and c != b
]


In [None]:
tup_of_3_qualified_r1

[('shrimp', 'shrimp', 'almonds'),
 ('shrimp', 'shrimp', 'avocado'),
 ('shrimp', 'shrimp', 'vegetables mix'),
 ('shrimp', 'shrimp', 'green grapes'),
 ('shrimp', 'shrimp', 'whole weat flour'),
 ('shrimp', 'shrimp', 'yams'),
 ('shrimp', 'shrimp', 'cottage cheese'),
 ('shrimp', 'shrimp', 'energy drink'),
 ('shrimp', 'shrimp', 'tomato juice'),
 ('shrimp', 'shrimp', 'low fat yogurt'),
 ('shrimp', 'shrimp', 'green tea'),
 ('shrimp', 'shrimp', 'honey'),
 ('shrimp', 'shrimp', 'mineral water'),
 ('shrimp', 'shrimp', 'salmon'),
 ('shrimp', 'shrimp', 'antioxydant juice'),
 ('shrimp', 'shrimp', 'frozen smoothie'),
 ('shrimp', 'shrimp', 'olive oil'),
 ('shrimp', 'shrimp', 'burgers'),
 ('shrimp', 'shrimp', 'meatballs'),
 ('shrimp', 'shrimp', 'eggs'),
 ('shrimp', 'shrimp', 'turkey'),
 ('shrimp', 'shrimp', 'milk'),
 ('shrimp', 'shrimp', 'energy bar'),
 ('shrimp', 'shrimp', 'whole wheat rice'),
 ('shrimp', 'shrimp', 'whole wheat pasta'),
 ('shrimp', 'shrimp', 'french fries'),
 ('shrimp', 'shrimp', 'soup

---

### Count of tuple of 3 QUALIFIED

### Now mapping qualified tuple to individual items to find confidence

### Iterating through dictionary -  to find support and confidence

---

### Making Sets


In [None]:
# l = list(permutations(range(1, 4))) 
# print l 




In [None]:
#%%

"""

Finding Support

"""
# 

# support = frequency/N

In [None]:
#%%

"""

Checking Support threshold

"""

In [None]:
#%%

"""

Permutation and Combinations of sets

"""