In [4]:
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.preprocessing import TransactionEncoder
# Read the recipe table from the CSV file in the working directory.
recipe_data = pd.read_csv('recipe_data.csv')

# Gather the 'Ingredient Name' values of every 'ID' group into one list, then
# flatten the grouped Series into a plain list of lists (one inner list per ID).
ingredients_per_recipe = recipe_data.groupby('ID')['Ingredient Name'].apply(list)
transactions = ingredients_per_recipe.tolist()

In [5]:
# Mining parameters, named so they can be tuned in one place.
MIN_SUPPORT = 0.01    # minimum support passed to apriori
MAX_ITEMSET_LEN = 3   # largest itemset size apriori is asked to generate

# Encode the transactions as a table with one row per transaction and one
# column per distinct item (column labels come from the fitted encoder).
te = TransactionEncoder()
te_ary = te.fit_transform(transactions)
df = pd.DataFrame(te_ary, columns=te.columns_)

# Mine all itemsets of up to MAX_ITEMSET_LEN items that meet MIN_SUPPORT.
frequent_itemsets = apriori(
    df,
    min_support=MIN_SUPPORT,
    use_colnames=True,
    max_len=MAX_ITEMSET_LEN,
)

# Compute each itemset's size once and reuse it for every size filter,
# instead of re-running a per-row lambda for each size.
itemset_len = frequent_itemsets['itemsets'].map(len)
itemsets_size_1 = frequent_itemsets[itemset_len == 1]
itemsets_size_2 = frequent_itemsets[itemset_len == 2]
itemsets_size_3 = frequent_itemsets[itemset_len == 3]

# Write each size group to its own CSV file (row index omitted).
for itemsets, csv_path in (
    (itemsets_size_1, 'itemsets_size_1.csv'),
    (itemsets_size_2, 'itemsets_size_2.csv'),
    (itemsets_size_3, 'itemsets_size_3.csv'),
):
    itemsets.to_csv(csv_path, index=False)

In [6]:
# Pair every heading with the itemset table it introduces so the same
# print logic is written once instead of being repeated per size.
top_itemset_reports = (
    ('Top 20 itemsets for size 1 based on support value', itemsets_size_1),
    ('Top 20 itemsets for size 2 based on support value', itemsets_size_2),
    ('Top 20 itemsets for size 3 based on support value', itemsets_size_3),
)

for heading, itemsets in top_itemset_reports:
    print(heading)
    # The 20 rows with the highest 'support' value, largest first.
    print(itemsets.nlargest(20, 'support'))

Top 20 itemsets for size 1 based on support value
      support               itemsets
104  0.361991                 (salt)
114  0.356350                (sugar)
22   0.308679               (butter)
81   0.252820                (onion)
54   0.239902               (garlic)
93   0.226710               (pepper)
126  0.219887                (water)
1    0.215338  (all - purpose flour)
79   0.189320            (olive oil)
53   0.175946                 (eggs)
73   0.155659                 (milk)
105  0.148563          (salt pepper)
122  0.121998      (vanilla extract)
123  0.112082        (vegetable oil)
52   0.108079                  (egg)
38   0.101892             (cinnamon)
31   0.086063              (chicken)
7    0.077784        (baking powder)
67   0.077056          (lemon juice)
55   0.075328        (garlic powder)
Top 20 itemsets for size 2 based on support value
      support                       itemsets
595  0.166485                  (sugar, salt)
259  0.150564                (sug

In [7]:
# Select the 20 highest-support rows of each size group in one pass.
top_20_size_1, top_20_size_2, top_20_size_3 = (
    itemsets.nlargest(20, 'support')
    for itemsets in (itemsets_size_1, itemsets_size_2, itemsets_size_3)
)

# Save each top-20 table to its own CSV file (row index omitted).
for top_itemsets, csv_path in (
    (top_20_size_1, 'top_20_itemsets_size_1.csv'),
    (top_20_size_2, 'top_20_itemsets_size_2.csv'),
    (top_20_size_3, 'top_20_itemsets_size_3.csv'),
):
    top_itemsets.to_csv(csv_path, index=False)

Support:

1. Indicates the Popularity of an Itemset:

    Support measures how often a particular item or itemset appears in the dataset. A higher support value indicates that the itemset occurs frequently, making it more relevant in frequent itemset mining (FISM).

2. Threshold for Frequent Itemsets:

    Support is the criterion used to identify frequent itemsets: an itemset is considered frequent when its support is greater than or equal to a chosen minimum support threshold (0.01 in this notebook, i.e. the itemset appears in at least 1% of recipes).
### **Support**:

$$
\text{Support}(X) = \frac{\text{Number of recipes containing itemset } X}{\text{Total number of recipes}}
$$


Confidence:

1. Measures the Strength of an Association Rule:

    Confidence measures the strength of the association rule X→Y: it is the fraction of recipes containing itemset X that also contain itemset Y. A high confidence value means that when X is present, Y is likely to also be present in the same recipe.

2. Helps in Decision Making:

    Confidence helps in evaluating the usefulness and reliability of the rules generated by frequent itemset mining. Rules with high confidence are more actionable for decision-making.
### **Confidence**:
$$
\text{Confidence}(X \rightarrow Y) = \frac{\text{Support}(X \cup Y)}{\text{Support}(X)}
$$
