# **Auxiliary Code for generating frequent itemsets**

### Frequent itemsets are generated using the Apriori algorithm
### A separate set of itemsets is created for each meal type

***Created by P. Akshay Kumar***

In [None]:
# all imports
import pickle
import re
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.preprocessing import TransactionEncoder
from nltk.stem import WordNetLemmatizer

**The dataset is loaded and pre-processed, then converted to transactions that are given as input to the Apriori algorithm. The resulting frequent itemsets are pickled so they can be used in the main GUI screen.**

In [None]:
# Load the recipe dataset and create the lemmatizer used by every meal type.
df = pd.read_csv("recipes.csv")
lm = WordNetLemmatizer()

# Split the recipes by meal type; each subset is mined separately.
breakfast_df = df[df["Meal"] == "Breakfast"]
lunch_df = df[df["Meal"] == "Lunch"]
dinner_df = df[df["Meal"] == "Dinner"]

# Each "Lookup Ingredients" cell is parsed as the text of a list (presumably
# something like "['egg', 'milk']" - confirm against the CSV): the brackets
# are stripped, the text is split on ", ", quotes and surrounding whitespace
# are removed, and every name is lemmatized.
breakfast_ingredients_lists = list(breakfast_df["Lookup Ingredients"])
for i, ingredient_list in enumerate(breakfast_ingredients_lists):
    temp_list = ingredient_list.strip('][').split(', ')
    # A distinct comprehension variable avoids shadowing the loop index `i`.
    temp_list = [lm.lemmatize(t.replace("'", "").strip()) for t in temp_list]
    breakfast_ingredients_lists[i] = temp_list

# One-hot encode the transactions: one row per recipe, one boolean column per
# distinct ingredient.
te = TransactionEncoder()
te_ary = te.fit(breakfast_ingredients_lists).transform(breakfast_ingredients_lists)
breakfast_df_new = pd.DataFrame(te_ary, columns=te.columns_)
# An empty cell such as "[]" parses to a single "" token, which would show up
# as a bogus ingredient column. errors="ignore" keeps this from raising
# KeyError when no recipe produced such a token.
breakfast_df_new.drop("", inplace=True, axis=1, errors="ignore")

# Mine frequent itemsets, then keep only ingredient pairs, most frequent first.
frequent_itemsets_breakfast = apriori(breakfast_df_new, min_support=0.005, use_colnames=True)
frequent_itemsets_breakfast['length'] = frequent_itemsets_breakfast['itemsets'].apply(len)
# The support condition re-applies the same threshold that was passed to apriori.
frequent_itemsets_breakfast = frequent_itemsets_breakfast[(frequent_itemsets_breakfast['length'] == 2) &
                                                          (frequent_itemsets_breakfast['support'] >= 0.005)]
frequent_itemsets_breakfast = frequent_itemsets_breakfast.sort_values(['support'], ascending=[False])

# Persist the result; the context manager closes the file even if dump() fails.
with open('frequent_itemsets_breakfast_pickle', 'wb') as dbfile:
    pickle.dump(frequent_itemsets_breakfast, dbfile)

# Sanity check: show the ten highest-support pairs whose text mentions "egg".
print("Breakfast top 10 combinations with eggs")
print("----------------------------------------")
egg_mask = frequent_itemsets_breakfast['itemsets'].apply(lambda x: 'egg' in str(x))
print(frequent_itemsets_breakfast[egg_mask]["itemsets"][:10])

**The same steps are repeated for the lunch and dinner meal types**

In [None]:
# Parse each lunch recipe's "Lookup Ingredients" text into a list of
# lemmatized ingredient names: brackets are stripped, the text is split on
# ", ", and quotes and surrounding whitespace are removed.
lunch_ingredients_lists = list(lunch_df["Lookup Ingredients"])
for i, ingredient_list in enumerate(lunch_ingredients_lists):
    temp_list = ingredient_list.strip('][').split(', ')
    # A distinct comprehension variable avoids shadowing the loop index `i`.
    temp_list = [lm.lemmatize(t.replace("'", "").strip()) for t in temp_list]
    lunch_ingredients_lists[i] = temp_list

# One-hot encode the transactions: one row per recipe, one boolean column per
# distinct ingredient.
te = TransactionEncoder()
te_ary = te.fit(lunch_ingredients_lists).transform(lunch_ingredients_lists)
lunch_df_new = pd.DataFrame(te_ary, columns=te.columns_)
# An empty cell such as "[]" parses to a single "" token, which would show up
# as a bogus ingredient column. errors="ignore" keeps this from raising
# KeyError when no recipe produced such a token.
lunch_df_new.drop("", inplace=True, axis=1, errors="ignore")

# Mine frequent itemsets, then keep only ingredient pairs, most frequent first.
frequent_itemsets_lunch = apriori(lunch_df_new, min_support=0.005, use_colnames=True)
frequent_itemsets_lunch['length'] = frequent_itemsets_lunch['itemsets'].apply(len)
# The support condition re-applies the same threshold that was passed to apriori.
frequent_itemsets_lunch = frequent_itemsets_lunch[(frequent_itemsets_lunch['length'] == 2) &
                                                  (frequent_itemsets_lunch['support'] >= 0.005)]
frequent_itemsets_lunch = frequent_itemsets_lunch.sort_values(['support'], ascending=[False])

# Persist the result; the context manager closes the file even if dump() fails.
with open('frequent_itemsets_lunch_pickle', 'wb') as dbfile:
    pickle.dump(frequent_itemsets_lunch, dbfile)

# Sanity check: show the ten highest-support pairs whose text mentions "egg".
print("Lunch top 10 combinations with eggs")
print("----------------------------------------")
egg_mask = frequent_itemsets_lunch['itemsets'].apply(lambda x: 'egg' in str(x))
print(frequent_itemsets_lunch[egg_mask]["itemsets"][:10])

# Parse each dinner recipe's "Lookup Ingredients" text into a list of
# lemmatized ingredient names: brackets are stripped, the text is split on
# ", ", and quotes and surrounding whitespace are removed.
dinner_ingredients_lists = list(dinner_df["Lookup Ingredients"])
for i, ingredient_list in enumerate(dinner_ingredients_lists):
    temp_list = ingredient_list.strip('][').split(', ')
    # A distinct comprehension variable avoids shadowing the loop index `i`.
    temp_list = [lm.lemmatize(t.replace("'", "").strip()) for t in temp_list]
    dinner_ingredients_lists[i] = temp_list

# One-hot encode the transactions: one row per recipe, one boolean column per
# distinct ingredient.
te = TransactionEncoder()
te_ary = te.fit(dinner_ingredients_lists).transform(dinner_ingredients_lists)
dinner_df_new = pd.DataFrame(te_ary, columns=te.columns_)
# An empty cell such as "[]" parses to a single "" token, which would show up
# as a bogus ingredient column and could end up inside itemsets. Drop it when
# present; errors="ignore" makes this a no-op when no such column exists.
dinner_df_new.drop("", inplace=True, axis=1, errors="ignore")

# Mine frequent itemsets, then keep only ingredient pairs, most frequent first.
frequent_itemsets_dinner = apriori(dinner_df_new, min_support=0.005, use_colnames=True)
frequent_itemsets_dinner['length'] = frequent_itemsets_dinner['itemsets'].apply(len)
# The support condition re-applies the same threshold that was passed to apriori.
frequent_itemsets_dinner = frequent_itemsets_dinner[(frequent_itemsets_dinner['length'] == 2) &
                                                    (frequent_itemsets_dinner['support'] >= 0.005)]
frequent_itemsets_dinner = frequent_itemsets_dinner.sort_values(['support'], ascending=[False])

# Persist the result; the context manager closes the file even if dump() fails.
with open('frequent_itemsets_dinner_pickle', 'wb') as dbfile:
    pickle.dump(frequent_itemsets_dinner, dbfile)

# Sanity check: show the ten highest-support pairs whose text mentions "egg".
# The ingredient names were lemmatized above, so the singular "egg" is the
# form to search for; it also still matches any name containing "eggs".
print("Dinner top 10 combinations with eggs")
print("----------------------------------------")
egg_mask = frequent_itemsets_dinner['itemsets'].apply(lambda x: 'egg' in str(x))
print(frequent_itemsets_dinner[egg_mask]["itemsets"][:10])