In [1]:
from models import WordsComparison, TfidfSimilarity, ObjectsTextSimilarity, ObjectsSimilarityFiltered
import pandas as pd
import json
import pickle
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\dasha\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
train_data_enc = pd.read_csv("data//train_data_recipes_encoded.csv")
train_data_text = pd.read_csv("data//train_data_text_url.csv")
recipes_url, train_data_text = train_data_text["URL"], train_data_text.drop(columns=["URL"])
fd = pd.read_csv("data//filter_data_recipes.csv")
fd = fd.loc[:, ["Meal", "Course", "Cooking Methods", 'Vegetables', 'Fruits', 'Meat', 'Seafood', 'Mushrooms', 'Dairy', 'Grains', 'Nuts']].fillna(0)

# train_data_all = train_data_enc.copy()
# train_data_all[["Directions", "Ingr"]] = train_data_text

with open("data//recipes_val_indexes.txt", "r") as fp:
    train_val_indx = json.load(fp)

In [3]:
model1 = WordsComparison()
model1.fit(train_data_text)
model2 = TfidfSimilarity()
model2.fit(train_data_text)
model3 = ObjectsTextSimilarity()
model3.fit(train_data_text)
model4 = ObjectsSimilarityFiltered()
model4.fit(train_data_text, fd)

In [5]:
def validation(models, data):
    val_dir = {}
    for recipe_ind in train_val_indx.keys():
        temp = {}
        for model in models:
            if model.__class__.__name__ == "ObjectsSimilarityFiltered":
                 temp[str(model.__class__.__name__)] = model.predict(data.iloc[int(recipe_ind)].values, fd.iloc[int(recipe_ind)].values)
            else:
                temp[str(model.__class__.__name__)] = list(model.predict(data.iloc[int(recipe_ind)].values))
        val_dir[recipe_ind] = temp

    return val_dir

results_dir = validation([model1, model2, model3, model4], train_data_text)

In [9]:
res = [0, 0, 0, 0]
for rec_ind in results_dir.keys():
    for i, model in enumerate(results_dir[rec_ind].keys()):
        res[i] += len(set(train_val_indx[rec_ind]) & set(results_dir[rec_ind][model]))

In [10]:
alg = [
    "WordsComparison",
    "TfidfSimilarity",
    "ObjectsTextSimilarity (using only Directions and Ingredients features)",
    "ObjectsSimilarityFiltered"
]

pd.set_option('display.max_colwidth', None)
df = pd.DataFrame({"Algorithm": alg, "Accuracy, %": res})
df

Unnamed: 0,Algorithm,"Accuracy, %"
0,WordsComparison,10
1,TfidfSimilarity,32
2,ObjectsTextSimilarity (using only Directions and Ingredients features),45
3,ObjectsSimilarityFiltered,35


In [6]:
print("Recipes for validation: \n\n")
for key in train_val_indx.keys():
    print("Request: ")
    print(recipes_url[int(key)], "\n")
    print(recipes_url.iloc[train_val_indx[key]].values, "\n\n")

Recipes for validation: 


Request: 
https://www.allrecipes.com/recipe/230873/amazing-and-easy-chicken-wings/ 

['https://www.allrecipes.com/recipe/25505/mahogany-chicken-wings/'
 'https://www.allrecipes.com/recipe/213068/grill-master-chicken-wings/'
 'https://www.allrecipes.com/recipe/282258/grilled-sweet-and-sour-chicken-wings/'
 'https://www.allrecipes.com/recipe/218350/simple-marinated-chicken-wings/'
 'https://www.allrecipes.com/recipe/219107/sweet-hot-mustard-chicken-wings/'
 'https://www.allrecipes.com/recipe/267927/sweet-heat-chicken-wings/'
 'https://www.allrecipes.com/recipe/274652/garlic-molasses-chicken-wings/'
 'https://www.allrecipes.com/recipe/214464/chinese-chicken-wings/'
 'https://www.allrecipes.com/recipe/25187/easy-baked-chicken-wings/'
 'https://www.allrecipes.com/recipe/232335/honey-garlic-chicken-wings/'] 


Request: 
https://www.allrecipes.com/recipe/205189/1-pumpkin-spice-cookies/ 

['https://www.allrecipes.com/recipe/24422/chocolate-chip-pumpkin-cookies/'
 'ht

In [8]:
print("Predictions for requests\n\n")

def print_val(dir):
    for recipe_ind in dir.keys():
        print("Request: ")
        print(recipes_url[int(recipe_ind)])
        print()
        for model_key in dir[recipe_ind].keys():
            print(model_key)          
            print(recipes_url.iloc[dir[recipe_ind][model_key]].values)
            print()
        print()

print_val(results_dir)

Predictions for requests


Request: 
https://www.allrecipes.com/recipe/230873/amazing-and-easy-chicken-wings/

WordsComparison
['https://www.allrecipes.com/recipe/237780/true-wings/'
 'https://www.allrecipes.com/recipe/281615/sesame-chicken-salad/'
 'https://www.allrecipes.com/recipe/236901/southern-bbq-chicken/'
 'https://www.allrecipes.com/recipe/217845/aztec-chicken-with-sweet-potato-corn-mash/'
 'https://www.allrecipes.com/recipe/8509102/chicken-al-pastor/'
 'https://www.allrecipes.com/recipe/233934/dijon-grilled-pork-chops/'
 'https://www.allrecipes.com/recipe/274690/homemade-portuguese-chicken/'
 'https://www.allrecipes.com/recipe/236503/jerk-chicken-wings/'
 'https://www.allrecipes.com/recipe/282319/sweet-and-sour-chicken-thighs/'
 'https://www.allrecipes.com/recipe/72007/chinese-pork-chops/']

TfidfSimilarity
['https://www.allrecipes.com/recipe/213068/grill-master-chicken-wings/'
 'https://www.allrecipes.com/recipe/274652/garlic-molasses-chicken-wings/'
 'https://www.allrecipes

In [None]:
file_name = 'ObjectsTextSimilarityModel.pkl'
with open(file_name, 'wb') as file:
    pickle.dump(model3, file)
    print(f'Object successfully saved to "{file_name}"')