### Storing the query-recipe pairs that were already evaluated as a reference

The idea is to save into a "dictionary" the evaluations of the recipes returned for a specific query, so that a new model's performance on the same set of queries can be easily compared with the previous ones, since most of the answers will be the same.

In [5]:
import pandas as pd

# Try to load the previously saved review bank. If the workbook does not exist
# yet (or cannot be read as a workbook), start from an empty frame instead.
# NOTE(review): rows loaded here already carry grouped Evaluator labels and
# averaged ratings - confirm the aggregation further down treats them as
# intended when they are mixed with freshly loaded raw reviews.
try:
    review_bank = pd.read_excel('reviews/review_bank.xlsx')
except (OSError, ValueError):
    # OSError covers a missing/unreadable file; ValueError covers a file whose
    # format pandas cannot determine. Other errors are left to surface.
    review_bank = pd.DataFrame()


# Spreadsheets with the earlier manual evaluations, keyed by evaluator name.
# Each evaluator has one workbook per retrieval model they reviewed.
reviews_paths = dict(
    Rodrigo=[
        'reviews/Results_semantic_Rodrigo.xlsx',
        'reviews/Results_Bm25_Rodrigo.xlsx',
        'reviews/Results_Tfidf_Rodrigo.xlsx',
    ],
    Sanches=[
        'reviews/Results_semantic_sanches.xlsx',
        'reviews/Results_Bm25_sanches.xlsx',
        'reviews/Results_Tfidf_sanches.xlsx',
    ],
    Vanessa=[
        'reviews/Results_semantic_Vanessa.xlsx',
        'reviews/Results_Bm25_Vanessa.xlsx',
        'reviews/Results_Tfidf_Vanessa.xlsx',
    ],
    # Only two workbooks are listed for this evaluator.
    Ramalho=[
        'reviews/Results_Bm25_ramalho.xlsx',
        'reviews/Results_Tfidf_ramalho.xlsx',
    ],
)

for path in reviews_paths['Ramalho']:
    # Read one workbook, give the rating and recipe columns their common
    # names, tag every row with the evaluator, and shift the ratings down by
    # one (they were recorded on a 1-6 scale).
    ramalho_reviews = (
        pd.read_excel(path, sheet_name="Sheet1")
        .rename(columns={'Unnamed: 6': 'Nota', 'title': 'Receita'})
        .assign(Evaluator='Ramalho', Nota=lambda sheet: sheet['Nota'] - 1)
    )
    # Append only the columns kept in the review bank.
    review_bank = pd.concat([
        review_bank,
        ramalho_reviews[['Tipo', 'Descrição', 'Query', 'Receita', 'Nota', 'Evaluator']],
    ])

for path in reviews_paths['Rodrigo']:
    # Read one workbook, give the rating and recipe columns their common
    # names, tag every row with the evaluator, and shift the ratings down by
    # one (they were recorded on a 1-6 scale).
    rod_reviews = (
        pd.read_excel(path, sheet_name="Sheet1")
        .rename(columns={'Unnamed: 12': 'Nota', 'title': 'Receita'})
        .assign(Evaluator='Rodrigo', Nota=lambda sheet: sheet['Nota'] - 1)
    )
    # Append only the columns kept in the review bank.
    review_bank = pd.concat([
        review_bank,
        rod_reviews[['Tipo', 'Descrição', 'Query', 'Receita', 'Nota', 'Evaluator']],
    ])

for path in reviews_paths['Sanches']:
    # Read one workbook, give the rating and recipe columns their common
    # names, tag every row with the evaluator, and shift the ratings down by
    # one (they were recorded on a 1-6 scale).
    san_reviews = (
        pd.read_excel(path, sheet_name="Sheet1")
        .rename(columns={'nota': 'Nota', 'title': 'Receita'})
        .assign(Evaluator='Sanches', Nota=lambda sheet: sheet['Nota'] - 1)
    )
    # Append only the columns kept in the review bank.
    review_bank = pd.concat([
        review_bank,
        san_reviews[['Tipo', 'Descrição', 'Query', 'Receita', 'Nota', 'Evaluator']],
    ])
                
for path in reviews_paths['Vanessa']:
    # Read one workbook, give the rating and recipe columns their common
    # names, tag every row with the evaluator, and shift the ratings down by
    # one (they were recorded on a 1-6 scale).
    van_reviews = (
        pd.read_excel(path, sheet_name="Sheet1")
        .rename(columns={'Unnamed: 6': 'Nota', 'title': 'Receita'})
        .assign(Evaluator='Vanessa', Nota=lambda sheet: sheet['Nota'] - 1)
    )
    # Append only the columns kept in the review bank.
    review_bank = pd.concat([
        review_bank,
        van_reviews[['Tipo', 'Descrição', 'Query', 'Receita', 'Nota', 'Evaluator']],
    ])

def group_evaluators(evaluator):
    """Map an individual evaluator label to its evaluator group.

    Args:
        evaluator: Value from the ``Evaluator`` column, e.g. a reviewer's name.

    Returns:
        ``"Person"`` for the known human reviewers (and for rows that already
        carry the grouped ``"Person"`` label), ``"IA"`` for anything else.
    """
    # "Person" is accepted as input so that rows which were already grouped
    # are not relabelled as "IA" when this function is applied again.
    human_labels = ("Rodrigo", "Sanches", "Vanessa", "Ramalho", "Person")
    if evaluator in human_labels:
        return "Person"
    return "IA"

# Collapse individual evaluators into groups, then average the ratings that
# share the same query-recipe pair within a group.
review_bank["Evaluator"] = review_bank["Evaluator"].apply(group_evaluators)
review_bank = (
    review_bank
    .groupby(['Tipo', 'Descrição', "Query", "Receita", "Evaluator"])
    .agg({"Nota": "mean"})
    .reset_index()
)
# Store the averaged rating as a whole number.
review_bank["Nota"] = review_bank["Nota"].round().astype(int)
review_bank

Unnamed: 0,Tipo,Descrição,Query,Receita,Evaluator,Nota
0,Keywords,Pergunta simples,Brûlée Cream,amazing lavender crme brle,Person,5
1,Keywords,Pergunta simples,Brûlée Cream,brled mashed sweet potatoes,Person,3
2,Keywords,Pergunta simples,Brûlée Cream,coffee brandy cream brulee,Person,4
3,Keywords,Pergunta simples,Brûlée Cream,cream brulee with strawberries,Person,4
4,Keywords,Pergunta simples,Brûlée Cream,cream cheese ice cream,Person,0
...,...,...,...,...,...,...
142,Semantica,Pergunta média,pasta without eggs,pasta with asparagus and fried eggs,Person,0
143,Semantica,Pergunta média,pasta without eggs,pasta with carbonara sauce,Person,0
144,Semantica,Pergunta média,pasta without eggs,sauerkraut cake without eggs,Person,1
145,Semantica,Pergunta média,pasta without eggs,simple savory pasta,Person,3


## Exporting the evaluations to a "review bank"

In [6]:
# Write the aggregated ratings to the review bank workbook, leaving out the
# row index.
review_bank.to_excel(excel_writer='reviews/review_bank.xlsx', index=False)

# Preview the first rows of the frame that was just written.
review_bank.head(n=5)

Unnamed: 0,Tipo,Descrição,Query,Receita,Evaluator,Nota
0,Keywords,Pergunta simples,Brûlée Cream,amazing lavender crme brle,Person,5
1,Keywords,Pergunta simples,Brûlée Cream,brled mashed sweet potatoes,Person,3
2,Keywords,Pergunta simples,Brûlée Cream,coffee brandy cream brulee,Person,4
3,Keywords,Pergunta simples,Brûlée Cream,cream brulee with strawberries,Person,4
4,Keywords,Pergunta simples,Brûlée Cream,cream cheese ice cream,Person,0


Now for every new evaluation, we can just add the new ratings to this file, and for the new searches we can just filter the recipes that are not in this file to evaluate them.

When the recipes are already in this file, we immediately have the evaluation.

In [7]:
# Reload the review bank from the workbook path used by the export step.
review_bank = pd.read_excel('reviews/review_bank.xlsx')

def lookup_rating(query, recipe):
    """Print (and return) the stored rating for a query-recipe pair.

    The pair is looked up in the module-level ``review_bank`` frame by exact
    match on its ``Query`` and ``Receita`` columns. If several rows match, the
    first one is used.

    Args:
        query: Search query text to look for.
        recipe: Recipe title to look for.

    Returns:
        The ``Nota`` value of the first matching row, or ``None`` when the
        pair is not in the review bank.

    Raises:
        KeyError: If ``review_bank`` lacks one of the expected columns.
    """
    is_match = (review_bank['Query'] == query) & (review_bank['Receita'] == recipe)
    matches = review_bank.loc[is_match, "Nota"]

    # An explicit emptiness check is used instead of a catch-all handler, so a
    # real problem (e.g. a missing column) is not reported as "not found".
    if matches.empty:
        print(f"The recipe '{recipe}' for the query '{query}' was not found in the review bank.")
        return None

    rating = matches.values[0]
    print(f"The rating of the recipe '{recipe}' for the query '{query}' is {rating}.")
    return rating

# Look up two query-recipe pairs in the review bank, one after the other.
for query, recipe in [
    ("Brûlée Cream", "creme brulee for two"),
    ("Lasangna", "creme brulee for two"),
]:
    lookup_rating(query, recipe)

The rating of the recipe 'creme brulee for two' for the query 'Brûlée Cream' is 5.
The recipe 'creme brulee for two' for the query 'Lasangna' was not found in the review bank.
