In [376]:
import numpy as np
import pandas as pd
import datetime
import pickle as pkl
import os
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn import decomposition
import csv

In [377]:
ingredients_recipes = {}
author_recipes = {}
time_recipes = {}
recipe_review = {}
user_review = {}

In [378]:
def read_file(path):
    file = pd.read_csv(path)
    df = pd.DataFrame(file)
    return df

In [379]:
def create_array_time(df_recipes):
    array_time = []

    for i in range(len(df_recipes)):
        time = df_recipes["Total Time"][i]
        if time == "X":
            array_time.append(0)
        elif len(time) == 3:      #1 d ou 1 h ou 1 m                                      
            divide = time.split(" ")
            if divide[1] == "h":
                array_time.append(int(divide[0]) * 60)
            elif divide[1] == "m":
                array_time.append(int(divide[0]))
            elif divide[1] == "d":
                array_time.append(int(divide[0]) * 24 * 60)
        elif len(time) == 4:    #30 m ou 10 h
            if 'm' in time:
                array_time.append(int(time.split(" m")[0]))
            elif 'h' in time:
                array_time.append(int(time.split(" h")[0]) * 60)
            
        elif len(time) == 8 or len(time) == 7:
            t = 0
            if 'd' in time:     #1 d 1 h ou 1 d 30 m
                divide = time.split(" d ")
                t += int(divide[0]) * 60 * 24
                if 'm' in divide[1]:
                    t += int(divide[1].split(" m")[0])
                elif 'h' in divide[1]:
                    t += int(divide[1].split(" h")[0]) * 60
            elif 'h' in time:       #12 h 2 m ou 1 h 20 m ou 1 h 2 m
                divide = time.split(" h ")
                t += int(divide[0]) * 60 + int(divide[1].split(" m")[0])
            
            array_time.append(t)
        elif len(time) == 9:    #12 h 20 m ou 12 d 12 h ou 12 d 20 m
            t = 0
            if "d" in time:
                divide = time.split(" d ")
                t += int(divide[0]) * 24 * 60
                if "m" in divide[1]:
                    t += int(divide[1].split(" m")[0])
                elif "h" in divide[1]:
                    t += int(divide[1].split(" h")[0])
            elif "h" in time:
                divide = time.split(" h ")
                t += int(divide[0]) * 60 + int(divide[1].split(" m")[0])
            array_time.append(t)
        elif len(time) == 11 or len(time) == 12 or len(time) == 13 or len(time) == 14:     #1 d 1 h 5 m ou 1 d 1 h 50 m ou 1 d 10 h 50 m ou 10 d 10 h 50 m
            divide = time.split(" d ")
            t = int(divide[0]) * 24 * 60
            div = divide[1].split(" h ")
            t += int(div[0]) * 60 + int(div[1].split(" m")[0])
            array_time.append(t)
            
    return array_time

In [380]:
def create_array_review(df_recipes):
    array_review = []
    for i in range(len(df_recipes)):
        review = df_recipes["Review Count"][i]
        if "k" in review:
            array_review.append(int(review.split("k")[0]) * 1000)
        else:
            array_review.append(int(review))
    return array_review

In [381]:
def create_ingredients_recipes(df_recipes):
    for i in range(len(df_recipes)):
        ingredients = df_recipes["Ingredients"][i]
        list_ingred = ingredients.split(',')
        for ing in list_ingred:
            if ing not in ingredients_recipes:
                ingredients_recipes[ing] = [df_recipes["Recipe Name"][i]]
            else:
                ingredients_recipes[ing].append(df_recipes["Recipe Name"][i])

In [382]:
def create_author_recipes(df_recipes):
    for i in range(len(df_recipes)):
        author = df_recipes["Author"][i]
        if author not in author_recipes:
            author_recipes[author] = [df_recipes["Recipe Name"][i]]
        else:
            author_recipes[author].append(df_recipes["Recipe Name"][i])

In [383]:
def create_time_recipes(df_recipes):
    global time_recipes
    for i in range(len(df_recipes)):
        time = df_recipes["Total Time (min)"][i]
        if time not in time_recipes:
            time_recipes[time] = [df_recipes["Recipe Name"][i]]
        else:
            time_recipes[time].append(df_recipes["Recipe Name"][i])
            
    time_recipes = dict(sorted(time_recipes.items()))

In [384]:
def create_recipe_review(df_recipes):
    for i in range(len(df_recipes)):
        recipe_review[df_recipes["Recipe Name"][i]] = df_recipes["Review Count"][i]

In [385]:
def create_user_review(df_review):
    global user_review
    if os.path.exists("dataset/user_reviews.pkl"):
        with open("dataset/user_reviews.pkl", 'rb') as arq:
            user_review =  pkl.load(arq)
    else:
        for i in range(len(df_review)):
            user = df_review["Name"][i]
            user_review[user] = {}
            for j in range(6):
                rank = df_review[str(j)][i]
                if rank == " ":
                    user_review[user][str(j)] = []
                elif ',' in rank:
                    list_recipes = rank.split(",")
                    user_review[user][str(j)] = list_recipes
                else:
                    user_review[user][str(j)] = [rank]
        with open("dataset/user_reviews.pkl", 'wb') as arq:
            pkl.dump(user_review, arq)

In [386]:
def plot_distributions(dic, title, xlabel, ylabel, number, orientation, x, y, fontesize):
    x_values = [i for i in dic if len(dic[i]) > number]
    y_values = [len(dic[i]) for i in dic if len(dic[i]) > number]
    
    plt.figure(figsize=(x, y))
    plt.bar(x_values, y_values, color='blue')                
    plt.title(title)
    plt.xticks(rotation=orientation, fontsize=fontesize)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.show()

In [387]:
def plot_top10_distributions(dic, top, title, xlabel, ylabel, orientation, x, y, is_list):
    if is_list:
        new_dic = dict(sorted(dic.items(), key=lambda x: len(x[1]), reverse=True))
        x_values = []
        y_values = []
        count = 0
        for i in new_dic:
            x_values.append(i)
            y_values.append(len(new_dic[i]))
            if count == top:
                break
            count += 1
    else:
        new_dic = dict(sorted(dic.items(), key=lambda x: x[1], reverse=True))
        x_values = []
        y_values = []
        count = 0
        for i in new_dic:
            x_values.append(i)
            y_values.append(new_dic[i])
            if count == top:
                break
            count += 1
    
    plt.figure(figsize=(x, y))
    plt.bar(x_values, y_values, color='blue')                
    plt.title(title)
    plt.xticks(rotation=orientation)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.show()


In [388]:
def plot_pca(df_recipes):
    variables = ["Total Time (min)", "Review Count"]

    x = df_recipes[variables]

    data = StandardScaler().fit_transform(x)
    pca = decomposition.PCA()
    pca.n_components = 2
    pca_data = pca.fit_transform(data)
    percentage_var_explained = pca.explained_variance_ / np.sum(pca.explained_variance_)
    cum_var_explained = np.cumsum(percentage_var_explained)
    plt.figure(1, figsize=(6, 4))
    plt.plot(cum_var_explained, linewidth=2)
    plt.axis('tight')
    plt.grid()
    plt.xlabel('principal components')
    plt.ylabel('Cumulative variance explained')
    plt.title('PCA: components selection')
    plt.show()

In [389]:
def register():
    # print("-"*40 + " Welcome to Pendia " + "-"*40)
    # print("1- Log In;\n2- Sing In;\n0- Leave;")
    option = input("1- Log In\n2- Sing In\n0- Leave\n")
    # print(option)
    name = ""
    password = ""
    state = False
    while option != "0":
        if os.path.exists("dataset/users.pkl"):
            with open("dataset/users.pkl", 'rb') as arq:
                d_users =  pkl.load(arq)
        else:
            d_users = {}
        if option == "1" and d_users == {}:
            print("Without users")
        elif option == "1":
            while(1):
                name = input("Name: ")
                # print("Name: " + name)
                if name in d_users:
                    break
                else:
                    print("Incorret Name")
            while(1):
                password = input("Password: ")
                # print("Password: " + password)
                if d_users[name] == password:
                    break
                else:
                    print("Incorret Password")
            # print("-"*40 + " Hi " + name + " " + "-"*40)
            state = True
        elif option == "2":
            while(1):
                name = input("Name: ")
                print("Name: " + name)
                if name not in d_users:
                    password = input("Password: ")
                    print("Password: " + password)
                    d_users[name] = password
                    break
                else:
                    print("User Already exists")
            with open("dataset/users.pkl", 'wb') as arq:
                pkl.dump(d_users, arq)
            print("-"*40 + " Hi " + name + " " + "-"*40)
            state = True
        else:
            print("Invalid Option")
            
        if state:
            option = "0"
        else:
            print("1- Log In;\n2- Sing In;\n0- Leave;")
            option = input("1- Log In;\n2- Sing In;\n0- Leave;\n")

    if not state: 
        print("Bye, Have a Great Time!")
        return None
    
    return [name, password]

In [390]:
def validation_date(df_user, string_data):
    day_to_validation = []
    actual_date_obj = datetime.datetime.strptime(string_data, "%Y-%m-%d")
    for i in range(len(df_user)):
        date_obj = datetime.datetime.strptime(df_user["Expiration Date"][i], "%Y-%m-%d")
        day_to_validation.append(actual_date_obj - date_obj)
    return day_to_validation

In [391]:
def ingredients_expirated(day_to_validation, df_user):
    #print("Do you want to know what ingredients are expired and what are about to expire? (1- Yes/2- No)")
    option = input("Do you want to know what ingredients are expired and what are about to expire? (1- Yes/2- No)")
    #print(option)
    if option == "1":
        for i in range(len(day_to_validation)):
            if day_to_validation[i].days > 0:
                print("Ingredient " + df_user["Ingredients"][i] + " is expired")
            elif -15 <= day_to_validation[i].days <= 0:
                print("Ingredient " + df_user["Ingredients"][i] + " expire in " + str(abs(day_to_validation[i].days)) + " days.") 
    elif option == "2":
        return
            

In [392]:
def register_product(df_user):
    print("-"*20 + " Register Product " + "-"*20)
    
    name = input("Product Name: ")
    print("Product Name: " + name)
    quantity = input("Product Quantity: ")
    print("Product Quantity: " + quantity)
    day = input("Day of Validation: ")
    print("Day of Validation: " + day)
    month = input("Month of Validation: ")
    print("Month of Validation: " + month)
    year = input("Year of Validation: ")
    print("Year of Validation: " + year)
    
    date = year + "-" + month + "-" + day
    id = len(df_user)
    new_line = {"Ingredients": name, "Quantity": int(quantity), "Expiration Date": date, "Id": id}
    new_line = pd.DataFrame([new_line], columns=df_user.columns)
    df_user = pd.concat([df_user, new_line], ignore_index=True)
    
    return df_user
    

In [393]:
def eliminate_product(df_user):
    print("-"*20 + " Eliminate Product " + "-"*20)
    id = ""
    while(1):
        id = input("Product Id: ")
        print("Product Id: " + id)
        if int(id) not in df_user["Id"]:
            print("Product Id " + id + " doesn't exists\n")
        else:
            break
    index_line = df_user.index[df_user["Id"] == int(id)].tolist()[0]
    df_user = df_user.drop(index_line)
    print("Product " + df_user["Ingredients"][index_line] + " eliminated")
    
    
    return df_user

In [394]:
def modelo_previsão(user, d_recipe_prod):
    dic = {}
    user_login = {}
    done = False
    user_rankings = []
    # ver se o utilizador já avaliou aquela receita, se sim não faz sentido estar a tentar encontrar um valor para ela
    for recepie in d_recipe_prod.keys():
        for rank in user_review[user].keys():
            for i in user_review[user][rank]:
                user_rankings.append(i)
                if recepie in user_review[user][rank]:
                    user_login[recepie] = rank
                    done = True
                    break
    
    for name_user in user_review.keys():
        intersecao = 0
        uniao = 0
        valor = 0
        lista_user = []
        if name_user != user:
            for rank in user_review[name_user].keys():
                for i in user_review[name_user][rank]:
                    lista_user.append(i)
            intersecao = len([set(lista_user) & set(user_rankings)])
            uniao = len(list(set(lista_user) | set(user_rankings)))
            valor = intersecao/uniao
            dic[name_user] = valor
    dicionario_ordenado = dict(sorted(dic.items(), key=lambda item: item[1], reverse=True))

    if done == False:
        x = 0
        for recepie in d_recipe_prod.keys():
            total = 0.0
            total_2 = 0.0
            for name_user in user_review.keys():
                if name_user != user:
                    for rank in user_review[name_user].keys():
                        for i in user_review[name_user][rank]:
                            if recepie == i and x < 6:
                                total += dicionario_ordenado[name_user] * rank
                                total_2 += dicionario_ordenado[name_user]
                                x += 1

            user_login[recepie] = total_2 # dicionario com {receita1: modelo de previsao, } para o user que esta loged in   

    dicionario_final = dict(sorted(user_login.items(), key=lambda item: item[1], reverse=True)) # {dicionario acima ordenado
    return dicionario_final

In [None]:
def time_to_cook():
    day = input("Day: ")
    hours = input("Hours: ")
    minutes = input("Minutes: ")
    total_time = int(day) * 24 * 60 + int(hours) * 60 + int(minutes)
    print("Time to spent doing the recipe.\nDay: " + day + "\nHours: " + hours + "\nMinutes: " + minutes)

    array_recipes_time = []
    for time in time_recipes:
        if total_time <= time:
            for recipe in time_recipes[time]:
                array_recipes_time.append(recipe)
    return array_recipes_time

In [None]:
def missing_ingridients(array_recipes_time, df_recipes, df_user):
    d_recipe_prod = {}
    ingredients = [i for i in df_user["Ingredients"]]
    
    count = 0
    for recipe in array_recipes_time:
        line = df_recipes.index[df_recipes["Recipe Name"] == recipe].tolist()[0]
        r_ingredients = df_recipes["Ingredients"][line].split(",")
        # if count == 0:
        #     print(r_ingredients)
        #     print(ingredients)
        count += 1
        aux = []
        for ing in r_ingredients:
            if ing not in ingredients:
                aux.append(ing)
        if len(aux) <= 2:
            d_recipe_prod[recipe] = aux

In [395]:
def make_recipe(df_recipes, df_user, name):
    print("-"*20 + " Make Recipe " + "-"*20)
    
    array_recipes_time = time_to_cook()
    d_recipe_prod = missing_ingridients(array_recipes_time, df_recipes, df_user)
    d_recipes_predicted = modelo_previsão(name, d_recipe_prod)
        

In [396]:
def rank_recipe(name, df_recipes):
    print("-"*20 + " Rank a Recipe " + "-"*20)
    print("Do you wanna know what recipes are available?(1- Yes/2- No)")
    option = input("Do you wanna know what recipes are available?(1- Yes/2- No)")
    print(option)
    if option == "1":
        columns = ["Recipe Name", "RecipeID"]
        df_aux = df_recipes[columns]
        display(df_aux)
    
    recipe_id = input("Choose a RecipeID: ")
    print("Choose a RecipeID: " + recipe_id)
    recipe_name = df_recipes.loc[df_recipes["RecipeID"] == int(recipe_id), "Recipe Name"].values[0]
    
    already_ranked = False
    rank = -1
    for ranks in user_review[name]:
        if recipe_name in user_review[name][ranks]:
            already_ranked = True
            rank = ranks
            break
    if already_ranked:
        print("You already ranked the recipe " + recipe_name + " with " + str(rank) + ". you want to change it?(1- Yes/2- No)")
        change = input("You already ranked the recipe " + recipe_name + " with " + str(rank) + ". you want to change it?(1- Yes/2- No)")
        if change == "1":
            user_review[name][rank].remove(recipe_name)
            while(1):
                new_rank = input("Rank (0-5): ")
                print("Rank(0-5): " + new_rank)
                if new_rank == "0" or new_rank == "1" or new_rank == "2" or new_rank == "3" or new_rank == "4" or new_rank == "5":
                    user_review[name][new_rank].append(recipe_name)
                    break
                else:
                    print("Invalid Option")
    else:
        while(1):
            new_rank = input("Rank (0-5): ")
            print("Rank (0-5): " + new_rank)
            if new_rank == "0" or new_rank == "1" or new_rank == "2" or new_rank == "3" or new_rank == "4" or new_rank == "5":
                user_review[name][new_rank].append(recipe_name)
                break
            else:
                print("Invalid Option")

    with open("dataset/user_reviews.pkl", 'wb') as arq:
        pkl.dump(user_review, arq)

In [397]:
def choose_options(df_user, df_recipes, name, path):
    while(1):
        #print("1- Register Product\n2- Eliminate Product\n3- Make Recipe\n4- Rank a Recipe\n5- Leave\n")
        option = input("1- Register Product\n2- Eliminate Product\n3- Make Recipe\n4- Rank a Recipe\n5- Leave")
        if option == "1":
            df_user = register_product(df_user) 
        elif option == "2":
            df_user = eliminate_product(df_user)
        elif option == "3":
            make_recipe(df_recipes, df_user, name)
        elif option == "4":
            rank_recipe(name, df_recipes, df_user)
        elif option == "5":
            df_user.to_csv(path, index=False)
            print("Bye, Have a Great Time!")
            break
        else:
            print("Invalid Option")

In [398]:
def user_interface(df_recipes):
    array_login = register()
    if array_login is None:
        return
    path = "dataset/" + array_login[0] + ".csv"
    actual_data = datetime.datetime.today()
    string_data = str(actual_data.year) + "-" + str(actual_data.month) + "-" + str(actual_data.day)
    #print("Current date: " + string_data)
    
    if os.path.exists(path):
        df_user = read_file(path)
        day_to_validation = validation_date(df_user, string_data)
        ingredients_expirated(day_to_validation, df_user)
        choose_options(df_user, df_recipes, array_login[0], path)
    else:
        print("1- Register Product\n2- Eliminate Product\n3- Make Recipe\n4- Rank a Recipe\n5- Leave\n")
        dic = {"Ingredients": [], "Quantity": [], "Expiration Date": [], "Id": []}
        df_user = pd.DataFrame(dic)
        choose_options(df_user, df_recipes, array_login[0], path)
        
    


In [399]:
def main():
    path = "dataset/"
    path2 = "dataset/user_reviews.csv"
    if os.path.exists(path + "Final_recipes.csv"):
        path += "Final_recipes.csv"
        df_recipes = read_file(path)
    else:
        path += "clean_recipes.csv"
        df_recipes = read_file(path)
        array_time = create_array_time(df_recipes)
        df_recipes["Total Time (min)"] = array_time
        array_review = create_array_review(df_recipes)
        df_recipes["Review Count"] = array_review
        df_recipes.to_csv("dataset/Final_recipes.csv", index=False)

    create_ingredients_recipes(df_recipes)
    create_author_recipes(df_recipes)
    create_time_recipes(df_recipes)
    create_recipe_review(df_recipes)
    
    df_review = read_file(path2)
    create_user_review(df_review)
    top = 10
    # plot_distributions(ingredients_recipes, "Ingredients Distribution", "Ingredients", "Number of Recipes", 100, "vertical", 15, 5, 5) 
    # plot_top10_distributions(ingredients_recipes, top, "Top 10 Ingredients Distribution", "Top 10 Ingredients", "Number of Recipes", "horizontal", 10, 5, True)
    
    # plot_distributions(author_recipes, "Author Distribution", "Authors", "Number of Recipes", 5, "vertical", 15, 5, 4) 
    # plot_top10_distributions(author_recipes, top, "Top 10 Authors Distribution", "Top 10 Authors", "Number of Recipes", "vertical", 10, 5, True)
    
    
    # time_without_0 = {chave: valor for chave, valor in time_recipes.items() if chave != list(time_recipes.keys())[0]}
    # plot_distributions(time_without_0, "Time Distribution", "Time (min)", "Number of Recipes", 5, "horizontal", 15, 5, 10) 
    # plot_top10_distributions(time_without_0, top, "Top 10 Times Distribution", "Top 10 Times (min)", "Number of Recipes", "horizontal", 10, 5, True)

    # plot_top10_distributions(recipe_review, top, "Top 10 Recipes Distribution", "Top 10 Recipes", "Review", "vertical", 10, 5, False)
    
    # plot_pca(df)   
    
    user_interface(df_recipes)

    
    

In [400]:
if __name__ == '__main__':
    main()

-------------------- Make Recipe --------------------
Time to spent doing the recipe.
Day: 1
Hours: 1
Minutes: 1
{'Vanilla Ice Cream I Recipe ': ['cream', 'lemon juice'], 'Cirak (Egg Cheese Roll) Recipe ': [], 'Fig Pie Recipe ': ['fig'], 'Italian Beer Marinated Chicken Recipe ': ['salad dressing'], 'Brioche Recipe ': [], 'Smoked Pork Butt Recipe ': ['roast', 'mexico'], 'Mock Chicken Recipe ': ['tofu'], 'Grilled Kingfish Recipe ': ['fish', 'salad dressing'], 'Herman Sourdough Starter Recipe ': [], 'Garlic Pickled Eggs Recipe ': ['vinegar'], 'Rock Candy Recipe ': [], 'Homemade Vanilla Recipe ': ['vodka']}
---------------------
{'Vanilla Ice Cream I Recipe ': 0.0, 'Cirak (Egg Cheese Roll) Recipe ': 0.0, 'Fig Pie Recipe ': 0.0, 'Italian Beer Marinated Chicken Recipe ': 0.0, 'Brioche Recipe ': 0.0, 'Smoked Pork Butt Recipe ': 0.0, 'Mock Chicken Recipe ': 0.0, 'Grilled Kingfish Recipe ': 0.0, 'Herman Sourdough Starter Recipe ': 0.0, 'Garlic Pickled Eggs Recipe ': 0.0, 'Rock Candy Recipe ': 0