In [283]:
import numpy as np
import pandas as pd
from datetime import date
import pickle as pkl
import os
import seaborn as sn
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA


In [284]:
ingredients_recipes = {}
author_recipes = {}

In [285]:
def read_file(path):
    file = pd.read_csv(path)
    df = pd.DataFrame(file)
    return df

In [286]:
def treat_time(df):
    array_time = []
    print(len(df))
    for i in range(len(df)):
        time = df["Total Time"][i]
        if time == "X":
            array_time.append(0)
        elif len(time) == 3:      # 1 h ou 2 m                              
            divide = time.split(" ")
            if divide[1] == "h":
                array_time.append(int(divide[0]) * 60)
            elif divide[1] == "m":
                array_time.append(int(divide[0]))
        elif len(time) == 4:    #30 m ou 10 h
            if 'm' in time:
                array_time.append(int(time.split(" m")[0]))
            elif 'h' in time:
                array_time.append(int(time.split(" h")[0]) * 60)
        elif len(time) == 8 or len(time) == 7:
            t = 0
            if 'd' in time: #1d 1h ou 1d 30m
                divide = time.split(" d ")
                t += int(divide[0]) * 60 * 24
                if 'm' in divide[1]:
                    t += int(divide[1].split(" m")[0])
                elif 'h' in divide[1]:
                    t += int(divide[1].split(" h")[0]) * 60
            elif 'h' in time:
                divide = time.split(" h ")
                t += int(divide[0]) * 60 + int(divide[1].split(" m")[0])
            array_time.append(t)
    print(len(array_time))
    print(array_time)

In [287]:
def create_ingredients_recipes(df):
    for i in range(len(df)):
        ingredients = df["Ingredients"][i]
        list_ingred = ingredients.split(',')
        for ing in list_ingred:
            if ing not in ingredients_recipes:
                ingredients_recipes[ing] = [df["Recipe Name"][i]]
            else:
                ingredients_recipes[ing].append(df["Recipe Name"][i])


In [288]:
def create_author_recipes(df):
    for i in range(len(df)):
        author = df["Author"][i]
        if author not in author_recipes:
            author_recipes[author] = 1
        else:
            author_recipes[author] += 1

In [289]:
def plot_distributions():
    x_values1 = [ingredients for ingredients in ingredients_recipes if len(ingredients_recipes[ingredients]) > 100]
    y_values1 = [len(ingredients_recipes[ingredients]) for ingredients in ingredients_recipes if len(ingredients_recipes[ingredients]) > 100]
    plt.figure(figsize=(15, 5))
    plt.bar(x_values1, y_values1, color='blue')                
    plt.title("Ingredients distributions")
    plt.xticks(rotation='vertical', fontsize=5)
    plt.xlabel("Ingredients")
    plt.ylabel("Number of recipes")
    plt.show()

    # if len(author_recipes[author_recipes]) > 100
    x_values2 = [author for author in author_recipes if author_recipes[author] > 5]
    y_values2 = [author_recipes[author] for author in author_recipes if author_recipes[author] > 5]

    plt.figure(figsize=(15, 5))
    plt.bar(x_values2, y_values2, color='blue')                
    plt.title("Author distributions")
    plt.xticks(rotation='vertical', fontsize=4)
    plt.xlabel("Author")
    plt.ylabel("Number of recipes")
    plt.show()

In [290]:
def plot_pca(df):
    variaveis_selecionadas = ["Total Time", "Ingredients", "Review Count"]

    x = df[variaveis_selecionadas]

    x_scaled = StandardScaler().fit_transform(x)

    pca = PCA(n_components=2)
    x_pca = pca.fit_transform(x_scaled)

    plt.scatter(x_pca[:, 0], x_pca[:, 1])
    plt.xlabel("Principal Component 1")
    plt.ylabel("Principal Component 2")
    plt.title("PCA of Receitas Dataset")
    plt.show()

In [291]:
def user_interface():
    print("-"*40 + " Welcome to Pendia " + "-"*40)
    print("1- Log In;\n2- Sing In;\n0- Leave;")
    option = input("1- Log In\n2- Sing In\n0- Leave\n")
    print(option)
    name = ""
    password = ""
    while option != "0":
        state = False
        if os.path.exists("users.pkl"):
            with open("users.pkl", 'rb') as arq:
                d_users =  pkl.load(arq)
        else:
            d_users = {}
        if option == "1" and d_users == {}:
            print("Without users")
        elif option == "1":
            while(1):
                name = input("Name: ")
                print("Name: " + name)
                if name in d_users:
                    break
                else:
                    print("Incorret Name")
            while(1):
                password = input("Password: ")
                print("Password: " + password)
                if d_users[name] == password:
                    break
                else:
                    print("Incorret Password")
            print("-"*40 + " Hi " + name + " " + "-"*40)
            state = True
        elif option == "2":
            while(1):
                name = input("Name: ")
                print("Name: " + name)
                if name not in d_users:
                    password = input("Password: ")
                    print("Password: " + password)
                    d_users[name] = password
                    break
                else:
                    print("User Already exists")
            with open("users.pkl", 'wb') as arq:
                pkl.dump(d_users, arq)
            print("-"*40 + " Hi " + name + " " + "-"*40)
            state = True
        else:
            print("Invalid Option")
        
        if state:
            option = "0"
        else:
            print("1- Log In;\n2- Sing In;\n0- Leave;")
            option = input("1- Log In;\n2- Sing In;\n0- Leave;\n")
            print(option)
    if option != "0":
        path = "dataset/" + name + ".csv"
        df = read_file(path)
        display(df)
    else: 
        print("Bye, Have a Great Time!")
        return
    

        
    


In [292]:
def main():
    path = "dataset/clean_recipes.csv"
    df = read_file(path)
    treat_time(df)
    # create_ingredients_recipes(df)
    # create_author_recipes(df)
    # plot_distributions() 
    # plot_pca(df)   
    # user_interface()

    
    

In [293]:
if __name__ == '__main__':
    main()

12351
12185
[190, 80, 90, 75, 100, 125, 145, 185, 35, 55, 30, 20, 105, 35, 150, 145, 60, 65, 185, 185, 50, 60, 195, 50, 30, 185, 45, 185, 185, 80, 95, 360, 185, 185, 95, 65, 185, 185, 185, 185, 25, 70, 160, 170, 40, 30, 70, 1460, 50, 195, 150, 185, 185, 185, 185, 185, 160, 185, 185, 185, 185, 185, 25, 50, 57, 185, 185, 205, 100, 85, 0, 0, 0, 0, 0, 0, 0, 35, 40, 0, 0, 35, 75, 0, 0, 0, 160, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 155, 0, 0, 0, 70, 0, 0, 120, 55, 85, 0, 0, 0, 90, 75, 0, 0, 0, 0, 215, 75, 235, 180, 0, 0, 0, 0, 0, 0, 90, 0, 0, 0, 60, 22, 0, 180, 0, 0, 85, 0, 0, 0, 45, 0, 120, 40, 0, 0, 0, 75, 0, 0, 0, 30, 0, 0, 0, 0, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 75, 0, 0, 70, 0, 135, 0, 60, 0, 0, 120, 0, 0, 60, 0, 90, 0, 0, 0, 190, 0, 0, 120, 0, 100, 0, 80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 50, 0, 120, 50, 0, 0, 0, 0, 0, 0, 0, 0, 120, 265, 0, 60, 45, 0, 0, 0, 240, 0, 0, 60, 0, 0, 0, 0, 85, 105, 0, 0, 0, 0, 0, 0, 0, 40, 0, 70, 90, 90, 0, 90, 165, 0, 0, 20, 0, 4380, 7