In [83]:
import pandas as pd
from random import randint
import numpy as np
from collections import Counter
from computeNutriScore import computeNutriScore
import df_nutri_create as nutri

# Data path: folder holding the CSV files read by this notebook (relative to the working directory)
DATA_FOLDER = './data/'

In [2]:
def Compute_energy(fat, sugar, prot, fiber, margin):
    """Estimate food energy in kJ from macronutrients, lowered by a margin.

    The weights are 37 for fat, 17 for sugar and protein, and 8 for fiber
    (food-energy factors; source: https://en.wikipedia.org/wiki/Food_energy).

    Returns the weighted sum minus `margin`, or 0 when that would be negative.
    """
    fat_part = 37 * fat
    sugar_prot_part = 17 * (sugar + prot)
    fiber_part = 8 * fiber

    estimate = (fat_part + sugar_prot_part + fiber_part) - margin

    # Clamp: a negative estimate is reported as 0.
    return 0 if estimate < 0 else estimate

def set_coherent_values(df):
    """Make the nutrition values of every row coherent, in place, and return `df`.

    Columns are addressed by position: 2 energy, 3 fat, 4 saturated fat,
    5 sugar, 6 salt, 7 sodium, 8 fruits-vegetables-nuts (real),
    9 fruits-vegetables-nuts (estimate), 10 fiber, 11 proteins.

    For each row, in this order:
      1. energy outside [0, 4000] becomes NaN;
      2. any value in columns 3..end outside [0, 100] becomes NaN;
      3. a missing salt/sodium value is derived from the other one
         (salt = sodium * 2.5), or the known one is discarded when the
         derived value would exceed 100;
      4. fat is raised to saturated fat when saturated fat is larger;
      5. the fruits estimate is set to the real value when it is missing
         or larger than the real value;
      6. energy is raised to Compute_energy(fat, sugar, proteins, fiber, 150)
         when it is missing or lower than that estimate (missing nutrients
         count as 0).
    """
    # Positional layout of the columns this function touches.
    energy_col, fat_col, sat_fat_col, sugar_col = 2, 3, 4, 5
    salt_col, sodium_col = 6, 7
    fruit_real_col, fruit_estimate_col = 8, 9
    fiber_col, protein_col = 10, 11

    def present_or_zero(cell):
        # A missing nutrient contributes nothing to the energy estimate.
        return 0 if np.isnan(cell) else cell

    for row in range(len(df)):

        # 1. Energy range check
        energy = df.iat[row, energy_col]
        if energy > 4000 or energy < 0:
            df.iat[row, energy_col] = np.nan

        # 2. Range check for every remaining column
        for col in range(fat_col, len(df.columns)):
            cell = df.iat[row, col]
            if cell > 100 or cell < 0:
                df.iat[row, col] = np.nan

        # 3. Salt / sodium (should be perfectly correlated)
        salt = df.iat[row, salt_col]
        sodium = df.iat[row, sodium_col]
        salt_missing = np.isnan(salt)
        sodium_missing = np.isnan(sodium)
        if salt_missing and not sodium_missing:
            salt_from_sodium = sodium * 2.5
            if salt_from_sodium <= 100:
                df.iat[row, salt_col] = salt_from_sodium
            else:
                # The sodium value must be erroneous
                df.iat[row, sodium_col] = np.nan
        elif sodium_missing and not salt_missing:
            sodium_from_salt = salt / 2.5
            if sodium_from_salt <= 100:
                df.iat[row, sodium_col] = sodium_from_salt
            else:
                # The salt value must be erroneous
                df.iat[row, salt_col] = np.nan

        # 4. Saturated fat cannot exceed total fat
        saturated_fat = df.iat[row, sat_fat_col]
        if saturated_fat > df.iat[row, fat_col]:
            df.iat[row, fat_col] = saturated_fat

        # 5. Fruits-vegetables-nuts: the estimate follows the real value
        fruit_real = df.iat[row, fruit_real_col]
        if not np.isnan(fruit_real):
            fruit_estimate = df.iat[row, fruit_estimate_col]
            if np.isnan(fruit_estimate) or fruit_estimate > fruit_real:
                df.iat[row, fruit_estimate_col] = fruit_real

        # 6. Energy must be at least the macronutrient-based estimate
        energy_floor = Compute_energy(
            present_or_zero(df.iat[row, fat_col]),
            present_or_zero(df.iat[row, sugar_col]),
            present_or_zero(df.iat[row, protein_col]),
            present_or_zero(df.iat[row, fiber_col]),
            150,
        )
        current_energy = df.iat[row, energy_col]
        if np.isnan(current_energy) or current_energy < energy_floor:
            df.iat[row, energy_col] = energy_floor

    return df

In [3]:
# Load the food table (first CSV column becomes the index) from the shared
# data folder, then make its nutrition values coherent.
data_food_final = pd.read_csv(DATA_FOLDER + 'data_food_final.csv', index_col=[0])
data_food_final = set_coherent_values(data_food_final)

In [4]:
# Draw a random basket of (product code, number) pairs.
# NOTE(review): the second element is presumably a quantity — confirm against nutri.main_nutri.
number_of_product = 1000

# random.randint includes BOTH endpoints, so the highest valid row position
# is len(data_food_final) - 1; using len(...) could index one past the end.
list_product = [
    (data_food_final.index[randint(0, len(data_food_final) - 1)], randint(1, 500))
    for _ in range(number_of_product)
]

(sc_Beve, Nutri_Beve, sc_NBeve, Nutri_NBeve,
 df_water, df_product, df_beverages, df_non_beverages) = nutri.main_nutri(list_product, data_food_final)

# A score of None means there was nothing to grade in that group.
if sc_Beve is not None:
    print('The grade for your beverages is {} for a score of {}'.format(Nutri_Beve.upper(), sc_Beve))
if sc_NBeve is not None:
    print('The grade for your non-beverages is {} for a score of {}'.format(Nutri_NBeve.upper(), sc_NBeve))
print('You take {} of water items'.format(len(df_water)))


The grade for your beverages is E for a score of 16
The grade for your non-beverages is C for a score of 8
You take 5 of water items


In [5]:
def relevant_tag(dic, tags, threshold = 5):
    """Rank tags from least to most frequent, keeping only frequent-enough ones.

    Parameters
    ----------
    dic : dict
        Maps a tag to its number of occurrences in the database.
    tags : iterable of str
        Tags to rank. Duplicates are collapsed.
    threshold : int, default 5
        Minimum number of occurrences (inclusive) a tag needs to be kept.

    Returns
    -------
    list of str or None
        The kept tags sorted by ascending occurrence count (ties keep the
        order in which the tags were given), so the first element is the
        least frequent tag. None when no tag reaches the threshold.
    """
    # A tag absent from `dic` has no recorded occurrence: count it as 0
    # instead of raising KeyError.
    counts = {tag: dic.get(tag, 0) for tag in tags}

    # Keep only tags with at least `threshold` occurrences
    kept = {tag: count for tag, count in counts.items() if count >= threshold}
    if not kept:
        return None

    return sorted(kept, key=kept.get)

In [7]:
# Score a basket containing only the first drawn product.
(sc_Beve, Nutri_Beve, sc_NBeve, Nutri_NBeve,
 df_water, df_product, df_beverages, df_non_beverages) = nutri.main_nutri(
    [list_product[0]], data_food_final
)

In [8]:
# Re-read 'data_food_final.csv' (first CSV column as index). This rebinds
# data_food_final, so the frame returned earlier by set_coherent_values is
# no longer reachable under this name.
data_food_final = pd.read_csv(DATA_FOLDER + 'data_food_final.csv', index_col  = [0])

In [9]:
def list_df_tags(data_food):
    """Count how often each tag occurs in the 'categories_tags' column.

    Every non-missing cell of that column is a comma-separated string of
    tags. Returns a dict mapping tag -> number of occurrences, ordered from
    the most to the least frequent tag.
    """
    # Cells with no tags at all are ignored
    tag_cells = data_food['categories_tags'].dropna()

    # Flatten every comma-separated cell into one stream of tags and count them
    tag_counts = Counter(tag for cell in tag_cells for tag in cell.split(','))

    # most_common() yields (tag, count) pairs by decreasing count
    return dict(tag_counts.most_common())

In [10]:
# Tag -> occurrence count for the table currently bound to data_food_final.
# NOTE(review): which CSV that is depends on cell execution order — confirm.
dic_tag = list_df_tags(data_food_final)

In [94]:
# Rebind data_food_final to 'data_food_final_score.csv' (first CSV column as index).
# NOTE(review): find_healthier_product reads a 'Predicted_NutriScore_score'
# column from the frame it is given — presumably this file provides it; confirm.
data_food_final = pd.read_csv(DATA_FOLDER + 'data_food_final_score.csv', index_col  = [0])

In [114]:
def find_healthier_product(product_tuple, df, dic_tag):
    """Look for a product of the same category with a lower predicted Nutri-Score.

    The category is the product's least frequent tag among those that have
    at least 10 occurrences in `dic_tag` (see relevant_tag). Candidates are
    the rows of `df` whose 'categories_tags' contains that tag
    (case-insensitive substring match).

    Parameters
    ----------
    product_tuple : tuple
        Its first element is the product's label in `df.index`.
    df : pandas.DataFrame
        Must provide 'categories_tags' and 'Predicted_NutriScore_score'.
    dic_tag : dict
        Maps a tag to its number of occurrences.

    Returns
    -------
    tuple (replaced, best_product, best_sc, best_code)
        `replaced` is True when a candidate scores strictly lower than the
        product; the other three then describe the lowest-scoring candidate
        (the first one in `df` order on ties). Otherwise they describe the
        original product.

    Raises
    ------
    KeyError
        If the product label is not in `df.index`.
    """
    score_col = 'Predicted_NutriScore_score'

    # Find the product in the dataset; it is also the fallback answer
    code = product_tuple[0]
    product = df.loc[code]
    product_score = product[score_col]
    unchanged = (False, product, product_score, code)

    # A product without categories (NaN) has nothing to be compared on
    raw_tags = product.categories_tags
    if not isinstance(raw_tags, str):
        return unchanged

    # Without a relevant tag there is no category to search in. An empty
    # pattern would match every row, i.e. compare against the whole dataset.
    best_tags = relevant_tag(dic_tag, raw_tags.split(','), 10)
    if best_tags is None:
        return unchanged
    best_tag = best_tags[0]

    # Products of the same kind. regex=False matches the tag literally;
    # na=False treats rows without categories as non-matching, which keeps
    # the mask strictly boolean.
    same_kind = df.categories_tags.str.contains(best_tag, case=False, regex=False, na=False)
    df_similar = df[same_kind]

    # Keep only the candidates that are strictly better than the product.
    # Comparisons with NaN are False, so unscored candidates drop out and a
    # product with a NaN score is never replaced.
    is_better = (df_similar[score_col] < product_score).to_numpy()
    better = df_similar[is_better]
    if better.empty:
        return unchanged

    # argmin returns the first position of the minimum, so ties resolve to
    # the earliest row; positional access also works with duplicate labels.
    best_pos = int(better[score_col].to_numpy().argmin())
    return True, better.iloc[best_pos], better[score_col].iloc[best_pos], better.index[best_pos]

In [116]:
# For the first five drawn products, look for a better-scored product of the
# same category and print a recommendation.
# ideal_dict maps product code -> (ideal code, ideal product row, ideal score).
ideal_dict = {}

for product in list_product[:5]:
    replaced, ideal_product, ideal_nutriscore, ideal_code = find_healthier_product(product, data_food_final, dic_tag)
    ideal_dict[product[0]] = (ideal_code, ideal_product, ideal_nutriscore)

    # First column of the product's row (the product name, judging by the
    # printed output). .iloc makes the positional access explicit; a plain
    # [0] on a label-indexed Series is deprecated in pandas.
    product_name = str(data_food_final.loc[product[0]].iloc[0])

    if replaced:
        # str() keeps the concatenation valid for non-string index labels
        print('Nous vous conseillons de remplacer le produit appelé "' + product_name
              + '" par cet autre produit, qui obtient le meilleur Nutriscore de cette catégorie : '
              + 'https://world.openfoodfacts.org/product/' + str(ideal_code))
    else:
        print('Le produit appelé "' + product_name
              + '" est le meilleur de sa catégorie.')

Nous vous conseillons de remplacer le produit appelé "Ice Tea citron (12 litres)" par cet autre produit, qui obtient le meilleur Nutriscore de cette catégorie : https://world.openfoodfacts.org/product/0020601407893
Nous vous conseillons de remplacer le produit appelé "Aloe water" par cet autre produit, qui obtient le meilleur Nutriscore de cette catégorie : https://world.openfoodfacts.org/product/7691100952233
Nous vous conseillons de remplacer le produit appelé "Bonne Purée" par cet autre produit, qui obtient le meilleur Nutriscore de cette catégorie : https://world.openfoodfacts.org/product/3250390000280
Nous vous conseillons de remplacer le produit appelé "Euro mix - melange euri (quick frozen)" par cet autre produit, qui obtient le meilleur Nutriscore de cette catégorie : https://world.openfoodfacts.org/product/3256222975672
Nous vous conseillons de remplacer le produit appelé "Aceto Balsamico Di Modena" par cet autre produit, qui obtient le meilleur Nutriscore de cette catégorie :