# Imports y definiciones

In [27]:
import os
import json
import numpy as np
import pandas as pd
from ast import literal_eval
from collections import defaultdict

In [2]:
# Folder that holds the local database dumps (machine-specific location)
databases = 'C:/Users/Chris-Brota/Desktop/Databases'
display(os.listdir(databases))

# Select the JSON dump by name: os.listdir() returns entries in arbitrary
# order, so a positional index could silently pick a different dump
db = f'{databases}/foodb_2020_04_07_json'

# Fail early, with a readable message, when the expected dump is not there
if not os.path.isdir(db):
    raise FileNotFoundError(f'JSON dump folder not found: {db}')

['foodb_2020_04_07_csv',
 'foodb_2020_04_07_json',
 'foodb_server_dump_2020_4_21.sql',
 'FoodData_Central_foundation_food_csv_2021-04-28']

# Revisión de componentes de las bebidas energéticas

In [3]:
# Populate the information dictionary: one list per key, one entry per drink
energyzer_fold = 'bebidas'
energyzer_files = [f'{energyzer_fold}/{i}' for i in os.listdir(energyzer_fold)]
energyzer_dict = defaultdict(list)

# Divisors that turn an amount expressed in each supported unit into grams
unit_divisors = {'g': 1.0, 'mg': 1e3, 'ug': 1e6}


def _to_grams(amount, unit, origin):
    """Convert a textual amount expressed in `unit` to grams.

    Parameters
    ----------
    amount : str
        Numeric text, parsed with float().
    unit : str
        'g', 'mg' or 'ug'; surrounding whitespace and letter case are ignored.
    origin : str
        File name, used only to build the error message.

    Raises
    ------
    ValueError
        If the unit is not supported. Without this check an unknown unit
        would silently reuse the value converted for a previous row or file.
    """
    normalized = unit.strip().lower()
    if normalized not in unit_divisors:
        raise ValueError(f'Unsupported unit {unit!r} in {origin}')
    return float(amount) / unit_divisors[normalized]


for name in energyzer_files:
    with open(name, 'r', encoding='utf8') as file:
        # First line: serving size as "<amount>;<unit>"
        dosis = file.readline().strip().split(';')

        # The second line is skipped
        # NOTE(review): presumably a column header -- confirm against the files
        file.readline()

        # Convert the serving size before recording anything, so that a bad
        # header cannot leave '"Name"' and '"Size"' with different lengths
        dosis_total = _to_grams(dosis[0], dosis[1], name)

        # Record the drink name and its serving size in grams
        energyzer_dict['"Name"'].append(name)
        energyzer_dict['"Size"'].append(dosis_total)

        # Remaining lines: "<compound>;<amount>;<unit>;..."
        # NOTE(review): a compound missing from one file makes its list
        # shorter, so positions only line up with '"Name"' when every file
        # lists the same compounds -- confirm
        for line in file:
            data_line = line.strip().split(';')

            # Ignore blank lines (e.g. a trailing newline at end of file)
            if data_line == ['']:
                continue

            # Amount in grams, then normalised by the serving size
            value_to = _to_grams(data_line[1], data_line[2], name)
            value_perc = value_to / dosis_total

            # Store the proportion under the compound name
            energyzer_dict[data_line[0]].append(value_perc)

In [4]:
# Compounds that contribute something: every key except the drink names
# whose values, added over all drinks, are greater than zero
interest_compounds = [
    compound_key
    for compound_key, proportions in energyzer_dict.items()
    if compound_key != '"Name"' and sum(proportions) > 0
]

In [5]:
# Show the list of contributing compounds as the cell output
interest_compounds

['"Size"',
 '"Fat"',
 '"Protein"',
 '"Carbohydrate"',
 '"Water"',
 '"Caffeine"',
 '"Sugars"',
 '"Calcium, Ca"',
 '"Iron, Fe"',
 '"Magnesium, Mg"',
 '"Phosphorus, P"',
 '"Potassium, K"',
 '"Sodium, Na"',
 '"Zinc, Zn"',
 '"Copper, Cu"',
 '"Selenium, Se"',
 '"Vitamin C"',
 '"Thiamin"',
 '"Riboflavin"',
 '"Niacin"',
 '"Vitamin B6"',
 '"Folate"',
 '"Vitamin B12"',
 '"Choline"',
 '"Vitamin K"',
 '"Folic acid"',
 '"Folate, DFE"',
 '"Vitamin B12, added"',
 '"Net carbs"']

# Obtener macro categorías para eliminarlas

In [6]:
macro_categories = set()
with open('bebidas/energy_drink.csv', 'r', encoding='utf8') as file:
    for row_number, row in enumerate(file):
        # The first 2 lines of the file are skipped
        if row_number < 2:
            continue

        # The last two ';'-separated fields of each row are collected
        last_two = row.strip().split(';')[-2:]
        macro_categories.add(last_two[0])
        macro_categories.add(last_two[1])

# Net carbs are added because they are a summary of the carbohydrates
for summary_name in ('Net carbs', 'Carbohydrate'):
    macro_categories.add(summary_name)
print(macro_categories)

{'Vitamin E (alpha-tocopherol)', 'Fat', 'Monounsaturated fatty acids', 'Vitamin A, RAE', 'Vitamin B12', '-', 'Saturated fatty acids', 'Folate, DFE', 'Net carbs', 'Carbohydrate'}


# Obtener los códigos de los componentes de interés

In [7]:
# Read the spreadsheet that holds the code of each component
code_data = pd.read_excel('drink_comps_codes.xlsx')

# Set copy of the compounds of interest for constant-time membership checks
wanted_names = set(interest_compounds)
component_names = code_data["Nombre del componente"]

# Opening once in 'w' mode both clears any previous content and avoids
# reopening the file in append mode for every matching row
with open('interest_drink_comps_codes.csv', 'w', encoding='utf8') as file:
    for position in range(code_data.shape[0]):
        component_name = component_names.iloc[position]

        # Keep rows whose (quoted) name is a compound of interest and is not
        # one of the macro categories
        if (f'"{component_name}"' in wanted_names
                and component_name not in macro_categories):
            # Serialise the single row as ';'-separated text, no header/index
            row_csv = code_data.iloc[[position]].to_csv(
                sep=';', header=False, index=False)
            file.write(row_csv.strip())
            file.write('\n')

# Filtro de componentes de interés

In [9]:
# Keys of interest: the first ';'-separated field of every row of the code
# file, wrapped in double quotes to match the keys of energyzer_dict
with open('interest_drink_comps_codes.csv', 'r', encoding='utf8') as file:
    interest_keys = ['"' + row.strip().split(';')[0] + '"' for row in file]

# Output dictionary: the name and size entries are always kept; any other
# key is kept only when it appears among the keys of interest
always_kept = ('"Name"', '"Size"')
interest_energyzer_dict = {
    key: values
    for key, values in energyzer_dict.items()
    if key in always_kept or key in interest_keys
}

# Test de sanidad para gramajes por bebida

In [11]:
# Every key after the first two is added up for each drink
keys_to_rev = list(interest_energyzer_dict.keys())[2:]

for position, drink_name in enumerate(interest_energyzer_dict['"Name"']):
    # Running total of the per-compound values of this drink. Kept as an
    # explicit accumulation: the built-in sum() may use a different float
    # summation algorithm on newer Python versions and print other digits
    running_total = 0
    for compound_key in keys_to_rev:
        running_total += interest_energyzer_dict[compound_key][position]

    print(drink_name, running_total)

bebidas/energy_drink.csv 1.0001171748387099
bebidas/energy_drink_full_throttle.csv 0.9971819883870967
bebidas/energy_drink_monster.csv 0.996079283870968
bebidas/energy_drink_mountain_dew_amp.csv 0.9972590506854838
bebidas/energy_drink_nos.csv 0.9982144846774194
bebidas/energy_drink_no_fear.csv 0.9881232202419352
bebidas/energy_drink_no_fear_motherload.csv 0.9881232202419352
bebidas/energy_drink_ocean_spray_cranenergy_juice_drink.csv 1.0000881504032255
bebidas/energy_drink_red_bull.csv 1.0001171748387099
bebidas/energy_drink_rockstar.csv 0.9974657514112905
bebidas/energy_drink_sobe_energize_energy_juice_drink.csv 0.9893096774193548
bebidas/energy_drink_vault.csv 0.9976112157258066
bebidas/energy_drink_xs.csv 0.9874392880645161
bebidas/energy_drink_xs_gold_plus.csv 0.9874392880645161


## OJO

La suma de las masas de los componentes no coincide exactamente con la porción total analizada. 
**Por revisar: si hay componentes que se forman como suma de otros.**

# Registro en archivo

In [25]:
# Create the output folder when missing so the write below does not fail
# with FileNotFoundError on a fresh checkout
os.makedirs('results', exist_ok=True)

# Persist the filtered dictionary as its str() representation
with open('results/summary_drinks_dict.txt', 'w', encoding='utf8') as file:
    file.write(str(interest_energyzer_dict))

# Definición de la tabla objetivo

In [30]:
# Keys that do not describe a compound and are left out of the table
skipped_keys = ('"Name"', '"Size"')

with open('results/objective_table.csv', 'w', encoding='utf8') as file:
    # Header row
    file.write('Compound;proportion\n')

    # One row per compound: its key and the mean of its values across drinks
    for key, proportions in interest_energyzer_dict.items():
        if key in skipped_keys:
            continue
        file.write(f'{key};{np.mean(proportions)}\n')

# Búsqueda de alimentos energéticos por descripción

In [16]:
# Substring looked for in each food description (the match is case-sensitive)
keyword = 'energ'

# (id, name, public_id) of every food whose description contains the keyword
energetic_foods = []

with open(f'{db}/Food.json', 'r', encoding='utf8') as file:
    # Each line of the file is parsed as one JSON document
    for raw_line in file:
        record = json.loads(raw_line.strip())
        description = record['description']

        # Foods without a description are skipped
        if description is not None and keyword in description:
            energetic_foods.append(
                (record['id'], record['name'], record['public_id']))

In [17]:
# Show the (id, name, public_id) tuples found by the description search
energetic_foods

[(246, 'Triticale', 'FOOD00246'),
 (670, 'Sugar', 'FOOD00651'),
 (676, 'Sugar substitute', 'FOOD00656'),
 (727, 'Energy drink', 'FOOD00703'),
 (779, 'Nutritional drink', 'FOOD00754'),
 (848, 'Trail mix', 'FOOD00819'),
 (893, 'Milk and milk products', 'FOOD00863'),
 (905, 'Root vegetables', 'FOOD00873'),
 (947, 'Guarana', 'FOOD00915'),
 (982, 'Cottage cheese', 'FOOD00950')]

In [18]:
# Public ids of the foods listed by hand as energetic
# NOTE(review): 'FOOD00336' appears twice -- confirm whether the second
# occurrence was meant to be a different id
energetic_list = [
    'FOOD00336', 'FOOD00336', 'FOOD00168', 'FOOD00058',
    'FOOD00572', 'FOOD00068', 'FOOD00367', 'FOOD00219',
    'FOOD00723', 'FOOD00206', 'FOOD00555', 'FOOD00109',
    'FOOD00379', 'FOOD00159', 'FOOD00647', 'FOOD00165',
    'FOOD00548', 'FOOD00183', 'FOOD00040',
]

# Append three of the foods found by the description search
energetic_list += ['FOOD00246', 'FOOD00873', 'FOOD00915']

# Búsqueda de alimentos energéticos por nutrientes o principios activos

In [19]:
# Public ids of the compounds to look for
compound = [
    'FDB003010', 'FDB009020', 'FDB000571', 'FDB016592',
    'FDB013228', 'FDB012292', 'FDB014504', 'FDB001108',
    'FDB023903', 'FDB016251', 'FDB000572', 'FDB012738',
    'FDB013567', 'FDB003518', 'FDB024414', 'FDB003521',
    'FDB031212', 'FDB011795', 'FDB003523', 'FDB003191',
    'FDB000455', 'FDB000453', 'FDB014601', 'FDB014797',
    'FDB002251', 'FDB022939', 'FDB001014', 'FDB000574',
    'FDB017534', 'FDB023125', 'FDB001977', 'FDB003729',
]

In [20]:
# Look up the numeric `id` stored in Compound.json for each public id
with open(f'{db}/Compound.json', 'r', encoding='utf8') as file:
    # Each line of the file is parsed as one JSON document
    parsed_records = (json.loads(raw_line.strip()) for raw_line in file)

    # (id, public_id) of every record whose public id is in `compound`
    compound_num = [
        (record['id'], record['public_id'])
        for record in parsed_records
        if record['public_id'] in compound
    ]

In [21]:
# Keep only the first element (the numeric id) of each (id, public_id) pair
comp_only_num = [numeric_id for numeric_id, _public_id in compound_num]
print(comp_only_num)

[453, 455, 571, 572, 574, 1014, 1108, 1977, 2251, 3011, 3192, 3522, 3524, 3730, 9021, 11798, 12295, 12741, 13231, 13570, 14507, 14604, 14800, 16258, 16599, 17541, 23102, 23288, 24066, 24577, 31375]


In [22]:
energetic_list_ppo = list()

# Set copy of the numeric compound ids: membership is tested once per line
# of Content.json, so a set avoids scanning the list every time
wanted_source_ids = set(comp_only_num)

# Scan the contents file for entries that reference one of those ids
# NOTE(review): only 'source_id' is compared; if the file stores more than
# one kind of source under that field, ids of another kind could also
# match -- confirm against the Content.json schema
with open(f'{db}/Content.json', 'r', encoding='utf8') as file:
    # food_id of every matching entry (repeated ids are kept)
    energetic_foods_num = list()

    # Each line of the file is parsed as one JSON document
    for line in file:
        data = json.loads(line.strip())

        if data['source_id'] in wanted_source_ids:
            energetic_foods_num.append(data['food_id'])

In [94]:
# Number of distinct food ids collected from Content.json
len(set(energetic_foods_num))

967

# Obtener las tablas de cada alimento