In [None]:
import pandas as pd

from pathlib import Path

# Root of the data directory, relative to this notebook. Kept as a plain
# string because other cells build file paths from it with `+`.
DATA_PATH = '../../data'

# Make sure the output folder <DATA_PATH>/main exists before anything is written to it.
main_output_dir = Path(DATA_PATH, 'main')
main_output_dir.mkdir(parents=True, exist_ok=True)

# Food intake records

Read the food-record file exported from AromiDiet and process it for later analysis.

In [None]:
# Columns of the export that are discarded straight after loading.
# NOTE(review): the Finnish names presumably mean date of birth, group tag,
# food diary group and food diary name — confirm against the export.
unused_export_columns = [
    'Syntymäaika',
    'Ryhmätagi',
    'Ruokapäiväkirjaryhmä',
    'Ruokapäiväkirjan nimi',
]

# Full list of food intakes: one sheet of the raw workbook, minus the
# columns listed above.
food_records = pd.read_excel(
    DATA_PATH+'/RAW/aromi_product_ingredient.xlsx',
    sheet_name='Tutkimusraportti',
).drop(columns=unused_export_columns)

## Animal Source Energy Proportion

In [None]:
# Parse the meal time into a datetime column named 'timestamp' and drop the
# raw 'Ruokailuaika' column it was read from.
food_records['timestamp'] = pd.to_datetime(food_records['Ruokailuaika'])
food_records = food_records.drop(columns=['Ruokailuaika'])

# One lookup code per row: 'Tuotetunnus' when present, otherwise
# 'Reseptin/tuotteen tunnus'.
food_records['food_code'] = food_records['Tuotetunnus'].fillna(
    food_records['Reseptin/tuotteen tunnus']
)

# Manually created list of the animal proportion of each food item.
fap = pd.read_excel(
    DATA_PATH+'/public/food_animal_proportion.xlsx',
    sheet_name='manual_entry',
    usecols=('food_code','food_animal_proportion')
)

# Attach the animal proportion to every food record.
# validate='many_to_one' makes pandas raise a MergeError if a food_code
# occurs more than once in the hand-maintained table; without it, a duplicate
# entry would silently duplicate food-record rows and inflate every
# per-person sum computed from this frame.
food_records = food_records.merge(
    fap,
    on='food_code',
    how='left',
    validate='many_to_one',
).copy() #getting rid of fragmentation warning

# Animal-source energy of each record: energy times animal proportion.
# NOTE(review): because this is a left merge, records whose food_code is
# missing from the proportion table get NaN here; the per-person sum() that
# follows skips NaN, so such records count toward ENERC but not toward 'ase'.
# Confirm that this is the intended treatment of unmatched codes.
food_records['ase'] = food_records.ENERC * food_records.food_animal_proportion

# Row count after the merge, displayed as the cell output.
len(food_records.index)

In [None]:
# Summed columns that are removed again from the per-person table.
columns_removed_after_sum = [
    'Kulutettu määrä',
    'Käyttömäärä',
    'food_animal_proportion',
]

# Total of every numeric column per 'Tunnus' (one row per person).
per_person_totals = food_records.groupby(['Tunnus']).sum(numeric_only=True)
intakes_per_person = per_person_totals.drop(columns=columns_removed_after_sum)

# Animal-source energy proportion: summed animal-source energy divided by
# summed total energy.
intakes_per_person['asep'] = intakes_per_person['ase'] / intakes_per_person['ENERC']

# Number of persons, displayed as the cell output.
len(intakes_per_person)

## Food record time indicators

In [None]:
# Person id and timestamp of every record, plus the calendar date of the timestamp.
record_times = food_records[['Tunnus', 'timestamp']].assign(
    date=lambda frame: frame['timestamp'].dt.date
)

# Per person:
#   fr_start - earliest timestamp
#   fr_end   - latest timestamp
#   fr_days  - number of distinct calendar dates with at least one record
# (fr_end - fr_start would give the elapsed span if a duration is ever needed.)
fr_dates = record_times.groupby('Tunnus').agg(
    fr_start=('timestamp', 'min'),
    fr_end=('timestamp', 'max'),
    fr_days=('date', 'nunique'),
)

# Index-on-index merge (default inner join) onto the per-person table.
intakes_per_person = intakes_per_person.merge(
    fr_dates, left_index=True, right_index=True,
)

# Number of persons after the merge, displayed as the cell output.
len(intakes_per_person)

## Normalising intake metrics

In [None]:
# The essential micronutrient columns are totals over the whole food record,
# so they need to be normalised to daily intakes: each total is divided by the
# person's number of distinct recording days (fr_days).
micro_nutrients = [
    'VITC', 'F20D5N3', 'F22D6N3', 'FE',
    'FOL', 'CA', 'F18D2CN6', 'F18D3N3',
]

# One '<nutrient>_daily_mean' column per entry above, in the same order.
micro_means = (
    intakes_per_person[micro_nutrients]
    .div(intakes_per_person.fr_days, axis=0)
    .add_suffix('_daily_mean')
)

In [None]:
# Energy density assigned to each macronutrient column.
# NOTE(review): units are not stated in this notebook; presumably kJ per
# gram, so that the products below are comparable with ENERC — confirm.
e_densities = dict(
    FAT=37,
    FAPU=37,
    FASAT=37,
    CHOAVL=17,
    PROT=17,
    FIBC=8,
)

total_energy = intakes_per_person.ENERC

# Two columns per macronutrient, in this order:
#   e-<code>   energy attributed to the nutrient (intake * density)
#   ep-<code>  that energy as a proportion of the person's total ENERC
energy_columns = {}
for nutrient, density in e_densities.items():
    nutrient_energy = intakes_per_person[nutrient] * density
    energy_columns[f'e-{nutrient}'] = nutrient_energy
    energy_columns[f'ep-{nutrient}'] = nutrient_energy / total_energy

e_proportions = pd.DataFrame(energy_columns)

In [None]:
# CHOLE relative to energy intake. ENERC is divided by 1000 first; per the
# original author's note the resulting unit is mg/MJ.
energy_scaled = intakes_per_person.ENERC / 1000
CHOLE_per_e = (
    (intakes_per_person.CHOLE / energy_scaled)
    .rename('CHOLE_per_e')
    .to_frame()
)

In [None]:
# FIBC relative to energy intake, as a one-column frame. ENERC is divided by
# 1000 first; per the original author's note the resulting unit is g/MJ.
FIBC_per_e = pd.DataFrame(
    {'FIBC_per_e': intakes_per_person.FIBC / (intakes_per_person.ENERC / 1000)}
)

In [None]:
# Derived tables, appended column-wise to the per-person intakes in this order.
derived_tables = [micro_means, e_proportions, CHOLE_per_e, FIBC_per_e]

# axis=1 aligns the tables on their index and places their columns side by side.
intakes_per_person = pd.concat([intakes_per_person, *derived_tables], axis=1)

## Exporting to Excel file

In [None]:
# Label the index 'id' so that is the header of the first column in the workbook.
intakes_per_person.index.name = 'id'

# Write the complete per-person table into the main data folder.
export_path = DATA_PATH+'/main/intakes_detailed_with_asep.xlsx'
intakes_per_person.to_excel(export_path)