# Nutrition Notebook

This notebook defines functions and queries for working with recommended daily intake of various nutrients.

## Linear Optimization Functions

The following functions help to construct and solve a linear optimization problem using nutrition data. We assume the data is in the form of a dataframe where each row represents a food item, and columns represent macronutrients, micronutrients, and other facts. The constraint matrix will be built in the same format, meaning that the actual constraints are the *columns,* not the *rows,* of the constraint matrix. In other words, the matrix we build is actually the transpose of the matrix we want.

### define_objective_function(data, nutrient, maximize=False)

Returns the coefficients for an objective function that minimizes the specified nutrient. If `maximize` is `True`, the coefficients are negated instead, which turns this into a maximization problem.

In [None]:
def define_objective_function(data, nutrient, maximize=False):
    """Return the objective-function coefficients for a single nutrient.

    Args:
        data: Dataframe with one row per food item; must contain a column
            named ``nutrient``.
        nutrient: Name of the column to optimize.
        maximize: When true, the coefficients are negated so that a
            minimizing solver effectively maximizes the nutrient.

    Returns:
        A numpy array with one coefficient per row of ``data``.
    """
    coefficients = data.loc[:, nutrient].to_numpy()

    # Minimizing the negated objective is the same as maximizing the original.
    return np.multiply(coefficients, -1) if maximize else coefficients

### initialize_constraints_with_rdi(data, days=1)

#### Usage

```constraints = initialize_constraints_with_rdi(data)```

#### Description

Takes a dataframe whose rows represent food items, and having columns named `pct_*` that represent percentages of the Recommended Dietary Intake (RDI) of one or more nutrients, and returns a dataframe with linear constraints as columns, and an array of upper bounds as its last row. Assuming that your objective involves meeting the RDI, these can be used as the basis of a linear optimization by adding more constraints or by defining the desired objective function.

In [17]:
def initialize_constraints_with_rdi(data, days=1):
    """Build the initial constraint table requiring 100% of every RDI.

    Args:
        data: Dataframe with one row per food item. Must contain a
            ``food_code`` column and one or more ``pct_*`` columns.
        days: Number of days the solution has to cover; the required total
            for each nutrient is ``days`` times the RDI.

    Returns:
        A dataframe with one ``<nutrient>_rdi`` column per ``pct_*`` column
        of ``data``. Its rows are labelled with the food codes (as strings),
        followed by a final ``upper_bounds`` row holding the bound of each
        constraint.
    """
    # Fields named like pct_foo are assumed to mean "percent of RDI for nutrient foo"
    nutrients = [field for field in data.columns if field[:4] == 'pct_']

    # Require 100% of every nutrient with an RDI. Since the optimizer treats
    # constraints as "less than," we need to negate everything
    constraints = np.multiply(data.loc[:, nutrients].to_numpy(), -1)
    bounds = [-1 * days] * len(nutrients)

    # Convert these arrays into a dataframe
    df = pd.DataFrame(constraints, columns=[name + '_rdi' for name in nutrients])

    # Append the bounds as the last row
    df.loc[len(df)] = bounds

    # Label the rows. DataFrame.set_index() returns a new frame instead of
    # modifying this one, so assign the labels to the index directly.
    df.index = data['food_code'].values.astype(str).tolist() + ['upper_bounds']

    return df

### add_constraint(data, constraints=None, nutrient=None, min=None, max=None, days=1)

Takes the given data and constraints, and adds new constraints for the specified nutrient. If `min` is specified, a constraint is added requiring that the total for `nutrient` must be at least `min`. Similarly if `max` is specified, a constraint is added requiring that the total be no more than `max`.

If nutrient isn't specified, or if both `max` and `min` are `None`, then the constraints are returned unchanged.

In [8]:
# Function for adding constraints to the nutrition LP
def add_constraint(data, constraints=None, nutrient=None, min=None, max=None, days=1):
    """Add minimum and/or maximum constraints on the total of one nutrient.

    Each new constraint is a column holding one coefficient per food item
    followed by the bound as its last entry. Constraints are expressed as
    "less than or equal", so a minimum is stored negated.

    Args:
        data: Dataframe with one row per food item.
        constraints: Existing constraint dataframe (constraints as columns,
            bounds in the last row), or ``None`` to start from scratch.
        nutrient: Name of the column in ``data`` to constrain.
        min: Lower bound per day on the nutrient total, or ``None``.
        max: Upper bound per day on the nutrient total, or ``None``.
        days: Number of days covered; both bounds are scaled by it.

    Returns:
        ``constraints`` itself when nothing was added (no nutrient, unknown
        nutrient, or neither bound given). Otherwise a new dataframe with a
        fresh integer index and the columns ``<nutrient>_min`` and/or
        ``<nutrient>_max`` appended.
    """
    if not nutrient or nutrient not in data.columns:
        return constraints

    # Work on plain values so the result does not depend on the index of `data`.
    amounts = data[nutrient].tolist()

    new_columns = []
    if min is not None:
        # total >= min * days   <=>   -total <= -(min * days)
        new_columns.append(-pd.Series(amounts + [min * days], name=nutrient + '_min'))
    if max is not None:
        new_columns.append(pd.Series(amounts + [max * days], name=nutrient + '_max'))

    if not new_columns:
        return constraints

    existing = [] if constraints is None else [constraints.reset_index(drop=True)]
    return pd.concat(existing + new_columns, axis=1)

### add_weight_constraint(data, constraints=[], bounds=[], regex=r'', limits=[], min=None, max=None)

This function adds a constraint on the `min` and/or `max` total weight of all food items whose `food_code` matches `regex`. It then returns the updated `constraints`, `bounds`, and `limits`. The `min` and `max` weights are specified in grams.

The purpose of `limits` is to keep track of per-food-item limits, in case the user wants to disallow some food items completely.

In [9]:
# TODO: Fix this to use the new scheme where constraints are a dataframe

# Function for adding a constraint on the total weight of specified foods
def add_weight_constraint(data, constraints=None, bounds=None, regex=r'', limits=None, min=None, max=None):
    """Constrain the combined weight of every food whose code matches ``regex``.

    Weights are given in grams and divided by 100 before being stored, which
    matches ``get_rations`` multiplying amounts by 100 to obtain grams.

    Args:
        data: Dataframe with one row per food item and a ``food_code`` column.
        constraints: Existing constraint matrix with one row per food item
            and one column per constraint. ``None`` or an empty sequence
            starts from a matrix with no columns.
        bounds: Existing constraint bounds, one per constraint column. The
            sequence passed in is copied, never modified.
        regex: Pattern matched against the start of each food code
            (``Series.str.match`` semantics).
        limits: Per-food ``(lower, upper)`` limits, one per row of ``data``.
            ``None`` or an empty sequence means ``(0, None)`` for every food.
            NOTE(review): ``(0, None)`` follows the scipy.optimize.linprog
            convention for "non-negative, no upper limit" - confirm that is
            how ``limits`` is consumed.
        min: Minimum combined weight in grams, or ``None`` for no minimum.
        max: Maximum combined weight in grams, or ``None`` for no maximum.
            When given, it also becomes the per-food upper limit of every
            matched food.

    Returns:
        A ``(constraints, bounds, limits)`` tuple with the updated values.

    Raises:
        ValueError: If a non-empty ``limits`` does not have one entry per
            row of ``data``.
    """
    matched = data['food_code'].astype(str).str.match(regex).to_numpy(dtype=bool)
    n_foods = len(matched)

    if constraints is None or np.size(constraints) == 0:
        # A matrix without columns lets np.c_ append the first constraint.
        constraints = np.empty((n_foods, 0))

    # Copy so that neither the caller's list nor a shared default is mutated.
    bounds = [] if bounds is None else list(bounds)

    if limits is None or len(limits) == 0:
        limits = [(0, None)] * n_foods
    elif len(limits) != n_foods:
        raise ValueError(
            "limits has %d entries but data has %d rows" % (len(limits), n_foods)
        )
    else:
        limits = list(limits)

    if max is not None:
        # No single matched food may exceed the cap on the whole group.
        limits = [
            (0, max / 100) if hit else limit
            for hit, limit in zip(matched, limits)
        ]

    coefs = matched.astype(int)

    # Minimum...
    if min is not None:
        constraints = np.c_[constraints, -1 * coefs]
        bounds.append(-1 * min / 100)

    # Maximum...
    if max is not None:
        constraints = np.c_[constraints, +1 * coefs]
        bounds.append(+1 * max / 100)

    return (constraints, bounds, limits)

## Query Functions

This section defines functions that return SQL queries related to nutrients and recommended daily intake.

### query_food_percent_rdi(age=None, sex=None)

In [10]:
def query_food_percent_rdi(age=None, sex=None, schema=None):
    """Return the SQL selecting percent-of-RDI food data for one age and sex.

    The query picks the rows of ``<schema>.food_dri_pct_view`` of type
    ``recommended`` for the given sex whose ``age_from`` is the largest one
    not exceeding ``age``.

    Args:
        age: Age to look up. Must be numeric or a numeric string. A falsy
            value (including 0) is treated as "not given".
        sex: Value compared against the ``sex`` column.
        schema: Schema containing ``food_dri_pct_view``; defaults to
            ``contrib``.

    Returns:
        The query text, or ``None`` when ``age`` or ``sex`` is missing.

    Raises:
        ValueError: If ``age`` is not numeric or ``schema`` is not a plain
            identifier.
    """
    if not (age and sex):
        return None

    # The values are spliced into the SQL text, so validate them first.
    # float() is only a validator here: a numeric age renders exactly as before.
    age_sql = str(age)
    try:
        float(age_sql)
    except ValueError:
        raise ValueError("age must be numeric, got %r" % (age,)) from None

    # Standard SQL escaping: double any embedded single quote.
    # NOTE(review): assumes a database with standard-conforming string
    # literals - confirm against the target database.
    sex_sql = str(sex).replace("'", "''")

    schema_sql = str(schema) if schema else 'contrib'
    if not schema_sql.isidentifier():
        raise ValueError("schema must be a plain identifier, got %r" % (schema,))

    return("""
        SELECT * FROM %s.food_dri_pct_view
        WHERE sex = '%s'
        AND   type = 'recommended'
        AND   age_from = (
                SELECT MAX(age_from)
                FROM %s.food_dri_pct_view
                WHERE age_from <= %s AND sex = '%s'
              )
    """ % (schema_sql, sex_sql, schema_sql, age_sql, sex_sql))


### query_nutrient_tolerable_upper_limits(age=None, sex=None)

In [11]:
def query_nutrient_tolerable_upper_limits(age=None, sex=None, schema=None):
    """Return the SQL selecting tolerable upper limits relative to the RDI.

    The query joins the ``tolerable_upper`` rows of
    ``<schema>.dietary_reference_intake`` for the given sex and the largest
    ``age_from`` not exceeding ``age`` with the matching ``recommended``
    rows. It yields ``tagname`` and ``pct_tolerable_upper`` (upper limit
    divided by recommended amount), keeping only ratios greater than 1.

    Args:
        age: Age to look up. Must be numeric or a numeric string. A falsy
            value (including 0) is treated as "not given".
        sex: Value compared against the ``sex`` column.
        schema: Schema containing the ``dietary_reference_intake`` table;
            defaults to ``dietary_reference_intake``.

    Returns:
        The query text, or ``None`` when ``age`` or ``sex`` is missing.

    Raises:
        ValueError: If ``age`` is not numeric or ``schema`` is not a plain
            identifier.
    """
    if not (age and sex):
        return None

    # The values are spliced into the SQL text, so validate them first.
    # float() is only a validator here: a numeric age renders exactly as before.
    age_sql = str(age)
    try:
        float(age_sql)
    except ValueError:
        raise ValueError("age must be numeric, got %r" % (age,)) from None

    # Standard SQL escaping: double any embedded single quote.
    # NOTE(review): assumes a database with standard-conforming string
    # literals - confirm against the target database.
    sex_sql = str(sex).replace("'", "''")

    schema_sql = str(schema) if schema else 'dietary_reference_intake'
    if not schema_sql.isidentifier():
        raise ValueError("schema must be a plain identifier, got %r" % (schema,))

    return("""
        SELECT
            tagname, (tolerable_upper.amount / rec.amount) AS pct_tolerable_upper
        FROM (
                SELECT age_from, sex, tagname, amount
                FROM %s.dietary_reference_intake
                WHERE type = 'tolerable_upper'
                AND age_from = (
                    SELECT MAX(age_from)
                    FROM %s.dietary_reference_intake
                    WHERE age_from <= %s
                    AND sex = '%s'
                )
                AND sex = '%s'
        ) AS tolerable_upper
        JOIN %s.dietary_reference_intake AS rec USING(age_from, sex, tagname)
        WHERE
            rec.type = 'recommended'
            AND (tolerable_upper.amount / rec.amount) > 1
    """ % (schema_sql, schema_sql, age_sql, sex_sql, sex_sql, schema_sql))


## Database Functions

These functions actually execute the queries against the database, while making sure that the column names are specified correctly and that other creature comforts are arranged for.

### fetch_food_percent_rdi(schema=None, age=None, sex=None)

In [12]:
def fetch_food_percent_rdi(schema=None, age=None, sex=None):
    """Run the percent-of-RDI food query and return its result.

    Args:
        schema: Optional schema name. It is forwarded to
            ``query_food_percent_rdi`` as a ``schema`` keyword only when
            truthy, so the query builder must accept that keyword for this
            path to work.
        age: Age forwarded to the query builder.
        sex: Sex forwarded to the query builder.

    Returns:
        ``None`` when ``age`` or ``sex`` is falsy; otherwise whatever
        ``fetch_dataframe`` returns for the generated query.
    """
    if not age or not sex:
        return None

    query_args = dict(age=age, sex=sex)
    if schema:
        query_args["schema"] = schema

    # The fetch result, including None, is handed back untouched.
    return fetch_dataframe(query_food_percent_rdi(**query_args))

### fetch_nutrient_tolerable_upper_limits(schema=None, age=None, sex=None)

In [13]:
def fetch_nutrient_tolerable_upper_limits(schema=None, age=None, sex=None):
    """Run the tolerable-upper-limit query and prefix the nutrient names.

    Args:
        schema: Optional schema name. It is forwarded to
            ``query_nutrient_tolerable_upper_limits`` as a ``schema``
            keyword only when truthy, so the query builder must accept that
            keyword for this path to work.
        age: Age forwarded to the query builder.
        sex: Sex forwarded to the query builder.

    Returns:
        ``None`` when ``age`` or ``sex`` is falsy or when ``fetch_dataframe``
        returns ``None``; otherwise the fetched dataframe with ``pct_``
        prepended to every value of its ``tagname`` column.
    """
    if not (age and sex):
        return None

    args = {"age": age, "sex": sex}
    if schema:
        args["schema"] = schema

    data = fetch_dataframe(query_nutrient_tolerable_upper_limits(**args))
    if data is None:
        return None

    # Rename nutrients to match the food data column names. One vectorized
    # assignment replaces a per-row .loc loop, which was slow and only
    # correct when the frame had a default 0..n-1 index.
    if len(data):
        data["tagname"] = "pct_" + data["tagname"]

    return data

## Data Cleanup Functions

These functions clean up nutrition data by, e.g., imputing zero for missing nutrient information, deleting foods with no nutritional value, etc.

### impute_zero_for_missing_nutrient_data(data)

Fills in missing nutrient information with zeros and returns the result.

In [None]:
def impute_zero_for_missing_nutrient_data(data):
    """Return a copy of ``data`` in which every missing value is 0.

    The input dataframe is left untouched.
    """
    filled = data.fillna(0)
    return filled

### drop_foods_with_missing_nutrient_data(data)

Returns the data frame with all records removed that contain NaN.

In [None]:
def drop_foods_with_missing_nutrient_data(data):
    """Return ``data`` without the rows that contain any missing value.

    The surviving rows are renumbered from 0; the input is left untouched.
    """
    is_complete = data.notna().all(axis=1)
    complete_rows = data[is_complete]
    return complete_rows.reset_index(drop=True)

### delete_non_nutritive_foods(data)

Returns a dataframe with records removed whose nutrient value is literally zero. Assumes that nutrient information is found in columns named `pct_*` and that those columns are all contiguous. Results are unspecified for records with non-numeric values, including `NaN`, in any of those fields.

In [None]:
def delete_non_nutritive_foods(data):
    """Return ``data`` without the foods whose nutrient values are all zero.

    Nutrient columns are those named ``pct_*``. They are selected by name,
    so they do not have to be contiguous, and unrelated columns lying
    between them are ignored.

    Args:
        data: Dataframe with one row per food item.

    Returns:
        A new dataframe, renumbered from 0, keeping every row that has at
        least one non-zero ``pct_*`` value. A missing value compares as
        non-zero, so such rows are kept. When ``data`` has no ``pct_*``
        columns there is nothing to judge by and every row is kept.
    """
    columns = [
        column for column in data.columns
        if isinstance(column, str) and column.startswith('pct_')
    ]
    if not columns:
        return data.reset_index(drop=True)

    # Compare against zero directly instead of taking a vector norm: squaring
    # very small values inside the norm can underflow to exactly 0.
    has_nutrients = (data[columns] != 0).any(axis=1)

    return data[has_nutrients].reset_index(drop=True)

## Data Manipulation Functions

The following sections define functions that can be used to manipulate the input data, for example by removing certain food items.

### delete_food_items(data, items)

This function takes two data frames, both of which must contain the field `food_code`, and deletes any items from the first dataframe that are found in the second with an `amount` greater than zero.

In [18]:
def delete_food_items(data, items):
    """Remove from ``data`` every food listed in ``items`` with a positive amount.

    Args:
        data: Dataframe with a ``food_code`` column.
        items: Dataframe with ``food_code`` and ``amount`` columns.

    Returns:
        A new dataframe, renumbered from 0, without the rows of ``data``
        whose ``food_code`` appears in ``items`` with ``amount`` above zero.
    """
    excluded_codes = items.loc[items['amount'] > 0, 'food_code']
    is_excluded = data['food_code'].isin(excluded_codes)
    return data[~is_excluded].reset_index(drop=True)

## Display Functions

The following functions display the solution to the optimization problem.

In [16]:
from scipy.optimize import OptimizeResult

# Returns macro nutrients for the solution
def get_macros(data, solution, days=1):
    """Total the macronutrients supplied by a solution.

    Args:
        data: Dataframe with one row per food item and the columns
            ``enerc_kcal``, ``chocdf``, ``procnt``, ``fat``, ``f18d2`` and
            ``fibtg``.
        solution: Amount of each food, either as an array-like or as an
            ``OptimizeResult`` whose ``x`` holds the amounts.
        days: Accepted but not used by this function.

    Returns:
        A dataframe with a single ``Value`` column and one labelled row per
        macro figure.
    """
    if isinstance(solution, OptimizeResult):
        solution = solution.x

    # (row label, source column, multiplier). The multipliers 4 and 9 are
    # presumably kcal-per-gram conversions - confirm against the data units.
    recipe = (
        ('Calories', 'enerc_kcal', None),
        ('Carb Cal', 'chocdf', 4),
        ('Protein Cal', 'procnt', 4),
        ('Fat Cal', 'fat', 9),
        ('Omega-6 Cal', 'f18d2', 9),
        ('Fiber gm', 'fibtg', None),
    )

    labels = []
    values = []
    for label, column, factor in recipe:
        total = np.dot(solution, data.loc[:, column].to_numpy())
        if factor is not None:
            total = np.multiply(total, factor)
        labels.append(label)
        values.append(total)

    return pd.DataFrame(values, index=labels, columns=['Value'])

# Returns micro nutrients for the solution
def get_micros(data, solution, days=1):
    """Total the percent-of-RDI nutrients supplied by a solution.

    Args:
        data: Dataframe with one row per food item; the columns from
            ``pct_fibtg`` through ``pct_f18d3`` (inclusive) are used.
        solution: Amount of each food, either as an array-like or as an
            ``OptimizeResult`` whose ``x`` holds the amounts.
        days: Number of days the solution covers; totals are divided by it.

    Returns:
        A dataframe indexed by nutrient column name with a single ``Value``
        column holding the per-day total multiplied by 100.
    """
    if isinstance(solution, OptimizeResult):
        solution = solution.x

    # Scale each food's nutrient row by the amount of that food
    per_food = data.loc[:, 'pct_fibtg':'pct_f18d3'].copy()
    per_food = per_food.mul(solution, axis=0)

    # Rounding happens per food, before the totals are taken
    per_food = per_food.round(3)

    # Total them and convert to conventional percents
    totals = per_food.sum(axis=0) * (100 / days)

    return totals.to_frame('Value')

# Returns the actual shopping basket for the solution
def get_rations(data, solution, insert_before='enerc_kcal', truncate=True):
    """Combine the food data with the amount of each food in a solution.

    Args:
        data: Dataframe with one row per food item.
        solution: Amount of each food, either as an array-like or as an
            ``OptimizeResult`` whose ``x`` holds the amounts.
        insert_before: Name of the column in front of which the new
            ``amount`` column is placed.
        truncate: When true, only rows with a positive amount are returned.

    Returns:
        A dataframe with the columns of ``data`` plus ``amount``. Every
        column to the right of ``amount`` is multiplied by the solution, and
        ``amount`` itself is multiplied by 100 to express it in grams.
    """
    if isinstance(solution, OptimizeResult):
        solution = solution.x

    amounts = pd.DataFrame(solution, columns=['amount'])

    # Insert the new column at the requested point
    split = data.columns.get_loc(insert_before)
    left, right = data.iloc[:, :split], data.iloc[:, split:]
    basket = pd.concat([left, amounts, right], axis=1)

    # Multiply every column after "amount" by the solution
    first_scaled = basket.columns.get_loc('amount') + 1
    for name in basket.columns[first_scaled:]:
        basket[name] = basket[name] * solution

    # Convert the amount column into grams
    basket['amount'] = basket['amount'] * 100

    if not truncate:
        return basket
    return basket[basket['amount'] > 0]

# Print the macros, micros, and solution
def summarize_solution(data, solution):
    """Print the macros, micros and rations of a successful solution.

    Only the foods with a strictly positive amount are reported. Nothing is
    printed when ``solution.success`` is false.

    Args:
        data: Dataframe with one row per food item, in the same order as
            ``solution.x``.
        solution: Optimizer result exposing ``success`` and ``x``.
    """
    if not solution.success:
        return

    # Filter the foods and their amounts with the same mask. Selecting rows
    # with `x > 0` but amounts with `x != 0` leaves the two with different
    # lengths whenever the solver returns a negative entry.
    chosen = solution.x > 0
    data = data[chosen].reset_index(drop=True)
    amounts = solution.x[chosen]

    print(get_macros(data, amounts))
    print()
    print(get_micros(data, amounts))
    print()
    print(get_rations(data, amounts))
    return