In [1]:
%pip install eep153_tools
%pip install python_gnupg
%pip install -U gspread_pandas

# === Preprocess common data (run this once) ===
import pandas as pd
from eep153_tools.sheets import read_sheets
import re
from scipy.optimize import linprog as lp
import numpy as np

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


### Helper Function for Formatting IDs

This code defines a helper function, `format_id`, which takes an ID and an optional zero-padding width and returns the ID as a formatted string when possible. It returns `None` when the ID is null, empty, or a lone `.`, and strips any decimal suffix from string IDs. The cell also defines `data_url`, the Google Sheets URL from which the later cells load their data.

In [2]:
# Helper function
def format_id(id, zeropadding=0):
    """
    Normalize a raw identifier into a (optionally zero-padded) string.

    Parameters
    ----------
    id : number, str, or null-like
        Raw identifier as read from a sheet.
    zeropadding : int, optional
        Minimum width of the result; shorter IDs are left-padded with zeros.

    Returns
    -------
    str or None
        The integer part of the ID as a string, or None when the ID is
        null, '', '.', or cannot be formatted as an integer.
    """
    missing = pd.isnull(id) or id in ('', '.')
    if missing:
        return None

    try:
        # Numeric IDs: integer formatting drops any fractional part.
        formatted = ('%d' % id).zfill(zeropadding)
    except TypeError:
        # '%d' rejects strings: keep the text before the first '.', trimmed.
        whole_part, _, _ = id.partition('.')
        formatted = whole_part.strip().zfill(zeropadding)
    except ValueError:
        formatted = None
    return formatted

# Google Sheets workbook passed to every read_sheets(...) call in this notebook
# (the "recipes", "nutrients", "prices" and "rda" sheets are all read from it).
data_url = "https://docs.google.com/spreadsheets/d/1l0Xl1NwSRN0dPwjHRWDEnChBWqTx7VPXOnLnJKI2lAY/edit?gid=415594035#gid=415594035"

### Load and Clean Data

This code loads the original recipes data from a Google Sheet, applies formatting to specific columns using the `format_id` helper function, and renames one of the columns for clarity.


In [3]:
# Read the "recipes" worksheet and give the meal-description column its
# working name, 'recipe'.
og_recipes = read_sheets(data_url, sheet="recipes").rename(columns={"parent_desc": "recipe"})

# Standardize both ID columns with format_id so that they can be used as
# join/group keys later in the notebook.
og_recipes = og_recipes.assign(**{
    id_column: og_recipes[id_column].apply(format_id)
    for id_column in ("parent_foodcode", "ingred_code")
})

In [4]:
# Terms to INCLUDE: a meal is kept as a candidate when at least one of its
# ingredient descriptions contains one of these terms (the match performed
# further down is a case-insensitive substring search).
#
# Fix: "milk, lowfat" and "Cheese, Parmesan" were missing the comma between
# them, so Python concatenated the two adjacent literals into the single term
# "milk, lowfatCheese, Parmesan" and neither food was ever matched.
#
# NOTE(review): "chip" is also listed in key_excludable, and the exclusion
# mask wins in final_mask, so it has no effect here.
# NOTE(review): "Bread, whole-mutligrain" looks like a typo and "Soup, Bean "
# has a trailing space; both are left unchanged pending confirmation against
# the ingredient descriptions in the sheet.
key_foods = [
    "Yogurt, Greek", "Cheese, Cottage", "lowfat, 1% Milk", "milk, lowfat", "Cheese, Parmesan",
    "Banana", "Apple", "Orange",
    "Avocado", "figs", "dates", "raisins", "apricots, dried", "Grapefruit", "Grapes",
    "pear", "Peach", "watermelon", "Oats", "Bread, rye", "Brown Rice", "Pasta",
    "Quinoa", "Rolled Oats", "Rice Cakes", "Whole Grain Cereal", "Special K", "Bread, whole-wheat",
    "Bread, whole-mutligrain", "Popcorn, Air-popped",
    "whole grain pasta", "Almonds", "Peanut Butter", "Chicken", "Egg", "Tofu",
    "Lentils", "Beans, Black", "Tuna", "Salmon", "Soup, Bean ",
    "Steak", "Tilapia", "Pork", "Venison", "Cod", "Ground turkey", "turkey, Ground",
    "beef, ground", "Ground beef", "Tempeh", "chickpea", "beans, kidney", "Sweet Potato", "Potato",
    "Spinach", "Broccoli", "Bell Pepper", "Carrot", "Beets", "peas", "Tomato",
    "Creatine", "Omega-3", "BCAAs", "Blueberries", "Strawberries", "juice, raw",
    "Garlic", "Lemon", "Onion", "Asparagus", "kale", "collards", "chard, swiss", "protein powder",
    "brussel sprouts", "oat",
    "sunflower", "salmon", "tuna", "mackerel", "fish", "chip",
]

# Terms to EXCLUDE: a meal is dropped when any of its ingredient descriptions
# or its recipe name contains one of these terms.
key_excludable = [
    "sugar", "syrup", "soda", "candy", "artificial", "processed", "preservative", "Millet",
    "yolk", "Fruit juice", "juice drink", "Sunny D", "sweetened", "added sugar", "liver",
    "babyfood", "baby food", "carp", "chip",
]

# Regex-escape every term so punctuation such as '%' or '-' is matched literally.
escaped_key_foods = list(map(re.escape, key_foods))
escaped_excludable = list(map(re.escape, key_excludable))

# One alternation per list; each term sits in its own non-capturing group.
include_pattern = '|'.join('(?:' + term + ')' for term in escaped_key_foods)
exclude_pattern = '|'.join('(?:' + term + ')' for term in escaped_excludable)


def _meal_has_match(column, pattern):
    """Per-row flag: does ANY row of the same meal match `pattern` in `column`?

    The check is a case-insensitive regex search with missing values counted
    as non-matches; the meal-level answer is broadcast back to every row of
    that meal so the result aligns with og_recipes.
    """
    return og_recipes.groupby('parent_foodcode')[column].transform(
        lambda values: values.str.contains(pattern, case=False, na=False).any()
    )


# 1) Meals with at least one ingredient that contains a key food.
meal_mask_include = _meal_has_match('ingred_desc', include_pattern)

# 2a) Meals with at least one ingredient that contains an excludable term.
meal_mask_exclude_ingredients = _meal_has_match('ingred_desc', exclude_pattern)

# 2b) Meals whose recipe name contains an excludable term.
meal_mask_exclude_names = _meal_has_match('recipe', exclude_pattern)

# Either kind of exclusion disqualifies the meal.
meal_mask_exclude_total = meal_mask_exclude_ingredients | meal_mask_exclude_names

# 3) Keep rows of meals that are included and not excluded.
final_mask = meal_mask_include & (~meal_mask_exclude_total)

# Filtered ingredient-level recipe table used by the rest of the notebook.
recipes = og_recipes.loc[final_mask]

In [5]:
# Read the "nutrients" worksheet, then standardize its 'ingred_code' column
# with format_id so it joins cleanly against the recipes' ingredient codes.
nutrition = read_sheets(data_url, sheet="nutrients")
nutrition = nutrition.assign(ingred_code=nutrition["ingred_code"].apply(format_id))

### Process and Aggregate Nutrient Information

This section makes a copy of the filtered recipes, normalizes ingredient weights to percentages, and merges nutrient information. Then, it scales nutrient values by their ingredient's normalized weight and aggregates the nutrient profile by meal. Finally, the code extracts recipe names for further use.

In [6]:
# Detach from og_recipes so the weight normalization below cannot touch it.
recipes = recipes.copy()

# Turn each ingredient weight into its share of the meal's total weight.
recipes.loc[:, 'ingred_wt'] = recipes['ingred_wt'].div(
    recipes.groupby('parent_foodcode')['ingred_wt'].transform("sum")
)

# Attach nutrient values to every ingredient row (left join keeps all rows).
df = pd.merge(recipes, nutrition, how="left", on="ingred_code")

# Every numeric column except the weight share itself is scaled by that share.
numeric_cols = df.select_dtypes(include=["number"]).columns.tolist()
numeric_cols.remove("ingred_wt")
df[numeric_cols] = df[numeric_cols].multiply(df["ingred_wt"], axis="index")

# Collapse to one row per meal: weighted values are summed, and the recipe
# name is taken from the meal's first row. The index is labelled 'recipe_id'.
df = (
    df.groupby('parent_foodcode')
    .agg({**dict.fromkeys(numeric_cols, "sum"), "recipe": "first"})
    .rename_axis("recipe_id")
)

# Recipe names keyed by recipe_id, for later relabelling.
food_names = df["recipe"]

### Load Prices and Map to Food Names

This code loads pricing data from a Google Sheet, applies ID formatting, and keeps only the prices for the 2017/2018 period. It then restricts both the price data and the aggregated recipes to the food codes they have in common, relabels both by recipe name, and prepares a transposed version of the nutrient data (`A_all`, nutrients as rows and foods as columns) for further analysis.


In [7]:
# --- Prices, aligned with the aggregated recipes ---

# Recipe names keyed by recipe_id (taken before df is subset below).
food_names = df["recipe"]

# Load the "prices" sheet, keep the three columns that are needed, standardize
# the food codes, select the "2017/2018" period and drop unpriced rows. The
# result is indexed by food_code.
prices = (
    read_sheets(data_url, sheet="prices")[["food_code", "year", "price"]]
    .assign(food_code=lambda sheet: sheet["food_code"].apply(format_id))
    .set_index(["year", "food_code"])
    .xs("2017/2018", level="year")
    .dropna(subset="price")
)

# Keep only the recipes that have a price, in the same order on both sides.
common_recipes = df.index.intersection(prices.index)
df = df.loc[common_recipes]
prices = prices.loc[common_recipes]

# --- Relabel both tables by food name ---

# A_all is the transpose of df: one row per df column, one column per food.
A_all = df.T

# Replace the food codes in A_all's columns with the matching recipe names.
A_all.columns = food_names.loc[A_all.columns]

# Apply the same code -> name mapping to the price index so that A_all's
# columns and prices' index line up label for label.
prices.index = prices.index.map(food_names)

In [8]:
# Nutrient constraints: read the "rda" worksheet and index it by nutrient name.
rda = read_sheets(data_url, sheet="rda").set_index("Nutrient")

# Second name for the same table; the DRI helper functions refer to it as rda_df.
rda_df = rda

### Define the Diet Minimizer Function

This function, `diet_minimizer`, uses linear programming to find the cheapest daily diet that satisfies the nutrient constraints for a given sex, athlete type, and training type. Rows of the RDA table marked `RDA` or `AI` become lower bounds on nutrient intake and rows marked `UL` become upper bounds; the solver then minimizes total cost subject to those bounds.

In [9]:
def diet_minimizer(sex, athlete_type, training_type):
    """
    Find the minimum-cost daily diet for one demographic group.

    Reads the notebook-level tables `rda` (nutrient bounds), `A_all`
    (nutrients as rows, foods as columns), `prices` (indexed by food name,
    with a "price" column) and the list `key_excludable`.

    Parameters
    ----------
    sex, athlete_type, training_type : str
        Joined as f"{sex}_{athlete_type}_{training_type}" to select the
        column of `rda` holding this group's bounds.

    Returns
    -------
    pd.DataFrame
        One row per food whose optimal quantity is at least 1e-6, with
        columns "Food" and "Cost per 100g". The quantities themselves and
        the total cost are printed, not returned.

    Raises
    ------
    KeyError
        If `rda` has no column for the requested group.
    ValueError
        If the foods or constraints cannot be lined up, if a bound is not
        finite, or if the solver does not find an optimum.
    """
    group = f"{sex}_{athlete_type}_{training_type}"

    # Lower bounds come from RDA/AI rows, upper bounds from UL rows.
    constraint_type = rda['Constraint Type']
    bmin = pd.to_numeric(rda.loc[constraint_type.isin(['RDA', 'AI']), group], errors='coerce')
    bmax = pd.to_numeric(rda.loc[constraint_type.isin(['UL']), group], errors='coerce')

    # Keep finite bounds for nutrients that A_all actually has a row for.
    bmin = bmin[np.isfinite(bmin)]
    bmax = bmax[np.isfinite(bmax)]
    bmin = bmin[bmin.index.isin(A_all.index)]
    bmax = bmax[bmax.index.isin(A_all.index)]

    # Foods are the COLUMNS of A_all (its rows are nutrients), so the
    # exclusion must be applied column-wise. Filtering rows, as before, left
    # an excluded food in the matrix while removing it from the prices.
    filtered_A_all = A_all.loc[:, ~A_all.columns.isin(key_excludable)]
    filtered_prices = prices.loc[~prices.index.isin(key_excludable)]
    if filtered_A_all.shape[1] != len(filtered_prices):
        raise ValueError(
            "A_all has %d foods but prices has %d; the two must list the same foods."
            % (filtered_A_all.shape[1], len(filtered_prices))
        )

    # Nutrient rows for each kind of bound, in the same order as the bounds.
    Amin = filtered_A_all.reindex(bmin.index).dropna(how='all')
    Amax = filtered_A_all.reindex(bmax.index).dropna(how='all')

    # Stack as "A x >= b": upper bounds are negated on both sides.
    b = pd.concat([bmin, -bmax]).dropna()
    A = pd.concat([Amin, -Amax])

    b = b.to_numpy(dtype=float).flatten()
    A = A.to_numpy(dtype=float)
    if A.shape[0] != b.shape[0]:
        raise ValueError("Constraint matrix and bound vector have different numbers of rows.")

    # Cost vector, positionally aligned with the columns of A.
    p = filtered_prices["price"].to_numpy(dtype=float)

    # Quantities below this are treated as "not in the diet".
    tol = 1e-6

    if not np.all(np.isfinite(b)):
        raise ValueError("The constraint vector b contains non-finite values!")

    # linprog expects "A_ub x <= b_ub", hence the sign flip of "A x >= b".
    result = lp(p, -A, -b, method='highs')

    if not result.success:
        raise ValueError("Optimization failed: " + result.message)

    diet_quantities = pd.Series(result.x, index=filtered_prices.index)
    total_cost = result.fun
    print(diet_quantities[diet_quantities > 0]*100)

    # Select by position rather than by label so that two foods sharing a
    # name cannot turn the price lookup into a multi-row result.
    selected = diet_quantities.to_numpy() >= tol
    df_foods = pd.DataFrame({
        "Food": filtered_prices.index[selected],
        "Cost per 100g": p[selected]
    })

    print(f"Your daily diet is ${total_cost:.2f}")
    return df_foods

In [13]:
# NOTE(review): despite its name, this variable holds the diet for the
# "Male" / "Strength" / "Normal" group. It is reused under this name by the
# add_custom_ingredients example further down.
male_endurance = diet_minimizer("Male", "Strength", "Normal")

Mackerel, canned                                                          109.733499
Egg, whole, fried with oil                                                 41.504175
Peanut butter, lower sodium                                               347.908971
Pasta, gluten free                                                        237.522778
Oatmeal, regular or quick, made with milk, no added fat                   247.946342
Beans and rice, with tomatoes                                              43.041167
Orange juice, 100%, with calcium added, canned, bottled or in a carton    141.878749
Banana, raw                                                               326.558287
Potato, boiled, from fresh, peel eaten, made with margarine               145.889606
Greens, NS as to form, cooked                                               7.632775
dtype: float64
Your daily diet is $4.64


In [14]:
def add_custom_ingredients(diet_df, custom_food_servings, supplement_data=None):
    """
    Append the cheapest priced meal matching each custom food to a diet table.

    For every requested food, the notebook-level `recipes` table is searched
    for recipe names containing the text (case-insensitive, literal match).
    Among the matching meals present in `df` and `prices`, the cheapest one is
    added. When no recipe name matches, `supplement_data` is consulted instead.

    Parameters
    ----------
    diet_df : pd.DataFrame
        Current diet with columns ["Food", "Cost per 100g"] (the output of
        diet_minimizer). It is not modified.
    custom_food_servings : dict
        Maps a food name (plain text, not a regular expression) to the
        desired serving size in grams, e.g. {"rice cake": 50, "creatine": 5}.
    supplement_data : dict, optional
        Maps lower-case supplement names to a dict with at least a "price"
        entry, which is used as the price per 100 g, e.g.
        {"creatine": {"price": 20.0, "serving": 5}}. Only "price" is read.

    Returns
    -------
    pd.DataFrame
        A copy of `diet_df` with one extra row per matched custom food. Only
        the columns "Food" and "Cost per 100g" are returned: the per-row
        serving size and cost contribution are used for the printed total
        and then dropped.

    Notes
    -----
    The printed total adds the custom foods' cost contributions to the SUM OF
    THE "Cost per 100g" COLUMN of `diet_df`. `diet_df` carries no quantities,
    so that sum is not the cost of the original diet.
    """
    if supplement_data is None:
        supplement_data = {}

    # Work on a copy so the caller's table is left untouched.
    updated_diet = diet_df.copy()

    # See Notes: this is a sum of unit prices, not a true diet cost.
    original_cost = updated_diet["Cost per 100g"].sum()

    custom_rows = []

    for food, serving_size in custom_food_servings.items():
        serving_fraction = serving_size / 100.0

        # regex=False: the user's text is matched literally, so names with
        # characters such as '(' or '+' neither mis-match nor raise.
        mask = recipes['recipe'].str.contains(food, case=False, na=False, regex=False)
        matching_meals = recipes.loc[mask, 'parent_foodcode'].unique()

        # No recipe name matches: fall back to the supplement table.
        if len(matching_meals) == 0:
            food_lower = food.lower()
            if food_lower in supplement_data:
                price = supplement_data[food_lower]["price"]
                custom_rows.append({
                    "Food": food,
                    "Cost per 100g": price,
                    "Serving (g)": serving_size,
                    "Cost Contribution": price * serving_fraction,
                })
            else:
                print(f"No match found for '{food}' in recipes and no supplement data provided.")
            continue

        cheapest_price = float('inf')
        cheapest_meal_name = None

        for meal_code in matching_meals:
            # Only meals that survived aggregation and pricing can be chosen.
            if meal_code not in df.index:
                continue
            meal_name = df.loc[meal_code, 'recipe']
            if meal_name not in prices.index:
                continue
            meal_price_info = prices.loc[meal_name, 'price']
            # Several priced rows may share a name; use the first of them.
            if isinstance(meal_price_info, pd.Series):
                meal_price_info = meal_price_info.iloc[0]
            meal_cost = float(meal_price_info)
            if meal_cost < cheapest_price:
                cheapest_price = meal_cost
                cheapest_meal_name = meal_name

        if cheapest_meal_name is None:
            print(f"No priced meal found for '{food}' among matches.")
            continue

        custom_rows.append({
            "Food": cheapest_meal_name,
            "Cost per 100g": cheapest_price,
            "Serving (g)": serving_size,
            "Cost Contribution": serving_fraction * cheapest_price,
        })

    custom_cost = 0.0
    if custom_rows:
        # Build the new rows in one step instead of concatenating row by row.
        custom_df = pd.DataFrame(custom_rows)
        custom_cost = custom_df["Cost Contribution"].sum()
        updated_diet = pd.concat([updated_diet, custom_df], ignore_index=True)

    total_cost = original_cost + custom_cost
    print(f"Updated daily diet total cost is: ${total_cost:.2f}")

    # The helper columns exist only for the total above.
    updated_diet = updated_diet.drop(columns=['Serving (g)', 'Cost Contribution'], errors='ignore')

    return updated_diet

In [15]:
# Manual price data for items that have no recipe match. add_custom_ingredients
# reads only "price" and treats it as the price per 100 g; keys must be the
# lower-cased food name.
# NOTE(review): the key below is "bcaa", whereas the add_custom_ingredients
# docstring example uses "bcaas" - confirm which spelling callers will pass.
supplement_data = {
    "creatine": {"price": 14.0, "serving": 5},  # 14.0 per 100g; 5g per serving
    "bcaa": {"price": 30.0, "serving": 10}       # 30.0 per 100g; 10g per serving
}

# Example custom foods dictionary: keys are matched against recipe names first,
# and against supplement_data only when no recipe name matches. Values are
# serving sizes in grams.
custom_food_servings = {
    "Rice cake": 15,
    "Creatine": 5,  # Looked up in supplement_data (as "creatine") if no recipe name matches.
    "protein": 20,
    "spinach": 200
}

# male_endurance is the diet table returned by the diet_minimizer call above.
updated_diet_df = add_custom_ingredients(male_endurance, custom_food_servings, supplement_data=supplement_data)
updated_diet_df

Updated daily diet total cost is: $4.69


Unnamed: 0,Food,Cost per 100g
0,"Mackerel, canned",0.594039
1,"Egg, whole, fried with oil",0.398344
2,"Peanut butter, lower sodium",0.51391
3,"Pasta, gluten free",0.114248
4,"Oatmeal, regular or quick, made with milk, no ...",0.174142
5,"Beans and rice, with tomatoes",0.178637
6,"Orange juice, 100%, with calcium added, canned...",0.181187
7,"Banana, raw",0.189998
8,"Potato, boiled, from fresh, peel eaten, made w...",0.236255
9,"Greens, NS as to form, cooked",0.368179


In [16]:
def get_dri_from_rda_df(sex, athlete_type, training_type="Normal"):
    """
    Return the reference intakes for one group as a numeric pandas.Series,
    read from the notebook-level rda_df DataFrame.

    Parameters
    ----------
    sex : str
        "Male" or "Female".
    athlete_type : str
        For example, "Endurance", "Strength", or "Bodybuilding".
    training_type : str, optional
        For example, "Normal" or "Intense". Defaults to "Normal".

    Returns
    -------
    pd.Series
        The selected rda_df column (same index and name) as floats. Values
        written with thousands separators ("4,700") are parsed; anything that
        cannot be read as a number becomes NaN.

    Raises
    ------
    ValueError
        If rda_df has neither a "<sex>_<athlete_type>_<training_type>" column
        nor a "<sex>_<athlete_type>" column.
    """
    # Prefer the fully qualified column; fall back to the one without a
    # training type.
    candidates = (f"{sex}_{athlete_type}_{training_type}", f"{sex}_{athlete_type}")
    group_col = next((name for name in candidates if name in rda_df.columns), None)
    if group_col is None:
        raise ValueError(f"Column for group '{sex}_{athlete_type}_{training_type}' not found in rda_df.")

    # Strip thousands separators from text cells only, then let to_numeric
    # coerce anything unparseable to NaN. Calling float() on the text first,
    # as before, raised on such cells instead of coercing them.
    cleaned = rda_df[group_col].map(
        lambda value: value.replace(',', '').strip() if isinstance(value, str) else value
    )
    return pd.to_numeric(cleaned, errors='coerce')

# Example usage:
# With rda_df loaded, this returns the reference intakes for a male endurance
# athlete with INTENSE training (the "Male_Endurance_Intense" column).
dri_male_endurance = get_dri_from_rda_df("Male", "Endurance", "Intense")
dri_male_endurance

Nutrient
Energy            4700.0
Protein            140.0
Carbohydrate       810.0
Dietary Fiber       34.0
Linoleic Acid       17.0
Linolenic Acid       1.6
Calcium           1000.0
Iron                10.0
Magnesium          460.0
Phosphorus         700.0
Potassium         3750.0
Sodium            3000.0
Zinc                11.0
Copper               0.9
Selenium            55.0
Vitamin A          900.0
Vitamin E           15.0
Vitamin D           15.0
Vitamin C          110.0
Thiamin              1.2
Riboflavin           1.3
Niacin              16.0
Vitamin B6           1.9
Vitamin B12          2.4
Choline            550.0
Vitamin K          120.0
Folate             400.0
Name: Male_Endurance_Intense, dtype: float64

In [17]:
def get_personalized_dri(sex, athlete_type, training_type, age):
    """
    Return age-adjusted reference intakes for one individual.

    The group's base values come from get_dri_from_rda_df. Energy, Protein
    and Carbohydrate are then multiplied by an age factor; every other
    nutrient is returned unchanged.

    Age factor:
      - under 18:  age / 18
      - 18 to 60:  1 (no adjustment)
      - over 60:   1 - 0.007 * (age - 60), i.e. 0.7% less per year past 60

    Parameters
    ----------
    sex : str
        "Male" or "Female".
    athlete_type : str
        For example, "Endurance", "Strength", or "Bodybuilding".
    training_type : str
        For example, "Normal" or "Intense".
    age : int or float
        Age of the individual.

    Returns
    -------
    pd.Series
        Nutrient-indexed Series of personalized values (a copy; the base
        Series is not modified).
    """
    personalized = get_dri_from_rda_df(sex, athlete_type, training_type).copy()

    # Single multiplier for the three macronutrients, chosen by age band.
    factor = (
        age / 18.0 if age < 18
        else 1.0 if age <= 60
        else 1 - 0.007 * (age - 60)
    )

    for nutrient in ("Energy", "Protein", "Carbohydrate"):
        # Skip nutrients that are absent (names must match the index exactly)
        # or have no base value.
        if nutrient not in personalized.index:
            continue
        if pd.isnull(personalized[nutrient]):
            continue
        personalized[nutrient] = personalized[nutrient] * factor

    return personalized

# Example usage:
# With rda_df loaded and containing columns such as "Male_Endurance_Normal" and
# "Male_Endurance_Intense", compute personalized DRIs for two male endurance
# athletes: a 20-year-old with Normal training and a 65-year-old with Intense
# training.
# NOTE(review): dri_10 holds the values for age=20, not age 10; the name is
# misleading. Only dri_65 is displayed below.
dri_10 = get_personalized_dri("Male", "Endurance", "Normal", age=20)
dri_65 = get_personalized_dri("Male", "Endurance", "Intense", age=65)

print("\nPersonalized DRIs for a 65-year-old Male Endurance athlete (Intense training):")
dri_65


Personalized DRIs for a 65-year-old Male Endurance athlete (Intense training):


Nutrient
Energy            4535.50
Protein            135.10
Carbohydrate       781.65
Dietary Fiber       34.00
Linoleic Acid       17.00
Linolenic Acid       1.60
Calcium           1000.00
Iron                10.00
Magnesium          460.00
Phosphorus         700.00
Potassium         3750.00
Sodium            3000.00
Zinc                11.00
Copper               0.90
Selenium            55.00
Vitamin A          900.00
Vitamin E           15.00
Vitamin D           15.00
Vitamin C          110.00
Thiamin              1.20
Riboflavin           1.30
Niacin              16.00
Vitamin B6           1.90
Vitamin B12          2.40
Choline            550.00
Vitamin K          120.00
Folate             400.00
Name: Male_Endurance_Intense, dtype: float64

In [18]:
def diet_minimizer_personalized(sex, athlete_type, training_type, age):
    """
    Calculates the minimum-cost daily diet for an individual with personalized
    nutrient requirements.

    Personalized targets are obtained from get_personalized_dri, and a linear
    program then chooses food quantities (from A_all and prices) whose nutrient
    totals are at least those targets, at minimum cost.

    Parameters:
      sex : str
          "Male" or "Female"
      athlete_type : str
          For example, "Endurance", "Strength", or "Bodybuilding"
      training_type : str
          For example, "Normal" or "Intense"
      age : int or float
          Age of the individual

    Assumes:
      - get_personalized_dri(sex, athlete_type, training_type, age) is defined.
      - A_all is a DataFrame with nutrients as its index and foods as its columns.
      - prices is a DataFrame with food names as its index and a numeric "price" column.
      - key_excludable is a list of food names to exclude.

    Notes:
      - Every target is treated as a MINIMUM (A x >= b); no upper limits are
        applied here.
        NOTE(review): if rda_df also carries upper-limit rows, they would be
        used as minimums too - confirm against the sheet.
      - Targets that are missing/non-finite, or whose nutrient has no row in
        A_all, are left out of the problem.

    Returns:
      pd.DataFrame: the foods whose optimal quantity is at least 1e-6, with
                    columns "Food" and "Cost per 100g". The quantities and the
                    total daily cost are printed, not returned.

    Raises:
      ValueError: if the solver does not find an optimum.
    """
    # Step 1: Personalized nutrient targets, as numbers.
    personalized_dri = get_personalized_dri(sex, athlete_type, training_type, age)
    targets = pd.to_numeric(personalized_dri, errors="coerce")

    # Step 2: Keep usable targets only. A NaN target (a nutrient without a
    # value for this group) would otherwise reach the solver, which rejects
    # non-finite bounds.
    targets = targets[np.isfinite(targets) & targets.index.isin(A_all.index)]
    b = targets.to_numpy(dtype=float)

    # Step 3: Nutrient rows for those targets, in the same order as b.
    filtered_A_all = A_all.loc[targets.index]

    # Step 4: Drop foods named in key_excludable from both tables.
    filtered_A_all = filtered_A_all.loc[:, ~filtered_A_all.columns.isin(key_excludable)]
    filtered_prices = prices.loc[~prices.index.isin(key_excludable)]

    # Step 5: Align the two tables on the foods they share.
    food_list = filtered_A_all.columns.intersection(filtered_prices.index)
    filtered_A_all = filtered_A_all[food_list]
    filtered_prices = filtered_prices.loc[food_list]

    # Step 6: Constraint matrix (nutrients x foods) and 1-D cost vector.
    A = filtered_A_all.to_numpy(dtype=float)
    p = np.asarray(filtered_prices["price"], dtype=float).ravel()

    # Step 7: Quantities may not be negative.
    bounds = [(0, None)] * p.shape[0]

    # Step 8: linprog expects "A_ub x <= b_ub": A x >= b  -->  -A x <= -b.
    res = lp(c=p, A_ub=-A, b_ub=-b, bounds=bounds, method='highs')

    if not res.success:
        raise ValueError("Optimization failed: " + res.message)

    # Step 9: Report the optimal quantities and the total cost.
    diet_quantities = pd.Series(res.x, index=filtered_prices.index)
    total_cost = res.fun
    print(diet_quantities[diet_quantities > 0]*100)

    # Step 10: Keep non-negligible foods. Selecting by position keeps the
    # price lookup single-valued even when two foods share a name.
    tol = 1e-6
    selected = diet_quantities.to_numpy() >= tol
    df_foods = pd.DataFrame({
        "Food": filtered_prices.index[selected],
        "Cost per 100g": p[selected]
    })

    print(f"\nYour daily personalized diet is ${total_cost:.2f}")
    return df_foods


# With rda_df, A_all, prices, and key_excludable defined by the cells above,
# this computes the minimum-cost diet for a 25-year-old Male Endurance athlete
# with Normal training. The call prints the chosen quantities and the total
# cost; `result` is the returned table of selected foods and their prices.
result = diet_minimizer_personalized("Male", "Endurance", "Normal", age=25)
result

Mackerel, canned                                                            57.369349
Egg, whole, fried no added fat                                              84.614106
Split peas, from dried, fat added                                          607.502051
Pasta, gluten free                                                        1035.098693
Cereal, frosted oat cereal with marshmallows                               194.690845
Orange juice, 100%,  freshly squeezed                                       65.862177
Orange juice, 100%, with calcium added, canned, bottled or in a carton      28.695860
Potato, boiled, from fresh, peel eaten, made with margarine                103.608875
Greens, NS as to form, cooked                                                0.985451
dtype: float64

Your daily personalized diet is $4.19


Unnamed: 0,Food,Cost per 100g
0,"Mackerel, canned",0.594039
1,"Egg, whole, fried no added fat",0.396952
2,"Split peas, from dried, fat added",0.140336
3,"Pasta, gluten free",0.114248
4,"Cereal, frosted oat cereal with marshmallows",0.54413
5,"Orange juice, 100%, freshly squeezed",0.180944
6,"Orange juice, 100%, with calcium added, canned...",0.181187
7,"Potato, boiled, from fresh, peel eaten, made w...",0.236255
8,"Greens, NS as to form, cooked",0.368179
