In [13]:
import pandas as pd
import os
print(os.getcwd())
from pathlib import Path
data_path = "/Users/anastasiiadobson/Library/CloudStorage/Dropbox/WORK/BOOTCAMP/SMOOTHIES/data/processed/"
nutrients_df = pd.read_csv(data_path + "nutrient_matrix_ai.csv")
layers_df = pd.read_csv(data_path + "ingredient_layers.csv")
goal_matrix = pd.read_csv(data_path + "goal_matrix.csv")


/Users/anastasiiadobson/Library/CloudStorage/Dropbox/WORK/BOOTCAMP/SMOOTHIES/smoothie_tool


In [14]:
# === Step 1: Normalize the nutrient values ===
nutrient_score_map = {
    'No': 0,
    'Low': 1,
    'Moderate': 2,
    'High': 3,
    'Yes': 2  # Assuming 'Yes' means Moderate value
}
nutrients_scored = nutrients_df.copy()
for col in nutrients_df.columns[1:]:
    nutrients_scored[col] = nutrients_df[col].map(nutrient_score_map)

In [15]:
# === Step 2: Prompt user for goal ===
available_goals = goal_matrix.columns[1:].tolist()

print("Available Goals:")
for i, goal in enumerate(available_goals):
    print(f"{i + 1}. {goal}")

goal_index = int(input("Choose a goal by entering the number: ")) - 1
selected_goal = available_goals[goal_index]
print(f"\nSelected goal: {selected_goal}")

Available Goals:
1. Digestive Health
2. Energy
3. Longevity
4. Muscle
5. WeightManagement

Selected goal: Digestive Health


In [None]:
# === Step 3: Get relevant nutrients for the selected goal ===
goal_weights = goal_matrix.set_index("Ingredient")[selected_goal]
selected_nutrients = goal_weights[goal_weights > 0].index.tolist()


['Complex Carbohydrates', 'B-Vitamins', 'Magnesium', 'Adaptogen', 'Fiber', 'Low Glycemic Load', 'Protein', 'Fiber (Soluble + Insoluble)', 'Prebiotics', 'Probiotics', 'Antioxidants', 'Polyphenols', 'Omega-3', 'Anti-inflammatory agents']


In [21]:
# === Step 4: Score each ingredient for the goal ===
nutrients_scored["Score"] = nutrients_scored[selected_nutrients].sum(axis=1)


In [26]:
nutrients_scored

Unnamed: 0,Ingredient,Complex Carbohydrates,Iron,B-Vitamins,Magnesium,Adaptogen,Fiber,Low Glycemic Load,Protein,Fiber (Soluble + Insoluble),Prebiotics,Probiotics,BCAAs,Creatine,Antioxidants,Polyphenols,Omega-3,Anti-inflammatory agents,Sodium,Score
0,coconut water,1.0,1,1.0,2,0,1.0,2,1.0,1.0,0,0,0,0,2.0,2,0.0,0,,13.0
1,orange juice,1.0,1,2.0,1,0,1.0,2,1.0,1.0,0,0,0,0,2.0,2,0.0,2,,15.0
2,almond milk,1.0,1,0.0,2,0,1.0,2,1.0,1.0,0,0,0,0,2.0,2,0.0,0,,12.0
3,oat milk,3.0,1,2.0,2,0,3.0,2,1.0,2.0,2,0,0,0,2.0,2,0.0,0,,21.0
4,green tea,1.0,1,1.0,1,0,1.0,2,1.0,1.0,0,0,0,0,3.0,3,0.0,3,,17.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
167,chia & hemp,1.0,2,1.0,2,0,3.0,2,3.0,3.0,2,0,0,0,2.0,2,3.0,2,,26.0
168,turmeric + pepper,1.0,1,1.0,1,0,1.0,2,0.0,1.0,0,0,0,0,3.0,3,0.0,2,,15.0
169,cocoa nibs,1.0,2,1.0,2,0,3.0,2,2.0,3.0,0,0,0,0,3.0,3,0.0,2,,22.0
170,clove,1.0,2,1.0,1,0,1.0,2,1.0,1.0,0,0,0,0,2.0,2,0.0,2,,14.0


In [25]:
layers_df

Unnamed: 0,Layer,Ingredient
0,Liquid,Coconut water
1,Liquid,Orange juice
2,Liquid,Almond milk
3,Liquid,Oat milk
4,Liquid,Green tea
...,...,...
255,Elevate,Manuka honey
256,Elevate,Cinnamon
257,Elevate,Ashwagandha
258,Elevate,Schisandra


In [48]:
from rapidfuzz import fuzz, process
import pandas as pd
import unicodedata

# Step 0: Work on a copy
layers_df_cleaned = layers_df.copy()

# Step 1: Normalize, strip, lowercase
layers_df_cleaned["Ingredient"] = layers_df_cleaned["Ingredient"].apply(
    lambda x: unicodedata.normalize("NFKD", str(x))  # remove accents
    .encode("ascii", "ignore")                       # drop non-ascii
    .decode("utf-8")                                 # decode to string
    .strip()                                         # trim whitespace
    .lower()                                         # lowercase
)

# Optional: keep a display column
layers_df_cleaned["Ingredient_clean"] = layers_df_cleaned["Ingredient"].str.title()

# Step 2: Remove common modifiers
def simplify_ingredient(text):
    modifiers = ['unsweetened', 'organic', 'raw', 'fresh', 'natural', 'pure', 'cold brew']
    words = text.split()
    return ' '.join([w for w in words if w not in modifiers])

layers_df_cleaned["Ingredient_simplified"] = layers_df_cleaned["Ingredient"].apply(simplify_ingredient)

# Step 3: Deduplicate similar names
unique_ingredients = sorted(layers_df_cleaned["Ingredient_simplified"].unique())
ingredient_map = {}
processed = set()

for i, ingredient in enumerate(unique_ingredients):
    if ingredient in processed:
        continue
    matches = process.extract(ingredient, unique_ingredients[i+1:], scorer=fuzz.token_sort_ratio)
    for match, score, _ in matches:
        if score > 80:
            ingredient_map[match] = ingredient
            processed.add(match)

# Step 4: Map back to deduplicated names
layers_df_cleaned["Ingredient"] = layers_df_cleaned["Ingredient_simplified"].apply(
    lambda x: ingredient_map.get(x, x)
)

# Step 5: Display result
print(f"Unique ingredients after deduping: {sorted(layers_df_cleaned['Ingredient'].unique())}")

Unique ingredients after deduping: ['acacia fiber', 'acai', 'almond butter', 'almond milk', 'almond skins', 'almond yogurt', 'aloe vera juice', 'apple', 'apple cider vinegar', 'apple juice', 'apple pectin', 'apple skin', 'artichoke fiber', 'artichoke hearts', 'ashwagandha', 'avocado', 'bamboo fiber', 'banana', 'baobab', 'barley', 'barley flakes', 'beet', 'beetroot', 'berries', 'blueberries', 'bok choy', 'bone broth', 'bone broth protein', 'broccoli', 'buckwheat flakes', 'cacao nibs', 'cacao powder', 'cantaloupe', 'cardamom', 'carrot', 'carrot juice', 'casein', 'cashew milk', 'cauliflower', 'cayenne', 'celery', 'celery juice', 'chamomile tea', 'chia & hemp', 'chia + almond milk', 'chia protein', 'chia seeds', 'chicory root', 'chocolate almond milk', 'cinnamon', 'clove', 'cocoa nibs', 'coconut milk', 'coconut water', 'cold brew tea', 'cold coffee', 'collagen', 'collagen milk', 'collagen peptides', 'cooked beetroot', 'cooked oats', 'cottage cheese', 'creatine', 'cucumber', 'cucumber juice

In [51]:
layers_df_cleaned = layers_df_cleaned[["Layer", "Ingredient"]]
layers_df_cleaned

Unnamed: 0,Layer,Ingredient
0,Liquid,coconut water
1,Liquid,orange juice
2,Liquid,almond milk
3,Liquid,oat milk
4,Liquid,green tea
...,...,...
255,Elevate,manuka honey
256,Elevate,cinnamon
257,Elevate,ashwagandha
258,Elevate,schisandra


In [52]:
# === Step 5: Join layer info ===
full_df = pd.merge(nutrients_scored, layers_df_cleaned, on="Ingredient", how="inner")


In [79]:
full_df
full_df.to_csv("full_df.csv", index=False)

In [75]:
nutrient_matrix_amounts = pd.read_csv(data_path + "nutrient_matrix_amounts.csv")
print(nutrient_matrix_amounts[["Ingredient", "Protein", "Fiber"]])
nutrient_matrix_amounts_cleaned = nutrient_matrix_amounts.replace("Unknown", pd.NA)

# Count non-null values (i.e. known values) per column
known_counts = nutrient_matrix_amounts_cleaned[["Protein", "Fiber"]].notna().sum()

print("Number of known values per column:")
print(known_counts)
def clean_grams(x):
    try:
        if isinstance(x, str) and 'g' in x:
            return float(x.replace('g', '').strip())
        return float(x)
    except:
        return pd.NA

# Copy & clean
amounts_df = nutrient_matrix_amounts.copy()
amounts_df["Protein_clean"] = amounts_df["Protein"].apply(clean_grams)
amounts_df["Fiber_clean"] = amounts_df["Fiber"].apply(clean_grams)

# Normalize ingredient names (same as for other DataFrames)
import unicodedata

amounts_df["Ingredient"] = amounts_df["Ingredient"].apply(
    lambda x: unicodedata.normalize("NFKD", str(x))
    .encode("ascii", "ignore")
    .decode("utf-8")
    .strip()
    .lower()
)
full_df["Ingredient"] = full_df["Ingredient"].apply(
    lambda x: unicodedata.normalize("NFKD", str(x))
    .encode("ascii", "ignore")
    .decode("utf-8")
    .strip()
    .lower()
)

print(full_df)

            Ingredient  Protein    Fiber
0        coconut water    0.72g     0.7g
1         orange juice      0.7      0.2
2          almond milk     0.5g     0.5g
3             oat milk  Unknown  Unknown
4            green tea        0        0
..                 ...      ...      ...
164        chia & hemp  Unknown  Unknown
165  turmeric + pepper  Unknown  Unknown
166         cocoa nibs  Unknown  Unknown
167              clove      5.0      5.0
168      reishi powder  Unknown  Unknown

[169 rows x 3 columns]
Number of known values per column:
Protein    118
Fiber      113
dtype: int64
            Ingredient  Complex Carbohydrates  Iron  B-Vitamins  Magnesium  \
0        coconut water                    1.0     1         1.0          2   
1        coconut water                    1.0     1         1.0          2   
2        coconut water                    1.0     1         1.0          2   
3         orange juice                    1.0     1         2.0          1   
4          almon

In [77]:
# === Step 6: Pick best ingredient per layer while respecting fiber cap ===
fiber_cap = 15  # grams — you can adjust this as needed
total_fiber = 0
selected_ingredients = []

# We'll store smoothie components here
best_per_layer = []

# Sort full_df once
sorted_df = full_df.sort_values(by="Score", ascending=False)

# Go layer by layer
for layer in full_df["Layer"].unique():
    layer_df = sorted_df[sorted_df["Layer"] == layer]
    
    for _, row in layer_df.iterrows():
        fiber_val = row.get("Fiber", 0)
        if pd.isna(fiber_val):
            fiber_val = 0
        
        # Only accept if we stay under the cap
        if total_fiber + fiber_val <= fiber_cap:
            total_fiber += fiber_val
            best_per_layer.append({
                "Layer": layer,
                "Ingredient": row["Ingredient"],
                "Score": row["Score"],
                "Fiber": fiber_val
            })
            break  # Move to next layer

# Convert to DataFrame
best_per_layer_df = pd.DataFrame(best_per_layer)

# === Step 7: Show the smoothie ===
print("\n🥤 Your Optimized Smoothie for Goal:", selected_goal)
for _, row in best_per_layer_df.iterrows():
    print(f"- {row['Layer']}: {row['Ingredient']} (Score: {row['Score']}, Fiber: {row['Fiber']}g)")

print(f"\nTotal Fiber: {round(total_fiber, 2)}g / {fiber_cap}g cap")


🥤 Your Optimized Smoothie for Goal: Digestive Health
- Liquid: spinach water (Score: 26.0, Fiber: 3.0g)
- Body: quinoa (Score: 27.0, Fiber: 3.0g)
- Fiber: chia seeds (Score: 28.0, Fiber: 3.0g)
- Protein: spirulina (Score: 29.0, Fiber: 3.0g)
- Elevate: schisandra (Score: 26.0, Fiber: 2.0g)

Total Fiber: 14.0g / 15g cap


In [78]:
total_fiber = 0
total_protein = 0
best_per_layer = []
body_weight_kg = 60  # or use st.slider / st.number_input in Streamlit

# Choose multiplier based on user goal
goal_multipliers = {
    "Digestive Health": 1.0,
    "Energy": 1.2,
    "Longevity": 1.2,
    "Muscle": 1.8,
    "WeightManagement": 1.5
}

protein_min = round(goal_multipliers[selected_goal] * body_weight_kg, 1)
sorted_df = full_df.sort_values(by="Score", ascending=False)

# To track fallback if protein not met
remaining_layers = list(full_df["Layer"].unique())

for layer in remaining_layers:
    layer_df = sorted_df[sorted_df["Layer"] == layer]
    
    selected = None
    for _, row in layer_df.iterrows():
        fiber_val = row.get("Fiber", 0) or 0
        protein_val = row.get("Protein", 0) or 0
        
        # Check if adding this keeps us under fiber cap
        if total_fiber + fiber_val <= fiber_cap:
            selected = {
                "Layer": layer,
                "Ingredient": row["Ingredient"],
                "Score": row["Score"],
                "Fiber": fiber_val,
                "Protein": protein_val
            }
            total_fiber += fiber_val
            total_protein += protein_val
            break  # Done with this layer

    if selected:
        best_per_layer.append(selected)
    else:
        print(f"⚠️ No valid ingredient for layer '{layer}' under fiber cap.")

# Final smoothie DataFrame
best_per_layer_df = pd.DataFrame(best_per_layer)

# === Output ===
print("\n🥤 Your Optimized Smoothie for Goal:", selected_goal)
for _, row in best_per_layer_df.iterrows():
    print(f"- {row['Layer']}: {row['Ingredient']} (Score: {row['Score']}, Fiber: {row['Fiber']}g, Protein: {row['Protein']}g)")

print(f"\nTotal Fiber: {round(total_fiber, 2)}g / {fiber_cap}g cap")
print(f"Total Protein: {round(total_protein, 2)}g")

# Check protein target
if total_protein < protein_min:
    print(f"⚠️ Protein goal not met. Consider adding a protein booster (target: {protein_min}g).")


🥤 Your Optimized Smoothie for Goal: Digestive Health
- Liquid: spinach water (Score: 26.0, Fiber: 3.0g, Protein: 1.0g)
- Body: quinoa (Score: 27.0, Fiber: 3.0g, Protein: 3.0g)
- Fiber: chia seeds (Score: 28.0, Fiber: 3.0g, Protein: 2.0g)
- Protein: spirulina (Score: 29.0, Fiber: 3.0g, Protein: 3.0g)
- Elevate: schisandra (Score: 26.0, Fiber: 2.0g, Protein: 1.0g)

Total Fiber: 14.0g / 15g cap
Total Protein: 10.0g
⚠️ Protein goal not met. Consider adding a protein booster (target: 60.0g).


In [74]:
nutrient_matrix_amounts = pd.read_csv(data_path + "nutrient_matrix_amounts.csv")
print(nutrient_matrix_amounts[["Ingredient", "Protein", "Fiber"]])
nutrient_matrix_amounts_cleaned = nutrient_matrix_amounts.replace("Unknown", pd.NA)

# Count non-null values (i.e. known values) per column
known_counts = nutrient_matrix_amounts_cleaned[["Protein", "Fiber"]].notna().sum()

print("Number of known values per column:")
print(known_counts)
def clean_grams(x):
    try:
        if isinstance(x, str) and 'g' in x:
            return float(x.replace('g', '').strip())
        return float(x)
    except:
        return pd.NA

# Copy & clean
amounts_df = nutrient_matrix_amounts.copy()
amounts_df["Protein_clean"] = amounts_df["Protein"].apply(clean_grams)
amounts_df["Fiber_clean"] = amounts_df["Fiber"].apply(clean_grams)

# Normalize ingredient names (same as for other DataFrames)
import unicodedata

amounts_df["Ingredient"] = amounts_df["Ingredient"].apply(
    lambda x: unicodedata.normalize("NFKD", str(x))
    .encode("ascii", "ignore")
    .decode("utf-8")
    .strip()
    .lower()
)
full_df["Ingredient"] = full_df["Ingredient"].apply(
    lambda x: unicodedata.normalize("NFKD", str(x))
    .encode("ascii", "ignore")
    .decode("utf-8")
    .strip()
    .lower()
)

print(full_df)

            Ingredient  Protein    Fiber
0        coconut water    0.72g     0.7g
1         orange juice      0.7      0.2
2          almond milk     0.5g     0.5g
3             oat milk  Unknown  Unknown
4            green tea        0        0
..                 ...      ...      ...
164        chia & hemp  Unknown  Unknown
165  turmeric + pepper  Unknown  Unknown
166         cocoa nibs  Unknown  Unknown
167              clove      5.0      5.0
168      reishi powder  Unknown  Unknown

[169 rows x 3 columns]
Number of known values per column:
Protein    118
Fiber      113
dtype: int64
            Ingredient  Complex Carbohydrates  Iron  B-Vitamins  Magnesium  \
0        coconut water                    1.0     1         1.0          2   
1        coconut water                    1.0     1         1.0          2   
2        coconut water                    1.0     1         1.0          2   
3         orange juice                    1.0     1         2.0          1   
4          almon

In [65]:
nutrient_matrix_amounts_cleaned = nutrient_matrix_amounts.replace("Unknown", pd.NA)

# Count non-null values (i.e. known values) per column
known_counts = nutrient_matrix_amounts_cleaned[["Protein", "Fiber"]].notna().sum()

print("Number of known values per column:")
print(known_counts)

Number of known values per column:
Protein    118
Fiber      113
dtype: int64


In [66]:
def clean_grams(x):
    try:
        if isinstance(x, str) and 'g' in x:
            return float(x.replace('g', '').strip())
        return float(x)
    except:
        return pd.NA

# Copy & clean
amounts_df = nutrient_matrix_amounts.copy()
amounts_df["Protein_clean"] = amounts_df["Protein"].apply(clean_grams)
amounts_df["Fiber_clean"] = amounts_df["Fiber"].apply(clean_grams)

# Normalize ingredient names (same as for other DataFrames)
import unicodedata

amounts_df["Ingredient"] = amounts_df["Ingredient"].apply(
    lambda x: unicodedata.normalize("NFKD", str(x))
    .encode("ascii", "ignore")
    .decode("utf-8")
    .strip()
    .lower()
)

In [67]:
full_df["Ingredient"] = full_df["Ingredient"].apply(
    lambda x: unicodedata.normalize("NFKD", str(x))
    .encode("ascii", "ignore")
    .decode("utf-8")
    .strip()
    .lower()
)

In [61]:
full_df = full_df.merge(
    amounts_df[["Ingredient", "Protein_clean", "Fiber_clean"]],
    on="Ingredient",
    how="left"
)

In [68]:
full_df

Unnamed: 0,Ingredient,Complex Carbohydrates,Iron,B-Vitamins,Magnesium,Adaptogen,Fiber,Low Glycemic Load,Protein,Fiber (Soluble + Insoluble),...,Creatine,Antioxidants,Polyphenols,Omega-3,Anti-inflammatory agents,Sodium,Score,Layer,Protein_clean,Fiber_clean
0,coconut water,1.0,1,1.0,2,0,1.0,2,1.0,1.0,...,0,2.0,2,0.0,0,,13.0,Liquid,0.72,0.7
1,coconut water,1.0,1,1.0,2,0,1.0,2,1.0,1.0,...,0,2.0,2,0.0,0,,13.0,Liquid,0.72,0.7
2,coconut water,1.0,1,1.0,2,0,1.0,2,1.0,1.0,...,0,2.0,2,0.0,0,,13.0,Liquid,0.72,0.7
3,orange juice,1.0,1,2.0,1,0,1.0,2,1.0,1.0,...,0,2.0,2,0.0,2,,15.0,Liquid,0.7,0.2
4,almond milk,1.0,1,0.0,2,0,1.0,2,1.0,1.0,...,0,2.0,2,0.0,0,,12.0,Liquid,0.5,0.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
253,chia & hemp,1.0,2,1.0,2,0,3.0,2,3.0,3.0,...,0,2.0,2,3.0,2,,26.0,Protein,,
254,turmeric + pepper,1.0,1,1.0,1,0,1.0,2,0.0,1.0,...,0,3.0,3,0.0,2,,15.0,Elevate,,
255,cocoa nibs,1.0,2,1.0,2,0,3.0,2,2.0,3.0,...,0,3.0,3,0.0,2,,22.0,Elevate,,
256,clove,1.0,2,1.0,1,0,1.0,2,1.0,1.0,...,0,2.0,2,0.0,2,,14.0,Elevate,5.0,5.0


In [8]:
# Keep only the highest scoring row per ingredient
full_df = (
    full_df.sort_values(by="Score", ascending=False)
           .drop_duplicates(subset=["Ingredient"], keep="first")
           .reset_index(drop=True)
)

# Print after cleaning
print("✅ full_df AFTER deduplication:", full_df)
output_path = "full_df.csv"
full_df.to_csv(output_path, index=False)



✅ full_df AFTER deduplication:            Ingredient  Complex Carbohydrates  Iron  B-Vitamins  Magnesium  \
0           spirulina                    1.0     3         3.0          3   
1          chia seeds                    1.0     1         2.0          2   
2             lentils                    3.0     2         3.0          2   
3         red lentils                    2.0     3         3.0          3   
4              quinoa                    3.0     2         2.0          3   
..                ...                    ...   ...         ...        ...   
165          creatine                    0.0     0         0.0          0   
166   vanilla extract                    0.0     0         0.0          0   
167         ice cubes                    0.0     0         0.0          0   
168             water                    0.0     0         0.0          0   
169  water + creatine                    0.0     0         0.0          0   

     Adaptogen  Fiber  Low Glycemic Load  Pr

In [26]:
"""GENERATE SMOOTHIES BASED ON THE FLAVOUR PROFILE AND MAXIMUM SCORE PER GOAL  """
import pandas as pd
import random

# === Load your cleaned dataframe ===
full_df = pd.read_csv("/Users/anastasiiadobson/Library/CloudStorage/Dropbox/WORK/BOOTCAMP/SMOOTHIES/data/processed/full_df_with_flavors.csv")
print("✅ Loaded full_df:", full_df.shape)

# === Parameters ===
goal_multipliers = {
    "Digestive Health": 1.0,
    "Energy": 1.2,
    "Longevity": 1.2,
    "Muscle": 1.8,
    "WeightManagement": 1.5
}

top_n = 20
min_shared_flavor_count = 3
n_random_combos = 50000  # per goal

# === Initialize result collector ===
all_smoothies = []

# === Loop over each goal ===
for selected_goal, multiplier in goal_multipliers.items():
    print(f"\n🔄 Processing goal: {selected_goal}")

    # Step 1: Get top N per layer
    layer_options = {}
    for layer in full_df["Layer"].unique():
        df_layer = (
            full_df[full_df["Layer"] == layer]
            .sort_values(by="Score", ascending=False)
            .head(top_n)
            [["Ingredient", "Score", "Protein", "Fiber", "FlavorProfile"]]
        )
        layer_options[layer] = df_layer
        print(f"📌 Top {top_n} ingredients loaded for layer '{layer}'")

    # Step 2: Random sampling
    print(f"🎲 Sampling {n_random_combos} random combinations...")
    layers = list(layer_options.values())
    random_combos = []
    for i in range(n_random_combos):
        combo = [random.choice(layer.to_dict("records")) for layer in layers]
        random_combos.append(combo)
        if i % 5000 == 0:
            print(f"⏳ Sampled combo #{i+1}/{n_random_combos}")

    # Step 3: Score and filter
    smoothies = []
    for combo in random_combos:
        ingredients = [item["Ingredient"] for item in combo]
        if len(set(ingredients)) < len(ingredients):
            continue  # skip duplicates

        flavors = [item.get("FlavorProfile", "unknown") for item in combo]
        flavor_counts = pd.Series(flavors).value_counts()
        most_common_flavor_count = flavor_counts.iloc[0] if not flavor_counts.empty else 0

        if most_common_flavor_count < min_shared_flavor_count:
            continue

        smoothies.append({
            "Ingredients": ingredients,
            "Used": set(ingredients),
            "Total_Score": sum(item["Score"] for item in combo) * multiplier,
            "Total_Protein": sum(item["Protein"] for item in combo),
            "Total_Fiber": sum(item["Fiber"] for item in combo),
            "FlavorMode": flavor_counts.index[0],
            "Goal": selected_goal
        })

    print(f"✅ Valid smoothies after filtering: {len(smoothies)}")

    # Step 4: Select 10 best non-overlapping
    selected_smoothies = []
    used_ingredients = set()
    for smoothie in sorted(smoothies, key=lambda x: x["Total_Score"], reverse=True):
        if smoothie["Used"].isdisjoint(used_ingredients):
            selected_smoothies.append(smoothie)
            used_ingredients.update(smoothie["Used"])
        if len(selected_smoothies) == 10:
            break

    print(f"🥤 Selected top 10 smoothies for goal '{selected_goal}'")

    # Step 5: Store
    for i, s in enumerate(selected_smoothies, 1):
        all_smoothies.append({
            "Goal": s["Goal"],
            "SmoothieID": f"{s['Goal'].replace(' ', '')}_{i}",
            "Ingredients": ", ".join(s["Ingredients"]),
            "Flavor": s["FlavorMode"],
            "Total_Score": round(s["Total_Score"], 2),
            "Total_Protein": round(s["Total_Protein"], 1),
            "Total_Fiber": round(s["Total_Fiber"], 1)
        })

# === Save final output ===
df_all = pd.DataFrame(all_smoothies)
df_all.to_csv("smoothies_all_goals.csv", index=False)
print("\n✅ Done! All smoothies saved to 'smoothies_all_goals.csv'")

✅ Loaded full_df: (170, 24)

🔄 Processing goal: Digestive Health
📌 Top 20 ingredients loaded for layer 'Protein'
📌 Top 20 ingredients loaded for layer 'Fiber'
📌 Top 20 ingredients loaded for layer 'Body'
📌 Top 20 ingredients loaded for layer 'Liquid'
📌 Top 20 ingredients loaded for layer 'Elevate'
🎲 Sampling 50000 random combinations...
⏳ Sampled combo #1/50000


⏳ Sampled combo #5001/50000
⏳ Sampled combo #10001/50000
⏳ Sampled combo #15001/50000
⏳ Sampled combo #20001/50000
⏳ Sampled combo #25001/50000
⏳ Sampled combo #30001/50000
⏳ Sampled combo #35001/50000
⏳ Sampled combo #40001/50000
⏳ Sampled combo #45001/50000
✅ Valid smoothies after filtering: 7287
🥤 Selected top 10 smoothies for goal 'Digestive Health'

🔄 Processing goal: Energy
📌 Top 20 ingredients loaded for layer 'Protein'
📌 Top 20 ingredients loaded for layer 'Fiber'
📌 Top 20 ingredients loaded for layer 'Body'
📌 Top 20 ingredients loaded for layer 'Liquid'
📌 Top 20 ingredients loaded for layer 'Elevate'
🎲 Sampling 50000 random combinations...
⏳ Sampled combo #1/50000
⏳ Sampled combo #5001/50000
⏳ Sampled combo #10001/50000
⏳ Sampled combo #15001/50000
⏳ Sampled combo #20001/50000
⏳ Sampled combo #25001/50000
⏳ Sampled combo #30001/50000
⏳ Sampled combo #35001/50000
⏳ Sampled combo #40001/50000
⏳ Sampled combo #45001/50000
✅ Valid smoothies after filtering: 7325
🥤 Selected top 10 