# DoorDash-Based Knapsack Optimizer: Online Grocery Selection for Protein Efficiency

In [1]:
import pandas as pd
from collections import defaultdict

# Load the shopping dataset (path is relative to the working directory).
df = pd.read_csv("../data/efficient_shopping_optimizer_data.csv")

# Keep only DoorDash rows. The match on `source` is case-insensitive and rows
# with a missing source count as non-matches.
online_df = df[df['source'].str.contains('Doordash', case=False, na=False)].copy()

# Pull the store name out of sources shaped like "Doordash_<store>". The inline
# (?i) flag makes the extraction case-insensitive, in line with the filter
# above; without it a source such as "doordash_<store>" would pass the filter,
# receive a NaN store, and be silently discarded by the dropna below.
online_df['store'] = online_df['source'].str.extract(r'(?i)Doordash_(\w+)', expand=False)

# Discard rows missing any field the optimizer or the report relies on.
online_df = online_df.dropna(subset=['item_name', 'price_total', 'protein_score', 'category', 'store'])

# Prepare data as parallel lists: position k in every list describes the same row.
protein_scores = online_df['protein_score'].tolist()  # value maximised by the knapsack
unit_prices = online_df['price_total'].tolist()  # cost of each item (the price_total column)
item_names = online_df['item_name'].tolist()
stores = online_df['store'].tolist()
categories = online_df['category'].tolist()
budget = 50  # spending cap, in the same units as price_total
n = len(protein_scores)  # number of candidate items

# 0/1 knapsack over the candidate items.
# Prices and the budget are multiplied by 100 and rounded to integers so that
# a cost can be used directly as a column index of the DP table.
scaled_budget = int(round(budget * 100))
scaled_weights = [int(round(price * 100)) for price in unit_prices]

# dp[k][cap] is the best total protein score reachable with the first k items
# at a total scaled cost of at most cap. Row 0 (no items) is all zeros.
dp = [[0.0] * (scaled_budget + 1)]
for cost, gain in zip(scaled_weights, protein_scores):
    previous_row = dp[-1]
    current_row = []
    for cap in range(scaled_budget + 1):
        without_item = previous_row[cap]
        if cost > cap:
            # The item does not fit at this capacity: carry the value over.
            current_row.append(without_item)
        else:
            current_row.append(max(without_item, previous_row[cap - cost] + gain))
    dp.append(current_row)

# Backtrack from the full budget. A cell that differs from the one directly
# above it means the item for that row was taken; ties resolve to "not taken",
# matching the max() above, which keeps the first argument on equality.
selected_indices = []
remaining = scaled_budget
for idx in reversed(range(n)):
    if dp[idx + 1][remaining] == dp[idx][remaining]:
        continue
    selected_indices.append(idx)
    remaining -= scaled_weights[idx]

In [2]:
# Report: bucket the chosen item indices by store, then by category.
# Sorting the indices first means each bucket's list is already ordered by
# item name when it is printed.
grouped_selection = defaultdict(lambda: defaultdict(list))
ordered_indices = sorted(
    selected_indices,
    key=lambda idx: (stores[idx], categories[idx], item_names[idx]),
)
for idx in ordered_indices:
    grouped_selection[stores[idx]][categories[idx]].append(idx)

print("Selected Items:")
total_cost = 0.0
total_score = 0.0
for store, by_category in sorted(grouped_selection.items(), key=lambda pair: pair[0]):
    print(f"{store}:")
    for category, members in sorted(by_category.items(), key=lambda pair: pair[0]):
        print(f"  {category}:")
        for idx in members:
            name = item_names[idx]
            price = unit_prices[idx]
            score = protein_scores[idx]
            print(f"    - {name} | ${price:.2f} | Protein Score: {score:.2f}")
            # Running totals are accumulated in print order.
            total_cost += price
            total_score += score
print(f"Total Expenditure: ${total_cost:.2f}")
print(f"Total Protein Score: {total_score:.2f}")

Selected Items:
ALDI:
  protein:
    - Chicken Breast | $9.99 | Protein Score: 25.88
    - Egg | $3.19 | Protein Score: 22.57
    - Plain Greek Yogurt | $3.79 | Protein Score: 22.50
  vegetables:
    - Cabbage | $2.25 | Protein Score: 14.52
    - Carrot | $1.39 | Protein Score: 5.22
    - Potato | $4.69 | Protein Score: 20.35
Meijer:
  fruit:
    - Nectarine | $0.99 | Protein Score: 1.66
  protein:
    - Chicken Breast | $10.28 | Protein Score: 35.72
    - Pork | $4.42 | Protein Score: 22.41
  snack:
    - Potato Chips | $2.49 | Protein Score: 6.26
  vegetables:
    - Cabbage | $2.37 | Protein Score: 13.78
    - Carrot | $0.22 | Protein Score: 2.47
    - Potato | $3.59 | Protein Score: 13.27
Total Expenditure: $49.66
Total Protein Score: 206.62
