In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sqlalchemy import create_engine
from datetime import datetime
import networkx as nx

Visual 1: waste_prod_vs_time.py

Type: Stacked Histogram

Variables: Waste, Product vs. Time

In [3]:
def load_pantry_with_category(engine):
    """
    Read every pantry row from cookbook.db together with its product category.

    The pantry table is LEFT JOINed to sold_as (on ingredient_id) and then to
    tj_inventory (on product_id), so pantry rows without a matching product
    are kept and come back with a null category.

    Parameters:
        engine: connection/engine object handed straight to pd.read_sql.

    Returns:
        DataFrame with columns pantry_id, ingredient_id, amount, unit,
        date_purchased, expiration_date, category.
    """
    # NOTE(review): if one ingredient_id has several sold_as rows, the join
    # would repeat that pantry row once per product -- confirm against the
    # schema before summing `amount` downstream.
    pantry_sql = """
    SELECT
        p.pantry_id,
        p.ingredient_id,
        p.amount,
        p.unit,
        p.date_purchased,
        p.expiration_date,
        ti.category
    FROM pantry p
    LEFT JOIN sold_as sa
        ON p.ingredient_id = sa.ingredient_id
    LEFT JOIN tj_inventory ti
        ON sa.product_id = ti.product_id;
    """
    pantry_with_category = pd.read_sql(pantry_sql, engine)
    return pantry_with_category

In [4]:
def prep_expiring_food_bins(pantry_df, today=None):
    """
    Add days_to_expiry + expiry_bucket columns to a copy of the pantry data.

    Rows whose expiration_date is missing or cannot be parsed are dropped, as
    are rows that have already expired (negative days_to_expiry).

    Parameters:
        pantry_df: DataFrame with an "expiration_date" column.
        today: reference date (anything pd.Timestamp accepts). Defaults to
            the current local date at midnight.

    Returns:
        A new DataFrame (the input is not modified) with two extra columns:
        "days_to_expiry" (whole days until expiry) and "expiry_bucket"
        (ordered categorical with the labels listed below).
    """
    if today is None:
        today = pd.Timestamp(datetime.today().date())
    else:
        # Accept strings / date objects as well as Timestamps.
        today = pd.Timestamp(today)

    df = pantry_df.copy()
    df["expiration_date"] = pd.to_datetime(df["expiration_date"], errors="coerce")
    # .copy() after each filter so the column assignments below operate on an
    # independent frame rather than a slice of the previous one.
    df = df[df["expiration_date"].notna()].copy()

    df["days_to_expiry"] = (df["expiration_date"] - today).dt.days

    df = df[df["days_to_expiry"] >= 0].copy()

    bins = [0, 1, 3, 7, 14, 30, np.inf]
    labels = ["0–1 day", "2–3 days", "4–7 days", "8–14 days", "15–30 days", "30+ days"]

    # include_lowest=True closes the first interval on the left, so items
    # expiring today (0 days) land in "0–1 day" instead of getting a NaN
    # bucket and silently disappearing from the chart.
    df["expiry_bucket"] = pd.cut(
        df["days_to_expiry"],
        bins=bins,
        labels=labels,
        right=True,
        include_lowest=True,
    )

    return df

In [5]:
def plot_expiring_food_histogram(pantry_df, today=None):
    """
    Visual 1: Stacked bar chart of total amount per time-until-expiry bucket,
    with one stacked segment per category.

    Parameters:
        pantry_df: DataFrame with "expiration_date", "category" and "amount"
            columns.
        today: optional reference date forwarded to prep_expiring_food_bins;
            None keeps that function's default.

    Returns:
        The matplotlib Figure containing the chart.
    """
    bucket_order = ["0–1 day", "2–3 days", "4–7 days", "8–14 days", "15–30 days", "30+ days"]

    df = prep_expiring_food_bins(pantry_df, today=today)
    df = df.assign(category=df["category"].fillna("Unknown"))

    grouped = (
        df.groupby(["expiry_bucket", "category"], observed=True)["amount"]
          .sum()
          .reset_index()
    )

    # Reindex first, then fill: buckets with no items must show up as zeros.
    # Filling before the reindex left those rows as NaN.
    pivot = (
        grouped.pivot(index="expiry_bucket", columns="category", values="amount")
               .reindex(bucket_order)
               .fillna(0)
    )

    fig, ax = plt.subplots()
    if pivot.shape[1] > 0:
        pivot.plot(kind="bar", stacked=True, ax=ax)
    else:
        # DataFrame.plot raises when there is nothing numeric to draw, so
        # render an empty chart with the bucket axis instead.
        ax.set_xticks(range(len(bucket_order)))
        ax.set_xticklabels(bucket_order)
        ax.text(0.5, 0.5, "No unexpired pantry items",
                transform=ax.transAxes, ha="center", va="center")

    ax.set_xlabel("Time Until Expiry")
    ax.set_ylabel("Total Amount")
    ax.set_title("Expiring Food Forecast")
    # Rotate via the axes object rather than the pyplot "current axes" state.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
    fig.tight_layout()
    return fig

In [7]:
# engine = create_engine("sqlite:///cookbook.db")
#
# pantry_df = load_pantry_with_category(engine)
# plot_expiring_food_histogram(pantry_df)

Run visual 1

In [None]:
# from sqlalchemy import create_engine
# from visuals.visual1_expiring import load_pantry_with_category, plot_expiring_food_histogram
#
# engine = create_engine("sqlite:///cookbook.db")
#
# pantry_df = load_pantry_with_category(engine)
#
# plot_expiring_food_histogram(pantry_df)

Visual 2: consumption_vs_waste.py

Type: Dual-axis line chart

Variables: Consumption via Recipe at X time, Waste, vs. Time

In [None]:
def get_forecast_waste_by_date(pantry_df):
    """
    From pantry_df, compute how much is expiring each day.

    Rows whose expiration_date is missing or cannot be parsed are ignored.

    Parameters:
        pantry_df: DataFrame with "expiration_date" and "amount" columns.

    Returns:
        DataFrame with one row per calendar day: "date" (midnight timestamp)
        and "forecast_waste" (sum of "amount" expiring that day).
    """
    df = pantry_df.copy()
    # Normalize to midnight so that expiry timestamps carrying a time of day
    # are grouped per calendar day, as the contract says, and line up with
    # other day-level series when merged on "date".
    df["expiration_date"] = (
        pd.to_datetime(df["expiration_date"], errors="coerce").dt.normalize()
    )

    df = df[df["expiration_date"].notna()]

    daily = (
        df.groupby("expiration_date", as_index=False)["amount"]
          .sum()
          .rename(columns={"expiration_date": "date", "amount": "forecast_waste"})
    )
    return daily

In [None]:
def get_planned_consumption_by_date(engine):
    """
    Total the ingredient amounts of the selected recipes for each day.

    Reads the recipe_selected and cookbook tables, joins them on recipe_id,
    and sums "amount" per selection day (taken from "sel_ts").

    Parameters:
        engine: connection/engine object handed to pd.read_sql.

    Returns:
        DataFrame with columns "date" and "planned_consumption".
    """
    # Recipe selections, stamped with the calendar day they were made on
    picks = pd.read_sql("SELECT * FROM recipe_selected;", engine)
    picks["date"] = pd.to_datetime(picks["sel_ts"]).dt.date

    # Per-recipe ingredient amounts
    recipe_amounts = pd.read_sql("SELECT * FROM cookbook;", engine)

    # Left join keeps selections even when the recipe has no cookbook rows
    joined = picks.merge(recipe_amounts, how="left", on="recipe_id")
    joined["date"] = pd.to_datetime(joined["date"])

    per_day = joined.groupby("date", as_index=False)["amount"].sum()
    return per_day.rename(columns={"amount": "planned_consumption"})

In [None]:
def plot_consumption_vs_waste(pantry_df, engine):
    """
    Visual 2: Dual-axis line chart of forecast waste (left axis) and planned
    consumption (right axis) over time.

    Parameters:
        pantry_df: pantry DataFrame passed to get_forecast_waste_by_date.
        engine: connection/engine passed to get_planned_consumption_by_date.

    Returns:
        The matplotlib Figure containing both lines and a combined legend.
    """
    waste_by_day = get_forecast_waste_by_date(pantry_df)
    consumption_by_day = get_planned_consumption_by_date(engine)

    # Outer merge keeps days that appear in only one series; the missing
    # side is treated as zero.
    timeline = waste_by_day.merge(consumption_by_day, on="date", how="outer")
    timeline = timeline.sort_values("date")
    for series_name in ("forecast_waste", "planned_consumption"):
        timeline[series_name] = timeline[series_name].fillna(0)

    fig, waste_ax = plt.subplots(figsize=(10, 5))

    # Left axis
    waste_ax.plot(
        timeline["date"],
        timeline["forecast_waste"],
        label="Forecast Waste",
        linewidth=2,
    )
    waste_ax.set_xlabel("Date")
    waste_ax.set_ylabel("Forecast Waste")

    # Right axis shares the x axis with the left one
    consumption_ax = waste_ax.twinx()
    consumption_ax.plot(
        timeline["date"],
        timeline["planned_consumption"],
        color="orange",
        linewidth=2,
        linestyle="--",
        label="Planned Consumption",
    )
    consumption_ax.set_ylabel("Planned Consumption")

    fig.suptitle("Planned Consumption vs Forecast Waste")
    fig.autofmt_xdate()

    # One legend covering the lines of both axes
    handles, names = [], []
    for axis in (waste_ax, consumption_ax):
        axis_handles, axis_names = axis.get_legend_handles_labels()
        handles = handles + axis_handles
        names = names + axis_names
    waste_ax.legend(handles, names, loc="upper left")

    plt.tight_layout()
    return fig

Run visual 2

In [None]:
# from sqlalchemy import create_engine
#
# from visuals.visual1_expiring import load_pantry_with_category
# from visuals.visual2_consumption import plot_consumption_vs_waste
#
# engine = create_engine("sqlite:///cookbook.db")
#
# pantry_df = load_pantry_with_category(engine)
#
# plot_consumption_vs_waste(pantry_df, engine)

Visual 3: waste_gen_vs_saved.py

Type: Waterfall chart

Variables: Realized Waste per Category (possibly identifying the “Top Waste Category”)
and Avoided Waste per Category


In [None]:
def compute_waste_summary_from_pantry(pantry_df, today=None):
    """
    Summarise already-expired pantry amounts per category.

    An item counts as expired when its expiration_date is strictly before
    `today`; a missing category is reported as "Unknown".

    Parameters:
        pantry_df: DataFrame with "expiration_date", "category", "amount".
        today: reference date; defaults to the current local date at midnight.

    Returns:
        DataFrame with columns "category", "realized_waste" (summed amount of
        expired items) and "avoided_waste" (always 0.0 here).
    """
    cutoff = pd.Timestamp(datetime.today().date()) if today is None else today

    pantry = pantry_df.copy()
    pantry["expiration_date"] = pd.to_datetime(
        pantry["expiration_date"], errors="coerce"
    )
    pantry["category"] = pantry["category"].fillna("Unknown")

    # Unparseable dates become NaT, which never compares as "before cutoff"
    is_expired = pantry["expiration_date"] < cutoff
    per_category = (
        pantry.loc[is_expired]
        .groupby("category", as_index=False)["amount"]
        .sum()
    )

    summary = per_category.rename(columns={"amount": "realized_waste"})
    summary["avoided_waste"] = 0.0
    return summary

In [None]:
def plot_waste_waterfall(waste_summary):
    """
    Visual 3:
    Waterfall-style bar chart of realized vs avoided waste by category.

    Categories are drawn in descending order of realized waste. Each category
    contributes a red downward bar (realized waste) and, when its avoided
    waste is non-zero, a green upward bar. Every bar starts where the previous
    one ended.

    Parameters:
        waste_summary: DataFrame with "category", "realized_waste" and
            "avoided_waste" columns.

    Returns:
        The matplotlib Figure containing the chart.
    """
    ordered = waste_summary.sort_values("realized_waste", ascending=False)

    # The first bar is a zero-height "Start" anchor.
    labels = ["Start"]
    steps = [0.0]

    for cat, realized, avoided in zip(
        ordered["category"], ordered["realized_waste"], ordered["avoided_waste"]
    ):
        # A missing value would otherwise turn the running total into NaN and
        # blank out every later bar, so treat it as zero.
        realized = 0.0 if pd.isna(realized) else realized
        avoided = 0.0 if pd.isna(avoided) else avoided

        # Down step = realized waste
        steps.append(-realized)
        labels.append(f"{cat} wasted")

        # Up step = avoided waste
        if avoided != 0:
            steps.append(avoided)
            labels.append(f"{cat} avoided")

    steps = np.asarray(steps, dtype=float)
    # Each bar sits on the cumulative total of all the steps before it.
    bottoms = np.concatenate(([0.0], np.cumsum(steps)[:-1]))
    colors = ["red" if step < 0 else "green" for step in steps]
    x = np.arange(len(labels))

    fig, ax = plt.subplots(figsize=(10, 5))
    ax.bar(x, steps, bottom=bottoms, color=colors)

    ax.set_xticks(x)
    ax.set_xticklabels(labels, rotation=45, ha="right")
    ax.set_ylabel("Waste (amount units)")
    ax.set_title("Realized vs Avoided Waste by Category (Simple Approximation)")
    fig.tight_layout()
    return fig

Run Visual 3

In [None]:
# from sqlalchemy import create_engine
#
# from visuals.visual1_expiring import load_pantry_with_category
# from visuals.visual3_waste import compute_waste_summary_from_pantry, plot_waste_waterfall
#
# engine = create_engine("sqlite:///cookbook.db")
#
# pantry_df = load_pantry_with_category(engine)
# waste_summary = compute_waste_summary_from_pantry(pantry_df)
# plot_waste_waterfall(waste_summary)

Visual 4: recipe_ingredient_overlap.py

Type: Network Graph

Variables: Recipe, recipe category, clustered by overlapping ingredients

In [None]:
def load_recipe_ingredient_data(engine):
    """
    Read the three tables needed for the recipe-ingredient network.

    Parameters:
        engine: connection/engine object handed to pd.read_sql.

    Returns:
        Tuple (recipes, ingredients, inv_idx) of DataFrames:
          - recipes: recipe_id, title, category
          - ingredients: ingredient_id, norm_name
          - inv_idx: ingredient_id, recipe_id pairs
    """
    # Same order as the returned tuple: recipes, ingredients, inverted index
    table_queries = (
        "SELECT recipe_id, title, category FROM recipe;",
        "SELECT ingredient_id, norm_name FROM usable_ingredients;",
        "SELECT ingredient_id, recipe_id FROM ingredient_recipe_inverted_index;",
    )
    recipes, ingredients, inv_idx = (
        pd.read_sql(sql, engine) for sql in table_queries
    )
    return recipes, ingredients, inv_idx

In [None]:
def build_recipe_ingredient_graph(recipes_df, ingredients_df, inv_idx_df):
    """
    Assemble an undirected NetworkX graph linking recipes to their ingredients.

    Node ids are "recipe_<recipe_id>" and "ingredient_<ingredient_id>". Every
    node carries "type" and "label" attributes; recipe nodes also carry
    "category" (None when the row has no such field). An edge is added for
    each inv_idx_df row whose recipe and ingredient nodes both exist.

    Parameters:
        recipes_df: rows with "recipe_id", "title" and optionally "category".
        ingredients_df: rows with "ingredient_id" and "norm_name".
        inv_idx_df: rows with "recipe_id" and "ingredient_id".

    Returns:
        The populated nx.Graph.
    """
    graph = nx.Graph()

    # Recipe nodes
    graph.add_nodes_from(
        (
            f"recipe_{rec['recipe_id']}",
            {
                "type": "recipe",
                "label": rec["title"],
                "category": rec.get("category", None),
            },
        )
        for _, rec in recipes_df.iterrows()
    )

    # Ingredient nodes
    graph.add_nodes_from(
        (
            f"ingredient_{ing['ingredient_id']}",
            {"type": "ingredient", "label": ing["norm_name"]},
        )
        for _, ing in ingredients_df.iterrows()
    )

    # Recipe-ingredient edges; pairs that reference an unknown node are skipped
    candidate_pairs = (
        (f"recipe_{link['recipe_id']}", f"ingredient_{link['ingredient_id']}")
        for _, link in inv_idx_df.iterrows()
    )
    graph.add_edges_from(
        (recipe_node, ingredient_node)
        for recipe_node, ingredient_node in candidate_pairs
        if recipe_node in graph and ingredient_node in graph
    )

    return graph

In [None]:
def plot_recipe_overlap_network(G, sample_n_recipes=30, seed=42):
    """
    Visual 4:
    Plot a recipe–ingredient overlap network.

    Nodes:
      - Squares = recipes
      - Circles = ingredients

    Parameters:
        G: graph whose nodes carry a "type" attribute ("recipe" or
            "ingredient"); recipe nodes also need a "label" attribute.
        sample_n_recipes: if not None and the graph has more recipe nodes
            than this, only the first `sample_n_recipes` recipe nodes (in
            graph order) and their neighbours are drawn.
        seed: random seed for the spring layout so the figure is reproducible
            across runs; pass None for a different layout each time.

    Returns:
        The matplotlib Figure containing the network drawing.
    """
    H = G

    # Recipe sample (to limit graph output, OPTIONAL)
    recipe_nodes = [n for n, d in G.nodes(data=True) if d.get("type") == "recipe"]
    if sample_n_recipes is not None and len(recipe_nodes) > sample_n_recipes:
        sampled_recipes = set(recipe_nodes[:sample_n_recipes])
        neighbors = set()
        for r in sampled_recipes:
            neighbors.update(G.neighbors(r))
        # Nothing below mutates the graph, so a subgraph view is enough and
        # the input never needs to be copied.
        H = G.subgraph(sampled_recipes | neighbors)

    # Layout (seeded: spring_layout starts from random positions otherwise)
    pos = nx.spring_layout(H, k=0.3, iterations=50, seed=seed)

    recipe_nodes = [n for n, d in H.nodes(data=True) if d.get("type") == "recipe"]
    ingredient_nodes = [n for n, d in H.nodes(data=True) if d.get("type") == "ingredient"]

    # Visualize
    fig, ax = plt.subplots(figsize=(10, 8))

    nx.draw_networkx_nodes(
        H, pos, nodelist=recipe_nodes, node_shape="s", node_size=400,
        label="Recipes", ax=ax,
    )
    nx.draw_networkx_nodes(
        H, pos, nodelist=ingredient_nodes, node_shape="o", node_size=200,
        alpha=0.6, label="Ingredients", ax=ax,
    )
    nx.draw_networkx_edges(H, pos, alpha=0.3, ax=ax)

    # Only recipes get text labels, to keep the plot readable
    labels = {n: H.nodes[n]["label"] for n in recipe_nodes}
    nx.draw_networkx_labels(H, pos, labels=labels, font_size=8, ax=ax)

    ax.set_title("Recipe–Ingredient Overlap Network")
    ax.set_axis_off()
    ax.legend()
    fig.tight_layout()
    return fig

Run Visual 4

In [None]:
# from sqlalchemy import create_engine
#
# from visuals.visual4_network import (
#     load_recipe_ingredient_data,
#     build_recipe_ingredient_graph,
#     plot_recipe_overlap_network,
# )
#
# engine = create_engine("sqlite:///cookbook.db")
# recipes_df, ingredients_df, inv_idx_df = load_recipe_ingredient_data(engine)
# G = build_recipe_ingredient_graph(recipes_df, ingredients_df, inv_idx_df)
# plot_recipe_overlap_network(G, sample_n_recipes=30)