In [1]:
# this folder is used to explore quantities
import re
import pandas as pd
import numpy as np


# Ingredient table; later cells read its `title`, `name` and `quantity_text` columns.
cookbook = pd.read_csv('../quantity_exploration/parsed_raw_ingredients_all_mini_with_recipe_context.csv')
# Store product listing; its `unit` column holds package-size strings such as "/12 Oz".
tj_inventory = pd.read_csv("../data/trader_joes_products_v2.csv")

In [2]:
# Preview the inventory table (pandas elides the middle rows of a long frame).
tj_inventory

Unnamed: 0,category,product_name,price,unit,url
0,Bakery,Herbed Dinner Rolls,$3.49,/12 Oz,https://www.traderjoes.com/home/products/pdp/h...
1,Bakery,Pumpkin & Spice Brioche Style Liège Waffles,$4.49,/11.64 Oz,https://www.traderjoes.com/home/products/pdp/p...
2,Bakery,Glazed Pumpkin Pie Spice Donut Holes,$3.49,/6 Oz,https://www.traderjoes.com/home/products/pdp/g...
3,Bakery,Pumpkin Spice Mini Sheet Cake,$5.99,/18 Oz,https://www.traderjoes.com/home/products/pdp/p...
4,Bakery,Sliced Apple Cinnamon Sourdough Bread,$4.99,/17.63 Oz,https://www.traderjoes.com/home/products/pdp/s...
...,...,...,...,...,...
1244,Snacks & Sweets,Scandinavian Swimmers,$3.79,/14 Oz,https://www.traderjoes.com/home/products/pdp/s...
1245,Snacks & Sweets,Dark Chocolate Orange Sticks,$3.99,/10 Oz,https://www.traderjoes.com/home/products/pdp/d...
1246,Snacks & Sweets,Twisted Cookie Sticks,$2.29,/2.65 Oz,https://www.traderjoes.com/home/products/pdp/t...
1247,Snacks & Sweets,Plantain Chips,$1.99,/6 Oz,https://www.traderjoes.com/home/products/pdp/p...


In [2]:
def split_quantity(qty):
    """Split a free-text quantity such as "4 tablespoons" into amount and unit.

    Parameters
    ----------
    qty : object
        Raw quantity text. Anything that is not a non-blank string
        (NaN, None, numbers, "") is treated as missing.

    Returns
    -------
    pd.Series
        Two positional elements ``[amount, unit]``:

        * ``amount`` -- the leading numeric text exactly as written
          (digits, Unicode fraction glyphs, "/", "." and inner spaces),
          with surrounding whitespace removed.
        * ``unit`` -- the lower-cased remainder starting at the first ASCII
          letter, or NaN when nothing follows the amount.

        Both elements are NaN when the text does not start with a quantity.
    """
    # Type check first: it also covers NaN/None and avoids calling pd.isna on
    # array-like values, whose truth value would be ambiguous.
    if not isinstance(qty, str):
        return pd.Series([np.nan, np.nan])

    text = qty.lower().strip()
    if text == "":
        return pd.Series([np.nan, np.nan])

    # Match numeric part (integer, float, or fraction) + unit
    match = re.match(r"([\d¼½¾⅓⅔⅛⅜⅝⅞\s\/\.]+)\s*([a-zA-Z]+.*)?", text)
    if match is None:
        return pd.Series([np.nan, np.nan])

    amount = match.group(1).strip()
    # The character class above also admits bare separators, so "/ cup" would
    # otherwise be reported with amount "/". Require at least one digit or
    # fraction glyph before accepting the amount.
    if re.search(r"[\d¼½¾⅓⅔⅛⅜⅝⅞]", amount) is None:
        return pd.Series([np.nan, np.nan])

    unit = match.group(2).strip() if match.group(2) else np.nan
    return pd.Series([amount, unit])

In [15]:
# Work on a copy of the three columns of interest so `cookbook` stays untouched,
# then split each free-text quantity into separate amount and unit columns.
ingre = cookbook.loc[:, ["title", "name", "quantity_text"]].copy()
ingre[["amount", "unit"]] = ingre["quantity_text"].apply(split_quantity)

In [16]:
# Discard every row with a missing value in any column, then renumber from 0.
# NOTE(review): split_quantity leaves `unit` as NaN for quantities written
# without a unit (e.g. "2"), so those rows are dropped here as well --
# confirm that is intended.
ingre = ingre.dropna()
ingre = ingre.reset_index(drop=True)

# Unicode vulgar-fraction glyphs and their ASCII spellings.
fractions_map = {
    "½": "1/2",
    "¼": "1/4",
    "¾": "3/4",
    "⅓": "1/3",
    "⅔": "2/3",
    "⅛": "1/8",
    "⅜": "3/8",
    "⅝": "5/8",
    "⅞": "7/8"
}

def normalize_fraction(text):
    """Replace Unicode fraction glyphs in ``text`` with ASCII fractions.

    A glyph written directly after a digit is the fractional part of a mixed
    number ("1½"), so a space is inserted before its expansion ("1 1/2").
    Without the space the result would read as the unrelated fraction "11/2".

    Parameters
    ----------
    text : object
        Amount text. Non-string values (e.g. NaN) are returned unchanged.

    Returns
    -------
    object
        The rewritten string, or ``text`` itself when it is not a string.
    """
    if not isinstance(text, str):
        return text
    for sym, rep in fractions_map.items():
        # Glyph glued to a preceding digit: keep the whole part separate.
        text = re.sub(r"(?<=\d)" + re.escape(sym), " " + rep, text)
        # Remaining occurrences stand alone or already follow a space.
        text = text.replace(sym, rep)
    return text

# Rewrite Unicode fraction glyphs in `amount` as ASCII fractions (e.g. "½" -> "1/2").
ingre["amount"] = ingre["amount"].apply(normalize_fraction)


In [17]:
# Spot-check the parsed `amount` / `unit` columns against `quantity_text`.
ingre.head()

Unnamed: 0,title,name,quantity_text,amount,unit
0,Aromatic Garlic Ginger Rice,butter,4 tablespoons,4,tablespoons
1,Aromatic Garlic Ginger Rice,garlic,2 tablespoons,2,tablespoons
2,Aromatic Garlic Ginger Rice,ginger,1 tablespoon,1,tablespoon
3,Aromatic Garlic Ginger Rice,jasmine rice,1 cup,1,cup
4,Aromatic Garlic Ginger Rice,hot water,2 cups,2,cups


In [18]:
# Inspect rows where the parsed unit is "wooden": the word after the number
# was a descriptor (e.g. "6 wooden" craft sticks), not a measurement unit.
wood = ingre.loc[ingre["unit"].eq("wooden")]
wood


Unnamed: 0,title,name,quantity_text,amount,unit
1019,Fruity Frozen Yogurt Pops,craft sticks,6 wooden,6,wooden


In [7]:
# Keep only the product name and the raw package-size string; copy so that
# adding columns to `store` does not touch `tj_inventory`.
store = tj_inventory[['product_name', 'unit']].copy()

In [8]:
def parse_store_unit(u):
    """Parse a package-size string such as "/12 Oz" into quantity and unit.

    Parameters
    ----------
    u : object
        Raw size text, optionally prefixed with "/". Non-string values are
        treated as missing.

    Returns
    -------
    pd.Series
        Two positional elements ``[qty, unit]``: ``qty`` as a float and
        ``unit`` as lower-cased text (may contain spaces, e.g. "fl oz").
        Both are None when the text is not a number followed by a unit.
    """
    if not isinstance(u, str):
        return pd.Series([None, None])

    # Extract numbers and unit (e.g., "12", "oz" from "/12 Oz").
    # The number must be a well-formed decimal: a looser "[\d\.]+" also
    # accepts "." or "1.2.3", which makes float() raise and aborts .apply().
    # The unit must start with a letter so trailing whitespace alone is not
    # reported as an empty-string unit.
    match = re.match(r"\/?(\d+\.?\d*|\.\d+)\s*([a-zA-Z][a-zA-Z\s]*)", u.strip())
    if match is None:
        return pd.Series([None, None])

    qty = float(match.group(1))
    unit = match.group(2).strip().lower()
    return pd.Series([qty, unit])

# Split each raw package-size string into a numeric quantity and a lower-cased unit.
store[["store_qty", "store_unit"]] = store["unit"].apply(parse_store_unit)

In [11]:
# Spot-check the parsed columns against the raw `unit` strings.
store.head()

Unnamed: 0,product_name,unit,store_qty,store_unit
0,Herbed Dinner Rolls,/12 Oz,12.0,oz
1,Pumpkin & Spice Brioche Style Liège Waffles,/11.64 Oz,11.64,oz
2,Glazed Pumpkin Pie Spice Donut Holes,/6 Oz,6.0,oz
3,Pumpkin Spice Mini Sheet Cake,/18 Oz,18.0,oz
4,Sliced Apple Cinnamon Sourdough Bread,/17.63 Oz,17.63,oz


In [9]:
# List the distinct parsed units to see the unit vocabulary of the store data.
store['store_unit'].unique()

array(['oz', 'lb', 'fl oz', 'doz', 'pint', 'qt', 'each'], dtype=object)