In [1]:
import numpy as np
import pandas as pd

In [2]:
# Fetch the tab-separated recipe table and key it by the recipe_slug column.
recipes = pd.read_csv(
    'https://raw.githubusercontent.com/bu-cds-dx704/dx704-project-04/refs/heads/main/recipes.tsv',
    sep="\t",
).set_index("recipe_slug")
recipes

Unnamed: 0_level_0,recipe_title,recipe_introduction
recipe_slug,Unnamed: 1_level_1,Unnamed: 2_level_1
falafel,Falafel,Falafel is a popular Middle Eastern dish made ...
spamburger,Spamburger,Spamburger is a type of hamburger that is made...
bacon-fried-rice,Bacon Fried Rice,Bacon fried rice is a savory and satisfying di...
chicken-fingers,Chicken Fingers,Chicken fingers are a popular dish made from c...
apple-crisp,Apple Crisp,Apple crisp is a classic dessert made with bak...
...,...,...
bacon-mac-and-cheese,Bacon Mac And Cheese,Bacon mac and cheese is a delicious and comfor...
chicken-alfredo-lasagna,Chicken Alfredo Lasagna,Chicken alfredo lasagna is a delicious twist o...
classic-beef-lasagna,Classic Beef Lasagna,Classic beef lasagna is a hearty and comfortin...
vegetarian-mushroom-lasagna,Vegetarian Mushroom Lasagna,Vegetarian mushroom lasagna is a delicious and...


In [3]:
def add_feature(feature_name):
    """Add a boolean column to the global ``recipes`` frame flagging title matches.

    The new column is named ``feature_name`` and is True for rows whose
    ``recipe_title`` contains ``feature_name`` as a case-insensitive substring.

    Parameters
    ----------
    feature_name : str
        Literal text to look for in the title; also used as the column name.

    Notes
    -----
    Mutates ``recipes`` in place. Re-running with the same name overwrites
    the existing column.
    """
    titles = recipes["recipe_title"].str.lower()
    # regex=False: the keyword is literal text, so characters such as "(" or "+"
    # must not be interpreted as a pattern.
    # na=False: a missing title counts as "no match" so the column stays a pure
    # boolean mask that is safe to use for .loc indexing.
    recipes[feature_name] = titles.str.contains(feature_name.lower(), regex=False, na=False)

In [4]:
# Derive one boolean title-keyword column per entry, in this order.
for keyword in ("apple", "bacon", "chicken", "crisp", "lasagna", "veg"):
    add_feature(keyword)

recipes

Unnamed: 0_level_0,recipe_title,recipe_introduction,apple,bacon,chicken,crisp,lasagna,veg
recipe_slug,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
falafel,Falafel,Falafel is a popular Middle Eastern dish made ...,False,False,False,False,False,False
spamburger,Spamburger,Spamburger is a type of hamburger that is made...,False,False,False,False,False,False
bacon-fried-rice,Bacon Fried Rice,Bacon fried rice is a savory and satisfying di...,False,True,False,False,False,False
chicken-fingers,Chicken Fingers,Chicken fingers are a popular dish made from c...,False,False,True,False,False,False
apple-crisp,Apple Crisp,Apple crisp is a classic dessert made with bak...,True,False,False,True,False,False
...,...,...,...,...,...,...,...,...
bacon-mac-and-cheese,Bacon Mac And Cheese,Bacon mac and cheese is a delicious and comfor...,False,True,False,False,False,False
chicken-alfredo-lasagna,Chicken Alfredo Lasagna,Chicken alfredo lasagna is a delicious twist o...,False,False,True,False,True,False
classic-beef-lasagna,Classic Beef Lasagna,Classic beef lasagna is a hearty and comfortin...,False,False,False,False,True,False
vegetarian-mushroom-lasagna,Vegetarian Mushroom Lasagna,Vegetarian mushroom lasagna is a delicious and...,False,False,False,False,True,True


In [5]:
# Synthetic ground-truth ratings: a uniform draw in [2, 3) for every recipe,
# then fixed overrides for the "veg" (1) and "bacon" (5) keyword features.
# The generator is seeded so a fresh Restart & Run All reproduces the same table.
RATING_SEED = 704
rating_rng = np.random.default_rng(RATING_SEED)

ratings = pd.DataFrame(
    {
        "recipe_title": recipes["recipe_title"],
        "rating_bad": rating_rng.uniform(low=2, high=3, size=len(recipes)),
    },
    index=recipes.index,
)
# Bacon is assigned last, so a title matching both keywords ends up rated 5.
ratings.loc[recipes["veg"], "rating_bad"] = 1
ratings.loc[recipes["bacon"], "rating_bad"] = 5
# "rating" is the column the bandit code reads; start it on the raw scale.
ratings["rating"] = ratings["rating_bad"]
ratings

Unnamed: 0_level_0,recipe_title,rating_bad,rating
recipe_slug,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
falafel,Falafel,2.630769,2.630769
spamburger,Spamburger,2.975923,2.975923
bacon-fried-rice,Bacon Fried Rice,5.000000,5.000000
chicken-fingers,Chicken Fingers,2.709550,2.709550
apple-crisp,Apple Crisp,2.923251,2.923251
...,...,...,...
bacon-mac-and-cheese,Bacon Mac And Cheese,5.000000,5.000000
chicken-alfredo-lasagna,Chicken Alfredo Lasagna,2.087846,2.087846
classic-beef-lasagna,Classic Beef Lasagna,2.108806,2.108806
vegetarian-mushroom-lasagna,Vegetarian Mushroom Lasagna,1.000000,1.000000


In [6]:
# Keep only the keyword indicator columns: everything not prefixed "recipe_".
features = recipes.drop(
    columns=[name for name in recipes.columns if name.startswith("recipe_")]
)
features

Unnamed: 0_level_0,apple,bacon,chicken,crisp,lasagna,veg
recipe_slug,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
falafel,False,False,False,False,False,False
spamburger,False,False,False,False,False,False
bacon-fried-rice,False,True,False,False,False,False
chicken-fingers,False,False,True,False,False,False
apple-crisp,True,False,False,True,False,False
...,...,...,...,...,...,...
bacon-mac-and-cheese,False,True,False,False,False,False
chicken-alfredo-lasagna,False,False,True,False,True,False
classic-beef-lasagna,False,False,False,False,True,False
vegetarian-mushroom-lasagna,False,False,False,False,True,True


In [7]:
# Number of recipes flagged for each keyword feature.
features.sum(axis=0)

Unnamed: 0,0
apple,4
bacon,13
chicken,5
crisp,6
lasagna,6
veg,2


In [8]:
# Number of keyword features set on each recipe (row-wise sum).
features.sum(axis=1)

Unnamed: 0_level_0,0
recipe_slug,Unnamed: 1_level_1
falafel,0
spamburger,0
bacon-fried-rice,1
chicken-fingers,1
apple-crisp,2
...,...
bacon-mac-and-cheese,1
chicken-alfredo-lasagna,2
classic-beef-lasagna,1
vegetarian-mushroom-lasagna,2


In [9]:
def calculate_bounds(recipe_choices=(), alpha=1.0):
    """Estimate every recipe's score and an optimistic upper bound on it.

    Fits a ridge-regularized linear model (identity regularizer) to the
    recipes tried so far, using the global ``features`` frame as inputs and
    ``ratings["rating"]`` as targets, then scores all recipes.

    Parameters
    ----------
    recipe_choices : iterable of index labels, optional
        Recipes already tried, one entry per trial (repeats allowed).
        Defaults to no trials.
    alpha : float, optional
        Multiplier on the uncertainty term added to the estimate.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``features`` with columns ``score_estimate``
        (``z @ theta_hat``), ``score_bound``
        (``score_estimate + alpha * sqrt(z @ inv(I + D.T @ D) @ z)``) and
        ``num_features`` (row sum of ``features``).
    """
    recipe_choices = list(recipe_choices)

    # Design matrix and observed ratings for the recipes tried so far.
    D = features.loc[recipe_choices].to_numpy(dtype="float64")
    c = ratings.loc[recipe_choices, "rating"].to_numpy(dtype="float64")

    # I + D^T D; with no trials D has zero rows and the product is all zeros,
    # so this reduces to the identity.
    DTDI = np.eye(D.shape[1]) + D.T @ D
    DTDI_inv = np.linalg.inv(DTDI)

    theta_hat = DTDI_inv @ D.T @ c

    # Same float64 cast as D so the arithmetic never depends on how the
    # indicator columns happen to be stored.
    features_array = features.to_numpy(dtype="float64")
    means = features_array @ theta_hat

    # Per-row quadratic form z^T (I + D^T D)^-1 z. Evaluated row by row so
    # recipes with identical features get bit-identical bounds (callers
    # compare bounds with ==).
    variances = np.array([z @ DTDI_inv @ z for z in features_array])

    df = pd.DataFrame(
        {
            "score_estimate": means,
            "score_bound": means + alpha * np.sqrt(variances),
        },
        index=features.index,
    )
    df["num_features"] = features.sum(axis=1)

    return df

# Bounds before any recipe has been tried, highest bound first.
calculate_bounds(recipe_choices=[]).sort_values(by="score_bound", ascending=False)

Unnamed: 0_level_0,score_estimate,score_bound,num_features
recipe_slug,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
cranberry-apple-crisp,0.0,1.414214,2
apple-crisp,0.0,1.414214,2
chicken-alfredo-lasagna,0.0,1.414214,2
vegetable-lasagna,0.0,1.414214,2
vegetarian-mushroom-lasagna,0.0,1.414214,2
...,...,...,...
cherry-pie,0.0,0.000000,0
cold-sesame-noodles,0.0,0.000000,0
soba-noodle-salad-with-peanut-dressing,0.0,0.000000,0
dan-dan-noodles,0.0,0.000000,0


In [10]:
# Bounds after trying the same bacon recipe three times.
calculate_bounds(recipe_choices=["bacon-mac-and-cheese"] * 3)

Unnamed: 0_level_0,score_estimate,score_bound,num_features
recipe_slug,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
falafel,0.00,0.000000,0
spamburger,0.00,0.000000,0
bacon-fried-rice,3.75,4.250000,1
chicken-fingers,0.00,1.000000,1
apple-crisp,0.00,1.414214,2
...,...,...,...
bacon-mac-and-cheese,3.75,4.250000,1
chicken-alfredo-lasagna,0.00,1.414214,2
classic-beef-lasagna,0.00,1.000000,1
vegetarian-mushroom-lasagna,0.00,1.414214,2


In [11]:
def try_picks(*, num_rounds=100, random_state=None, **kwargs):
    """Simulate repeated greedy picks of the recipe with the highest score bound.

    Each round recomputes ``calculate_bounds`` from all picks so far, takes the
    recipes tied for the maximum ``score_bound``, and samples one of them.

    Parameters
    ----------
    num_rounds : int, keyword-only
        Number of picks to simulate (default 100). Must be at least 1.
    random_state : int, numpy.random.Generator or None, keyword-only
        Seed or generator for the tie-break sampling. ``None`` (default)
        leaves pandas' default unseeded sampling in place.
    **kwargs
        Forwarded unchanged to ``calculate_bounds`` (e.g. ``alpha``).

    Returns
    -------
    pandas.DataFrame
        One row per round, in pick order, holding the chosen recipe's row of
        ``calculate_bounds`` output at the time of the pick plus
        ``true_rating`` from ``ratings["rating"]``; the recipe index is reset
        into a column.

    Raises
    ------
    ValueError
        If ``num_rounds`` is less than 1.
    """
    if num_rounds < 1:
        raise ValueError("num_rounds must be at least 1")

    # One generator shared by all rounds: passing the same integer seed to
    # every sample() call would repeat the same tie-break position each time.
    rng = None if random_state is None else np.random.default_rng(random_state)

    chosen_slugs = []
    picked_rows = []
    for _ in range(num_rounds):
        current_bounds = calculate_bounds(list(chosen_slugs), **kwargs)
        best_bound = current_bounds["score_bound"].max()
        best_recipes = current_bounds[current_bounds["score_bound"] == best_bound].copy()
        best_recipes["true_rating"] = ratings.loc[best_recipes.index, "rating"]
        choice = best_recipes.sample(1, random_state=rng)
        chosen_slugs.append(choice.index[0])
        picked_rows.append(choice)

    return pd.concat(picked_rows, axis=0).reset_index()

# Run the simulation with no overrides (alpha stays at calculate_bounds'
# default) and show the pick made in each round.
picks = try_picks()
picks

Unnamed: 0,recipe_slug,score_estimate,score_bound,num_features,true_rating
0,vegetarian-mushroom-lasagna,0.000000,1.414214,2,1.000000
1,chicken-alfredo-lasagna,0.333333,1.624328,2,2.087846
2,chicken-alfredo-lasagna,1.429904,2.220473,2,2.087846
3,chicken-alfredo-lasagna,1.682958,2.303132,2,2.087846
4,chicken-alfredo-lasagna,1.795427,2.322474,2,2.087846
...,...,...,...,...,...
95,bacon-wrapped-chicken,4.951269,5.082905,2,5.000000
96,bacon-wrapped-chicken,4.952099,5.082609,2,5.000000
97,bacon-wrapped-chicken,4.952901,5.082314,2,5.000000
98,bacon-wrapped-chicken,4.953677,5.082020,2,5.000000


In [12]:
# Tally how many rounds selected each recipe.
picks.groupby(by="recipe_slug").count()

Unnamed: 0_level_0,score_estimate,score_bound,num_features,true_rating
recipe_slug,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bacon-wrapped-chicken,62,62,62,62
chicken-alfredo-lasagna,37,37,37,37
vegetarian-mushroom-lasagna,1,1,1,1


In [18]:
# Repeat the simulation with the uncertainty multiplier raised to 2,
# then tally how many rounds selected each recipe.
picks = try_picks(alpha=2)
picks.groupby(by="recipe_slug").count()

Unnamed: 0_level_0,score_estimate,score_bound,num_features,true_rating
recipe_slug,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bacon-wrapped-chicken,98,98,98,98
chicken-alfredo-lasagna,1,1,1,1
vegetarian-mushroom-lasagna,1,1,1,1


In [19]:
# Shift and scale the raw ratings, (rating_bad - 1) * 0.2, so the 1-to-5 values
# land on 0-to-0.8, and make that rescaled column the active "rating".
ratings["rating_good"] = ratings["rating_bad"].sub(1).mul(0.2)
ratings["rating"] = ratings["rating_good"]
ratings

Unnamed: 0_level_0,recipe_title,rating_bad,rating,rating_good
recipe_slug,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
falafel,Falafel,2.630769,0.326154,0.326154
spamburger,Spamburger,2.975923,0.395185,0.395185
bacon-fried-rice,Bacon Fried Rice,5.000000,0.800000,0.800000
chicken-fingers,Chicken Fingers,2.709550,0.341910,0.341910
apple-crisp,Apple Crisp,2.923251,0.384650,0.384650
...,...,...,...,...
bacon-mac-and-cheese,Bacon Mac And Cheese,5.000000,0.800000,0.800000
chicken-alfredo-lasagna,Chicken Alfredo Lasagna,2.087846,0.217569,0.217569
classic-beef-lasagna,Classic Beef Lasagna,2.108806,0.221761,0.221761
vegetarian-mushroom-lasagna,Vegetarian Mushroom Lasagna,1.000000,0.000000,0.000000


In [20]:
# Same alpha=2 simulation, now run against the rescaled "rating" column;
# tally how many rounds selected each recipe.
picks = try_picks(alpha=2)
picks.groupby(by="recipe_slug").count()

Unnamed: 0_level_0,score_estimate,score_bound,num_features,true_rating
recipe_slug,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
apple-crisp,3,3,3,3
apple-pie,1,1,1,1
bacon-and-egg-breakfast-sandwich,4,4,4,4
bacon-chocolate-chip-cookies,3,3,3,3
bacon-egg-muffins,1,1,1,1
bacon-fried-rice,2,2,2,2
bacon-mac-and-cheese,4,4,4,4
bacon-souffle,3,3,3,3
bacon-wrapped-asparagus,5,5,5,5
bacon-wrapped-chicken,47,47,47,47
