In [1]:
import pandas as pd

# Per-aspect entailment probabilities, read from a JSON file in the working
# directory. The displayed frame has one row per `sequence`, with parallel
# `labels` / `scores` lists.
probs = pd.read_json(path_or_buf="aspect_entailment_probabilities.json")
probs

Unnamed: 0,sequence,labels,scores
0,Shrimp roasted with vegetables in a zesty Ital...,"[shrimp, balanced diet]","[0.9963915348052971, 0.9231709837913511]"
1,Shrimp balls made with pork fat deep fried in oil,"[shrimp, balanced diet]","[0.90291154384613, 0.005868990905582]"
2,Breaded shrimp with whole milk and creole seas...,"[shrimp, balanced diet]","[0.9757009148597711, 0.39306277036666804]"
3,Cucumber sauce with fat-free yogurt and reduce...,"[balanced diet, shrimp]","[0.9392296075820921, 8.820796210784466e-05]"
4,Healthy napa cabbage and green onion coleslaw ...,"[balanced diet, shrimp]","[0.9921187162399291, 0.00011050567263700001]"
...,...,...,...
2495,Fish meat cooked on a grill / stovetop / oven,"[seafood, only have a bbq, eating dairy makes ...","[0.995876610279083, 0.002118620323017, 3.79707..."
2496,Fish meat cooked in a steamer,"[seafood, only have a bbq, eating dairy makes ...","[0.9986196756362911, 0.000332452327711, 2.4294..."
2497,Fish meat prepared with swiss cheese,"[seafood, eating dairy makes me sick, only hav...","[0.9940803647041321, 0.004660131875425, 0.0004..."
2498,Seafood linguine recipe containing shrimp,"[seafood, only have a bbq, eating dairy makes ...","[0.9984157681465141, 0.000328484253259, 7.7745..."


In [2]:
# Whole-query entailment probabilities, read from a JSON file in the working
# directory. In the displayed frame each row carries a single-element
# `labels` list (the query text) and a single-element `scores` list.
query_probs = pd.read_json(path_or_buf="query_entailment_probabilities.json")
query_probs

Unnamed: 0,sequence,labels,scores
0,Shrimp roasted with vegetables in a zesty Ital...,[I would like a shrimp recipe and I'm trying t...,[0.890443801879882]
1,Shrimp balls made with pork fat deep fried in oil,[I would like a shrimp recipe and I'm trying t...,[0.064984962344169]
2,Breaded shrimp with whole milk and creole seas...,[I would like a shrimp recipe and I'm trying t...,[0.7555378675460811]
3,Cucumber sauce with fat-free yogurt and reduce...,[I would like a shrimp recipe and I'm trying t...,[0.001509124645963]
4,Healthy napa cabbage and green onion coleslaw ...,[I would like a shrimp recipe and I'm trying t...,[0.0007269414491020001]
...,...,...,...
2495,Salmon fillets grilled in a pan with herbs and...,[What are recipes for fish but not baked in th...,[0.9021332859992981]
2496,Baked halibut fish fillets with Worcestershire...,[What are recipes for fish but not baked in th...,[0.167374327778816]
2497,Breaded fish fillets baked with parmesan chees...,[What are recipes for fish but not baked in th...,[0.08219508081674501]
2498,Cheesy chicken breast halves skillet-cooked wi...,[What are recipes for fish but not baked in th...,[0.0012377836974330001]


In [3]:
# Raw Recipe-MPR records, fetched from the project's GitHub repository.
recipe_mpr = pd.read_json(
    'https://raw.githubusercontent.com/D3Mlab/Recipe-MPR/main/data/500QA.json',
    orient='records',
)

# Reshape to one row per (query, option, aspect):
#   1. pull the aspect names (keys of `correctness_explanation`) and the
#      option ids / texts (keys / values of `options`) out into list columns,
#   2. keep only queries that have more than one aspect,
#   3. explode the options, flag the option whose id equals `answer`,
#   4. explode the aspects.
recipe_mpr_clean = (
    recipe_mpr
    .assign(
        query_aspects=lambda d: d['correctness_explanation'].map(lambda expl: list(expl)),
        num_aspects=lambda d: d['query_aspects'].map(len),
        option_id=lambda d: d['options'].map(lambda opts: list(opts)),
        option_text=lambda d: d['options'].map(lambda opts: list(opts.values())),
    )
    .loc[lambda d: d['num_aspects'] > 1]
    .explode(['option_id', 'option_text'])
    .assign(correct=lambda d: d['answer'] == d['option_id'])
    .loc[:, ['query', 'query_aspects', 'option_id', 'option_text', 'correct']]
    .explode('query_aspects')
)

recipe_mpr_clean

Unnamed: 0,query,query_aspects,option_id,option_text,correct
0,I want to make a warm dish containing oysters,warm dish,08cb462fdf,Simple creamy oyster soup,True
0,I want to make a warm dish containing oysters,oysters,08cb462fdf,Simple creamy oyster soup,True
0,I want to make a warm dish containing oysters,warm dish,5b9441298f,Seasoned salted crackers shaped like oysters,False
0,I want to make a warm dish containing oysters,oysters,5b9441298f,Seasoned salted crackers shaped like oysters,False
0,I want to make a warm dish containing oysters,warm dish,00310c3462,Creamy clam chowder made with whole milk and b...,False
...,...,...,...,...,...
499,What's a good broccoli side dish I can make fo...,side dish,29500eb70f,Beef paired with bacon entree,False
499,What's a good broccoli side dish I can make fo...,loves meat,29500eb70f,Beef paired with bacon entree,False
499,What's a good broccoli side dish I can make fo...,broccoli,81e0d049d7,Broccoli cooked in the oven,False
499,What's a good broccoli side dish I can make fo...,side dish,81e0d049d7,Broccoli cooked in the oven,False


In [4]:
import numpy as np
def noisy_or(probs):
    """Combine probabilities with a noisy-OR: ``1 - prod(1 - p_i)``.

    Parameters
    ----------
    probs : array-like of float
        Probabilities to combine (list, tuple, NumPy array or pandas Series).
        Values are coerced to a float array, so object-dtype Series (such as
        the result of ``DataFrame.explode``) are accepted as well.

    Returns
    -------
    float
        The noisy-OR of the inputs. An empty input yields ``0.0`` because the
        empty product is 1.
    """
    # np.prod is the supported spelling; the np.product alias was deprecated
    # and then removed in NumPy 2.0.
    complement = 1.0 - np.asarray(probs, dtype=float)
    return 1.0 - np.prod(complement)

# Long format: one row per (option text, aspect label) with its score.
aspect_scores_long = (
    probs
    .explode(['labels', 'scores'])
    .rename(columns={'sequence': 'option_text', 'labels': 'query_aspects'})
)

# Attach query / correctness information. The merge uses pandas' defaults,
# i.e. an inner join on whatever column names the two frames share.
aspect_scores_labeled = aspect_scores_long.merge(recipe_mpr_clean)

# Collapse the per-aspect scores of every (query, option) pair four ways:
# min, max, product (renamed to 'all') and noisy-OR (renamed to 'any').
query_item_scores = (
    aspect_scores_labeled
    .groupby(['query', 'option_text', 'correct'])['scores']
    .agg(['min', 'max', 'prod', noisy_or])
    .reset_index()
    .rename(columns={'prod': 'all', 'noisy_or': 'any'})
)

query_item_scores

Unnamed: 0,query,option_text,correct,min,max,all,any
0,I would like a shrimp recipe and I'm trying t...,Breaded shrimp with whole milk and creole seas...,False,0.393063,0.975701,0.383512,0.985252
1,I would like a shrimp recipe and I'm trying t...,Cucumber sauce with fat-free yogurt and reduce...,False,0.000088,0.93923,0.000083,0.939235
2,I would like a shrimp recipe and I'm trying t...,Healthy napa cabbage and green onion coleslaw ...,False,0.000111,0.992119,0.00011,0.992120
3,I would like a shrimp recipe and I'm trying t...,Shrimp balls made with pork fat deep fried in oil,False,0.005869,0.902912,0.005299,0.903481
4,I would like a shrimp recipe and I'm trying t...,Shrimp roasted with vegetables in a zesty Ital...,True,0.923171,0.996392,0.91984,0.999723
...,...,...,...,...,...,...,...
2495,Would I be able to get a breakfast recipe with...,Breakfast sausage made from scratch using pork...,False,0.000043,0.000892,0.0,0.000999
2496,Would I be able to get a breakfast recipe with...,Fish meat cooked in a steamer,False,0.000024,0.99862,0.0,0.998620
2497,Would I be able to get a breakfast recipe with...,Fish meat cooked on a grill / stovetop / oven,True,0.000038,0.995877,0.0,0.995886
2498,Would I be able to get a breakfast recipe with...,Fish meat prepared with swiss cheese,False,0.000449,0.99408,0.000002,0.994111


In [5]:
# Aspect-level scores with commas removed from the query text and surrounding
# whitespace stripped -- presumably so the text lines up with the query
# strings stored in `query_probs` (TODO confirm).
aspect_level_scores = query_item_scores.assign(
    query=lambda d: d['query'].str.replace(",", "", regex=False).str.strip()
)

# Whole-query ("monolithic") scores: one row per (option text, query).
monolithic_scores = (
    query_probs
    .explode(['labels', 'scores'])
    .rename(columns={'sequence': 'option_text', 'labels': 'query', 'scores': 'monolithic'})
)

# Default merge: inner join on the column names the two frames share.
query_item_scores = monolithic_scores.merge(aspect_level_scores)

query_item_scores

Unnamed: 0,option_text,query,monolithic,correct,min,max,all,any
0,Shrimp roasted with vegetables in a zesty Ital...,I would like a shrimp recipe and I'm trying to...,0.890444,True,0.923171,0.996392,0.91984,0.999723
1,Shrimp balls made with pork fat deep fried in oil,I would like a shrimp recipe and I'm trying to...,0.064985,False,0.005869,0.902912,0.005299,0.903481
2,Breaded shrimp with whole milk and creole seas...,I would like a shrimp recipe and I'm trying to...,0.755538,False,0.393063,0.975701,0.383512,0.985252
3,Cucumber sauce with fat-free yogurt and reduce...,I would like a shrimp recipe and I'm trying to...,0.001509,False,0.000088,0.93923,0.000083,0.939235
4,Healthy napa cabbage and green onion coleslaw ...,I would like a shrimp recipe and I'm trying to...,0.000727,False,0.000111,0.992119,0.00011,0.992120
...,...,...,...,...,...,...,...,...
2495,Salmon fillets grilled in a pan with herbs and...,What are recipes for fish but not baked in the...,0.902133,True,0.998007,0.99921,0.997219,0.999998
2496,Baked halibut fish fillets with Worcestershire...,What are recipes for fish but not baked in the...,0.167374,False,0.00297,0.998685,0.002966,0.998689
2497,Breaded fish fillets baked with parmesan chees...,What are recipes for fish but not baked in the...,0.082195,False,0.000728,0.990353,0.000721,0.990360
2498,Cheesy chicken breast halves skillet-cooked wi...,What are recipes for fish but not baked in the...,0.001238,False,0.000102,0.998578,0.000101,0.998578


In [6]:
# Rank distribution of the correct option under each scoring strategy.
#
# For every score column, options are ranked within their query (highest
# score = rank 1) and the ranks of the rows flagged `correct` are tallied.
# Each cell of the resulting table is the percentage of correct rows that
# landed at that rank; rows are ordered by their rank-1 percentage.
(
    pd.concat(
        [
            query_item_scores
            .assign(
                # method='max' gives tied options the worst rank of the tie, so
                # a tie never counts in favour of the correct option. The default
                # method='average' yields fractional ranks (e.g. 1.5) that
                # .astype(int) would silently truncate to the better rank.
                rank = lambda d: (
                    d.groupby('query')[score_column]
                    .rank(ascending=False, method='max')
                    .astype(int)
                )
            )
            .query('correct')
            ['rank']
            # Percentages are derived from the data instead of dividing raw
            # counts by a hard-coded 5, which is only a percentage when there
            # are exactly 500 correct rows.
            .value_counts(normalize=True)
            .mul(100)
            .rename(score_column)
            for score_column in ['min', 'max', 'all', 'any', 'monolithic']
        ],
        axis = 1
    )
    .T
    .fillna(0)            # a rank that never occurs for a strategy is 0 %
    .sort_index(axis=1)   # rank columns in ascending order
    .round(1)
    .sort_values(1, ascending = False)
)


rank,1,2,3,4,5
all,72.8,13.4,8.8,2.6,2.4
min,70.6,14.6,8.4,4.4,2.0
monolithic,69.0,18.4,8.6,1.8,2.2
any,53.4,22.2,13.8,6.2,4.4
max,36.8,24.8,20.8,11.4,6.2
