In [1]:
multi_aspect_queries = {
    'Italian place with a burger': ['Italian place', 'burger'],
    'A cafe that also offers beer': ['cafe', 'beer'],
    'Japanese restaurant with pasta': ['Japanese restaurant', 'pasta'],
    'An ice cream shop with bubble tea': ['ice cream shop', 'bubble tea'],
    'I am in search of a fancy Pakistani restaurant with authentic food': ['fancy', 'Pakistani restaurant']
}

In [2]:
import pandas as pd

rird_reviews = (
    pd.read_csv(
        'https://raw.githubusercontent.com/D3Mlab/rir/main/data/50_restaurants_all_rates.csv',
        usecols = ["name", "review_text"]
    )
    .groupby('name')
    .head(5)
    .rename(columns = {'review_text': 'sequence', 'name': 'restaurant'})
)

rird_reviews

Unnamed: 0,sequence,restaurant
0,Bit of a wait at the door. No problem. Spicy k...,Hokkaido Ramen Santouka
1,"Not sure why, but the shoyu broth and noodle h...",Hokkaido Ramen Santouka
2,It should come to no surprise that I am a huge...,Hokkaido Ramen Santouka
3,"WHAT TO TRY?!Their ramen!! Personally, liked t...",Hokkaido Ramen Santouka
4,Came here on a Sunday at around 6pm and there ...,Hokkaido Ramen Santouka
...,...,...
28737,Is it just me or has kinton ramen decreased th...,KINTON RAMEN
28738,I'm pretty much here for lunch every other wee...,KINTON RAMEN
28739,"This is a super belated review, as I came here...",KINTON RAMEN
28740,"After a crazy week of midterms, assignments, p...",KINTON RAMEN


In [3]:
probs = (
    pd.read_json("aspect_entailment_probabilities.json")
    .pipe(pd.merge, rird_reviews, on = 'sequence', how = 'inner')
)
probs

Unnamed: 0,sequence,labels,scores,restaurant
0,Bit of a wait at the door. No problem. Spicy k...,"[Japanese restaurant, cafe, beer, Pakistani re...","[0.9665927290916441, 0.47768211364746005, 0.05...",Hokkaido Ramen Santouka
1,"Not sure why, but the shoyu broth and noodle h...","[Japanese restaurant, fancy, cafe, bubble tea,...","[0.9252469539642331, 0.8747860789299011, 0.300...",Hokkaido Ramen Santouka
2,It should come to no surprise that I am a huge...,"[Japanese restaurant, fancy, cafe, ice cream s...","[0.9657204151153561, 0.878256320953369, 0.6107...",Hokkaido Ramen Santouka
3,"WHAT TO TRY?!Their ramen!! Personally, liked t...","[Japanese restaurant, fancy, cafe, Pakistani r...","[0.9517374634742731, 0.7329214811325071, 0.511...",Hokkaido Ramen Santouka
4,Came here on a Sunday at around 6pm and there ...,"[Japanese restaurant, fancy, cafe, burger, Pak...","[0.9442551732063291, 0.6086125373840331, 0.326...",Hokkaido Ramen Santouka
...,...,...,...,...
245,Is it just me or has kinton ramen decreased th...,"[Japanese restaurant, fancy, cafe, Pakistani r...","[0.9550507068634031, 0.515392124652862, 0.3679...",KINTON RAMEN
246,I'm pretty much here for lunch every other wee...,"[Japanese restaurant, fancy, cafe, pasta, beer...","[0.8074813485145561, 0.28223115205764704, 0.21...",KINTON RAMEN
247,"This is a super belated review, as I came here...","[fancy, Japanese restaurant, cafe, bubble tea,...","[0.9803725481033321, 0.972660958766937, 0.6445...",KINTON RAMEN
248,"After a crazy week of midterms, assignments, p...","[Japanese restaurant, fancy, cafe, bubble tea,...","[0.98530101776123, 0.456487208604812, 0.437323...",KINTON RAMEN


In [4]:
query_probs = pd.read_json("query_entailment_probabilities.json")
query_probs

Unnamed: 0,sequence,labels,scores
0,Bit of a wait at the door. No problem. Spicy k...,"[A cafe that also offers beer, I am in search ...","[0.333880096673965, 0.100818902254104, 0.02953..."
1,"Not sure why, but the shoyu broth and noodle h...","[A cafe that also offers beer, Japanese restau...","[0.13973394036293002, 0.024683702737092, 0.002..."
2,It should come to no surprise that I am a huge...,"[A cafe that also offers beer, Japanese restau...","[0.290431022644042, 0.19327768683433502, 0.054..."
3,"WHAT TO TRY?!Their ramen!! Personally, liked t...","[Japanese restaurant with pasta, A cafe that a...","[0.18128916621208102, 0.025067737326025002, 0...."
4,Came here on a Sunday at around 6pm and there ...,"[A cafe that also offers beer, Japanese restau...","[0.22608198225498202, 0.08156598359346301, 0.0..."
...,...,...,...
245,Is it just me or has kinton ramen decreased th...,[I am in search of a fancy Pakistani restauran...,"[0.39815038442611606, 0.22647038102149902, 0.1..."
246,I'm pretty much here for lunch every other wee...,"[Japanese restaurant with pasta, A cafe that a...","[0.036107074469327004, 0.023303780704736002, 0..."
247,"This is a super belated review, as I came here...","[Japanese restaurant with pasta, A cafe that a...","[0.236581072211265, 0.14242428541183402, 0.042..."
248,"After a crazy week of midterms, assignments, p...","[A cafe that also offers beer, Japanese restau...","[0.22503134608268702, 0.188280180096626, 0.065..."


In [5]:
rird = pd.read_csv('https://raw.githubusercontent.com/D3Mlab/rir/main/data/PMD.csv')

rird_clean = (
    rird
    .rename(columns = {'Restaurant name': 'restaurant', 'If only Low or  High': 'correct'})
    [['query', 'restaurant', 'correct']]
    .assign(
        query_aspects = lambda d: d['query'].map(multi_aspect_queries),
        correct = lambda d: d['correct'].astype(bool)
    )
    .dropna()
    .explode(['query_aspects'])
)

rird_clean

Unnamed: 0,query,restaurant,correct,query_aspects
67,I am in search of a fancy Pakistani restaurant...,Ding Tai Fung,False,fancy
67,I am in search of a fancy Pakistani restaurant...,Ding Tai Fung,False,Pakistani restaurant
91,Japanese restaurant with pasta,Ding Tai Fung,False,Japanese restaurant
91,Japanese restaurant with pasta,Ding Tai Fung,False,pasta
93,Italian place with a burger,Ding Tai Fung,False,Italian place
...,...,...,...,...
4993,Italian place with a burger,Cactus Club Cafe,False,burger
4996,A cafe that also offers beer,Cactus Club Cafe,True,cafe
4996,A cafe that also offers beer,Cactus Club Cafe,True,beer
4998,An ice cream shop with bubble tea,Cactus Club Cafe,False,ice cream shop


In [6]:
import numpy as np
def noisy_or(probs):
  return 1 - np.product(1 - probs)

query_review_scores = (
    probs
    .explode(['labels', 'scores'])
    .rename(columns = {'sequence': 'review_text', 'labels': 'query_aspects'})
    .merge(rird_clean)
    .groupby(['query', 'restaurant', 'review_text', 'correct'])
    ['scores']
    .agg(['min', 'max', 'prod', noisy_or])
    .reset_index()
    .rename(columns = {'prod': 'all', 'noisy_or': 'any'})
)

query_review_scores

Unnamed: 0,query,restaurant,review_text,correct,min,max,all,any
0,A cafe that also offers beer,Alchemy Coffee,A nice and simple place for breakfast/brunch i...,False,0.000752,0.622969,0.000468,0.623253
1,A cafe that also offers beer,Alchemy Coffee,Came here on Friday morning 930ish. Can't fin...,False,0.020308,0.925135,0.018788,0.926655
2,A cafe that also offers beer,Alchemy Coffee,First time coming here because I usually avoid...,False,0.029387,0.870833,0.025591,0.874629
3,A cafe that also offers beer,Alchemy Coffee,Service was good. Eggs Benny I give 3/5. The g...,False,0.016793,0.848258,0.014244,0.850806
4,A cafe that also offers beer,Alchemy Coffee,This is one of restaurants that had been on my...,False,0.001132,0.332867,0.000377,0.333623
...,...,...,...,...,...,...,...,...
1245,Japanese restaurant with pasta,Real Sports Bar & Grill,"Ambiance, amazing. Wait staff, helpful, funny ...",False,0.037518,0.398548,0.014953,0.421114
1246,Japanese restaurant with pasta,Real Sports Bar & Grill,As someone returning to Canada after 4 years a...,False,0.000709,0.00266,0.000002,0.003367
1247,Japanese restaurant with pasta,Real Sports Bar & Grill,Came for my birthday dinner with the wife and ...,False,0.017138,0.063922,0.001096,0.079965
1248,Japanese restaurant with pasta,Real Sports Bar & Grill,Good staff and food is ok. Overpriced.Drink se...,False,0.423185,0.432346,0.182962,0.672569


In [7]:
query_review_scores_long = (
    query_probs
    .explode(['labels', 'scores'])
    .rename(columns = {'sequence': 'review_text', 'labels': 'query', 'scores': 'monolithic'})
    .merge(query_review_scores)
    .assign(
        mono_x_all = lambda d: d['monolithic'] * d['all'], 
        mono_plus_all = lambda d: d['monolithic'] + d['all'] 
    )
    # Pivot long
    .melt(id_vars = ['query', 'restaurant', 'review_text', 'correct'], var_name='method', value_name='score')
    .convert_dtypes()
)

query_review_scores_long

Unnamed: 0,query,restaurant,review_text,correct,method,score
0,A cafe that also offers beer,Hokkaido Ramen Santouka,Bit of a wait at the door. No problem. Spicy k...,False,monolithic,0.33388
1,I am in search of a fancy Pakistani restaurant...,Hokkaido Ramen Santouka,Bit of a wait at the door. No problem. Spicy k...,False,monolithic,0.100819
2,Japanese restaurant with pasta,Hokkaido Ramen Santouka,Bit of a wait at the door. No problem. Spicy k...,False,monolithic,0.029536
3,An ice cream shop with bubble tea,Hokkaido Ramen Santouka,Bit of a wait at the door. No problem. Spicy k...,False,monolithic,0.004847
4,Italian place with a burger,Hokkaido Ramen Santouka,Bit of a wait at the door. No problem. Spicy k...,False,monolithic,0.000916
...,...,...,...,...,...,...
8745,Japanese restaurant with pasta,KINTON RAMEN,First time visiting Kinton although I have tri...,False,mono_plus_all,0.246351
8746,A cafe that also offers beer,KINTON RAMEN,First time visiting Kinton although I have tri...,False,mono_plus_all,0.163392
8747,An ice cream shop with bubble tea,KINTON RAMEN,First time visiting Kinton although I have tri...,False,mono_plus_all,0.076038
8748,Italian place with a burger,KINTON RAMEN,First time visiting Kinton although I have tri...,False,mono_plus_all,0.023532


In [8]:
for k in range(5):
    print(query_review_scores_long.groupby(['query', 'restaurant', 'method'])['score'].nlargest(k).mean())

<NA>
0.3153095976353582
0.25233652508970833
0.21118900813871058
0.17964582521327319


In [9]:
grouping_cols = ['query', 'restaurant', 'method', 'correct']
query_item_scores = pd.concat(
    [(
        query_review_scores_long
        .groupby(grouping_cols)
        ['score'].nlargest(k)
        .groupby(grouping_cols)
        .mean()
        .reset_index()
        .assign(k = k)
    ) for k in range(1, 6)]
)

query_item_scores

Unnamed: 0,query,restaurant,method,correct,score,k
0,A cafe that also offers beer,Alchemy Coffee,all,False,0.025591,1
1,A cafe that also offers beer,Alchemy Coffee,any,False,0.926655,1
2,A cafe that also offers beer,Alchemy Coffee,max,False,0.925135,1
3,A cafe that also offers beer,Alchemy Coffee,min,False,0.029387,1
4,A cafe that also offers beer,Alchemy Coffee,mono_plus_all,False,0.52575,1
...,...,...,...,...,...,...
1745,Japanese restaurant with pasta,Real Sports Bar & Grill,max,False,0.204009,5
1746,Japanese restaurant with pasta,Real Sports Bar & Grill,min,False,0.104599,5
1747,Japanese restaurant with pasta,Real Sports Bar & Grill,mono_plus_all,False,0.182949,5
1748,Japanese restaurant with pasta,Real Sports Bar & Grill,mono_x_all,False,0.014238,5


In [10]:
# Average rank for correct restaurant across the 5 queries
# 'method' refers to aggregation across aspects
# 'k' refers to the top k used for late fusion
(
    query_item_scores
    .assign(rank = lambda d: d.groupby(['query', 'method', 'k'])['score'].rank(ascending=False).astype(int))
    .sort_values(['query', 'method', 'k', 'rank'])
    .query('correct')
    .groupby(['method', 'k'])
    ['rank']
    .mean()
    .unstack()
)

k,1,2,3,4,5
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
all,8.8,8.8,8.8,8.8,8.8
any,9.6,10.4,10.6,10.2,11.2
max,11.6,11.2,12.8,11.4,11.2
min,11.0,10.8,10.2,10.2,10.2
mono_plus_all,7.8,8.0,8.4,7.8,8.0
mono_x_all,9.2,9.4,9.4,9.4,9.4
monolithic,11.6,11.2,11.2,11.2,11.2
