In [1]:
import os
import numpy as np
import pandas as pd
from copy import deepcopy

from src.config import user_ground_truth, user_recommendations, name_ref
from src.config import mturk_rating_file, mturk_simjudgement_file
from src.config import mturk_rating_raw, mturk_rating_filter, mturk_rating_file
from src.config import mturk_substitution_score_file, mturk_substitution_rating_scores

import warnings
warnings.filterwarnings('ignore')

# User study 1, ground truth & algorithm recommendations

In [2]:
# Load the ground-truth table from the path configured as `user_ground_truth`
# and preview its first two rows.
ground_truth = pd.read_csv(user_ground_truth)
ground_truth.head(2)

Unnamed: 0,UID,username,TS,date,PID,food
0,2135,akwmoody,153,2015-03-14,28742,"Mcintosh - Apple, 1 medium apple (150 g)"
1,2135,akwmoody,153,2015-03-14,9861,"Dannon Oikos - Fat Free Greek Vanilla Yogurt, ..."


In [3]:
# Load the model recommendations and attach a readable food name to each PID.
recommendations = pd.read_csv(user_recommendations)

# The name reference file is read without a header row: column 1 is used as
# the lookup key (matched against PID) and column 0 supplies the 'food' text.
ref = pd.read_csv(name_ref, header=None)
pid_to_name = ref.set_index(1)[0].to_dict()
recommendations['food'] = recommendations['PID'].map(pid_to_name)

recommendations.head(2)

Unnamed: 0,UID,username,models,pred_rank,PID,food
0,99,Alicia10633,mixture_decay_model,1,38534,"Shoprite - Large White Egg, 2 egg"
1,99,Alicia10633,mixture_decay_model,2,19044,"Heinz - Tomato Ketchup 64oz, 1 Tbsp"


# User study 1, worker ratings

In [4]:
# Load the per-worker rating table and preview its first two rows.
# NOTE(review): `mturk_rating_file` is written by the SimJudgement cell further
# down when its cache is rebuilt, so on a checkout without that file this read
# presumably fails — confirm the file ships with the data.
d = pd.read_csv(mturk_rating_file)
d.head(2)

Unnamed: 0,WorkerId,gt_item,rec_item,gt_item_id,rec_item_id,judgement,hint
0,A1G5PO0BLQPBJ7,"Generic - Medium Green Apple, 1 Medium 6.4 oz","Kroger - Mini Cinnamon Roll, 1 piece (9g)",25625,15623,0,0
1,AAXYYH9MI3PJM,"Generic - Medium Green Apple, 1 Medium 6.4 oz","Kroger - Mini Cinnamon Roll, 1 piece (9g)",25625,15623,0,0


# User study 1, SimJudgement by majority voting

In [5]:
# Build (or load from cache) the majority-vote SimJudgement table.
#
# When `mturk_simjudgement_file` is missing, the raw MTurk export is reshaped
# to one row per (worker, target item, candidate item), written out as the
# per-worker rating file, and then aggregated into one majority-vote label
# per item pair. The cached CSV is always re-read at the end so `df` has the
# same dtypes whether or not it was just rebuilt.
#
# NOTE: earlier versions of this cell assigned the id columns positionally and
# thereby swapped `gt_item_id` and `rec_item_id`. Files cached by that version
# keep the swapped ids until they are deleted and regenerated.
if not os.path.exists(mturk_simjudgement_file):

    # load MTurk worker choices
    response = pd.read_csv(mturk_rating_raw)

    c1, c2 = 'Input.text', 'Answer.sim.choice-'

    # 'Input.hint' holds a bracketed, comma-separated list of ints as text;
    # parse it into a Python list (int() tolerates surrounding whitespace).
    response['Input.hint'] = response['Input.hint'].apply(
        lambda s: [int(x) for x in s[1:-1].split(',')])

    # Free up the name 'Input.text' so it can be reused for the candidate item.
    response = response.rename(columns={'Input.text': 'target'})

    # Wide -> long: the export carries ten candidate columns per row
    # ('Input.text0'..'Input.text9') with matching 'Answer.sim.choice-N'.
    base_cols = ['WorkerId', 'target', 'Input.hint']
    dfs = []
    for idx in range(10):
        item_col, choice_col = c1 + str(idx), c2 + str(idx)
        df = response[base_cols + [item_col, choice_col]].rename(
            columns={item_col: c1, choice_col: c2})
        # Keep only the hint flag that belongs to this candidate position.
        df['Input.hint'] = df['Input.hint'].apply(lambda hints, i=idx: hints[i])
        dfs.append(df)

    df = pd.concat(dfs)
    df[c2] = df[c2].astype(int)

    # Pair key used to restrict the answers to the desired item pairs.
    df['fingerprint'] = df['target'] + df[c1]

    # SECURITY: allow_pickle=True unpickles the file's contents; only load
    # filter files produced by this project, never untrusted ones.
    desired_pairs = np.load(mturk_rating_filter, allow_pickle=True)
    df = df[df['fingerprint'].isin(desired_pairs)].copy()

    # Load name reference (item name -> item index) and attach ids directly
    # under their final names, so each id provably belongs to its own text
    # column: target -> gt_item_id, candidate -> rec_item_id.
    index_ref = pd.read_csv(name_ref, header=None).set_index(0).to_dict()[1]
    df['gt_item_id'] = df['target'].map(index_ref)
    df['rec_item_id'] = df[c1].map(index_ref)

    # Rename by label rather than by position to avoid silent mislabelling.
    df = df.rename(columns={
        'target': 'gt_item',
        c1: 'rec_item',
        'Input.hint': 'hint',
        c2: 'judgement',
    })
    cols = ['WorkerId', 'gt_item', 'rec_item', 'gt_item_id', 'rec_item_id', 'hint', 'judgement', 'fingerprint']
    df = df[cols]

    # Persist the individual worker ratings (without the fingerprint helper).
    cols = ['WorkerId', 'gt_item', 'rec_item', 'gt_item_id', 'rec_item_id', 'judgement', 'hint']
    d = df[cols].copy()
    d.to_csv(mturk_rating_file, index=False)

    # For each item pair, count the votes and the positive votes.
    df = (
        df.groupby(['gt_item', 'rec_item',
                    'gt_item_id', 'rec_item_id', 'hint', 'fingerprint'])
        .agg(n_votes=('judgement', 'count'), n_similar=('judgement', 'sum'))
        .reset_index()
    )
    # majority voting: strictly more than half of the votes must be positive,
    # so a tie yields 0.
    df['SimJudgement'] = (df['n_similar'] / df['n_votes'] > 0.5).astype(int)

    cols = ['gt_item', 'rec_item', 'gt_item_id', 'rec_item_id', 'hint', 'SimJudgement']
    df = df[cols]
    df.to_csv(mturk_simjudgement_file, index=False)

df = pd.read_csv(mturk_simjudgement_file)

In [6]:
# Preview the first two rows of the majority-vote SimJudgement table.
df.head(2)

Unnamed: 0,gt_item,rec_item,gt_item_id,rec_item_id,hint,SimJudgement
0,"Aldis Friendly Farms - Fat Free Skim Milk, 0.5...",Aldi Happy Farms - Italian Style Shredded Chee...,742,903,1,1
1,"Aldis Friendly Farms - Fat Free Skim Milk, 0.5...","Aldis Friendly Farms - Fat Free Skim Milk, 0.5...",903,903,1,1


In [7]:
# Number of recommended item pairs and negative sampled pairs
print('Number of recommended item pairs:', df['hint'].value_counts()[1])
print('Number of negative sampled pairs:', df['hint'].value_counts()[0])
print('Total:', df.shape[0])

Number of recommended item pairs: 4596
Number of negative sampled pairs: 1386
Total: 5982


# User study 1, SimJudgement + similarity scores

In [16]:
# Build (or load from cache) one wide table holding the SimJudgement label
# together with every similarity score, one row per item pair.
if not os.path.exists(mturk_substitution_score_file):

    # Columns shared by every per-metric score file; they act as the join key.
    cols = ['gt_item', 'rec_item', 'gt_item_id', 'rec_item_id', 'hint', 'SimJudgement']

    # One frame per configured score file, indexed by the shared key columns.
    dfs = [
        pd.read_csv(filename).set_index(cols)
        for filename in mturk_substitution_rating_scores.values()
    ]

    # Align the score columns side by side, then discard the stray
    # 'Unnamed: 0' column(s) carried over from the individual files.
    df_all = (
        pd.concat(dfs, axis=1)
        .reset_index()
        .drop(columns=['Unnamed: 0'])
    )

    # Flag pairs whose two item names are exactly the same string.
    df_all['identical'] = (df_all['gt_item'] == df_all['rec_item']).astype(int)

    df_all.to_csv(mturk_substitution_score_file, index=False)

df_all = pd.read_csv(mturk_substitution_score_file)

identical_counts = df_all['identical'].value_counts()
print('Number of identical item pairs:', identical_counts.loc[1])
print('Number of non-identical sampled pairs:', identical_counts.loc[0])
print('Total:', len(df_all))

Number of identical item pairs: 61
Number of non-identical sampled pairs: 5921
Total: 5982


In [17]:
# Preview the first two rows of the combined label + similarity-score table.
df_all.head(2)

Unnamed: 0,gt_item,rec_item,gt_item_id,rec_item_id,hint,SimJudgement,BLEU-1,BLEU-2,ROUGE-1,ROUGE-2,ROUGE-L,BERTScore,BERTScore-F1,hMatch-1,hMatch-2,hMatch-freq,hSim-1,hSim-2,hSim-freq,identical
0,"Aldis Friendly Farms - Fat Free Skim Milk, 0.5...",Aldi Happy Farms - Italian Style Shredded Chee...,742,903,1,1,0.125,5.273843e-155,0.125,0.0,0.125,0.377351,0.303154,0.666667,0.8,0.985405,0.743477,0.743477,0.983395,0
1,"Aldis Friendly Farms - Fat Free Skim Milk, 0.5...","Aldis Friendly Farms - Fat Free Skim Milk, 0.5...",903,903,1,1,1.0,1.0,1.0,1.0,1.0,0.999999,0.999999,1.0,1.0,1.0,1.0,1.0,1.0,1


In [18]:
# Pearson correlation between the majority-vote label (SimJudgement) and every
# score column of df_all, shown as a single-column table.
#
# The numeric columns are selected explicitly because df_all also holds the
# text columns gt_item / rec_item, and DataFrame.corr() raises on non-numeric
# data in pandas >= 2.0 (numeric_only defaults to False there). Rows are picked
# by label rather than by position, so the result does not depend on the
# column order of the cached file.
numeric_part = df_all.select_dtypes(include='number')
non_score_cols = ['gt_item_id', 'rec_item_id', 'hint', 'SimJudgement']
score_cols = [c for c in numeric_part.columns if c not in non_score_cols]
numeric_part.corr().loc[score_cols, ['SimJudgement']].round(3)

Unnamed: 0,SimJudgement
BLEU-1,0.425
BLEU-2,0.37
ROUGE-1,0.406
ROUGE-2,0.361
ROUGE-L,0.403
BERTScore,0.32
BERTScore-F1,0.356
hMatch-1,0.491
hMatch-2,0.48
hMatch-freq,0.408
