In [1]:
import re
import pandas as pd

from nltk.tokenize import word_tokenize, RegexpTokenizer
from nltk.translate.bleu_score import sentence_bleu
from rouge import Rouge 

from src.config import substitution_rating_file, preference_rating_file
from src.config import preference_rating_scores

import warnings
# Suppress every Python warning for the rest of the session.
# NOTE(review): presumably meant to quiet the scoring libraries, but it also
# hides any other warning raised later in the notebook — confirm that is intended.
warnings.filterwarnings('ignore')

In [2]:
# Predictions to be scored, read from the preference-rating file.
df = pd.read_csv(preference_rating_file)

# Ground truth: the distinct (user, item_1) pairs of the substitution-rating
# file, re-indexed from 0 and with item_1 exposed under the name gt_item.
df2 = (
    pd.read_csv(substitution_rating_file)
    .loc[:, ['user', 'item_1']]
    .drop_duplicates()
    .reset_index(drop=True)
    .rename(columns={'item_1': 'gt_item'})
)

# Gather each user's ground-truth items into a list and attach that list to
# every prediction row of the same user (users without an entry map to NaN).
gt = df2.groupby('user')['gt_item'].apply(list).to_dict()
df['gt'] = df['user'].map(gt)

In [3]:
# Shared tokenizer used by clean_text; the r'\w+' pattern selects runs of word characters.
tokenizer = RegexpTokenizer(r'\w+')

def clean_text(s):
    """Normalise a piece of text before n-gram scoring.

    Every character that is not an ASCII letter or a space is deleted, the
    remainder is lower-cased, split with the module-level ``tokenizer`` and
    re-joined with single spaces.  Characters are deleted rather than replaced
    by a space, so ``'well-known'`` becomes ``'wellknown'``.

    Parameters
    ----------
    s : str
        Raw text.  Any value that is not a ``str`` (for example the ``NaN``
        pandas uses for a missing cell) is treated as missing text.

    Returns
    -------
    str
        The cleaned text, or ``''`` when ``s`` is not a string; in that case
        the offending value is printed so it can be traced.
    """
    if not isinstance(s, str):
        # Best-effort handling of missing values: report them and score them as
        # empty text.  An explicit type check replaces the former bare
        # ``except`` so that genuine failures (and KeyboardInterrupt) are no
        # longer silently converted into an empty string.
        print(s)
        return ''
    letters_only = re.sub(r'[^a-zA-Z ]', r'', s).lower()
    return ' '.join(tokenizer.tokenize(letters_only))

def get_weighted_scores(single_cands, multi_refs, weights=(0.25, 0.25, 0.25, 0.25)): 
    """Compute a sentence-level BLEU score for one candidate against several references.

    Parameters
    ----------
    single_cands : str
        Candidate text; it is cleaned with ``clean_text`` and tokenised with
        ``word_tokenize``.
    multi_refs : iterable of str
        Reference texts, each cleaned and tokenised the same way.
    weights : tuple of float
        n-gram weights forwarded unchanged to ``sentence_bleu``.

    Returns
    -------
    The value returned by ``sentence_bleu`` for the tokenised inputs.
    """
    candidate_tokens = word_tokenize(clean_text(single_cands))
    reference_tokens = []
    for ref in multi_refs:
        reference_tokens.append(word_tokenize(clean_text(ref)))
    return sentence_bleu(reference_tokens, candidate_tokens, weights=weights)

# n-gram weight vectors handed to get_weighted_scores, keyed by output column name.
weight_scheme = {
    'BLEU-1': (1, 0, 0, 0),
    'BLEU-2': (0.5, 0.5, 0, 0),
}

for m, w in weight_scheme.items():

    def weighted_scores(line):
        """Score one row: its 'choice' text against its 'gt' reference list."""
        return get_weighted_scores(line['choice'], line['gt'], weights=w)

    # The closure reads `w` at call time, so it is applied within this iteration.
    df[m] = df.apply(weighted_scores, axis=1)

# Write the identifying columns plus one column per BLEU variant.
id_cols = ['user', 'qn', 'rating', 'choice']
cols = id_cols + ['BLEU-1', 'BLEU-2']
df[cols].to_csv(preference_rating_scores['BLEU'], index=False)

In [4]:
# Single ROUGE scorer instance, reused by get_rouge_scores on every call.
rouge = Rouge()

def get_rouge_scores(single_cands, reference):
    """Compute ROUGE-1, ROUGE-2 and ROUGE-L for one candidate/reference pair.

    Both texts are normalised with ``clean_text`` before being scored by the
    module-level ``rouge`` scorer.

    Parameters
    ----------
    single_cands : str
        Candidate (hypothesis) text.
    reference : str
        Reference text.

    Returns
    -------
    list
        Nine values in the order
        ``[r1_f, r1_p, r1_r, r2_f, r2_p, r2_r, rl_f, rl_p, rl_r]``.
        When either cleaned text is empty (missing value, or text without any
        letters) all nine values are ``0.0``.
    """
    metrics = ('rouge-1', 'rouge-2', 'rouge-l')
    stats = ('f', 'p', 'r')

    hypothesis = clean_text(single_cands)
    target = clean_text(reference)

    if not hypothesis or not target:
        # The `rouge` package raises ValueError on an empty hypothesis or
        # reference, which would abort a whole DataFrame.apply; an empty text
        # shares no n-grams with anything, so it is scored as zero instead.
        return [0.0] * (len(metrics) * len(stats))

    s = rouge.get_scores(hypothesis, target)[0]
    return [s[metric][stat] for metric in metrics for stat in stats]
                
def rouge_scores(line):
    """Row adapter for ``DataFrame.apply``: score the row's 'choice' against its 'gt_item'."""
    return get_rouge_scores(line['choice'], line['gt_item'])

# Pair every prediction with every ground-truth item of the same user.  The
# outer join also keeps users that appear in only one of the two frames.
d = df2[['user', 'gt_item']].merge(
    df[['user', 'qn', 'rating', 'choice']],
    how='outer',
    on=['user'],
)

# rouge_scores returns nine values per row; transpose them into nine columns.
(
    d['ROUGE-1_f'], d['ROUGE-1_p'], d['ROUGE-1_r'],
    d['ROUGE-2_f'], d['ROUGE-2_p'], d['ROUGE-2_r'],
    d['ROUGE-L_f'], d['ROUGE-L_p'], d['ROUGE-L_r'],
) = zip(*d.apply(rouge_scores, axis=1))

# Only the recall variants are exported, with the '_r' suffix dropped.
recall_columns = ['ROUGE-1_r', 'ROUGE-2_r', 'ROUGE-L_r']
cols = ['user', 'qn', 'rating', 'gt_item', 'choice'] + recall_columns
d = d[cols].rename(columns={name: name.replace('_r', '') for name in recall_columns})

d.to_csv(preference_rating_scores['ROUGE'], index=False)