In [1]:
# Import libraries.
import pandas as pd
import glob
import math
import simpledorff
from sklearn.metrics import ndcg_score

In [2]:
# Initialize paths for files containing product-level ratings from 1-5, 5 being the most relevant.
u1_rating_file_path='../resources/eval/Ratings_U1/*.xlsx'
u2_rating_file_path='../resources/eval/Ratings_U2/*.xlsx'
u3_rating_file_path='../resources/eval/Ratings_U3.xlsx'

In [3]:
# Function to calculate Normalized Discounted Cumulative Gain for each query, each ranking method, and each user.
def NDCG(u1_rating_file_path, u2_rating_file_path, u3_rating_file_path):
    
    # Create file lists for Users 1 and 2 since they have similar rating file formats.
    u1_file_paths=glob.glob(u1_rating_file_path)
    u2_file_paths=glob.glob(u2_rating_file_path)
    file_paths=[u1_file_paths, u2_file_paths]
    user_df_dict={}
    
    # Loop through each file in the folders for Users 1 and 2.
    for file_path in file_paths:
        ndcg_dict={}
        for path in file_path:
            
            # Read each rating file as a dataframe and perform basic sanity checks.
            df=pd.read_excel(path)
            df.dropna(subset=["Query ID", "User ID"], inplace=True)
            # Ensure that each line item has product-level rating.
            df=df.loc[~(df["Detailed Rating (product level)"].isna())|(~(df["Relevance Indicator"].isna()))]
            # Ensure that the relevance indicator is present for at least one product for a given query and ranking method.
            df.loc[(df["product_id"].duplicated(keep=False))&(~(df["Detailed Rating (product level)"].isna())), "Relevance Indicator"]="Most relevant"
            df.dropna(subset=["Detailed Rating (product level)"], inplace=True)
            
            # Calculate the ideal rank
            df["ideal_rating"]=sorted(list(df["Detailed Rating (product level)"]), reverse=True)
  
            # Calculate the NDCG
            ndcg_10 = ndcg_score(df['ideal_rating'].values.reshape(1,-1), df['Detailed Rating (product level)'].values.reshape(1,-1), k=10)
            ndcg_5 = ndcg_score(df['ideal_rating'].values.reshape(1,-1), df['Detailed Rating (product level)'].values.reshape(1,-1), k=5)
            ndcg_3 = ndcg_score(df['ideal_rating'].values.reshape(1,-1), df['Detailed Rating (product level)'].values.reshape(1,-1), k=3)
            
            # Add relevant information as keys and values in a dictionary.
            if "Query ID" not in ndcg_dict.keys():
                ndcg_dict["Query ID"]=[df["Query ID"].iloc[0]]
            else:
                ndcg_dict["Query ID"].append(df["Query ID"].iloc[0])
            if "Ranking Method" not in ndcg_dict.keys():
                ndcg_dict["Ranking Method"]=[df["Ranking Method"].iloc[0]]
            else:
                ndcg_dict["Ranking Method"].append(df["Ranking Method"].iloc[0])
            if "User ID" not in ndcg_dict.keys():
                ndcg_dict["User ID"]=[df["User ID"].iloc[0]]
            else:
                ndcg_dict["User ID"].append(df["User ID"].iloc[0])
            if "NDCG@10" not in ndcg_dict.keys():
                ndcg_dict["NDCG@10"]=[ndcg_10]
            else:
                ndcg_dict["NDCG@10"].append(ndcg_10)
            if "NDCG@5" not in ndcg_dict.keys():
                ndcg_dict["NDCG@5"]=[ndcg_5]
            else:
                ndcg_dict["NDCG@5"].append(ndcg_5)
            if "NDCG@3" not in ndcg_dict.keys():
                ndcg_dict["NDCG@3"]=[ndcg_3]
            else:
                ndcg_dict["NDCG@3"].append(ndcg_3)

        # Convert the dictionary to a dataframe and tidy it up.
        u_df=pd.DataFrame(ndcg_dict)
        u_df1=u_df.loc[u_df["Query ID"]!="Q10"].copy()
        u_df2=u_df.loc[u_df["Query ID"]=="Q10"].copy()
        u_df=pd.concat([u_df1, u_df2])
        u_df.reset_index(drop=True, inplace=True)
        # Add each user's result dataframe as a value in a dictionary.
        user_df_dict[df["User ID"].iloc[0]]=u_df
    
    # Create a list of query IDs.
    queries=list(u_df["Query ID"].unique())
    
    # Loop through each query ID for User 3, whose rating file differs from those of Users 1 and 2.
    ndcg_dict={}
    for query in queries:
        
        # Read each sheet in the rating file as a dataframe and perform basic sanity checks.
        df=pd.read_excel(u3_rating_file_path, sheet_name=query)
        df.dropna(subset=["product_id","review_id"], inplace=True)
        
        # Separate the dataframe based on ranking method for ease of calculation.
        dfa=df.loc[df["Ranking Method"]=="Average"].copy()
        dfd=df.loc[df["Ranking Method"]=="Discounted Reward Only"].copy()
        dfda=df.loc[df["Ranking Method"]=="Discounted Reward with Adjustment"].copy()

        # Create a list of the dataframes corresponding to each ranking method.
        df_list=[dfa, dfd, dfda]

        # Loop through each dataframe in the list corresponding to a different ranking method.
        for dfx in df_list:
            # Calculate the ideal rank
            dfx["ideal_rating"]=sorted(list(dfx["Detailed Rating (product level)"]), reverse=True)

            # Calculate the NDCG
            ndcg_10 = ndcg_score(dfx['ideal_rating'].values.reshape(1,-1), dfx['Detailed Rating (product level)'].values.reshape(1,-1), k=10)
            ndcg_5 = ndcg_score(dfx['ideal_rating'].values.reshape(1,-1), dfx['Detailed Rating (product level)'].values.reshape(1,-1), k=5)
            ndcg_3 = ndcg_score(dfx['ideal_rating'].values.reshape(1,-1), dfx['Detailed Rating (product level)'].values.reshape(1,-1), k=3)

            # Add relevant information as keys and values in a dictionary.
            if "Query ID" not in ndcg_dict.keys():
                ndcg_dict["Query ID"]=[dfx["Query ID"].iloc[0]]
            else:
                ndcg_dict["Query ID"].append(dfx["Query ID"].iloc[0])
            if "Ranking Method" not in ndcg_dict.keys():
                ndcg_dict["Ranking Method"]=[dfx["Ranking Method"].iloc[0]]
            else:
                ndcg_dict["Ranking Method"].append(dfx["Ranking Method"].iloc[0])
            if "User ID" not in ndcg_dict.keys():
                ndcg_dict["User ID"]=[dfx["User ID"].iloc[0]]
            else:
                ndcg_dict["User ID"].append(dfx["User ID"].iloc[0])
            if "NDCG@10" not in ndcg_dict.keys():
                ndcg_dict["NDCG@10"]=[ndcg_10]
            else:
                ndcg_dict["NDCG@10"].append(ndcg_10)
            if "NDCG@5" not in ndcg_dict.keys():
                ndcg_dict["NDCG@5"]=[ndcg_5]
            else:
                ndcg_dict["NDCG@5"].append(ndcg_5)
            if "NDCG@3" not in ndcg_dict.keys():
                ndcg_dict["NDCG@3"]=[ndcg_3]
            else:
                ndcg_dict["NDCG@3"].append(ndcg_3)

    # Convert the dictionary to a dataframe and tidy it up.
    u_df=pd.DataFrame(ndcg_dict)
    # Add User 3's result dataframe as a value in the dictionary that contains the result dataframes for Users 1 and 2.
    user_df_dict[u_df["User ID"].iloc[0]]=u_df
    
    # Concatenate all three users' result dataframes into one big dataframe.
    user_df=pd.DataFrame()
    for df in user_df_dict.keys():
        user_df=pd.concat([user_df, user_df_dict[df]])

    # Return the dictionary containing each user's result dataframes and the big dataframe containing all 3 users' NDCG results.
    return user_df_dict, user_df

In [4]:
df_ndcg_by_users, df_ndcg = NDCG(u1_rating_file_path, u2_rating_file_path, u3_rating_file_path)

In [5]:
df_ndcg_by_users.keys()

dict_keys(['U1', 'U2', 'U3'])

In [6]:
df_ndcg_by_users['U2']

Unnamed: 0,Query ID,Ranking Method,User ID,NDCG@10,NDCG@5,NDCG@3
0,Q1,Average,U2,0.926717,0.855328,0.796736
1,Q1,Discounted Reward Only,U2,0.94609,0.838319,0.856825
2,Q1,Discounted Reward with Adjustment,U2,0.945335,0.824223,0.821877
3,Q2,Average,U2,0.938423,0.833333,0.833333
4,Q2,Discounted Reward Only,U2,0.938423,0.833333,0.833333
5,Q2,Discounted Reward with Adjustment,U2,0.933606,0.82092,0.833333
6,Q3,Average,U2,0.963934,0.922161,0.898368
7,Q3,Discounted Reward Only,U2,0.958255,0.901201,0.887982
8,Q3,Discounted Reward with Adjustment,U2,0.957288,0.897008,0.885905
9,Q4,Average,U2,0.90481,0.800183,0.711756


In [7]:
df_ndcg

Unnamed: 0,Query ID,Ranking Method,User ID,NDCG@10,NDCG@5,NDCG@3
0,Q1,Average,U1,0.917250,0.805861,0.750000
1,Q1,Discounted Reward Only,U1,0.959709,0.860806,0.916667
2,Q1,Discounted Reward with Adjustment,U1,0.929303,0.797663,0.812500
3,Q2,Average,U1,0.981928,0.950301,0.923511
4,Q2,Discounted Reward Only,U1,0.909765,0.760480,0.755193
...,...,...,...,...,...,...
22,Q8,Discounted Reward Only,U3,0.979918,0.934031,0.949008
23,Q8,Discounted Reward with Adjustment,U3,0.958562,0.920419,0.881731
24,Q9,Average,U3,0.909075,0.750000,0.750000
25,Q9,Discounted Reward Only,U3,0.937164,0.858848,0.834138


In [8]:
df_ndcg.to_csv('../resources/eval/ndcg.csv', index=False)

In [9]:
# Function to calculate Mean Reciprocal Rank for each ranking method and each user.
def MRR(u1_rating_file_path, u2_rating_file_path, u3_rating_file_path):
    
    # Create file lists for Users 1 and 2 since they have similar rating file formats.
    u1_file_paths=glob.glob(u1_rating_file_path)
    u2_file_paths=glob.glob(u2_rating_file_path)
    file_paths=[u1_file_paths, u2_file_paths]
    user_df_dict={}
    
    # Loop through each file in the folders for Users 1 and 2.
    for file_path in file_paths:
        mrr_dict={}
        for path in file_path:
            
            # Read each rating file as a dataframe and perform basic sanity checks.
            df=pd.read_excel(path)
            df.dropna(subset=["Query ID", "User ID"], inplace=True)
            # Ensure that each line item has product-level rating.
            df=df.loc[~(df["Detailed Rating (product level)"].isna())|(~(df["Relevance Indicator"].isna()))]
            # Ensure that the relevance indicator is present for at least one product for a given query and ranking method.
            df.loc[(df["product_id"].duplicated(keep=False))&(~(df["Detailed Rating (product level)"].isna())), "Relevance Indicator"]="Most relevant"
            df.dropna(subset=["Detailed Rating (product level)"], inplace=True)
            
            # Calculate the rank, the reciprocal rank, and the reciprocal rank of the most relevant product.
            df["rank"]=list(range(1, len(df)+1))
            df["reciprocal_rank"]=round(1/df["rank"], 3)
            rr=df.loc[df["Relevance Indicator"]=="Most relevant", "reciprocal_rank"].iloc[0]
            
            # Add relevant information as keys and values in a dictionary.
            if "Query ID" not in mrr_dict.keys():
                mrr_dict["Query ID"]=[df["Query ID"].iloc[0]]
            else:
                mrr_dict["Query ID"].append(df["Query ID"].iloc[0])
            if "Ranking Method" not in mrr_dict.keys():
                mrr_dict["Ranking Method"]=[df["Ranking Method"].iloc[0]]
            else:
                mrr_dict["Ranking Method"].append(df["Ranking Method"].iloc[0])
            if "User ID" not in mrr_dict.keys():
                mrr_dict["User ID"]=[df["User ID"].iloc[0]]
            else:
                mrr_dict["User ID"].append(df["User ID"].iloc[0])
            if "Reciprocal Rank" not in mrr_dict.keys():
                mrr_dict["Reciprocal Rank"]=[rr]
            else:
                mrr_dict["Reciprocal Rank"].append(rr)
        
        # Convert the dictionary to a dataframe and tidy it up.
        u_df=pd.DataFrame(mrr_dict)
        u_df1=u_df.loc[u_df["Query ID"]!="Q10"].copy()
        u_df2=u_df.loc[u_df["Query ID"]=="Q10"].copy()
        u_df=pd.concat([u_df1, u_df2])
        u_df.reset_index(drop=True, inplace=True)
        # Add each user's result dataframe as a value in a dictionary.
        user_df_dict[df["User ID"].iloc[0]]=u_df
        
    # Create a list of query IDs.
    queries=list(u_df["Query ID"].unique())
    
    # Loop through each query ID for User 3, whose rating file format differs from those of Users 1 and 2.
    mrr_dict={}
    for query in queries:
        
        # Read each sheet in the rating file as a dataframe and perform basic sanity checks.
        df=pd.read_excel(u3_rating_file_path, sheet_name=query)
        df.dropna(subset=["product_id","review_id"], inplace=True)

        # Separate the dataframe based on ranking method for ease of calculation.
        dfa=df.loc[df["Ranking Method"]=="Average"].copy()
        dfd=df.loc[df["Ranking Method"]=="Discounted Reward Only"].copy()
        dfda=df.loc[df["Ranking Method"]=="Discounted Reward with Adjustment"].copy()

        # Create a list of the dataframes corresponding to each ranking method.
        df_list=[dfa, dfd, dfda]

        # Loop through each dataframe in the list corresponding to a different ranking method.
        for dfx in df_list:
            
            # Calculate the rank, the reciprocal rank, and the reciprocal rank of the most relevant product.           
            dfx["rank"]=list(range(1, len(dfx)+1))
            dfx["reciprocal_rank"]=round(1/dfx["rank"], 3)
            rr=dfx.loc[dfx["Relevance Indicator"]=="Most relevant", "reciprocal_rank"].iloc[0]
            
            # Add relevant information as keys and values in a dictionary.
            if "Query ID" not in mrr_dict.keys():
                mrr_dict["Query ID"]=[dfx["Query ID"].iloc[0]]
            else:
                mrr_dict["Query ID"].append(dfx["Query ID"].iloc[0])
            if "Ranking Method" not in mrr_dict.keys():
                mrr_dict["Ranking Method"]=[dfx["Ranking Method"].iloc[0]]
            else:
                mrr_dict["Ranking Method"].append(dfx["Ranking Method"].iloc[0])
            if "User ID" not in mrr_dict.keys():
                mrr_dict["User ID"]=[dfx["User ID"].iloc[0]]
            else:
                mrr_dict["User ID"].append(dfx["User ID"].iloc[0])
            if "Reciprocal Rank" not in mrr_dict.keys():
                mrr_dict["Reciprocal Rank"]=[rr]
            else:
                mrr_dict["Reciprocal Rank"].append(rr)
                
    # Convert the dictionary to a dataframe and tidy it up.
    u_df=pd.DataFrame(mrr_dict)
    # Add User 3's result dataframe as a value in the dictionary that contains the result dataframes for Users 1 and 2.
    user_df_dict[u_df["User ID"].iloc[0]]=u_df
    
    # Concatenate all three users' result dataframes into one big dataframe.
    user_df=pd.DataFrame()
    for df in user_df_dict.keys():
        user_df=pd.concat([user_df, user_df_dict[df]])
            
    # Return the dictionary containing each user's result dataframes and the big dataframe containing all 3 users' MRR results.
    return user_df_dict, user_df

In [10]:
d, df=MRR(u1_rating_file_path, u2_rating_file_path, u3_rating_file_path)

In [11]:
d.keys()

dict_keys(['U1', 'U2', 'U3'])

In [12]:
d['U3']

Unnamed: 0,Query ID,Ranking Method,User ID,Reciprocal Rank
0,Q1,Average,U3,1.0
1,Q1,Discounted Reward Only,U3,0.5
2,Q1,Discounted Reward with Adjustment,U3,1.0
3,Q2,Average,U3,0.2
4,Q2,Discounted Reward Only,U3,0.333
5,Q2,Discounted Reward with Adjustment,U3,0.1
6,Q3,Average,U3,0.25
7,Q3,Discounted Reward Only,U3,0.2
8,Q3,Discounted Reward with Adjustment,U3,0.333
9,Q4,Average,U3,0.333


In [13]:
df

Unnamed: 0,Query ID,Ranking Method,User ID,Reciprocal Rank
0,Q1,Average,U1,0.200
1,Q1,Discounted Reward Only,U1,0.500
2,Q1,Discounted Reward with Adjustment,U1,1.000
3,Q2,Average,U1,1.000
4,Q2,Discounted Reward Only,U1,0.500
...,...,...,...,...
22,Q8,Discounted Reward Only,U3,1.000
23,Q8,Discounted Reward with Adjustment,U3,0.200
24,Q9,Average,U3,0.500
25,Q9,Discounted Reward Only,U3,0.333


In [14]:
d['U3'].groupby(["Ranking Method"]).agg({"Reciprocal Rank": "mean"})

Unnamed: 0_level_0,Reciprocal Rank
Ranking Method,Unnamed: 1_level_1
Average,0.494333
Discounted Reward Only,0.329222
Discounted Reward with Adjustment,0.490667


In [17]:
# Function to calculate Krippendorff's alpha to evaluate inter-rater reliability.
def Krippendorff_alpha(u1_rating_file_path, u2_rating_file_path, u3_rating_file_path):
    
    # Create file lists for Users 1 and 2 since they have similar rating file formats.
    u1_file_paths=glob.glob(u1_rating_file_path)
    u2_file_paths=glob.glob(u2_rating_file_path)
    file_paths=[u1_file_paths, u2_file_paths]
    user_df_dict={}
    
    # Loop through each file in the folders for Users 1 and 2.
    user_dict={}
    for file_path in file_paths:
        kripp_dict={}
        for path in file_path:
            
            # Read each rating file as a dataframe and perform basic sanity checks.
            df=pd.read_excel(path)
            df.dropna(subset=["Query ID", "User ID"], inplace=True)
            # Ensure that each line item has product-level rating.
            df=df.loc[~(df["Detailed Rating (product level)"].isna())|(~(df["Relevance Indicator"].isna()))]
            # Ensure that the relevance indicator is present for at least one product for a given query and ranking method.
            df.loc[(df["product_id"].duplicated(keep=False))&(~(df["Detailed Rating (product level)"].isna())), "Relevance Indicator"]="Most relevant"
            df.dropna(subset=["Detailed Rating (product level)"], inplace=True)
            
            # Keep relevant columns.
            df=df.loc[:,["Query ID", "Ranking Method", "product_id", "User ID", "Detailed Rating (product level)"]]
            # Create a new column which is a string combination of Query ID, Ranking Method and product_id.
            df["Query ID__Ranking Method__product_id"]=df["Query ID"]+"_"+df["Ranking Method"]+"_"+df["product_id"]
            # Add relevant information as keys and values in a dictionary.
            kripp_dict[df["User ID"].iloc[0]+"_"+df["Query ID"].iloc[0]+"_"+df["Ranking Method"].iloc[0]]=df
            
        # Add each user's dictionary as a value in a dictionary, creating a nested dictionary.
        user_dict[df["User ID"].iloc[0]]=kripp_dict
    
    # Create a list of query IDs.
    queries=["Q1","Q2","Q3","Q4","Q5","Q6","Q7","Q8","Q9"]
    
    # Loop through each query ID for User 3, whose rating file format differs from those of Users 1 and 2.
    kripp_dict={}
    for query in queries:
        
        # Read each sheet in the rating file as a dataframe and perform basic sanity checks.
        df=pd.read_excel(u3_rating_file_path, sheet_name=query)
        df.dropna(subset=["product_id","review_id"], inplace=True)

        # Separate the dataframe based on ranking method for ease of calculation.
        dfa=df.loc[df["Ranking Method"]=="Average"].copy()
        dfd=df.loc[df["Ranking Method"]=="Discounted Reward Only"].copy()
        dfda=df.loc[df["Ranking Method"]=="Discounted Reward with Adjustment"].copy()

        # Create a list of the dataframes corresponding to each ranking method.
        df_list=[dfa, dfd, dfda]

        # Loop through each dataframe in the list corresponding to a different ranking method.
        for dfx in df_list:
            
            # Keep relevant columns.
            dfx=dfx.loc[:,["Query ID", "Ranking Method", "product_id", "User ID", "Detailed Rating (product level)"]]
            # Create a new column which is a string combination of Query ID, Ranking Method and product_id.
            dfx["Query ID__Ranking Method__product_id"]=dfx["Query ID"]+"_"+dfx["Ranking Method"]+"_"+dfx["product_id"]
            # Add relevant information as keys and values in a dictionary.
            kripp_dict[dfx["User ID"].iloc[0]+"_"+dfx["Query ID"].iloc[0]+"_"+dfx["Ranking Method"].iloc[0]]=dfx
    
    # Add User 3's dictionary as a value in the dictionary that contains the result dictionaries for Users 1 and 2.
    user_dict[dfx["User ID"].iloc[0]]=kripp_dict
    
    # Combine all 3 users' dataframes by querying the nested dictionary.
    main_df=pd.DataFrame()
    for user in user_dict.keys():
        for df in user_dict[user]:
            dfx=user_dict[user][df]
            main_df=pd.concat([main_df, dfx])
    
    # Calculate Krippendorff's alpha.
    alpha=round(simpledorff.calculate_krippendorffs_alpha_for_df(df=main_df, 
                experiment_col="Query ID__Ranking Method__product_id", annotator_col="User ID", 
                class_col="Detailed Rating (product level)"), 3)
    
    # Return the Krippendorff's alpha value.
    return alpha

In [18]:
alpha=Krippendorff_alpha(u1_rating_file_path, u2_rating_file_path, u3_rating_file_path)

In [19]:
alpha

0.391