In [12]:
## Make relevant imports
import ast

import numpy as np
import pandas as pd
from lenskit.algorithms import Recommender
from lenskit.algorithms.user_knn import UserUser

In [13]:
## Import relevant python files
from evalMain import groupNDCG

In [14]:
## Train the individual recommender system
# Load the ratings file and discard its 'Unnamed: 0' column in one step.
data = pd.read_csv(
    '../dataset/clean/user_ratings_neg_1000_20_20_1.csv', delimiter=','
).drop(columns=['Unnamed: 0'])

# The recommender is fitted on (user, item, rating) columns, so map the
# dataset's own column names onto those.
lenskit_column_names = {"UserID": "user", "JobID": "item", "Rating": "rating"}
df_ui = data.rename(columns=lenskit_column_names)

# Sanity check: peek at the first rows to confirm the file was parsed as expected.
display(df_ui.head(10))

# Train user-user collaborative filtering.
MAX_NEIGHBORS = 10  # upper bound on the number of neighbours considered
MIN_NEIGHBORS = 3   # lower bound on the number of neighbours considered
user_user = UserUser(MAX_NEIGHBORS, min_nbrs=MIN_NEIGHBORS)
recsys = Recommender.adapt(user_user)
recsys.fit(df_ui)

Unnamed: 0,user,item,rating
0,554,196603,1
1,554,300053,1
2,554,1078274,1
3,554,146817,1
4,554,654538,1
5,554,336293,1
6,554,640492,1
7,554,271546,1
8,554,283949,1
9,554,1066757,1


<lenskit.algorithms.ranking.TopN at 0x7faca445da90>

In [15]:
## Load the dataframe produced by group aggregation (groupAggregation.ipynb)
grp_aggr_csv = '../groupAgg/grpAggr.csv'
df_grpAggr = (
    pd.read_csv(grp_aggr_csv, delimiter=',')
    .drop(columns=['Unnamed: 0'])  # the index column carries no information
)

In [16]:
## Suppress warnings
# Installs a catch-all "ignore" filter: every warning category, from every
# module, is silenced for the remainder of the kernel session.
import warnings
warnings.simplefilter('ignore')

In [17]:
## Compute NDCG for every group
def _parse_id_list(serialized):
    """Turn a stringified list such as "[12, 34, 56]" into a list of ints.

    Uses ast.literal_eval instead of slicing off the brackets and splitting
    on ", ", so an empty list ("[]") and irregular spacing are parsed
    correctly rather than raising ValueError on int('').
    """
    return [int(value) for value in ast.literal_eval(serialized)]


# Collect one record per group and build the frame once at the end;
# appending through .loc[len(...)] re-allocates the frame on every row.
ndcg_records = []

for _, row in df_grpAggr.iterrows():
    # 'Members' and 'Recommendation' hold lists serialised as strings in the CSV.
    lstMem = _parse_id_list(row["Members"])
    lstRec = _parse_id_list(row["Recommendation"])

    # NOTE(review): the original author marked this call "error inside", and the
    # recorded output for the last group (mean 1.165, max 3.29) is above 1,
    # which a normalised DCG should never exceed -- groupNDCG needs checking.
    g_mean, g_min, g_max = groupNDCG(lstMem, lstRec, recsys)
    ndcg_records.append({"mean": g_mean, "min": g_min, "max": g_max})

# Passing columns explicitly keeps the three headers even when there are no groups.
grp_results = pd.DataFrame(ndcg_records, columns=["mean", "min", "max"])

display(grp_results)
display(df_grpAggr)


0.9252808221548492 0.8499380881307591 0.9742432777711811
0.8947046991175306 0.8880608853132643 0.9031003683199064
0.9628304504829535 0.9571609570524648 0.9733815908581349
0.961706666137063 0.9538642031424249 0.9683369842271307
1.1653450105294874 0.4409322745890263 3.289994153058666


Unnamed: 0,mean,min,max
0,0.925281,0.849938,0.974243
1,0.894705,0.888061,0.9031
2,0.96283,0.957161,0.973382
3,0.961707,0.953864,0.968337
4,1.165345,0.440932,3.289994


Unnamed: 0,Members,Recommendation
0,"[1165352, 668454, 838185, 1056891, 578645, 950...","[771040, 26124, 1057611, 817674, 881913, 40949..."
1,"[549231, 583914, 941975, 294558, 803135, 99662...","[797357, 263988, 224907, 922573, 630951, 33102..."
2,"[410122, 277505, 692599, 783536, 1420225, 5545...","[496239, 268240, 332720, 496240, 1009096, 1021..."
3,"[192178, 850445, 439070, 1340921, 473967, 1327...","[448890, 623117, 870378, 447936, 97493, 527452..."
4,"[988920, 485759, 481324, 1294945, 669787, 1060...","[376559, 171383, 463247, 1015362, 176215, 1529..."
