In [30]:
## Make relevant imports
import ast

import numpy as np
import pandas as pd
from lenskit.algorithms import Recommender
from lenskit.algorithms.user_knn import UserUser

In [31]:
## Import relevant python files
from evalMain import groupNDCG

In [32]:
## Train the individual rec.sys.
# load the ratings CSV and discard the leftover 'Unnamed: 0' index column
ratings_path = '../dataset/clean/user_ratings_neg_1000_20_20_1.csv'
data = pd.read_csv(ratings_path, delimiter=',')
data = data.drop(columns=['Unnamed: 0'])

# rename the columns into the (user, item, rating) layout that is passed to fit() below
column_map = {"UserID": "user", "JobID": "item", "Rating": "rating"}
df_ui = data.rename(columns=column_map)
# sanity check: preview the first rows to confirm the data was read properly
display(df_ui.head(10))

# train user-user collaborative filtering
# neighborhood size: at most 10 neighbors, at least 3 (min_nbrs)
user_user = UserUser(10, min_nbrs=3)
recsys = Recommender.adapt(user_user)
recsys.fit(df_ui)

Unnamed: 0,user,item,rating
0,554,196603,1
1,554,300053,1
2,554,1078274,1
3,554,146817,1
4,554,654538,1
5,554,336293,1
6,554,640492,1
7,554,271546,1
8,554,283949,1
9,554,1066757,1


<lenskit.algorithms.ranking.TopN at 0x7faca865f0a0>

In [33]:
## Read dataframe generated by group aggregation (groupAggregation.ipynb)
grp_aggr_path = '../groupAgg/grpAggr.csv'
# load the file and drop the leftover index column ('Unnamed: 0') in one step
df_grpAggr = pd.read_csv(grp_aggr_path, delimiter=',').drop(columns=['Unnamed: 0'])

In [34]:
## Supress warnings
import warnings
warnings.filterwarnings('ignore')

In [35]:
## Compute ndcg for a group
# Each row of df_grpAggr describes one group. The "Members" and "Recommendation"
# columns come back from the CSV as strings such as "[1, 2, 3]", so they are
# parsed into lists of ints before being scored.
ndcg_rows = []
for members_str, recs_str in zip(df_grpAggr["Members"], df_grpAggr["Recommendation"]):
    # literal_eval parses the bracketed list directly; unlike slicing off the
    # brackets and splitting on ", ", it also copes with an empty "[]" and with
    # irregular spacing between the elements
    lstMem = [int(member) for member in ast.literal_eval(members_str)]
    lstRec = [int(item) for item in ast.literal_eval(recs_str)]

    # run group ndcg; the result is unpacked as (mean, min, max)
    # NOTE(review): the original author flagged "error inside" groupNDCG here -
    # confirm whether that issue is still open
    g_mean, g_min, g_max = groupNDCG(lstMem, lstRec, recsys)
    ndcg_rows.append((g_mean, g_min, g_max))

# build the result frame once instead of appending to it row by row
grp_results = pd.DataFrame(ndcg_rows, columns=["mean", "min", "max"])

display(grp_results)
display(df_grpAggr)


1.0 1.0 1.0
0.9968604350628641 0.9943625324898411 0.9985530680193084
0.9998776083216405 0.9993421396917048 1.0
0.9993919036699639 0.9989210998075511 0.999555458666022
0.8815499077536014 0.5531464700081438 0.9825150756390567


Unnamed: 0,mean,min,max
0,1.0,1.0,1.0
1,0.99686,0.994363,0.998553
2,0.999878,0.999342,1.0
3,0.999392,0.998921,0.999555
4,0.88155,0.553146,0.982515


Unnamed: 0,Members,Recommendation
0,"[1165352, 668454, 838185, 1056891, 578645, 950...","[771040, 26124, 1057611, 817674, 881913]"
1,"[549231, 583914, 941975, 294558, 803135, 99662...","[797357, 263988, 224907, 922573, 630951]"
2,"[410122, 277505, 692599, 783536, 1420225, 5545...","[496239, 268240, 332720, 496240, 1009096]"
3,"[192178, 850445, 439070, 1340921, 473967, 1327...","[448890, 623117, 870378, 447936, 97493]"
4,"[988920, 485759, 481324, 1294945, 669787, 1060...","[376559, 171383, 463247, 1015362, 176215]"
