In [19]:
## Make relevant imports
import numpy as np
import pandas as pd
from lenskit.algorithms import Recommender
from lenskit.algorithms.user_knn import UserUser

In [20]:
## Import relevant python files
from evalMain import groupNDCG

In [21]:
## Train the individual rec.sys.
# Load the ratings CSV and drop its 'Unnamed: 0' column
# (presumably an index column written when the file was saved -- confirm).
data = pd.read_csv(
    '../dataset/clean/user_ratings_neg_1000_20_20_1.csv', delimiter=','
).drop(columns=['Unnamed: 0'])

# Rename the columns to the (user, item, rating) names passed on to fit() below.
df_ui = data.rename(
    columns={
        "UserID": "user",
        "JobID": "item",
        "Rating": "rating",
    }
)
# Sanity check: show the first rows to confirm the data was read properly.
display(df_ui.head(10))

# Train UserUser collaborative filtering:
# at most 10 neighbors, at least 3 (min_nbrs) neighbors to consider.
user_user = UserUser(10, min_nbrs=3)
recsys = Recommender.adapt(user_user)
# Kept as the last expression so the fitted object is shown as the cell output.
recsys.fit(df_ui)

Unnamed: 0,user,item,rating
0,554,196603,1
1,554,300053,1
2,554,1078274,1
3,554,146817,1
4,554,654538,1
5,554,336293,1
6,554,640492,1
7,554,271546,1
8,554,283949,1
9,554,1066757,1


<lenskit.algorithms.ranking.TopN at 0x7fd8ff134070>

In [26]:
## Load the dataframe produced by group aggregation (groupAggregation.ipynb)
df_grpAggr = (
    pd.read_csv('../groupAgg/grpAggr_full.csv', delimiter=',')
    .drop(columns=['Unnamed: 0'])  # discard the useless index column
)

In [23]:
## Supress warnings
import warnings
warnings.filterwarnings('ignore')

In [27]:
## Compute ndcg for a group
def _parse_int_list(text):
    """Convert a stringified list such as "[12, 7, 3]" into a list of ints.

    Surrounding whitespace and square brackets are removed, the remainder is
    split on commas, and blank tokens are skipped. An empty list ("[]")
    therefore yields [] instead of raising ValueError on int(''), and the
    spacing after each comma no longer has to be exactly one blank.
    """
    tokens = text.strip().strip("[]").split(",")
    return [int(tok) for tok in tokens if tok.strip()]


# One (mean, min, max) tuple per group, in the row order of df_grpAggr.
ndcg_rows = []

for _, row in df_grpAggr.iterrows():
    # "Members" and "Recommendation" are stored as stringified lists of ids.
    lstMem = _parse_int_list(row["Members"])
    lstRec = _parse_int_list(row["Recommendation"])

    # run group ndcg
    # NOTE(review): the original author marked this call with "error inside";
    # groupNDCG lives in evalMain and is not visible here -- confirm whether
    # that remark is still relevant.
    g_mean, g_min, g_max = groupNDCG(lstMem, lstRec, recsys)
    ndcg_rows.append((g_mean, g_min, g_max))

# Build the results frame once instead of enlarging it row by row with .loc.
grp_results = pd.DataFrame(ndcg_rows, columns=["mean", "min", "max"])

display(grp_results)
display(df_grpAggr)


Unnamed: 0,mean,min,max
0,0.099749,0.0,0.997489
1,0.1,0.0,1.0
2,0.1,0.0,1.0
3,0.097474,0.0,0.527883
4,0.099874,0.0,0.998743
5,0.099734,0.0,0.49126
6,0.1,0.0,0.553146
7,0.099894,0.0,0.99894
8,0.1,0.0,1.0
9,0.098033,0.0,0.533472


Unnamed: 0,Members,Recommendation
0,"[1165352, 668454, 838185, 1056891, 578645, 950...","[692024, 579894, 27261, 311691, 508864]"
1,"[549231, 583914, 941975, 294558, 803135, 99662...","[853314, 308908, 864097, 901476, 1032156]"
2,"[410122, 277505, 692599, 783536, 1420225, 5545...","[543918, 615880, 228396, 317252, 784650]"
3,"[192178, 850445, 439070, 1340921, 473967, 1327...","[1042869, 282364, 545615, 481315, 260713]"
4,"[988920, 485759, 481324, 1294945, 669787, 1060...","[156859, 263136, 940871, 1047376, 1040817]"
5,"[1360918, 1195068, 791001, 1132744, 1124402, 6...","[338737, 1055575, 184546, 284141, 297510]"
6,"[1249010, 643384, 448742, 187074, 588976, 2720...","[78579, 1040827, 1074185, 44194, 72524]"
7,"[1187753, 440304, 928954, 1082928, 142184, 118...","[451, 929404, 473965, 582118, 307079]"
8,"[172933, 1217103, 1430857, 1074051, 1020507, 1...","[1076994, 208481, 521315, 908559, 867261]"
9,"[1174914, 755837, 1176363, 286772, 478643, 146...","[201609, 980270, 25864, 165696, 195193]"


In [28]:
# Average each per-group NDCG statistic over all groups, one value per line.
for stat_name in ("mean", "min", "max"):
    print(np.mean(grp_results[stat_name]))

0.13475608561802563
0.022629919001549714
0.8318745603260542
