In [1]:
import numpy as np
import pandas as pd
import random
import pickle
import json

<h2>Tuning the # of candidate items</h2>

In [29]:
# Folder and base name of the reranked recommendation file analysed below.
recs_folder = "/home/diego/chat-reranking/experiments/goodreads/recs/reranked"
recs_name = "chatgpt0613-div-p1-pzt-fold_0_50_20"
top_n = 10

# Read the JSON file. The context manager closes the handle even when
# json.load raises, which the manual open()/close() pair did not guarantee.
with open(f"{recs_folder}/{recs_name}.json") as f:
    data = json.load(f)

In [30]:
print("Tune the length of the candidate set: chatgpt")
# For every entry, record the deepest position (index in entry['recs']) that
# any of its reranked items occupies; entries with no reranked items are skipped.
max_pos = []
for entry in data:
    base, re_ranked = entry['recs'], entry['reranked_recs']
    if len(re_ranked) > 0:
        # Items that do not appear in the original list are counted as position 0.
        pos = [base.index(item) if item in base else 0 for item in re_ranked]
        max_pos.append(max(pos))

# Mean and standard deviation of the per-entry maximum positions.
print(f"Average max reranking pos: {np.mean(max_pos)}")
print(f"Average std reranking pos: {np.std(max_pos)}")

Tune the length of the candidate set: chatgpt
Average max reranking pos: 36.689655172413794
Average std reranking pos: 14.692487989764354


In [28]:
rerankers = ["MMR-pzt-fold_0_50_20", "RxQuAD-pzt-fold_0_50_20", "xQuAD-pzt-fold_0_50_20", "Random-pzt-fold_0_50_20"]

print(f"Tune the length of the candidate set: baseline reranker {rerankers[3]}")
# Both files are tab-separated and read with the same three column names.
rec_columns = ["userid", "itemid", "rating"]
base_recs = pd.read_csv("/home/diego/chat-reranking/experiments/goodreads/recs/baselines/pzt-fold_0_50_20",
                        sep="\t", names=rec_columns)
re_ranked = pd.read_csv(f"/home/diego/chat-reranking/experiments/goodreads/recs/reranked/{rerankers[3]}",
                        sep="\t", names=rec_columns)

# Only the first 300 users (in order of appearance) are inspected; for each one
# the reranked items are located within the user's first 50 baseline items.
max_pos = []
for userid in re_ranked["userid"].unique()[:300]:
    base = list(base_recs.loc[base_recs["userid"] == userid, "itemid"].values[:50])
    recs = re_ranked.loc[re_ranked["userid"] == userid, "itemid"].values.tolist()

    # base.index raises ValueError if a reranked item is not among those 50 items.
    pos = [base.index(item) for item in recs]
    max_pos.append(max(pos))
print(f"Average max reranking pos: {np.mean(max_pos)}")
print(f"Average std reranking pos: {np.std(max_pos)}")

Tune the length of the candidate set: baseline reranker Random-pzt-fold_0_50_20
Average max reranking pos: 45.45333333333333
Average std reranking pos: 3.9800949179078073


<h2>Convert rec files for Ranksys</h2>

In [27]:
# Folder and base name of the reranked recommendation file to convert.
recs_folder = "/home/diego/chat-reranking/experiments/goodreads/recs/reranked_final"
recs_name = "Llama-2-13b-chat-hf-div-p12-pzt-fold_0"
top_n = 10

# Read the JSON file. The context manager closes the handle even when
# json.load raises, which the manual open()/close() pair did not guarantee.
with open(f"{recs_folder}/{recs_name}.json") as f:
    data = json.load(f)

In [28]:
# Note: despite its name, `train_folder` holds the path of the training file itself.
train_folder = "/home/diego/chat-reranking/experiments/goodreads/fold_0/train_data.csv"
# Tab-separated file read with explicit column names.
training_data = pd.read_csv(train_folder, sep="\t", names=["userid", "itemid", "rating"])

Some recommendations might contain:
- fewer than 10 items
- items that are from the training set

We need to remove these items from the recommendations (and count them).

In [29]:
# Prune every user's reranked list: keep only items that belong to the user's
# candidate set and that do not already appear in the user's training data.
# Totals before and after pruning are printed so dropped items can be counted.
recs = {}
candidate_set = {}
tot_rec_before_pruning = 0
tot_rec_after_pruning = 0
for entry in data:
    userid = entry['userid']
    reranked = entry['reranked_recs']
    candidate_set[userid] = entry['recs']
    tot_rec_before_pruning += len(reranked)
    user_train = training_data[training_data["userid"] == userid]["itemid"].values.tolist()
    # Build a new list instead of deleting from the list being iterated:
    # `del lst[i]` inside `enumerate(lst)` skips the element that follows each
    # deletion, and it also mutated entry['reranked_recs'] inside `data`,
    # so re-running the cell gave different counts.
    to_prune = [
        item
        for item in reranked
        if item in candidate_set[userid] and item not in user_train
    ]
    recs[userid] = to_prune
    tot_rec_after_pruning += len(to_prune)
print(f"# recs before pruning: {tot_rec_before_pruning}")
print(f"# recs after pruning: {tot_rec_after_pruning}")

# recs before pruning: 5000
# recs after pruning: 5000


In [30]:
# Summarise list lengths: how many users have fewer than 10 recommendations,
# how many have none at all, and the mean list length.
avg_len = [len(user_recs) for user_recs in recs.values()]
count = sum(1 for n_recs in avg_len if n_recs < 10)
count_none = sum(1 for n_recs in avg_len if n_recs == 0)
print(f"# of users with less than 10 recommendations: {count}")
print(f"# of users with no recommendations: {count_none}")
print(f"avg number of recommendations per user: {np.mean(avg_len)}")

# of users with less than 10 recommendations: 0
# of users with no recommendations: 0
avg number of recommendations per user: 10.0


In [31]:
# Write the final recommendations as tab-separated "userid<TAB>itemid<TAB>score"
# lines. Scores start at float(top_n) for the first item and drop by 1 per rank.
# When `exclude_violating_recs` is True, users with fewer than `top_n`
# recommendations are skipped and counted in `excluded`.
exclude_violating_recs = True
excluded = 0
with open(f"{recs_folder}/{recs_name}", "w") as f:
    for userid in recs:
        # The flag was previously defined but never consulted, and the
        # threshold was a literal 10 instead of the configured top_n.
        if exclude_violating_recs and len(recs[userid]) < top_n:
            excluded += 1
            continue
        f.writelines(
            f"{userid}\t{r}\t{float(top_n - rank)}\n"
            for rank, r in enumerate(recs[userid])
        )
print("Printed!")
print(f"# of excluded users: {excluded}")

Printed!
# of excluded users: 0


<h2>Results goodreads (dataset features)</h2>

In [136]:
results_path = "/home/diego/chat-reranking/experiments/goodreads/results/ranksys_eval"
# Load the @10 evaluation CSV of the baseline and of each non-LLM reranker.
baseline_recs, random_recs, mmr_recs, xquad_recs, rxquad_recs = (
    pd.read_csv(csv_path)
    for csv_path in (
        f"{results_path}/pzt-fold_0-@10.csv",
        f"{results_path}/Random-pzt-fold_0-@10.csv",
        f"{results_path}/MMR-pzt-fold_0-@10.csv",
        f"{results_path}/xQuAD-pzt-fold_0-@10.csv",
        f"{results_path}/RxQuAD-pzt-fold_0-@10.csv",
    )
)

In [137]:
# Collect, for every column of the evaluation files, the value stored at row
# label 0 of each frame, in the same order as the reranker labels.
eval_frames = (baseline_recs, random_recs, mmr_recs, xquad_recs, rxquad_recs)
d = {"reranker": ["baseline (MF)", "Random", "MMR", "xQuAD", "RxQuAD"]}
d.update({col: [frame[col][0] for frame in eval_frames] for col in baseline_recs.columns})
results = pd.DataFrame(data=d)

In [138]:
# Keep only the reported metrics and append the reranker labels as the last
# column. `.assign` returns a new frame, so no column is written into a
# column-subset of `results` (which can raise SettingWithCopyWarning).
results = results[
    ["prec", "ndcg", "a_ndcg", "eild_rel", "eild_norel", "s_recall_rel", "s_recall_norel"]
].assign(reranker=["baseline (MF)", "Random", "MMR", "xQuAD", "RxQuAD"])
results

Unnamed: 0,prec,ndcg,a_ndcg,eild_rel,eild_norel,s_recall_rel,s_recall_norel,reranker
0,0.1602,0.248728,0.270909,0.092359,0.688506,0.35475,0.871,baseline (MF)
1,0.0728,0.083711,0.109312,0.026492,0.702416,0.199375,0.889,Random
2,0.1508,0.237113,0.25301,0.09009,0.810339,0.335875,0.93375,MMR
3,0.1524,0.229247,0.308145,0.083743,0.631831,0.386,0.95825,xQuAD
4,0.1528,0.228629,0.303107,0.084458,0.63534,0.380875,0.945875,RxQuAD


In [139]:
results_path = "/home/diego/chat-reranking/experiments/goodreads/results/ranksys_eval"
# One @10 evaluation CSV per prompt variant p1..p6 of the chatgpt0613 reranker.
chat_p1, chat_p2, chat_p3, chat_p4, chat_p5, chat_p6 = (
    pd.read_csv(f"{results_path}/chatgpt0613-div-p{prompt_id}-pzt-fold_0-@10.csv")
    for prompt_id in range(1, 7)
)
chat_frames = [chat_p1, chat_p2, chat_p3, chat_p4, chat_p5, chat_p6]

# For every column, take the value at row label 0 of each prompt's frame.
d = {col: [frame[col][0] for frame in chat_frames] for col in chat_p1.columns}
results = pd.DataFrame(data=d)
# `.assign` builds a new frame instead of writing a column into a
# column-subset of `results` (which can raise SettingWithCopyWarning).
results = results[
    ["prec", "ndcg", "a_ndcg", "eild_rel", "eild_norel", "s_recall_rel", "s_recall_norel"]
].assign(reranker=["chat-gpt-p1", "chat-gpt-p2", "chat-gpt-p3", "chat-gpt-p4",
                   "chat-gpt-p5", "chat-gpt-p6"])
results

Unnamed: 0,prec,ndcg,a_ndcg,eild_rel,eild_norel,s_recall_rel,s_recall_norel,reranker
0,0.1012,0.160837,0.210004,0.045334,0.655494,0.268625,0.877375,chat-gpt-p1
1,0.116,0.179655,0.227201,0.058571,0.672305,0.296375,0.886125,chat-gpt-p2
2,0.0972,0.155974,0.208587,0.045594,0.651504,0.265625,0.882625,chat-gpt-p3
3,0.0924,0.149393,0.203716,0.038615,0.650217,0.261,0.883625,chat-gpt-p4
4,0.1194,0.188387,0.242346,0.061045,0.661821,0.31825,0.893,chat-gpt-p5
5,0.1072,0.17444,0.233803,0.051542,0.651506,0.3015,0.902625,chat-gpt-p6


In [140]:
results_path = "/home/diego/chat-reranking/experiments/goodreads/results/ranksys_eval"
# One @10 evaluation CSV per prompt variant p1..p6 of the instructgpt reranker.
instruct_p1, instruct_p2, instruct_p3, instruct_p4, instruct_p5, instruct_p6 = (
    pd.read_csv(f"{results_path}/instructgpt-div-p{prompt_id}-pzt-fold_0-@10.csv")
    for prompt_id in range(1, 7)
)
instruct_frames = [instruct_p1, instruct_p2, instruct_p3, instruct_p4, instruct_p5, instruct_p6]

# Iterate over this cell's own columns; the previous version looped over
# `chat_p1.columns`, so the cell only ran after the chatgpt cell had been executed.
d = {col: [frame[col][0] for frame in instruct_frames] for col in instruct_p1.columns}
results = pd.DataFrame(data=d)
# `.assign` builds a new frame instead of writing a column into a
# column-subset of `results` (which can raise SettingWithCopyWarning).
results = results[
    ["prec", "ndcg", "a_ndcg", "eild_rel", "eild_norel", "s_recall_rel", "s_recall_norel"]
].assign(reranker=["instruct-gpt-p1", "instruct-gpt-p2", "instruct-gpt-p3", "instruct-gpt-p4",
                   "instruct-gpt-p5", "instruct-gpt-p6"])
results

Unnamed: 0,prec,ndcg,a_ndcg,eild_rel,eild_norel,s_recall_rel,s_recall_norel,reranker
0,0.0738,0.113704,0.171466,0.028628,0.657708,0.22875,0.87875,instruct-gpt-p1
1,0.083,0.125341,0.17891,0.036538,0.66357,0.235125,0.888625,instruct-gpt-p2
2,0.0782,0.118541,0.173185,0.033652,0.657504,0.2285,0.881625,instruct-gpt-p3
3,0.0714,0.108988,0.163489,0.027856,0.658583,0.224625,0.882125,instruct-gpt-p4
4,0.0806,0.122949,0.18372,0.032771,0.652392,0.24125,0.88675,instruct-gpt-p5
5,0.0802,0.121771,0.180183,0.032782,0.651839,0.244,0.892125,instruct-gpt-p6


In [141]:
results_path = "/home/diego/chat-reranking/experiments/goodreads/results/ranksys_eval"
# One @10 evaluation CSV per prompt variant p7..p12 of the Llama-2-7b-chat-hf reranker.
llama_p7, llama_p8, llama_p9, llama_p10, llama_p11, llama_p12 = (
    pd.read_csv(f"{results_path}/Llama-2-7b-chat-hf-div-p{prompt_id}-pzt-fold_0-@10.csv")
    for prompt_id in range(7, 13)
)
llama_frames = [llama_p7, llama_p8, llama_p9, llama_p10, llama_p11, llama_p12]

# Iterate over this cell's own columns; the previous version looped over
# `chat_p1.columns`, so the cell only ran after the chatgpt cell had been executed.
d = {col: [frame[col][0] for frame in llama_frames] for col in llama_p7.columns}
results = pd.DataFrame(data=d)
# `.assign` builds a new frame instead of writing a column into a
# column-subset of `results` (which can raise SettingWithCopyWarning).
results = results[
    ["prec", "ndcg", "a_ndcg", "eild_rel", "eild_norel", "s_recall_rel", "s_recall_norel"]
].assign(reranker=["llama-p7", "llama-p8", "llama-p9", "llama-p10",
                   "llama-p11", "llama-p12"])
results

Unnamed: 0,prec,ndcg,a_ndcg,eild_rel,eild_norel,s_recall_rel,s_recall_norel,reranker
0,0.076,0.116526,0.159599,0.029694,0.674222,0.215125,0.88525,llama-p7
1,0.068,0.09986,0.14335,0.025016,0.674884,0.199875,0.875125,llama-p8
2,0.0646,0.08402,0.113013,0.020622,0.681182,0.17575,0.87925,llama-p9
3,0.0748,0.117595,0.156011,0.026004,0.678364,0.21475,0.8915,llama-p10
4,0.0782,0.110596,0.141796,0.034682,0.672678,0.20975,0.8755,llama-p11
5,0.079,0.111764,0.145816,0.032566,0.67362,0.209125,0.871625,llama-p12


In [4]:
results_dir = "/home/diego/chat-reranking/experiments/goodreads/results/ranksys_eval/"
rankers = ["chatgpt0613-div-p{}-pzt-fold_0-@10"]
prompts = ["1", "2", "3", "4", "5", "6"]
metrics = ["ndcg","a_ndcg","eild_rel","eild_norel","s_recall_rel","s_recall_norel"]
# Print, for each ranker/prompt combination, the selected metric columns at
# positional row 2 of its results file.
# NOTE(review): the meaning of row 2 is not visible here (presumably a
# dispersion statistic) — TODO confirm against the file layout.
for ranker in rankers:
    for prompt_id in prompts:
        name = ranker.format(prompt_id)
        df = pd.read_csv(f"{results_dir}/{name}.csv", sep=",")[metrics]
        print(df.iloc[2].values)
        # print("######")

[0.01809988 0.02232658 0.00808304 0.00699123 0.02465946 0.00856547]
[0.01839946 0.02195506 0.00872737 0.00651224 0.02472889 0.00844862]
[0.01744318 0.02201203 0.00748719 0.00682517 0.02434978 0.00923737]
[0.01696868 0.0218325  0.00691286 0.0065114  0.02416265 0.00807192]
[0.01864291 0.02254626 0.00873262 0.00668937 0.02551504 0.00783436]
[0.01825141 0.02257929 0.00808794 0.00669338 0.02530568 0.00761993]
