In [63]:
import math
import json
from time import time
from random import shuffle
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from collections import Counter
from imblearn.over_sampling import SMOTE
import seaborn as sns
import matplotlib.pyplot as plt

In [64]:
# /content/drive/MyDrive/data/
from google.colab import drive

drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [65]:
!mkdir /content/drive/MyDrive/data/models-KLDivLoss/

mkdir: cannot create directory ‘/content/drive/MyDrive/data/models-KLDivLoss/’: File exists


In [66]:
def PrecisionAtK(pred_ranking_scores, K):
    pred_ranking_scores = pred_ranking_scores[:K]
    K = len(pred_ranking_scores)
    return len([_ for _ in pred_ranking_scores if _ > 1]) / K


def MeanPrecisionAtK(baseline, facet, K=20, need_list=False):
    rank_data_file_name = '/content/drive/MyDrive/data/test-pid2anns-csfcube-' + facet + '.json'
    rank_data_file = open(rank_data_file_name)
    rank_data = json.load(rank_data_file)
    rank_data_file.close()

    model_name = baseline + '/' + facet + '.qbe'
    model = torch.load('models-KLDivLoss/' + model_name)
    model.eval()

    query_pids = rank_data.keys()
    values = []

    for pid in query_pids:
        true_labels = rank_data[pid]['relevance_adju']

        qf = GetQueryFeature(baseline, facet, pid)
        cand_pids, cand_f = GetCandidateFeatures(baseline, facet, pid)
        cand_c = len(cand_f)

        ins1, ins2 = [qf] * cand_c, cand_f
        ins1, ins2 = torch.tensor(ins1).float(), torch.tensor(ins2).float()

        pred = model(ins1, ins2)
        pred_labels = list(torch.argmax(pred, dim=1).numpy())

        pred_true = list(zip(pred_labels, true_labels))
        pred_true.sort(key=lambda x: -1 * x[0])

        __, pred_ranking_scores = zip(*pred_true)

        prec_at_k = PrecisionAtK(pred_ranking_scores, K)
        values.append(prec_at_k)

    if (not need_list):
        return sum(values) / len(values)
    return values

In [67]:
def DCG(rank_scores, K=100):
    score = rank_scores[0]
    l = int(len(rank_scores) * (K/100))
    for i in range(1, l):
        score += rank_scores[i] / math.log2(i + 1)
    return score


def NDCG(pred_scores, ideal_scores, K=100):
    return DCG(pred_scores, K) / DCG(ideal_scores, K)

In [68]:
def GetQueryFeature(baseline, facet, paper_id):
    query_feature_data_file_name = '/content/drive/MyDrive/data/' + baseline + '/' + facet + '.json'
    query_feature_data_file = open(query_feature_data_file_name)
    query_feature_data = json.load(query_feature_data_file)
    query_feature_data_file.close()
    return query_feature_data[paper_id]


def GetCandidateFeatures(baseline, facet, paper_id):
    rank_data_file_name = '/content/drive/MyDrive/data/test-pid2anns-csfcube-' + facet + '.json'
    rank_data_file = open(rank_data_file_name)
    rank_data = json.load(rank_data_file)

    cand_feature_data_file_name = '/content/drive/MyDrive/data/' + baseline + '/all.json'
    cand_feature_data_file = open(cand_feature_data_file_name)
    cand_feature_data = json.load(cand_feature_data_file)

    rank_data_file.close()
    cand_feature_data_file.close()

    cand_features = [cand_feature_data[cand_pid]
                     for cand_pid in rank_data[paper_id]['cands']]
    return rank_data[paper_id]['cands'], cand_features

In [69]:
def AllNDCG(baseline, facet, K=100):
    rank_data_file_name = '/content/drive/MyDrive/data/test-pid2anns-csfcube-' + facet + '.json'
    rank_data_file = open(rank_data_file_name)
    rank_data = json.load(rank_data_file)
    rank_data_file.close()

    model_name = baseline + '/' + facet + '.qbe'
    model = torch.load('/content/drive/MyDrive/data/models-KLDivLoss/' + model_name)
    model.eval()

    query_pids = rank_data.keys()
    all_ndcg = []

    for pid in query_pids:
        true_labels = rank_data[pid]['relevance_adju']

        qf = GetQueryFeature(baseline, facet, pid)
        cand_pids, cand_f = GetCandidateFeatures(baseline, facet, pid)
        cand_c = len(cand_f)

        ins1, ins2 = [qf] * cand_c, cand_f
        ins1, ins2 = torch.tensor(ins1).float(), torch.tensor(ins2).float()

        pred = model(ins1, ins2)
        pred_labels = list(torch.argmax(pred, dim=1).numpy())

        pred_true = list(zip(pred_labels, true_labels))
        pred_true.sort(key=lambda x: -1 * x[0])

        __, pred_ranking_scores = zip(*pred_true)
        ideal_ranking_scores = true_labels
        ideal_ranking_scores.sort(reverse=True)

        ndcg = NDCG(pred_ranking_scores, ideal_ranking_scores, K)
        all_ndcg.append(ndcg)

    return np.array(all_ndcg)

In [70]:
def AveragePrecision(pred_ranking_scores):
    precs = []
    count = 0
    for it, rel in enumerate(pred_ranking_scores):
        if (rel == 1):
            continue
        count += 1
        precs.append(count / (it + 1))
    return sum(precs) / len(precs)


def MeanAveragePrecision(baseline, facet, need_list=False):
    rank_data_file_name = '/content/drive/MyDrive/data/test-pid2anns-csfcube-' + facet + '.json'
    rank_data_file = open(rank_data_file_name)
    rank_data = json.load(rank_data_file)
    rank_data_file.close()

    model_name = baseline + '/' + facet + '.qbe'
    model = torch.load('models-KLDivLoss/' + model_name)
    model.eval()

    query_pids = rank_data.keys()
    values = []

    for pid in query_pids:
        true_labels = rank_data[pid]['relevance_adju']

        qf = GetQueryFeature(baseline, facet, pid)
        cand_pids, cand_f = GetCandidateFeatures(baseline, facet, pid)
        cand_c = len(cand_f)

        ins1, ins2 = [qf] * cand_c, cand_f
        ins1, ins2 = torch.tensor(ins1).float(), torch.tensor(ins2).float()

        pred = model(ins1, ins2)
        pred_labels = list(torch.argmax(pred, dim=1).numpy())

        pred_true = list(zip(pred_labels, true_labels))
        pred_true.sort(key=lambda x: -1 * x[0])

        __, pred_ranking_scores = zip(*pred_true)

        avg_prec = AveragePrecision(pred_ranking_scores)
        values.append(avg_prec)

    if (not need_list):
        return sum(values) / len(values)
    return values

In [71]:
def EnsembleNDCG(baselines, K=100):
    facets = ['background', 'result', 'method']
    all_ndcgs = np.array([])
    for t, facet in enumerate(facets):
        all_ndcgs = np.concatenate((all_ndcgs, AllNDCG(baselines[t], facet, K)))
    return all_ndcgs

In [72]:
def RecallAtK(pred_ranking_scores, K):
    tot = len([_ for _ in pred_ranking_scores if _ > 1])
    pred_ranking_scores = pred_ranking_scores[:K]
    return len([_ for _ in pred_ranking_scores if _ > 1]) / tot


def MeanRecallAtK(baseline, facet, K=20, need_list=False):
    rank_data_file_name = '/content/drive/MyDrive/data/test-pid2anns-csfcube-' + facet + '.json'
    rank_data_file = open(rank_data_file_name)
    rank_data = json.load(rank_data_file)
    rank_data_file.close()

    model_name = baseline + '/' + facet + '.qbe'
    model = torch.load('/content/drive/MyDrive/data/models-KLDivLoss/' + model_name)
    model.eval()

    query_pids = rank_data.keys()
    values = []

    for pid in query_pids:
        true_labels = rank_data[pid]['relevance_adju']

        qf = GetQueryFeature(baseline, facet, pid)
        cand_pids, cand_f = GetCandidateFeatures(baseline, facet, pid)
        cand_c = len(cand_f)

        ins1, ins2 = [qf] * cand_c, cand_f
        ins1, ins2 = torch.tensor(ins1).float(), torch.tensor(ins2).float()

        pred = model(ins1, ins2)
        pred_labels = list(torch.argmax(pred, dim=1).numpy())

        pred_true = list(zip(pred_labels, true_labels))
        pred_true.sort(key=lambda x: -1 * x[0])

        __, pred_ranking_scores = zip(*pred_true)

        recall_at_k = RecallAtK(pred_ranking_scores, K)
        values.append(recall_at_k)

    if (not need_list):
        return sum(values) / len(values)
    return values

In [73]:
def RPrecision(pred_ranking_scores):
    tot = len([_ for _ in pred_ranking_scores if _ > 1])
    pred_ranking_scores = pred_ranking_scores[:tot]
    return len([_ for _ in pred_ranking_scores if _ > 1]) / tot


def MeanRPrecision(baseline, facet, need_list=False):
    rank_data_file_name = '/content/drive/MyDrive/data/test-pid2anns-csfcube-' + facet + '.json'
    rank_data_file = open(rank_data_file_name)
    rank_data = json.load(rank_data_file)
    rank_data_file.close()

    model_name = baseline + '/' + facet + '.qbe'
    model = torch.load('/content/drive/MyDrive/data/models-KLDivLoss/' + model_name)
    model.eval()

    query_pids = rank_data.keys()
    values = []

    for pid in query_pids:
        true_labels = rank_data[pid]['relevance_adju']

        qf = GetQueryFeature(baseline, facet, pid)
        cand_pids, cand_f = GetCandidateFeatures(baseline, facet, pid)
        cand_c = len(cand_f)

        ins1, ins2 = [qf] * cand_c, cand_f
        ins1, ins2 = torch.tensor(ins1).float(), torch.tensor(ins2).float()

        pred = model(ins1, ins2)
        pred_labels = list(torch.argmax(pred, dim=1).numpy())

        pred_true = list(zip(pred_labels, true_labels))
        pred_true.sort(key=lambda x: -1 * x[0])

        __, pred_ranking_scores = zip(*pred_true)

        r_prec = RPrecision(pred_ranking_scores)
        values.append(r_prec)

    if (not need_list):
        return sum(values) / len(values)
    return values

In [74]:
def EnsembleMeanPrecisionAtK(baselines, K=20):
    facets = ['background', 'result', 'method']
    values = []
    for t, facet in enumerate(facets):
        values.extend(MeanPrecisionAtK(baselines[t], facet, K, True))
    return sum(values) / len(values)


def EnsembleMeanRecallAtK(baselines, K=20):
    facets = ['background', 'result', 'method']
    values = []
    for t, facet in enumerate(facets):
        values.extend(MeanRecallAtK(baselines[t], facet, K, True))
    return sum(values) / len(values)


def EnsembleMeanRPrecision(baselines):
    facets = ['background', 'result', 'method']
    values = []
    for t, facet in enumerate(facets):
        values.extend(MeanRPrecision(baselines[t], facet, True))
    return sum(values) / len(values)


def EnsembleMeanAveragePrecision(baselines):
    facets = ['background', 'result', 'method']
    values = []
    for t, facet in enumerate(facets):
        values.extend(MeanAveragePrecision(baselines[t], facet, True))
    return sum(values) / len(values)

In [75]:
def EnsembleEvaluateOnAllQueries(baselines):
    ndcg_all_queries = EnsembleNDCG(baselines)
    print(' > (AGG) MEAN NDCG (@100%) : {:.2f} %'.format(100*ndcg_all_queries.mean()))
    print(
        ' > (AGG) STD-DEV in NDCG (@100%) : {:.2f} %'.format(100*ndcg_all_queries.std()))
    ndcg_all_queries = EnsembleNDCG(baselines, K=20)
    print(' > (AGG) MEAN NDCG (@20%) : {:.2f} %'.format(100*ndcg_all_queries.mean()))
    print(
        ' > (AGG) STD-DEV in NDCG (@20%) : {:.2f} %'.format(100*ndcg_all_queries.std()))
    print(' > (AGG) MEAN PRECISION@20 : {:.2f} %'.format(
        100*EnsembleMeanPrecisionAtK(baselines, K=20)))
    print(' > (AGG) MEAN RECALL@20 : {:.2f} %'.format(
        100*EnsembleMeanRecallAtK(baselines, K=20)))
    print(' > (AGG) MEAN R PRECISION : {:.2f} %'.format(
        100*EnsembleMeanRPrecision(baselines)))
    print(' > (AGG) MEAN AVG. PRECISION : {:.2f} %'.format(
        100*EnsembleMeanAveragePrecision(baselines)))

In [76]:
EnsembleEvaluateOnAllQueries(['unsimcse', 'unsimcse', 'specter'])

AttributeError: ignored