Switch to argparse (#3)
* switch to argparse for more standard CLI arguments
* new measures: Pearson rank, Spearman, and KL divergence
thongnt99 committed May 27, 2021
1 parent bf26ec6 commit e6152b3
Showing 13 changed files with 256 additions and 183 deletions.
24 changes: 24 additions & 0 deletions .github/workflows/python-package.yml
@@ -0,0 +1,24 @@
name: test
on: [push]
jobs:
test:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.6, 3.7, 3.8]
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pytest
pip install -r requirements.txt
- name: Test with pytest
run: |
export PYTHONPATH=${PYTHONPATH}:/home/runner/work/capreolus/diffir
pytest
4 changes: 4 additions & 0 deletions diffir/__init__.py
@@ -1,4 +1,8 @@
__version__ = "0.1.0"

from diffir.weight import Weight
from diffir.weight.custom import CustomWeight
from diffir.weight.unsupervised import ExactMatchWeight
from diffir.measure import Measure
from diffir.measure.qrels import QrelMeasure
from diffir.measure.unsupervised import TopkMeasure
10 changes: 7 additions & 3 deletions diffir/batchrun.py
@@ -29,9 +29,13 @@ def main():
parser = argparse.ArgumentParser()
parser.add_argument("directory")
parser.add_argument("-o", "--output", dest="output_dir")
parser.add_argument("--config", dest="config", nargs="*")

parser.add_argument("--dataset", dest="dataset", type=str, help="dataset from ir_datasets")
parser.add_argument("--measure", dest="measure", type=str, default="tauap", help="measure for ranking difference (qrel, tauap,weightedtau)")
parser.add_argument("--metric", dest="metric", type=str, default="MAP", help="metric used with qrel measure")
parser.add_argument("--topk", dest="topk", type=int, default=10)
args = parser.parse_args()
config = {"dataset": args.dataset, "measure": args.measure, "metric": args.metric, "topk": args.topk,
"weight": {"weights_1": None, "weights_2": None}}
indir = Path(args.directory)
output = Path(args.output_dir) if args.output_dir else indir / "diffir"
output.mkdir(exist_ok=True)
@@ -50,7 +54,7 @@ def main():

single_runs = sorted(single_runs) # sorted needed for itertools ordering
queue = [(fn,) for fn in single_runs] + list(itertools.combinations(single_runs, 2))
f = partial(process_runs, config=args.config, output=output)
f = partial(process_runs, config=config, output=output)
with multiprocessing.Pool(8) as p:
outdirs = p.map(f, queue)

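For reference, a minimal sketch of how the new flags behave: it rebuilds the parser defined above and feeds it a made-up command line (the runs directory and dataset id are hypothetical).

import argparse

# Rebuild the parser exactly as defined in the diff above.
parser = argparse.ArgumentParser()
parser.add_argument("directory")
parser.add_argument("-o", "--output", dest="output_dir")
parser.add_argument("--dataset", dest="dataset", type=str, help="dataset from ir_datasets")
parser.add_argument("--measure", dest="measure", type=str, default="tauap")
parser.add_argument("--metric", dest="metric", type=str, default="MAP")
parser.add_argument("--topk", dest="topk", type=int, default=10)

# Hypothetical invocation: <batch script> runs/ --dataset msmarco-passage/dev --measure qrel --metric nDCG@10
args = parser.parse_args(["runs/", "--dataset", "msmarco-passage/dev", "--measure", "qrel", "--metric", "nDCG@10"])
config = {"dataset": args.dataset, "measure": args.measure, "metric": args.metric, "topk": args.topk,
          "weight": {"weights_1": None, "weights_2": None}}
print(config["measure"], config["topk"])  # qrel 10

With no --measure or --topk given, the defaults (tauap, 10) flow straight into the config dict that replaces the old --config list.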
33 changes: 17 additions & 16 deletions diffir/measure/__init__.py
@@ -1,26 +1,27 @@
from profane import ModuleBase, import_all_modules, ConfigOption
class Measure:
def __init__(self, metric="ndcg_20", topk=5):
'''
Measure construction
:param metric: The metric used for selecting queries.
:param topk: How many queries to retrieve
'''
self.metric = metric
self.topk = topk


class Measure(ModuleBase):
module_type = "measure"
config_spec = [
ConfigOption(key="metric", default_value="ndcg_20", description="The metric to use for selecting queries"),
ConfigOption(key="topk", default_value=5, description="How many queries to retrieve"),
]

# TODO finalize API
def query_differences(self, run1, run2, *args, **kwargs):
'''
:param run1: the first run
:param run2: the second run
:param args:
:param kwargs:
        :return: the most-differing query ids, a dict mapping qid to difference, and the metric name
'''
if run1 and run2:
return self._query_differences(run1, run2, *args, **kwargs)
elif run1 and run2 is None:
qids = sorted(list(run1.keys()))[: self.config["topk"]]
qids = sorted(list(run1.keys()))[: self.topk]
id2diff = {qid: 0 for qid in qids}
return qids, id2diff, "singlerun"

def _query_differences(self, run1, run2, *args, **kwargs):
raise NotImplementedError


# TODO this is going to break once we introduce optional modules. need a way for them to fail gracefully.
# or to enumerate/register them without importing the py file?
import_all_modules(__file__, __package__)
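A small illustration of the single-run branch in query_differences above: when run2 is None, the first topk qids are returned with zero differences and the label "singlerun". The run contents are made up, and this assumes the package imports cleanly at this commit.

from diffir.measure import Measure

run1 = {"q3": {"d1": 2.0}, "q1": {"d1": 1.0}, "q2": {"d2": 0.5}}  # hypothetical single run

m = Measure(metric="ndcg_20", topk=2)
qids, id2diff, label = m.query_differences(run1, None)
print(qids, id2diff, label)  # ['q1', 'q2'] {'q1': 0, 'q2': 0} singlerun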
44 changes: 28 additions & 16 deletions diffir/measure/qrels.py
@@ -1,17 +1,10 @@
import pytrec_eval
from profane import ModuleBase, Dependency, ConfigOption
from ir_measures import iter_calc, parse_measure
import sys
from diffir.measure import Measure


@Measure.register
class QrelMeasure(Measure):
module_name = "qrel"

config_spec = [
ConfigOption(key="topk", default_value=10, description="The number of differing queries to return"),
ConfigOption(key="metric", default_value="ndcg_cut_20", description="TODO"),
]

def _query_differences(self, run1, run2, *args, **kwargs):
"""
:param run1: TREC run. Has the format {qid: {docid: score}, ...}
@@ -28,13 +21,32 @@ def _query_differences(self, run1, run2, *args, **kwargs):
run2 = {qid: doc_id_to_score for qid, doc_id_to_score in run2.items() if qid in overlapping_keys}

qrels = dataset.qrels_dict()
metric = self.config["metric"]
topk = self.config["topk"]
evaluator = pytrec_eval.RelevanceEvaluator(qrels, {metric})
eval_run_1 = evaluator.evaluate(run1)
eval_run_2 = evaluator.evaluate(run2)
try:
metric = parse_measure(self.metric)
except NameError:
print("Unknown measure: {}. Please provide a measure supported by https://ir-measur.es/".format(self.metric))
sys.exit(1)

topk = self.topk
eval_run_1 = self.convert_to_nested_dict(iter_calc([metric], qrels, run1))
eval_run_2 = self.convert_to_nested_dict(iter_calc([metric], qrels, run2))

query_ids = eval_run_1.keys() & eval_run_2.keys()
query_ids = sorted(query_ids, key=lambda x: abs(eval_run_1[x][metric] - eval_run_2[x][metric]), reverse=True)
query_ids = query_ids[:topk]
id2diff = {x:abs(eval_run_1[x][metric] - eval_run_2[x][metric]) for x in query_ids}
return query_ids, id2diff, metric
id2diff = {x: abs(eval_run_1[x][metric] - eval_run_2[x][metric]) for x in query_ids}
id2qrelscores = {x: [eval_run_1[x][metric], eval_run_2[x][metric]] for x in query_ids}
return query_ids, id2diff, self.metric, id2qrelscores

def convert_to_nested_dict(self, ir_measures_iterator):
"""
Util method to convert the results from ir_measures.iter_calc to a dict.
TODO: We can probably refactor so that this method won't be needed
"""
eval_dict = {}

for x in ir_measures_iterator:
# TODO: This assumes that there would be only one measure/metric to handle.
eval_dict[x.query_id] = {x.measure: x.value}

return eval_dict
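The ir_measures calls used above behave roughly as sketched below; the qrels, run, and the nDCG@10 measure are illustrative only.

from ir_measures import iter_calc, parse_measure

# Toy qrels and run in the same {qid: {docid: ...}} shape QrelMeasure works with.
qrels = {"q1": {"d1": 1, "d2": 0}, "q2": {"d3": 1}}
run = {"q1": {"d1": 2.0, "d2": 1.0}, "q2": {"d3": 0.5, "d4": 0.4}}

metric = parse_measure("nDCG@10")  # unknown names raise NameError, as handled above
# iter_calc yields one record per query with query_id, measure, and value fields,
# which is exactly the nesting convert_to_nested_dict builds.
per_query = {m.query_id: {m.measure: m.value} for m in iter_calc([metric], qrels, run)}
print(per_query)  # e.g. {'q1': {nDCG@10: 1.0}, 'q2': {nDCG@10: 1.0}}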
62 changes: 28 additions & 34 deletions diffir/measure/unsupervised.py
@@ -1,17 +1,10 @@
from profane import ModuleBase, Dependency, ConfigOption
from diffir.measure import Measure
from scipy import stats
import numpy as np
import math


@Measure.register
class TopkMeasure(Measure):
module_name = "topk"
config_spec = [
ConfigOption(key="topk", default_value=3, description="TODO"),
ConfigOption(key="metric", default_value="weightedtau", description="Metric to measure the rank correaltion"),
]

def tauap(self, x, y, decreasing=True):
"""
@@ -47,8 +40,8 @@ def tauap_fast(self, x, y):
n = len(ry)
if n == 1:
return 1
ordered_idx = sorted(list(range(n)), key=lambda i: rx[i])
ry_ordered_by_rx = [(ry[idx], i) for i, idx in enumerate(ordered_idx)]
ordered_idx = sorted(list(range(n)), key=lambda i: ry[i])
rx_ordered_by_ry = [(rx[idx], i) for i, idx in enumerate(ordered_idx)]

def merge_sort(arr):
if len(arr) <= 1:
@@ -82,38 +75,37 @@ def merge_sort(arr):
j += 1
k += 1
return tauAP

res = (2 - 2 * merge_sort(ry_ordered_by_rx) / (n - 1)) - 1
res = (2 - 2 * merge_sort(rx_ordered_by_ry) / (n - 1)) - 1
return res
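For readability, a plain O(n²) sketch of the AP rank correlation (tau_AP) that the merge-sort routine above computes; tie handling and which list acts as the reference may differ slightly from the optimized version.

def tauap_naive(x, y):
    # AP rank correlation between two score lists: rank items by y, then ask how
    # often x agrees with y about what belongs above each position.
    n = len(x)
    if n == 1:
        return 1
    order = sorted(range(n), key=lambda i: y[i], reverse=True)
    total = 0.0
    for i in range(1, n):
        above = order[:i]
        total += sum(1 for j in above if x[j] > x[order[i]]) / i
    return 2 * total / (n - 1) - 1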

def pearson_rank(self, x, y):
x = np.interp(x, (min(x), max(x)), (0,1))
y = np.interp(y, (min(y), max(y)), (0,1))
indices = sorted(list(range(len(x))), key=lambda idx : x[idx], reverse=True)
x = np.interp(x, (min(x), max(x)), (0, 1))
y = np.interp(y, (min(y), max(y)), (0, 1))
indices = sorted(list(range(len(x))), key=lambda idx: x[idx], reverse=True)
x = x[indices]
y = y[indices]
x_diff = x.reshape(1,-1) - x.reshape(-1,1)
y_diff = y.reshape(1,-1) - y.reshape(-1,1)
x_diff = x.reshape(1, -1) - x.reshape(-1, 1)
y_diff = y.reshape(1, -1) - y.reshape(-1, 1)
den = x[1:].sum()
pr = 0
mask = np.tril(np.ones((len(x),len(x))),k=-1)
xy = x_diff*y_diff*mask
xx = x_diff*x_diff*mask
yy = y_diff*y_diff*mask
mask = np.tril(np.ones((len(x), len(x))), k=-1)
xy = x_diff * y_diff * mask
xx = x_diff * x_diff * mask
yy = y_diff * y_diff * mask
xy = xy.sum(axis=1)[1:]
xx = xx.sum(axis=1)[1:]
yy = yy.sum(axis=1)[1:]
den_i = np.sqrt(xx)*np.sqrt(yy)
den_i[den_i==0]=1e-5
res = (xy*x[1:]/den_i).sum()/den
den_i = np.sqrt(xx) * np.sqrt(yy)
den_i[den_i == 0] = 1e-5
res = (xy * x[1:] / den_i).sum() / den
return res
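A loop-based reading of the vectorized pearson_rank above (a sketch of the same arithmetic, not a reference implementation): each item contributes a Pearson-style correlation of its score differences against everything ranked above it, weighted by its normalized score.

import numpy as np

def pearson_rank_naive(x, y):
    x = np.interp(x, (min(x), max(x)), (0, 1))
    y = np.interp(y, (min(y), max(y)), (0, 1))
    order = sorted(range(len(x)), key=lambda i: x[i], reverse=True)
    x, y = x[order], y[order]
    num = 0.0
    for i in range(1, len(x)):
        dx, dy = x[:i] - x[i], y[:i] - y[i]  # differences to higher-ranked items
        den_i = np.sqrt((dx * dx).sum()) * np.sqrt((dy * dy).sum())
        num += x[i] * (dx * dy).sum() / (den_i if den_i != 0 else 1e-5)
    return num / x[1:].sum()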

def kl_div(self, x, y):
x = np.array(x) - min(x) + 1e-5
y = np.array(y) - min(y) + 1e-5
x = x/x.sum()
y = y/y.sum()
return -(stats.entropy(x,y)+stats.entropy(y,x))/2
x = x / x.sum()
y = y / y.sum()
return -(stats.entropy(x, y) + stats.entropy(y, x)) / 2
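kl_div above returns the negated symmetric KL divergence of the shifted and renormalized score distributions, so it behaves like the correlation metrics: near 0 when the two runs score a query identically, increasingly negative as they diverge. A quick check with made-up scores:

from scipy import stats
import numpy as np

def sym_kl(x, y):  # same arithmetic as kl_div above
    x = np.array(x) - min(x) + 1e-5
    y = np.array(y) - min(y) + 1e-5
    return -(stats.entropy(x / x.sum(), y / y.sum()) + stats.entropy(y / y.sum(), x / x.sum())) / 2

print(sym_kl([3.0, 2.0, 1.0], [3.0, 2.0, 1.0]))  # ~0: identical score profiles
print(sym_kl([3.0, 2.0, 1.0], [1.0, 2.0, 3.0]))  # negative: reversed score profiles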

def _query_differences(self, run1, run2, *args, **kwargs):
"""
@@ -124,22 +116,23 @@ def _query_differences(self, run1, run2, *args, **kwargs):
:return: The union of top k qids in both runs, sorted by the order in which the queries appear in run 1
^ This is because run 1 appears on the left hand side in the web ui
"""
topk = self.config["topk"]
metric = self.config["metric"]
topk = self.topk
metric = self.metric
qids = run1.keys() & run2.keys()
if not qids:
raise ValueError("run1 and run2 have no shared qids")

id2measure = {}
for qid in qids:
from collections import defaultdict
min_value = min(min(run1[qid].values()), min(run2[qid].values()))-1e-5
min_value = min(min(run1[qid].values()), min(run2[qid].values())) - 1e-5
doc_score_1 = defaultdict(lambda: min_value, run1[qid])
doc_score_2 = defaultdict(lambda: min_value, run2[qid])
doc_ids_1 = doc_score_1.keys()
doc_ids_2 = doc_score_2.keys()
doc_ids_union = set(doc_ids_1).union(set(doc_ids_2))
doc_ids_union = sorted(list(doc_ids_union), key=lambda id: (doc_score_1[id] + doc_score_2[id]), reverse=True)
doc_ids_union = sorted(list(doc_ids_union), key=lambda id: (doc_score_1[id] + doc_score_2[id]),
reverse=True)
union_score1 = [doc_score_1[doc_id] for doc_id in doc_ids_union]
union_score2 = [doc_score_2[doc_id] for doc_id in doc_ids_union]
if metric == "weightedtau":
@@ -148,14 +141,15 @@ def _query_differences(self, run1, run2, *args, **kwargs):
tau = self.tauap_fast(union_score1, union_score2)
elif metric == "spearmanr":
tau, p_value = stats.spearmanr(union_score1, union_score2)
elif metric == "pearsonr":
tau = (self.pearson_rank(union_score1, union_score2)+self.pearson_rank(union_score2, union_score1))/2
elif metric == "pearsonrank":
tau = (self.pearson_rank(union_score1, union_score2) + self.pearson_rank(union_score2,
union_score1)) / 2
elif metric == "kldiv":
tau = self.kl_div(union_score1, union_score2)
else:
raise ValueError("Metric {} not supported for the measure {}".format(self.config["metric"], self.module_name))
raise ValueError("Metric {} not supported for the measure {}".format(self.metric, "metric"))
id2measure[qid] = tau
qids = sorted(qids, key=lambda x: id2measure[x])
qids = qids[:topk]
id2measure = {idx: id2measure[idx] for idx in qids}
return qids, id2measure, metric
return qids, id2measure, metric, None
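The "weightedtau" and new "spearmanr" branches above call scipy directly on the two union-aligned score lists; a quick illustration with made-up scores (the "tauap", "pearsonrank", and "kldiv" branches go through the helper methods defined earlier in this file).

from scipy import stats

# Scores for the union of documents retrieved for one query, aligned by doc id;
# documents missing from a run are padded with a score just below the query's minimum.
union_score1 = [3.2, 2.9, 1.0, 0.9999]
union_score2 = [1.1, 0.0999, 3.0, 2.2]

tau, _ = stats.weightedtau(union_score1, union_score2)  # metric == "weightedtau"
rho, _ = stats.spearmanr(union_score1, union_score2)    # metric == "spearmanr"
print(tau, rho)  # lower correlation => bigger difference, so the qid sorts earlier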
