From edde557ef75e7ee72133d67898f71e7a2cc253ee Mon Sep 17 00:00:00 2001 From: Thong Nguyen Date: Sat, 13 Mar 2021 11:08:15 +0100 Subject: [PATCH] vectorize pearson rank and make it symmetric --- diffir/measure/unsupervised.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/diffir/measure/unsupervised.py b/diffir/measure/unsupervised.py index 07296bb..84a6eb0 100644 --- a/diffir/measure/unsupervised.py +++ b/diffir/measure/unsupervised.py @@ -92,16 +92,21 @@ def pearson_rank(self, x, y): indices = sorted(list(range(len(x))), key=lambda idx : x[idx], reverse=True) x = x[indices] y = y[indices] + x_diff = x.reshape(1,-1) - x.reshape(-1,1) + y_diff = y.reshape(1,-1) - y.reshape(-1,1) den = x[1:].sum() pr = 0 - for i in range(1,len(x)): - num_i = sum([(x[j]-x[i])*(y[j]-y[i]) for j in range(0,i)]) - den_i = math.sqrt(sum([(x[j]-x[i])*(x[j]-x[i]) for j in range(0,i)])) * math.sqrt(sum([(y[j]-y[i])*(y[j]-y[i]) for j in range(0,i)])) - if den_i == 0: - den_i = 1e-5 - pr += x[i]*num_i/den_i - pr = pr/den - return pr + mask = np.tril(np.ones((len(x),len(x))),k=-1) + xy = x_diff*y_diff*mask + xx = x_diff*x_diff*mask + yy = y_diff*y_diff*mask + xy = xy.sum(axis=1)[1:] + xx = xx.sum(axis=1)[1:] + yy = yy.sum(axis=1)[1:] + den_i = np.sqrt(xx)*np.sqrt(yy) + den_i[den_i==0]=1e-5 + res = (xy*x[1:]/den_i).sum()/den + return res def _query_differences(self, run1, run2, *args, **kwargs): """ @@ -137,7 +142,7 @@ def _query_differences(self, run1, run2, *args, **kwargs): elif metric == "spearmanr": tau, p_value = stats.spearmanr(union_score1, union_score2) elif metric == "pearsonr": - tau = self.pearson_rank(union_score1, union_score2) + tau = (self.pearson_rank(union_score1, union_score2)+self.pearson_rank(union_score2, union_score1))/2 else: raise ValueError("Metric {} not supported for the measure {}".format(self.config["metric"], self.module_name)) id2measure[qid] = tau