Skip to content

Commit

Permalink
vectorize pearson rank and make it symmetric
Browse files Browse the repository at this point in the history
  • Loading branch information
thongnt99 committed Mar 13, 2021
1 parent 796a77b commit edde557
Showing 1 changed file with 14 additions and 9 deletions.
23 changes: 14 additions & 9 deletions diffir/measure/unsupervised.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,16 +92,21 @@ def pearson_rank(self, x, y):
indices = sorted(list(range(len(x))), key=lambda idx : x[idx], reverse=True)
x = x[indices]
y = y[indices]
x_diff = x.reshape(1,-1) - x.reshape(-1,1)
y_diff = y.reshape(1,-1) - y.reshape(-1,1)
den = x[1:].sum()
pr = 0
for i in range(1,len(x)):
num_i = sum([(x[j]-x[i])*(y[j]-y[i]) for j in range(0,i)])
den_i = math.sqrt(sum([(x[j]-x[i])*(x[j]-x[i]) for j in range(0,i)])) * math.sqrt(sum([(y[j]-y[i])*(y[j]-y[i]) for j in range(0,i)]))
if den_i == 0:
den_i = 1e-5
pr += x[i]*num_i/den_i
pr = pr/den
return pr
mask = np.tril(np.ones((len(x),len(x))),k=-1)
xy = x_diff*y_diff*mask
xx = x_diff*x_diff*mask
yy = y_diff*y_diff*mask
xy = xy.sum(axis=1)[1:]
xx = xx.sum(axis=1)[1:]
yy = yy.sum(axis=1)[1:]
den_i = np.sqrt(xx)*np.sqrt(yy)
den_i[den_i==0]=1e-5
res = (xy*x[1:]/den_i).sum()/den
return res

def _query_differences(self, run1, run2, *args, **kwargs):
"""
Expand Down Expand Up @@ -137,7 +142,7 @@ def _query_differences(self, run1, run2, *args, **kwargs):
elif metric == "spearmanr":
tau, p_value = stats.spearmanr(union_score1, union_score2)
elif metric == "pearsonr":
tau = self.pearson_rank(union_score1, union_score2)
tau = (self.pearson_rank(union_score1, union_score2)+self.pearson_rank(union_score2, union_score1))/2
else:
raise ValueError("Metric {} not supported for the measure {}".format(self.config["metric"], self.module_name))
id2measure[qid] = tau
Expand Down

0 comments on commit edde557

Please sign in to comment.