pearson_correlation.py
import logging
from typing import Optional
import math

import numpy as np
from overrides import overrides
import torch

from allennlp.common.util import is_distributed
from allennlp.training.metrics.covariance import Covariance
from allennlp.training.metrics.metric import Metric

logger = logging.getLogger(__name__)
@Metric.register("pearson_correlation")
class PearsonCorrelation(Metric):
    """
    This `Metric` calculates the sample Pearson correlation coefficient (r)
    between two tensors. Each element in the two tensors is assumed to be
    a different observation of the variable (i.e., the input tensors are
    implicitly flattened into vectors and the correlation is calculated
    between the vectors).

    This implementation is mostly modeled after the `streaming_pearson_correlation`
    function in Tensorflow. See
    <https://github.com/tensorflow/tensorflow/blob/v1.10.1/tensorflow/contrib/metrics/python/ops/metric_ops.py#L3267>.

    This metric delegates to the `Covariance` metric the tracking of three [co]variances:

    - `covariance(predictions, labels)`, i.e. the covariance
    - `covariance(predictions, predictions)`, i.e. the variance of `predictions`
    - `covariance(labels, labels)`, i.e. the variance of `labels`

    Given these three values, the sample Pearson correlation coefficient is simply:

        r = covariance / (sqrt(predictions_variance) * sqrt(labels_variance))

    If either `predictions_variance` or `labels_variance` is 0, `r` is defined to be 0.
    """

    def __init__(self) -> None:
        self._predictions_labels_covariance = Covariance()
        self._predictions_variance = Covariance()
        self._labels_variance = Covariance()

    def __call__(
        self,
        predictions: torch.Tensor,
        gold_labels: torch.Tensor,
        mask: Optional[torch.BoolTensor] = None,
    ):
        """
        # Parameters

        predictions : `torch.Tensor`, required.
            A tensor of predictions of shape (batch_size, ...).
        gold_labels : `torch.Tensor`, required.
            A tensor of the same shape as `predictions`.
        mask : `torch.BoolTensor`, optional (default = `None`).
            A tensor of the same shape as `predictions`.
        """
        predictions, gold_labels, mask = self.detach_tensors(predictions, gold_labels, mask)

        # Distributed aggregation is not supported (`get_metric` raises in that
        # case), so the accumulators are only updated when not distributed.
        if not is_distributed():
            self._predictions_labels_covariance(predictions, gold_labels, mask)
            self._predictions_variance(predictions, predictions, mask)
            self._labels_variance(gold_labels, gold_labels, mask)

    def get_metric(self, reset: bool = False):
        """
        # Returns

        The accumulated sample Pearson correlation.
        """
        if is_distributed():
            raise RuntimeError(
                "Distributed aggregation for PearsonCorrelation is currently not supported."
            )
        covariance = self._predictions_labels_covariance.get_metric(reset=reset)
        predictions_variance = self._predictions_variance.get_metric(reset=reset)
        labels_variance = self._labels_variance.get_metric(reset=reset)
        denominator = math.sqrt(predictions_variance) * math.sqrt(labels_variance)
        if reset:
            self.reset()
        # Guard against a (near-)zero denominator, which occurs when either
        # variable is constant; r is defined to be 0 in that case.
        if np.around(denominator, decimals=5) == 0:
            pearson_r = 0
        else:
            pearson_r = covariance / denominator
        return pearson_r

    @overrides
    def reset(self):
        self._predictions_labels_covariance.reset()
        self._predictions_variance.reset()
        self._labels_variance.reset()
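

# A minimal usage sketch, not part of the original file: it assumes `allennlp`
# and `torch` are installed, streams two batches through the metric, and checks
# the accumulated result against `np.corrcoef` on the flattened tensors (Pearson
# r is insensitive to the N vs. N-1 normalization, so the two should agree).
if __name__ == "__main__":
    metric = PearsonCorrelation()

    predictions = torch.tensor([[0.1, 0.4, 0.8], [0.9, 0.2, 0.5]])
    gold_labels = torch.tensor([[0.0, 0.5, 1.0], [1.0, 0.0, 0.5]])

    # Feed the data one row at a time to exercise the streaming updates.
    for pred_row, gold_row in zip(predictions, gold_labels):
        metric(pred_row.unsqueeze(0), gold_row.unsqueeze(0))

    streaming_r = metric.get_metric(reset=True)
    expected_r = np.corrcoef(predictions.numpy().ravel(), gold_labels.numpy().ravel())[0, 1]
    print(f"streaming r = {streaming_r:.5f}, np.corrcoef r = {expected_r:.5f}")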