# bias_direction.py — from an archived repository (read-only since Dec 16, 2022).
"""
A suite of differentiable methods to compute the bias direction
or concept subspace representing binary protected variables.
"""
import torch
import sklearn
import numpy as np
from allennlp.common.checks import ConfigurationError
class BiasDirection:
    """
    Parent class for bias direction classes.

    # Parameters

    requires_grad : `bool`, optional (default=`False`)
        Option to enable gradient calculation.
    """

    def __init__(self, requires_grad: bool = False):
        # Subclasses consult this flag when toggling torch gradient tracking.
        self.requires_grad = requires_grad

    def _normalize_bias_direction(self, bias_direction: torch.Tensor):
        # Rescale to unit length so the returned direction is a unit vector.
        norm = torch.linalg.norm(bias_direction)
        return bias_direction / norm
class PCABiasDirection(BiasDirection):
    """
    PCA-based bias direction. Computes one-dimensional subspace that is the span
    of a specific concept (e.g. gender) using PCA. This subspace minimizes the sum of
    squared distances from all seed word embeddings.

    !!! Note
        It is uncommon to utilize more than one direction to represent a concept.

    Implementation and terminology based on Rathore, A., Dev, S., Phillips, J.M., Srikumar,
    V., Zheng, Y., Yeh, C.M., Wang, J., Zhang, W., & Wang, B. (2021).
    [VERB: Visualizing and Interpreting Bias Mitigation Techniques for
    Word Representations](https://api.semanticscholar.org/CorpusID:233168618).
    ArXiv, abs/2104.02797.
    """

    def __call__(self, seed_embeddings: torch.Tensor):
        """
        # Parameters

        !!! Note
            In the examples below, we treat gender identity as binary, which does not accurately
            characterize gender in real life.

        seed_embeddings : `torch.Tensor`
            A tensor of size (batch_size, ..., dim) containing seed word embeddings related to
            a concept. For example, if the concept is gender, seed_embeddings could contain embeddings
            for words like "man", "king", "brother", "woman", "queen", "sister", etc.

        # Returns

        bias_direction : `torch.Tensor`
            A unit tensor of size (dim, ) representing the concept subspace.
        """
        # Some sanity checks
        if seed_embeddings.ndim < 2:
            # Fix: the message previously said "seed_embeddings1", which is not
            # this method's parameter name.
            raise ConfigurationError("seed_embeddings must have at least two dimensions.")

        with torch.set_grad_enabled(self.requires_grad):
            # pca_lowrank centers the embeddings by default
            # There will be two dimensions when applying PCA to
            # definitionally-gendered words: 1) the gender direction,
            # 2) all other directions, with the gender direction being principal.
            _, _, V = torch.pca_lowrank(seed_embeddings, q=2)

            # get top principal component
            bias_direction = V[:, 0]
            return self._normalize_bias_direction(bias_direction)
class PairedPCABiasDirection(BiasDirection):
    """
    Paired-PCA-based bias direction. Computes one-dimensional subspace that is the span
    of a specific concept (e.g. gender) as the first principle component of the
    difference vectors between seed word embedding pairs.

    !!! Note
        It is uncommon to utilize more than one direction to represent a concept.

    Based on: T. Bolukbasi, K. W. Chang, J. Zou, V. Saligrama, and A. Kalai. [Man is to
    computer programmer as woman is to homemaker? debiasing word embeddings]
    (https://api.semanticscholar.org/CorpusID:1704893).
    In ACM Transactions of Information Systems, 2016.

    Implementation and terminology based on Rathore, A., Dev, S., Phillips, J.M., Srikumar,
    V., Zheng, Y., Yeh, C.M., Wang, J., Zhang, W., & Wang, B. (2021).
    [VERB: Visualizing and Interpreting Bias Mitigation Techniques for
    Word Representations](https://api.semanticscholar.org/CorpusID:233168618).
    ArXiv, abs/2104.02797.
    """

    def __call__(self, seed_embeddings1: torch.Tensor, seed_embeddings2: torch.Tensor):
        """
        # Parameters

        !!! Note
            In the examples below, we treat gender identity as binary, which does not accurately
            characterize gender in real life.

        seed_embeddings1 : `torch.Tensor`
            A tensor of size (batch_size, ..., dim) containing seed word
            embeddings related to a concept group. For example, if the concept is gender,
            seed_embeddings1 could contain embeddings for linguistically masculine words, e.g.
            "man", "king", "brother", etc.

        seed_embeddings2: `torch.Tensor`
            A tensor of the same size as seed_embeddings1 containing seed word
            embeddings related to a different group for the same concept. For example,
            seed_embeddings2 could contain embeddings for linguistically feminine words, e.g.
            "woman", "queen", "sister", etc.

        !!! Note
            For Paired-PCA, the embeddings at the same positions in each of seed_embeddings1 and
            seed_embeddings2 are expected to form seed word pairs. For example, if the concept
            is gender, the embeddings for ("man", "woman"), ("king", "queen"), ("brother", "sister"), etc.
            should be at the same positions in seed_embeddings1 and seed_embeddings2.

        !!! Note
            All tensors are expected to be on the same device.

        # Returns

        bias_direction : `torch.Tensor`
            A unit tensor of size (dim, ) representing the concept subspace.
        """
        # Sanity checks on the paired inputs.
        if seed_embeddings1.size() != seed_embeddings2.size():
            raise ConfigurationError("seed_embeddings1 and seed_embeddings2 must be the same size.")
        if seed_embeddings1.ndim < 2:
            raise ConfigurationError(
                "seed_embeddings1 and seed_embeddings2 must have at least two dimensions."
            )

        with torch.set_grad_enabled(self.requires_grad):
            # PCA over the pairwise difference vectors; the top component spans
            # the concept. No centering, since the differences themselves carry
            # the signal.
            diff_vectors = seed_embeddings1 - seed_embeddings2
            num_components = min(diff_vectors.size(0), diff_vectors.size(1)) - 1
            _, _, V = torch.pca_lowrank(diff_vectors, q=num_components, center=False)
            return self._normalize_bias_direction(V[:, 0])
class TwoMeansBiasDirection(BiasDirection):
    """
    Two-means bias direction. Computes one-dimensional subspace that is the span
    of a specific concept (e.g. gender) as the normalized difference vector of the
    averages of seed word embedding sets.

    !!! Note
        It is uncommon to utilize more than one direction to represent a concept.

    Based on: Dev, S., & Phillips, J.M. (2019). [Attenuating Bias in Word Vectors]
    (https://api.semanticscholar.org/CorpusID:59158788). AISTATS.

    Implementation and terminology based on Rathore, A., Dev, S., Phillips, J.M., Srikumar,
    V., Zheng, Y., Yeh, C.M., Wang, J., Zhang, W., & Wang, B. (2021).
    [VERB: Visualizing and Interpreting Bias Mitigation Techniques for
    Word Representations](https://api.semanticscholar.org/CorpusID:233168618).
    ArXiv, abs/2104.02797.
    """

    def __call__(self, seed_embeddings1: torch.Tensor, seed_embeddings2: torch.Tensor):
        """
        # Parameters

        !!! Note
            In the examples below, we treat gender identity as binary, which does not accurately
            characterize gender in real life.

        seed_embeddings1 : `torch.Tensor`
            A tensor of size (embeddings1_batch_size, ..., dim) containing seed word
            embeddings related to a specific concept group. For example, if the concept is gender,
            seed_embeddings1 could contain embeddings for linguistically masculine words, e.g.
            "man", "king", "brother", etc.

        seed_embeddings2: `torch.Tensor`
            A tensor of size (embeddings2_batch_size, ..., dim) containing seed word
            embeddings related to a different group for the same concept. For example,
            seed_embeddings2 could contain embeddings for linguistically feminine words, e.g.
            "woman", "queen", "sister", etc.

        !!! Note
            seed_embeddings1 and seed_embeddings2 need NOT be the same size. Furthermore,
            the embeddings at the same positions in each of seed_embeddings1 and seed_embeddings2
            are NOT expected to form seed word pairs.

        !!! Note
            All tensors are expected to be on the same device.

        # Returns

        bias_direction : `torch.Tensor`
            A unit tensor of size (dim, ) representing the concept subspace.
        """
        # Sanity checks — the two sets may differ in batch size, but must agree
        # on embedding dimensionality.
        if seed_embeddings1.ndim < 2 or seed_embeddings2.ndim < 2:
            raise ConfigurationError(
                "seed_embeddings1 and seed_embeddings2 must have at least two dimensions."
            )
        if seed_embeddings1.size(-1) != seed_embeddings2.size(-1):
            raise ConfigurationError("All seed embeddings must have same dimensionality.")

        with torch.set_grad_enabled(self.requires_grad):
            # The concept direction is the (normalized) vector between the two
            # group centroids.
            group1_mean = seed_embeddings1.mean(dim=0)
            group2_mean = seed_embeddings2.mean(dim=0)
            return self._normalize_bias_direction(group1_mean - group2_mean)
class ClassificationNormalBiasDirection(BiasDirection):
    """
    Classification normal bias direction. Computes one-dimensional subspace that is the span
    of a specific concept (e.g. gender) as the direction perpendicular to the classification
    boundary of a linear support vector machine fit to classify seed word embedding sets.

    !!! Note
        It is uncommon to utilize more than one direction to represent a concept.

    Based on: Ravfogel, S., Elazar, Y., Gonen, H., Twiton, M., & Goldberg, Y. (2020).
    [Null It Out: Guarding Protected Attributes by Iterative Nullspace Projection]
    (https://api.semanticscholar.org/CorpusID:215786522). ArXiv, abs/2004.07667.

    Implementation and terminology based on Rathore, A., Dev, S., Phillips, J.M., Srikumar,
    V., Zheng, Y., Yeh, C.M., Wang, J., Zhang, W., & Wang, B. (2021).
    [VERB: Visualizing and Interpreting Bias Mitigation Techniques for
    Word Representations](https://api.semanticscholar.org/CorpusID:233168618).
    ArXiv, abs/2104.02797.
    """

    def __init__(self):
        # This method is never differentiable (SVM fit happens in numpy/sklearn),
        # so requires_grad is left at the parent default of False.
        super().__init__()

    def __call__(self, seed_embeddings1: torch.Tensor, seed_embeddings2: torch.Tensor):
        """
        # Parameters

        !!! Note
            In the examples below, we treat gender identity as binary, which does not accurately
            characterize gender in real life.

        seed_embeddings1 : `torch.Tensor`
            A tensor of size (embeddings1_batch_size, ..., dim) containing seed word
            embeddings related to a specific concept group. For example, if the concept is gender,
            seed_embeddings1 could contain embeddings for linguistically masculine words, e.g.
            "man", "king", "brother", etc.

        seed_embeddings2: `torch.Tensor`
            A tensor of size (embeddings2_batch_size, ..., dim) containing seed word
            embeddings related to a different group for the same concept. For example,
            seed_embeddings2 could contain embeddings for linguistically feminine words, e.g.
            "woman", "queen", "sister", etc.

        !!! Note
            seed_embeddings1 and seed_embeddings2 need NOT be the same size. Furthermore,
            the embeddings at the same positions in each of seed_embeddings1 and seed_embeddings2
            are NOT expected to form seed word pairs.

        !!! Note
            All tensors are expected to be on the same device.

        !!! Note
            This bias direction method is NOT differentiable.

        # Returns

        bias_direction : `torch.Tensor`
            A unit tensor of size (dim, ) representing the concept subspace.
        """
        # Fix: `import sklearn` at the top of the file does not import the `svm`
        # submodule, so `sklearn.svm.SVC` could fail with AttributeError unless
        # something else had already imported it. Import it explicitly here.
        import sklearn.svm

        # Some sanity checks
        if seed_embeddings1.ndim < 2 or seed_embeddings2.ndim < 2:
            raise ConfigurationError(
                "seed_embeddings1 and seed_embeddings2 must have at least two dimensions."
            )
        if seed_embeddings1.size(-1) != seed_embeddings2.size(-1):
            raise ConfigurationError("All seed embeddings must have same dimensionality.")

        device = seed_embeddings1.device
        # Collapse any leading dimensions to a 2D (num_words, dim) matrix and
        # move to CPU/numpy for sklearn; gradients cannot flow through this.
        seed_embeddings1 = seed_embeddings1.flatten(end_dim=-2).detach().cpu().numpy()
        seed_embeddings2 = seed_embeddings2.flatten(end_dim=-2).detach().cpu().numpy()

        # Binary classification task: group 1 -> label 0, group 2 -> label 1.
        X = np.vstack([seed_embeddings1, seed_embeddings2])
        Y = np.concatenate([[0] * seed_embeddings1.shape[0], [1] * seed_embeddings2.shape[0]])

        classifier = sklearn.svm.SVC(kernel="linear").fit(X, Y)
        # For a linear SVM, coef_[0] is the normal to the separating hyperplane,
        # i.e. the direction along which the two groups differ most.
        bias_direction = torch.Tensor(classifier.coef_[0]).to(device)

        return self._normalize_bias_direction(bias_direction)