In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import pandas as pd
import numpy as np
from pathlib import Path
from typing import *
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
import sys
sys.path.append("../lib")

In [4]:
from bert_utils import Config, BertPreprocessor

In [5]:
config = Config(
    model_type="bert-base-uncased",
    max_seq_len=128,
)

In [6]:
processor = BertPreprocessor(config.model_type, config.max_seq_len)

### Prepare model

In [7]:
from pytorch_pretrained_bert import BertConfig, BertForMaskedLM
model = BertForMaskedLM.from_pretrained(config.model_type)
model.eval()

BertForMaskedLM(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): BertLayerNorm()
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): BertLayerNorm()
              (dropout): Dropout(p=0.1, inplace=False)
            )
   

In [8]:
sequence_output, pooled_output = model.bert(processor.to_bert_model_input("hello world"),
                                            output_all_encoded_layers=False)

In [9]:
def get_word_vector(sentence: str, word: str, n_calc: int=10):
    idx = processor.get_index(sentence, word)
    outputs = None
    with torch.no_grad():
        for _ in range(n_calc):
            sequence_output, _ = model.bert(processor.to_bert_model_input(sentence),
                                            output_all_encoded_layers=False)
            sequence_output.squeeze_(0)
            if outputs is None: outputs = torch.zeros_like(sequence_output)
            outputs = sequence_output + outputs
    return outputs.detach().cpu().numpy()[idx] / n_calc

In [10]:
def cosine_similarity(x, y):
    return np.dot(x, y) / (np.linalg.norm(x) * np.linalg.norm(y))

In [11]:
vec1 = get_word_vector("he is a programmer.", "programmer")

In [12]:
vec2 = get_word_vector("she is a programmer.", "programmer")

In [13]:
np.linalg.norm(vec1 - vec2)

3.1893053

In [14]:
diff1 = vec1 - vec2

In [15]:
out_softmax = model.cls.predictions.decoder.weight.data.cpu().numpy()

In [16]:
out_softmax.shape

(30522, 768)

In [17]:
ordering = ((out_softmax @ vec1)).argsort()

In [18]:
(out_softmax @ vec1).shape

(30522,)

In [19]:
processor.token_to_index("programmer")

20273

In [20]:
ordering = (-(out_softmax @ vec1)).argsort()

In [21]:
(out_softmax @ vec1)[(out_softmax @ vec1).argsort()]

array([-1.8011818, -1.7252083, -1.6536064, ...,  3.104794 ,  4.3238416,
        4.811603 ], dtype=float32)

In [22]:
ordering = (-(out_softmax @ vec1)).argsort()
{
    ordering[i] + 1: processor.index_to_token(i)
    for i in ordering[:10]
}

{29024: 'programmer',
 22224: '[CLS]',
 19482: '##kato',
 7975: 'programmers',
 12416: 'computational',
 24599: 'mathematicians',
 14957: 'hindwings',
 24939: 'constructions',
 8656: 'mathematical',
 16697: 'planner'}

In [23]:
ordering = (-(out_softmax @ vec2)).argsort()
{
    ordering[i] + 1: processor.index_to_token(i)
    for i in ordering[:10]
}

{20919: '[CLS]',
 12791: 'programmer',
 21291: '##kato',
 6802: 'mathematical',
 6215: 'programmers',
 10774: 'computational',
 7348: 'constructions',
 5195: 'mathematicians',
 29172: 'mathematician',
 16189: 'nguyen'}

In [24]:
ordering = (-(out_softmax @ (vec1 - vec2))).argsort()
{
    ordering[i] + 1: processor.index_to_token(i)
    for i in ordering[:10]
}

{29632: 'lublin',
 13620: 'princess',
 25972: '##lika',
 8296: '##sell',
 20209: 'selma',
 23939: '##rgen',
 12656: '##ouk',
 8478: '##rock',
 1013: '##sant',
 18130: 'joan'}

In [25]:
vec1 = get_word_vector("he is a person", "person")

In [26]:
vec2 = get_word_vector("she is a person", "person")

In [27]:
np.linalg.norm(vec1 - vec2)

2.6573544

In [28]:
diff2 = vec1 - vec2

In [29]:
np.dot(diff1, diff2) / (np.linalg.norm(diff1) * np.linalg.norm(diff2))

0.14555529

In [30]:
vec1 = get_word_vector("he is [MASK].", "[MASK]")
vec2 = get_word_vector("she is [MASK].", "[MASK]")
diff3 = vec1 - vec2

ValueError: No [MASK] tokenn tokens [he, is, [, [UNK]] found

In [31]:
ordering = (-(out_softmax @ vec1)).argsort()
{
    ordering[i] + 1: processor.index_to_token(i)
    for i in ordering[:10]
}

{2478: '[CLS]',
 1001: '[MASK]',
 5380: 'a',
 6562: 'person',
 2202: 'an',
 1164: 'the',
 7948: '.',
 28087: 'pilgrim',
 4535: 'something',
 24484: '##erving'}

In [32]:
ordering = (-(out_softmax @ vec2)).argsort()
{
    ordering[i] + 1: processor.index_to_token(i)
    for i in ordering[:10]
}

{17933: '[CLS]',
 19149: '[MASK]',
 27416: 'a',
 2832: 'an',
 17111: 'the',
 10499: 'person',
 8820: '.',
 13042: 'is',
 4283: ',',
 8700: 'and'}

In [33]:
ordering = (-(out_softmax @ (vec1 - vec2))).argsort()
{
    ordering[i] + 1: processor.index_to_token(i)
    for i in ordering[:10]
}

{22855: '##odies',
 11857: 'turing',
 25162: 'nipples',
 10123: 'brotherhood',
 14365: '##duced',
 18019: '##boys',
 8307: '##verted',
 23766: '##gues',
 16504: 'bikini',
 16188: 'beaux'}

## Softmax layer analysis

In [34]:
processor.full_vocab

{0: '[PAD]',
 1: '[unused0]',
 2: '[unused1]',
 3: '[unused2]',
 4: '[unused3]',
 5: '[unused4]',
 6: '[unused5]',
 7: '[unused6]',
 8: '[unused7]',
 9: '[unused8]',
 10: '[unused9]',
 11: '[unused10]',
 12: '[unused11]',
 13: '[unused12]',
 14: '[unused13]',
 15: '[unused14]',
 16: '[unused15]',
 17: '[unused16]',
 18: '[unused17]',
 19: '[unused18]',
 20: '[unused19]',
 21: '[unused20]',
 22: '[unused21]',
 23: '[unused22]',
 24: '[unused23]',
 25: '[unused24]',
 26: '[unused25]',
 27: '[unused26]',
 28: '[unused27]',
 29: '[unused28]',
 30: '[unused29]',
 31: '[unused30]',
 32: '[unused31]',
 33: '[unused32]',
 34: '[unused33]',
 35: '[unused34]',
 36: '[unused35]',
 37: '[unused36]',
 38: '[unused37]',
 39: '[unused38]',
 40: '[unused39]',
 41: '[unused40]',
 42: '[unused41]',
 43: '[unused42]',
 44: '[unused43]',
 45: '[unused44]',
 46: '[unused45]',
 47: '[unused46]',
 48: '[unused47]',
 49: '[unused48]',
 50: '[unused49]',
 51: '[unused50]',
 52: '[unused51]',
 53: '[unused52]',

In [35]:
word_vectors ={
    word: out_softmax[i, :]
    for i, word in processor.full_vocab.items()
}

In [36]:
def cosine_similarity(x, y):
    return np.dot(x, y) / (np.linalg.norm(x) * np.linalg.norm(y))

from heapq import heappush, heappop
def nearest_neighbors(x, n=10):
    if isinstance(x, str):
        x = word_vectors[x]
    heap = []
    for w, v in word_vectors.items():
        sim = cosine_similarity(x, v)
        if len(heap) < n:
            heappush(heap, (sim, w))
        else:
            if heap[0] < (sim, w):
                heappop(heap)
                heappush(heap, (sim, w))
    return sorted(heap, reverse=True)

In [37]:
nearest_neighbors("hello")

[(0.99999994, 'hello'),
 (0.61714774, 'goodbye'),
 (0.59412915, 'goodnight'),
 (0.57801807, 'greeting'),
 (0.5480656, 'farewell'),
 (0.5474026, 'hey'),
 (0.5347895, 'hi'),
 (0.52100366, 'ひ'),
 (0.50768197, 'congratulations'),
 (0.50653183, '人')]

In [38]:
nearest_neighbors("programmer")

[(1.0, 'programmer'),
 (0.8326575, 'programmers'),
 (0.6889887, 'keyboardist'),
 (0.6881369, 'mathematician'),
 (0.6862794, 'mathematicians'),
 (0.68078387, '[unused8]'),
 (0.6806002, '[unused782]'),
 (0.68054837, 'র'),
 (0.6805353, '1756'),
 (0.680412, '[unused59]')]

In [39]:
nearest_neighbors("doctor")

[(1.0000001, 'doctor'),
 (0.7093963, 'doctors'),
 (0.588878, 'physician'),
 (0.5472802, 'physicians'),
 (0.52723026, 'psychiatrist'),
 (0.5178553, 'dentist'),
 (0.51108694, 'surgeon'),
 (0.5001953, 'healer'),
 (0.4947291, 'medical'),
 (0.4873745, 'lawyer')]

In [40]:
nearest_neighbors("queen")

[(1.0, 'queen'),
 (0.64685124, 'king'),
 (0.60091084, 'queens'),
 (0.59095985, 'princess'),
 (0.5419779, 'empress'),
 (0.50120705, 'prince'),
 (0.50038207, 'duchess'),
 (0.49041915, 'countess'),
 (0.4852578, 'monarch'),
 (0.4709278, 'lady')]

In [41]:
nearest_neighbors(word_vectors["man"] - word_vectors["woman"] + word_vectors["king"])

[(0.79005355, 'king'),
 (0.5477464, 'man'),
 (0.41445062, 'kings'),
 (0.41256273, 'prince'),
 (0.35672778, 'queen'),
 (0.3516159, '##man'),
 (0.34965998, '336'),
 (0.34959438, '670'),
 (0.34730172, '268'),
 (0.3471633, '263')]

In [42]:
nearest_neighbors(word_vectors["man"])

[(1.0, 'man'),
 (0.63370425, 'woman'),
 (0.5670583, 'men'),
 (0.53630894, '##man'),
 (0.50406426, 'boy'),
 (0.49270344, 'girl'),
 (0.48984453, 'person'),
 (0.46555227, 'guy'),
 (0.46152157, '229'),
 (0.45967817, '228')]

In [43]:
vec1 = get_word_vector("he is [MASK].", "[MASK]")
vec2 = get_word_vector("she is [MASK].", "[MASK]")

ValueError: No [MASK] tokenn tokens [he, is, [, [UNK]] found

In [44]:
diff = (-(out_softmax @ (vec1 - vec2)))
ordering = diff.argsort()
{
    diff[i] + 1: processor.index_to_token(i)
    for i in ordering[:10]
}

{0.4538675546646118: '##odies',
 0.4861065149307251: 'turing',
 0.49317944049835205: 'nipples',
 0.5031433701515198: 'brotherhood',
 0.516745537519455: '##duced',
 0.5172170400619507: '##boys',
 0.526495099067688: '##verted',
 0.5401962995529175: '##gues',
 0.5412156581878662: 'bikini',
 0.5476875901222229: 'beaux'}

In [45]:
vec1 = get_word_vector("he is [MASK].", "is")
vec2 = get_word_vector("she is [MASK].", "is")
np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))

0.9580987

In [48]:
vec3 = get_word_vector("he is [MASK].", "[MASK]")
vec4 = get_word_vector("she is [MASK].", "[MASK]")
np.dot(vec3, vec4) / (np.linalg.norm(vec3) * np.linalg.norm(vec4))

ValueError: No [MASK] tokenn tokens [he, is, [, [UNK]] found

In [49]:
cosine_similarity((vec1 - vec2), (vec3 - vec4))

NameError: name 'vec3' is not defined

In [50]:
vec1 = get_word_vector("she is [MASK].", "[MASK]")
aaaa = out_softmax @ vec1
ordering = (-aaaa).argsort()
{
    aaaa[i] + 1: processor.index_to_token(i)
    for i in ordering[:10]
}

ValueError: No [MASK] tokenn tokens [she, is, [, [UNK]] found

In [51]:
vec1 = get_word_vector("he is [MASK].", "[MASK]")
ordering = (-(out_softmax @ vec1)).argsort()
{
    ordering[i] + 1: processor.index_to_token(i)
    for i in ordering[:10]
}

ValueError: No [MASK] tokenn tokens [he, is, [, [UNK]] found

### Comparing words in context

In [52]:
vec1 = get_word_vector("he is a programmer.", "programmer")
vec2 = get_word_vector("she is a programmer.", "programmer")
vec3 = get_word_vector("the programmer wrote code on the board.", "programmer")

In [53]:
cosine_similarity(vec1, vec2)

0.9778098

In [54]:
cosine_similarity(vec1, vec3)

0.70002395

In [55]:
vec1 = get_word_vector("he is a nurse.", "nurse")
vec2 = get_word_vector("she is a nurse.", "nurse")
vec3 = get_word_vector("the nurse wrote code on the board.", "nurse")

In [56]:
cosine_similarity(vec1, vec2)

0.97982275

In [57]:
cosine_similarity(vec1, vec3)

0.722002

In [58]:
vecs = []
vecs.append(get_word_vector("he is a programmer.", "programmer"))
vecs.append(get_word_vector("he is a programmer.", "he"))
vecs.append(get_word_vector("she is a programmer.", "programmer"))
vecs.append(get_word_vector("she is a programmer.", "she"))

In [59]:
def construct_sim_matrix(vecs):
    sim_matrix = np.zeros((len(vecs), len(vecs)))
    for i, v in enumerate(vecs):
        for j, w in enumerate(vecs):
            sim_matrix[i, j] = cosine_similarity(v, w)
    return sim_matrix

In [60]:
construct_sim_matrix(vecs)

array([[1.        , 0.26702088, 0.97780979, 0.25233993],
       [0.26702088, 1.00000012, 0.30498534, 0.78304923],
       [0.97780979, 0.30498534, 1.        , 0.29841116],
       [0.25233993, 0.78304923, 0.29841116, 1.        ]])

In [61]:
vecs = []
vecs.append(get_word_vector("he is a programmer.", "he"))
vecs.append(get_word_vector("she is a programmer.", "she"))
vecs.append(get_word_vector("his profession is a programmer.", "his"))
vecs.append(get_word_vector("her profession is a programmer.", "her"))
vecs.append(get_word_vector("please talk to him.", "him"))
vecs.append(get_word_vector("please talk to her.", "her"))
vecs.append(get_word_vector("I work as a programmer.", "programmer"))
vecs.append(get_word_vector("I work as a nurse.", "nurse"))
vecs.append(get_word_vector("I work as a doctor.", "doctor"))
vecs.append(get_word_vector("I work as a nurse.", "nurse"))
vecs.append(get_word_vector("I am your father.", "father"))
vecs.append(get_word_vector("I am your mother.", "mother"))

In [62]:
cosine_similarity(vecs[1]- vecs[0], vecs[3] - vecs[2])

0.8649077

In [63]:
cosine_similarity(vecs[1]- vecs[0], vecs[5] - vecs[4])

0.705075

In [64]:
cosine_similarity(vecs[3]- vecs[2], vecs[5] - vecs[4])

0.6863628

In [65]:
cosine_similarity(vecs[3]- vecs[2], vecs[7] - vecs[6])

0.15071443

In [66]:
cosine_similarity(vecs[3]- vecs[2], vecs[9] - vecs[8])

0.22179426

So, there does seem to be a gender subspace...?

In [65]:
cosine_similarity(vecs[3]- vecs[2], vecs[11] - vecs[10])

0.5433057

### Checking for similarity

Can't find much of a difference...

In [67]:
prog_vec = get_word_vector("[MASK] is a programmer.", "programmer")
she_vec = get_word_vector("she is a programmer.", "she")
he_vec = get_word_vector("he is a programmer.", "he")
construct_sim_matrix([prog_vec, she_vec, he_vec])

array([[1.00000012, 0.2496486 , 0.24885978],
       [0.2496486 , 1.        , 0.78304923],
       [0.24885978, 0.78304923, 1.00000012]])

In [68]:
prog_vec = get_word_vector("[MASK] is a programmer.", "programmer")
she_vec = get_word_vector("she is a programmer.", "programmer")
he_vec = get_word_vector("he is a programmer.", "programmer")
construct_sim_matrix([prog_vec, she_vec, he_vec])

array([[1.00000012, 0.88541687, 0.90504026],
       [0.88541687, 1.        , 0.97780979],
       [0.90504026, 0.97780979, 1.        ]])

### Checking the distance between words in neutral contexts

Programmer is slightly more similar to father than to mother

In [69]:
def construct_sim_matrix_df(sentences: List[str],
                           words: List[str]):
    sim = construct_sim_matrix([get_word_vector(sent, word) for sent, word in zip(sentences, words)])
    return pd.DataFrame(data=sim, index=words, columns=words)

In [70]:
construct_sim_matrix_df(["That person is a programmer.", "That person is my mother.", 
                         "That person is my father."],
                       ["programmer", "mother", "father"])

Unnamed: 0,programmer,mother,father
programmer,1.0,0.42088,0.444445
mother,0.42088,1.0,0.733642
father,0.444445,0.733642,1.0


Nurse is closer to mother

In [71]:
construct_sim_matrix_df(["That person is a nurse.", "That person is my mother.", 
                         "That person is my father."],
                       ["nurse", "mother", "father"])

Unnamed: 0,nurse,mother,father
nurse,1.0,0.484853,0.392041
mother,0.484853,1.0,0.733642
father,0.392041,0.733642,1.0


In [72]:
construct_sim_matrix_df(["My nurse will not allow that.", "That person is my mother.", 
                         "That person is my father."],
                       ["nurse", "mother", "father"])

Unnamed: 0,nurse,mother,father
nurse,1.0,0.488195,0.418121
mother,0.488195,1.0,0.733642
father,0.418121,0.733642,1.0


Even the same word can have pretty different embeddings

In [73]:
construct_sim_matrix_df(["Please don't let your mother eat that cookie.", "That person is my mother.", 
                         "That person is my father."],
                       ["mother", "mother", "father"])

Unnamed: 0,mother,mother.1,father
mother,1.0,0.765978,0.559372
mother,0.765978,1.0,0.733642
father,0.559372,0.733642,1.0


Different parts of speech lead to vastly different embeddings

In [74]:
construct_sim_matrix_df(["The cat could mother that dog.", "That person is my mother.", 
                         "That person is my father."],
                       ["mother", "mother", "father"])

Unnamed: 0,mother,mother.1,father
mother,1.0,0.582067,0.446689
mother,0.582067,1.0,0.733642
father,0.446689,0.733642,1.0


Comparsions between different parts of speech: Still roughly the same pattern

In [75]:
construct_sim_matrix_df(["That person is a nurse.", "What is she doing?", 
                         "What is he doing?"],
                       ["nurse", "she", "he"])

Unnamed: 0,nurse,she,he
nurse,1.0,0.450963,0.338125
she,0.450963,1.0,0.726877
he,0.338125,0.726877,1.0


# Gendered Subspace

### Construct gender subspace

A simple test (TODO: Automate construction)

In [76]:
male_vecs, female_vecs = [], []
def add_word_vecs(s: str, male_w: str, female_w: str):
    male_vecs.append(get_word_vector(s.replace("XXX", male_w), male_w))
    female_vecs.append(get_word_vector(s.replace("XXX", female_w), female_w))

for prof in ["musician", "magician", "nurse", "doctor", "teacher"]:
    add_word_vecs("XXX is a YYY".replace("YYY", prof), "he", "she")
    add_word_vecs("XXX works as a YYY".replace("YYY", prof), "he", "she")

for action in ["talk to", "hit", "ignore", "please", "remove"]:
    add_word_vecs("please YYY XXX".replace("YYY", action), "him", "her")
    add_word_vecs("don't YYY XXX".replace("YYY", action), "him", "her")

for thing in ["food", "music", "work", "running", "cooking"]:
    add_word_vecs("XXX likes YYY".replace("YYY", thing), "he", "she")
    add_word_vecs("XXX enjoys YYY".replace("YYY", thing), "he", "she")

In [77]:
male_vecs = np.r_[male_vecs]
female_vecs = np.r_[female_vecs]

In [78]:
from sklearn.decomposition import TruncatedSVD
svd = TruncatedSVD(n_components=5)

In [79]:
(male_vecs - female_vecs).shape

(30, 768)

In [80]:
diff_vecs = male_vecs - female_vecs

In [81]:
X = svd.fit_transform(diff_vecs / (diff_vecs ** 2).sum(1, keepdims=True))

In [82]:
svd.explained_variance_ratio_

array([0.02218353, 0.33863246, 0.15979412, 0.07528188, 0.06134707],
      dtype=float32)

In [83]:
svd.explained_variance_ratio_.sum()

0.657239

In [84]:
svd.components_.shape

(5, 768)

### Try eliminating this subspace and checking outputs softmax

In [85]:
svd.components_

array([[-0.00213644,  0.06697969,  0.01185373, ...,  0.01891251,
         0.05929657, -0.05238242],
       [-0.03966811,  0.00047857, -0.00887874, ..., -0.01447054,
        -0.01458254, -0.0467876 ],
       [ 0.0187734 ,  0.02606242, -0.05707034, ..., -0.02682133,
         0.00981187,  0.00205974],
       [ 0.04818484,  0.01296756, -0.02450339, ..., -0.04148804,
         0.01490192,  0.03740549],
       [ 0.03615659,  0.04309639,  0.00483598, ...,  0.0155314 ,
        -0.04863234, -0.0272552 ]], dtype=float32)

In [86]:
vec = get_word_vector("[MASK] is a nurse.", "[MASK]")

In [87]:
logits_before = (out_softmax @ vec)

In [88]:
logits_before[processor.token_to_index("she")]

1.6940118

In [89]:
logits_before[processor.token_to_index("he")]

1.2605883

In [90]:
def eliminate_subspace(v, subspace):
    # TODO: Is there a better way?
    V = subspace
    beta = (np.linalg.inv(V @ V.T) @ V) @ v
    res = (v - (V.T @ beta))
    return res

In [91]:
vec_after = eliminate_subspace(vec, svd.components_)

In [92]:
logits_after = (out_softmax @ vec_after)

The difference is indeed reduced

In [93]:
logits_after[processor.token_to_index("she")]

1.35022

In [94]:
logits_after[processor.token_to_index("he")]

1.2542218

Not quite working here...

In [95]:
sentence = "[MASK] is a programmer."
vec = get_word_vector(sentence, "[MASK]")
logits_before = (out_softmax @ vec)
vec_after = eliminate_subspace(vec, svd.components_)
logits_after = (out_softmax @ vec_after)
print(f"Logit diff before: {logits_before[processor.token_to_index('she')] - logits_before[processor.token_to_index('he')]}")
print(f"Logit diff after: {logits_after[processor.token_to_index('she')] - logits_after[processor.token_to_index('he')]}")

Logit diff before: 0.08666396141052246
Logit diff after: 0.17084109783172607


Hmm...

In [96]:
sentence = "[MASK] is a housewife."
vec = get_word_vector(sentence, "[MASK]")
logits_before = (out_softmax @ vec)
vec_after = eliminate_subspace(vec, svd.components_)
logits_after = (out_softmax @ vec_after)
print(f"Logit diff before: {logits_before[processor.token_to_index('she')] - logits_before[processor.token_to_index('he')]}")
print(f"Logit diff after: {logits_after[processor.token_to_index('she')] - logits_after[processor.token_to_index('he')]}")

Logit diff before: 0.21036386489868164
Logit diff after: -0.13858234882354736


Hmm...

In [97]:
sentence = "[MASK] is my mother."
vec = get_word_vector(sentence, "[MASK]")
logits_before = (out_softmax @ vec)
vec_after = eliminate_subspace(vec, svd.components_)
logits_after = (out_softmax @ vec_after)
print(f"Logit diff before: {logits_before[processor.token_to_index('she')] - logits_before[processor.token_to_index('he')]}")
print(f"Logit diff after: {logits_after[processor.token_to_index('she')] - logits_after[processor.token_to_index('he')]}")

Logit diff before: 0.36542224884033203
Logit diff after: 0.06998968124389648
