# Imports

In [2]:
import neurox.data.extraction.transformers_extractor as transformers_extractor
import neurox.data.loader as data_loader
import neurox.interpretation.utils as utils
import neurox.interpretation.linear_probe as linear_probe
import neurox.interpretation.probeless as probeless
import neurox.interpretation.gaussian_probe as gaussian_probe
import neurox.interpretation.iou_probe as iou_probe

# Data

In [3]:
# Word (sentence) files and their label files for each split.
# Each pair is expected to line up token-for-token, one sentence per line.
train_sentences, train_labels = "data/pos_train.word", "data/pos_train.label"
dev_sentences, dev_labels = "data/pos_dev.word", "data/pos_dev.label"
test_sentences, test_labels = "data/pos_test.word", "data/pos_test.label"

In [4]:
!cat "data/pos_train.word"

Pierre Vinken , 61 years old , will join the board as a nonexecutive director Nov. 29 .
Mr. Vinken is chairman of Elsevier N.V. , the Dutch publishing group .
Rudolph Agnew , 55 years old and former chairman of Consolidated Gold Fields PLC , was named a nonexecutive director of this British industrial conglomerate .
A form of asbestos once used to make Kent cigarette filters has caused a high percentage of cancer deaths among a group of workers exposed to it more than 30 years ago , researchers reported .
The asbestos fiber , crocidolite , is unusually resilient once it enters the lungs , with even brief exposures to it causing symptoms that show up decades later , researchers said .
Lorillard Inc. , the unit of New York-based Loews Corp. that makes Kent cigarettes , stopped using crocidolite in its Micronite cigarette filters in 1956 .
Although preliminary findings were reported more than a year ago , the latest results appear in today 's New England Journal of Medicine , a foru

In [5]:
!cat "data/pos_train.label"

NNP NNP , CD NNS JJ , MD VB DT NN IN DT JJ NN NNP CD .
NNP NNP VBZ NN IN NNP NNP , DT NNP VBG NN .
NNP NNP , CD NNS JJ CC JJ NN IN NNP NNP NNP NNP , VBD VBN DT JJ NN IN DT JJ JJ NN .
DT NN IN NN RB VBN TO VB NNP NN NNS VBZ VBN DT JJ NN IN NN NNS IN DT NN IN NNS VBN TO PRP RBR IN CD NNS IN , NNS VBD .
DT NN NN , NN , VBZ RB JJ IN PRP VBZ DT NNS , IN RB JJ NNS TO PRP VBG NNS WDT VBP RP NNS JJ , NNS VBD .
NNP NNP , DT NN IN JJ JJ NNP NNP WDT VBZ NNP NNS , VBD VBG NN IN PRP$ NN NN NNS IN CD .
IN JJ NNS VBD VBN RBR IN DT NN IN , DT JJS NNS VBP IN NN POS NNP NNP NNP IN NNP , DT NN JJ TO VB JJ NN TO DT NN .
DT NNP NN VBD , `` DT VBZ DT JJ NN .
PRP VBP VBG IN NNS IN IN NN VBD IN NN VBG DT JJ NNS .
EX VBZ DT NN IN PRP$ NNS RB . ''


# Extract Representations

In [6]:
# Extract bert-base-uncased activations for the training sentences and write
# them to train_activations.json (this file is read back later with
# data_loader.load_activations).
# NOTE(review): `aggregation` presumably controls how sub-word pieces are
# combined into a single word-level vector -- confirm against the NeuroX docs.
transformers_extractor.extract_representations('bert-base-uncased',
    train_sentences,
    'train_activations.json',
    aggregation="average" # alternatives: "last", "first"
)

Loading model: bert-base-uncased


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Reading input corpus
Preparing output file
Extracting representations from model
Sentence         : "Pierre Vinken , 61 years old , will join the board as a nonexecutive director Nov. 29 ."
Original    (018): ['Pierre', 'Vinken', ',', '61', 'years', 'old', ',', 'will', 'join', 'the', 'board', 'as', 'a', 'nonexecutive', 'director', 'Nov.', '29', '.']
Tokenized   (025): ['[CLS]', 'pierre', 'vin', '##ken', ',', '61', 'years', 'old', ',', 'will', 'join', 'the', 'board', 'as', 'a', 'none', '##x', '##ec', '##utive', 'director', 'nov', '.', '29', '.', '[SEP]']
Filtered   (023): ['pierre', 'vin', '##ken', ',', '61', 'years', 'old', ',', 'will', 'join', 'the', 'board', 'as', 'a', 'none', '##x', '##ec', '##utive', 'director', 'nov', '.', '29', '.']
Detokenized (018): ['pierre', 'vin##ken', ',', '61', 'years', 'old', ',', 'will', 'join', 'the', 'board', 'as', 'a', 'none##x##ec##utive', 'director', 'nov.', '29', '.']
Counter: 23
Hidden states:  (13, 18, 768)
# Extracted words:  18
Sentence        

Sentence         : "A Lorillard spokewoman said , `` This is an old story ."
Original    (012): ['A', 'Lorillard', 'spokewoman', 'said', ',', '``', 'This', 'is', 'an', 'old', 'story', '.']
Tokenized   (018): ['[CLS]', 'a', 'lori', '##llar', '##d', 'spoke', '##woman', 'said', ',', '`', '`', 'this', 'is', 'an', 'old', 'story', '.', '[SEP]']
Filtered   (016): ['a', 'lori', '##llar', '##d', 'spoke', '##woman', 'said', ',', '`', '`', 'this', 'is', 'an', 'old', 'story', '.']
Detokenized (012): ['a', 'lori##llar##d', 'spoke##woman', 'said', ',', '``', 'this', 'is', 'an', 'old', 'story', '.']
Counter: 16
Hidden states:  (13, 12, 768)
# Extracted words:  12
Sentence         : "We 're talking about years ago before anyone heard of asbestos having any questionable properties ."
Original    (016): ['We', "'re", 'talking', 'about', 'years', 'ago', 'before', 'anyone', 'heard', 'of', 'asbestos', 'having', 'any', 'questionable', 'properties', '.']
Tokenized   (019): ['[CLS]', 'we', "'", 're', 'talking

In [7]:
# Extract bert-base-uncased activations for the dev sentences and write them
# to dev_activations.json.
# NOTE(review): dev_activations.json is not loaded by any cell visible in this
# notebook; keep only if a later evaluation step needs it.
transformers_extractor.extract_representations('bert-base-uncased',
    dev_sentences,
    'dev_activations.json',
    aggregation="average" # alternatives: "last", "first"
)

Loading model: bert-base-uncased


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Reading input corpus
Preparing output file
Extracting representations from model
Sentence         : "The Arizona Corporations Commission authorized an 11.5 % rate increase at Tucson Electric Power Co. , substantially lower than recommended last month by a commission hearing officer and barely half the rise sought by the utility ."
Original    (037): ['The', 'Arizona', 'Corporations', 'Commission', 'authorized', 'an', '11.5', '%', 'rate', 'increase', 'at', 'Tucson', 'Electric', 'Power', 'Co.', ',', 'substantially', 'lower', 'than', 'recommended', 'last', 'month', 'by', 'a', 'commission', 'hearing', 'officer', 'and', 'barely', 'half', 'the', 'rise', 'sought', 'by', 'the', 'utility', '.']
Tokenized   (042): ['[CLS]', 'the', 'arizona', 'corporations', 'commission', 'authorized', 'an', '11', '.', '5', '%', 'rate', 'increase', 'at', 'tucson', 'electric', 'power', 'co', '.', ',', 'substantially', 'lower', 'than', 'recommended', 'last', 'month', 'by', 'a', 'commission', 'hearing', 'officer', '

Sentence         : "South Korean President Roh Tae Woo , brushing aside suggestions that the won be revalued again , said the currency 's current level against the dollar is `` appropriate . ''"
Original    (031): ['South', 'Korean', 'President', 'Roh', 'Tae', 'Woo', ',', 'brushing', 'aside', 'suggestions', 'that', 'the', 'won', 'be', 'revalued', 'again', ',', 'said', 'the', 'currency', "'s", 'current', 'level', 'against', 'the', 'dollar', 'is', '``', 'appropriate', '.', "''"]
Tokenized   (040): ['[CLS]', 'south', 'korean', 'president', 'ro', '##h', 'tae', 'woo', ',', 'brushing', 'aside', 'suggestions', 'that', 'the', 'won', 'be', 'rev', '##al', '##ue', '##d', 'again', ',', 'said', 'the', 'currency', "'", 's', 'current', 'level', 'against', 'the', 'dollar', 'is', '`', '`', 'appropriate', '.', "'", "'", '[SEP]']
Filtered   (038): ['south', 'korean', 'president', 'ro', '##h', 'tae', 'woo', ',', 'brushing', 'aside', 'suggestions', 'that', 'the', 'won', 'be', 'rev', '##al', '##ue', '##d', 

In [8]:
# Extract bert-base-uncased activations for the test sentences and write them
# to test_activations.json.
# NOTE(review): test_activations.json is not loaded by any cell visible in this
# notebook; keep only if a later evaluation step needs it.
transformers_extractor.extract_representations('bert-base-uncased',
    test_sentences,
    'test_activations.json',
    aggregation="average" # alternatives: "last", "first"
)

Loading model: bert-base-uncased


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Reading input corpus
Preparing output file
Extracting representations from model
Sentence         : "Rockwell International Corp. 's Tulsa unit said it signed a tentative agreement extending its contract with Boeing Co. to provide structural parts for Boeing 's 747 jetliners ."
Original    (028): ['Rockwell', 'International', 'Corp.', "'s", 'Tulsa', 'unit', 'said', 'it', 'signed', 'a', 'tentative', 'agreement', 'extending', 'its', 'contract', 'with', 'Boeing', 'Co.', 'to', 'provide', 'structural', 'parts', 'for', 'Boeing', "'s", '747', 'jetliners', '.']
Tokenized   (036): ['[CLS]', 'rockwell', 'international', 'corp', '.', "'", 's', 'tulsa', 'unit', 'said', 'it', 'signed', 'a', 'tentative', 'agreement', 'extending', 'its', 'contract', 'with', 'boeing', 'co', '.', 'to', 'provide', 'structural', 'parts', 'for', 'boeing', "'", 's', '747', 'jet', '##liner', '##s', '.', '[SEP]']
Filtered   (034): ['rockwell', 'international', 'corp', '.', "'", 's', 'tulsa', 'unit', 'said', 'it', 'signed', '

Sentence         : "In January , he accepted the position of vice chairman of Carlyle Group , a merchant banking concern ."
Original    (019): ['In', 'January', ',', 'he', 'accepted', 'the', 'position', 'of', 'vice', 'chairman', 'of', 'Carlyle', 'Group', ',', 'a', 'merchant', 'banking', 'concern', '.']
Tokenized   (022): ['[CLS]', 'in', 'january', ',', 'he', 'accepted', 'the', 'position', 'of', 'vice', 'chairman', 'of', 'carly', '##le', 'group', ',', 'a', 'merchant', 'banking', 'concern', '.', '[SEP]']
Filtered   (020): ['in', 'january', ',', 'he', 'accepted', 'the', 'position', 'of', 'vice', 'chairman', 'of', 'carly', '##le', 'group', ',', 'a', 'merchant', 'banking', 'concern', '.']
Detokenized (019): ['in', 'january', ',', 'he', 'accepted', 'the', 'position', 'of', 'vice', 'chairman', 'of', 'carly##le', 'group', ',', 'a', 'merchant', 'banking', 'concern', '.']
Counter: 20
Hidden states:  (13, 19, 768)
# Extracted words:  19
Sentence         : "SHEARSON LEHMAN HUTTON Inc ."
Original  

In [9]:
# Load the saved training activations. 768 matches the last dimension of the
# "Hidden states" shape printed during extraction, e.g. (13, 18, 768).
activations, num_layers = data_loader.load_activations('train_activations.json', 768)
# Pair every sentence/label line with its activations.
# NOTE(review): 512 is presumably the maximum sentence length -- confirm.
tokens = data_loader.load_data(train_sentences, train_labels, activations, 512)
# Build the feature matrix X and label vector y used by all probes below.
# NOTE(review): 'NN' is presumably the fallback tag for labels that are not in
# the mapping -- confirm against utils.create_tensors.
X, y, mapping = utils.create_tensors(tokens, activations, 'NN')
# Lookup tables between label/source strings and their integer ids.
label2idx, idx2label, src2idx, idx2src = mapping

Loading json activations from train_activations.json...
10 13.0
Number of tokens:  224
length of source dictionary:  142
length of target dictionary:  29
224
Total instances: 224
['likely', 'high', 'causing', 'show', 'decades', 'forum', 'later', 'Corp.', 'enters', 'as', 'Pierre', 'Lorillard', 'having', 'former', 'cigarettes', 'stopped', 'any', 'questionable', "''", 'properties']
Number of samples:  224
Stats: Labels with their frequencies in the final set
POS 1
MD 1
VBD 7
RBR 2
VB 3
JJ 18
. 10
IN 25
CD 5
RP 1
'' 1
JJS 1
`` 1
VBZ 7
NNS 19
VBN 5
PRP$ 2
DT 20
CC 1
NN 30
EX 1
TO 5
, 15
VBP 3
WDT 2
VBG 5
NNP 25
PRP 4
RB 4


# LCA

In [10]:
# Train a logistic-regression probe on (X, y) with both the L1 and the L2
# penalty weights set to 0.1.
probe = linear_probe.train_logistic_regression_probe(
            X, y, lambda_l2=0.1, lambda_l1=0.1
        )

Training classification probe
Creating model...
Number of training instances: 224
Number of classes: 29


epoch [1/10]: 0it [00:00, ?it/s]

Epoch: [1/10], Loss: 2.6957


epoch [2/10]: 0it [00:00, ?it/s]

Epoch: [2/10], Loss: 0.9210


epoch [3/10]: 0it [00:00, ?it/s]

Epoch: [3/10], Loss: 0.6735


epoch [4/10]: 0it [00:00, ?it/s]

Epoch: [4/10], Loss: 0.4566


epoch [5/10]: 0it [00:00, ?it/s]

Epoch: [5/10], Loss: 0.3408


epoch [6/10]: 0it [00:00, ?it/s]

Epoch: [6/10], Loss: 0.2813


epoch [7/10]: 0it [00:00, ?it/s]

Epoch: [7/10], Loss: 0.2486


epoch [8/10]: 0it [00:00, ?it/s]

Epoch: [8/10], Loss: 0.2314


epoch [9/10]: 0it [00:00, ?it/s]

Epoch: [9/10], Loss: 0.2229


epoch [10/10]: 0it [00:00, ?it/s]

Epoch: [10/10], Loss: 0.2189


In [11]:
# Pull the most important neurons out of the trained probe, overall and per label.
# NOTE(review): 0.01 is presumably the fraction of each class's weight mass
# that the selected neurons must cover -- confirm against the NeuroX docs.
top_neurons, top_neurons_per_class = linear_probe.get_top_neurons(probe, 0.01, label2idx)

In [12]:
print(top_neurons_per_class)

{'POS': array([8756, 4148]), 'MD': array([8756]), 'VBD': array([8756, 7988, 6452, 6894, 7220, 1076, 1183, 1383, 3603,  385, 8234,
       2771, 7368, 6006, 2168, 1232]), 'RBR': array([4916, 4148]), 'VB': array([3013, 8731, 7535, 2574, 1988,  858, 9347, 6852, 2469,  863, 5614,
       5169, 3380, 3551, 9644,  631, 8084, 3266, 7629, 7473, 4518, 1170,
       3359, 4892]), 'JJ': array([8756]), '.': array([7988]), 'IN': array([1844]), 'CD': array([6909, 3029,  530, 6571, 6855, 5547, 2341, 1514, 7085, 7745, 3002,
        265, 1694, 5796, 6433, 9824,  159, 5673,   66, 9241, 3285, 3599,
       7218, 5769]), 'RP': array([4148, 4916]), "''": array([8756, 4916]), 'JJS': array([8756, 4148]), '``': array([4916]), 'VBZ': array([4916, 4148, 8756, 3380, 5684, 6611, 7988, 6577, 7607, 6326, 3387]), 'NNS': array([8756]), 'VBN': array([5215, 2776, 5814, 9910, 2496, 7979, 6941, 2763, 4917,  895, 6376,
       8917, 4225, 1735, 7683, 2186, 6368,  169, 7246, 9230, 5771,  892,
       3648, 6223]), 'PRP$': array(

# No Reg

In [13]:
# Baseline: the same logistic-regression probe with both penalty weights set
# to 0.0, i.e. no regularisation. Rebinds `probe`.
probe = linear_probe.train_logistic_regression_probe(
            X, y, lambda_l2=0.0, lambda_l1=0.0
        )

Training classification probe
Creating model...
Number of training instances: 224
Number of classes: 29


epoch [1/10]: 0it [00:00, ?it/s]

Epoch: [1/10], Loss: 0.0651


epoch [2/10]: 0it [00:00, ?it/s]

Epoch: [2/10], Loss: 0.0017


epoch [3/10]: 0it [00:00, ?it/s]

Epoch: [3/10], Loss: 0.0001


epoch [4/10]: 0it [00:00, ?it/s]

Epoch: [4/10], Loss: 0.0001


epoch [5/10]: 0it [00:00, ?it/s]

Epoch: [5/10], Loss: 0.0001


epoch [6/10]: 0it [00:00, ?it/s]

Epoch: [6/10], Loss: 0.0000


epoch [7/10]: 0it [00:00, ?it/s]

Epoch: [7/10], Loss: 0.0000


epoch [8/10]: 0it [00:00, ?it/s]

Epoch: [8/10], Loss: 0.0000


epoch [9/10]: 0it [00:00, ?it/s]

Epoch: [9/10], Loss: 0.0000


epoch [10/10]: 0it [00:00, ?it/s]

Epoch: [10/10], Loss: 0.0000


In [14]:
# Neuron selection for the unregularised probe (same 0.01 threshold as the
# other probe variants so the results are comparable).
# NOTE(review): meaning of 0.01 (presumably a weight-mass fraction) -- confirm.
top_neurons, top_neurons_per_class = linear_probe.get_top_neurons(probe, 0.01, label2idx)

In [15]:
print(top_neurons_per_class)

{'POS': array([2867,   10, 1752, 1067, 3050, 3582, 6112, 8540, 9013, 4404, 5062,
       1416, 8594, 6705, 5151, 2065, 3297, 5839, 2723, 5714, 4377,  321,
        782, 3680, 5137,  972, 4910, 2061, 3648, 8764, 8760, 3080, 9220,
       7883]), 'MD': array([4208, 3270, 5093, 2529, 6318, 2421, 4330, 8219, 8123,  506, 5481,
       3604, 6623, 6915,   20, 8924, 9755, 5460,  935, 7451, 3994, 5194,
       9959, 9200,  653,   63, 3276, 5370, 4249, 3029, 8150, 4025, 7168,
       8045]), 'VBD': array([9472, 7338, 7051, 4805, 6044, 3313, 4527, 5573, 1962, 8949, 8230,
       3699, 5596, 3951, 4191, 4985,  687, 9379, 4832, 5140, 5276, 3206,
       4237, 9242, 9612,  770, 5949, 8802, 6563, 7685, 7075]), 'RBR': array([3050, 6855, 5679, 4163,  809,  232, 2156, 3352, 2850, 7581, 1081,
       4568, 7264, 7159, 1970, 9146, 2433, 5631, 2343, 9181, 8968, 7428,
       4116, 8649, 7764, 4704, 5325, 9851, 3375, 2708, 5808, 1490, 4327]), 'VB': array([1199, 9555, 7484,  109, 9630, 4322, 9423, 8526, 5697, 3227, 8

# Lasso-01

In [16]:
# Lasso variant: only the L1 penalty is active (lambda_l1=0.1, lambda_l2=0.0).
# Rebinds `probe`.
probe = linear_probe.train_logistic_regression_probe(
            X, y, lambda_l2=0.0, lambda_l1=0.1
        )

Training classification probe
Creating model...
Number of training instances: 224
Number of classes: 29


epoch [1/10]: 0it [00:00, ?it/s]

Epoch: [1/10], Loss: 2.6902


epoch [2/10]: 0it [00:00, ?it/s]

Epoch: [2/10], Loss: 0.9195


epoch [3/10]: 0it [00:00, ?it/s]

Epoch: [3/10], Loss: 0.6726


epoch [4/10]: 0it [00:00, ?it/s]

Epoch: [4/10], Loss: 0.4562


epoch [5/10]: 0it [00:00, ?it/s]

Epoch: [5/10], Loss: 0.3408


epoch [6/10]: 0it [00:00, ?it/s]

Epoch: [6/10], Loss: 0.2811


epoch [7/10]: 0it [00:00, ?it/s]

Epoch: [7/10], Loss: 0.2482


epoch [8/10]: 0it [00:00, ?it/s]

Epoch: [8/10], Loss: 0.2310


epoch [9/10]: 0it [00:00, ?it/s]

Epoch: [9/10], Loss: 0.2225


epoch [10/10]: 0it [00:00, ?it/s]

Epoch: [10/10], Loss: 0.2183


In [17]:
# Neuron selection for the L1-only probe (same 0.01 threshold as the other
# probe variants).
# NOTE(review): meaning of 0.01 (presumably a weight-mass fraction) -- confirm.
top_neurons, top_neurons_per_class = linear_probe.get_top_neurons(probe, 0.01, label2idx)

In [18]:
print(top_neurons_per_class)

{'POS': array([8756, 4148]), 'MD': array([8756, 4916]), 'VBD': array([8756, 5684, 7988, 7220, 4148, 3380, 5889, 4936, 8080, 7578, 4916,
       2188, 2585, 6657, 5667, 4107]), 'RBR': array([4916, 4148, 3380]), 'VB': array([5027, 3086, 1559, 5072,  511, 3149, 3777, 2035, 1312, 4143,  799,
        242, 3907, 7629, 7705,   23, 7330, 5553, 3408, 8735, 4704, 2858,
       2712, 2222, 6350]), 'JJ': array([8756]), '.': array([8756]), 'IN': array([8756]), 'CD': array([6415, 2322, 8903, 4107, 5737,   36, 5187, 7227,  966, 5030, 6711,
       3998, 4342, 5007, 8850, 4727, 3058,  953,  245, 1439, 6984,  282,
       1705]), 'RP': array([4916]), "''": array([5684, 8756]), 'JJS': array([8756, 4916]), '``': array([4916]), 'VBZ': array([4148, 4916, 3380, 8756, 5684, 5423, 9690, 4777, 5650, 5956, 4856,
       6761]), 'NNS': array([8756]), 'VBN': array([4644,  614, 7324, 1218, 4673, 5220, 4116, 5949, 1006, 4819, 6841,
       5763, 2803, 9861, 5361, 3792, 8092, 6560, 8198, 5973, 2328, 9971,
       3086, 554

# Ridge-01

In [19]:
# Ridge variant: only the L2 penalty is active (lambda_l2=0.1, lambda_l1=0.0).
# Rebinds `probe`.
probe = linear_probe.train_logistic_regression_probe(
            X, y, lambda_l2=0.1, lambda_l1=0.0
        )

Training classification probe
Creating model...
Number of training instances: 224
Number of classes: 29


epoch [1/10]: 0it [00:00, ?it/s]

Epoch: [1/10], Loss: 0.0756


epoch [2/10]: 0it [00:00, ?it/s]

Epoch: [2/10], Loss: 0.0134


epoch [3/10]: 0it [00:00, ?it/s]

Epoch: [3/10], Loss: 0.0127


epoch [4/10]: 0it [00:00, ?it/s]

Epoch: [4/10], Loss: 0.0130


epoch [5/10]: 0it [00:00, ?it/s]

Epoch: [5/10], Loss: 0.0130


epoch [6/10]: 0it [00:00, ?it/s]

Epoch: [6/10], Loss: 0.0127


epoch [7/10]: 0it [00:00, ?it/s]

Epoch: [7/10], Loss: 0.0123


epoch [8/10]: 0it [00:00, ?it/s]

Epoch: [8/10], Loss: 0.0119


epoch [9/10]: 0it [00:00, ?it/s]

Epoch: [9/10], Loss: 0.0114


epoch [10/10]: 0it [00:00, ?it/s]

Epoch: [10/10], Loss: 0.0109


In [20]:
# Neuron selection for the L2-only probe (same 0.01 threshold as the other
# probe variants).
# NOTE(review): meaning of 0.01 (presumably a weight-mass fraction) -- confirm.
top_neurons, top_neurons_per_class = linear_probe.get_top_neurons(probe, 0.01, label2idx)

In [21]:
print(top_neurons_per_class)

{'POS': array([7572, 5668, 4500, 9153, 7459, 5600, 6326, 6719, 4672, 8311, 7315,
       5789, 7651, 9959, 7383, 8880, 8502, 6148, 3950, 3136, 7621, 6336,
       5997]), 'MD': array([6683, 7220, 7909, 5199,  821, 6811, 4905, 9396, 5394, 9339, 8829,
       8271, 7367, 8931, 9038, 7832, 8973, 6253, 2278, 8270, 8054,   18,
       9115, 1109, 8130, 8314]), 'VBD': array([1124, 8080, 6544, 6110, 9198, 6124, 5852, 8998, 7580, 8734, 4468,
       4081, 6385, 3500, 5573, 1697, 1892, 3056, 8894, 4154, 3108, 5665,
       8823, 2545, 5140, 2094, 6044, 1096, 6368]), 'RBR': array([2342, 2627, 8590, 2721, 6093, 7822, 2979, 4111, 2654, 3657, 2080,
       7001, 4029, 5629,  589, 5460,  272, 6749, 8530, 1601, 2653, 8901,
       3627, 1212, 3504, 3079, 2964,  333]), 'VB': array([8789, 4682, 6014, 2781, 2296, 5340, 6735,  341, 6719, 3549, 1020,
       1980, 2544, 3144, 5214,  325, 9039, 8143, 3706, 8735, 1877, 4924,
       1245, 3510, 1061, 1500, 8486, 5700, 3064]), 'JJ': array([4991, 6453, 7295, 3764, 1662

# Probeless

In [22]:
# Probeless ranking: computed from X and y directly, without the trained
# `probe`. The second return value is a dict keyed by tag label, each holding
# a list of neuron indices. Rebinds top_neurons / top_neurons_per_class.
top_neurons, top_neurons_per_class = probeless.get_neuron_ordering_for_all_tags(X, y, idx2label)

In [23]:
print(top_neurons_per_class)

{'NNP': [7988, 7220, 8756, 4148, 3380, 6452, 4916, 1844, 5684, 2612, 1076, 308, 9524, 6590, 5822, 5054, 4286, 3518, 5804, 8126, 3914, 4682, 5036, 5450, 2750, 7358, 9396, 8894, 1982, 6572, 8876, 8108, 4268, 5541, 5383, 4666, 7340, 3146, 6218, 6151, 4773, 3804, 5228, 2123, 5434, 2268, 3500, 6919, 8962, 3898, 3036, 4572, 4390, 5926, 6309, 2641, 6098, 1987, 4291, 7754, 5750, 8282, 5171, 5158, 7035, 1332, 4885, 2362, 8219, 4005, 1195, 5431, 1500, 6202, 9968, 8194, 6986, 3622, 2854, 7687, 5422, 8987, 1810, 5340, 1963, 7967, 6707, 6764, 6002, 3729, 5330, 5122, 7077, 8037, 8402, 8313, 2731, 2100, 7199, 9050, 4177, 5996, 2586, 7864, 5663, 6882, 2050, 7019, 9098, 2086, 6251, 8530, 5346, 7634, 3659, 6866, 6406, 4562, 5234, 7762, 4466, 2732, 6431, 6131, 1931, 4578, 5748, 1214, 7972, 3810, 5939, 9205, 4663, 8805, 7984, 3130, 4615, 2193, 3847, 2197, 9035, 1355, 5890, 1873, 6114, 8881, 4427, 2883, 5680, 2961, 5915, 7216, 8113, 4819, 7096, 6448, 3651, 3523, 4354, 6022, 5282, 8648, 1818, 4690, 5827, 63

In [24]:
print(top_neurons_per_class.keys())

dict_keys(['NNP', ',', 'CD', 'NNS', 'JJ', 'MD', 'VB', 'DT', 'NN', 'IN', '.', 'VBZ', 'VBG', 'CC', 'VBD', 'VBN', 'RB', 'TO', 'PRP', 'RBR', 'WDT', 'VBP', 'RP', 'PRP$', 'JJS', 'POS', '``', 'EX', "''"])


# IOU

For the Gaussian and IoU probes, we need to binarize the labels file before running the probes.

In [34]:
# binarize the labels file
def binarize_labels(labels_source_file, binarized_labels_file, label, negative_label="N\\A"):
    """Write a copy of a label file in which only `label` is kept.

    Each input line is split on whitespace. Every tag equal to `label` is
    written unchanged and every other tag is replaced by `negative_label`.
    Tags are re-joined with single spaces and each output line ends with a
    newline, so the output has the same number of lines, and the same number
    of tags per line, as the input.

    Args:
        labels_source_file: path of the label file to read (one sentence per
            line, whitespace-separated tags).
        binarized_labels_file: path of the file to write; overwritten if it
            already exists.
        label: the tag to keep as the positive class.
        negative_label: placeholder written for every other tag. The default
            is the four-character-free literal N\\A (N, backslash, A), which
            is exactly what earlier versions of this function produced.
    """
    # The backslash is written as "\\" on purpose: a bare "\A" is an invalid
    # escape sequence that newer Python versions warn about.
    with open(labels_source_file, "r") as reader, open(binarized_labels_file, "w") as writer:
        for line in reader:
            binarized = [tag if tag == label else negative_label for tag in line.split()]
            writer.write(" ".join(binarized) + "\n")
            
                    

In [35]:
# Keep JJ as the positive class; every other tag in the training labels is
# replaced by the N\A placeholder in data/train_labels_binarized.txt.
binarize_labels(train_labels, "data/train_labels_binarized.txt", "JJ")

In [36]:
!cat "data/train_labels_binarized.txt"

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
N\A N\A N\A N\A N\A JJ N\A N\A N\A N\A N\A N\A N\A JJ N\A N\A N\A N\A
N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A
N\A N\A N\A N\A N\A JJ N\A JJ N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A JJ N\A N\A N\A JJ JJ N\A N\A
N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A JJ N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A
N\A N\A N\A N\A N\A N\A N\A N\A JJ N\A N\A N\A N\A N\A N\A N\A N\A JJ N\A N\A N\A N\A N\A N\A N\A N\A N\A JJ N\A N\A N\A N\A
N\A N\A N\A N\A N\A N\A JJ JJ N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A
N\A JJ N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A N\A JJ N\A N\A JJ N\A N\A N

In [37]:
# Reload the training activations and pair them with the binarized
# (JJ vs. N\A) labels instead of the full tag set.
# NOTE(review): this rebinds activations, tokens, X, y, mapping and the lookup
# tables that were built from the 29-tag labels earlier; re-running an earlier
# probe cell after this one would silently use the binary labels.
activations, num_layers = data_loader.load_activations('train_activations.json', 768)
tokens = data_loader.load_data(train_sentences, "data/train_labels_binarized.txt", activations, 512)
# NOTE(review): 'NN' does not occur in the binarized label set; it is
# presumably only a fallback tag for unseen labels -- confirm.
X, y, mapping = utils.create_tensors(tokens, activations, 'NN')
label2idx, idx2label, src2idx, idx2src = mapping

Loading json activations from train_activations.json...
10 13.0
Number of tokens:  224
length of source dictionary:  142
length of target dictionary:  2
224
Total instances: 224
['likely', 'high', 'causing', 'show', 'decades', 'forum', 'later', 'Corp.', 'enters', 'as', 'Pierre', 'Lorillard', 'having', 'former', 'cigarettes', 'stopped', 'any', 'questionable', "''", 'properties']
Number of samples:  224
Stats: Labels with their frequencies in the final set
N\A 206
JJ 18


In [38]:
# Rank the neurons with the IoU probe, using the binarized (JJ vs. N\A) X and y.
ranking = iou_probe.get_neuron_ordering(X, y)

In [39]:
print(ranking)

[6362 2085 4803 ... 7286  303 6518]


# Gaussian

In [41]:
# Fit a Gaussian probe on the binarized (JJ vs. N\A) X and y. Rebinds `probe`,
# which previously held a linear probe.
probe = gaussian_probe.train_probe(X, y)
# NOTE(review): the second argument is presumably the number of neurons to
# return (the printed ranking has 10 entries) -- confirm.
ranking = gaussian_probe.get_neuron_ordering(probe, 10)

In [42]:
print(ranking)

[3771, 3003, 5055, 2876, 3644, 2108, 5823, 8054, 7286, 5065]
