# Using LM-inspector with a WSD-classifier

Import libraries

In [1]:
import torch
from transformers import AutoConfig, AutoModel, AutoTokenizer

from lm_inspect import LanguageModelInspector
from word_sense_disambiguation import bert_encoder, label_encoder, Xval as Xtest, Yval as Ytest



Load trained classifier from binary file.

In [2]:
# Rebuild the classifier architecture (encoder -> dropout -> linear head) so the
# saved weights can be loaded into it.
# NOTE(review): 358 is presumably the number of sense labels the classifier was
# trained on -- confirm against label_encoder.
seq = torch.nn.Sequential(
            bert_encoder,
            torch.nn.Dropout(0.2),
            torch.nn.Linear(bert_encoder.output_size, out_features=358)
        ).to('cpu')

# Put every submodule in evaluation mode. Modules start in training mode, which
# would leave Dropout(0.2) active and make the inspected forward passes
# stochastic. Called before load_state_dict so that call remains the cell's
# displayed result.
seq.eval()

# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source (consider weights_only=True on torch versions that support it).
state_dict = torch.load('models/KB-bert-swedish-cased-wsd.pt', map_location=torch.device('cpu'))
seq.load_state_dict(state_dict)

<All keys matched successfully>

Load transformers config and tokenizer

In [3]:
# The config and the tokenizer are both loaded from the same pretrained checkpoint.
pretrained_name = 'KB/bert-base-swedish-cased'

# Enable hidden-state and attention outputs in the config.
config = AutoConfig.from_pretrained(
    pretrained_name,
    output_hidden_states=True,
    output_attentions=True,
)
tokenizer = AutoTokenizer.from_pretrained(pretrained_name, config=config)

Get the positions of the ambiguous words

In [4]:
# Collect the 'pos' entry of every test example, in the same order as Xtest.
input_ids = [
    example['pos']
    for example in Xtest
]

Initialize LM-inspector object and set the configuration.

In [None]:
# Wrap the loaded classifier, the test data, the tokenizer and the label encoder
# in an inspector that runs on the CPU.
inspector = LanguageModelInspector(
    seq,
    Xtest,
    Ytest,
    tokenizer,
    label_encoder,
    device='cpu',
)

# Select what to inspect: one label, three layers, two heads, and the
# per-example positions collected in input_ids.
# NOTE(review): the exact meaning of each argument is defined by
# LanguageModelInspector.configure -- confirm there.
inspector.configure(
    label='följa_1_3_a',
    layer=[0, 6, 11],
    head=[0, 6],
    input_id=input_ids,
)

Evaluating data
1520 / 10000
Done.


Apply the `topk_most_attended_to` method to the current configuration

In [None]:
# Query the inspector with k=5 and return_type="all"; the call is the cell's
# last expression, so its return value is displayed.
inspector.topk_most_attended_to(
    k=5,
    return_type="all",
)

Visualize results scope-wise.

In [None]:
# Same k=5 query, but with return_type="scope" and visualization switched on.
inspector.topk_most_attended_to(
    k=5,
    return_type="scope",
    visualize=True,
)