In [4]:
import torch
from transformers import AutoTokenizer, AutoModel, AutoConfig


  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# SciBERT encoder pulled by its Hugging Face Hub identifier.
scibert_hub_id = "allenai/scibert_scivocab_cased"
model_scibert_from_api = AutoModel.from_pretrained(scibert_hub_id)

Downloading: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 385/385 [00:00<00:00, 123kB/s]
Downloading: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 442M/442M [01:04<00:00, 6.84MB/s]


In [7]:
# SPECTER encoder pulled by its Hugging Face Hub identifier.
specter_hub_id = "allenai/specter"
specter_from_api = AutoModel.from_pretrained(specter_hub_id)

Downloading: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 612/612 [00:00<00:00, 433kB/s]
Downloading: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 440M/440M [01:11<00:00, 6.11MB/s]


In [9]:
# Finnish BERT (cased) encoder pulled by its Hugging Face Hub identifier.
finbert_hub_id = "TurkuNLP/bert-base-finnish-cased-v1"
finbert_from_api = AutoModel.from_pretrained(finbert_hub_id)

Downloading: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 433/433 [00:00<00:00, 268kB/s]
Downloading: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 501M/501M [01:13<00:00, 6.79MB/s]


In [13]:
# Build the FinBERT model from files on disk: read the config JSON first,
# then load the weights file using that config.
finbert_config_path = './hf_model/bert_config.json'
finbert_weights_path = './hf_model/pytorch_model.bin'

finbert_config = AutoConfig.from_pretrained(finbert_config_path)
finbert_binary = AutoModel.from_pretrained(
    finbert_weights_path,
    config=finbert_config,
)

Some weights of BertModel were not initialized from the model checkpoint at ./hf_model/pytorch_model.bin and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [20]:
# Build the SciBERT model from the extracted files on disk: read the config
# JSON first, then load the weights file using that config.
scibert_config_path = './data/scibert_extracted/bert_config.json'
scibert_weights_path = './data/scibert_extracted/pytorch_model.bin'

scibert_config = AutoConfig.from_pretrained(scibert_config_path)
scibert_binary = AutoModel.from_pretrained(
    scibert_weights_path,
    config=scibert_config,
)

In [17]:
# Key count and leading state-dict keys of the locally loaded FinBERT model.
# NOTE(review): `read_layer_names` is defined in a later cell of this notebook;
# that cell must be run before this one on a fresh kernel.
read_layer_names(model=finbert_binary)

(200,
 ['embeddings.position_ids',
  'embeddings.word_embeddings.weight',
  'embeddings.position_embeddings.weight',
  'embeddings.token_type_embeddings.weight',
  'embeddings.LayerNorm.weight',
  'embeddings.LayerNorm.bias',
  'encoder.layer.0.attention.self.query.weight',
  'encoder.layer.0.attention.self.query.bias',
  'encoder.layer.0.attention.self.key.weight',
  'encoder.layer.0.attention.self.key.bias'])

In [21]:
# Key count and leading state-dict keys of the locally loaded SciBERT model.
# NOTE(review): `read_layer_names` is defined in a later cell of this notebook;
# that cell must be run before this one on a fresh kernel.
read_layer_names(model=scibert_binary)

(200,
 ['embeddings.position_ids',
  'embeddings.word_embeddings.weight',
  'embeddings.position_embeddings.weight',
  'embeddings.token_type_embeddings.weight',
  'embeddings.LayerNorm.weight',
  'embeddings.LayerNorm.bias',
  'encoder.layer.0.attention.self.query.weight',
  'encoder.layer.0.attention.self.query.bias',
  'encoder.layer.0.attention.self.key.weight',
  'encoder.layer.0.attention.self.key.bias'])

In [5]:
def read_layer_names(model, limit=10):
    """Summarise the entries of a model's state dict.

    Args:
        model: Any object whose ``state_dict()`` method returns a mapping
            keyed by entry name.
        limit: Number of leading names to include in the preview. Defaults
            to 10, which matches the original fixed-size preview.

    Returns:
        tuple: ``(layer_count, first_layer_names)`` where ``layer_count`` is
        the total number of keys in the state dict and ``first_layer_names``
        is a list with at most ``limit`` keys, in the state dict's own order.

    Raises:
        ValueError: If ``limit`` is negative (a negative slice bound would
            silently drop names from the end instead of limiting the preview).
    """
    if limit < 0:
        raise ValueError("limit must be non-negative")

    # Materialise the key list once and reuse it for both the count and the preview.
    layer_names = list(model.state_dict().keys())
    return len(layer_names), layer_names[:limit]

In [6]:
# Key count and leading state-dict keys of the Hub-loaded SciBERT model.
read_layer_names(model=model_scibert_from_api)

(200,
 ['embeddings.position_ids',
  'embeddings.word_embeddings.weight',
  'embeddings.position_embeddings.weight',
  'embeddings.token_type_embeddings.weight',
  'embeddings.LayerNorm.weight',
  'embeddings.LayerNorm.bias',
  'encoder.layer.0.attention.self.query.weight',
  'encoder.layer.0.attention.self.query.bias',
  'encoder.layer.0.attention.self.key.weight',
  'encoder.layer.0.attention.self.key.bias'])

In [8]:
# Key count and leading state-dict keys of the Hub-loaded SPECTER model.
read_layer_names(model=specter_from_api)

(200,
 ['embeddings.position_ids',
  'embeddings.word_embeddings.weight',
  'embeddings.position_embeddings.weight',
  'embeddings.token_type_embeddings.weight',
  'embeddings.LayerNorm.weight',
  'embeddings.LayerNorm.bias',
  'encoder.layer.0.attention.self.query.weight',
  'encoder.layer.0.attention.self.query.bias',
  'encoder.layer.0.attention.self.key.weight',
  'encoder.layer.0.attention.self.key.bias'])

In [10]:
# Key count and leading state-dict keys of the Hub-loaded FinBERT model.
read_layer_names(model=finbert_from_api)

(200,
 ['embeddings.position_ids',
  'embeddings.word_embeddings.weight',
  'embeddings.position_embeddings.weight',
  'embeddings.token_type_embeddings.weight',
  'embeddings.LayerNorm.weight',
  'embeddings.LayerNorm.bias',
  'encoder.layer.0.attention.self.query.weight',
  'encoder.layer.0.attention.self.query.bias',
  'encoder.layer.0.attention.self.key.weight',
  'encoder.layer.0.attention.self.key.bias'])

# SciBERT last 10 layers

In [2]:
# Hard-coded list of ten parameter names for this section, printed one per line.
layers = [
    'text_field_embedder.token_embedder_bert._scalar_mix.scalar_parameters.11',
    'venue_field_embedder.token_embedder_tokens.weight',
    'feedforward._linear_layers.0.weight',
    'feedforward._linear_layers.0.bias',
    'layer_norm.gamma',
    'layer_norm.beta',
    'layer_norm_word_embedding.gamma',
    'layer_norm_word_embedding.beta',
    'layer_norm_word_embedding_venue.gamma',
    'layer_norm_word_embedding_venue.beta',
]

print(*layers, sep="\n")

text_field_embedder.token_embedder_bert._scalar_mix.scalar_parameters.11
venue_field_embedder.token_embedder_tokens.weight
feedforward._linear_layers.0.weight
feedforward._linear_layers.0.bias
layer_norm.gamma
layer_norm.beta
layer_norm_word_embedding.gamma
layer_norm_word_embedding.beta
layer_norm_word_embedding_venue.gamma
layer_norm_word_embedding_venue.beta


# FinBERT last 10 layers

In [3]:
# Hard-coded list of ten parameter names for this section, printed one per line.
layers = [
    'bert.encoder.layer.11.output.dense.bias',
    'bert.encoder.layer.11.output.LayerNorm.weight',
    'bert.encoder.layer.11.output.LayerNorm.bias',
    'cls.predictions.bias',
    'cls.predictions.transform.dense.weight',
    'cls.predictions.transform.dense.bias',
    'cls.predictions.transform.LayerNorm.weight',
    'cls.predictions.transform.LayerNorm.bias',
    'cls.predictions.decoder.weight',
    'cls.predictions.decoder.bias',
]

print(*layers, sep="\n")

bert.encoder.layer.11.output.dense.bias
bert.encoder.layer.11.output.LayerNorm.weight
bert.encoder.layer.11.output.LayerNorm.bias
cls.predictions.bias
cls.predictions.transform.dense.weight
cls.predictions.transform.dense.bias
cls.predictions.transform.LayerNorm.weight
cls.predictions.transform.LayerNorm.bias
cls.predictions.decoder.weight
cls.predictions.decoder.bias


# Load FinBERT weights.th

In [28]:
# Read the raw FinBERT checkpoint from disk, mapping all tensors onto the CPU.
# NOTE(review): absolute, user-specific path; other cells use paths relative
# to the working directory. Consider a configurable base directory.
finbert_checkpoint_path = "/home/olli/UniHY/thesis_specter/hf_model/pytorch_model.bin"
cpu_device = torch.device("cpu")
model_state_fb = torch.load(finbert_checkpoint_path, map_location=cpu_device)

In [24]:
# Write `model_state` to a `.th` file next to the FinBERT checkpoint.
# NOTE(review): `model_state` is not assigned in any cell visible in this
# notebook (only `model_state_fb` and `model_state_sb` are). Confirm which
# object is meant before re-running on a fresh kernel.
torch.save(model_state, "/home/olli/UniHY/thesis_specter/hf_model/my_weights.th")

In [25]:
# Total number of entries in the raw FinBERT checkpoint.
finbert_key_count = len(model_state_fb.keys())
finbert_key_count

205

In [38]:
# Collect the distinct top-level prefixes (the text before the first ".")
# of every key in the FinBERT checkpoint.
layer_set = {key.split(".")[0] for key in model_state_fb.keys()}

print(layer_set)

{'bert', 'cls'}


In [32]:
# Read the `weights.th` checkpoint from disk, mapping all tensors onto the CPU.
# NOTE(review): absolute, user-specific path; other cells use paths relative
# to the working directory. Consider a configurable base directory.
sb_checkpoint_path = "/home/olli/UniHY/thesis_specter/model_data/weights.th"
cpu_device = torch.device("cpu")
model_state_sb = torch.load(sb_checkpoint_path, map_location=cpu_device)

In [37]:
# Collect the distinct top-level prefixes (the text before the first ".")
# of every key in the `weights.th` checkpoint.
layer_set = {key.split(".")[0] for key in model_state_sb.keys()}

print(layer_set)

{'layer_norm_word_embedding', 'text_field_embedder', 'layer_norm', 'layer_norm_word_embedding_venue', 'feedforward', 'venue_field_embedder'}


In [27]:
# Number of keys in `model_state`.
# NOTE(review): `model_state` is not assigned in any cell visible in this
# notebook (only `model_state_fb` and `model_state_sb` are), so this cell
# relies on leftover kernel state. Confirm which object is being counted.
len(model_state.keys())

221