In [8]:
import os
import time

import torch
from transformers import *  # wildcard kept: the cells below rely on the model/tokenizer names it exports

# demo script from huggingface:
# https://github.com/huggingface/transformers#quick-tour

In [9]:
# Transformers has a unified API
# for 8 transformer architectures and 30 pretrained weights.
#          Model          | Tokenizer          | Pretrained weights shortcut
# NOTE(review): list taken from the upstream quick-tour; confirm it matches the
# architectures you intend to time -- every entry downloads its weights on first use.
MODELS = [(BertModel,       BertTokenizer,       'bert-base-uncased'),
          (OpenAIGPTModel,  OpenAIGPTTokenizer,  'openai-gpt'),
          (GPT2Model,       GPT2Tokenizer,       'gpt2'),
          (TransfoXLModel,  TransfoXLTokenizer,  'transfo-xl-wt103'),
          (XLNetModel,      XLNetTokenizer,      'xlnet-base-cased'),
          (XLMModel,        XLMTokenizer,        'xlm-mlm-enfr-1024'),
          (DistilBertModel, DistilBertTokenizer, 'distilbert-base-uncased'),
          (RobertaModel,    RobertaTokenizer,    'roberta-base')]

# To use TensorFlow 2.0 versions of the models, simply prefix the class names with 'TF', e.g. `TFRobertaModel` is the TF 2.0 counterpart of the PyTorch model `RobertaModel`

# Text that every model encodes; defined once because it does not change between iterations.
abstract = 'Representation and analysis of complex biological and engineered systems as directed networks is useful for understanding their global structure/function organization. Enrichment of network motifs, which are over-represented subgraphs in real networks, can be used for topological analysis. Because counting network motifs is computationally expensive, only characterization of 3- to 5-node motifs has been previously reported. In this study we used a supercomputer to analyze cyclic motifs made of 3–20 nodes for 6 biological and 3 technological networks. Using tools from statistical physics, we developed a theoretical framework for characterizing the ensemble of cyclic motifs in real networks. We have identified a generic property of real complex networks, antiferromagnetic organization, which is characterized by minimal directional coherence of edges along cyclic subgraphs, such that consecutive links tend to have opposing direction. As a consequence, we find that the lack of directional coherence in cyclic motifs leads to depletion in feedback loops, where the number of nodes affected by feedback loops appears to be at a local minimum compared with surrogate shuffled networks. This topology provides more dynamic stability in large networks.'

# Let's encode some text in a sequence of hidden-states using each model:
for model_class, tokenizer_class, pretrained_weights in MODELS:
    print(model_class)

    # Phase 1: load pretrained model/tokenizer (includes any download on first use).
    # perf_counter is used because only the elapsed interval matters, not the wall-clock time.
    start_time = time.perf_counter()
    tokenizer = tokenizer_class.from_pretrained(pretrained_weights)
    model = model_class.from_pretrained(pretrained_weights)
    end_time = time.perf_counter()
    print('tokenizer & transformer-model initialization: {}s'.format(end_time - start_time))

    # Phase 2: tokenize the abstract and run a single forward pass.
    start_time = time.perf_counter()
    # Encode text
    input_ids = torch.tensor([tokenizer.encode(abstract, add_special_tokens=True)])  # Add special tokens takes care of adding [CLS], [SEP], <s>... tokens in the right way for each model.
    with torch.no_grad():
        last_hidden_states = model(input_ids)[0]  # Models outputs are now tuples
    end_time = time.perf_counter()
    print('encode single abstract: {}s'.format(end_time - start_time))

    
# Optional walkthrough of the BERT task heads; disabled by default because it
# loads every class in BERT_MODEL_CLASSES twice and writes checkpoints to disk.
run_codebook = False
if run_codebook:

    # Each architecture is provided with several class for fine-tuning on down-stream tasks, e.g.
    BERT_MODEL_CLASSES = [BertModel, BertForPreTraining, BertForMaskedLM, BertForNextSentencePrediction,
                          BertForSequenceClassification, BertForMultipleChoice, BertForTokenClassification,
                          BertForQuestionAnswering]

    # One explicit checkpoint name for every load below, so this section does not
    # depend on whatever `pretrained_weights` an earlier loop happened to leave behind.
    bert_weights = 'bert-base-uncased'

    # Create the target directory up front so the save calls have somewhere to write
    # (assumption: some transformers releases do not create it themselves -- verify for yours).
    save_dir = './directory/to/save/'
    os.makedirs(save_dir, exist_ok=True)

    # All the classes for an architecture can be initiated from pretrained weights for this architecture
    # Note that additional weights added for fine-tuning are only initialized
    # and need to be trained on the down-stream task
    tokenizer = BertTokenizer.from_pretrained(bert_weights)
    for model_class in BERT_MODEL_CLASSES:
        # Load pretrained model/tokenizer
        model = model_class.from_pretrained(bert_weights)

        # Models can return full list of hidden-states & attentions weights at each layer
        model = model_class.from_pretrained(bert_weights,
                                            output_hidden_states=True,
                                            output_attentions=True)
        input_ids = torch.tensor([tokenizer.encode("Let's see all hidden-states and attentions on this text")])
        # Inference only: skip autograd bookkeeping, as in the benchmark loop.
        with torch.no_grad():
            all_hidden_states, all_attentions = model(input_ids)[-2:]

        # Simple serialization for models and tokenizers
        model.save_pretrained(save_dir)  # save
        model = model_class.from_pretrained(save_dir)  # re-load
        tokenizer.save_pretrained(save_dir)  # save
        tokenizer = BertTokenizer.from_pretrained(save_dir)  # re-load


  0%|          | 0/830122454 [00:00<?, ?B/s][A
  0%|          | 69632/830122454 [00:00<22:29, 614927.09B/s][A
  0%|          | 243712/830122454 [00:00<18:09, 761443.22B/s][A
  0%|          | 400384/830122454 [00:00<15:24, 897705.17B/s][A
  0%|          | 609280/830122454 [00:00<13:21, 1034338.34B/s][A
  0%|          | 748544/830122454 [00:00<12:45, 1083550.13B/s][A
  0%|          | 1027072/830122454 [00:00<10:25, 1325942.27B/s][A
  0%|          | 1235968/830122454 [00:00<09:40, 1428619.77B/s][A
  0%|          | 1402880/830122454 [00:00<09:42, 1423314.30B/s][A
  0%|          | 1562624/830122454 [00:01<09:31, 1450950.83B/s][A
  0%|          | 1928192/830122454 [00:01<07:52, 1753746.40B/s][A
  0%|          | 2255872/830122454 [00:01<11:17, 1222764.52B/s][A
  0%|          | 3605504/830122454 [00:01<08:11, 1681500.49B/s][A
  0%|          | 4129792/830122454 [00:01<06:43, 2045383.72B/s][A
  1%|          | 4621312/830122454 [00:01<06:00, 2288902.31B/s][A
  1%|          | 50606

KeyboardInterrupt: 