# Practicing with different Hugging Face models

In [1]:
import torch

## RAG

In [None]:
from transformers import RagTokenizer, RagRetriever, RagSequenceForGeneration

In [None]:
# Load the RAG tokenizer, retriever and sequence generator from one checkpoint.
RAG_CHECKPOINT = "facebook/rag-sequence-nq"

tokenizer = RagTokenizer.from_pretrained(RAG_CHECKPOINT)
# The retriever is configured with the "exact" index and the dummy dataset.
retriever = RagRetriever.from_pretrained(
    RAG_CHECKPOINT,
    index_name="exact",
    use_dummy_dataset=True,
)
# The generator is handed the retriever built above.
model = RagSequenceForGeneration.from_pretrained(RAG_CHECKPOINT, retriever=retriever)

In [None]:
# Build a small batch of question/answer pairs for the RAG model.
rag_questions = ["How many people live in Paris?", "how old are you?"]
rag_answers = ["In Paris, there are 10 million people.", "I am 22 years old."]

# Questions are encoded as a padded, truncated batch of PyTorch tensors.
inputs = tokenizer(rag_questions, return_tensors="pt", padding=True, truncation=True)
input_ids = inputs["input_ids"]

# Answers are encoded with the same options, inside the tokenizer's
# target-tokenizer context.
with tokenizer.as_target_tokenizer():
    targets = tokenizer(rag_answers, return_tensors="pt", padding=True, truncation=True)
labels = targets["input_ids"]

print(input_ids.size())

In [None]:
# 1. Encode: run the question encoder and keep the first element of its output
question_encoder_outputs = model.question_encoder(input_ids)
question_hidden_states = question_encoder_outputs[0]
print(question_hidden_states.size())

In [None]:
# 2. Retrieve: look up documents for each question, then score them
# The retriever is called with NumPy arrays and asked to return PyTorch tensors.
docs_dict = retriever(
    input_ids.numpy(),
    question_hidden_states.detach().numpy(),
    return_tensors="pt",
)

# Batched matrix product of each question vector with the (transposed)
# embeddings of its retrieved documents; the singleton dim is squeezed away.
query_vectors = question_hidden_states.unsqueeze(1)
doc_embeds_t = docs_dict["retrieved_doc_embeds"].float().transpose(1, 2)
doc_scores = torch.bmm(query_vectors, doc_embeds_t).squeeze(1)
print(doc_scores.size())

In [None]:
# 3. Forward to generator: pass the retrieved contexts, their scores and the
#    label token ids (as decoder_input_ids) to the RAG model
outputs = model(
    context_input_ids=docs_dict["context_input_ids"],
    context_attention_mask=docs_dict["context_attention_mask"],
    doc_scores=doc_scores,
    decoder_input_ids=labels,
)

## BERT

In [2]:
from transformers import BertTokenizer, BertModel

In [3]:
# Load the BERT tokenizer and model from the same checkpoint.
# NOTE: this rebinds `tokenizer` and `model`, replacing the RAG objects
# created earlier in the notebook.
BERT_CHECKPOINT = "bert-base-uncased"

tokenizer = BertTokenizer.from_pretrained(BERT_CHECKPOINT)
model = BertModel.from_pretrained(BERT_CHECKPOINT)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [4]:
# Tokenize two sample questions into a padded, truncated batch of PyTorch tensors.
bert_questions = ["How many people live in Paris?", "how old are you?"]
inputs = tokenizer(
    bert_questions,
    return_tensors="pt",
    padding=True,
    truncation=True,
)
input_ids = inputs["input_ids"]
print(input_ids.size())

torch.Size([2, 9])


In [5]:
# Forward pass over the tokenized batch, also requesting the hidden states.
outputs = model(
    **inputs,
    output_hidden_states=True,
)
print(outputs)

BaseModelOutputWithPoolingAndCrossAttentions(last_hidden_state=tensor([[[ 0.2846,  0.2654,  0.0621,  ..., -0.5076,  0.4604,  0.4020],
         [ 0.5797, -0.0389, -0.1691,  ..., -0.2428,  0.4190,  0.2785],
         [ 0.3598, -0.6328,  0.5946,  ..., -0.6814,  0.5046,  0.0212],
         ...,
         [ 0.6149, -0.3343, -0.1552,  ..., -0.8227, -0.4112, -0.0663],
         [ 0.0534, -0.3793, -1.0612,  ..., -0.0713,  0.6246, -0.3960],
         [ 0.7939,  0.0290, -0.2404,  ...,  0.0031, -0.5633, -0.2443]],

        [[ 0.1435,  0.2915, -0.1897,  ..., -0.2568,  0.2299,  0.2998],
         [ 0.8528,  0.2125,  0.2806,  ...,  0.0611,  0.3585,  0.3833],
         [ 0.6508,  0.4176,  0.7111,  ..., -0.6729,  0.3827, -1.2437],
         ...,
         [ 0.8221,  0.0168, -0.1774,  ...,  0.1684, -0.5292, -0.2830],
         [ 0.0785,  0.4297,  0.4029,  ...,  0.0484,  0.1668, -0.2065],
         [-0.1175,  0.2349,  0.1610,  ...,  0.3380,  0.1549, -0.0944]]],
       grad_fn=<NativeLayerNormBackward>), pooler_out

In [10]:
# Scratch example: a 2x3 tensor of random values
a = torch.rand(2, 3)

In [12]:
# Concatenate `a` with itself along the first dimension
torch.cat([a, a], dim=0)

tensor([[0.1035, 0.5856, 0.9475],
        [0.0679, 0.0193, 0.4326],
        [0.1035, 0.5856, 0.9475],
        [0.0679, 0.0193, 0.4326]])