In [1]:
## Mymodel definition
from transformers import GPT2Model, GPT2LMHeadModel, GPT2Config, PreTrainedModel
import torch
from torch.nn import CrossEntropyLoss



class MyModel(PreTrainedModel):
    config_class = GPT2Config

    def __init__(self, config):
        super().__init__(config)
        self.encoder = GPT2Model(config)
        self.second_encoder = GPT2Model(config)
        self.decoder = GPT2LMHeadModel(config)

    def forward(self, input_ids, labels=None, attention_mask=None):
        encoder_outputs = self.encoder(input_ids)
        hidden_embedding = encoder_outputs.last_hidden_state[:,-1,:].unsqueeze(1)
        # just to obtain the hidden embeddings
        with torch.no_grad():
            decoder_hidden_inputs = self.second_encoder(input_ids, output_hidden_states=True).hidden_states[0]
        #hidden_embedding_dim = hidden_embedding.shape[2]
        updated_input = torch.cat((hidden_embedding, decoder_hidden_inputs), dim=1)
        logits = self.decoder(inputs_embeds=updated_input)['logits']
        logits = F.log_softmax(logits, dim=-1)
        shifted_prediction_scores = logits[:, 1:-1, :]
        
        labels[attention_mask == 0] = -100 
        labels = labels[:, 1:]
        loss_fct = CrossEntropyLoss()
        lm_loss = loss_fct(shifted_prediction_scores.contiguous().view(-1, self.config.vocab_size), labels.contiguous().view(-1))
        return {'loss': lm_loss, 'logits':logits[:,1:,:]}
    



  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from transformers import AutoTokenizer
checkpoint = './model3weights_2024-07-04--16:34:15'
model = MyModel.from_pretrained(checkpoint)
tokenizer = tokenizer = AutoTokenizer.from_pretrained('google-t5/t5-small')



Some weights of MyModel were not initialized from the model checkpoint at ./model3weights_2024-07-04--16:34:15 and are newly initialized: ['decoder.lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [16]:
import torch
input_ids1 = tokenizer('Hi I am a student!', return_tensors='pt')
input_ids2 = tokenizer('Hi I am a professor!', return_tensors='pt')
hidden_embedding1 = model.encoder(**input_ids1).last_hidden_state[0,-1,:]
hidden_embedding2 = model.encoder(**input_ids2).last_hidden_state[0,-1,:]
print('norm is ', torch.norm(input_ids1 - input_ids2))


tensor([-6.1158e-01, -7.1719e-01,  2.4815e+00, -1.7552e-01, -8.0174e-01,
        -9.5027e-02, -5.3310e-01, -8.0494e-01,  1.8842e+00,  1.7158e+00,
         2.8770e-01, -1.3433e+00, -1.5648e+00,  1.7946e+00,  5.7618e-01,
        -2.1223e-01,  9.9359e-01,  2.1210e+00,  6.5841e-01,  1.8674e-01,
        -1.1252e+00,  8.2801e-01,  1.3153e+00, -2.2609e+00, -8.5337e-01,
        -8.1833e-01,  1.1305e+00, -1.6083e+00, -2.0259e+00,  5.0636e-01,
         1.5517e+00, -1.4428e+00,  2.1106e+00, -3.2455e-01, -3.4911e+00,
        -8.5512e-01,  1.0983e+00, -5.8201e-01, -8.6580e-01, -1.8299e+00,
        -9.5475e-01, -9.0942e-01, -6.6502e-01, -1.4318e+00, -3.7600e-01,
        -9.7263e-01, -1.8523e-01, -7.5073e-01, -1.0162e+00, -5.6599e-01,
        -8.2322e-01,  1.1130e-02,  2.2469e+00, -6.2895e-01,  2.7641e-02,
         2.5279e-01,  8.8550e-01,  1.6353e-01,  4.4655e-01,  3.8632e-01,
        -8.5231e-01,  8.2634e-01,  8.0934e-02, -1.3880e+00,  4.0966e-01,
        -1.3839e+00, -5.8625e-01,  1.3884e+00, -5.8