In [1]:
from transformers import RobertaModel, PreTrainedModel
from torch import nn
class CustomROBERTAModel(PreTrainedModel):
      def __init__(self, config, transformer_model_name_or_path, num_feats):
            super(CustomROBERTAModel, self).__init__(config)
            self.roberta = RobertaModel.from_pretrained(
                  transformer_model_name_or_path,
                  config=config
            )
            ### New layers:
            self.classification_layer = nn.Sequential(
                  nn.Linear(config.hidden_size, 512),
                  nn.GELU(),
                  nn.Linear(512, 256),
                  nn.GELU(),
                  nn.Linear(256, 128),
                  nn.GELU(),
                  nn.Linear(128, 54),
                  nn.GELU(),
                  nn.Unflatten(1, (6, 9)),
                  nn.Softmax(dim=2),
            )

      def forward(self, **inputs):
            roberta_outputs = self.roberta(**inputs)
            logits = self.classification_layer(roberta_outputs.pooler_output)
            return logits

      def _init_weights(self, module):
            self.bert._init_weights(module)

[2024-01-08 21:18:44,658] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)


In [2]:
from transformers import AutoConfig, AutoTokenizer
import torch
base_model = 'roberta-large'
checkpoint = '/home/yuuhanase/FPTU/EXE101/PaperClipAI_EnglishGrading/EnglishGradingModel'
config = AutoConfig.from_pretrained(base_model)
tokenizer = AutoTokenizer.from_pretrained(base_model)
model = CustomROBERTAModel(
    config=config, 
    transformer_model_name_or_path=base_model, 
    num_feats=6
).to("cuda") ## can be gpu
# model = CustomROBERTAModel.from_pretrained(
#     checkpoint, 
#     config=config, 
#     transformer_model_name_or_path=base_model,
#     num_feats=6
# )

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
text = "Hello"
encoded_input = tokenizer(text, return_tensors='pt').to("cuda")
output = model(**encoded_input)
output[0], output.size()

(tensor([[0.1151, 0.1140, 0.1108, 0.1122, 0.1086, 0.1109, 0.1099, 0.1106, 0.1077],
         [0.1125, 0.1080, 0.1074, 0.1111, 0.1139, 0.1173, 0.1129, 0.1095, 0.1074],
         [0.1073, 0.1065, 0.1095, 0.1168, 0.1089, 0.1137, 0.1130, 0.1107, 0.1135],
         [0.1120, 0.1141, 0.1169, 0.1081, 0.1164, 0.1079, 0.1084, 0.1079, 0.1083],
         [0.1094, 0.1113, 0.1128, 0.1068, 0.1083, 0.1132, 0.1113, 0.1116, 0.1154],
         [0.1095, 0.1151, 0.1142, 0.1116, 0.1063, 0.1101, 0.1133, 0.1115, 0.1085]],
        device='cuda:0', grad_fn=<SelectBackward0>),
 torch.Size([1, 6, 9]))

In [3]:
from transformers import RobertaTokenizer, RobertaModel, AutoConfig
tokenizer = RobertaTokenizer.from_pretrained('roberta-large')
config = AutoConfig.from_pretrained('roberta-large')
model = RobertaModel.from_pretrained('roberta-large')
text = "Hello"
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)
output

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BaseModelOutputWithPoolingAndCrossAttentions(last_hidden_state=tensor([[[-0.0384, -0.0209, -0.0141,  ..., -0.0523,  0.0802,  0.0632],
         [ 0.3952,  0.0993, -0.6829,  ...,  0.1277, -0.4668,  0.4225],
         [-0.0097, -0.0010,  0.0426,  ..., -0.0275,  0.0668,  0.0367]]],
       grad_fn=<NativeLayerNormBackward0>), pooler_output=tensor([[-0.8011, -0.4364,  0.2403,  ..., -0.4839,  0.1486, -0.3779]],
       grad_fn=<TanhBackward0>), hidden_states=None, past_key_values=None, attentions=None, cross_attentions=None)

In [4]:
output.

BaseModelOutputWithPoolingAndCrossAttentions(last_hidden_state=tensor([[[-0.0384, -0.0209, -0.0141,  ..., -0.0523,  0.0802,  0.0632],
         [ 0.3952,  0.0993, -0.6829,  ...,  0.1277, -0.4668,  0.4225],
         [-0.0097, -0.0010,  0.0426,  ..., -0.0275,  0.0668,  0.0367]]],
       grad_fn=<NativeLayerNormBackward0>), pooler_output=tensor([[-0.8011, -0.4364,  0.2403,  ..., -0.4839,  0.1486, -0.3779]],
       grad_fn=<TanhBackward0>), hidden_states=None, past_key_values=None, attentions=None, cross_attentions=None)

In [10]:
inp_text = "When a problem is a change you have to let it do the best on you no matter what is happening it can change your mind. sometimes you need to wake up and look what is around you because problems are the best way to change what you want to change along time ago. A problem is a change for you because it can make you see different and help you to understand how tings wok. First of all it can make you see different then the others. For example i remember that when i came to the United States i think that nothing was going to change me because i think that nothing was going to change me because everything was different that my country and then i realist that wrong because a problem may change you but sometimes can not change the way it is, but i remember that i was really shy but i think that change a lot because sometimes my problems make me think that there is more thing that i never see in my life but i just need to see it from a different way and dont let nothing happened and ruing the change that i want to make because of just a problem. For example i think that nothing was going to change me and that i dont need to be shy anymore became i need to start seeing everything in a different ways because you can get mad at every one but you need to know what is going to happened after, people may see you different but the only way that you know how to change is to do the best and don't let nothing or not body to change nothing about you. The way you want to change not one have that and can't do nothing about it because is your choice and your problems and you can decide what to do with it. second of all can help you to understand how things work. For instance my mom have a lot of problems but she have faith when she is around people, my mom is scare of high and i'm not scare of high i did not understand why my mos is scare of high and in not scare of high and every time i see my mom in a airplane it make me laugh because she is scare and is funny, but i see it from a different way and i like the high but also she have to understand that hoe things work in other people because it can no be the same as you. For example i think that my mom and me are different because we are and i have to understand that she does not like high and i need to understand that. to help someone to understand how things work you need to start to see how things work in that persons life. A problem is a change for you and can make you a different and help you to understand. Everyone has a different opinion and a different was to understand then others. everyone can see the different opinion and what other people think."
#2.5,2.5,3.0,2.0,2.0,2.5
tokenized_input = tokenizer(inp_text, return_tensors='pt', truncation=True)#.to(torch.device("cuda"))
output = model(**tokenized_input)[0]
feats = ['cohesion', 'syntax', 'vocabulary', 'phraseology', 'grammar', 'conventions']
result = {}
for i in range(6):
    result[feats[i]] = output[i].item()*5.0
result

{'cohesion': 2.383091002702713,
 'syntax': 2.0666779577732086,
 'vocabulary': 2.884359359741211,
 'phraseology': 2.844240665435791,
 'grammar': 2.9789209365844727,
 'conventions': 2.2683973610401154}

In [10]:
from transformers import Trainer
from torch import nn
class CustomTrainer(Trainer):
    def __int__(self, *args, **kwargs):
        super().__int__(*args, **kwargs)
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels")
        # forward pass
        logits = model(**inputs)
        loss_fct = nn.MSELoss()
        loss = loss_fct(logits, labels)
        return (loss, logits) if return_outputs else loss

In [23]:
import torch
loss = torch.nn.CrossEntropyLoss(reduction='none')
input = torch.randn(6, 9, requires_grad=True)
target = torch.randn(6, 9).softmax(dim=1)
output = loss(input, target)
uf = torch.nn.Unflatten(0, (1, 6))
input1 = uf(input)
target1 = uf(target)
output1 = loss(input1, target1)
input, target, output, input1, target1, output1

(tensor([[-0.3714, -1.2579,  0.3615,  0.9567,  0.4323,  0.4703, -0.3416, -0.5227,
          -0.4308],
         [-0.3717, -0.5112,  0.0627,  0.5598,  0.2792, -0.2005,  0.3077,  1.1931,
          -0.8187],
         [-1.5454,  0.8219,  0.2950, -0.8463,  1.3661, -1.5724, -1.1458, -1.3411,
          -1.5289],
         [-0.1115,  0.4887,  2.2682,  0.0564,  2.0264, -0.2742,  0.0707,  1.0282,
           0.6832],
         [-0.1112,  0.6495,  1.8468,  1.8657,  2.2472, -0.4522, -1.1466,  1.2399,
           0.1270],
         [-0.4857, -0.3033,  0.0491, -0.7390,  1.1182, -0.7626, -0.1415,  0.9473,
          -1.7455]], requires_grad=True),
 tensor([[0.0267, 0.1060, 0.0501, 0.3237, 0.2265, 0.0965, 0.0599, 0.0485, 0.0621],
         [0.0632, 0.0759, 0.2928, 0.0727, 0.0313, 0.1128, 0.1833, 0.0972, 0.0708],
         [0.4313, 0.0229, 0.0747, 0.1114, 0.0550, 0.0088, 0.1499, 0.0586, 0.0873],
         [0.0360, 0.0761, 0.0481, 0.0489, 0.0835, 0.0247, 0.1768, 0.1233, 0.3827],
         [0.0760, 0.5801, 0.0519, 

In [3]:
from sentence_transformers import SentenceTransformer

# INPUT TEXT MUST BE ALREADY WORD-SEGMENTED!
sentences = ["Cô ấy là một người vui_tính .", 
             "Cô ấy cười nói suốt cả ngày ."]

model = SentenceTransformer('bkai-foundation-models/vietnamese-bi-encoder')
embeddings = model.encode(sentences)
print(embeddings)

input = 'con cac'
input_e = model.encode(input)
print(input_e)
print()

[[ 0.11881639 -0.32826933 -0.28465217 ... -0.30702746 -0.27665678
  -0.03120262]
 [ 0.33210805 -0.17807008 -0.39799976 ... -0.33134732 -0.44188038
  -0.18214278]]
[ 3.16442847e-01  1.40082613e-01 -1.96205527e-01 -2.87030876e-01
  1.14599153e-01  2.62114614e-01  6.62056580e-02  1.17777489e-01
 -3.89765769e-01 -3.14105675e-02 -1.52859405e-01  1.16654970e-01
 -9.76452082e-02 -1.49781913e-01  9.58365053e-02  4.44398344e-01
  3.83993052e-02 -2.84109056e-01 -4.92166936e-01 -2.86633193e-01
  2.18549252e-01 -2.27224678e-01  1.43126875e-01 -1.14250042e-01
  2.11132258e-01 -9.97589156e-02  2.91066110e-01 -1.85742468e-01
  1.93034619e-01 -2.28425302e-02  1.00717500e-01 -1.31035596e-01
  7.27137774e-02 -3.23491871e-01  6.61543086e-02  7.55714625e-02
 -1.79670528e-01  7.10257739e-02 -1.51189238e-01  4.73104529e-02
 -3.85309160e-01  8.64219479e-03  1.28303215e-01 -1.08192824e-01
 -2.66683966e-01  2.98992068e-01 -1.57157451e-01  4.21793619e-03
  4.66052927e-02  1.72685653e-01  1.26308694e-01  2.32042