In [1]:
!pip install transformers==4.15.0 sentencepiece
!pip install datasets==1.17.0 

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers==4.15.0
  Downloading transformers-4.15.0-py3-none-any.whl (3.4 MB)
[K     |████████████████████████████████| 3.4 MB 3.8 MB/s 
[?25hCollecting sentencepiece
  Downloading sentencepiece-0.1.96-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[K     |████████████████████████████████| 1.2 MB 74.3 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.53.tar.gz (880 kB)
[K     |████████████████████████████████| 880 kB 26.9 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.7.0-py3-none-any.whl (86 kB)
[K     |████████████████████████████████| 86 kB 4.9 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 71.9 MB/s 
Collecting tokenizers<0.11,>=0.10.1
  Downloading 

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
from transformers import AutoModelForMaskedLM, pipeline
from transformers import AutoTokenizer, BertForTokenClassification
import pandas as pd
from datasets import load_dataset, load_metric, Dataset, DatasetDict
import torch
import pickle

In [None]:
# change the input directory to your own preferences
tokenizer = pickle.load(open('drive/MyDrive/AIBuilders/tokenizer.pkl', 'rb'))

In [None]:
model_checkpoint = "airesearch/wangchanberta-base-att-spm-uncased"
model = AutoModelForMaskedLM.from_pretrained(model_checkpoint)
model.resize_token_embeddings(len(tokenizer))

Downloading:   0%|          | 0.00/546 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/404M [00:00<?, ?B/s]

Embedding(33660, 768)

In [None]:
model

CamembertForMaskedLM(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(33660, 768)
      (position_embeddings): Embedding(512, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0): RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps

# Data Prep

In [None]:
# change the input directory to your own preferences
ds = pickle.load(open('drive/MyDrive/AIBuilders/mlm_ds.pkl', 'rb'))
NUM_SAMPLE = ds.shape[0]

In [None]:
ds

Unnamed: 0,input_ids,attention_mask,labels
0,"[5, 10, 9337, 10, 21993, 688, 2902, 874, 350, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[5, 10, 9337, 10, 21993, 688, 2902, 874, 350, ..."
1,"[5, 10, 2004, 10, 38, 293, 15809, 4076, 10, 14...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[5, 10, 2004, 10, 38, 293, 15809, 4076, 10, 14..."
2,"[5, 984, 23, 545, 1223, 6419, 10, 3133, 5033, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[5, 984, 23, 545, 1223, 6419, 10, 3133, 5033, ..."
3,"[5, 10, 38, 5242, 869, 17440, 10, 25004, 2004,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[5, 10, 38, 5242, 869, 17440, 10, 357, 2004, 2..."
4,"[5, 10, 11617, 12071, 27, 10478, 221, 2405, 34...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[5, 10, 11617, 12071, 27, 10478, 221, 2405, 34..."
...,...,...,...
42885,"[5, 10, 10508, 102, 32729, 2306, 15933, 368, 1...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[5, 10, 10508, 102, 32729, 2306, 15933, 368, 1..."
42886,"[5, 10, 1417, 26, 10980, 36, 491, 612, 21, 200...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[5, 10, 1417, 26, 10980, 36, 491, 612, 21, 200..."
42887,"[5, 10, 25004, 25004, 25004, 25004, 10, 2004, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[5, 10, 6850, 1448, 265, 265, 10, 2004, 10, 22..."
42888,"[5, 13276, 5948, 320, 1259, 88, 154, 5364, 33,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[5, 13276, 5948, 320, 1259, 88, 154, 5364, 33,..."


In [None]:
print(ds.iloc[0]['input_ids'])
print(ds.iloc[0]['attention_mask'])
print(ds.iloc[0]['labels'])

[5, 10, 9337, 10, 21993, 688, 2902, 874, 350, 13382, 123, 10, 2001, 24, 1428, 1070, 270, 10467, 168, 212, 10, 11, 11, 4682, 88, 3664, 4446, 10, 4419, 714, 101, 5841, 9284, 10, 72, 10, 15243, 1469, 235, 10, 12226, 10, 32, 132, 350, 13382, 123, 222, 25004, 10, 20, 1428, 1070, 270, 10467, 168, 212, 1007, 26, 4748, 3047, 2033, 7456, 7440, 803, 1028, 25004, 25004, 21, 4288, 212, 10, 2722, 10, 1428, 1070, 270, 10467, 168, 212, 10, 442, 217, 10, 12499, 10, 15436, 22021, 10, 11439, 10, 205, 24835, 10, 607, 10, 1916, 10, 16533, 10, 18692, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

In [None]:
combined_dataset = Dataset.from_pandas(ds)

In [None]:
from sklearn.model_selection import train_test_split
# 90% train, 10% test + validation
train_testvalid = combined_dataset.train_test_split(test_size=0.1)
# Split the 10% test + valid in half test, half valid
test_valid = train_testvalid['test'].train_test_split(test_size=0.5)
# # gather everyone if you want to have a single DatasetDict
combined_dataset = DatasetDict({
    'train': train_testvalid['train'],
    'test': test_valid['test'],
    'valid': test_valid['train']})

In [None]:
combined_dataset

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 38601
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 2145
    })
    valid: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 2144
    })
})

In [None]:
from huggingface_hub import notebook_login

notebook_login() # เอาไว้โยนโมเดลขึ้น hugging face ได้เลย

Login successful
Your token has been saved to /root/.huggingface/token
[1m[31mAuthenticated through git-credential store but this isn't the helper defined on your machine.
You might have to re-authenticate when pushing to the Hugging Face Hub. Run the following command in your terminal in case you want to set this credential helper as the default

git config --global credential.helper store[0m


In [None]:
from transformers import TrainingArguments

batch_size = 8
# Show the training loss with every epoch
logging_steps = len(combined_dataset["train"]) // batch_size
model_name = model_checkpoint.split("/")[-1]

training_args = TrainingArguments(
    output_dir=f"{model_name}",
    overwrite_output_dir=True,
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    weight_decay=0.01,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    push_to_hub=True,
    num_train_epochs = 15,
    fp16=True, # สำหรับคนใช้ GPU
    logging_steps=logging_steps,
)

In [None]:
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=combined_dataset["train"],
    eval_dataset=combined_dataset["test"],
)

Cloning https://huggingface.co/bookpanda/wangchanberta-base-att-spm-uncased-finetuned-imdb into local empty directory.


Download file pytorch_model.bin:   0%|          | 3.48k/427M [00:00<?, ?B/s]

Download file runs/May28_08-33-40_79554af90de7/events.out.tfevents.1653726827.79554af90de7.141.2:  34%|###3   …

Download file runs/Jun09_15-07-57_b2edc6a86459/events.out.tfevents.1654787444.b2edc6a86459.94.0:  31%|###1    …

Download file runs/May28_08-22-00_79554af90de7/events.out.tfevents.1653726142.79554af90de7.141.0:   8%|7      …

Download file runs/Jun08_09-11-27_b7c18beb4215/events.out.tfevents.1654679676.b7c18beb4215.76.0:  60%|######  …

Download file runs/May29_15-35-54_3bb2012f31f8/events.out.tfevents.1653838698.3bb2012f31f8.78.0:   3%|3       …

Clean file runs/May28_08-33-40_79554af90de7/events.out.tfevents.1653726827.79554af90de7.141.2:  18%|#8        …

Clean file runs/Jun09_15-07-57_b2edc6a86459/events.out.tfevents.1654787444.b2edc6a86459.94.0:  17%|#7        |…

Clean file runs/Jun08_09-11-27_b7c18beb4215/events.out.tfevents.1654679676.b7c18beb4215.76.0:  17%|#7        |…

Clean file runs/May28_08-22-00_79554af90de7/events.out.tfevents.1653726142.79554af90de7.141.0:  18%|#8        …

Clean file runs/May29_15-35-54_3bb2012f31f8/events.out.tfevents.1653838698.3bb2012f31f8.78.0:   7%|7         |…

Download file runs/May31_04-37-20_9638c112737c/events.out.tfevents.1653971993.9638c112737c.71.0:  13%|#3      …

Clean file runs/May31_04-37-20_9638c112737c/events.out.tfevents.1653971993.9638c112737c.71.0:   8%|8         |…

Download file runs/May31_05-35-55_9638c112737c/events.out.tfevents.1653975436.9638c112737c.71.4:  35%|###5    …

Clean file runs/May31_05-35-55_9638c112737c/events.out.tfevents.1653975436.9638c112737c.71.4:   8%|8         |…

Download file runs/May28_08-33-40_79554af90de7/1653726827.3569858/events.out.tfevents.1653726827.79554af90de7.…

Download file runs/May28_15-45-23_82e69eacb50c/events.out.tfevents.1653752913.82e69eacb50c.71.0:  64%|######4 …

Clean file runs/May28_08-33-40_79554af90de7/1653726827.3569858/events.out.tfevents.1653726827.79554af90de7.141…

Clean file runs/May28_15-45-23_82e69eacb50c/events.out.tfevents.1653752913.82e69eacb50c.71.0:  18%|#8        |…

Download file runs/Jun08_09-11-27_b7c18beb4215/1654679676.0413399/events.out.tfevents.1654679676.b7c18beb4215.…

Clean file runs/Jun08_09-11-27_b7c18beb4215/1654679676.0413399/events.out.tfevents.1654679676.b7c18beb4215.76.…

Download file runs/May28_15-45-23_82e69eacb50c/1653752913.5912647/events.out.tfevents.1653752913.82e69eacb50c.…

Clean file runs/May28_15-45-23_82e69eacb50c/1653752913.5912647/events.out.tfevents.1653752913.82e69eacb50c.71.…

Download file runs/May29_15-35-54_3bb2012f31f8/1653838698.8586526/events.out.tfevents.1653838698.3bb2012f31f8.…

Clean file runs/May29_15-35-54_3bb2012f31f8/1653838698.8586526/events.out.tfevents.1653838698.3bb2012f31f8.78.…

Download file runs/May31_04-37-20_9638c112737c/1653971993.934937/events.out.tfevents.1653971993.9638c112737c.7…

Clean file runs/May31_04-37-20_9638c112737c/1653971993.934937/events.out.tfevents.1653971993.9638c112737c.71.1…

Download file runs/Jun09_15-07-57_b2edc6a86459/1654787444.7555273/events.out.tfevents.1654787444.b2edc6a86459.…

Clean file runs/Jun09_15-07-57_b2edc6a86459/1654787444.7555273/events.out.tfevents.1654787444.b2edc6a86459.94.…

Download file runs/May31_05-35-55_9638c112737c/events.out.tfevents.1653975378.9638c112737c.71.2:  91%|########…

Clean file runs/May31_05-35-55_9638c112737c/events.out.tfevents.1653975378.9638c112737c.71.2:  26%|##6       |…

Download file runs/May31_05-35-55_9638c112737c/1653975378.1649833/events.out.tfevents.1653975378.9638c112737c.…

Clean file runs/May31_05-35-55_9638c112737c/1653975378.1649833/events.out.tfevents.1653975378.9638c112737c.71.…

Download file runs/May31_05-35-55_9638c112737c/1653975436.6367764/events.out.tfevents.1653975436.9638c112737c.…

Clean file runs/May31_05-35-55_9638c112737c/1653975436.6367764/events.out.tfevents.1653975436.9638c112737c.71.…

Download file runs/May28_08-22-00_79554af90de7/1653726142.92538/events.out.tfevents.1653726142.79554af90de7.14…

Clean file runs/May28_08-22-00_79554af90de7/1653726142.92538/events.out.tfevents.1653726142.79554af90de7.141.1…

Download file training_args.bin:  63%|######2   | 1.84k/2.92k [00:00<?, ?B/s]

Clean file training_args.bin:  34%|###4      | 1.00k/2.92k [00:00<?, ?B/s]

Clean file pytorch_model.bin:   0%|          | 1.00k/427M [00:00<?, ?B/s]

Using amp half precision backend


In [None]:
trainer.train()

***** Running training *****
  Num examples = 38601
  Num Epochs = 3
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 14478


Epoch,Training Loss,Validation Loss
1,0.0416,0.042819
2,0.0417,0.042819
3,0.0415,0.042819


Saving model checkpoint to wangchanberta-base-att-spm-uncased-finetuned-imdb/checkpoint-500
Configuration saved in wangchanberta-base-att-spm-uncased-finetuned-imdb/checkpoint-500/config.json
Model weights saved in wangchanberta-base-att-spm-uncased-finetuned-imdb/checkpoint-500/pytorch_model.bin
Saving model checkpoint to wangchanberta-base-att-spm-uncased-finetuned-imdb/checkpoint-1000
Configuration saved in wangchanberta-base-att-spm-uncased-finetuned-imdb/checkpoint-1000/config.json
Model weights saved in wangchanberta-base-att-spm-uncased-finetuned-imdb/checkpoint-1000/pytorch_model.bin
Saving model checkpoint to wangchanberta-base-att-spm-uncased-finetuned-imdb/checkpoint-1500
Configuration saved in wangchanberta-base-att-spm-uncased-finetuned-imdb/checkpoint-1500/config.json
Model weights saved in wangchanberta-base-att-spm-uncased-finetuned-imdb/checkpoint-1500/pytorch_model.bin
Saving model checkpoint to wangchanberta-base-att-spm-uncased-finetuned-imdb/checkpoint-2000
Configu

TrainOutput(global_step=14478, training_loss=0.041567070182453515, metrics={'train_runtime': 6098.4602, 'train_samples_per_second': 18.989, 'train_steps_per_second': 2.374, 'total_flos': 3.048102327018701e+16, 'train_loss': 0.041567070182453515, 'epoch': 3.0})

In [None]:
# change the input directory to your own preferences
FILE = "/content/drive/MyDrive/AIBuilders/mlm.pth"
torch.save(model.state_dict(), FILE)

In [None]:
trainer.push_to_hub()# โยนขึ้น hugging face

Saving model checkpoint to wangchanberta-base-att-spm-uncased-finetuned-imdb
Configuration saved in wangchanberta-base-att-spm-uncased-finetuned-imdb/config.json
Model weights saved in wangchanberta-base-att-spm-uncased-finetuned-imdb/pytorch_model.bin


Upload file pytorch_model.bin:   0%|          | 3.34k/411M [00:00<?, ?B/s]

Upload file training_args.bin: 100%|##########| 2.92k/2.92k [00:00<?, ?B/s]

Upload file runs/Jun08_09-11-27_b7c18beb4215/1654679676.0413399/events.out.tfevents.1654679676.b7c18beb4215.76…

Upload file runs/Jun08_09-11-27_b7c18beb4215/events.out.tfevents.1654679676.b7c18beb4215.76.0:  58%|#####7    …

remote: Enforcing permissions...        
remote: Allowed refs: all        
To https://huggingface.co/bookpanda/wangchanberta-base-att-spm-uncased-finetuned-imdb
   1a2eda1..32a3c02  main -> main

Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Masked Language Modeling', 'type': 'fill-mask'}}
remote: Enforcing permissions...        
remote: Allowed refs: all        
To https://huggingface.co/bookpanda/wangchanberta-base-att-spm-uncased-finetuned-imdb
   32a3c02..8c99b49  main -> main



'https://huggingface.co/bookpanda/wangchanberta-base-att-spm-uncased-finetuned-imdb/commit/32a3c024beed1e1e6b2b543679d2d4787ab11642'

# Test with tagging model

In [None]:
class BertModel(torch.nn.Module):

    def __init__(self):

        super(BertModel, self).__init__()

        self.bert = BertForTokenClassification.from_pretrained('airesearch/wangchanberta-base-att-spm-uncased', num_labels=2)
        self.bert.resize_token_embeddings(len(tokenizer))

    def forward(self, input_id, mask, label):

        output = self.bert(input_ids=input_id, attention_mask=mask, labels=label, return_dict=False)

        return output

In [None]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

if use_cuda:
    model = model.cuda()
# change the input directory to your own preferences
FILE = "drive/MyDrive/AIBuilders/mlm/tagging.pth"
loaded_model = BertModel()
loaded_model.load_state_dict(torch.load(FILE, map_location=torch.device('cpu')))
loaded_model.eval()

In [None]:
ids_to_labels = {0: 'f', 1: 'i'}

def align_word_ids(texts):
  
    tokenized_inputs = tokenizer(texts, padding='max_length', max_length=512, truncation=True)
    c = tokenizer.convert_ids_to_tokens(tokenized_inputs.input_ids)
    word_ids = tokenized_inputs.word_ids()
    previous_word_idx = None
    label_ids = []
    for word_idx in word_ids:

        if word_idx is None:
            label_ids.append(-100)
        else:
            try:
              label_ids.append(2)
            except:
                label_ids.append(-100)

        previous_word_idx = word_idx
    return label_ids

def evaluate_one_text(model, sentence):
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    if use_cuda:
        model = model.cuda()

    text = tokenizer(sentence, padding='max_length', max_length = 512, truncation=True, return_tensors="pt")

    mask = text['attention_mask'][0].unsqueeze(0).to(device)

    input_id = text['input_ids'][0].unsqueeze(0).to(device)
    label_ids = torch.Tensor(align_word_ids(sentence)).unsqueeze(0).to(device)

    logits = model(input_id, mask, None)
    logits_clean = logits[0][label_ids != -100]

    predictions = logits_clean.argmax(dim=1).tolist()
    prediction_label = [ids_to_labels[i] for i in predictions]
    return prediction_label

In [None]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

text = "ประเทศเราผลิตและส่งออกยาสูบเยอะสุดในโลกจิงป่าวคับ"
ans = []
i_f = evaluate_one_text(loaded_model, text)
print(i_f)
a = tokenizer(text)
b = a['input_ids']
c = tokenizer.convert_ids_to_tokens(b)
print(c)
i_f_len = len(i_f)
for j in range(i_f_len):
  if(i_f[j] == 'i'):
    ph = a['input_ids'][j+1]
    a['input_ids'][j+1] = 25004
    print(tokenizer.decode(a['input_ids']))
    b = {'input_ids': torch.Tensor([a['input_ids']]).type(torch.int64).to(device), 'attention_mask': torch.Tensor([a['attention_mask']]).type(torch.int64).to(device)}
    token_logits = model(**b).logits
    mask_token_index = torch.where(b["input_ids"] == tokenizer.mask_token_id)[1]
    mask_token_logits = token_logits[0, mask_token_index, :]
    top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
    ans.append((j, top_5_tokens[0]))
    text = ''.join(tokenizer.convert_ids_to_tokens(a['input_ids']))
    for token in top_5_tokens:
        print(f"'>>> {text.replace(tokenizer.mask_token, tokenizer.decode([token]))}'")
    a['input_ids'][j+1] = ph

print(a)
for x,y in ans:
  a['input_ids'][x+1] = y
print(''.join(tokenizer.convert_ids_to_tokens(a['input_ids'])))

['f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'i', 'i', 'i']
['<s>', '▁', 'ประเทศ', 'เรา', 'ผลิต', 'และ', 'ส่งออก', 'ยาสูบ', 'เยอะ', 'สุดในโลก', 'จิง', 'ป่าว', 'คับ', '</s>']
<s> ประเทศเราผลิตและส่งออกยาสูบเยอะสุดในโลก<mask>ป่าวคับ</s>
'>>> <s>▁ประเทศเราผลิตและส่งออกยาสูบเยอะสุดในโลกมากป่าวคับ</s>'
'>>> <s>▁ประเทศเราผลิตและส่งออกยาสูบเยอะสุดในโลกไปป่าวคับ</s>'
'>>> <s>▁ประเทศเราผลิตและส่งออกยาสูบเยอะสุดในโลกอะป่าวคับ</s>'
'>>> <s>▁ประเทศเราผลิตและส่งออกยาสูบเยอะสุดในโลก_ป่าวคับ</s>'
'>>> <s>▁ประเทศเราผลิตและส่งออกยาสูบเยอะสุดในโลกนะป่าวคับ</s>'
'>>> <s>▁ประเทศเราผลิตและส่งออกยาสูบเยอะสุดในโลกเลยป่าวคับ</s>'
'>>> <s>▁ประเทศเราผลิตและส่งออกยาสูบเยอะสุดในโลกเหรอป่าวคับ</s>'
'>>> <s>▁ประเทศเราผลิตและส่งออกยาสูบเยอะสุดในโลกอย่างนี้ป่าวคับ</s>'
'>>> <s>▁ประเทศเราผลิตและส่งออกยาสูบเยอะสุดในโลกมากไปป่าวคับ</s>'
'>>> <s>▁ประเทศเราผลิตและส่งออกยาสูบเยอะสุดในโลกเสียป่าวคับ</s>'
<s> ประเทศเราผลิตและส่งออกยาสูบเยอะสุดในโลกจิง<mask>คับ</s>
'>>> <s>▁ประเทศเราผลิตและส่งออกยาสูบเยอะสุดในโลกจิงไหมคับ</s