In [1]:
!pip3 install indobenchmark-toolkit==0.0.5
!pip3 install datasets==1.16.1
!pip3 install tqdm==4.62.3
!pip3 install transformers==4.14.1
!pip3 install sentencepiece==0.1.96
!pip3 install torch==1.10.1
!pip3 install numpy==1.21.4
!pip3 install torchvision==0.11.2
!pip3 install pandas==1.3.5
!pip3 install scikit-learn==1.0.1
!pip3 install nltk==3.6.5
!pip3 install rouge_score==0.0.4
!pip3 install sacrebleu==2.0.0
!pip3 install huggingface-hub==0.2.1

Collecting indobenchmark-toolkit==0.0.5
  Downloading indobenchmark_toolkit-0.0.5-py3-none-any.whl (8.0 kB)
Collecting datasets==1.4.1
  Downloading datasets-1.4.1-py3-none-any.whl (186 kB)
     |████████████████████████████████| 186 kB 293 kB/s            
Collecting sentencepiece==0.1.95
  Downloading sentencepiece-0.1.95-cp37-cp37m-manylinux2014_x86_64.whl (1.2 MB)
     |████████████████████████████████| 1.2 MB 2.2 MB/s            
Collecting huggingface-hub==0.0.2
  Downloading huggingface_hub-0.0.2-py3-none-any.whl (24 kB)
Collecting tqdm<4.50.0,>=4.27
  Downloading tqdm-4.49.0-py2.py3-none-any.whl (69 kB)
     |████████████████████████████████| 69 kB 6.1 MB/s             
Collecting transformers>=4.3.2
  Downloading transformers-4.15.0-py3-none-any.whl (3.4 MB)
     |████████████████████████████████| 3.4 MB 16.6 MB/s            
[?25h  Downloading transformers-4.14.1-py3-none-any.whl (3.4 MB)
     |████████████████████████████████| 3.4 MB 53.7 MB/s            
[?25h  Downloadin

In [2]:
from nltk import word_tokenize
import datasets

# Evaluation Metric
bleu = datasets.load_metric('bleu')
rouge = datasets.load_metric('rouge')
sacrebleu = datasets.load_metric('sacrebleu')
squad_v2_metric = datasets.load_metric('squad_v2')

def generation_metrics_fn(list_hyp, list_label):
    # hyp and label are both list of string
    list_hyp_bleu = list(map(lambda x: word_tokenize(x), list_hyp))
    list_label_bleu = list(map(lambda x: [word_tokenize(x)], list_label))
    list_label_sacrebleu = list(map(lambda x: [x], list_label))
    metrics = {}
    
    metrics["BLEU"] = bleu._compute(list_hyp_bleu, list_label_bleu)['bleu'] * 100
    metrics["SacreBLEU"] = sacrebleu._compute(list_hyp, list_label_sacrebleu)['score']
    
    rouge_score = rouge._compute(list_hyp,list_label)
    metrics["ROUGE1"] = rouge_score['rouge1'].mid.fmeasure * 100
    metrics["ROUGE2"] = rouge_score['rouge2'].mid.fmeasure * 100
    metrics["ROUGEL"] = rouge_score['rougeL'].mid.fmeasure * 100
    metrics["ROUGELsum"] = rouge_score['rougeLsum'].mid.fmeasure * 100
    
    qa_hyps = [{'prediction_text': hyp, 'id': str(id), 'no_answer_probability':0} for id, hyp in enumerate(list_hyp)]
    qa_labels = [{'answers': {'answer_start': [0], 'text': [label]}, 'id': str(id)} for id, label in enumerate(list_label)]
    squad_v2_score = squad_v2_metric.compute(predictions=qa_hyps, references=qa_labels)
    
    metrics["EM"] = squad_v2_score['exact']
    metrics["F1"] = squad_v2_score['f1']
    return metrics

Downloading:   0%|          | 0.00/2.49k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.17k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.37k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.26k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/3.18k [00:00<?, ?B/s]

In [3]:
def count_param(module, trainable=False):
    if trainable:
        return sum(p.numel() for p in module.parameters() if p.requires_grad)
    else:
        return sum(p.numel() for p in module.parameters())
    
def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']

def metrics_to_string(metric_dict):
    string_list = []
    for key, value in metric_dict.items():
        string_list.append('{}:{:.2f}'.format(key, value))
    return ' '.join(string_list)

In [4]:
###
# Inputs:
#    batch_data - one batch of data
#    tokenizer - the tokenizer use for decoding tokens into text
#    model_type - type of the model (only handle special case for indo_gpt2 as it is the only decoder only model)
#    is_inference - whether to run inference on decoder or else use teacher forcing for training
#    is_test - use beam search with the specified `beam_size` if is test else greedy search
#    skip_special_tokens - whether to skip decoding special tokens for producing hypothesis and label strings
#    beam_size - size of beam search
#    max_seq_len - maximum allowed length of the decoding sequence
#    device - device to run the forward function
#
# Outputs
#    loss - loss from the forward function, 0 when doing performing inference
#    list_hyp - list of hypothesis string
#    list_label - list of label string
###
def forward_generation(model, batch_data, tokenizer, model_type, is_inference=False, is_test=False, 
                           skip_special_tokens=True, beam_size=1, max_seq_len=512, device='cpu', length_penalty=1.0, 
                           top_p=1.0, top_k=50, repetition_penalty=1.0, do_sample=False,  **kwargs):
    # Unpack batch data
    if len(batch_data) == 6:
        (id, enc_batch, dec_batch, enc_mask_batch, dec_mask_batch, label_batch) = batch_data
        token_type_batch = None
    elif len(batch_data) == 7:
        (id, enc_batch, dec_batch, enc_mask_batch, dec_mask_batch, label_batch, token_type_batch) = batch_data
    
    # Prepare input & label
    enc_batch = torch.LongTensor(enc_batch) if enc_batch is not None else None
    dec_batch = torch.LongTensor(dec_batch)
    enc_mask_batch = torch.FloatTensor(enc_mask_batch) if enc_mask_batch is not None else None
    dec_mask_batch = torch.FloatTensor(dec_mask_batch) if dec_mask_batch is not None else None
    label_batch = torch.LongTensor(label_batch)
    token_type_batch = torch.LongTensor(token_type_batch) if token_type_batch is not None else None
            
    if device == "cuda":
        enc_batch = enc_batch.cuda() if enc_batch is not None else None
        dec_batch = dec_batch.cuda()
        enc_mask_batch = enc_mask_batch.cuda() if enc_mask_batch is not None else None
        dec_mask_batch = dec_mask_batch.cuda() if dec_mask_batch is not None else None
        label_batch = label_batch.cuda()
        token_type_batch = token_type_batch.cuda()  if token_type_batch is not None else None

    # Forward model
    if not is_inference:
        outputs = model(input_ids=enc_batch, attention_mask=enc_mask_batch, decoder_input_ids=dec_batch, 
                decoder_attention_mask=dec_mask_batch, labels=label_batch)
        loss, logits = outputs[:2]
        hyps = logits.topk(1, dim=-1)[1]
    else:
        loss = 0
        hyps = model.generate(input_ids=enc_batch, attention_mask=enc_mask_batch, num_beams=beam_size if is_test else 1, 
                                max_length=max_seq_len, early_stopping=True, length_penalty=length_penalty, use_cache=True,
                                pad_token_id=tokenizer.pad_token_id, eos_token_id=tokenizer.eos_token_id)
    # generate prediction & label list
    list_hyp = []
    list_label = []
    list_context = []
    for j in range(len(hyps)):
        hyp = hyps[j]
        label = label_batch[j,:].squeeze()
        if not is_inference:
            hyp = hyp.squeeze()[label != -100]
        list_hyp.append(tokenizer.decode(hyp, skip_special_tokens=skip_special_tokens))
        list_label.append(tokenizer.decode(label[label != -100], skip_special_tokens=skip_special_tokens))
        
    return loss, list_hyp, list_label

In [5]:
import json
import string
import re
from itertools import chain
from torch.utils.data import Dataset, DataLoader

##
# Question Answering
##
class QuestionAnsweringDataset(Dataset):
    # JSON Format
    # [{
    #    'id': 'id_string',
    #    'passage': 'input_string',
    #    'question': 'input_string',
    #    'label': 'target_string'
    # }, ... ]
    def load_dataset(self, path): 
        data = json.load(open(path, 'r'))
        return data
    
    def __init__(self, dataset_path, tokenizer, separator_id, *args, **kwargs):
        self.data = self.load_dataset(dataset_path)
        self.tokenizer = tokenizer
        self.separator_id = separator_id
    
    def __getitem__(self, index):
        data = self.data[index]
        id, context, question, label = data['id'], data['context'], data['question'], data['label']
        context_subwords = self.tokenizer.encode(context.lower(), add_special_tokens=False)
        question_subwords = self.tokenizer.encode(question.lower(), add_special_tokens=False)
        
        input_subwords = np.concatenate([context_subwords, [self.separator_id], question_subwords]).tolist()
        label_subwords = self.tokenizer.encode(label.lower(), add_special_tokens=False)
        
        return data['id'], input_subwords, label_subwords
    
    def __len__(self):
        return len(self.data)

###
# Generation Data Loader
###
class GenerationDataLoader(DataLoader):
    def __init__(self, max_seq_len=512, src_lid_token_id=1, tgt_lid_token_id=2, label_pad_token_id=-100, model_type='baseline-mbart', tokenizer=None, *args, **kwargs):
        super(GenerationDataLoader, self).__init__(*args, **kwargs)
    
        # Set Special Tokens
        self.tokenizer = tokenizer
        self.max_seq_len = max_seq_len
        self.pad_token_id = tokenizer.pad_token_id
        self.bos_token_id = tokenizer.bos_token_id
        self.eos_token_id = tokenizer.eos_token_id
        self.src_lid_token_id = src_lid_token_id
        self.tgt_lid_token_id = tgt_lid_token_id
        self.label_pad_token_id = label_pad_token_id
               
        if model_type == 'baseline-mbart':
            self.collate_fn = self._baseline_mbart_collate_fn
        else:
            raise ValueError(f'Unknown model_type `{model_type}`')

    def _baseline_mbart_collate_fn(self, batch):
        ####
        # We follow mBART pre-training format, there is a discussions for the mBART tokenizer (https://github.com/huggingface/transformers/issues/7416)
        #   which mentioned the format of the labels should be: <langid><sent><eos><langid>
        #   and the mBART model will add the <langid> as a prefix to create the decoder_input_ids during the forward function.
        # 
        # Output format:
        # encoder input
        # <sent><eos><langid>
        # decoder input
        # <langid><sent><eos>
        # decoder output
        # <sent><eos><langid>
        ###
        batch_size = len(batch)
        max_enc_len = min(self.max_seq_len, max(map(lambda x: len(x[1]), batch)) + 2) # + 2 for eos and langid
        max_dec_len = min(self.max_seq_len, max(map(lambda x: len(x[2]), batch)) + 2) # + 2 for eos and langid
        
        id_batch = []
        enc_batch = np.full((batch_size, max_enc_len), self.pad_token_id, dtype=np.int64)
        dec_batch = np.full((batch_size, max_dec_len), self.pad_token_id, dtype=np.int64)
        label_batch = np.full((batch_size, max_dec_len), self.label_pad_token_id, dtype=np.int64)
        enc_mask_batch = np.full((batch_size, max_enc_len), 0, dtype=np.float32)
        dec_mask_batch = np.full((batch_size, max_dec_len), 0, dtype=np.float32)
        
        for i, (id, input_seq, label_seq) in enumerate(batch):
            input_seq, label_seq = input_seq[:max_enc_len-2], label_seq[:max_dec_len - 2]
            
            # Assign content
            enc_batch[i,0:len(input_seq)] = input_seq
            dec_batch[i,1:1+len(label_seq)] = label_seq
            label_batch[i,0:len(label_seq)] = label_seq
            enc_mask_batch[i,:len(input_seq) + 2] = 1
            dec_mask_batch[i,:len(label_seq) + 2] = 1
            
            # Assign special token to encoder input
            enc_batch[i,len(input_seq)] = self.eos_token_id
            enc_batch[i,1+len(input_seq)] = self.src_lid_token_id
            
            # Assign special token to decoder input
            dec_batch[i,0] = self.tgt_lid_token_id
            dec_batch[i,1+len(label_seq)] = self.eos_token_id
            
            # Assign special token to label
            label_batch[i,len(label_seq)] = self.eos_token_id
            label_batch[i,1+len(label_seq)] = self.tgt_lid_token_id
            
            id_batch.append(id)
        
        return id_batch, enc_batch, dec_batch, enc_mask_batch, None, label_batch
#         return id_batch, enc_batch, dec_batch, enc_mask_batch, dec_mask_batch, label_batch

In [6]:
import torch
from torch.optim.lr_scheduler import StepLR
from tqdm import tqdm

# Evaluate Function
def evaluate(model, data_loader, forward_fn, metrics_fn, model_type, tokenizer, beam_size=1, max_seq_len=512, is_test=False, device='cpu'):
    model.eval()
    torch.set_grad_enabled(False)
    
    total_loss, total_correct, total_labels = 0, 0, 0

    list_hyp, list_label = [], []

    pbar = tqdm(iter(data_loader), leave=True, total=len(data_loader))
    for i, batch_data in enumerate(pbar):
        batch_seq = batch_data[-1]
        loss, batch_hyp, batch_label = forward_fn(model, batch_data, model_type=model_type, tokenizer=tokenizer, device=device, is_inference=is_test, 
                                                      is_test=is_test, skip_special_tokens=True, beam_size=beam_size, max_seq_len=max_seq_len)
        
        # Calculate evaluation metrics
        list_hyp += batch_hyp
        list_label += batch_label

        if not is_test:
            # Calculate total loss for validation
            test_loss = loss.item()
            total_loss = total_loss + test_loss

            # pbar.set_description("VALID {}".format(metrics_to_string(metrics)))
            pbar.set_description("VALID LOSS:{:.4f}".format(total_loss/(i+1)))
        else:
            pbar.set_description("TESTING... ")
            # pbar.set_description("TEST LOSS:{:.4f} {}".format(total_loss/(i+1), metrics_to_string(metrics)))
    
    metrics = metrics_fn(list_hyp, list_label)        
    if is_test:
        return total_loss/(i+1), metrics, list_hyp, list_label
    else:
        return total_loss/(i+1), metrics

In [7]:
import numpy as np
import pandas as pd
import torch
torch.cuda.is_available()
torch.cuda.device_count()

1

In [8]:
from indobenchmark import IndoNLGTokenizer

tokenizer = IndoNLGTokenizer.from_pretrained('indobenchmark/indobart-v2')

Downloading:   0%|          | 0.00/910k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/315 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/339 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.67k [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [9]:
from transformers import MBartTokenizer, MBartConfig, MBartForQuestionAnswering, MBartForConditionalGeneration

finetuned_model = MBartForConditionalGeneration.from_pretrained('sibckukgvaxsepbkyb/finetuned_model')
base_model = MBartForConditionalGeneration.from_pretrained('sibckukgvaxsepbkyb/base_model')

Downloading:   0%|          | 0.00/1.72k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/502M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.72k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/502M [00:00<?, ?B/s]

In [10]:
test_dataset_path = '../input/indoqav2/test_preprocess.json'

test_dataset = QuestionAnsweringDataset(test_dataset_path, tokenizer, tokenizer.sep_token_id, lowercase=True)

test_loader = GenerationDataLoader(dataset=test_dataset, model_type='baseline-mbart', tokenizer=tokenizer, max_seq_len=256, 
                                    batch_size=16, src_lid_token_id=40002, tgt_lid_token_id=40002, num_workers=8, shuffle=True)

  cpuset_checked))


## Example Question Answering

In [11]:
def question_answering(context, question, model):
    context_subwords = tokenizer.encode(context.lower(), add_special_tokens=False)
    question_subwords = tokenizer.encode(question.lower(), add_special_tokens=False)

    input_subwords = np.concatenate([context_subwords, [tokenizer.sep_token_id], question_subwords]).tolist()

    max_enc_len = min(256, (len(input_subwords) + 2)) # + 2 for eos and langid

    enc_batch = np.full((1, max_enc_len), tokenizer.pad_token_id, dtype=np.int64)
    enc_mask_batch = np.full((1, max_enc_len), 0, dtype=np.float32)

    input_subwords = input_subwords[:max_enc_len-2]

    # Assign content
    enc_batch[0,0:len(input_subwords)] = input_subwords
    enc_mask_batch[0,:len(input_subwords) + 2] = 1

    # Assign special token to encoder input
    enc_batch[0,len(input_subwords)] = tokenizer.eos_token_id
    enc_batch[0,1+len(input_subwords)] = 40002

    enc_batch = torch.LongTensor(enc_batch) if enc_batch is not None else None
    enc_mask_batch = torch.FloatTensor(enc_mask_batch) if enc_mask_batch is not None else None

    enc_batch = enc_batch.cuda() if enc_batch is not None else None
    enc_mask_batch = enc_mask_batch.cuda() if enc_mask_batch is not None else None

    device = 'cuda0'
    if "cuda" in device:
        torch.cuda.set_device(int(device[4:]))
        device = "cuda"
        model = model.cuda()

    loss = 0
    hyps = model.generate(input_ids=enc_batch, attention_mask=enc_mask_batch, num_beams=5, 
                            max_length=256, early_stopping=True, length_penalty=1.0, use_cache=True,
                            pad_token_id=tokenizer.pad_token_id, eos_token_id=tokenizer.eos_token_id)
    # generate prediction & label list
    list_hyp = []
    list_label = []
    for j in range(len(hyps)):
        hyp = hyps[j]
        list_hyp.append(tokenizer.decode(hyp, skip_special_tokens=True))

    return list_hyp[0]

In [12]:
# Example 1
context = "Era awal perfilman Indonesia ini diawali dengan berdirinya bioskop pertama di Indonesia pada 5 Desember 1900 di daerah Tanah Abang, Batavia dengan nama Gambar Idoep yang menayangkan berbagai film bisu."
question = "Kapan industri film di Indonesia dimulai?"
question_0 = "Di daerah mana industri film di Indonesia dimulai?"

fine_answer = question_answering(context, question, finetuned_model)
base_answer = question_answering(context, question, base_model)

fine_answer_0 = question_answering(context, question_0, finetuned_model)
base_answer_0 = question_answering(context, question_0, base_model)

print(context, '\n', question, '\n======== Fine Tuned ========', fine_answer, '\n======== Base Model ========', base_answer, '\n', sep='\n')
print(question_0, '\n======== Fine Tuned ========', fine_answer_0, '\n======== Base Model ========', base_answer_0, sep='\n')

Era awal perfilman Indonesia ini diawali dengan berdirinya bioskop pertama di Indonesia pada 5 Desember 1900 di daerah Tanah Abang, Batavia dengan nama Gambar Idoep yang menayangkan berbagai film bisu.


Kapan industri film di Indonesia dimulai?

5 desember 1900

kapan industri film di indonesia dimulai?


Di daerah mana industri film di Indonesia dimulai?

tanah abang, batavia

di daerah mana industri film di indonesia dimulai?


In [13]:
# Example 2
context = "Ernest Douwes Dekker wafat dini hari tanggal 28 Agustus 1950 (tertulis di batu nisannya; 29 Agustus 1950 versi van der Veur, 2006) dan dimakamkan di TMP Cikutra, Bandung."
question = "Dimanakah Dr. Ernest François Eugène Douwes Dekker meninggal?"
question_0 = "Kapan Dr. Ernest François Eugène Douwes Dekker meninggal?"

fine_answer = question_answering(context, question, finetuned_model)
base_answer = question_answering(context, question, base_model)

fine_answer_0 = question_answering(context, question_0, finetuned_model)
base_answer_0 = question_answering(context, question_0, base_model)

print(context, '\n', question, '\n======== Fine Tuned ========', fine_answer, '\n======== Base Model ========', base_answer, '\n', sep='\n')
print(question_0, '\n======== Fine Tuned ========', fine_answer_0, '\n======== Base Model ========', base_answer_0, sep='\n')

Ernest Douwes Dekker wafat dini hari tanggal 28 Agustus 1950 (tertulis di batu nisannya; 29 Agustus 1950 versi van der Veur, 2006) dan dimakamkan di TMP Cikutra, Bandung.


Dimanakah Dr. Ernest François Eugène Douwes Dekker meninggal?

tmp c ikut ra

dimanakah dr. ernest fran <0xC3> <0xA7> ois eug <0xC3> <0xA8> ne do uw es dek ker meninggal?


Kapan Dr. Ernest François Eugène Douwes Dekker meninggal?

28 agustus 1950

kapan dr. ernest fran <0xC3> <0xA7> ois eug <0xC3> <0xA8> ne do uw es dek ker meninggal?


In [14]:
# Example 3
context = "Paspor Republik Indonesia adalah dokumen perjalanan yang diterbitkan oleh Direktorat Jenderal Imigrasi, Kementerian Hukum dan Hak Asasi Manusia, dan perwakilan RI di luar negeri. Paspor ini hanya diberikan kepada Warga Negara Indonesia"
question = "Siapa pemegang Paspor Republik Indonesia?"
question_0 = "Siapa yang dapat menerbitkan Paspor Republik Indonesia?"

fine_answer = question_answering(context, question, finetuned_model)
base_answer = question_answering(context, question, base_model)

fine_answer_0 = question_answering(context, question_0, finetuned_model)
base_answer_0 = question_answering(context, question_0, base_model)

print(context, '\n', question, '\n======== Fine Tuned ========', fine_answer, '\n======== Base Model ========', base_answer, '\n', sep='\n')
print(question_0, '\n======== Fine Tuned ========', fine_answer_0, '\n======== Base Model ========', base_answer_0, sep='\n')

Paspor Republik Indonesia adalah dokumen perjalanan yang diterbitkan oleh Direktorat Jenderal Imigrasi, Kementerian Hukum dan Hak Asasi Manusia, dan perwakilan RI di luar negeri. Paspor ini hanya diberikan kepada Warga Negara Indonesia


Siapa pemegang Paspor Republik Indonesia?

warga negara indonesia

siapa pemegang paspor republik indonesia?


Siapa yang dapat menerbitkan Paspor Republik Indonesia?

direktorat jenderal imigrasi, kementerian hukum dan hak asasi manusia, dan perwakilan ri di luar negeri

siapa yang dapat menerbitkan paspor republik indonesia?


In [15]:
# Example 4
context = "Leonardo da Pisa atau Leonardo Pisano (1175 - 1250), dikenal juga sebagai Fibonacci, adalah seorang matematikawan Italia yang dikenal sebagai penemu bilangan Fibonacci dan perannya dalam mengenalkan sistem penulisan dan perhitungan bilangan Arab ke dunia Eropa (algorisma)."
question = "Kapan Leonardo Pisano lahir?"
question_0 = "Kapan Leonardo Pisano meninggal?"
question_1 = "Siapa penemu bilangan Fibonacci?"

fine_answer = question_answering(context, question, finetuned_model)
base_answer = question_answering(context, question, base_model)

fine_answer_0 = question_answering(context, question_0, finetuned_model)
base_answer_0 = question_answering(context, question_0, base_model)

fine_answer_1 = question_answering(context, question_1, finetuned_model)
base_answer_1 = question_answering(context, question_1, base_model)

print(context, '\n', question, '\n======== Fine Tuned ========', fine_answer, '\n======== Base Model ========', base_answer, '\n', sep='\n')
print(question_0, '\n======== Fine Tuned ========', fine_answer_0, '\n======== Base Model ========', base_answer_0, '\n', sep='\n')
print(question_1, '\n======== Fine Tuned ========', fine_answer_1, '\n======== Base Model ========', base_answer_1, sep='\n')

Leonardo da Pisa atau Leonardo Pisano (1175 - 1250), dikenal juga sebagai Fibonacci, adalah seorang matematikawan Italia yang dikenal sebagai penemu bilangan Fibonacci dan perannya dalam mengenalkan sistem penulisan dan perhitungan bilangan Arab ke dunia Eropa (algorisma).


Kapan Leonardo Pisano lahir?

11 75

kapan leonardo pisan o lahir?


Kapan Leonardo Pisano meninggal?

12 50

kapan leonardo pisan o meninggal?


Siapa penemu bilangan Fibonacci?

leonardo pisan o

siapa penemu bilangan fib on ac ci?


In [21]:
# Example 5
context = "Vaksinasi booster COVID-19 dilaksanakan serentak di kabupaten/kota di Indonesia yang telah memenuhi syarat, telah mencapai 70 persen vaksinasi dosis pertama dan 60 persen dosis kedua."
question = "Apa yang telah memenuhi syarat?"
question_0 = "Vaksinasi dosis pertama telah mencapai berapa persen?"
question_1 = "Di mana vaksinasi dilakukan?"

fine_answer = question_answering(context, question, finetuned_model)
base_answer = question_answering(context, question, base_model)

fine_answer_0 = question_answering(context, question_0, finetuned_model)
base_answer_0 = question_answering(context, question_0, base_model)

fine_answer_1 = question_answering(context, question_1, finetuned_model)
base_answer_1 = question_answering(context, question_1, base_model)

print(context, '\n', question, '\n======== Fine Tuned ========', fine_answer, '\n======== Base Model ========', base_answer, '\n', sep='\n')
print(question_0, '\n======== Fine Tuned ========', fine_answer_0, '\n======== Base Model ========', base_answer_0, '\n', sep='\n')
print(question_1, '\n======== Fine Tuned ========', fine_answer_1, '\n======== Base Model ========', base_answer_1, sep='\n')

Vaksinasi booster COVID-19 dilaksanakan serentak di kabupaten/kota di Indonesia yang telah memenuhi syarat, telah mencapai 70 persen vaksinasi dosis pertama dan 60 persen dosis kedua.


Apa yang telah memenuhi syarat?

vaksinasi co vid - 19

booster co vid - 19 dilaksanakan serentak di kabupaten / kota di indonesia yang telah memenuhi syarat, telah mencapai 70 persen dosis pertama dan 60 persen dosis kedua.


Vaksinasi dosis pertama telah mencapai berapa persen?

70 persen

booster co vid - 19 dilaksanakan serentak di kabupaten / kota di indonesia yang telah memenuhi syarat, telah mencapai 70 persen vaksinasi dosis pertama dan 60 persen dosis kedua.


Di mana vaksinasi dilakukan?

kabupaten / kota di indonesia

booster co vid - 19 dilaksanakan serentak di kabupaten / kota di indonesia yang telah memenuhi syarat, telah mencapai 70 persen vaksinasi dosis pertama dan 60 persen dosis kedua.


## Evaluation Metric

In [16]:
# Base Model Performance Summary
epoch = 10
total_loss = 0
list_hyp = []
list_label = []
pbar = tqdm(test_loader, leave=True, total=len(test_loader))
for i, batch_data in enumerate(pbar):
    batch_seq = batch_data[-1]        
    loss, batch_hyp, batch_label = forward_generation(base_model, batch_data, tokenizer, model_type='bart', device='cuda')

    # Calculate total loss
    valid_loss = loss.item()
    total_loss = total_loss + valid_loss

    # Calculate evaluation metrics
    list_hyp += batch_hyp
    list_label += batch_label
    metrics = generation_metrics_fn(list_hyp, list_label)

    pbar.set_description("TEST LOSS:{:.4f} {}".format(total_loss/(i+1), metrics_to_string(metrics)))

metrics = generation_metrics_fn(list_hyp, list_label)
print("(Epoch {}) TEST LOSS:{:.4f} {}".format((epoch),
    total_loss/(i+1), metrics_to_string(metrics)))

for batch in test_loader:
    loss, batch_hyp, batch_label = forward_generation(base_model, batch, tokenizer, model_type='bart', device='cuda')
    for pred, label in zip(batch_hyp, batch_label):
        print('PRED : ' + pred)
        print('LABEL: ' + label)
        print()

TEST LOSS:13.8510 BLEU:34.21 SacreBLEU:34.28 ROUGE1:19.01 ROUGE2:13.16 ROUGEL:18.85 ROUGELsum:18.81 EM:0.00 F1:19.08: 100%|██████████| 54/54 [00:41<00:00,  1.31it/s]


(Epoch 10) TEST LOSS:13.8510 BLEU:34.21 SacreBLEU:34.28 ROUGE1:19.02 ROUGE2:13.26 ROUGEL:18.79 ROUGELsum:18.84 EM:0.00 F1:19.08
PRED : persegi  berapakah
LABEL: 505. 99 2 kilometer persegi

PRED : 
LABEL: 13 37

PRED : 
LABEL: inggris

PRED : 
LABEL: 16 44

PRED : zhao
LABEL: amerika serikat

PRED : 
LABEL: ha vana

PRED : candy
LABEL: ky oko mi zuki

PRED : berapakah
LABEL: 13 15, 43 km 2

PRED : 
LABEL: li yuan

PRED : perairan sekitar 16. 4 70 hektare, berapakah
LABEL: daratan 760 hektare serta luas perairan kolam 16. 4 70 hektare

PRED : kapan
LABEL: 30 desember 2018

PRED : 
LABEL: 1945

PRED : faktor      ataupun vaksin. apakah
LABEL: faktor genetik atau keturunan dan faktor lingkungan seperti pengaruh zat kimiawi ataupun vaksin

PRED : yang yang tangga islami, bangsa yang islami, pemerintahan yang islami, negara yang dipimpin oleh negara - negara islam, menyatukan perpecahan kaum muslimin dan negara mereka yang ter ampas, kemudian membawa bendera jihad dan da <0xE2> <0x80> <0x99

In [17]:
# Fine-Tuned Model Performance Summary
epoch = 10
total_loss = 0
list_hyp = []
list_label = []
pbar = tqdm(test_loader, leave=True, total=len(test_loader))
for i, batch_data in enumerate(pbar):
    batch_seq = batch_data[-1]        
    loss, batch_hyp, batch_label = forward_generation(finetuned_model, batch_data, tokenizer, model_type='bart', device='cuda')

    # Calculate total loss
    valid_loss = loss.item()
    total_loss = total_loss + valid_loss

    # Calculate evaluation metrics
    list_hyp += batch_hyp
    list_label += batch_label
    metrics = generation_metrics_fn(list_hyp, list_label)

    pbar.set_description("TEST LOSS:{:.4f} {}".format(total_loss/(i+1), metrics_to_string(metrics)))

metrics = generation_metrics_fn(list_hyp, list_label)
print("(Epoch {}) TEST LOSS:{:.4f} {}".format((epoch),
    total_loss/(i+1), metrics_to_string(metrics)))

for batch in test_loader:
    loss, batch_hyp, batch_label = forward_generation(finetuned_model, batch, tokenizer, model_type='bart', device='cuda')
    for pred, label in zip(batch_hyp, batch_label):
        print('PRED : ' + pred)
        print('LABEL: ' + label)
        print()

TEST LOSS:0.4027 BLEU:91.68 SacreBLEU:91.71 ROUGE1:84.34 ROUGE2:68.87 ROUGEL:84.35 ROUGELsum:84.32 EM:56.73 F1:84.37: 100%|██████████| 54/54 [00:44<00:00,  1.21it/s]


(Epoch 10) TEST LOSS:0.4027 BLEU:91.68 SacreBLEU:91.71 ROUGE1:84.39 ROUGE2:68.75 ROUGEL:84.30 ROUGELsum:84.33 EM:56.73 F1:84.37
PRED : 48 jerawat, meningkatkan nafsu makan, anti kolesterol, anti inf lamasi, anemia, antioksidan, pencegah kanker, dan ant im ik ro ba
LABEL: obat jerawat, meningkatkan nafsu makan, anti kolesterol, anti inf lamasi, anemia, antioksidan, pencegah kanker, dan ant im ik ro ba

PRED : 1960 -
LABEL: 1960

PRED : komar soekarnoputri
LABEL: megawati soekarnoputri

PRED : abad ke - 19
LABEL: abad ke - 19

PRED : masjid nasional al akbar
LABEL: masjid nasional al akbar

PRED : won korea selatan
LABEL: won korea selatan

PRED : 5 1945
LABEL: oktober 1945

PRED : 03 april 1973
LABEL: 03 april 1973

PRED : 1942
LABEL: 1942

PRED : las se hal ls tr <0xC3> <0xB6> m
LABEL: las se hal ls tr <0xC3> <0xB6> m

PRED : 5 desember 1900
LABEL: 5 desember 1900

PRED : ham adan, iran
LABEL: ham adan, iran

PRED : urutan okasi asam amino akan mengubah fungsi protein, dan lebih lanjut