# Load the fine-tuned model

In [10]:
# Detect whether the notebook runs on Google Colab and, if so, mount Drive.
# Only a failed import means "not on Colab"; an error raised by the mount
# itself is allowed to surface instead of being silently reported as
# IN_COLAB = False (which would only fail later with a less helpful message).
try:
    from google.colab import drive
except ImportError:
    IN_COLAB = False
else:
    drive.mount('/content/drive')
    IN_COLAB = True

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [11]:
%%capture
if IN_COLAB:
  !pip install -U transformers
  !pip install -U accelerate
  !pip install datasets
  !pip install bertviz
  !pip install evaluate

In [12]:
import pandas as pd
import torch
from bertviz import head_view
from datasets import load_dataset
from transformers import AutoModelForQuestionAnswering, AutoConfig, AutoTokenizer, AutoModel, utils, TrainingArguments, Trainer, pipeline

In [13]:
# Directory on the mounted Drive that holds the fine-tuned QA checkpoint.
model_path = "/content/drive/MyDrive/MASTERS KU/AUTUMN 2023/NLP/Week 39/qa_model_arabic"

# Configuration read from the checkpoint directory.
config = AutoConfig.from_pretrained(model_path)

# Fine-tuned question-answering model; attentions are requested so they can
# be visualised further down.
fine_tuned_model = AutoModelForQuestionAnswering.from_pretrained(
    model_path,
    output_attentions=True,
)

# NOTE(review): the tokenizer is loaded from the base checkpoint name rather
# than from model_path; confirm it matches the one used during fine-tuning.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

In [14]:
# Load the dataset once and take both splits from the resulting DatasetDict
# instead of calling load_dataset again for every split.
dataset = load_dataset("copenlu/answerable_tydiqa")
train = dataset['train']
val = dataset['validation']


In [15]:
def language_filter(dataset, lang):
    """Tell whether the 'language' entry of ``dataset`` equals ``lang``.

    Despite its name, ``dataset`` only needs to support ``dataset['language']``;
    in this notebook the function is used as a per-row filter predicate.

    Args:
        dataset: Indexable object with a 'language' key.
        lang: Language name to compare against.

    Returns:
        The result of comparing ``dataset['language']`` with ``lang``.
    """
    record_language = dataset['language']
    return record_language == lang
def _select_language(lang):
    """Return `dataset` restricted to the rows whose language is `lang`."""
    return dataset.filter(lambda row: language_filter(row, lang=lang))


# One filtered copy of the dataset per language of interest.
ind = _select_language('indonesian')
ben = _select_language('bengali')
ar = _select_language('arabic')

# Validation split of each language subset, used for the visualisations.
ind_val, ben_val, ar_val = (subset['validation'] for subset in (ind, ben, ar))

Filter:   0%|          | 0/116067 [00:00<?, ? examples/s]

Filter:   0%|          | 0/13325 [00:00<?, ? examples/s]

Filter:   0%|          | 0/116067 [00:00<?, ? examples/s]

Filter:   0%|          | 0/13325 [00:00<?, ? examples/s]

## Visualization

### Indonesian

In [16]:
utils.logging.set_verbosity_error()  # keep transformers warnings out of the cell output

# Pick one Indonesian validation question; the fixed seed makes the choice reproducible.
sample = ind_val.shuffle(seed=42).select(range(1))
input_text = sample['question_text'][0]

# Call the tokenizer directly: encode_plus() is deprecated in favour of __call__.
inputs = tokenizer(input_text, return_tensors='pt')
input_ids = inputs['input_ids']
attention_mask = inputs['attention_mask']

# Inference only, so skip gradient tracking for the forward pass.
with torch.no_grad():
    outputs = fine_tuned_model(input_ids, attention_mask=attention_mask)

# Read the attentions by name instead of relying on their position in the output.
attention = outputs.attentions
tokens = tokenizer.convert_ids_to_tokens(input_ids[0])  # token strings for the view labels
head_view(attention, tokens)

<IPython.core.display.Javascript object>

### Bengali

In [17]:
utils.logging.set_verbosity_error()  # keep transformers warnings out of the cell output

# Pick one Bengali validation question; the fixed seed makes the choice reproducible.
sample = ben_val.shuffle(seed=42).select(range(1))
input_text = sample['question_text'][0]

# Call the tokenizer directly: encode_plus() is deprecated in favour of __call__.
inputs = tokenizer(input_text, return_tensors='pt')
input_ids = inputs['input_ids']
attention_mask = inputs['attention_mask']

# Inference only, so skip gradient tracking for the forward pass.
with torch.no_grad():
    outputs = fine_tuned_model(input_ids, attention_mask=attention_mask)

# Read the attentions by name instead of relying on their position in the output.
attention = outputs.attentions
tokens = tokenizer.convert_ids_to_tokens(input_ids[0])  # token strings for the view labels
head_view(attention, tokens)

<IPython.core.display.Javascript object>

### Arabic

In [18]:
utils.logging.set_verbosity_error()  # keep transformers warnings out of the cell output

# Pick one Arabic validation question; the fixed seed makes the choice reproducible.
sample = ar_val.shuffle(seed=42).select(range(1))
input_text = sample['question_text'][0]

# Call the tokenizer directly: encode_plus() is deprecated in favour of __call__.
inputs = tokenizer(input_text, return_tensors='pt')
input_ids = inputs['input_ids']
attention_mask = inputs['attention_mask']

# Inference only, so skip gradient tracking for the forward pass.
with torch.no_grad():
    outputs = fine_tuned_model(input_ids, attention_mask=attention_mask)

# Read the attentions by name instead of relying on their position in the output.
attention = outputs.attentions
tokens = tokenizer.convert_ids_to_tokens(input_ids[0])  # token strings for the view labels
head_view(attention, tokens)

<IPython.core.display.Javascript object>