# Google Colab Setup

In [1]:
!git clone https://github.com/haidarihza/sistem-penilaian-kompetensi-v2

!pip install batchbald_redux

import sys
sys.path.append('/content/sistem-penilaian-kompetensi-v2/Penilaian Kompetensi Inggris')

fatal: destination path 'sistem-penilaian-kompetensi-v2' already exists and is not an empty directory.


In [2]:
# Ask the user to upload the dataset file through Colab's upload helper.
# The returned object is consumed by the next cell, which iterates over its
# keys and parses each uploaded payload as JSON.
from google.colab import files
uploaded = files.upload()

Saving dataset.json to dataset (1).json


In [3]:
import json

# Fail early with a clear message: without any upload the loop below would
# never run and `raw_data` would stay undefined (or stale from a previous run).
if not uploaded:
  raise ValueError('No file was uploaded; upload the dataset JSON before running this cell.')

# Parse every uploaded payload and keep the 'data' entry of the last one
# (when several files are uploaded, the last file wins).
for payload in uploaded.values():
  raw_data = json.loads(payload)['data']

# Evaluation Setup

In [4]:
from src.model import *
import torch

In [5]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on a
# runtime without CUDA instead of failing at the first `.to(device)` / tensor
# creation.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [6]:
# Seed every random number generator that may be involved, not only torch,
# so that sampling done through `random` or numpy is reproducible as well.
import random

import numpy as np

SEED = 0
random.seed(SEED)
np.random.seed(SEED)
# Trailing semicolon hides the Generator repr that manual_seed returns.
torch.manual_seed(SEED);

<torch._C.Generator at 0x7c3f0c16a630>

# Active Learning Evaluation

Data split: 7 pool sets / 2 eval sets (1 set = 4 data points)<br>
Base model: sentence-transformers/all-mpnet-base-v2<br>

Acquisition function: PowerBALD (α=5)<br>
Acquisition size: 10<br>
Bayesian sample size: 5<br>
Acquisition steps: 2<br>

In [7]:
def _flatten_sets(data_sets):
    """Flatten a list of data sets into parallel per-transcript lists.

    Each set contributes its transcripts and labels as-is, while its
    'competence_levels' entry is repeated once per transcript so the three
    returned lists line up element by element.
    """
    flat_transcripts, flat_competence_sets, flat_labels = [], [], []
    for entry in data_sets:
        flat_transcripts.extend(entry['transcripts'])
        flat_competence_sets.extend([entry['competence_levels']] * len(entry['transcripts']))
        flat_labels.extend(entry['labels'])
    return flat_transcripts, flat_competence_sets, flat_labels


# First 7 sets form the acquisition pool; everything after is held out for evaluation.
pool_data, eval_data = raw_data[:7], raw_data[7:]

transcripts, competence_sets, labels = _flatten_sets(pool_data)
pool_dataset = TCDataset(transcripts, competence_sets, labels)

transcripts, competence_sets, labels = _flatten_sets(eval_data)
eval_dataset = TCDataset(transcripts, competence_sets, labels)

In [8]:
# Load the base checkpoint without any fine-tuned weights (state_dict_path=None)
# to get the baseline model evaluated before any acquisition step.
base_checkpoint = 'sentence-transformers/all-mpnet-base-v2'
model = CompetenceModel.load(
    base_checkpoint,
    'biencoder',
    state_dict_path=None,
    device=device,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [9]:
# Baseline: score the eval set with the model before any training.

with torch.no_grad():
    scores = model(eval_dataset.transcripts, eval_dataset.competence_sets)
    targets = torch.tensor(eval_dataset.label_indices, device=device)
    predictions = torch.argmax(scores, dim=1)
    # Fraction of eval rows whose highest-scoring class matches the label index.
    acc = torch.sum(predictions == targets).item() / scores.size(0)

print('Predict probabilities:')
print(scores)
print(f'Accuracy: {acc:.2f}')

Predict probabilities:
tensor([[0.2627, 0.1829, 0.1910, 0.3634],
        [0.2645, 0.2284, 0.2038, 0.3033],
        [0.1308, 0.1516, 0.2125, 0.5051],
        [0.1232, 0.1987, 0.2204, 0.4577],
        [0.2134, 0.2728, 0.2259, 0.2878],
        [0.1900, 0.3128, 0.2226, 0.2746],
        [0.1912, 0.2685, 0.2554, 0.2849],
        [0.2083, 0.2866, 0.2056, 0.2995]], device='cuda:0')
Accuracy: 0.38


## Acquisition Step 1

In [10]:
# Wrap the current model for the Bayesian scoring pass of this acquisition step
# (the next cell calls it with k=5).
# NOTE(review): presumably the wrapper produces k stochastic forward passes per
# input — confirm against BayesianCompetenceModel in src.model.
bayesian_model = BayesianCompetenceModel(model)

In [11]:
# Score the whole pool with the Bayesian model, then let PowerBALD choose the
# next batch of pool indices to label.
with torch.no_grad():
    # Score the pool in small chunks to keep GPU memory usage low.
    chunk_size = 5
    scores = []
    n = len(pool_dataset)
    for i in range(0, n, chunk_size):
      end = min(i + chunk_size, n)
      score = bayesian_model(pool_dataset.transcripts[i:end], pool_dataset.competence_sets[i:end], k=5)
      scores.append(score)

    scores = torch.cat(scores)
    # Clamp before taking the log: a probability that underflows to exactly 0
    # would give -inf, which can turn into NaN in the entropy computations.
    # Values above the smallest normal float are left unchanged.
    log_scores = torch.log(scores.clamp_min(torch.finfo(scores.dtype).tiny))
    batch = get_powerbald_batch(log_scores, batch_size=10, alpha=5)

Entropy:   0%|          | 0/28 [00:00<?, ?it/s]

Conditional Entropy:   0%|          | 0/28 [00:00<?, ?it/s]

In [12]:
def _without_acquired(items, acquired):
    """Return the items whose position is not among the acquired indices."""
    return [item for position, item in enumerate(items) if position not in acquired]


# The acquired pool entries become the training set. The Subset keeps a
# reference to the current pool object, so rebinding `pool_dataset` below does
# not affect it.
train_dataset = torch.utils.data.Subset(pool_dataset, batch.indices)

# Rebuild the pool from everything that was not acquired.
# NOTE(review): label indices are passed here where the first TCDataset
# construction received raw labels — confirm TCDataset accepts both.
transcripts = _without_acquired(pool_dataset.transcripts, batch.indices)
competence_sets = _without_acquired(pool_dataset.competence_sets, batch.indices)
labels = _without_acquired(pool_dataset.label_indices, batch.indices)
pool_dataset = TCDataset(transcripts, competence_sets, labels)

In [13]:
# Train a freshly loaded base model for 5 epochs on the acquired samples.

model = CompetenceModel.load('sentence-transformers/all-mpnet-base-v2', 'biencoder', state_dict_path=None, device=device)

fit_options = {
    'epochs': 5,
    'batch_size': 4,
    'early_stop': False,
    'optimizer_cls': torch.optim.Adam,
    'optimizer_params': {'lr': 1e-5},
}
model.fit(train_dataset, eval_dataset, **fit_options)

Epoch 1 | Train Loss = 1.4330 | Train Acc = 0.0000 | Val Loss = 1.3008 | Val Acc = 0.3750
Epoch 2 | Train Loss = 1.3152 | Train Acc = 0.2000 | Val Loss = 1.2964 | Val Acc = 0.3750
Epoch 3 | Train Loss = 1.2554 | Train Acc = 0.3000 | Val Loss = 1.2872 | Val Acc = 0.3750
Epoch 4 | Train Loss = 1.1461 | Train Acc = 0.6000 | Val Loss = 1.2683 | Val Acc = 0.5000
Epoch 5 | Train Loss = 0.9969 | Train Acc = 0.6000 | Val Loss = 1.2390 | Val Acc = 0.3750


In [14]:
# Evaluate the model trained in acquisition step 1 on the held-out eval set.

with torch.no_grad():
    scores = model(eval_dataset.transcripts, eval_dataset.competence_sets)
    expected = torch.tensor(eval_dataset.label_indices, device=device)
    # Count rows where the arg-max class equals the expected label index.
    n_correct = torch.sum(torch.argmax(scores, dim=1) == expected).item()
    acc = n_correct / scores.size(0)

print('Predict probabilities:')
print(scores)
print(f'Accuracy: {acc:.2f}')

Predict probabilities:
tensor([[0.3404, 0.2107, 0.1735, 0.2754],
        [0.2870, 0.2462, 0.2022, 0.2646],
        [0.1069, 0.1777, 0.2057, 0.5097],
        [0.1081, 0.2210, 0.2268, 0.4441],
        [0.2701, 0.2790, 0.1932, 0.2576],
        [0.2165, 0.3354, 0.1961, 0.2520],
        [0.2222, 0.2656, 0.2476, 0.2645],
        [0.2392, 0.2902, 0.1820, 0.2886]], device='cuda:0')
Accuracy: 0.38


## Acquisition Step 2

In [15]:
# Re-wrap the model trained in acquisition step 1 so the second acquisition
# scores the pool with the updated weights (the next cell calls it with k=5).
# NOTE(review): presumably the wrapper produces k stochastic forward passes per
# input — confirm against BayesianCompetenceModel in src.model.
bayesian_model = BayesianCompetenceModel(model)

In [16]:
# Score the remaining pool with the Bayesian model, then let PowerBALD choose
# the second batch of pool indices to label.
with torch.no_grad():
    # Score the pool in small chunks to keep GPU memory usage low.
    chunk_size = 5
    scores = []
    n = len(pool_dataset)
    for i in range(0, n, chunk_size):
      end = min(i + chunk_size, n)
      score = bayesian_model(pool_dataset.transcripts[i:end], pool_dataset.competence_sets[i:end], k=5)
      scores.append(score)

    scores = torch.cat(scores)
    # Clamp before taking the log: a probability that underflows to exactly 0
    # would give -inf, which can turn into NaN in the entropy computations.
    # Values above the smallest normal float are left unchanged.
    log_scores = torch.log(scores.clamp_min(torch.finfo(scores.dtype).tiny))
    batch = get_powerbald_batch(log_scores, batch_size=10, alpha=5)

Entropy:   0%|          | 0/18 [00:00<?, ?it/s]

Conditional Entropy:   0%|          | 0/18 [00:00<?, ?it/s]

In [17]:
# Grow the training set with the newly acquired pool entries. The Subset keeps
# a reference to the current pool object, so rebinding `pool_dataset` below
# does not affect it.
newly_acquired = torch.utils.data.Subset(pool_dataset, batch.indices)
train_dataset = torch.utils.data.ConcatDataset([train_dataset, newly_acquired])

# Rebuild the pool from everything that was not acquired in this step.
# NOTE(review): label indices are passed here where the first TCDataset
# construction received raw labels — confirm TCDataset accepts both.
remaining = {}
for field in ('transcripts', 'competence_sets', 'label_indices'):
    remaining[field] = [
        value
        for position, value in enumerate(getattr(pool_dataset, field))
        if position not in batch.indices
    ]

transcripts = remaining['transcripts']
competence_sets = remaining['competence_sets']
labels = remaining['label_indices']
pool_dataset = TCDataset(transcripts, competence_sets, labels)

In [18]:
# Train a freshly loaded base model for 5 epochs on all samples acquired so far.
# NOTE(review): in the recorded run the train/val loss jumps sharply at epoch 4
# and the following predictions are mostly uniform — worth checking for
# numerical instability.

model = CompetenceModel.load('sentence-transformers/all-mpnet-base-v2', 'biencoder', state_dict_path=None, device=device)

adam_settings = {'lr': 1e-5}
model.fit(
    train_dataset,
    eval_dataset,
    epochs=5,
    batch_size=4,
    early_stop=False,
    optimizer_cls=torch.optim.Adam,
    optimizer_params=adam_settings,
)

Epoch 1 | Train Loss = 1.3727 | Train Acc = 0.1500 | Val Loss = 1.2761 | Val Acc = 0.3750
Epoch 2 | Train Loss = 1.2674 | Train Acc = 0.4500 | Val Loss = 1.2490 | Val Acc = 0.5000
Epoch 3 | Train Loss = 1.1904 | Train Acc = 0.5500 | Val Loss = 1.2612 | Val Acc = 0.6250
Epoch 4 | Train Loss = 2.7675 | Train Acc = 0.7500 | Val Loss = 5.9870 | Val Acc = 0.6250
Epoch 5 | Train Loss = 2.7546 | Train Acc = 0.6000 | Val Loss = 6.3981 | Val Acc = 0.5000


In [19]:
# Evaluate the model trained in acquisition step 2 on the held-out eval set.

with torch.no_grad():
    scores = model(eval_dataset.transcripts, eval_dataset.competence_sets)
    gold = torch.tensor(eval_dataset.label_indices, device=device)
    # Boolean mask of rows whose arg-max class equals the gold label index.
    hits = scores.argmax(dim=1).eq(gold)
    acc = hits.sum().item() / scores.size(0)

print('Predict probabilities:')
print(scores)
print(f'Accuracy: {acc:.2f}')

Predict probabilities:
tensor([[2.5000e-01, 2.5000e-01, 2.5000e-01, 2.5000e-01],
        [1.0000e+00, 1.5564e-19, 1.5564e-19, 1.5564e-19],
        [2.5000e-01, 2.5000e-01, 2.5000e-01, 2.5000e-01],
        [7.6811e-20, 7.6811e-20, 7.6811e-20, 1.0000e+00],
        [2.5000e-01, 2.5000e-01, 2.5000e-01, 2.5000e-01],
        [2.5000e-01, 2.5000e-01, 2.5000e-01, 2.5000e-01],
        [2.5000e-01, 2.5000e-01, 2.5000e-01, 2.5000e-01],
        [3.3057e-01, 2.8138e-01, 2.2768e-20, 3.8805e-01]], device='cuda:0')
Accuracy: 0.50
