In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
config_df = pd.read_csv("config.csv")
config_df.head()

Unnamed: 0,config_id,model_type,model_name,pooling_type,epochs,dropout
0,0,sbert,sentence-transformers/distiluse-base-multiling...,mean,13,0.1
1,1,bert,indobenchmark/indobert-lite-base-p2,cls,24,0.3


In [3]:
import sys
import os
src_path = os.path.abspath(os.path.join(os.getcwd(), '..', '..', '..', '..'))
sys.path.append(src_path)
from transformers import BertTokenizer, AutoTokenizer
from main_regression import RegressionModel, AutomaticScoringDataset as AutomaticScoringDatasetBert
from torch.utils.data import DataLoader
import torch
import torch.nn.functional as F
import joblib
from main_similarity import SiameseModel, AutomaticScoringDataset as AutomaticScoringDatasetSBert

tokenizerBert = BertTokenizer.from_pretrained('indobenchmark/indobert-lite-base-p2')
def predict_bert(df_test, config_id, model):
    test_data = AutomaticScoringDatasetBert(df_test, tokenizerBert)
    test_dataloader = DataLoader(test_data, batch_size=16, shuffle=False, generator=torch.Generator().manual_seed(42), num_workers=0)
    
    checkpoint = torch.load(f"../../../models/best_model/spesific/model_{config_id}.pt", map_location='cuda', weights_only=True)
    model.load_state_dict(checkpoint)
    model.eval()
    all_predictions = []
    all_targets = []
    with torch.no_grad():
        for batchs in test_dataloader:
            try:
                # move to device
                batchs = {k: v.to('cuda') for k, v in batchs.items()}
                predictions = model(
                    batchs['input_ids'], 
                    batchs['attention_mask'], 
                    batchs['token_type_ids']).squeeze(1)
                preds = torch.clamp(predictions, 0, 1)

                all_predictions.extend(preds.detach().cpu().numpy())
                all_targets.extend(batchs['labels'].detach().cpu().numpy())
            except Exception as e:
                torch.cuda.empty_cache()

    return all_predictions, all_targets

tokenizerSBert = AutoTokenizer.from_pretrained("sentence-transformers/distiluse-base-multilingual-cased-v2")
def compute_cosine_similarity(dataloader, model):
        all_outputs = []
        all_scores = []
        with torch.no_grad():
            for batchs in dataloader:
                batchs = {
                    k: {kk: vv.to("cuda") for kk, vv in v.items()} if isinstance(v, dict) else v.to("cuda")
                    for k, v in batchs.items()
                }
                
                # get embedding for each student and reference answer
                reference_emb = model(batchs['reference_answer']['input_ids'], batchs['reference_answer']['attention_mask'])
                student_emb = model(batchs['student_answer']['input_ids'], batchs['student_answer']['attention_mask'])
                scores = batchs['labels'].float().view(-1)

                # Normalize embeddings
                ref_embedding = F.normalize(reference_emb, p=2, dim=1)
                student_embedding = F.normalize(student_emb, p=2, dim=1)

                # get cosine similarity
                similarity = F.cosine_similarity(ref_embedding, student_embedding, dim=1)
                similarity = torch.clamp(similarity, -1.0, 1.0)

                all_outputs.append(similarity)
                all_scores.append(scores)

        # Concatenate all batches
        X = torch.cat(all_outputs, dim=0).cpu().numpy()
        y = torch.cat(all_scores, dim=0).cpu().numpy()

        return X, y

def predict_sbert(df_test, config_id, model):
    test_data = AutomaticScoringDatasetSBert(df_test, tokenizerSBert)
    test_dataloader = DataLoader(test_data, batch_size=16, shuffle=False, generator=torch.Generator().manual_seed(42), num_workers=0)
    
    
    checkpoint = torch.load(f"../../../models/best_model/spesific/model_{config_id}.pt", map_location='cuda', weights_only=True)
    reg_model = joblib.load(f"../../../models/best_model/spesific/reg_{config_id}.pkl")
    model.load_state_dict(checkpoint)
    model.eval()
    X_test, y_test = compute_cosine_similarity(test_dataloader, model)
    y_test_pred = reg_model.predict(X_test.reshape(-1, 1))

    return X_test, y_test_pred, y_test

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'AlbertTokenizer'. 
The class this function is called from is 'BertTokenizer'.


In [17]:
data_answer = {
    "answer": [
        "Buah mengandung banyak vitamin seperti vitamin C yang bisa meningkatkan sistem imun, jadi tubuh tidak mudah sakit.",
        "Buah itu penting karena bisa bikin kenyang dan segar, jadi kita nggak perlu makan makanan berat.",
        "Makan buah bisa bikin tubuh jadi kurus karena buah mengandung banyak lemak yang dibakar saat tidur.",
    ],
}

data = {
    "reference_answer": [
        """Mengonsumsi buah membantu menjaga daya tahan tubuh karena kaya akan vitamin, mineral, dan antioksidan yang dibutuhkan tubuh."""
    ] * len(data_answer['answer']),
    "normalized_score": [
        10
    ] * len(data_answer['answer'])
}

data = data | data_answer

new_data = pd.DataFrame(data)
new_data.head()

Unnamed: 0,reference_answer,normalized_score,answer
0,Mengonsumsi buah membantu menjaga daya tahan t...,10,Buah mengandung banyak vitamin seperti vitamin...
1,Mengonsumsi buah membantu menjaga daya tahan t...,10,Buah itu penting karena bisa bikin kenyang dan...
2,Mengonsumsi buah membantu menjaga daya tahan t...,10,Makan buah bisa bikin tubuh jadi kurus karena ...


In [18]:
for row in config_df.itertuples():
    if row.model_type == "bert":
        model = RegressionModel('indobenchmark/indobert-lite-base-p2', pooling_type=row.pooling_type).to('cuda')
        all_predictions, all_targets = predict_bert(new_data, row.config_id, model)
    else:
        model = SiameseModel("sentence-transformers/distiluse-base-multilingual-cased-v2", pooling_type=row.pooling_type).to('cuda')
        similarity, all_predictions, all_targets = predict_sbert(new_data, row.config_id, model)
        new_data[f'similarity_{row.config_id}'] = similarity

    # Tambahkan prediksi ke new_data
    new_data[f"predicted_score_{row.config_id}"] = all_predictions

Some weights of the model checkpoint at indobenchmark/indobert-lite-base-p2 were not used when initializing AlbertModel: ['pooler.bias', 'pooler.weight']
- This IS expected if you are initializing AlbertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [19]:
new_data

Unnamed: 0,reference_answer,normalized_score,answer,similarity_0,predicted_score_0,predicted_score_1
0,Mengonsumsi buah membantu menjaga daya tahan t...,10,Buah mengandung banyak vitamin seperti vitamin...,0.523677,0.538415,0.388133
1,Mengonsumsi buah membantu menjaga daya tahan t...,10,Buah itu penting karena bisa bikin kenyang dan...,0.573071,0.586003,0.265608
2,Mengonsumsi buah membantu menjaga daya tahan t...,10,Makan buah bisa bikin tubuh jadi kurus karena ...,0.484408,0.500581,0.30473


In [10]:
data = new_data.drop(columns=['reference_answer', 'normalized_score'])
data.rename({"predicted_score_0": "predicted_score_similarity", "predicted_score_1": "predicted_score_direct"}, inplace=True)
json_data = data.to_json(orient='records', indent=4)
print(json_data)

[
    {
        "answer":"leonal messi adalah seorang pemain sepak bola asal argentina yang telah memenangkan word cup",
        "similarity_0":0.2570176721,
        "predicted_score_0":0.2815035582,
        "predicted_score_1":0.2812641263
    }
]
